Add Readme and rename files to something more fitting
This commit is contained in:
58
Readme.md
Normal file
58
Readme.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# MFT Carver and Parser
|
||||
|
||||
This repository contains two tools designed to help you work with NTFS Master File Table (MFT) records:
|
||||
|
||||
- **carve_mft.go (Golang Program)**
|
||||
A robust, multi-threaded tool for carving MFT records from disk images. It extracts 1024-byte blocks starting at each MFT entry, parses various attributes, and outputs detailed information as JSONL. It supports processing multiple files or entire directories in parallel, with optional raw record dumping.
|
||||
|
||||
- **show_mft.py (Python Script)**
|
||||
A self-contained Python script that parses a single extracted MFT record and prints out as much detail as possible—including MAC times, file attributes, object IDs, security descriptors, and file hashes—in a human-readable format.
|
||||
|
||||
---
|
||||
|
||||
## Features
|
||||
|
||||
### carve_mft.go (Golang)
|
||||
- **Parallel Processing:** Utilizes all available CPU cores to carve and parse disk images efficiently.
|
||||
- **Flexible Input:** Accepts a single disk image or a directory of images.
|
||||
- **Configurable Output:** Options to dump raw MFT records to a specified folder and to append parsed records to a JSONL file.
|
||||
- **Robust Parsing:** Extracts multiple attributes (e.g., Standard Information, File Name, Data, and more) and is designed to handle corrupted disk images gracefully.
|
||||
|
||||
### show_mft.py (Python)
|
||||
- **Detailed Record Inspection:** Parses a given MFT record file and displays comprehensive details using Python's pretty-print.
|
||||
- **Hashing Support:** Computes MD5, SHA256, SHA512, and CRC32 of the record for integrity verification.
|
||||
- **Minimal Dependencies:** Self-contained script with minimal external requirements.
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
- **carve_mft:** Go (latest stable version recommended)
|
||||
- **show_mft.py:** Python 3.x
|
||||
|
||||
---
|
||||
|
||||
## Usage
|
||||
|
||||
### Building and Running carve_mft
|
||||
|
||||
1. **Build:**
|
||||
```bash
|
||||
go build -o carve_mft ./path/to/carve_mft.go
```
|
||||
|
||||
**Run:** The tool accepts several command-line options:
|
||||
- `--dump`: Enable dumping of raw MFT records (off by default).
|
||||
- `--jsonl`: Specify the output JSONL file path. If not provided, a default name based on the input file and timestamp is used.
|
||||
- `--dump-folder`: Specify a folder to store raw dumps. If not provided, a default folder is created.
|
||||
|
||||
Example:
|
||||
|
||||
```bash
./carve_mft --dump --jsonl=/path/to/output.jsonl --dump-folder=/path/to/dumps /path/to/disk/image/or/directory
```
|
||||
|
||||
### Running show_mft.py
|
||||
|
||||
Simply point the script at an extracted MFT record file:
|
||||
```bash
|
||||
python show_mft.py path/to/extracted_record.bin
|
||||
```
|
||||
|
||||
@@ -132,33 +132,25 @@ func parseZoneIdentifier(content []byte) map[string]string {
|
||||
// Attribute Parsing
|
||||
// --------------------
|
||||
|
||||
// parseAttributes iterates over the attribute area and processes known types.
|
||||
// It uses safeSlice to ensure we don’t read beyond the record.
|
||||
func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
offset := int(rec.FirstAttrOffset)
|
||||
for offset < len(data)-8 {
|
||||
// First 4 bytes: attribute type
|
||||
if attrBytes, ok := safeSlice(data, offset, 4); !ok {
|
||||
break
|
||||
} else {
|
||||
attrType := binary.LittleEndian.Uint32(attrBytes)
|
||||
// End marker
|
||||
if attrType == 0xFFFFFFFF {
|
||||
break
|
||||
}
|
||||
// Next 4 bytes: attribute length
|
||||
if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
|
||||
break
|
||||
} else {
|
||||
attrLen := binary.LittleEndian.Uint32(attrLenBytes)
|
||||
if attrLen < 8 || offset+int(attrLen) > len(data) {
|
||||
// Malformed attribute, skip one byte and try to re-sync.
|
||||
offset++
|
||||
continue
|
||||
}
|
||||
// Resident flag at offset+8.
|
||||
residentFlag := data[offset+8]
|
||||
// For resident attributes, extract value length and value offset.
|
||||
var valLen uint32
|
||||
var valOffset uint16
|
||||
if residentFlag == 0 {
|
||||
@@ -175,7 +167,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
// Process known attribute types.
|
||||
switch attrType {
|
||||
case 0x10: // $STANDARD_INFORMATION
|
||||
if residentFlag == 0 {
|
||||
@@ -206,7 +197,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
}
|
||||
case 0x80: // $DATA
|
||||
var ds DataStream
|
||||
// Extract the attribute's name if any.
|
||||
if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
|
||||
nameLen := nameInfo[0]
|
||||
if nameBytes, ok := safeSlice(data, offset+10, int(nameLen)*2); ok && nameLen > 0 {
|
||||
@@ -228,7 +218,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
ds.NonResident = true
|
||||
}
|
||||
rec.DataStreams = append(rec.DataStreams, ds)
|
||||
case 0x40: // $OBJECT_ID (when used as such)
|
||||
case 0x40: // $OBJECT_ID
|
||||
if residentFlag == 0 {
|
||||
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
|
||||
rec.ObjectID = fmt.Sprintf("%x", objData)
|
||||
@@ -236,16 +226,12 @@ func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
}
|
||||
case 0x50: // $SECURITY_DESCRIPTOR
|
||||
if residentFlag == 0 {
|
||||
// A minimal parser: extract the first 20 bytes fields if possible.
|
||||
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
|
||||
// We could decode further; here we just store raw hex values.
|
||||
rec.SecurityDescriptor = map[string]interface{}{
|
||||
"raw": fmt.Sprintf("%x", secData),
|
||||
}
|
||||
}
|
||||
}
|
||||
// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
|
||||
// can be added here following similar patterns.)
|
||||
}
|
||||
offset += int(attrLen)
|
||||
}
|
||||
@@ -253,8 +239,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
}
|
||||
}
|
||||
|
||||
// parseMFTRecord attempts to parse a 1024-byte MFT record.
|
||||
// It returns an error if the record is too short or if the expected "FILE" marker is missing.
|
||||
// parseMFTRecord parses a 1024-byte MFT record.
|
||||
func parseMFTRecord(data []byte) (*MFTRecord, error) {
|
||||
if len(data) < 46 {
|
||||
return nil, fmt.Errorf("data too short to be a valid record")
|
||||
@@ -284,6 +269,12 @@ func parseMFTRecord(data []byte) (*MFTRecord, error) {
|
||||
// Parallel Processing and Main
|
||||
// --------------------
|
||||
|
||||
// Command-line flags: -dump toggles writing raw records (default false);
// -jsonl and -dump-folder override the output locations and default to "",
// in which case per-input-file defaults are used.
var (
|
||||
dumpFlag = flag.Bool("dump", false, "Dump raw MFT records (off by default)")
|
||||
jsonlOut = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")
|
||||
dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
|
||||
)
|
||||
|
||||
func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
@@ -301,18 +292,37 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
}
|
||||
fileSize := fi.Size()
|
||||
|
||||
// Create an output folder and JSONL file based on input file name and current timestamp.
|
||||
timestamp := time.Now().Format("20060102150405")
|
||||
baseName := filepath.Base(inputFile)
|
||||
outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
|
||||
|
||||
// Determine dump folder: if provided, use that; else use default "<inputFile>_<timestamp>"
|
||||
var outDir string
|
||||
if *dumpFolder != "" {
|
||||
outDir = *dumpFolder
|
||||
// Ensure the folder exists.
|
||||
if err := os.MkdirAll(outDir, 0755); err != nil {
|
||||
log.Printf("Failed to create dump folder %s: %v", outDir, err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
|
||||
if err := os.Mkdir(outDir, 0755); err != nil {
|
||||
log.Printf("Failed to create output directory for %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
|
||||
jsonlFile, err := os.Create(jsonlFileName)
|
||||
}
|
||||
|
||||
// Determine JSONL output file: if provided, use that; else use default "<inputFile>_<timestamp>.jsonl"
|
||||
var jsonlPath string
|
||||
if *jsonlOut != "" {
|
||||
jsonlPath = *jsonlOut
|
||||
} else {
|
||||
jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
|
||||
}
|
||||
// Open JSONL file in append mode (create if it doesn't exist)
|
||||
jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
|
||||
log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
defer jsonlFile.Close()
|
||||
@@ -320,7 +330,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
carvedChan := make(chan CarvedRecord, 100)
|
||||
parsedChan := make(chan ParsedRecord, 100)
|
||||
|
||||
// Worker pool for carving/parsing records.
|
||||
numWorkers := runtime.NumCPU()
|
||||
var workerWg sync.WaitGroup
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
@@ -328,19 +337,17 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
go func() {
|
||||
defer workerWg.Done()
|
||||
for carved := range carvedChan {
|
||||
// Write raw record to disk.
|
||||
// If dumping is enabled, write the raw record file.
|
||||
if *dumpFlag {
|
||||
recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
|
||||
if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
|
||||
log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
|
||||
continue
|
||||
}
|
||||
// Parse the record.
|
||||
}
|
||||
mft, err := parseMFTRecord(carved.RecordBytes)
|
||||
if err != nil {
|
||||
// Skip records that cannot be parsed.
|
||||
continue
|
||||
}
|
||||
// Build JSON record (omitting the magic field).
|
||||
recordMap := map[string]interface{}{
|
||||
"input_image": inputFile,
|
||||
"offset": carved.Offset,
|
||||
@@ -370,7 +377,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
}()
|
||||
}
|
||||
|
||||
// Writer goroutine to output JSONL records.
|
||||
var writerWg sync.WaitGroup
|
||||
writerWg.Add(1)
|
||||
go func() {
|
||||
@@ -383,7 +389,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
}
|
||||
}()
|
||||
|
||||
// Scan the file for the "FILE0" pattern.
|
||||
reader := bufio.NewReader(f)
|
||||
const recordSize = 1024
|
||||
const chunkSize = 1024 * 1024
|
||||
@@ -399,7 +404,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
break
|
||||
}
|
||||
data := append(leftover, chunk[:n]...)
|
||||
// Progress update every ~5 seconds.
|
||||
if time.Since(lastPrint) > 5*time.Second {
|
||||
perc := float64(fileOffset) / float64(fileSize) * 100.0
|
||||
log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
|
||||
@@ -436,17 +440,16 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
workerWg.Wait()
|
||||
close(parsedChan)
|
||||
writerWg.Wait()
|
||||
log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
|
||||
log.Printf("Finished processing %s. Dumps (if enabled) are in %s and JSONL file is %s", inputFile, outDir, jsonlPath)
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
if flag.NArg() == 0 {
|
||||
fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
|
||||
fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Build list of files from provided arguments (recursively if directories).
|
||||
var files []string
|
||||
for _, arg := range flag.Args() {
|
||||
fi, err := os.Stat(arg)
|
||||
Reference in New Issue
Block a user