From 1849418ee689c96db5a2d0fe0a21384f85a53cc4 Mon Sep 17 00:00:00 2001
From: tobias <tobiaskessels@cydefex.de>
Date: Sun, 16 Feb 2025 19:31:11 +0100
Subject: [PATCH] Add Readme and rename files to something more fitting

---
 Readme.md              | 58 +++++++++++++++++++++++++++++
 mft.go => carve_mft.go | 83 ++++++++++++++++++++++--------------------
 mft.py => show_mft.py  |  0
 3 files changed, 101 insertions(+), 40 deletions(-)
 create mode 100644 Readme.md
 rename mft.go => carve_mft.go (85%)
 rename mft.py => show_mft.py (100%)

diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..42461be
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,58 @@
+# MFT Carver and Parser
+
+This repository contains two tools designed to help you work with NTFS Master File Table (MFT) records:
+
+- **carve_mft.go (Golang Program)**  
+  A robust, multi-threaded tool for carving MFT records from disk images. It extracts 1024-byte blocks starting at each MFT entry, parses various attributes, and outputs detailed information as JSONL. It supports processing multiple files or entire directories in parallel, with optional raw record dumping.
+
+- **show_mft.py (Python Script)**  
+  A self-contained Python script that parses a single extracted MFT record and prints out as much detail as possible—including MAC times, file attributes, object IDs, security descriptors, and file hashes—in a human-readable format.
+
+---
+
+## Features
+
+### carve_mft.go (Golang)
+- **Parallel Processing:** Utilizes all available CPU cores to carve and parse disk images efficiently.
+- **Flexible Input:** Accepts a single disk image or a directory of images.
+- **Configurable Output:** Options to dump raw MFT records to a specified folder and to append parsed records to a JSONL file.
+- **Robust Parsing:** Extracts multiple attributes (e.g., Standard Information, File Name, Data, and more) and is designed to handle corrupted disk images gracefully.
+
+### show_mft.py (Python)
+- **Detailed Record Inspection:** Parses a given MFT record file and displays comprehensive details using Python's pretty-print.
+- **Hashing Support:** Computes MD5, SHA256, SHA512, and CRC32 of the record for integrity verification.
+- **Minimal Dependencies:** Self-contained script with minimal external requirements.
+
+---
+
+## Requirements
+
+- **carve_mft:** Go (latest stable version recommended)
+- **show_mft.py:** Python 3.x
+
+---
+
+## Usage
+
+### Building and Running carve_mft
+
+1. **Build:**
+   ```bash
+   go build -o carve_mft ./path/to/carve_mft.go
+   ```
+2. **Run:** The tool accepts several command-line options:
+   - `--dump`: Enable dumping of raw MFT records (off by default).
+   - `--jsonl`: Specify the output JSONL file path. If not provided, a default name based on the input file and timestamp is used.
+   - `--dump-folder`: Specify a folder to store raw dumps. If not provided, a default folder is created.
+
+   Example:
+   ```bash
+   ./carve_mft --dump --jsonl=/path/to/output.jsonl --dump-folder=/path/to/dumps /path/to/disk/image/or/directory
+   ```
+### Running show_mft.py
+
+Simply point the script at an extracted MFT record file:
+```bash
+python show_mft.py path/to/extracted_record.bin
+```
+
diff --git a/mft.go b/carve_mft.go
similarity index 85%
rename from mft.go
rename to carve_mft.go
index 7665e35..1d085bb 100755
--- a/mft.go
+++ b/carve_mft.go
@@ -132,33 +132,25 @@ func parseZoneIdentifier(content []byte) map[string]string {
 // Attribute Parsing
 // --------------------
 
-// parseAttributes iterates over the attribute area and processes known types.
-// It uses safeSlice to ensure we don’t read beyond the record.
 func parseAttributes(data []byte, rec *MFTRecord) {
 	offset := int(rec.FirstAttrOffset)
 	for offset < len(data)-8 {
-		// First 4 bytes: attribute type
 		if attrBytes, ok := safeSlice(data, offset, 4); !ok {
 			break
 		} else {
 			attrType := binary.LittleEndian.Uint32(attrBytes)
-			// End marker
 			if attrType == 0xFFFFFFFF {
 				break
 			}
-			// Next 4 bytes: attribute length
 			if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
 				break
 			} else {
 				attrLen := binary.LittleEndian.Uint32(attrLenBytes)
 				if attrLen < 8 || offset+int(attrLen) > len(data) {
-					// Malformed attribute, skip one byte and try to re-sync.
 					offset++
 					continue
 				}
-				// Resident flag at offset+8.
 				residentFlag := data[offset+8]
-				// For resident attributes, extract value length and value offset.
 				var valLen uint32
 				var valOffset uint16
 				if residentFlag == 0 {
@@ -175,7 +167,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 						continue
 					}
 				}
-				// Process known attribute types.
 				switch attrType {
 				case 0x10: // $STANDARD_INFORMATION
 					if residentFlag == 0 {
@@ -206,7 +197,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 					}
 				case 0x80: // $DATA
 					var ds DataStream
-					// Extract the attribute's name if any.
 					if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
 						nameLen := nameInfo[0]
 						if nameBytes, ok := safeSlice(data, offset+10, int(nameLen)*2); ok && nameLen > 0 {
@@ -228,7 +218,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 						ds.NonResident = true
 					}
 					rec.DataStreams = append(rec.DataStreams, ds)
-				case 0x40: // $OBJECT_ID (when used as such)
+				case 0x40: // $OBJECT_ID
 					if residentFlag == 0 {
 						if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
 							rec.ObjectID = fmt.Sprintf("%x", objData)
@@ -236,16 +226,12 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 					}
 				case 0x50: // $SECURITY_DESCRIPTOR
 					if residentFlag == 0 {
-						// A minimal parser: extract the first 20 bytes fields if possible.
 						if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
-							// We could decode further; here we just store raw hex values.
 							rec.SecurityDescriptor = map[string]interface{}{
 								"raw": fmt.Sprintf("%x", secData),
 							}
 						}
 					}
-					// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
-					// can be added here following similar patterns.)
 				}
 				offset += int(attrLen)
 			}
@@ -253,8 +239,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 	}
 }
 
-// parseMFTRecord attempts to parse a 1024-byte MFT record.
-// It returns an error if the record is too short or if the expected "FILE" marker is missing.
+// parseMFTRecord parses a 1024-byte MFT record.
 func parseMFTRecord(data []byte) (*MFTRecord, error) {
 	if len(data) < 46 {
 		return nil, fmt.Errorf("data too short to be a valid record")
@@ -284,6 +269,12 @@ func parseMFTRecord(data []byte) (*MFTRecord, error) {
 // Parallel Processing and Main
 // --------------------
 
+var (
+	dumpFlag   = flag.Bool("dump", false, "Dump raw MFT records (off by default)")
+	jsonlOut   = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")
+	dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
+)
+
 func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	defer wg.Done()
 
@@ -301,18 +292,37 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	}
 	fileSize := fi.Size()
 
-	// Create an output folder and JSONL file based on input file name and current timestamp.
 	timestamp := time.Now().Format("20060102150405")
 	baseName := filepath.Base(inputFile)
-	outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
-	if err := os.Mkdir(outDir, 0755); err != nil {
-		log.Printf("Failed to create output directory for %s: %v", inputFile, err)
-		return
+
+	// Determine dump folder: if provided, use that; else use default "<inputFile>_<timestamp>"
+	var outDir string
+	if *dumpFolder != "" {
+		outDir = *dumpFolder
+		// Ensure the folder exists.
+		if err := os.MkdirAll(outDir, 0755); err != nil {
+			log.Printf("Failed to create dump folder %s: %v", outDir, err)
+			return
+		}
+	} else {
+		outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
+		if err := os.Mkdir(outDir, 0755); err != nil {
+			log.Printf("Failed to create output directory for %s: %v", inputFile, err)
+			return
+		}
 	}
-	jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
-	jsonlFile, err := os.Create(jsonlFileName)
+
+	// Determine JSONL output file: if provided, use that; else use default "<inputFile>_<timestamp>.jsonl"
+	var jsonlPath string
+	if *jsonlOut != "" {
+		jsonlPath = *jsonlOut
+	} else {
+		jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
+	}
+	// Open JSONL file in append mode (create if it doesn't exist)
+	jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
 	if err != nil {
-		log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
+		log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
 		return
 	}
 	defer jsonlFile.Close()
@@ -320,7 +330,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	carvedChan := make(chan CarvedRecord, 100)
 	parsedChan := make(chan ParsedRecord, 100)
 
-	// Worker pool for carving/parsing records.
 	numWorkers := runtime.NumCPU()
 	var workerWg sync.WaitGroup
 	for i := 0; i < numWorkers; i++ {
@@ -328,19 +337,17 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		go func() {
 			defer workerWg.Done()
 			for carved := range carvedChan {
-				// Write raw record to disk.
-				recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
-				if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
-					log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
-					continue
+				// If dumping is enabled, write the raw record file.
+				if *dumpFlag {
+					recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
+					if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
+						log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
+					}
 				}
-				// Parse the record.
 				mft, err := parseMFTRecord(carved.RecordBytes)
 				if err != nil {
-					// Skip records that cannot be parsed.
 					continue
 				}
-				// Build JSON record (omitting the magic field).
 				recordMap := map[string]interface{}{
 					"input_image":         inputFile,
 					"offset":              carved.Offset,
@@ -370,7 +377,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		}()
 	}
 
-	// Writer goroutine to output JSONL records.
 	var writerWg sync.WaitGroup
 	writerWg.Add(1)
 	go func() {
@@ -383,7 +389,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		}
 	}()
 
-	// Scan the file for the "FILE0" pattern.
 	reader := bufio.NewReader(f)
 	const recordSize = 1024
 	const chunkSize = 1024 * 1024
@@ -399,7 +404,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 			break
 		}
 		data := append(leftover, chunk[:n]...)
-		// Progress update every ~5 seconds.
 		if time.Since(lastPrint) > 5*time.Second {
 			perc := float64(fileOffset) / float64(fileSize) * 100.0
 			log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
@@ -436,17 +440,16 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	workerWg.Wait()
 	close(parsedChan)
 	writerWg.Wait()
-	log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
+	log.Printf("Finished processing %s. Dumps (if enabled) are in %s and JSONL file is %s", inputFile, outDir, jsonlPath)
 }
 
 func main() {
 	flag.Parse()
 	if flag.NArg() == 0 {
-		fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
+		fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
 		os.Exit(1)
 	}
 
-	// Build list of files from provided arguments (recursively if directories).
 	var files []string
 	for _, arg := range flag.Args() {
 		fi, err := os.Stat(arg)
diff --git a/mft.py b/show_mft.py
similarity index 100%
rename from mft.py
rename to show_mft.py