Add Readme and rename files to something more fitting

tobias
2025-02-16 19:31:11 +01:00
parent 1973dc7031
commit 1849418ee6
3 changed files with 101 additions and 40 deletions

Readme.md (new file, 58 lines)

@@ -0,0 +1,58 @@
# MFT Carver and Parser
This repository contains two tools designed to help you work with NTFS Master File Table (MFT) records:
- **carve_mft.go (Golang Program)**
A robust, multi-threaded tool for carving MFT records from disk images. It extracts 1024-byte blocks starting at each MFT entry, parses various attributes, and outputs detailed information as JSONL. It supports processing multiple files or entire directories in parallel, with optional raw record dumping.
- **show_mft.py (Python Script)**
A self-contained Python script that parses a single extracted MFT record and prints out as much detail as possible—including MAC times, file attributes, object IDs, security descriptors, and file hashes—in a human-readable format.
---
## Features
### carve_mft.go (Golang)
- **Parallel Processing:** Utilizes all available CPU cores to carve and parse disk images efficiently.
- **Flexible Input:** Accepts a single disk image or a directory of images.
- **Configurable Output:** Options to dump raw MFT records to a specified folder and to append parsed records to a JSONL file.
- **Robust Parsing:** Extracts multiple attributes (e.g., Standard Information, File Name, Data, and more) and is designed to handle corrupted disk images gracefully.
### show_mft.py (Python)
- **Detailed Record Inspection:** Parses a given MFT record file and displays comprehensive details using Python's pretty-print.
- **Hashing Support:** Computes MD5, SHA256, SHA512, and CRC32 of the record for integrity verification.
- **Minimal Dependencies:** Self-contained script with minimal external requirements.
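
For reference, the same four digests can be computed over a carved record with nothing but the Go standard library. This is a minimal sketch rather than part of either tool; the `hashRecord` helper and its output format are illustrative:

```go
package main

import (
	"crypto/md5"
	"crypto/sha256"
	"crypto/sha512"
	"fmt"
	"hash/crc32"
	"os"
)

// hashRecord prints MD5, SHA256, SHA512, and CRC32 (IEEE) for a record buffer.
func hashRecord(record []byte) {
	fmt.Printf("md5:    %x\n", md5.Sum(record))
	fmt.Printf("sha256: %x\n", sha256.Sum256(record))
	fmt.Printf("sha512: %x\n", sha512.Sum512(record))
	fmt.Printf("crc32:  %08x\n", crc32.ChecksumIEEE(record))
}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: hashrecord <record file>")
		os.Exit(1)
	}
	record, err := os.ReadFile(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	hashRecord(record)
}
```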
---
## Requirements
- **carve_mft:** Go (latest stable version recommended)
- **show_mft.py:** Python 3.x
---
## Usage
### Building and Running carve_mft
1. **Build:**
   ```bash
   go build -o carve_mft ./path/to/carve_mft.go
   ```
2. **Run:** The tool accepts several command-line options:
   - `--dump`: Enable dumping of raw MFT records (off by default).
   - `--jsonl`: Specify the output JSONL file path. If not provided, a default name based on the input file and timestamp is used.
   - `--dump-folder`: Specify a folder to store raw dumps. If not provided, a default folder is created.

   Example:
   ```bash
   ./carve_mft --dump --jsonl=/path/to/output.jsonl --dump-folder=/path/to/dumps /path/to/disk/image/or/directory
   ```

### Running show_mft.py
Point the script at an extracted MFT record file:
```bash
python show_mft.py path/to/extracted_record.bin
```

carve_mft.go

@@ -132,33 +132,25 @@ func parseZoneIdentifier(content []byte) map[string]string {
// Attribute Parsing // Attribute Parsing
// -------------------- // --------------------
// parseAttributes iterates over the attribute area and processes known types.
// It uses safeSlice to ensure we dont read beyond the record.
func parseAttributes(data []byte, rec *MFTRecord) { func parseAttributes(data []byte, rec *MFTRecord) {
offset := int(rec.FirstAttrOffset) offset := int(rec.FirstAttrOffset)
for offset < len(data)-8 { for offset < len(data)-8 {
// First 4 bytes: attribute type
if attrBytes, ok := safeSlice(data, offset, 4); !ok { if attrBytes, ok := safeSlice(data, offset, 4); !ok {
break break
} else { } else {
attrType := binary.LittleEndian.Uint32(attrBytes) attrType := binary.LittleEndian.Uint32(attrBytes)
// End marker
if attrType == 0xFFFFFFFF { if attrType == 0xFFFFFFFF {
break break
} }
// Next 4 bytes: attribute length
if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok { if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
break break
} else { } else {
attrLen := binary.LittleEndian.Uint32(attrLenBytes) attrLen := binary.LittleEndian.Uint32(attrLenBytes)
if attrLen < 8 || offset+int(attrLen) > len(data) { if attrLen < 8 || offset+int(attrLen) > len(data) {
// Malformed attribute, skip one byte and try to re-sync.
offset++ offset++
continue continue
} }
// Resident flag at offset+8.
residentFlag := data[offset+8] residentFlag := data[offset+8]
// For resident attributes, extract value length and value offset.
var valLen uint32 var valLen uint32
var valOffset uint16 var valOffset uint16
if residentFlag == 0 { if residentFlag == 0 {
@@ -175,7 +167,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
continue continue
} }
} }
// Process known attribute types.
switch attrType { switch attrType {
case 0x10: // $STANDARD_INFORMATION case 0x10: // $STANDARD_INFORMATION
if residentFlag == 0 { if residentFlag == 0 {
@@ -206,7 +197,6 @@ func parseAttributes(data []byte, rec *MFTRecord) {
} }
case 0x80: // $DATA case 0x80: // $DATA
var ds DataStream var ds DataStream
// Extract the attribute's name if any.
if nameInfo, ok := safeSlice(data, offset+9, 1); ok { if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
nameLen := nameInfo[0] nameLen := nameInfo[0]
if nameBytes, ok := safeSlice(data, offset+10, int(nameLen)*2); ok && nameLen > 0 { if nameBytes, ok := safeSlice(data, offset+10, int(nameLen)*2); ok && nameLen > 0 {
@@ -228,7 +218,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
ds.NonResident = true ds.NonResident = true
} }
rec.DataStreams = append(rec.DataStreams, ds) rec.DataStreams = append(rec.DataStreams, ds)
case 0x40: // $OBJECT_ID (when used as such) case 0x40: // $OBJECT_ID
if residentFlag == 0 { if residentFlag == 0 {
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok { if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
rec.ObjectID = fmt.Sprintf("%x", objData) rec.ObjectID = fmt.Sprintf("%x", objData)
@@ -236,16 +226,12 @@ func parseAttributes(data []byte, rec *MFTRecord) {
} }
case 0x50: // $SECURITY_DESCRIPTOR case 0x50: // $SECURITY_DESCRIPTOR
if residentFlag == 0 { if residentFlag == 0 {
// A minimal parser: extract the first 20 bytes fields if possible.
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 { if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
// We could decode further; here we just store raw hex values.
rec.SecurityDescriptor = map[string]interface{}{ rec.SecurityDescriptor = map[string]interface{}{
"raw": fmt.Sprintf("%x", secData), "raw": fmt.Sprintf("%x", secData),
} }
} }
} }
// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
// can be added here following similar patterns.)
} }
offset += int(attrLen) offset += int(attrLen)
} }
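
The helper `safeSlice` is called throughout `parseAttributes` but its definition falls outside the hunks shown here. Judging from the call sites (slice, offset, and length in; sub-slice and ok flag out), a plausible implementation is the following sketch; the actual code in the repository may differ:

```go
// safeSlice returns data[offset : offset+length] and true if the range
// lies entirely within data; otherwise it returns nil and false.
// Sketch only: the real implementation in carve_mft.go may differ.
func safeSlice(data []byte, offset, length int) ([]byte, bool) {
	if offset < 0 || length < 0 || offset+length > len(data) {
		return nil, false
	}
	return data[offset : offset+length], true
}
```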
@@ -253,8 +239,7 @@ func parseAttributes(data []byte, rec *MFTRecord) {
 	}
 }
-// parseMFTRecord attempts to parse a 1024-byte MFT record.
-// It returns an error if the record is too short or if the expected "FILE" marker is missing.
+// parseMFTRecord parses a 1024-byte MFT record.
 func parseMFTRecord(data []byte) (*MFTRecord, error) {
 	if len(data) < 46 {
 		return nil, fmt.Errorf("data too short to be a valid record")
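
The 46-byte minimum and the "FILE" check correspond to the fixed NTFS record header that precedes the attribute area. The sketch below reads the header fields this parser relies on; the offsets are the documented NTFS on-disk values, while `parseHeader` and its return values are illustrative rather than the tool's actual API:

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// parseHeader reads the fixed NTFS record header fields that precede the
// attribute area. Sketch only: names and return values are illustrative.
func parseHeader(data []byte) (firstAttrOffset uint16, inUse, isDir bool, err error) {
	if len(data) < 46 {
		return 0, false, false, fmt.Errorf("data too short to be a valid record")
	}
	if !bytes.Equal(data[0:4], []byte("FILE")) {
		return 0, false, false, fmt.Errorf("missing FILE signature")
	}
	firstAttrOffset = binary.LittleEndian.Uint16(data[20:22]) // offset of first attribute
	flags := binary.LittleEndian.Uint16(data[22:24])
	inUse = flags&0x0001 != 0 // record is allocated
	isDir = flags&0x0002 != 0 // record describes a directory
	return firstAttrOffset, inUse, isDir, nil
}

func main() {
	record := make([]byte, 1024)
	copy(record, "FILE")
	binary.LittleEndian.PutUint16(record[20:22], 56)
	off, inUse, isDir, err := parseHeader(record)
	if err == nil {
		fmt.Println(off, inUse, isDir) // 56 false false
	}
}
```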
@@ -284,6 +269,12 @@ func parseMFTRecord(data []byte) (*MFTRecord, error) {
 // Parallel Processing and Main
 // --------------------
+var (
+	dumpFlag   = flag.Bool("dump", false, "Dump raw MFT records (off by default)")
+	jsonlOut   = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")
+	dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
+)
 func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	defer wg.Done()
@@ -301,18 +292,37 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	}
 	fileSize := fi.Size()
-	// Create an output folder and JSONL file based on input file name and current timestamp.
 	timestamp := time.Now().Format("20060102150405")
 	baseName := filepath.Base(inputFile)
-	outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
-	if err := os.Mkdir(outDir, 0755); err != nil {
-		log.Printf("Failed to create output directory for %s: %v", inputFile, err)
-		return
+	// Determine dump folder: if provided, use that; else use default "<inputFile>_<timestamp>"
+	var outDir string
+	if *dumpFolder != "" {
+		outDir = *dumpFolder
+		// Ensure the folder exists.
+		if err := os.MkdirAll(outDir, 0755); err != nil {
+			log.Printf("Failed to create dump folder %s: %v", outDir, err)
+			return
+		}
+	} else {
+		outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
+		if err := os.Mkdir(outDir, 0755); err != nil {
+			log.Printf("Failed to create output directory for %s: %v", inputFile, err)
+			return
+		}
 	}
-	jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
-	jsonlFile, err := os.Create(jsonlFileName)
+	// Determine JSONL output file: if provided, use that; else use default "<inputFile>_<timestamp>.jsonl"
+	var jsonlPath string
+	if *jsonlOut != "" {
+		jsonlPath = *jsonlOut
+	} else {
+		jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
+	}
+	// Open JSONL file in append mode (create if it doesn't exist)
+	jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
 	if err != nil {
-		log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
+		log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
 		return
 	}
 	defer jsonlFile.Close()
@@ -320,7 +330,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	carvedChan := make(chan CarvedRecord, 100)
 	parsedChan := make(chan ParsedRecord, 100)
-	// Worker pool for carving/parsing records.
 	numWorkers := runtime.NumCPU()
 	var workerWg sync.WaitGroup
 	for i := 0; i < numWorkers; i++ {
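
The hunks above and below implement a fan-out/fan-in pipeline: the scanner feeds carvedChan, one worker per CPU core parses records into parsedChan, and a single writer goroutine serializes the JSONL output. Stripped of the MFT specifics, the pattern and its shutdown order look like this generic sketch (names are stand-ins, not the tool's code):

```go
package main

import (
	"fmt"
	"runtime"
	"sync"
)

func main() {
	jobs := make(chan int, 100)       // carvedChan in the tool
	results := make(chan string, 100) // parsedChan in the tool

	// Fan out: one worker per CPU core.
	var workerWg sync.WaitGroup
	for i := 0; i < runtime.NumCPU(); i++ {
		workerWg.Add(1)
		go func() {
			defer workerWg.Done()
			for j := range jobs {
				results <- fmt.Sprintf("parsed %d", j) // stand-in for parseMFTRecord
			}
		}()
	}

	// Fan in: a single writer keeps output serialized.
	var writerWg sync.WaitGroup
	writerWg.Add(1)
	go func() {
		defer writerWg.Done()
		for r := range results {
			fmt.Println(r) // stand-in for the JSONL write
		}
	}()

	for i := 0; i < 10; i++ {
		jobs <- i
	}
	close(jobs)       // no more work
	workerWg.Wait()   // all workers drained
	close(results)    // writer can finish
	writerWg.Wait()
}
```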
@@ -328,19 +337,17 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		go func() {
 			defer workerWg.Done()
 			for carved := range carvedChan {
-				// Write raw record to disk.
-				recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
-				if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
-					log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
-					continue
+				// If dumping is enabled, write the raw record file.
+				if *dumpFlag {
+					recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
+					if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
+						log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
+					}
 				}
-				// Parse the record.
 				mft, err := parseMFTRecord(carved.RecordBytes)
 				if err != nil {
-					// Skip records that cannot be parsed.
 					continue
 				}
-				// Build JSON record (omitting the magic field).
 				recordMap := map[string]interface{}{
 					"input_image": inputFile,
 					"offset": carved.Offset,
@@ -370,7 +377,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		}()
 	}
-	// Writer goroutine to output JSONL records.
 	var writerWg sync.WaitGroup
 	writerWg.Add(1)
 	go func() {
@@ -383,7 +389,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 		}
 	}()
-	// Scan the file for the "FILE0" pattern.
 	reader := bufio.NewReader(f)
 	const recordSize = 1024
 	const chunkSize = 1024 * 1024
@@ -399,7 +404,6 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 			break
 		}
 		data := append(leftover, chunk[:n]...)
-		// Progress update every ~5 seconds.
 		if time.Since(lastPrint) > 5*time.Second {
 			perc := float64(fileOffset) / float64(fileSize) * 100.0
 			log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
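
Because a 1024-byte record can straddle the 1 MiB chunk boundary, the scanner must carry a tail of each chunk over to the next read. A minimal sketch of that carry-over logic follows; keeping recordSize-1 bytes of leftover is an assumption for illustration, and the tool's actual overlap handling may differ:

```go
package main

import (
	"bytes"
	"fmt"
)

const recordSize = 1024

// scanChunks finds every "FILE0" occurrence whose full record fits in the
// stream, carrying the last recordSize-1 bytes of each chunk as leftover so
// matches on chunk boundaries are not missed. Sketch only, not the tool's code.
func scanChunks(chunks [][]byte) []int {
	var offsets []int
	var leftover []byte
	base := 0 // absolute offset of leftover[0]
	for _, chunk := range chunks {
		data := append(leftover, chunk...)
		for i := 0; i+recordSize <= len(data); {
			j := bytes.Index(data[i:], []byte("FILE0"))
			if j < 0 {
				break
			}
			hit := i + j
			if hit+recordSize > len(data) {
				break // record continues into the next chunk; retry there
			}
			offsets = append(offsets, base+hit)
			i = hit + 1
		}
		if len(data) > recordSize-1 {
			base += len(data) - (recordSize - 1)
			leftover = append([]byte(nil), data[len(data)-(recordSize-1):]...)
		} else {
			leftover = append([]byte(nil), data...)
		}
	}
	return offsets
}

func main() {
	img := make([]byte, 3000)
	copy(img[1020:], "FILE0") // record spans the 2048-byte chunk boundary
	fmt.Println(scanChunks([][]byte{img[:2048], img[2048:]})) // [1020]
}
```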
@@ -436,17 +440,16 @@ func processImageFile(inputFile string, wg *sync.WaitGroup) {
 	workerWg.Wait()
 	close(parsedChan)
 	writerWg.Wait()
-	log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
+	log.Printf("Finished processing %s. Dumps (if enabled) are in %s and JSONL file is %s", inputFile, outDir, jsonlPath)
 }
 func main() {
 	flag.Parse()
 	if flag.NArg() == 0 {
-		fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
+		fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
 		os.Exit(1)
 	}
-	// Build list of files from provided arguments (recursively if directories).
 	var files []string
 	for _, arg := range flag.Args() {
 		fi, err := os.Stat(arg)
