First commit of MFT carver and MFT parsers

This commit is contained in:
tobias
2025-02-16 18:57:07 +01:00
commit 1973dc7031
2 changed files with 1006 additions and 0 deletions

mft.go (executable file, 496 lines)

@@ -0,0 +1,496 @@
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"unicode/utf16"
)
// --------------------
// Data Structures
// --------------------
type FileNameAttribute struct {
Filename string `json:"filename"`
ParentRef uint64 `json:"parent_ref"`
Crtime string `json:"crtime"`
Mtime string `json:"mtime"`
CtTime string `json:"ctime"`
Atime string `json:"atime"`
}
type DataStream struct {
Name string `json:"name"`
Resident bool `json:"resident"`
NonResident bool `json:"non_resident"`
ContentBase64 string `json:"content_base64,omitempty"`
}
type MFTRecord struct {
// Header fields (omitting the "magic" since it's always "FILE")
UpdateSeqOffset uint16 `json:"update_seq_offset"`
UpdateSeqSize uint16 `json:"update_seq_size"`
LSN uint64 `json:"lsn"`
SequenceNumber uint16 `json:"sequence_number"`
HardLinkCount uint16 `json:"hard_link_count"`
FirstAttrOffset uint16 `json:"first_attr_offset"`
Flags uint16 `json:"flags"`
RealSize uint32 `json:"real_size"`
AllocatedSize uint32 `json:"allocated_size"`
BaseFileRecord uint64 `json:"base_file_record"`
NextAttrId uint16 `json:"next_attr_id"`
RecordNumber uint32 `json:"record_number"`
// Standard Information attribute timestamps (if present)
SI_Crtime string `json:"si_crtime,omitempty"`
SI_Mtime string `json:"si_mtime,omitempty"`
SI_CtTime string `json:"si_ctime,omitempty"`
SI_Atime string `json:"si_atime,omitempty"`
// Additional attributes
ObjectID string `json:"object_id,omitempty"`
SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
FileNames []FileNameAttribute `json:"file_names,omitempty"`
DataStreams []DataStream `json:"data_streams,omitempty"`
}
// CarvedRecord holds a candidate 1024-byte record and its global offset.
type CarvedRecord struct {
Offset int64
RecordBytes []byte
}
// ParsedRecord is what gets sent to the JSON writer.
type ParsedRecord struct {
Offset int64
RecordJSON map[string]interface{}
}
// --------------------
// Helper functions
// --------------------
// safeSlice returns data[start : start+length] if the range lies within bounds; otherwise it returns nil and false.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
if start < 0 || start+length > len(data) {
return nil, false
}
return data[start : start+length], true
}
// filetimeToString converts a Windows FILETIME (uint64) into an RFC3339 timestamp string.
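// For example, the FILETIME value 116444736000000000 (100 ns ticks since 1601-01-01 UTC) converts to 1970-01-01T00:00:00Z.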
func filetimeToString(ft uint64) string {
const epochDiff = 11644473600 // seconds between 1601 and 1970
secs := int64(ft/10000000) - epochDiff
nsec := int64(ft%10000000) * 100
t := time.Unix(secs, nsec).UTC()
return t.Format(time.RFC3339)
}
// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
func decodeUTF16String(b []byte) string {
if len(b)%2 != 0 {
b = b[:len(b)-1]
}
u16 := make([]uint16, len(b)/2)
for i := 0; i < len(u16); i++ {
u16[i] = binary.LittleEndian.Uint16(b[i*2:])
}
return string(utf16.Decode(u16))
}
// parseZoneIdentifier is a simple parser for Zone.Identifier streams.
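// Zone.Identifier is the "Mark of the Web" alternate data stream; its content is INI-like text
// such as "[ZoneTransfer]" followed by lines like "ZoneId=3".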
func parseZoneIdentifier(content []byte) map[string]string {
result := make(map[string]string)
text := string(content)
lines := strings.Split(text, "\n")
for _, line := range lines {
if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
result[key] = value
}
}
return result
}
// --------------------
// Attribute Parsing
// --------------------
// parseAttributes iterates over the attribute area and processes known types.
// It uses safeSlice to ensure we don't read beyond the record.
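// Resident attribute header layout assumed here: type (0-3), total length (4-7), non-resident flag (8),
// name length (9), name offset (10-11), value length (16-19), value offset (20-21).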
func parseAttributes(data []byte, rec *MFTRecord) {
offset := int(rec.FirstAttrOffset)
for offset < len(data)-8 {
// First 4 bytes: attribute type
if attrBytes, ok := safeSlice(data, offset, 4); !ok {
break
} else {
attrType := binary.LittleEndian.Uint32(attrBytes)
// End marker
if attrType == 0xFFFFFFFF {
break
}
// Next 4 bytes: attribute length
if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
break
} else {
attrLen := binary.LittleEndian.Uint32(attrLenBytes)
if attrLen < 8 || offset+int(attrLen) > len(data) {
// Malformed attribute, skip one byte and try to re-sync.
offset++
continue
}
// Resident flag at offset+8.
residentFlag := data[offset+8]
// For resident attributes, extract value length and value offset.
var valLen uint32
var valOffset uint16
if residentFlag == 0 {
if vb, ok := safeSlice(data, offset+16, 4); ok {
valLen = binary.LittleEndian.Uint32(vb)
} else {
offset += int(attrLen)
continue
}
if vb, ok := safeSlice(data, offset+20, 2); ok {
valOffset = binary.LittleEndian.Uint16(vb)
} else {
offset += int(attrLen)
continue
}
}
// Process known attribute types.
switch attrType {
case 0x10: // $STANDARD_INFORMATION
if residentFlag == 0 {
if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
}
}
case 0x30: // $FILE_NAME
if residentFlag == 0 {
if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
var fn FileNameAttribute
fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
filenameLen := filenameLenBytes[0]
if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
fn.Filename = decodeUTF16String(nameBytes)
}
}
rec.FileNames = append(rec.FileNames, fn)
}
}
case 0x80: // $DATA
var ds DataStream
// Extract the attribute's name, if any: the name length is at offset+9 and the
// 16-bit name offset (relative to the attribute start) is at offset+10.
if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
nameLen := nameInfo[0]
if nameLen > 0 {
if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok {
nameOff := binary.LittleEndian.Uint16(nameOffBytes)
if nameBytes, ok := safeSlice(data, offset+int(nameOff), int(nameLen)*2); ok {
ds.Name = decodeUTF16String(nameBytes)
}
}
}
}
if residentFlag == 0 {
ds.Resident = true
ds.NonResident = false
if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
if ds.Name == "Zone.Identifier" {
zoneInfo := parseZoneIdentifier(content)
ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
}
}
} else {
ds.Resident = false
ds.NonResident = true
}
rec.DataStreams = append(rec.DataStreams, ds)
case 0x40: // $OBJECT_ID (when used as such)
if residentFlag == 0 {
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
rec.ObjectID = fmt.Sprintf("%x", objData)
}
}
case 0x50: // $SECURITY_DESCRIPTOR
if residentFlag == 0 {
// A minimal parser: just store the raw descriptor if at least the 20-byte header is present.
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
// We could decode further; here we just store raw hex values.
rec.SecurityDescriptor = map[string]interface{}{
"raw": fmt.Sprintf("%x", secData),
}
}
}
// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
// can be added here following similar patterns.)
}
offset += int(attrLen)
}
}
}
}
// parseMFTRecord attempts to parse a 1024-byte MFT record.
// It returns an error if the record is too short or if the expected "FILE" marker is missing.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
if len(data) < 46 {
return nil, fmt.Errorf("data too short to be a valid record")
}
if string(data[:4]) != "FILE" {
return nil, fmt.Errorf("invalid record header")
}
rec := &MFTRecord{
UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
UpdateSeqSize: binary.LittleEndian.Uint16(data[6:8]),
LSN: binary.LittleEndian.Uint64(data[8:16]),
SequenceNumber: binary.LittleEndian.Uint16(data[16:18]),
HardLinkCount: binary.LittleEndian.Uint16(data[18:20]),
FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
Flags: binary.LittleEndian.Uint16(data[22:24]),
RealSize: binary.LittleEndian.Uint32(data[24:28]),
AllocatedSize: binary.LittleEndian.Uint32(data[28:32]),
BaseFileRecord: binary.LittleEndian.Uint64(data[32:40]),
NextAttrId: binary.LittleEndian.Uint16(data[40:42]),
RecordNumber: binary.LittleEndian.Uint32(data[42:46]),
}
parseAttributes(data, rec)
return rec, nil
}
// --------------------
// Parallel Processing and Main
// --------------------
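// processImageFile carves candidate MFT records from a single image file, writes each raw
// record to <image>_<timestamp>/<offset>.mftrecord, and emits one JSON line per parsed record.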
func processImageFile(inputFile string, wg *sync.WaitGroup) {
defer wg.Done()
f, err := os.Open(inputFile)
if err != nil {
log.Printf("Failed to open %s: %v", inputFile, err)
return
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
log.Printf("Failed to stat %s: %v", inputFile, err)
return
}
fileSize := fi.Size()
// Create an output folder and JSONL file based on input file name and current timestamp.
timestamp := time.Now().Format("20060102150405")
baseName := filepath.Base(inputFile)
outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
if err := os.Mkdir(outDir, 0755); err != nil {
log.Printf("Failed to create output directory for %s: %v", inputFile, err)
return
}
jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
jsonlFile, err := os.Create(jsonlFileName)
if err != nil {
log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
return
}
defer jsonlFile.Close()
carvedChan := make(chan CarvedRecord, 100)
parsedChan := make(chan ParsedRecord, 100)
// Worker pool for carving/parsing records.
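// The scanner below pushes candidate offsets into carvedChan; each worker writes the raw
// 1024-byte record to disk, parses it, and forwards a JSON map to parsedChan, which a single
// writer goroutine serializes into the JSONL file.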
numWorkers := runtime.NumCPU()
var workerWg sync.WaitGroup
for i := 0; i < numWorkers; i++ {
workerWg.Add(1)
go func() {
defer workerWg.Done()
for carved := range carvedChan {
// Write raw record to disk.
recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
continue
}
// Parse the record.
mft, err := parseMFTRecord(carved.RecordBytes)
if err != nil {
// Skip records that cannot be parsed.
continue
}
// Build JSON record (omitting the magic field).
recordMap := map[string]interface{}{
"input_image": inputFile,
"offset": carved.Offset,
"update_seq_offset": mft.UpdateSeqOffset,
"update_seq_size": mft.UpdateSeqSize,
"lsn": mft.LSN,
"sequence_number": mft.SequenceNumber,
"hard_link_count": mft.HardLinkCount,
"first_attr_offset": mft.FirstAttrOffset,
"flags": mft.Flags,
"real_size": mft.RealSize,
"allocated_size": mft.AllocatedSize,
"base_file_record": mft.BaseFileRecord,
"next_attr_id": mft.NextAttrId,
"record_number": mft.RecordNumber,
"si_crtime": mft.SI_Crtime,
"si_mtime": mft.SI_Mtime,
"si_ctime": mft.SI_CtTime,
"si_atime": mft.SI_Atime,
"object_id": mft.ObjectID,
"security_descriptor": mft.SecurityDescriptor,
"file_names": mft.FileNames,
"data_streams": mft.DataStreams,
}
parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
}
}()
}
// Writer goroutine to output JSONL records.
var writerWg sync.WaitGroup
writerWg.Add(1)
go func() {
defer writerWg.Done()
encoder := json.NewEncoder(jsonlFile)
for pr := range parsedChan {
if err := encoder.Encode(pr.RecordJSON); err != nil {
log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
}
}
}()
// Scan the file for the "FILE0" pattern.
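// "FILE" is the record magic; on modern NTFS volumes the update-sequence-array offset at bytes 4-5
// is usually 0x0030, so byte 4 is ASCII '0'. Matching that extra byte cuts down false positives
// (at the cost of missing older-format records).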
reader := bufio.NewReader(f)
const recordSize = 1024
const chunkSize = 1024 * 1024
pattern := []byte("FILE0")
var fileOffset int64 = 0
var leftover []byte
lastPrint := time.Now()
for {
chunk := make([]byte, chunkSize)
n, err := reader.Read(chunk)
if n == 0 {
break
}
data := append(leftover, chunk[:n]...)
// Progress update every ~5 seconds.
if time.Since(lastPrint) > 5*time.Second {
perc := float64(fileOffset) / float64(fileSize) * 100.0
log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
lastPrint = time.Now()
}
searchLimit := len(data) - len(pattern)
for i := 0; i <= searchLimit; i++ {
if bytes.Equal(data[i:i+len(pattern)], pattern) {
globalOffset := fileOffset - int64(len(leftover)) + int64(i)
if globalOffset+recordSize > fileSize {
continue
}
recordBytes := make([]byte, recordSize)
_, err := f.ReadAt(recordBytes, globalOffset)
if err != nil {
log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
continue
}
carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
}
}
if len(data) >= len(pattern)-1 {
leftover = data[len(data)-(len(pattern)-1):]
} else {
leftover = data
}
fileOffset += int64(n)
if err == io.EOF {
break
}
}
close(carvedChan)
workerWg.Wait()
close(parsedChan)
writerWg.Wait()
log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
}
func main() {
flag.Parse()
if flag.NArg() == 0 {
fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
os.Exit(1)
}
// Build list of files from provided arguments (recursively if directories).
var files []string
for _, arg := range flag.Args() {
fi, err := os.Stat(arg)
if err != nil {
log.Printf("Error stating %s: %v", arg, err)
continue
}
if fi.IsDir() {
err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
if !info.IsDir() {
files = append(files, path)
}
return nil
})
if err != nil {
log.Printf("Error walking directory %s: %v", arg, err)
}
} else {
files = append(files, arg)
}
}
totalFiles := len(files)
if totalFiles == 0 {
log.Println("No input files found.")
return
}
log.Printf("Found %d files to process.", totalFiles)
var wg sync.WaitGroup
concurrentFiles := runtime.NumCPU()
sem := make(chan struct{}, concurrentFiles)
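// The buffered channel acts as a counting semaphore so at most NumCPU images are processed concurrently.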
for i, file := range files {
wg.Add(1)
sem <- struct{}{}
go func(i int, file string) {
defer func() { <-sem }()
log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
processImageFile(file, &wg)
}(i, file)
}
wg.Wait()
log.Println("All files processed.")
}

mft.py (executable file, 510 lines)

@@ -0,0 +1,510 @@
#!/usr/bin/env python3
import struct
import uuid
import hashlib
import zlib
import sys
import pprint
import datetime
# --- Minimal Constants ---
MFT_RECORD_MAGIC_NUMBER_OFFSET = 0
MFT_RECORD_MAGIC_NUMBER_SIZE = 4
MFT_RECORD_UPDATE_SEQUENCE_OFFSET = 4
MFT_RECORD_UPDATE_SEQUENCE_SIZE = 2
MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET = 6
MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET = 8
MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_SIZE = 8
MFT_RECORD_SEQUENCE_NUMBER_OFFSET = 16
MFT_RECORD_SEQUENCE_NUMBER_SIZE = 2
MFT_RECORD_HARD_LINK_COUNT_OFFSET = 18
MFT_RECORD_HARD_LINK_COUNT_SIZE = 2
MFT_RECORD_FIRST_ATTRIBUTE_OFFSET = 20
MFT_RECORD_FIRST_ATTRIBUTE_SIZE = 2
MFT_RECORD_FLAGS_OFFSET = 22
MFT_RECORD_FLAGS_SIZE = 2
MFT_RECORD_USED_SIZE_OFFSET = 24
MFT_RECORD_USED_SIZE_SIZE = 4
MFT_RECORD_ALLOCATED_SIZE_OFFSET = 28
MFT_RECORD_ALLOCATED_SIZE_SIZE = 4
MFT_RECORD_FILE_REFERENCE_OFFSET = 32
MFT_RECORD_FILE_REFERENCE_SIZE = 8
MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET = 40
MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE = 2
MFT_RECORD_RECORD_NUMBER_OFFSET = 42
MFT_RECORD_RECORD_NUMBER_SIZE = 4
# Attribute type constants (only those used in this parser)
STANDARD_INFORMATION_ATTRIBUTE = 0x10
ATTRIBUTE_LIST_ATTRIBUTE = 0x20
FILE_NAME_ATTRIBUTE = 0x30
OBJECT_ID_ATTRIBUTE = 0x40
SECURITY_DESCRIPTOR_ATTRIBUTE = 0x50
VOLUME_NAME_ATTRIBUTE = 0x60
VOLUME_INFORMATION_ATTRIBUTE = 0x70
DATA_ATTRIBUTE = 0x80
INDEX_ROOT_ATTRIBUTE = 0x90
INDEX_ALLOCATION_ATTRIBUTE = 0xA0
BITMAP_ATTRIBUTE = 0xB0
REPARSE_POINT_ATTRIBUTE = 0xC0
EA_INFORMATION_ATTRIBUTE = 0xD0
EA_ATTRIBUTE = 0xE0
LOGGED_UTILITY_STREAM_ATTRIBUTE = 0x100
# --- Minimal WindowsTime Implementation ---
class WindowsTime:
def __init__(self, low: int, high: int):
# Combine the low and high 32-bit halves of the FILETIME into a single 64-bit value
self.value = (high << 32) | low
self.dt = self._filetime_to_dt(self.value)
self.dtstr = self.dt.isoformat()
def _filetime_to_dt(self, filetime: int) -> datetime.datetime:
# FILETIME counts 100-nanosecond intervals since January 1, 1601 (UTC)
epoch_start = datetime.datetime(1601, 1, 1, tzinfo=datetime.timezone.utc)
# Convert 100-nanosecond intervals to microseconds
microseconds = filetime // 10
return epoch_start + datetime.timedelta(microseconds=microseconds)
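# Example: WindowsTime(0, 0).dtstr is '1601-01-01T00:00:00+00:00', the FILETIME epoch,
# which is also the placeholder value used before any timestamps are parsed.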
# --- MftRecord Class ---
class MftRecord:
def __init__(self, raw_record: bytes, compute_hashes: bool = False, debug_level: int = 0, logger=None):
self.raw_record = raw_record
self.debug_level = debug_level
self.logger = logger or self._default_logger
self.magic = 0
self.upd_off = 0
self.upd_cnt = 0
self.lsn = 0
self.seq = 0
self.link = 0
self.attr_off = 0
self.flags = 0
self.size = 0
self.alloc_sizef = 0
self.base_ref = 0
self.next_attrid = 0
self.recordnum = 0
self.filename = ''
self.si_times = {
'crtime': WindowsTime(0, 0),
'mtime': WindowsTime(0, 0),
'atime': WindowsTime(0, 0),
'ctime': WindowsTime(0, 0)
}
self.fn_times = {
'crtime': WindowsTime(0, 0),
'mtime': WindowsTime(0, 0),
'atime': WindowsTime(0, 0),
'ctime': WindowsTime(0, 0)
}
self.filesize = 0
self.attribute_types = set()
self.attribute_list = []
self.object_id = ''
self.birth_volume_id = ''
self.birth_object_id = ''
self.birth_domain_id = ''
self.parent_ref = 0
self.md5 = None
self.sha256 = None
self.sha512 = None
self.crc32 = None
self.security_descriptor = None
self.volume_name = None
self.volume_info = None
self.data_attribute = None
self.index_root = None
self.index_allocation = None
self.bitmap = None
self.reparse_point = None
self.ea_information = None
self.ea = None
self.logged_utility_stream = None
if compute_hashes:
self.compute_hashes()
# Parse last so the attribute parsers can populate the fields initialized above
# instead of having their results overwritten with None.
self.parse_record()
def _default_logger(self, message: str, level: int = 0):
if level <= self.debug_level:
print(message)
def log(self, message: str, level: int = 0):
self.logger(message, level)
def parse_record(self) -> None:
try:
self.magic = struct.unpack("<I", self.raw_record[MFT_RECORD_MAGIC_NUMBER_OFFSET:MFT_RECORD_MAGIC_NUMBER_OFFSET+MFT_RECORD_MAGIC_NUMBER_SIZE])[0]
self.upd_off = struct.unpack("<H", self.raw_record[MFT_RECORD_UPDATE_SEQUENCE_OFFSET:MFT_RECORD_UPDATE_SEQUENCE_OFFSET+MFT_RECORD_UPDATE_SEQUENCE_SIZE])[0]
self.upd_cnt = struct.unpack("<H", self.raw_record[MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET:MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET+MFT_RECORD_UPDATE_SEQUENCE_SIZE])[0]
self.lsn = struct.unpack("<Q", self.raw_record[MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET:MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET+MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_SIZE])[0]
self.seq = struct.unpack("<H", self.raw_record[MFT_RECORD_SEQUENCE_NUMBER_OFFSET:MFT_RECORD_SEQUENCE_NUMBER_OFFSET+MFT_RECORD_SEQUENCE_NUMBER_SIZE])[0]
self.link = struct.unpack("<H", self.raw_record[MFT_RECORD_HARD_LINK_COUNT_OFFSET:MFT_RECORD_HARD_LINK_COUNT_OFFSET+MFT_RECORD_HARD_LINK_COUNT_SIZE])[0]
self.attr_off = struct.unpack("<H", self.raw_record[MFT_RECORD_FIRST_ATTRIBUTE_OFFSET:MFT_RECORD_FIRST_ATTRIBUTE_OFFSET+MFT_RECORD_FIRST_ATTRIBUTE_SIZE])[0]
self.flags = struct.unpack("<H", self.raw_record[MFT_RECORD_FLAGS_OFFSET:MFT_RECORD_FLAGS_OFFSET+MFT_RECORD_FLAGS_SIZE])[0]
self.size = struct.unpack("<I", self.raw_record[MFT_RECORD_USED_SIZE_OFFSET:MFT_RECORD_USED_SIZE_OFFSET+MFT_RECORD_USED_SIZE_SIZE])[0]
self.alloc_sizef = struct.unpack("<I", self.raw_record[MFT_RECORD_ALLOCATED_SIZE_OFFSET:MFT_RECORD_ALLOCATED_SIZE_OFFSET+MFT_RECORD_ALLOCATED_SIZE_SIZE])[0]
self.base_ref = struct.unpack("<Q", self.raw_record[MFT_RECORD_FILE_REFERENCE_OFFSET:MFT_RECORD_FILE_REFERENCE_OFFSET+MFT_RECORD_FILE_REFERENCE_SIZE])[0]
self.next_attrid = struct.unpack("<H", self.raw_record[MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET:MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET+MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE])[0]
self.recordnum = struct.unpack("<I", self.raw_record[MFT_RECORD_RECORD_NUMBER_OFFSET:MFT_RECORD_RECORD_NUMBER_OFFSET+MFT_RECORD_RECORD_NUMBER_SIZE])[0]
self.parse_attributes()
except struct.error as e:
self.log(f"Error parsing record header: {e}", 0)
def parse_attributes(self):
offset = int(self.attr_off)
while offset < len(self.raw_record) - 8:
try:
self.log(f"Parsing attribute at offset {offset}", 3)
attr_type = int(struct.unpack("<L", self.raw_record[offset:offset+4])[0])
attr_len = int(struct.unpack("<L", self.raw_record[offset+4:offset+8])[0])
self.log(f"Attribute type: {attr_type}, length: {attr_len}", 3)
if attr_type == 0xffffffff or attr_len == 0:
self.log("End of attributes reached", 3)
break
self.attribute_types.add(attr_type)
if attr_type == STANDARD_INFORMATION_ATTRIBUTE:
self.parse_si_attribute(offset)
elif attr_type == FILE_NAME_ATTRIBUTE:
self.parse_fn_attribute(offset)
elif attr_type == ATTRIBUTE_LIST_ATTRIBUTE:
self.parse_attribute_list(offset)
elif attr_type == OBJECT_ID_ATTRIBUTE:
self.parse_object_id_attribute(offset)
elif attr_type == SECURITY_DESCRIPTOR_ATTRIBUTE:
self.parse_security_descriptor(offset)
elif attr_type == VOLUME_NAME_ATTRIBUTE:
self.parse_volume_name(offset)
elif attr_type == VOLUME_INFORMATION_ATTRIBUTE:
self.parse_volume_information(offset)
elif attr_type == DATA_ATTRIBUTE:
self.parse_data(offset)
elif attr_type == INDEX_ROOT_ATTRIBUTE:
self.parse_index_root(offset)
elif attr_type == INDEX_ALLOCATION_ATTRIBUTE:
self.parse_index_allocation(offset)
elif attr_type == BITMAP_ATTRIBUTE:
self.parse_bitmap(offset)
elif attr_type == REPARSE_POINT_ATTRIBUTE:
self.parse_reparse_point(offset)
elif attr_type == EA_INFORMATION_ATTRIBUTE:
self.parse_ea_information(offset)
elif attr_type == EA_ATTRIBUTE:
self.parse_ea(offset)
elif attr_type == LOGGED_UTILITY_STREAM_ATTRIBUTE:
self.parse_logged_utility_stream(offset)
offset += attr_len
except Exception as e:
print(f"Error processing record {self.recordnum}: {e}")
offset += 1
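# Note: the parse_* helpers below assume resident attributes whose value starts at the common
# 0x18 (24-byte) offset from the attribute header, rather than reading the header's value-offset field.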
def parse_si_attribute(self, offset: int) -> None:
si_data = self.raw_record[offset+24:offset+72]
if len(si_data) >= 32:
try:
self.si_times = {
'crtime': WindowsTime(struct.unpack("<L", si_data[:4])[0], struct.unpack("<L", si_data[4:8])[0]),
'mtime': WindowsTime(struct.unpack("<L", si_data[8:12])[0], struct.unpack("<L", si_data[12:16])[0]),
'ctime': WindowsTime(struct.unpack("<L", si_data[16:20])[0], struct.unpack("<L", si_data[20:24])[0]),
'atime': WindowsTime(struct.unpack("<L", si_data[24:28])[0], struct.unpack("<L", si_data[28:32])[0])
}
except struct.error:
pass
def parse_fn_attribute(self, offset: int) -> None:
fn_data = self.raw_record[offset+24:]
if len(fn_data) >= 64:
try:
self.fn_times = {
'crtime': WindowsTime(struct.unpack("<L", fn_data[8:12])[0], struct.unpack("<L", fn_data[12:16])[0]),
'mtime': WindowsTime(struct.unpack("<L", fn_data[16:20])[0], struct.unpack("<L", fn_data[20:24])[0]),
'ctime': WindowsTime(struct.unpack("<L", fn_data[24:28])[0], struct.unpack("<L", fn_data[28:32])[0]),
'atime': WindowsTime(struct.unpack("<L", fn_data[32:36])[0], struct.unpack("<L", fn_data[36:40])[0])
}
self.filesize = struct.unpack("<Q", fn_data[48:56])[0]
name_len = struct.unpack("B", fn_data[64:65])[0]
if len(fn_data) >= 66 + name_len * 2:
self.filename = fn_data[66:66+name_len*2].decode('utf-16-le', errors='replace')
self.parent_ref = struct.unpack("<Q", fn_data[:8])[0] & 0x0000FFFFFFFFFFFF
except struct.error:
pass
def parse_object_id_attribute(self, offset: int) -> None:
obj_id_data = self.raw_record[offset+24:offset+88]
if len(obj_id_data) >= 64:
try:
self.object_id = str(uuid.UUID(bytes_le=obj_id_data[:16]))
self.birth_volume_id = str(uuid.UUID(bytes_le=obj_id_data[16:32]))
self.birth_object_id = str(uuid.UUID(bytes_le=obj_id_data[32:48]))
self.birth_domain_id = str(uuid.UUID(bytes_le=obj_id_data[48:64]))
except (struct.error, ValueError):
print(f"Error parsing Object ID attribute for record {self.recordnum}")
def parse_attribute_list(self, offset: int) -> None:
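# Each $ATTRIBUTE_LIST entry: attribute type (0-3), entry length (4-5), name length (6),
# name offset (7), starting VCN (8-15), MFT file reference (16-23).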
attr_content_offset = offset + struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
attr_content_end = offset + struct.unpack("<L", self.raw_record[offset+4:offset+8])[0]
while attr_content_offset < attr_content_end:
try:
attr_type = struct.unpack("<L", self.raw_record[attr_content_offset:attr_content_offset+4])[0]
attr_len = struct.unpack("<H", self.raw_record[attr_content_offset+4:attr_content_offset+6])[0]
name_len = struct.unpack("B", self.raw_record[attr_content_offset+6:attr_content_offset+7])[0]
name_offset = struct.unpack("B", self.raw_record[attr_content_offset+7:attr_content_offset+8])[0]
if name_len > 0:
name = self.raw_record[attr_content_offset+name_offset:attr_content_offset+name_offset+name_len*2].decode('utf-16-le', errors='replace')
else:
name = ""
vcn = struct.unpack("<Q", self.raw_record[attr_content_offset+8:attr_content_offset+16])[0]
ref = struct.unpack("<Q", self.raw_record[attr_content_offset+16:attr_content_offset+24])[0]
self.attribute_list.append({
'type': attr_type,
'name': name,
'vcn': vcn,
'reference': ref
})
attr_content_offset += attr_len
except struct.error:
break
def parse_security_descriptor(self, offset: int) -> None:
sd_data = self.raw_record[offset+24:]
if len(sd_data) >= 20:
try:
revision = struct.unpack("B", sd_data[0:1])[0]
control = struct.unpack("<H", sd_data[2:4])[0]
owner_offset = struct.unpack("<L", sd_data[4:8])[0]
group_offset = struct.unpack("<L", sd_data[8:12])[0]
sacl_offset = struct.unpack("<L", sd_data[12:16])[0]
dacl_offset = struct.unpack("<L", sd_data[16:20])[0]
self.security_descriptor = {
'revision': revision,
'control': control,
'owner_offset': owner_offset,
'group_offset': group_offset,
'sacl_offset': sacl_offset,
'dacl_offset': dacl_offset
}
except struct.error:
print(f"Error parsing Security Descriptor attribute for record {self.recordnum}")
def parse_volume_name(self, offset: int) -> None:
# $VOLUME_NAME has no length prefix in its value; the length in bytes comes from the
# resident header's value length field, and the value starts at the header's value offset.
try:
value_length = struct.unpack("<L", self.raw_record[offset+16:offset+20])[0]
value_offset = struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
vn_data = self.raw_record[offset+value_offset:offset+value_offset+value_length]
self.volume_name = vn_data.decode('utf-16-le', errors='replace')
except struct.error:
print(f"Error parsing Volume Name attribute for record {self.recordnum}")
def parse_volume_information(self, offset: int) -> None:
vi_data = self.raw_record[offset+24:offset+48]
if len(vi_data) >= 12:
try:
self.volume_info = {
'major_version': struct.unpack("B", vi_data[8:9])[0],
'minor_version': struct.unpack("B", vi_data[9:10])[0],
'flags': struct.unpack("<H", vi_data[10:12])[0]
}
except struct.error:
print(f"Error parsing Volume Information attribute for record {self.recordnum}")
def parse_data(self, offset):
data_header = self.raw_record[offset:offset+24]
try:
non_resident_flag = struct.unpack("B", data_header[8:9])[0]
name_length = struct.unpack("B", data_header[9:10])[0]
name_offset = struct.unpack("<H", data_header[10:12])[0]
if name_length > 0:
name = self.raw_record[offset+name_offset:offset+name_offset+name_length*2].decode('utf-16-le', errors='replace')
else:
name = ""
if non_resident_flag == 0: # Resident
content_size = struct.unpack("<L", data_header[16:20])[0]
content_offset = struct.unpack("<H", data_header[20:22])[0]
content = self.raw_record[offset+content_offset:offset+content_offset+content_size]
else: # Non-resident; for brevity we don't parse data runs here.
content = None
self.data_attribute = {
'name': name,
'non_resident': bool(non_resident_flag),
'content_size': content_size if non_resident_flag == 0 else None,
}
except struct.error:
print(f"Error parsing Data attribute for record {self.recordnum}")
def parse_index_root(self, offset: int) -> None:
ir_data = self.raw_record[offset+24:]
try:
attr_type = struct.unpack("<L", ir_data[:4])[0]
collation_rule = struct.unpack("<L", ir_data[4:8])[0]
index_alloc_size = struct.unpack("<L", ir_data[8:12])[0]
clusters_per_index = struct.unpack("B", ir_data[12:13])[0]
self.index_root = {
'attr_type': attr_type,
'collation_rule': collation_rule,
'index_alloc_size': index_alloc_size,
'clusters_per_index': clusters_per_index
}
except struct.error:
print(f"Error parsing Index Root attribute for record {self.recordnum}")
def parse_index_allocation(self, offset: int) -> None:
ia_data = self.raw_record[offset+24:]
try:
data_runs_offset = struct.unpack("<H", ia_data[:2])[0]
self.index_allocation = {
'data_runs_offset': data_runs_offset
}
except struct.error:
print(f"Error parsing Index Allocation attribute for record {self.recordnum}")
def parse_bitmap(self, offset: int) -> None:
bitmap_data = self.raw_record[offset+24:]
try:
bitmap_size = struct.unpack("<L", bitmap_data[:4])[0]
self.bitmap = {
'size': bitmap_size,
'data': bitmap_data[4:4+bitmap_size]
}
except struct.error:
print(f"Error parsing Bitmap attribute for record {self.recordnum}")
def parse_reparse_point(self, offset: int) -> None:
rp_data = self.raw_record[offset+24:]
try:
reparse_tag = struct.unpack("<L", rp_data[:4])[0]
reparse_data_length = struct.unpack("<H", rp_data[4:6])[0]
self.reparse_point = {
'reparse_tag': reparse_tag,
'data_length': reparse_data_length,
'data': rp_data[8:8+reparse_data_length]
}
except struct.error:
print(f"Error parsing Reparse Point attribute for record {self.recordnum}")
def parse_ea_information(self, offset: int) -> None:
eai_data = self.raw_record[offset+24:]
try:
ea_size = struct.unpack("<L", eai_data[:4])[0]
ea_count = struct.unpack("<L", eai_data[4:8])[0]
self.ea_information = {
'ea_size': ea_size,
'ea_count': ea_count
}
except struct.error:
print(f"Error parsing EA Information attribute for record {self.recordnum}")
def parse_ea(self, offset: int) -> None:
ea_data = self.raw_record[offset+24:]
try:
next_entry_offset = struct.unpack("<L", ea_data[:4])[0]
flags = struct.unpack("B", ea_data[4:5])[0]
name_length = struct.unpack("B", ea_data[5:6])[0]
value_length = struct.unpack("<H", ea_data[6:8])[0]
name = ea_data[8:8+name_length].decode('ascii', errors='replace')
value = ea_data[8+name_length:8+name_length+value_length]
self.ea = {
'next_entry_offset': next_entry_offset,
'flags': flags,
'name': name,
'value': value.hex()
}
except struct.error:
print(f"Error parsing EA attribute for record {self.recordnum}")
def parse_logged_utility_stream(self, offset: int) -> None:
lus_data = self.raw_record[offset+24:]
try:
stream_size = struct.unpack("<Q", lus_data[:8])[0]
self.logged_utility_stream = {
'size': stream_size,
'data': lus_data[8:8+stream_size].hex()
}
except struct.error:
print(f"Error parsing Logged Utility Stream attribute for record {self.recordnum}")
def compute_hashes(self) -> None:
md5 = hashlib.md5()
sha256 = hashlib.sha256()
sha512 = hashlib.sha512()
md5.update(self.raw_record)
sha256.update(self.raw_record)
sha512.update(self.raw_record)
self.md5 = md5.hexdigest()
self.sha256 = sha256.hexdigest()
self.sha512 = sha512.hexdigest()
self.crc32 = format(zlib.crc32(self.raw_record) & 0xFFFFFFFF, '08x')
def get_file_type(self) -> str:
# For illustration, this function checks a few flag bits.
# You may need to refine this logic.
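# MFT header flags: 0x0001 = record is in use, 0x0002 = record is a directory.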
FILE_RECORD_IS_DIRECTORY = 0x02
if self.flags & FILE_RECORD_IS_DIRECTORY:
return "Directory"
else:
return "File"
# --- Main Program ---
def main():
if len(sys.argv) != 2:
print("Usage: python show_mft.py <path_to_extracted_record>")
sys.exit(1)
record_path = sys.argv[1]
try:
with open(record_path, "rb") as f:
raw_record = f.read()
except Exception as e:
print(f"Failed to read file: {e}")
sys.exit(1)
mft = MftRecord(raw_record, compute_hashes=True, debug_level=1)
# Use pprint to print out all details nicely.
details = {
"record_number": mft.recordnum,
"update_sequence_offset": mft.upd_off,
"update_sequence_count": mft.upd_cnt,
"logfile_sequence_number": mft.lsn,
"sequence_number": mft.seq,
"hard_link_count": mft.link,
"attribute_offset": mft.attr_off,
"flags": mft.flags,
"used_size": mft.size,
"allocated_size": mft.alloc_sizef,
"base_file_reference": mft.base_ref,
"next_attribute_id": mft.next_attrid,
"file_type": mft.get_file_type(),
"si_times": { k: v.dtstr for k, v in mft.si_times.items() },
"fn_times": { k: v.dtstr for k, v in mft.fn_times.items() },
"filename": mft.filename,
"parent_reference": mft.parent_ref,
"object_id": mft.object_id,
"security_descriptor": mft.security_descriptor,
"volume_name": mft.volume_name,
"volume_info": mft.volume_info,
"data_attribute": mft.data_attribute,
"index_root": mft.index_root,
"index_allocation": mft.index_allocation,
"bitmap": mft.bitmap,
"reparse_point": mft.reparse_point,
"ea_information": mft.ea_information,
"ea": mft.ea,
"logged_utility_stream": mft.logged_utility_stream,
"attribute_list": mft.attribute_list,
"hashes": {
"md5": mft.md5,
"sha256": mft.sha256,
"sha512": mft.sha512,
"crc32": mft.crc32
}
}
pprint.pprint(details)
if __name__ == "__main__":
main()