commit 1973dc7031c4b0fd4ace9c0dbf968546e384890b
Author: tobias
Date:   Sun Feb 16 18:57:07 2025 +0100

    First Commit of mft carver and mft parsers

diff --git a/mft.go b/mft.go
new file mode 100755
index 0000000..7665e35
--- /dev/null
+++ b/mft.go
@@ -0,0 +1,496 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/base64"
+	"encoding/binary"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"sync"
+	"time"
+	"unicode/utf16"
+)
+
+// --------------------
+// Data Structures
+// --------------------
+
+type FileNameAttribute struct {
+	Filename  string `json:"filename"`
+	ParentRef uint64 `json:"parent_ref"`
+	Crtime    string `json:"crtime"`
+	Mtime     string `json:"mtime"`
+	CtTime    string `json:"ctime"`
+	Atime     string `json:"atime"`
+}
+
+type DataStream struct {
+	Name          string `json:"name"`
+	Resident      bool   `json:"resident"`
+	NonResident   bool   `json:"non_resident"`
+	ContentBase64 string `json:"content_base64,omitempty"`
+}
+
+type MFTRecord struct {
+	// Header fields (omitting the "magic" since it's always "FILE")
+	UpdateSeqOffset uint16 `json:"update_seq_offset"`
+	UpdateSeqSize   uint16 `json:"update_seq_size"`
+	LSN             uint64 `json:"lsn"`
+	SequenceNumber  uint16 `json:"sequence_number"`
+	HardLinkCount   uint16 `json:"hard_link_count"`
+	FirstAttrOffset uint16 `json:"first_attr_offset"`
+	Flags           uint16 `json:"flags"`
+	RealSize        uint32 `json:"real_size"`
+	AllocatedSize   uint32 `json:"allocated_size"`
+	BaseFileRecord  uint64 `json:"base_file_record"`
+	NextAttrId      uint16 `json:"next_attr_id"`
+	RecordNumber    uint32 `json:"record_number"`
+
+	// Standard Information attribute timestamps (if present)
+	SI_Crtime string `json:"si_crtime,omitempty"`
+	SI_Mtime  string `json:"si_mtime,omitempty"`
+	SI_CtTime string `json:"si_ctime,omitempty"`
+	SI_Atime  string `json:"si_atime,omitempty"`
+
+	// Additional attributes
+	ObjectID           string                 `json:"object_id,omitempty"`
+	SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
+
+	FileNames   []FileNameAttribute `json:"file_names,omitempty"`
+	DataStreams []DataStream        `json:"data_streams,omitempty"`
+}
+
+// CarvedRecord holds a candidate 1024-byte record and its global offset.
+type CarvedRecord struct {
+	Offset      int64
+	RecordBytes []byte
+}
+
+// ParsedRecord is what gets sent to the JSON writer.
+type ParsedRecord struct {
+	Offset     int64
+	RecordJSON map[string]interface{}
+}
+
+// --------------------
+// Helper functions
+// --------------------
+
+// safeSlice returns data[start : start+length] if within bounds; otherwise false.
+func safeSlice(data []byte, start int, length int) ([]byte, bool) {
+	if start < 0 || start+length > len(data) {
+		return nil, false
+	}
+	return data[start : start+length], true
+}
+
+// filetimeToString converts a Windows FILETIME (uint64) into an RFC3339 timestamp string.
+func filetimeToString(ft uint64) string {
+	const epochDiff = 11644473600 // seconds between 1601 and 1970
+	secs := int64(ft/10000000) - epochDiff
+	nsec := int64(ft%10000000) * 100
+	t := time.Unix(secs, nsec).UTC()
+	return t.Format(time.RFC3339)
+}
+
+// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
+func decodeUTF16String(b []byte) string {
+	if len(b)%2 != 0 {
+		b = b[:len(b)-1]
+	}
+	u16 := make([]uint16, len(b)/2)
+	for i := 0; i < len(u16); i++ {
+		u16[i] = binary.LittleEndian.Uint16(b[i*2:])
+	}
+	return string(utf16.Decode(u16))
+}
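A quick sanity check for the FILETIME helper: the Unix epoch expressed as a FILETIME is 11644473600 s * 10^7 = 116444736000000000 intervals, so filetimeToString should map that value back to 1970-01-01. An illustrative snippet (same package; not part of the commit):

// exampleFiletime spot-checks filetimeToString against two known values.
func exampleFiletime() {
	const unixEpochAsFiletime = 116444736000000000 // 11644473600 s * 1e7 intervals
	fmt.Println(filetimeToString(unixEpochAsFiletime)) // 1970-01-01T00:00:00Z
	fmt.Println(filetimeToString(0))                   // 1601-01-01T00:00:00Z
}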
+
+// parseZoneIdentifier is a simple parser for Zone.Identifier streams.
+func parseZoneIdentifier(content []byte) map[string]string {
+	result := make(map[string]string)
+	text := string(content)
+	lines := strings.Split(text, "\n")
+	for _, line := range lines {
+		if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
+			key := strings.TrimSpace(parts[0])
+			value := strings.TrimSpace(parts[1])
+			result[key] = value
+		}
+	}
+	return result
+}
+
+// --------------------
+// Attribute Parsing
+// --------------------
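For reference, the attribute walker below relies on the fixed NTFS attribute header layout. A sketch of those fields as a Go struct (documentation only; the parser reads the fields directly with binary.LittleEndian):

// Common attribute header; offsets are relative to the attribute start.
type attrHeader struct {
	Type        uint32 // +0x00: attribute type; 0xFFFFFFFF marks end of attributes
	Length      uint32 // +0x04: total attribute length
	NonResident uint8  // +0x08: 0 = resident, 1 = non-resident
	NameLength  uint8  // +0x09: name length in UTF-16 characters
	NameOffset  uint16 // +0x0A: offset of the name from the attribute start
	Flags       uint16 // +0x0C
	AttrID      uint16 // +0x0E
	ValueLength uint32 // +0x10: resident only: content length
	ValueOffset uint16 // +0x14: resident only: content offset from the attribute start
}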
+
+// parseAttributes iterates over the attribute area and processes known types.
+// It uses safeSlice to ensure we don't read beyond the record.
+func parseAttributes(data []byte, rec *MFTRecord) {
+	offset := int(rec.FirstAttrOffset)
+	for offset < len(data)-8 {
+		// First 4 bytes: attribute type
+		if attrBytes, ok := safeSlice(data, offset, 4); !ok {
+			break
+		} else {
+			attrType := binary.LittleEndian.Uint32(attrBytes)
+			// End marker
+			if attrType == 0xFFFFFFFF {
+				break
+			}
+			// Next 4 bytes: attribute length
+			if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
+				break
+			} else {
+				attrLen := binary.LittleEndian.Uint32(attrLenBytes)
+				if attrLen < 8 || offset+int(attrLen) > len(data) {
+					// Malformed attribute, skip one byte and try to re-sync.
+					offset++
+					continue
+				}
+				// Resident flag at offset+8.
+				residentFlag := data[offset+8]
+				// For resident attributes, extract value length and value offset.
+				var valLen uint32
+				var valOffset uint16
+				if residentFlag == 0 {
+					if vb, ok := safeSlice(data, offset+16, 4); ok {
+						valLen = binary.LittleEndian.Uint32(vb)
+					} else {
+						offset += int(attrLen)
+						continue
+					}
+					if vb, ok := safeSlice(data, offset+20, 2); ok {
+						valOffset = binary.LittleEndian.Uint16(vb)
+					} else {
+						offset += int(attrLen)
+						continue
+					}
+				}
+				// Process known attribute types.
+				switch attrType {
+				case 0x10: // $STANDARD_INFORMATION
+					if residentFlag == 0 {
+						if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
+							rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
+							rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
+							rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
+							rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
+						}
+					}
+				case 0x30: // $FILE_NAME
+					if residentFlag == 0 {
+						if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
+							var fn FileNameAttribute
+							fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
+							fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
+							fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
+							fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
+							fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
+							if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
+								filenameLen := filenameLenBytes[0]
+								if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
+									fn.Filename = decodeUTF16String(nameBytes)
+								}
+							}
+							rec.FileNames = append(rec.FileNames, fn)
+						}
+					}
+				case 0x80: // $DATA
+					var ds DataStream
+					// Extract the attribute's name if any. The name length (in
+					// UTF-16 characters) is at offset+9; the 2-byte field at
+					// offset+10 is the *offset* of the name from the attribute
+					// start, so the name must be read from there, not at +10.
+					if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
+						nameLen := nameInfo[0]
+						if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok && nameLen > 0 {
+							nameOff := binary.LittleEndian.Uint16(nameOffBytes)
+							if nameBytes, ok := safeSlice(data, offset+int(nameOff), int(nameLen)*2); ok {
+								ds.Name = decodeUTF16String(nameBytes)
+							}
+						}
+					}
+					if residentFlag == 0 {
+						ds.Resident = true
+						ds.NonResident = false
+						if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
+							ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
+							if ds.Name == "Zone.Identifier" {
+								zoneInfo := parseZoneIdentifier(content)
+								ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
+							}
+						}
+					} else {
+						ds.Resident = false
+						ds.NonResident = true
+					}
+					rec.DataStreams = append(rec.DataStreams, ds)
+				case 0x40: // $OBJECT_ID (when used as such)
+					if residentFlag == 0 {
+						if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
+							rec.ObjectID = fmt.Sprintf("%x", objData)
+						}
+					}
+				case 0x50: // $SECURITY_DESCRIPTOR
+					if residentFlag == 0 {
+						// A minimal parser: extract the first 20 bytes of fields if possible.
+						if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
+							// We could decode further; here we just store raw hex values.
+							rec.SecurityDescriptor = map[string]interface{}{
+								"raw": fmt.Sprintf("%x", secData),
+							}
+						}
+					}
+					// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
+					// can be added here following similar patterns.)
+				}
+				offset += int(attrLen)
+			}
+		}
+	}
+}
+
+// parseMFTRecord attempts to parse a 1024-byte MFT record.
+// It returns an error if the record is too short or if the expected "FILE" marker is missing.
+func parseMFTRecord(data []byte) (*MFTRecord, error) {
+	if len(data) < 48 {
+		return nil, fmt.Errorf("data too short to be a valid record")
+	}
+	if string(data[:4]) != "FILE" {
+		return nil, fmt.Errorf("invalid record header")
+	}
+	rec := &MFTRecord{
+		UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
+		UpdateSeqSize:   binary.LittleEndian.Uint16(data[6:8]),
+		LSN:             binary.LittleEndian.Uint64(data[8:16]),
+		SequenceNumber:  binary.LittleEndian.Uint16(data[16:18]),
+		HardLinkCount:   binary.LittleEndian.Uint16(data[18:20]),
+		FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
+		Flags:           binary.LittleEndian.Uint16(data[22:24]),
+		RealSize:        binary.LittleEndian.Uint32(data[24:28]),
+		AllocatedSize:   binary.LittleEndian.Uint32(data[28:32]),
+		BaseFileRecord:  binary.LittleEndian.Uint64(data[32:40]),
+		NextAttrId:      binary.LittleEndian.Uint16(data[40:42]),
+		// Bytes 42:44 are alignment padding; on NTFS 3.1+ the record
+		// number lives at 0x2C.
+		RecordNumber: binary.LittleEndian.Uint32(data[44:48]),
+	}
+	parseAttributes(data, rec)
+	return rec, nil
+}
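One caveat on parseMFTRecord: it does not undo the NTFS update sequence ("fixup") array, so the last two bytes of each 512-byte sector in a carved record still hold the update sequence number instead of the original data. A minimal sketch of how the fixup could be applied to RecordBytes before parsing (hypothetical helper, not part of this commit):

// applyFixup restores the sector tail bytes saved in the update sequence array.
func applyFixup(rec []byte) error {
	usaOffset := int(binary.LittleEndian.Uint16(rec[4:6]))
	usaCount := int(binary.LittleEndian.Uint16(rec[6:8])) // entries, incl. the USN itself
	if usaCount < 2 || usaOffset+usaCount*2 > len(rec) || (usaCount-1)*512 > len(rec) {
		return fmt.Errorf("update sequence array out of bounds")
	}
	usn := rec[usaOffset : usaOffset+2]
	for i := 1; i < usaCount; i++ {
		end := i * 512
		if !bytes.Equal(rec[end-2:end], usn) {
			return fmt.Errorf("fixup mismatch in sector %d", i)
		}
		copy(rec[end-2:end], rec[usaOffset+2*i:usaOffset+2*i+2])
	}
	return nil
}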
+
+// --------------------
+// Parallel Processing and Main
+// --------------------
+
+func processImageFile(inputFile string, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	f, err := os.Open(inputFile)
+	if err != nil {
+		log.Printf("Failed to open %s: %v", inputFile, err)
+		return
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		log.Printf("Failed to stat %s: %v", inputFile, err)
+		return
+	}
+	fileSize := fi.Size()
+
+	// Create an output folder and JSONL file based on input file name and current timestamp.
+	timestamp := time.Now().Format("20060102150405")
+	baseName := filepath.Base(inputFile)
+	outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
+	if err := os.Mkdir(outDir, 0755); err != nil {
+		log.Printf("Failed to create output directory for %s: %v", inputFile, err)
+		return
+	}
+	jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
+	jsonlFile, err := os.Create(jsonlFileName)
+	if err != nil {
+		log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
+		return
+	}
+	defer jsonlFile.Close()
+
+	carvedChan := make(chan CarvedRecord, 100)
+	parsedChan := make(chan ParsedRecord, 100)
+
+	// Worker pool for carving/parsing records.
+	numWorkers := runtime.NumCPU()
+	var workerWg sync.WaitGroup
+	for i := 0; i < numWorkers; i++ {
+		workerWg.Add(1)
+		go func() {
+			defer workerWg.Done()
+			for carved := range carvedChan {
+				// Write raw record to disk.
+				recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
+				if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
+					log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
+					continue
+				}
+				// Parse the record.
+				mft, err := parseMFTRecord(carved.RecordBytes)
+				if err != nil {
+					// Skip records that cannot be parsed.
+					continue
+				}
+				// Build JSON record (omitting the magic field).
+				recordMap := map[string]interface{}{
+					"input_image":         inputFile,
+					"offset":              carved.Offset,
+					"update_seq_offset":   mft.UpdateSeqOffset,
+					"update_seq_size":     mft.UpdateSeqSize,
+					"lsn":                 mft.LSN,
+					"sequence_number":     mft.SequenceNumber,
+					"hard_link_count":     mft.HardLinkCount,
+					"first_attr_offset":   mft.FirstAttrOffset,
+					"flags":               mft.Flags,
+					"real_size":           mft.RealSize,
+					"allocated_size":      mft.AllocatedSize,
+					"base_file_record":    mft.BaseFileRecord,
+					"next_attr_id":        mft.NextAttrId,
+					"record_number":       mft.RecordNumber,
+					"si_crtime":           mft.SI_Crtime,
+					"si_mtime":            mft.SI_Mtime,
+					"si_ctime":            mft.SI_CtTime,
+					"si_atime":            mft.SI_Atime,
+					"object_id":           mft.ObjectID,
+					"security_descriptor": mft.SecurityDescriptor,
+					"file_names":          mft.FileNames,
+					"data_streams":        mft.DataStreams,
+				}
+				parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
+			}
+		}()
+	}
+
+	// Writer goroutine to output JSONL records.
+	var writerWg sync.WaitGroup
+	writerWg.Add(1)
+	go func() {
+		defer writerWg.Done()
+		encoder := json.NewEncoder(jsonlFile)
+		for pr := range parsedChan {
+			if err := encoder.Encode(pr.RecordJSON); err != nil {
+				log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
+			}
+		}
+	}()
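The scan loop below matches the signature byte by byte; bytes.Index from the standard library would do the same search with less code and is typically faster. An equivalent inner loop for comparison (sketch; data, pattern, leftover and fileOffset are the same variables used below):

for start := 0; ; {
	idx := bytes.Index(data[start:], pattern)
	if idx == -1 {
		break
	}
	i := start + idx
	globalOffset := fileOffset - int64(len(leftover)) + int64(i)
	_ = globalOffset // read and enqueue the 1024-byte record here, exactly as below
	start = i + 1
}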
+
+	// Scan the file for the "FILE0" signature. "FILE" is the record magic;
+	// the trailing '0' (0x30) is the low byte of the update sequence offset,
+	// which is 0x30 in typical records, so "FILE0" is a tighter carving
+	// signature than "FILE" alone.
+	reader := bufio.NewReader(f)
+	const recordSize = 1024
+	const chunkSize = 1024 * 1024
+	pattern := []byte("FILE0")
+	var fileOffset int64 = 0
+	var leftover []byte
+	lastPrint := time.Now()
+
+	for {
+		chunk := make([]byte, chunkSize)
+		n, err := reader.Read(chunk)
+		if n == 0 {
+			break
+		}
+		data := append(leftover, chunk[:n]...)
+		// Progress update every ~5 seconds.
+		if time.Since(lastPrint) > 5*time.Second {
+			perc := float64(fileOffset) / float64(fileSize) * 100.0
+			log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
+			lastPrint = time.Now()
+		}
+		searchLimit := len(data) - len(pattern)
+		for i := 0; i <= searchLimit; i++ {
+			if bytes.Equal(data[i:i+len(pattern)], pattern) {
+				globalOffset := fileOffset - int64(len(leftover)) + int64(i)
+				if globalOffset+recordSize > fileSize {
+					continue
+				}
+				recordBytes := make([]byte, recordSize)
+				_, err := f.ReadAt(recordBytes, globalOffset)
+				if err != nil {
+					log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
+					continue
+				}
+				carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
+			}
+		}
+		if len(data) >= len(pattern)-1 {
+			leftover = data[len(data)-(len(pattern)-1):]
+		} else {
+			leftover = data
+		}
+		fileOffset += int64(n)
+		if err == io.EOF {
+			break
+		}
+	}
+
+	close(carvedChan)
+	workerWg.Wait()
+	close(parsedChan)
+	writerWg.Wait()
+	log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
+}
+
+func main() {
+	flag.Parse()
+	if flag.NArg() == 0 {
+		fmt.Printf("Usage: %s <image file or directory> [...]\n", os.Args[0])
+		os.Exit(1)
+	}
+
+	// Build list of files from provided arguments (recursively if directories).
+	var files []string
+	for _, arg := range flag.Args() {
+		fi, err := os.Stat(arg)
+		if err != nil {
+			log.Printf("Error stating %s: %v", arg, err)
+			continue
+		}
+		if fi.IsDir() {
+			err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
+				if err != nil {
+					return nil
+				}
+				if !info.IsDir() {
+					files = append(files, path)
+				}
+				return nil
+			})
+			if err != nil {
+				log.Printf("Error walking directory %s: %v", arg, err)
+			}
+		} else {
+			files = append(files, arg)
+		}
+	}
+
+	totalFiles := len(files)
+	if totalFiles == 0 {
+		log.Println("No input files found.")
+		return
+	}
+	log.Printf("Found %d files to process.", totalFiles)
+
+	var wg sync.WaitGroup
+	concurrentFiles := runtime.NumCPU()
+	sem := make(chan struct{}, concurrentFiles)
+	for i, file := range files {
+		wg.Add(1)
+		sem <- struct{}{}
+		go func(i int, file string) {
+			defer func() { <-sem }()
+			log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
+			processImageFile(file, &wg)
+		}(i, file)
+	}
+	wg.Wait()
+	log.Println("All files processed.")
+}
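A minimal smoke test for the parser needs only a synthetic record: the "FILE" magic, a first-attribute offset, and an end-of-attributes marker. A hypothetical snippet (same package as mft.go, not part of the commit):

// exampleSyntheticRecord builds the smallest record parseMFTRecord accepts.
func exampleSyntheticRecord() {
	rec := make([]byte, 1024)
	copy(rec, "FILE")
	binary.LittleEndian.PutUint16(rec[20:22], 56)         // first attribute offset
	binary.LittleEndian.PutUint32(rec[56:60], 0xFFFFFFFF) // end-of-attributes marker
	if r, err := parseMFTRecord(rec); err == nil {
		fmt.Println(r.FirstAttrOffset) // 56
	}
}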
diff --git a/mft.py b/mft.py
new file mode 100755
index 0000000..3da78f1
--- /dev/null
+++ b/mft.py
@@ -0,0 +1,510 @@
+#!/usr/bin/env python3
+import struct
+import uuid
+import hashlib
+import zlib
+import sys
+import pprint
+import datetime
+
+# --- Minimal Constants ---
+MFT_RECORD_MAGIC_NUMBER_OFFSET = 0
+MFT_RECORD_MAGIC_NUMBER_SIZE = 4
+MFT_RECORD_UPDATE_SEQUENCE_OFFSET = 4
+MFT_RECORD_UPDATE_SEQUENCE_SIZE = 2
+MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET = 6
+MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET = 8
+MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_SIZE = 8
+MFT_RECORD_SEQUENCE_NUMBER_OFFSET = 16
+MFT_RECORD_SEQUENCE_NUMBER_SIZE = 2
+MFT_RECORD_HARD_LINK_COUNT_OFFSET = 18
+MFT_RECORD_HARD_LINK_COUNT_SIZE = 2
+MFT_RECORD_FIRST_ATTRIBUTE_OFFSET = 20
+MFT_RECORD_FIRST_ATTRIBUTE_SIZE = 2
+MFT_RECORD_FLAGS_OFFSET = 22
+MFT_RECORD_FLAGS_SIZE = 2
+MFT_RECORD_USED_SIZE_OFFSET = 24
+MFT_RECORD_USED_SIZE_SIZE = 4
+MFT_RECORD_ALLOCATED_SIZE_OFFSET = 28
+MFT_RECORD_ALLOCATED_SIZE_SIZE = 4
+MFT_RECORD_FILE_REFERENCE_OFFSET = 32
+MFT_RECORD_FILE_REFERENCE_SIZE = 8
+MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET = 40
+MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE = 2
+# Bytes 42..44 are alignment padding; the record number sits at 0x2C (NTFS 3.1+).
+MFT_RECORD_RECORD_NUMBER_OFFSET = 44
+MFT_RECORD_RECORD_NUMBER_SIZE = 4
+
+# Attribute type constants (only those used in this parser)
+STANDARD_INFORMATION_ATTRIBUTE = 0x10
+ATTRIBUTE_LIST_ATTRIBUTE = 0x20
+FILE_NAME_ATTRIBUTE = 0x30
+OBJECT_ID_ATTRIBUTE = 0x40
+SECURITY_DESCRIPTOR_ATTRIBUTE = 0x50
+VOLUME_NAME_ATTRIBUTE = 0x60
+VOLUME_INFORMATION_ATTRIBUTE = 0x70
+DATA_ATTRIBUTE = 0x80
+INDEX_ROOT_ATTRIBUTE = 0x90
+INDEX_ALLOCATION_ATTRIBUTE = 0xA0
+BITMAP_ATTRIBUTE = 0xB0
+REPARSE_POINT_ATTRIBUTE = 0xC0
+EA_INFORMATION_ATTRIBUTE = 0xD0
+EA_ATTRIBUTE = 0xE0
+LOGGED_UTILITY_STREAM_ATTRIBUTE = 0x100
+
+# --- Minimal WindowsTime Implementation ---
+class WindowsTime:
+    def __init__(self, low: int, high: int):
+        # Combine low and high into a 64-bit value (assuming little-endian)
+        self.value = (high << 32) | low
+        self.dt = self._filetime_to_dt(self.value)
+        self.dtstr = self.dt.isoformat()
+
+    def _filetime_to_dt(self, filetime: int) -> datetime.datetime:
+        # FILETIME counts 100-nanosecond intervals since January 1, 1601 (UTC)
+        epoch_start = datetime.datetime(1601, 1, 1, tzinfo=datetime.timezone.utc)
+        # Convert 100-nanosecond intervals to microseconds
+        microseconds = filetime // 10
+        return epoch_start + datetime.timedelta(microseconds=microseconds)
+
+# --- MftRecord Class ---
+class MftRecord:
+    def __init__(self, raw_record: bytes, compute_hashes: bool = False, debug_level: int = 0, logger=None):
+        self.raw_record = raw_record
+        self.debug_level = debug_level
+        self.logger = logger or self._default_logger
+        self.magic = 0
+        self.upd_off = 0
+        self.upd_cnt = 0
+        self.lsn = 0
+        self.seq = 0
+        self.link = 0
+        self.attr_off = 0
+        self.flags = 0
+        self.size = 0
+        self.alloc_sizef = 0
+        self.base_ref = 0
+        self.next_attrid = 0
+        self.recordnum = 0
+        self.filename = ''
+        self.si_times = {
+            'crtime': WindowsTime(0, 0),
+            'mtime': WindowsTime(0, 0),
+            'atime': WindowsTime(0, 0),
+            'ctime': WindowsTime(0, 0)
+        }
+        self.fn_times = {
+            'crtime': WindowsTime(0, 0),
+            'mtime': WindowsTime(0, 0),
+            'atime': WindowsTime(0, 0),
+            'ctime': WindowsTime(0, 0)
+        }
+        self.filesize = 0
+        self.attribute_types = set()
+        self.attribute_list = []
+        self.object_id = ''
+        self.birth_volume_id = ''
+        self.birth_object_id = ''
+        self.birth_domain_id = ''
+        self.parent_ref = 0
+        self.md5 = None
+        self.sha256 = None
+        self.sha512 = None
+        self.crc32 = None
+        # Attribute results must be initialized *before* parse_record() runs,
+        # otherwise its output would be clobbered.
+        self.security_descriptor = None
+        self.volume_name = None
+        self.volume_info = None
+        self.data_attribute = None
+        self.index_root = None
+        self.index_allocation = None
+        self.bitmap = None
+        self.reparse_point = None
+        self.ea_information = None
+        self.ea = None
+        self.logged_utility_stream = None
+        if compute_hashes:
+            self.compute_hashes()
+        self.parse_record()
+
+    def _default_logger(self, message: str, level: int = 0):
+        if level <= self.debug_level:
+            print(message)
+
+    def log(self, message: str, level: int = 0):
+        self.logger(message, level)
+
+    def parse_record(self) -> None:
+        # Header offsets follow the MFT_RECORD_* constants above.
+        try:
+            self.magic = struct.unpack("<I", self.raw_record[0:4])[0]
+            self.upd_off = struct.unpack("<H", self.raw_record[4:6])[0]
+            self.upd_cnt = struct.unpack("<H", self.raw_record[6:8])[0]
+            self.lsn = struct.unpack("<Q", self.raw_record[8:16])[0]
+            self.seq = struct.unpack("<H", self.raw_record[16:18])[0]
+            self.link = struct.unpack("<H", self.raw_record[18:20])[0]
+            self.attr_off = struct.unpack("<H", self.raw_record[20:22])[0]
+            self.flags = struct.unpack("<H", self.raw_record[22:24])[0]
+            self.size = struct.unpack("<I", self.raw_record[24:28])[0]
+            self.alloc_sizef = struct.unpack("<I", self.raw_record[28:32])[0]
+            self.base_ref = struct.unpack("<Q", self.raw_record[32:40])[0]
+            self.next_attrid = struct.unpack("<H", self.raw_record[40:42])[0]
+            self.recordnum = struct.unpack("<I", self.raw_record[44:48])[0]
+        except struct.error:
+            self.log("Record too short to parse header", 1)
+            return
+
+        # Walk the attribute area, dispatching known types to their parsers.
+        handlers = {
+            STANDARD_INFORMATION_ATTRIBUTE: self.parse_si_attribute,
+            ATTRIBUTE_LIST_ATTRIBUTE: self.parse_attribute_list,
+            FILE_NAME_ATTRIBUTE: self.parse_fn_attribute,
+            OBJECT_ID_ATTRIBUTE: self.parse_object_id,
+            SECURITY_DESCRIPTOR_ATTRIBUTE: self.parse_security_descriptor,
+            VOLUME_NAME_ATTRIBUTE: self.parse_volume_name,
+            VOLUME_INFORMATION_ATTRIBUTE: self.parse_volume_information,
+            DATA_ATTRIBUTE: self.parse_data,
+            INDEX_ROOT_ATTRIBUTE: self.parse_index_root,
+            INDEX_ALLOCATION_ATTRIBUTE: self.parse_index_allocation,
+            BITMAP_ATTRIBUTE: self.parse_bitmap,
+            REPARSE_POINT_ATTRIBUTE: self.parse_reparse_point,
+            EA_INFORMATION_ATTRIBUTE: self.parse_ea_information,
+            EA_ATTRIBUTE: self.parse_ea,
+            LOGGED_UTILITY_STREAM_ATTRIBUTE: self.parse_logged_utility_stream,
+        }
+        offset = self.attr_off
+        while offset < len(self.raw_record) - 8:
+            attr_type = struct.unpack("<I", self.raw_record[offset:offset+4])[0]
+            if attr_type == 0xFFFFFFFF:  # end-of-attributes marker
+                break
+            attr_len = struct.unpack("<I", self.raw_record[offset+4:offset+8])[0]
+            if attr_len < 8 or offset + attr_len > len(self.raw_record):
+                break
+            self.attribute_types.add(attr_type)
+            handler = handlers.get(attr_type)
+            if handler:
+                handler(offset)
+            offset += attr_len
+
+    def parse_si_attribute(self, offset: int) -> None:
+        si_data = self.raw_record[offset+24:offset+72]
+        if len(si_data) >= 32:
+            try:
+                self.si_times = {
+                    'crtime': WindowsTime(struct.unpack("<L", si_data[0:4])[0], struct.unpack("<L", si_data[4:8])[0]),
+                    'mtime': WindowsTime(struct.unpack("<L", si_data[8:12])[0], struct.unpack("<L", si_data[12:16])[0]),
+                    'ctime': WindowsTime(struct.unpack("<L", si_data[16:20])[0], struct.unpack("<L", si_data[20:24])[0]),
+                    'atime': WindowsTime(struct.unpack("<L", si_data[24:28])[0], struct.unpack("<L", si_data[28:32])[0])
+                }
+            except struct.error:
+                self.log(f"Error parsing Standard Information for record {self.recordnum}", 1)
+
+    def parse_fn_attribute(self, offset: int) -> None:
+        fn_data = self.raw_record[offset+24:]
+        if len(fn_data) >= 64:
+            try:
+                self.fn_times = {
+                    'crtime': WindowsTime(struct.unpack("<L", fn_data[8:12])[0], struct.unpack("<L", fn_data[12:16])[0]),
+                    'mtime': WindowsTime(struct.unpack("<L", fn_data[16:20])[0], struct.unpack("<L", fn_data[20:24])[0]),
+                    'ctime': WindowsTime(struct.unpack("<L", fn_data[24:28])[0], struct.unpack("<L", fn_data[28:32])[0]),
+                    'atime': WindowsTime(struct.unpack("<L", fn_data[32:36])[0], struct.unpack("<L", fn_data[36:40])[0])
+                }
+                self.filesize = struct.unpack("<Q", fn_data[48:56])[0]
+                name_len = struct.unpack("B", fn_data[64:65])[0]
+                if len(fn_data) >= 66 + name_len * 2:
+                    self.filename = fn_data[66:66+name_len*2].decode('utf-16-le', errors='replace')
+                self.parent_ref = struct.unpack("<Q", fn_data[0:8])[0] & 0x0000FFFFFFFFFFFF
+            except struct.error:
+                self.log(f"Error parsing File Name attribute for record {self.recordnum}", 1)
+
+    def parse_object_id(self, offset: int) -> None:
+        obj_id_data = self.raw_record[offset+24:offset+88]
+        if len(obj_id_data) >= 64:
+            try:
+                self.object_id = str(uuid.UUID(bytes_le=obj_id_data[:16]))
+                self.birth_volume_id = str(uuid.UUID(bytes_le=obj_id_data[16:32]))
+                self.birth_object_id = str(uuid.UUID(bytes_le=obj_id_data[32:48]))
+                self.birth_domain_id = str(uuid.UUID(bytes_le=obj_id_data[48:64]))
+            except (struct.error, ValueError):
+                print(f"Error parsing Object ID attribute for record {self.recordnum}")
+
+    def parse_attribute_list(self, offset: int) -> None:
+        # Entries follow the resident content; each entry is at least 26 bytes:
+        # type(4), entry length(2), name length(1), name offset(1),
+        # starting VCN(8), file reference(8), attribute id(2).
+        attr_content_offset = offset + struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
+        content_len = struct.unpack("<I", self.raw_record[offset+16:offset+20])[0]
+        end = min(attr_content_offset + content_len, len(self.raw_record))
+        while attr_content_offset + 26 <= end:
+            try:
+                attr_type = struct.unpack("<I", self.raw_record[attr_content_offset:attr_content_offset+4])[0]
+                entry_len = struct.unpack("<H", self.raw_record[attr_content_offset+4:attr_content_offset+6])[0]
+                if entry_len < 26:
+                    break
+                name_len = struct.unpack("B", self.raw_record[attr_content_offset+6:attr_content_offset+7])[0]
+                name_offset = struct.unpack("B", self.raw_record[attr_content_offset+7:attr_content_offset+8])[0]
+                if name_len > 0:
+                    name = self.raw_record[attr_content_offset+name_offset:attr_content_offset+name_offset+name_len*2].decode('utf-16-le', errors='replace')
+                else:
+                    name = ""
+                vcn = struct.unpack("<Q", self.raw_record[attr_content_offset+8:attr_content_offset+16])[0]
+                ref = struct.unpack("<Q", self.raw_record[attr_content_offset+16:attr_content_offset+24])[0] & 0x0000FFFFFFFFFFFF
+                self.attribute_list.append({'type': attr_type, 'name': name, 'vcn': vcn, 'reference': ref})
+                attr_content_offset += entry_len
+            except struct.error:
+                self.log(f"Error parsing Attribute List for record {self.recordnum}", 1)
+                break
+
+    def parse_security_descriptor(self, offset: int) -> None:
+        sd_data = self.raw_record[offset+24:]
+        if len(sd_data) >= 20:
+            try:
+                revision = struct.unpack("B", sd_data[0:1])[0]
+                control = struct.unpack("<H", sd_data[2:4])[0]
+                self.security_descriptor = {
+                    'revision': revision,
+                    'control': control,
+                    'owner_offset': struct.unpack("<I", sd_data[4:8])[0],
+                    'group_offset': struct.unpack("<I", sd_data[8:12])[0],
+                    'sacl_offset': struct.unpack("<I", sd_data[12:16])[0],
+                    'dacl_offset': struct.unpack("<I", sd_data[16:20])[0]
+                }
+            except struct.error:
+                self.log(f"Error parsing Security Descriptor for record {self.recordnum}", 1)
+
+    def parse_volume_name(self, offset: int) -> None:
+        vn_data = self.raw_record[offset+24:]
+        try:
+            name_length = struct.unpack("<I", self.raw_record[offset+16:offset+20])[0]
+            self.volume_name = vn_data[:name_length].decode('utf-16-le', errors='replace')
+        except (struct.error, UnicodeDecodeError):
+            self.log(f"Error parsing Volume Name for record {self.recordnum}", 1)
+
+    def parse_volume_information(self, offset: int) -> None:
+        vi_data = self.raw_record[offset+24:offset+48]
+        if len(vi_data) >= 12:
+            try:
+                self.volume_info = {
+                    'major_version': struct.unpack("B", vi_data[8:9])[0],
+                    'minor_version': struct.unpack("B", vi_data[9:10])[0],
+                    'flags': struct.unpack("<H", vi_data[10:12])[0]
+                }
+            except struct.error:
+                self.log(f"Error parsing Volume Information for record {self.recordnum}", 1)
+
+    def parse_data(self, offset: int) -> None:
+        try:
+            non_resident_flag = struct.unpack("B", self.raw_record[offset+8:offset+9])[0]
+            name_length = struct.unpack("B", self.raw_record[offset+9:offset+10])[0]
+            name_offset = struct.unpack("<H", self.raw_record[offset+10:offset+12])[0]
+            if name_length > 0:
+                name = self.raw_record[offset+name_offset:offset+name_offset+name_length*2].decode('utf-16-le', errors='replace')
+            else:
+                name = ""
+            if non_resident_flag == 0:  # Resident
+                content_size = struct.unpack("<I", self.raw_record[offset+16:offset+20])[0]
+                content_offset = struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
+                self.data_attribute = {
+                    'name': name,
+                    'resident': True,
+                    'size': content_size,
+                    'content': self.raw_record[offset+content_offset:offset+content_offset+content_size]
+                }
+            else:  # Non-resident: only the VCN range is recorded here.
+                start_vcn = struct.unpack("<Q", self.raw_record[offset+16:offset+24])[0]
+                last_vcn = struct.unpack("<Q", self.raw_record[offset+24:offset+32])[0]
+                self.data_attribute = {
+                    'name': name,
+                    'resident': False,
+                    'start_vcn': start_vcn,
+                    'last_vcn': last_vcn
+                }
+        except struct.error:
+            self.log(f"Error parsing Data attribute for record {self.recordnum}", 1)
+
+    def parse_index_root(self, offset: int) -> None:
+        ir_data = self.raw_record[offset+24:]
+        try:
+            attr_type = struct.unpack("<I", ir_data[0:4])[0]
+            collation_rule = struct.unpack("<I", ir_data[4:8])[0]
+            index_block_size = struct.unpack("<I", ir_data[8:12])[0]
+            clusters_per_block = struct.unpack("B", ir_data[12:13])[0]
+            self.index_root = {
+                'attribute_type': attr_type,
+                'collation_rule': collation_rule,
+                'index_block_size': index_block_size,
+                'clusters_per_block': clusters_per_block
+            }
+        except struct.error:
+            self.log(f"Error parsing Index Root for record {self.recordnum}", 1)
+
+    def parse_index_allocation(self, offset: int) -> None:
+        ia_data = self.raw_record[offset+24:]
+        try:
+            # $INDEX_ALLOCATION is always non-resident; the data runs
+            # offset sits at attribute offset +32 (ia_data[8:10]).
+            data_runs_offset = struct.unpack("<H", ia_data[8:10])[0]
+            self.index_allocation = {'data_runs_offset': data_runs_offset}
+        except struct.error:
+            self.log(f"Error parsing Index Allocation for record {self.recordnum}", 1)
+
+    def parse_bitmap(self, offset: int) -> None:
+        bitmap_data = self.raw_record[offset+24:]
+        try:
+            # Resident value length from the attribute header.
+            bitmap_size = struct.unpack("<I", self.raw_record[offset+16:offset+20])[0]
+            self.bitmap = {'size': bitmap_size}
+        except struct.error:
+            self.log(f"Error parsing Bitmap for record {self.recordnum}", 1)
+
+    def parse_reparse_point(self, offset: int) -> None:
+        rp_data = self.raw_record[offset+24:]
+        try:
+            reparse_tag = struct.unpack("<I", rp_data[0:4])[0]
+            data_length = struct.unpack("<H", rp_data[4:6])[0]
+            self.reparse_point = {'tag': reparse_tag, 'data_length': data_length}
+        except struct.error:
+            self.log(f"Error parsing Reparse Point for record {self.recordnum}", 1)
+
+    def parse_ea_information(self, offset: int) -> None:
+        eai_data = self.raw_record[offset+24:]
+        try:
+            ea_size = struct.unpack("<H", eai_data[0:2])[0]
+            ea_count = struct.unpack("<H", eai_data[2:4])[0]
+            ea_unpacked_size = struct.unpack("<I", eai_data[4:8])[0]
+            self.ea_information = {
+                'packed_size': ea_size,
+                'need_ea_count': ea_count,
+                'unpacked_size': ea_unpacked_size
+            }
+        except struct.error:
+            self.log(f"Error parsing EA Information for record {self.recordnum}", 1)
+
+    def parse_ea(self, offset: int) -> None:
+        ea_data = self.raw_record[offset+24:]
+        try:
+            next_entry_offset = struct.unpack("<I", ea_data[0:4])[0]
+            flags = struct.unpack("B", ea_data[4:5])[0]
+            name_length = struct.unpack("B", ea_data[5:6])[0]
+            value_length = struct.unpack("<H", ea_data[6:8])[0]
+            self.ea = {
+                'next_entry_offset': next_entry_offset,
+                'flags': flags,
+                'name_length': name_length,
+                'value_length': value_length
+            }
+        except struct.error:
+            self.log(f"Error parsing EA attribute for record {self.recordnum}", 1)
+
+    def parse_logged_utility_stream(self, offset: int) -> None:
+        lus_data = self.raw_record[offset+24:]
+        try:
+            stream_size = struct.unpack("<Q", lus_data[0:8])[0]
+            self.logged_utility_stream = {'size': stream_size}
+        except struct.error:
+            self.log(f"Error parsing Logged Utility Stream for record {self.recordnum}", 1)
+
+    def compute_hashes(self) -> None:
+        md5 = hashlib.md5()
+        sha256 = hashlib.sha256()
+        sha512 = hashlib.sha512()
+        md5.update(self.raw_record)
+        sha256.update(self.raw_record)
+        sha512.update(self.raw_record)
+        self.md5 = md5.hexdigest()
+        self.sha256 = sha256.hexdigest()
+        self.sha512 = sha512.hexdigest()
+        self.crc32 = format(zlib.crc32(self.raw_record) & 0xFFFFFFFF, '08x')
+
+    def get_file_type(self) -> str:
+        # For illustration, this function checks a few flag bits.
+        # You may need to refine this logic.
+        FILE_RECORD_IS_DIRECTORY = 0x02
+        if self.flags & FILE_RECORD_IS_DIRECTORY:
+            return "Directory"
+        else:
+            return "File"
+
+# --- Main Program ---
+def main():
+    if len(sys.argv) != 2:
+        print("Usage: python mft.py <mft record file>")
+        sys.exit(1)
+
+    record_path = sys.argv[1]
+    try:
+        with open(record_path, "rb") as f:
+            raw_record = f.read()
+    except Exception as e:
+        print(f"Failed to read file: {e}")
+        sys.exit(1)
+
+    mft = MftRecord(raw_record, compute_hashes=True, debug_level=1)
+
+    # Use pprint to print out all details nicely.
+    details = {
+        "record_number": mft.recordnum,
+        "update_sequence_offset": mft.upd_off,
+        "update_sequence_count": mft.upd_cnt,
+        "logfile_sequence_number": mft.lsn,
+        "sequence_number": mft.seq,
+        "hard_link_count": mft.link,
+        "attribute_offset": mft.attr_off,
+        "flags": mft.flags,
+        "used_size": mft.size,
+        "allocated_size": mft.alloc_sizef,
+        "base_file_reference": mft.base_ref,
+        "next_attribute_id": mft.next_attrid,
+        "file_type": mft.get_file_type(),
+        "si_times": {k: v.dtstr for k, v in mft.si_times.items()},
+        "fn_times": {k: v.dtstr for k, v in mft.fn_times.items()},
+        "filename": mft.filename,
+        "parent_reference": mft.parent_ref,
+        "object_id": mft.object_id,
+        "security_descriptor": mft.security_descriptor,
+        "volume_name": mft.volume_name,
+        "volume_info": mft.volume_info,
+        "data_attribute": mft.data_attribute,
+        "index_root": mft.index_root,
+        "index_allocation": mft.index_allocation,
+        "bitmap": mft.bitmap,
+        "reparse_point": mft.reparse_point,
+        "ea_information": mft.ea_information,
+        "ea": mft.ea,
+        "logged_utility_stream": mft.logged_utility_stream,
+        "attribute_list": mft.attribute_list,
+        "hashes": {
+            "md5": mft.md5,
+            "sha256": mft.sha256,
+            "sha512": mft.sha512,
+            "crc32": mft.crc32
+        }
+    }
+
+    pprint.pprint(details)
+
+if __name__ == "__main__":
+    main()