First commit of MFT carver and MFT parsers

This commit is contained in:
tobias
2025-02-16 18:57:07 +01:00
commit 1973dc7031
2 changed files with 1006 additions and 0 deletions

mft.go (executable file, 496 lines)

@@ -0,0 +1,496 @@
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"unicode/utf16"
)
// --------------------
// Data Structures
// --------------------
type FileNameAttribute struct {
Filename string `json:"filename"`
ParentRef uint64 `json:"parent_ref"`
Crtime string `json:"crtime"`
Mtime string `json:"mtime"`
CtTime string `json:"ctime"`
Atime string `json:"atime"`
}
type DataStream struct {
Name string `json:"name"`
Resident bool `json:"resident"`
NonResident bool `json:"non_resident"`
ContentBase64 string `json:"content_base64,omitempty"`
}
type MFTRecord struct {
// Header fields (omitting the "magic" since it's always "FILE")
UpdateSeqOffset uint16 `json:"update_seq_offset"`
UpdateSeqSize uint16 `json:"update_seq_size"`
LSN uint64 `json:"lsn"`
SequenceNumber uint16 `json:"sequence_number"`
HardLinkCount uint16 `json:"hard_link_count"`
FirstAttrOffset uint16 `json:"first_attr_offset"`
Flags uint16 `json:"flags"`
RealSize uint32 `json:"real_size"`
AllocatedSize uint32 `json:"allocated_size"`
BaseFileRecord uint64 `json:"base_file_record"`
NextAttrId uint16 `json:"next_attr_id"`
RecordNumber uint32 `json:"record_number"`
// Standard Information attribute timestamps (if present)
SI_Crtime string `json:"si_crtime,omitempty"`
SI_Mtime string `json:"si_mtime,omitempty"`
SI_CtTime string `json:"si_ctime,omitempty"`
SI_Atime string `json:"si_atime,omitempty"`
// Additional attributes
ObjectID string `json:"object_id,omitempty"`
SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
FileNames []FileNameAttribute `json:"file_names,omitempty"`
DataStreams []DataStream `json:"data_streams,omitempty"`
}
// CarvedRecord holds a candidate 1024-byte record and its global offset.
type CarvedRecord struct {
Offset int64
RecordBytes []byte
}
// ParsedRecord is what gets sent to the JSON writer.
type ParsedRecord struct {
Offset int64
RecordJSON map[string]interface{}
}
// --------------------
// Helper functions
// --------------------
// safeSlice returns data[start : start+length] if the range lies within bounds; otherwise it returns nil and false.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
if start < 0 || start+length > len(data) {
return nil, false
}
return data[start : start+length], true
}
// filetimeToString converts a Windows FILETIME (uint64) into an RFC3339 timestamp string.
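// For example, the FILETIME value 116444736000000000 (100 ns ticks since 1601-01-01 UTC) converts to 1970-01-01T00:00:00Z.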
func filetimeToString(ft uint64) string {
const epochDiff = 11644473600 // seconds between 1601 and 1970
secs := int64(ft/10000000) - epochDiff
nsec := int64(ft%10000000) * 100
t := time.Unix(secs, nsec).UTC()
return t.Format(time.RFC3339)
}
// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
func decodeUTF16String(b []byte) string {
if len(b)%2 != 0 {
b = b[:len(b)-1]
}
u16 := make([]uint16, len(b)/2)
for i := 0; i < len(u16); i++ {
u16[i] = binary.LittleEndian.Uint16(b[i*2:])
}
return string(utf16.Decode(u16))
}
// parseZoneIdentifier is a simple parser for Zone.Identifier streams.
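// Zone.Identifier is the "Mark of the Web" alternate data stream; its content is INI-like text
// such as "[ZoneTransfer]" followed by lines like "ZoneId=3".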
func parseZoneIdentifier(content []byte) map[string]string {
result := make(map[string]string)
text := string(content)
lines := strings.Split(text, "\n")
for _, line := range lines {
if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
result[key] = value
}
}
return result
}
// --------------------
// Attribute Parsing
// --------------------
// parseAttributes iterates over the attribute area and processes known types.
// It uses safeSlice to ensure we don't read beyond the record.
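// Resident attribute header layout assumed here: type (0-3), total length (4-7), non-resident flag (8),
// name length (9), name offset (10-11), value length (16-19), value offset (20-21).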
func parseAttributes(data []byte, rec *MFTRecord) {
offset := int(rec.FirstAttrOffset)
for offset < len(data)-8 {
// First 4 bytes: attribute type
if attrBytes, ok := safeSlice(data, offset, 4); !ok {
break
} else {
attrType := binary.LittleEndian.Uint32(attrBytes)
// End marker
if attrType == 0xFFFFFFFF {
break
}
// Next 4 bytes: attribute length
if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
break
} else {
attrLen := binary.LittleEndian.Uint32(attrLenBytes)
if attrLen < 8 || offset+int(attrLen) > len(data) {
// Malformed attribute, skip one byte and try to re-sync.
offset++
continue
}
// Resident flag at offset+8.
residentFlag := data[offset+8]
// For resident attributes, extract value length and value offset.
var valLen uint32
var valOffset uint16
if residentFlag == 0 {
if vb, ok := safeSlice(data, offset+16, 4); ok {
valLen = binary.LittleEndian.Uint32(vb)
} else {
offset += int(attrLen)
continue
}
if vb, ok := safeSlice(data, offset+20, 2); ok {
valOffset = binary.LittleEndian.Uint16(vb)
} else {
offset += int(attrLen)
continue
}
}
// Process known attribute types.
switch attrType {
case 0x10: // $STANDARD_INFORMATION
if residentFlag == 0 {
if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
}
}
case 0x30: // $FILE_NAME
if residentFlag == 0 {
if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
var fn FileNameAttribute
fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
filenameLen := filenameLenBytes[0]
if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
fn.Filename = decodeUTF16String(nameBytes)
}
}
rec.FileNames = append(rec.FileNames, fn)
}
}
case 0x80: // $DATA
var ds DataStream
// Extract the attribute's name, if any: the name length is at offset+9 and the
// 16-bit name offset (relative to the attribute start) is at offset+10.
if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
nameLen := nameInfo[0]
if nameLen > 0 {
if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok {
nameOff := binary.LittleEndian.Uint16(nameOffBytes)
if nameBytes, ok := safeSlice(data, offset+int(nameOff), int(nameLen)*2); ok {
ds.Name = decodeUTF16String(nameBytes)
}
}
}
}
if residentFlag == 0 {
ds.Resident = true
ds.NonResident = false
if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
if ds.Name == "Zone.Identifier" {
zoneInfo := parseZoneIdentifier(content)
ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
}
}
} else {
ds.Resident = false
ds.NonResident = true
}
rec.DataStreams = append(rec.DataStreams, ds)
case 0x40: // $OBJECT_ID (when used as such)
if residentFlag == 0 {
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
rec.ObjectID = fmt.Sprintf("%x", objData)
}
}
case 0x50: // $SECURITY_DESCRIPTOR
if residentFlag == 0 {
// A minimal parser: just store the raw descriptor if at least the 20-byte header is present.
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
// We could decode further; here we just store raw hex values.
rec.SecurityDescriptor = map[string]interface{}{
"raw": fmt.Sprintf("%x", secData),
}
}
}
// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
// can be added here following similar patterns.)
}
offset += int(attrLen)
}
}
}
}
// parseMFTRecord attempts to parse a 1024-byte MFT record.
// It returns an error if the record is too short or if the expected "FILE" marker is missing.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
if len(data) < 46 {
return nil, fmt.Errorf("data too short to be a valid record")
}
if string(data[:4]) != "FILE" {
return nil, fmt.Errorf("invalid record header")
}
rec := &MFTRecord{
UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
UpdateSeqSize: binary.LittleEndian.Uint16(data[6:8]),
LSN: binary.LittleEndian.Uint64(data[8:16]),
SequenceNumber: binary.LittleEndian.Uint16(data[16:18]),
HardLinkCount: binary.LittleEndian.Uint16(data[18:20]),
FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
Flags: binary.LittleEndian.Uint16(data[22:24]),
RealSize: binary.LittleEndian.Uint32(data[24:28]),
AllocatedSize: binary.LittleEndian.Uint32(data[28:32]),
BaseFileRecord: binary.LittleEndian.Uint64(data[32:40]),
NextAttrId: binary.LittleEndian.Uint16(data[40:42]),
RecordNumber: binary.LittleEndian.Uint32(data[42:46]),
}
parseAttributes(data, rec)
return rec, nil
}
// --------------------
// Parallel Processing and Main
// --------------------
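// processImageFile carves candidate MFT records from a single image file, writes each raw
// record to <image>_<timestamp>/<offset>.mftrecord, and emits one JSON line per parsed record.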
func processImageFile(inputFile string, wg *sync.WaitGroup) {
defer wg.Done()
f, err := os.Open(inputFile)
if err != nil {
log.Printf("Failed to open %s: %v", inputFile, err)
return
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
log.Printf("Failed to stat %s: %v", inputFile, err)
return
}
fileSize := fi.Size()
// Create an output folder and JSONL file based on input file name and current timestamp.
timestamp := time.Now().Format("20060102150405")
baseName := filepath.Base(inputFile)
outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
if err := os.Mkdir(outDir, 0755); err != nil {
log.Printf("Failed to create output directory for %s: %v", inputFile, err)
return
}
jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
jsonlFile, err := os.Create(jsonlFileName)
if err != nil {
log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
return
}
defer jsonlFile.Close()
carvedChan := make(chan CarvedRecord, 100)
parsedChan := make(chan ParsedRecord, 100)
// Worker pool for carving/parsing records.
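// The scanner below pushes candidate offsets into carvedChan; each worker writes the raw
// 1024-byte record to disk, parses it, and forwards a JSON map to parsedChan, which a single
// writer goroutine serializes into the JSONL file.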
numWorkers := runtime.NumCPU()
var workerWg sync.WaitGroup
for i := 0; i < numWorkers; i++ {
workerWg.Add(1)
go func() {
defer workerWg.Done()
for carved := range carvedChan {
// Write raw record to disk.
recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
continue
}
// Parse the record.
mft, err := parseMFTRecord(carved.RecordBytes)
if err != nil {
// Skip records that cannot be parsed.
continue
}
// Build JSON record (omitting the magic field).
recordMap := map[string]interface{}{
"input_image": inputFile,
"offset": carved.Offset,
"update_seq_offset": mft.UpdateSeqOffset,
"update_seq_size": mft.UpdateSeqSize,
"lsn": mft.LSN,
"sequence_number": mft.SequenceNumber,
"hard_link_count": mft.HardLinkCount,
"first_attr_offset": mft.FirstAttrOffset,
"flags": mft.Flags,
"real_size": mft.RealSize,
"allocated_size": mft.AllocatedSize,
"base_file_record": mft.BaseFileRecord,
"next_attr_id": mft.NextAttrId,
"record_number": mft.RecordNumber,
"si_crtime": mft.SI_Crtime,
"si_mtime": mft.SI_Mtime,
"si_ctime": mft.SI_CtTime,
"si_atime": mft.SI_Atime,
"object_id": mft.ObjectID,
"security_descriptor": mft.SecurityDescriptor,
"file_names": mft.FileNames,
"data_streams": mft.DataStreams,
}
parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
}
}()
}
// Writer goroutine to output JSONL records.
var writerWg sync.WaitGroup
writerWg.Add(1)
go func() {
defer writerWg.Done()
encoder := json.NewEncoder(jsonlFile)
for pr := range parsedChan {
if err := encoder.Encode(pr.RecordJSON); err != nil {
log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
}
}
}()
// Scan the file for the "FILE0" pattern.
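// "FILE" is the record magic; on modern NTFS volumes the update-sequence-array offset at bytes 4-5
// is usually 0x0030, so byte 4 is ASCII '0'. Matching that extra byte cuts down false positives
// (at the cost of missing older-format records).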
reader := bufio.NewReader(f)
const recordSize = 1024
const chunkSize = 1024 * 1024
pattern := []byte("FILE0")
var fileOffset int64 = 0
var leftover []byte
lastPrint := time.Now()
for {
chunk := make([]byte, chunkSize)
n, err := reader.Read(chunk)
if n == 0 {
break
}
data := append(leftover, chunk[:n]...)
// Progress update every ~5 seconds.
if time.Since(lastPrint) > 5*time.Second {
perc := float64(fileOffset) / float64(fileSize) * 100.0
log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
lastPrint = time.Now()
}
searchLimit := len(data) - len(pattern)
for i := 0; i <= searchLimit; i++ {
if bytes.Equal(data[i:i+len(pattern)], pattern) {
globalOffset := fileOffset - int64(len(leftover)) + int64(i)
if globalOffset+recordSize > fileSize {
continue
}
recordBytes := make([]byte, recordSize)
_, err := f.ReadAt(recordBytes, globalOffset)
if err != nil {
log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
continue
}
carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
}
}
if len(data) >= len(pattern)-1 {
leftover = data[len(data)-(len(pattern)-1):]
} else {
leftover = data
}
fileOffset += int64(n)
if err == io.EOF {
break
}
}
close(carvedChan)
workerWg.Wait()
close(parsedChan)
writerWg.Wait()
log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
}
func main() {
flag.Parse()
if flag.NArg() == 0 {
fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
os.Exit(1)
}
// Build list of files from provided arguments (recursively if directories).
var files []string
for _, arg := range flag.Args() {
fi, err := os.Stat(arg)
if err != nil {
log.Printf("Error stating %s: %v", arg, err)
continue
}
if fi.IsDir() {
err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
if !info.IsDir() {
files = append(files, path)
}
return nil
})
if err != nil {
log.Printf("Error walking directory %s: %v", arg, err)
}
} else {
files = append(files, arg)
}
}
totalFiles := len(files)
if totalFiles == 0 {
log.Println("No input files found.")
return
}
log.Printf("Found %d files to process.", totalFiles)
var wg sync.WaitGroup
concurrentFiles := runtime.NumCPU()
sem := make(chan struct{}, concurrentFiles)
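// The buffered channel acts as a counting semaphore so at most NumCPU images are processed concurrently.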
for i, file := range files {
wg.Add(1)
sem <- struct{}{}
go func(i int, file string) {
defer func() { <-sem }()
log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
processImageFile(file, &wg)
}(i, file)
}
wg.Wait()
log.Println("All files processed.")
}

mft.py (executable file, 510 lines)

@@ -0,0 +1,510 @@
#!/usr/bin/env python3
import struct
import uuid
import hashlib
import zlib
import sys
import pprint
import datetime
# --- Minimal Constants ---
MFT_RECORD_MAGIC_NUMBER_OFFSET = 0
MFT_RECORD_MAGIC_NUMBER_SIZE = 4
MFT_RECORD_UPDATE_SEQUENCE_OFFSET = 4
MFT_RECORD_UPDATE_SEQUENCE_SIZE = 2
MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET = 6
MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET = 8
MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_SIZE = 8
MFT_RECORD_SEQUENCE_NUMBER_OFFSET = 16
MFT_RECORD_SEQUENCE_NUMBER_SIZE = 2
MFT_RECORD_HARD_LINK_COUNT_OFFSET = 18
MFT_RECORD_HARD_LINK_COUNT_SIZE = 2
MFT_RECORD_FIRST_ATTRIBUTE_OFFSET = 20
MFT_RECORD_FIRST_ATTRIBUTE_SIZE = 2
MFT_RECORD_FLAGS_OFFSET = 22
MFT_RECORD_FLAGS_SIZE = 2
MFT_RECORD_USED_SIZE_OFFSET = 24
MFT_RECORD_USED_SIZE_SIZE = 4
MFT_RECORD_ALLOCATED_SIZE_OFFSET = 28
MFT_RECORD_ALLOCATED_SIZE_SIZE = 4
MFT_RECORD_FILE_REFERENCE_OFFSET = 32
MFT_RECORD_FILE_REFERENCE_SIZE = 8
MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET = 40
MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE = 2
MFT_RECORD_RECORD_NUMBER_OFFSET = 42
MFT_RECORD_RECORD_NUMBER_SIZE = 4
# Attribute type constants (only those used in this parser)
STANDARD_INFORMATION_ATTRIBUTE = 0x10
ATTRIBUTE_LIST_ATTRIBUTE = 0x20
FILE_NAME_ATTRIBUTE = 0x30
OBJECT_ID_ATTRIBUTE = 0x40
SECURITY_DESCRIPTOR_ATTRIBUTE = 0x50
VOLUME_NAME_ATTRIBUTE = 0x60
VOLUME_INFORMATION_ATTRIBUTE = 0x70
DATA_ATTRIBUTE = 0x80
INDEX_ROOT_ATTRIBUTE = 0x90
INDEX_ALLOCATION_ATTRIBUTE = 0xA0
BITMAP_ATTRIBUTE = 0xB0
REPARSE_POINT_ATTRIBUTE = 0xC0
EA_INFORMATION_ATTRIBUTE = 0xD0
EA_ATTRIBUTE = 0xE0
LOGGED_UTILITY_STREAM_ATTRIBUTE = 0x100
# --- Minimal WindowsTime Implementation ---
class WindowsTime:
def __init__(self, low: int, high: int):
# Combine the low and high 32-bit halves of the FILETIME into a single 64-bit value
self.value = (high << 32) | low
self.dt = self._filetime_to_dt(self.value)
self.dtstr = self.dt.isoformat()
def _filetime_to_dt(self, filetime: int) -> datetime.datetime:
# FILETIME counts 100-nanosecond intervals since January 1, 1601 (UTC)
epoch_start = datetime.datetime(1601, 1, 1, tzinfo=datetime.timezone.utc)
# Convert 100-nanosecond intervals to microseconds
microseconds = filetime // 10
return epoch_start + datetime.timedelta(microseconds=microseconds)
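# Example: WindowsTime(0, 0).dtstr is '1601-01-01T00:00:00+00:00', the FILETIME epoch,
# which is also the placeholder value used before any timestamps are parsed.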
# --- MftRecord Class ---
class MftRecord:
def __init__(self, raw_record: bytes, compute_hashes: bool = False, debug_level: int = 0, logger=None):
self.raw_record = raw_record
self.debug_level = debug_level
self.logger = logger or self._default_logger
self.magic = 0
self.upd_off = 0
self.upd_cnt = 0
self.lsn = 0
self.seq = 0
self.link = 0
self.attr_off = 0
self.flags = 0
self.size = 0
self.alloc_sizef = 0
self.base_ref = 0
self.next_attrid = 0
self.recordnum = 0
self.filename = ''
self.si_times = {
'crtime': WindowsTime(0, 0),
'mtime': WindowsTime(0, 0),
'atime': WindowsTime(0, 0),
'ctime': WindowsTime(0, 0)
}
self.fn_times = {
'crtime': WindowsTime(0, 0),
'mtime': WindowsTime(0, 0),
'atime': WindowsTime(0, 0),
'ctime': WindowsTime(0, 0)
}
self.filesize = 0
self.attribute_types = set()
self.attribute_list = []
self.object_id = ''
self.birth_volume_id = ''
self.birth_object_id = ''
self.birth_domain_id = ''
self.parent_ref = 0
self.md5 = None
self.sha256 = None
self.sha512 = None
self.crc32 = None
self.security_descriptor = None
self.volume_name = None
self.volume_info = None
self.data_attribute = None
self.index_root = None
self.index_allocation = None
self.bitmap = None
self.reparse_point = None
self.ea_information = None
self.ea = None
self.logged_utility_stream = None
if compute_hashes:
self.compute_hashes()
# Parse last so the attribute parsers can populate the fields initialized above
# instead of having their results overwritten with None.
self.parse_record()
def _default_logger(self, message: str, level: int = 0):
if level <= self.debug_level:
print(message)
def log(self, message: str, level: int = 0):
self.logger(message, level)
def parse_record(self) -> None:
try:
self.magic = struct.unpack("<I", self.raw_record[MFT_RECORD_MAGIC_NUMBER_OFFSET:MFT_RECORD_MAGIC_NUMBER_OFFSET+MFT_RECORD_MAGIC_NUMBER_SIZE])[0]
self.upd_off = struct.unpack("<H", self.raw_record[MFT_RECORD_UPDATE_SEQUENCE_OFFSET:MFT_RECORD_UPDATE_SEQUENCE_OFFSET+MFT_RECORD_UPDATE_SEQUENCE_SIZE])[0]
self.upd_cnt = struct.unpack("<H", self.raw_record[MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET:MFT_RECORD_UPDATE_SEQUENCE_SIZE_OFFSET+MFT_RECORD_UPDATE_SEQUENCE_SIZE])[0]
self.lsn = struct.unpack("<Q", self.raw_record[MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET:MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_OFFSET+MFT_RECORD_LOGFILE_SEQUENCE_NUMBER_SIZE])[0]
self.seq = struct.unpack("<H", self.raw_record[MFT_RECORD_SEQUENCE_NUMBER_OFFSET:MFT_RECORD_SEQUENCE_NUMBER_OFFSET+MFT_RECORD_SEQUENCE_NUMBER_SIZE])[0]
self.link = struct.unpack("<H", self.raw_record[MFT_RECORD_HARD_LINK_COUNT_OFFSET:MFT_RECORD_HARD_LINK_COUNT_OFFSET+MFT_RECORD_HARD_LINK_COUNT_SIZE])[0]
self.attr_off = struct.unpack("<H", self.raw_record[MFT_RECORD_FIRST_ATTRIBUTE_OFFSET:MFT_RECORD_FIRST_ATTRIBUTE_OFFSET+MFT_RECORD_FIRST_ATTRIBUTE_SIZE])[0]
self.flags = struct.unpack("<H", self.raw_record[MFT_RECORD_FLAGS_OFFSET:MFT_RECORD_FLAGS_OFFSET+MFT_RECORD_FLAGS_SIZE])[0]
self.size = struct.unpack("<I", self.raw_record[MFT_RECORD_USED_SIZE_OFFSET:MFT_RECORD_USED_SIZE_OFFSET+MFT_RECORD_USED_SIZE_SIZE])[0]
self.alloc_sizef = struct.unpack("<I", self.raw_record[MFT_RECORD_ALLOCATED_SIZE_OFFSET:MFT_RECORD_ALLOCATED_SIZE_OFFSET+MFT_RECORD_ALLOCATED_SIZE_SIZE])[0]
self.base_ref = struct.unpack("<Q", self.raw_record[MFT_RECORD_FILE_REFERENCE_OFFSET:MFT_RECORD_FILE_REFERENCE_OFFSET+MFT_RECORD_FILE_REFERENCE_SIZE])[0]
self.next_attrid = struct.unpack("<H", self.raw_record[MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET:MFT_RECORD_NEXT_ATTRIBUTE_ID_OFFSET+MFT_RECORD_NEXT_ATTRIBUTE_ID_SIZE])[0]
self.recordnum = struct.unpack("<I", self.raw_record[MFT_RECORD_RECORD_NUMBER_OFFSET:MFT_RECORD_RECORD_NUMBER_OFFSET+MFT_RECORD_RECORD_NUMBER_SIZE])[0]
self.parse_attributes()
except struct.error as e:
self.log(f"Error parsing record header: {e}", 0)
def parse_attributes(self):
offset = int(self.attr_off)
while offset < len(self.raw_record) - 8:
try:
self.log(f"Parsing attribute at offset {offset}", 3)
attr_type = int(struct.unpack("<L", self.raw_record[offset:offset+4])[0])
attr_len = int(struct.unpack("<L", self.raw_record[offset+4:offset+8])[0])
self.log(f"Attribute type: {attr_type}, length: {attr_len}", 3)
if attr_type == 0xffffffff or attr_len == 0:
self.log("End of attributes reached", 3)
break
self.attribute_types.add(attr_type)
if attr_type == STANDARD_INFORMATION_ATTRIBUTE:
self.parse_si_attribute(offset)
elif attr_type == FILE_NAME_ATTRIBUTE:
self.parse_fn_attribute(offset)
elif attr_type == ATTRIBUTE_LIST_ATTRIBUTE:
self.parse_attribute_list(offset)
elif attr_type == OBJECT_ID_ATTRIBUTE:
self.parse_object_id_attribute(offset)
elif attr_type == SECURITY_DESCRIPTOR_ATTRIBUTE:
self.parse_security_descriptor(offset)
elif attr_type == VOLUME_NAME_ATTRIBUTE:
self.parse_volume_name(offset)
elif attr_type == VOLUME_INFORMATION_ATTRIBUTE:
self.parse_volume_information(offset)
elif attr_type == DATA_ATTRIBUTE:
self.parse_data(offset)
elif attr_type == INDEX_ROOT_ATTRIBUTE:
self.parse_index_root(offset)
elif attr_type == INDEX_ALLOCATION_ATTRIBUTE:
self.parse_index_allocation(offset)
elif attr_type == BITMAP_ATTRIBUTE:
self.parse_bitmap(offset)
elif attr_type == REPARSE_POINT_ATTRIBUTE:
self.parse_reparse_point(offset)
elif attr_type == EA_INFORMATION_ATTRIBUTE:
self.parse_ea_information(offset)
elif attr_type == EA_ATTRIBUTE:
self.parse_ea(offset)
elif attr_type == LOGGED_UTILITY_STREAM_ATTRIBUTE:
self.parse_logged_utility_stream(offset)
offset += attr_len
except Exception as e:
print(f"Error processing record {self.recordnum}: {e}")
offset += 1
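# Note: the parse_* helpers below assume resident attributes whose value starts at the common
# 0x18 (24-byte) offset from the attribute header, rather than reading the header's value-offset field.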
def parse_si_attribute(self, offset: int) -> None:
si_data = self.raw_record[offset+24:offset+72]
if len(si_data) >= 32:
try:
self.si_times = {
'crtime': WindowsTime(struct.unpack("<L", si_data[:4])[0], struct.unpack("<L", si_data[4:8])[0]),
'mtime': WindowsTime(struct.unpack("<L", si_data[8:12])[0], struct.unpack("<L", si_data[12:16])[0]),
'ctime': WindowsTime(struct.unpack("<L", si_data[16:20])[0], struct.unpack("<L", si_data[20:24])[0]),
'atime': WindowsTime(struct.unpack("<L", si_data[24:28])[0], struct.unpack("<L", si_data[28:32])[0])
}
except struct.error:
pass
def parse_fn_attribute(self, offset: int) -> None:
fn_data = self.raw_record[offset+24:]
if len(fn_data) >= 64:
try:
self.fn_times = {
'crtime': WindowsTime(struct.unpack("<L", fn_data[8:12])[0], struct.unpack("<L", fn_data[12:16])[0]),
'mtime': WindowsTime(struct.unpack("<L", fn_data[16:20])[0], struct.unpack("<L", fn_data[20:24])[0]),
'ctime': WindowsTime(struct.unpack("<L", fn_data[24:28])[0], struct.unpack("<L", fn_data[28:32])[0]),
'atime': WindowsTime(struct.unpack("<L", fn_data[32:36])[0], struct.unpack("<L", fn_data[36:40])[0])
}
self.filesize = struct.unpack("<Q", fn_data[48:56])[0]
name_len = struct.unpack("B", fn_data[64:65])[0]
if len(fn_data) >= 66 + name_len * 2:
self.filename = fn_data[66:66+name_len*2].decode('utf-16-le', errors='replace')
self.parent_ref = struct.unpack("<Q", fn_data[:8])[0] & 0x0000FFFFFFFFFFFF
except struct.error:
pass
def parse_object_id_attribute(self, offset: int) -> None:
obj_id_data = self.raw_record[offset+24:offset+88]
if len(obj_id_data) >= 64:
try:
self.object_id = str(uuid.UUID(bytes_le=obj_id_data[:16]))
self.birth_volume_id = str(uuid.UUID(bytes_le=obj_id_data[16:32]))
self.birth_object_id = str(uuid.UUID(bytes_le=obj_id_data[32:48]))
self.birth_domain_id = str(uuid.UUID(bytes_le=obj_id_data[48:64]))
except (struct.error, ValueError):
print(f"Error parsing Object ID attribute for record {self.recordnum}")
def parse_attribute_list(self, offset: int) -> None:
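# Each $ATTRIBUTE_LIST entry: attribute type (0-3), entry length (4-5), name length (6),
# name offset (7), starting VCN (8-15), MFT file reference (16-23).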
attr_content_offset = offset + struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
attr_content_end = offset + struct.unpack("<L", self.raw_record[offset+4:offset+8])[0]
while attr_content_offset < attr_content_end:
try:
attr_type = struct.unpack("<L", self.raw_record[attr_content_offset:attr_content_offset+4])[0]
attr_len = struct.unpack("<H", self.raw_record[attr_content_offset+4:attr_content_offset+6])[0]
name_len = struct.unpack("B", self.raw_record[attr_content_offset+6:attr_content_offset+7])[0]
name_offset = struct.unpack("B", self.raw_record[attr_content_offset+7:attr_content_offset+8])[0]
if name_len > 0:
name = self.raw_record[attr_content_offset+name_offset:attr_content_offset+name_offset+name_len*2].decode('utf-16-le', errors='replace')
else:
name = ""
vcn = struct.unpack("<Q", self.raw_record[attr_content_offset+8:attr_content_offset+16])[0]
ref = struct.unpack("<Q", self.raw_record[attr_content_offset+16:attr_content_offset+24])[0]
self.attribute_list.append({
'type': attr_type,
'name': name,
'vcn': vcn,
'reference': ref
})
attr_content_offset += attr_len
except struct.error:
break
def parse_security_descriptor(self, offset: int) -> None:
sd_data = self.raw_record[offset+24:]
if len(sd_data) >= 20:
try:
revision = struct.unpack("B", sd_data[0:1])[0]
control = struct.unpack("<H", sd_data[2:4])[0]
owner_offset = struct.unpack("<L", sd_data[4:8])[0]
group_offset = struct.unpack("<L", sd_data[8:12])[0]
sacl_offset = struct.unpack("<L", sd_data[12:16])[0]
dacl_offset = struct.unpack("<L", sd_data[16:20])[0]
self.security_descriptor = {
'revision': revision,
'control': control,
'owner_offset': owner_offset,
'group_offset': group_offset,
'sacl_offset': sacl_offset,
'dacl_offset': dacl_offset
}
except struct.error:
print(f"Error parsing Security Descriptor attribute for record {self.recordnum}")
def parse_volume_name(self, offset: int) -> None:
# $VOLUME_NAME has no length prefix in its value; the length in bytes comes from the
# resident header's value length field, and the value starts at the header's value offset.
try:
value_length = struct.unpack("<L", self.raw_record[offset+16:offset+20])[0]
value_offset = struct.unpack("<H", self.raw_record[offset+20:offset+22])[0]
vn_data = self.raw_record[offset+value_offset:offset+value_offset+value_length]
self.volume_name = vn_data.decode('utf-16-le', errors='replace')
except struct.error:
print(f"Error parsing Volume Name attribute for record {self.recordnum}")
def parse_volume_information(self, offset: int) -> None:
vi_data = self.raw_record[offset+24:offset+48]
if len(vi_data) >= 12:
try:
self.volume_info = {
'major_version': struct.unpack("B", vi_data[8:9])[0],
'minor_version': struct.unpack("B", vi_data[9:10])[0],
'flags': struct.unpack("<H", vi_data[10:12])[0]
}
except struct.error:
print(f"Error parsing Volume Information attribute for record {self.recordnum}")
def parse_data(self, offset):
data_header = self.raw_record[offset:offset+24]
try:
non_resident_flag = struct.unpack("B", data_header[8:9])[0]
name_length = struct.unpack("B", data_header[9:10])[0]
name_offset = struct.unpack("<H", data_header[10:12])[0]
if name_length > 0:
name = self.raw_record[offset+name_offset:offset+name_offset+name_length*2].decode('utf-16-le', errors='replace')
else:
name = ""
if non_resident_flag == 0: # Resident
content_size = struct.unpack("<L", data_header[16:20])[0]
content_offset = struct.unpack("<H", data_header[20:22])[0]
content = self.raw_record[offset+content_offset:offset+content_offset+content_size]
else: # Non-resident; for brevity we don't parse data runs here.
content = None
self.data_attribute = {
'name': name,
'non_resident': bool(non_resident_flag),
'content_size': content_size if non_resident_flag == 0 else None,
}
except struct.error:
print(f"Error parsing Data attribute for record {self.recordnum}")
def parse_index_root(self, offset: int) -> None:
ir_data = self.raw_record[offset+24:]
try:
attr_type = struct.unpack("<L", ir_data[:4])[0]
collation_rule = struct.unpack("<L", ir_data[4:8])[0]
index_alloc_size = struct.unpack("<L", ir_data[8:12])[0]
clusters_per_index = struct.unpack("B", ir_data[12:13])[0]
self.index_root = {
'attr_type': attr_type,
'collation_rule': collation_rule,
'index_alloc_size': index_alloc_size,
'clusters_per_index': clusters_per_index
}
except struct.error:
print(f"Error parsing Index Root attribute for record {self.recordnum}")
def parse_index_allocation(self, offset: int) -> None:
ia_data = self.raw_record[offset+24:]
try:
data_runs_offset = struct.unpack("<H", ia_data[:2])[0]
self.index_allocation = {
'data_runs_offset': data_runs_offset
}
except struct.error:
print(f"Error parsing Index Allocation attribute for record {self.recordnum}")
def parse_bitmap(self, offset: int) -> None:
bitmap_data = self.raw_record[offset+24:]
try:
bitmap_size = struct.unpack("<L", bitmap_data[:4])[0]
self.bitmap = {
'size': bitmap_size,
'data': bitmap_data[4:4+bitmap_size]
}
except struct.error:
print(f"Error parsing Bitmap attribute for record {self.recordnum}")
def parse_reparse_point(self, offset: int) -> None:
rp_data = self.raw_record[offset+24:]
try:
reparse_tag = struct.unpack("<L", rp_data[:4])[0]
reparse_data_length = struct.unpack("<H", rp_data[4:6])[0]
self.reparse_point = {
'reparse_tag': reparse_tag,
'data_length': reparse_data_length,
'data': rp_data[8:8+reparse_data_length]
}
except struct.error:
print(f"Error parsing Reparse Point attribute for record {self.recordnum}")
def parse_ea_information(self, offset: int) -> None:
eai_data = self.raw_record[offset+24:]
try:
ea_size = struct.unpack("<L", eai_data[:4])[0]
ea_count = struct.unpack("<L", eai_data[4:8])[0]
self.ea_information = {
'ea_size': ea_size,
'ea_count': ea_count
}
except struct.error:
print(f"Error parsing EA Information attribute for record {self.recordnum}")
def parse_ea(self, offset: int) -> None:
ea_data = self.raw_record[offset+24:]
try:
next_entry_offset = struct.unpack("<L", ea_data[:4])[0]
flags = struct.unpack("B", ea_data[4:5])[0]
name_length = struct.unpack("B", ea_data[5:6])[0]
value_length = struct.unpack("<H", ea_data[6:8])[0]
name = ea_data[8:8+name_length].decode('ascii', errors='replace')
value = ea_data[8+name_length:8+name_length+value_length]
self.ea = {
'next_entry_offset': next_entry_offset,
'flags': flags,
'name': name,
'value': value.hex()
}
except struct.error:
print(f"Error parsing EA attribute for record {self.recordnum}")
def parse_logged_utility_stream(self, offset: int) -> None:
lus_data = self.raw_record[offset+24:]
try:
stream_size = struct.unpack("<Q", lus_data[:8])[0]
self.logged_utility_stream = {
'size': stream_size,
'data': lus_data[8:8+stream_size].hex()
}
except struct.error:
print(f"Error parsing Logged Utility Stream attribute for record {self.recordnum}")
def compute_hashes(self) -> None:
md5 = hashlib.md5()
sha256 = hashlib.sha256()
sha512 = hashlib.sha512()
md5.update(self.raw_record)
sha256.update(self.raw_record)
sha512.update(self.raw_record)
self.md5 = md5.hexdigest()
self.sha256 = sha256.hexdigest()
self.sha512 = sha512.hexdigest()
self.crc32 = format(zlib.crc32(self.raw_record) & 0xFFFFFFFF, '08x')
def get_file_type(self) -> str:
# For illustration, this function checks a few flag bits.
# You may need to refine this logic.
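# MFT header flags: 0x0001 = record is in use, 0x0002 = record is a directory.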
FILE_RECORD_IS_DIRECTORY = 0x02
if self.flags & FILE_RECORD_IS_DIRECTORY:
return "Directory"
else:
return "File"
# --- Main Program ---
def main():
if len(sys.argv) != 2:
print("Usage: python show_mft.py <path_to_extracted_record>")
sys.exit(1)
record_path = sys.argv[1]
try:
with open(record_path, "rb") as f:
raw_record = f.read()
except Exception as e:
print(f"Failed to read file: {e}")
sys.exit(1)
mft = MftRecord(raw_record, compute_hashes=True, debug_level=1)
# Use pprint to print out all details nicely.
details = {
"record_number": mft.recordnum,
"update_sequence_offset": mft.upd_off,
"update_sequence_count": mft.upd_cnt,
"logfile_sequence_number": mft.lsn,
"sequence_number": mft.seq,
"hard_link_count": mft.link,
"attribute_offset": mft.attr_off,
"flags": mft.flags,
"used_size": mft.size,
"allocated_size": mft.alloc_sizef,
"base_file_reference": mft.base_ref,
"next_attribute_id": mft.next_attrid,
"file_type": mft.get_file_type(),
"si_times": { k: v.dtstr for k, v in mft.si_times.items() },
"fn_times": { k: v.dtstr for k, v in mft.fn_times.items() },
"filename": mft.filename,
"parent_reference": mft.parent_ref,
"object_id": mft.object_id,
"security_descriptor": mft.security_descriptor,
"volume_name": mft.volume_name,
"volume_info": mft.volume_info,
"data_attribute": mft.data_attribute,
"index_root": mft.index_root,
"index_allocation": mft.index_allocation,
"bitmap": mft.bitmap,
"reparse_point": mft.reparse_point,
"ea_information": mft.ea_information,
"ea": mft.ea,
"logged_utility_stream": mft.logged_utility_stream,
"attribute_list": mft.attribute_list,
"hashes": {
"md5": mft.md5,
"sha256": mft.sha256,
"sha512": mft.sha512,
"crc32": mft.crc32
}
}
pprint.pprint(details)
if __name__ == "__main__":
main()