First commit of MFT carver and MFT parsers

commit 1973dc7031
tobias
2025-02-16 18:57:07 +01:00

2 changed files with 1006 additions and 0 deletions

mft.go Executable file

@@ -0,0 +1,496 @@
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"unicode/utf16"
)
// --------------------
// Data Structures
// --------------------
type FileNameAttribute struct {
Filename string `json:"filename"`
ParentRef uint64 `json:"parent_ref"`
Crtime string `json:"crtime"`
Mtime string `json:"mtime"`
Ctime string `json:"ctime"`
Atime string `json:"atime"`
}
type DataStream struct {
Name string `json:"name"`
Resident bool `json:"resident"`
NonResident bool `json:"non_resident"`
ContentBase64 string `json:"content_base64,omitempty"`
}
type MFTRecord struct {
// Header fields (omitting the "magic" since it's always "FILE")
UpdateSeqOffset uint16 `json:"update_seq_offset"`
UpdateSeqSize uint16 `json:"update_seq_size"`
LSN uint64 `json:"lsn"`
SequenceNumber uint16 `json:"sequence_number"`
HardLinkCount uint16 `json:"hard_link_count"`
FirstAttrOffset uint16 `json:"first_attr_offset"`
Flags uint16 `json:"flags"`
RealSize uint32 `json:"real_size"`
AllocatedSize uint32 `json:"allocated_size"`
BaseFileRecord uint64 `json:"base_file_record"`
NextAttrId uint16 `json:"next_attr_id"`
RecordNumber uint32 `json:"record_number"`
// Standard Information attribute timestamps (if present)
SI_Crtime string `json:"si_crtime,omitempty"`
SI_Mtime string `json:"si_mtime,omitempty"`
SI_Ctime string `json:"si_ctime,omitempty"`
SI_Atime string `json:"si_atime,omitempty"`
// Additional attributes
ObjectID string `json:"object_id,omitempty"`
SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
FileNames []FileNameAttribute `json:"file_names,omitempty"`
DataStreams []DataStream `json:"data_streams,omitempty"`
}
// CarvedRecord holds a candidate 1024-byte record and its global offset.
type CarvedRecord struct {
Offset int64
RecordBytes []byte
}
// ParsedRecord is what gets sent to the JSON writer.
type ParsedRecord struct {
Offset int64
RecordJSON map[string]interface{}
}
// --------------------
// Helper functions
// --------------------
// safeSlice returns data[start : start+length] and true when the range is in bounds; otherwise it returns nil and false.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
if start < 0 || start+length > len(data) {
return nil, false
}
return data[start : start+length], true
}
// filetimeToString converts a Windows FILETIME (100-nanosecond intervals since 1601-01-01 UTC) into an RFC3339 timestamp string.
func filetimeToString(ft uint64) string {
const epochDiff = 11644473600 // seconds between 1601 and 1970
secs := int64(ft/10000000) - epochDiff
nsec := int64(ft%10000000) * 100
t := time.Unix(secs, nsec).UTC()
return t.Format(time.RFC3339)
}
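// For reference, the two epochs round-trip as expected:
//
//	filetimeToString(0)                  // "1601-01-01T00:00:00Z" (FILETIME epoch)
//	filetimeToString(116444736000000000) // "1970-01-01T00:00:00Z" (Unix epoch)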
// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
func decodeUTF16String(b []byte) string {
if len(b)%2 != 0 {
b = b[:len(b)-1]
}
u16 := make([]uint16, len(b)/2)
for i := 0; i < len(u16); i++ {
u16[i] = binary.LittleEndian.Uint16(b[i*2:])
}
return string(utf16.Decode(u16))
}
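// Example: decodeUTF16String([]byte{0x4D, 0x00, 0x46, 0x00, 0x54, 0x00}) == "MFT".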
// parseZoneIdentifier is a simple key=value parser for Zone.Identifier alternate data streams.
func parseZoneIdentifier(content []byte) map[string]string {
result := make(map[string]string)
text := string(content)
lines := strings.Split(text, "\n")
for _, line := range lines {
if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])
result[key] = value
}
}
return result
}
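// For a typical stream such as
//
//	[ZoneTransfer]
//	ZoneId=3
//	HostUrl=https://example.com/download.exe
//
// this returns {"ZoneId": "3", "HostUrl": "https://example.com/download.exe"};
// the "[ZoneTransfer]" header line contains no "=" and is skipped.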
// --------------------
// Attribute Parsing
// --------------------
// parseAttributes iterates over the attribute area and processes known types.
// It uses safeSlice to ensure we don't read beyond the record.
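// For reference, the common attribute header layout that the offsets below
// follow (all offsets relative to the attribute start):
//
//	0x00  uint32  attribute type
//	0x04  uint32  attribute length
//	0x08  byte    non-resident flag (0 = resident)
//	0x09  byte    name length (in UTF-16 code units)
//	0x0A  uint16  name offset
//	0x10  uint32  value length (resident attributes only)
//	0x14  uint16  value offset (resident attributes only)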
func parseAttributes(data []byte, rec *MFTRecord) {
offset := int(rec.FirstAttrOffset)
for offset < len(data)-8 {
// First 4 bytes: attribute type
attrBytes, ok := safeSlice(data, offset, 4)
if !ok {
break
}
attrType := binary.LittleEndian.Uint32(attrBytes)
// End marker
if attrType == 0xFFFFFFFF {
break
}
// Next 4 bytes: attribute length
attrLenBytes, ok := safeSlice(data, offset+4, 4)
if !ok {
break
}
attrLen := binary.LittleEndian.Uint32(attrLenBytes)
if attrLen < 8 || offset+int(attrLen) > len(data) {
// Malformed attribute, skip one byte and try to re-sync.
offset++
continue
}
// Non-resident flag at offset+8 (zero means the attribute value is resident).
residentFlag := data[offset+8]
// For resident attributes, extract value length and value offset.
var valLen uint32
var valOffset uint16
if residentFlag == 0 {
if vb, ok := safeSlice(data, offset+16, 4); ok {
valLen = binary.LittleEndian.Uint32(vb)
} else {
offset += int(attrLen)
continue
}
if vb, ok := safeSlice(data, offset+20, 2); ok {
valOffset = binary.LittleEndian.Uint16(vb)
} else {
offset += int(attrLen)
continue
}
}
// Process known attribute types.
switch attrType {
case 0x10: // $STANDARD_INFORMATION
if residentFlag == 0 {
if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
rec.SI_Ctime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
}
}
case 0x30: // $FILE_NAME
if residentFlag == 0 {
if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
var fn FileNameAttribute
fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
fn.Ctime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
filenameLen := filenameLenBytes[0]
if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
fn.Filename = decodeUTF16String(nameBytes)
}
}
rec.FileNames = append(rec.FileNames, fn)
}
}
case 0x80: // $DATA
var ds DataStream
// Extract the attribute's name if any.
if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
nameLen := nameInfo[0]
// The byte at offset+9 is the name length; the uint16 at offset+10 is the
// name offset. The name itself starts at offset+nameOffset, not offset+10.
if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok && nameLen > 0 {
nameOffset := binary.LittleEndian.Uint16(nameOffBytes)
if nameBytes, ok := safeSlice(data, offset+int(nameOffset), int(nameLen)*2); ok {
ds.Name = decodeUTF16String(nameBytes)
}
}
}
if residentFlag == 0 {
ds.Resident = true
ds.NonResident = false
if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
if ds.Name == "Zone.Identifier" {
zoneInfo := parseZoneIdentifier(content)
ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
}
}
} else {
ds.Resident = false
ds.NonResident = true
}
rec.DataStreams = append(rec.DataStreams, ds)
case 0x40: // $OBJECT_ID
if residentFlag == 0 {
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
rec.ObjectID = fmt.Sprintf("%x", objData)
}
}
case 0x50: // $SECURITY_DESCRIPTOR
if residentFlag == 0 {
// A minimal parser: require at least the 20-byte header before storing anything.
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
// We could decode further; here we just store raw hex values.
rec.SecurityDescriptor = map[string]interface{}{
"raw": fmt.Sprintf("%x", secData),
}
}
}
// (Other attribute types such as $ATTRIBUTE_LIST, $VOLUME_NAME, etc.
// can be added here following similar patterns.)
}
offset += int(attrLen)
}
}
// parseMFTRecord attempts to parse a 1024-byte MFT record.
// It returns an error if the record is too short or if the expected "FILE" marker is missing.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
if len(data) < 48 {
return nil, fmt.Errorf("data too short to be a valid record")
}
if string(data[:4]) != "FILE" {
return nil, fmt.Errorf("invalid record header")
}
rec := &MFTRecord{
UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
UpdateSeqSize: binary.LittleEndian.Uint16(data[6:8]),
LSN: binary.LittleEndian.Uint64(data[8:16]),
SequenceNumber: binary.LittleEndian.Uint16(data[16:18]),
HardLinkCount: binary.LittleEndian.Uint16(data[18:20]),
FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
Flags: binary.LittleEndian.Uint16(data[22:24]),
RealSize: binary.LittleEndian.Uint32(data[24:28]),
AllocatedSize: binary.LittleEndian.Uint32(data[28:32]),
BaseFileRecord: binary.LittleEndian.Uint64(data[32:40]),
NextAttrId: binary.LittleEndian.Uint16(data[40:42]),
// NTFS 3.1+ stores the record number at bytes 44-47 (42-43 are padding).
RecordNumber: binary.LittleEndian.Uint32(data[44:48]),
}
parseAttributes(data, rec)
return rec, nil
}
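// Typical call site (sketch; buf is any carved 1024-byte candidate):
//
//	if rec, err := parseMFTRecord(buf); err == nil {
//		fmt.Println(rec.RecordNumber, rec.SI_Mtime)
//	}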
// --------------------
// Parallel Processing and Main
// --------------------
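// processImageFile scans one image: the scan loop below carves 1024-byte
// "FILE0" candidates onto carvedChan, a pool of NumCPU workers writes each
// raw record to disk, parses it, and forwards the JSON map on parsedChan,
// and a single writer goroutine serializes the JSONL output.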
func processImageFile(inputFile string, wg *sync.WaitGroup) {
defer wg.Done()
f, err := os.Open(inputFile)
if err != nil {
log.Printf("Failed to open %s: %v", inputFile, err)
return
}
defer f.Close()
fi, err := f.Stat()
if err != nil {
log.Printf("Failed to stat %s: %v", inputFile, err)
return
}
fileSize := fi.Size()
// Create an output folder and JSONL file based on input file name and current timestamp.
timestamp := time.Now().Format("20060102150405")
baseName := filepath.Base(inputFile)
outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
if err := os.Mkdir(outDir, 0755); err != nil {
log.Printf("Failed to create output directory for %s: %v", inputFile, err)
return
}
jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
jsonlFile, err := os.Create(jsonlFileName)
if err != nil {
log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
return
}
defer jsonlFile.Close()
carvedChan := make(chan CarvedRecord, 100)
parsedChan := make(chan ParsedRecord, 100)
// Worker pool that writes carved records to disk and parses them.
numWorkers := runtime.NumCPU()
var workerWg sync.WaitGroup
for i := 0; i < numWorkers; i++ {
workerWg.Add(1)
go func() {
defer workerWg.Done()
for carved := range carvedChan {
// Write raw record to disk.
recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
continue
}
// Parse the record.
mft, err := parseMFTRecord(carved.RecordBytes)
if err != nil {
// Skip records that cannot be parsed.
continue
}
// Build JSON record (omitting the magic field).
recordMap := map[string]interface{}{
"input_image": inputFile,
"offset": carved.Offset,
"update_seq_offset": mft.UpdateSeqOffset,
"update_seq_size": mft.UpdateSeqSize,
"lsn": mft.LSN,
"sequence_number": mft.SequenceNumber,
"hard_link_count": mft.HardLinkCount,
"first_attr_offset": mft.FirstAttrOffset,
"flags": mft.Flags,
"real_size": mft.RealSize,
"allocated_size": mft.AllocatedSize,
"base_file_record": mft.BaseFileRecord,
"next_attr_id": mft.NextAttrId,
"record_number": mft.RecordNumber,
"si_crtime": mft.SI_Crtime,
"si_mtime": mft.SI_Mtime,
"si_ctime": mft.SI_CtTime,
"si_atime": mft.SI_Atime,
"object_id": mft.ObjectID,
"security_descriptor": mft.SecurityDescriptor,
"file_names": mft.FileNames,
"data_streams": mft.DataStreams,
}
parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
}
}()
}
// Writer goroutine to output JSONL records.
var writerWg sync.WaitGroup
writerWg.Add(1)
go func() {
defer writerWg.Done()
encoder := json.NewEncoder(jsonlFile)
for pr := range parsedChan {
if err := encoder.Encode(pr.RecordJSON); err != nil {
log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
}
}
}()
// Scan the file for the "FILE0" pattern.
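// Chunks are stitched together with a len(pattern)-1 byte leftover tail so a
// signature straddling a chunk boundary is still found exactly once.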
reader := bufio.NewReader(f)
const recordSize = 1024
const chunkSize = 1024 * 1024
pattern := []byte("FILE0")
var fileOffset int64 = 0
var leftover []byte
lastPrint := time.Now()
for {
chunk := make([]byte, chunkSize)
n, err := reader.Read(chunk)
if n == 0 {
break
}
data := append(leftover, chunk[:n]...)
// Progress update every ~5 seconds.
if time.Since(lastPrint) > 5*time.Second {
perc := float64(fileOffset) / float64(fileSize) * 100.0
log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
lastPrint = time.Now()
}
searchLimit := len(data) - len(pattern)
for i := 0; i <= searchLimit; i++ {
if bytes.Equal(data[i:i+len(pattern)], pattern) {
globalOffset := fileOffset - int64(len(leftover)) + int64(i)
if globalOffset+recordSize > fileSize {
continue
}
recordBytes := make([]byte, recordSize)
_, err := f.ReadAt(recordBytes, globalOffset)
if err != nil {
log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
continue
}
carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
}
}
if len(data) >= len(pattern)-1 {
leftover = data[len(data)-(len(pattern)-1):]
} else {
leftover = data
}
fileOffset += int64(n)
if err == io.EOF {
break
}
}
close(carvedChan)
workerWg.Wait()
close(parsedChan)
writerWg.Wait()
log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
}
func main() {
flag.Parse()
if flag.NArg() == 0 {
fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
os.Exit(1)
}
// Build list of files from provided arguments (recursively if directories).
var files []string
for _, arg := range flag.Args() {
fi, err := os.Stat(arg)
if err != nil {
log.Printf("Error stating %s: %v", arg, err)
continue
}
if fi.IsDir() {
err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
if err != nil {
return nil
}
if !info.IsDir() {
files = append(files, path)
}
return nil
})
if err != nil {
log.Printf("Error walking directory %s: %v", arg, err)
}
} else {
files = append(files, arg)
}
}
totalFiles := len(files)
if totalFiles == 0 {
log.Println("No input files found.")
return
}
log.Printf("Found %d files to process.", totalFiles)
var wg sync.WaitGroup
concurrentFiles := runtime.NumCPU()
sem := make(chan struct{}, concurrentFiles)
for i, file := range files {
wg.Add(1)
sem <- struct{}{}
go func(i int, file string) {
defer func() { <-sem }()
log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
processImageFile(file, &wg)
}(i, file)
}
wg.Wait()
log.Println("All files processed.")
}
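// Example invocation (illustrative binary name and paths):
//
//	go build -o mftcarve mft.go
//	./mftcarve image.dd /cases/images/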