Files
mft/mft.go
2025-02-16 18:57:07 +01:00

497 lines
15 KiB
Go
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"bufio"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"runtime"
"strings"
"sync"
"time"
"unicode/utf16"
)
// --------------------
// Data Structures
// --------------------
// FileNameAttribute is the decoded form of one resident $FILE_NAME attribute.
//
// ParentRef keeps only the low 48 bits of the parent file reference. The four
// time fields hold the strings produced by filetimeToString for the FILETIME
// values found at value offsets 8, 16, 24 and 32 respectively.
type FileNameAttribute struct {
	Filename  string `json:"filename"`
	ParentRef uint64 `json:"parent_ref"`
	Crtime    string `json:"crtime"`
	Mtime     string `json:"mtime"`
	CtTime    string `json:"ctime"`
	Atime     string `json:"atime"`
}
// DataStream describes one $DATA attribute found in a record.
//
// Resident and NonResident are always set as opposites of each other.
// ContentBase64 carries the base64-encoded stream content and is only filled
// in for resident streams; it is dropped from the JSON output when empty.
type DataStream struct {
	Name          string `json:"name"`
	Resident      bool   `json:"resident"`
	NonResident   bool   `json:"non_resident"`
	ContentBase64 string `json:"content_base64,omitempty"`
}
// MFTRecord is the parsed view of a single MFT "FILE" record: the fixed
// header fields plus whatever the attribute parser was able to decode.
type MFTRecord struct {
	// Fixed header fields. The 4-byte magic is not stored because a record
	// is only parsed when it equals "FILE".
	UpdateSeqOffset uint16 `json:"update_seq_offset"`
	UpdateSeqSize   uint16 `json:"update_seq_size"`
	LSN             uint64 `json:"lsn"`
	SequenceNumber  uint16 `json:"sequence_number"`
	HardLinkCount   uint16 `json:"hard_link_count"`
	FirstAttrOffset uint16 `json:"first_attr_offset"`
	Flags           uint16 `json:"flags"`
	RealSize        uint32 `json:"real_size"`
	AllocatedSize   uint32 `json:"allocated_size"`
	BaseFileRecord  uint64 `json:"base_file_record"`
	NextAttrId      uint16 `json:"next_attr_id"`
	RecordNumber    uint32 `json:"record_number"`

	// Timestamps taken from a resident $STANDARD_INFORMATION attribute;
	// left empty when the record has none.
	SI_Crtime string `json:"si_crtime,omitempty"`
	SI_Mtime  string `json:"si_mtime,omitempty"`
	SI_CtTime string `json:"si_ctime,omitempty"`
	SI_Atime  string `json:"si_atime,omitempty"`

	// Results of the optional attributes: $OBJECT_ID as hex, the raw
	// $SECURITY_DESCRIPTOR, every $FILE_NAME and every $DATA stream.
	ObjectID           string                 `json:"object_id,omitempty"`
	SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
	FileNames          []FileNameAttribute    `json:"file_names,omitempty"`
	DataStreams        []DataStream           `json:"data_streams,omitempty"`
}
// CarvedRecord is a candidate record lifted out of the image: the raw bytes
// (1024 of them, as read by the scanner) together with the absolute offset
// in the image at which they start.
type CarvedRecord struct {
	Offset      int64
	RecordBytes []byte
}
// ParsedRecord is the unit handed to the JSONL writer: the JSON object to
// emit for one record and the image offset the record was carved from (the
// offset is used when reporting write errors).
type ParsedRecord struct {
	Offset     int64
	RecordJSON map[string]interface{}
}
// --------------------
// Helper functions
// --------------------
// safeSlice returns data[start : start+length] and true when that range lies
// entirely inside data; otherwise it returns nil and false.
//
// A negative start or length is rejected, and the upper bound is checked as
// length > len(data)-start so that a huge length cannot overflow start+length
// and slip past the guard. Both cases matter because the arguments are
// computed from untrusted on-disk fields.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
	if start < 0 || length < 0 || start > len(data) || length > len(data)-start {
		return nil, false
	}
	return data[start : start+length], true
}
// filetimeToString renders a Windows FILETIME as an RFC 3339 timestamp in UTC.
//
// ft counts 100-nanosecond ticks since 1601-01-01. The value is split into
// whole seconds and leftover ticks, rebased onto the Unix epoch and formatted
// with time.RFC3339, which has second resolution.
func filetimeToString(ft uint64) string {
	const (
		ticksPerSecond  = 10000000    // 100 ns ticks in one second
		unixEpochOffset = 11644473600 // seconds from 1601-01-01 to 1970-01-01
	)
	wholeSeconds := int64(ft / ticksPerSecond)
	leftoverTicks := int64(ft % ticksPerSecond)
	stamp := time.Unix(wholeSeconds-unixEpochOffset, leftoverTicks*100)
	return stamp.UTC().Format(time.RFC3339)
}
// decodeUTF16String decodes little-endian UTF-16 bytes into a Go string.
//
// A trailing odd byte cannot form a code unit and is ignored. Surrogate
// handling is whatever utf16.Decode provides.
func decodeUTF16String(b []byte) string {
	units := make([]uint16, 0, len(b)/2)
	for pos := 0; pos+1 < len(b); pos += 2 {
		units = append(units, binary.LittleEndian.Uint16(b[pos:pos+2]))
	}
	return string(utf16.Decode(units))
}
// parseZoneIdentifier extracts the key=value pairs from the text of a
// Zone.Identifier stream.
//
// The content is split on "\n"; every line containing '=' contributes one
// entry, split at the first '=' with surrounding whitespace (including a
// trailing "\r") trimmed from both sides. Lines without '=' such as section
// headers are skipped, and a repeated key keeps its last value.
func parseZoneIdentifier(content []byte) map[string]string {
	pairs := map[string]string{}
	for _, raw := range strings.Split(string(content), "\n") {
		eq := strings.IndexByte(raw, '=')
		if eq < 0 {
			continue
		}
		pairs[strings.TrimSpace(raw[:eq])] = strings.TrimSpace(raw[eq+1:])
	}
	return pairs
}
// --------------------
// Attribute Parsing
// --------------------
// parseAttributes walks the attribute list of a raw MFT record and stores the
// attributes it recognises in rec.
//
// data is the whole record, header included. The walk starts at
// rec.FirstAttrOffset and ends at the 0xFFFFFFFF end marker or once no further
// attribute header fits. An attribute whose length field is smaller than 8 or
// runs past the end of data is treated as corruption: the cursor advances by
// one byte and parsing is retried from there. Attributes too short to hold
// the header fields that are read are skipped.
//
// Only resident values are decoded:
//   - 0x10 $STANDARD_INFORMATION: the four timestamps (rec.SI_* fields)
//   - 0x30 $FILE_NAME: appended to rec.FileNames
//   - 0x40 $OBJECT_ID: first 16 value bytes as hex in rec.ObjectID
//   - 0x50 $SECURITY_DESCRIPTOR: whole value as hex under the "raw" key
//   - 0x80 $DATA: appended to rec.DataStreams; non-resident streams are
//     recorded with their name only
//
// Every name and value is bounds-checked against its own attribute, so a
// damaged length or offset cannot pull in bytes of a neighbouring attribute.
func parseAttributes(data []byte, rec *MFTRecord) {
	const (
		attrStandardInformation = 0x10
		attrFileName            = 0x30
		attrObjectID            = 0x40
		attrSecurityDescriptor  = 0x50
		attrData                = 0x80
		attrEnd                 = 0xFFFFFFFF

		commonHeaderLen   = 16 // header part shared by resident and non-resident attributes
		residentHeaderLen = 22 // common header + value length (4 bytes) + value offset (2 bytes)
	)
	le := binary.LittleEndian

	offset := int(rec.FirstAttrOffset)
	// The loop condition guarantees that data[offset : offset+8], i.e. the
	// type and length fields, can be read without further checks.
	for offset < len(data)-8 {
		attrType := le.Uint32(data[offset:])
		if attrType == attrEnd {
			break
		}
		attrLen := le.Uint32(data[offset+4:])
		// Compare as uint64 so the check also holds where int is 32 bits.
		if attrLen < 8 || uint64(attrLen) > uint64(len(data)-offset) {
			// Malformed attribute, skip one byte and try to re-sync.
			offset++
			continue
		}
		attr := data[offset : offset+int(attrLen)]
		offset += len(attr)
		if len(attr) < commonHeaderLen {
			continue
		}

		// Header byte 8 is the non-resident flag: 0 means the value is
		// stored inside the attribute itself.
		resident := attr[8] == 0
		var value []byte
		valueOK := false
		if resident {
			if len(attr) < residentHeaderLen {
				continue
			}
			valLen := le.Uint32(attr[16:20])
			valOff := int(le.Uint16(attr[20:22]))
			if uint64(valLen) <= uint64(len(attr)) {
				value, valueOK = safeSlice(attr, valOff, int(valLen))
			}
		}

		switch attrType {
		case attrStandardInformation:
			if valueOK && len(value) >= 32 {
				rec.SI_Crtime = filetimeToString(le.Uint64(value[0:8]))
				rec.SI_Mtime = filetimeToString(le.Uint64(value[8:16]))
				rec.SI_CtTime = filetimeToString(le.Uint64(value[16:24]))
				rec.SI_Atime = filetimeToString(le.Uint64(value[24:32]))
			}
		case attrFileName:
			if valueOK && len(value) >= 66 {
				rec.FileNames = append(rec.FileNames, decodeFileNameValue(value))
			}
		case attrData:
			rec.DataStreams = append(rec.DataStreams, decodeDataStream(attr, resident, value, valueOK))
		case attrObjectID:
			if valueOK && len(value) >= 16 {
				rec.ObjectID = fmt.Sprintf("%x", value[:16])
			}
		case attrSecurityDescriptor:
			// Minimal handling: the descriptor is kept as raw hex.
			if valueOK && len(value) >= 20 {
				rec.SecurityDescriptor = map[string]interface{}{
					"raw": fmt.Sprintf("%x", value),
				}
			}
		}
	}
}

// decodeFileNameValue decodes a $FILE_NAME value; the caller guarantees
// len(value) >= 66, the size of the fixed part that precedes the name.
func decodeFileNameValue(value []byte) FileNameAttribute {
	le := binary.LittleEndian
	fn := FileNameAttribute{
		// Only the low 48 bits of the parent file reference are kept.
		ParentRef: le.Uint64(value[0:8]) & 0x0000FFFFFFFFFFFF,
		Crtime:    filetimeToString(le.Uint64(value[8:16])),
		Mtime:     filetimeToString(le.Uint64(value[16:24])),
		CtTime:    filetimeToString(le.Uint64(value[24:32])),
		Atime:     filetimeToString(le.Uint64(value[32:40])),
	}
	// Byte 64 is the name length in UTF-16 code units; the name starts at 66.
	nameLen := int(value[64])
	if nameBytes, ok := safeSlice(value, 66, nameLen*2); ok {
		fn.Filename = decodeUTF16String(nameBytes)
	}
	return fn
}

// decodeDataStream builds the DataStream entry for one $DATA attribute; attr
// is the whole attribute and must be at least 16 bytes long.
func decodeDataStream(attr []byte, resident bool, value []byte, valueOK bool) DataStream {
	ds := DataStream{Resident: resident, NonResident: !resident}

	// Header byte 9 is the name length in UTF-16 code units and bytes 10..11
	// hold the offset of the name from the start of the attribute. The name
	// itself lives at that offset, not at byte 10.
	if nameLen := int(attr[9]); nameLen > 0 {
		nameOff := int(binary.LittleEndian.Uint16(attr[10:12]))
		if nameBytes, ok := safeSlice(attr, nameOff, nameLen*2); ok {
			ds.Name = decodeUTF16String(nameBytes)
		}
	}

	if resident && valueOK {
		ds.ContentBase64 = base64.StdEncoding.EncodeToString(value)
		if ds.Name == "Zone.Identifier" {
			// The parsed key/value pairs are appended to the stream name.
			ds.Name = fmt.Sprintf("Zone.Identifier %v", parseZoneIdentifier(value))
		}
	}
	return ds
}
// parseMFTRecord decodes the fixed header of a raw MFT record and then lets
// parseAttributes fill in the attribute-derived fields.
//
// It returns an error when data is shorter than the 48-byte header or does
// not start with the "FILE" magic. The record number is read from bytes
// 44..47; bytes 42..43 are alignment padding in the NTFS 3.1 header layout.
//
// NOTE(review): the update sequence array (fixups) is not applied, so the
// last two bytes of each sector are used exactly as stored on disk.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
	const headerLen = 48 // fixed header up to and including the record number
	if len(data) < headerLen {
		return nil, fmt.Errorf("data too short to be a valid record")
	}
	if string(data[:4]) != "FILE" {
		return nil, fmt.Errorf("invalid record header")
	}

	le := binary.LittleEndian
	rec := &MFTRecord{
		UpdateSeqOffset: le.Uint16(data[4:6]),
		UpdateSeqSize:   le.Uint16(data[6:8]),
		LSN:             le.Uint64(data[8:16]),
		SequenceNumber:  le.Uint16(data[16:18]),
		HardLinkCount:   le.Uint16(data[18:20]),
		FirstAttrOffset: le.Uint16(data[20:22]),
		Flags:           le.Uint16(data[22:24]),
		RealSize:        le.Uint32(data[24:28]),
		AllocatedSize:   le.Uint32(data[28:32]),
		BaseFileRecord:  le.Uint64(data[32:40]),
		NextAttrId:      le.Uint16(data[40:42]),
		RecordNumber:    le.Uint32(data[44:48]),
	}
	parseAttributes(data, rec)
	return rec, nil
}
// --------------------
// Parallel Processing and Main
// --------------------
// processImageFile carves MFT records out of one disk image.
//
// The image is scanned for the byte pattern "FILE0". For every hit the 1024
// bytes starting there are read and handed to a pool of runtime.NumCPU()
// workers, which write the raw bytes to <base>_<timestamp>/<offset>.mftrecord
// and, when the record parses, send a JSON object to a single writer
// goroutine that appends it to <base>_<timestamp>.jsonl. Both outputs are
// created in the current working directory.
//
// Failures are logged rather than returned. wg.Done is called exactly once
// when the function returns, whatever the outcome.
func processImageFile(inputFile string, wg *sync.WaitGroup) {
	defer wg.Done()

	f, err := os.Open(inputFile)
	if err != nil {
		log.Printf("Failed to open %s: %v", inputFile, err)
		return
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Printf("Failed to stat %s: %v", inputFile, err)
		return
	}
	fileSize := fi.Size()

	// Create an output folder and JSONL file based on input file name and
	// current timestamp.
	timestamp := time.Now().Format("20060102150405")
	baseName := filepath.Base(inputFile)
	outDir := fmt.Sprintf("%s_%s", baseName, timestamp)
	if err := os.Mkdir(outDir, 0755); err != nil {
		log.Printf("Failed to create output directory for %s: %v", inputFile, err)
		return
	}
	jsonlFileName := fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
	jsonlFile, err := os.Create(jsonlFileName)
	if err != nil {
		log.Printf("Failed to create JSONL file for %s: %v", inputFile, err)
		return
	}
	defer func() {
		// A failed close can mean lost output, so it is reported.
		if err := jsonlFile.Close(); err != nil {
			log.Printf("Failed to close JSONL file %s: %v", jsonlFileName, err)
		}
	}()

	carvedChan := make(chan CarvedRecord, 100)
	parsedChan := make(chan ParsedRecord, 100)

	// Worker pool: dump each carved record to disk, then parse it.
	numWorkers := runtime.NumCPU()
	var workerWg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		workerWg.Add(1)
		go func() {
			defer workerWg.Done()
			for carved := range carvedChan {
				// Write raw record to disk.
				recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
				if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
					log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
					continue
				}
				mft, err := parseMFTRecord(carved.RecordBytes)
				if err != nil {
					// Skip records that cannot be parsed.
					continue
				}
				// Build JSON record (omitting the magic field).
				recordMap := map[string]interface{}{
					"input_image":         inputFile,
					"offset":              carved.Offset,
					"update_seq_offset":   mft.UpdateSeqOffset,
					"update_seq_size":     mft.UpdateSeqSize,
					"lsn":                 mft.LSN,
					"sequence_number":     mft.SequenceNumber,
					"hard_link_count":     mft.HardLinkCount,
					"first_attr_offset":   mft.FirstAttrOffset,
					"flags":               mft.Flags,
					"real_size":           mft.RealSize,
					"allocated_size":      mft.AllocatedSize,
					"base_file_record":    mft.BaseFileRecord,
					"next_attr_id":        mft.NextAttrId,
					"record_number":       mft.RecordNumber,
					"si_crtime":           mft.SI_Crtime,
					"si_mtime":            mft.SI_Mtime,
					"si_ctime":            mft.SI_CtTime,
					"si_atime":            mft.SI_Atime,
					"object_id":           mft.ObjectID,
					"security_descriptor": mft.SecurityDescriptor,
					"file_names":          mft.FileNames,
					"data_streams":        mft.DataStreams,
				}
				parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
			}
		}()
	}

	// Single writer goroutine so JSONL lines never interleave. Output is
	// buffered to avoid one write syscall per record and flushed once
	// parsedChan has been drained.
	var writerWg sync.WaitGroup
	writerWg.Add(1)
	go func() {
		defer writerWg.Done()
		buffered := bufio.NewWriter(jsonlFile)
		encoder := json.NewEncoder(buffered)
		for pr := range parsedChan {
			if err := encoder.Encode(pr.RecordJSON); err != nil {
				log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
			}
		}
		if err := buffered.Flush(); err != nil {
			log.Printf("Failed to flush JSONL file %s: %v", jsonlFileName, err)
		}
	}()

	// Scan the file for the "FILE0" pattern.
	reader := bufio.NewReader(f)
	const recordSize = 1024
	const chunkSize = 1024 * 1024
	pattern := []byte("FILE0")
	overlap := len(pattern) - 1

	// One buffer is reused for every chunk. buf[:carried] holds the tail of
	// the previous chunk so a pattern straddling a chunk boundary is still
	// found; the tail is shorter than the pattern, so no hit is reported twice.
	buf := make([]byte, overlap+chunkSize)
	carried := 0
	var fileOffset int64 // offset in the file of the next byte to be read
	lastPrint := time.Now()

	for {
		n, readErr := reader.Read(buf[carried : carried+chunkSize])
		if n > 0 {
			// Progress update every ~5 seconds.
			if time.Since(lastPrint) > 5*time.Second {
				perc := float64(fileOffset) / float64(fileSize) * 100.0
				log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
				lastPrint = time.Now()
			}

			data := buf[:carried+n]
			base := fileOffset - int64(carried) // file offset of data[0]
			from := 0
			for {
				hit := bytes.Index(data[from:], pattern)
				if hit < 0 {
					break
				}
				pos := from + hit
				from = pos + 1

				globalOffset := base + int64(pos)
				if globalOffset+recordSize > fileSize {
					// Not enough bytes left in the image for a full record.
					continue
				}
				// ReadAt does not move the file position used by reader.
				recordBytes := make([]byte, recordSize)
				if _, err := f.ReadAt(recordBytes, globalOffset); err != nil {
					log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
					continue
				}
				carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
			}

			// Keep the last len(pattern)-1 bytes (or everything, if shorter)
			// at the front of buf for the next round.
			tail := len(data) - overlap
			if tail < 0 {
				tail = 0
			}
			carried = copy(buf, data[tail:])
			fileOffset += int64(n)
		}
		if readErr != nil {
			if readErr != io.EOF {
				// Report the failure: the scan stops here and the output
				// only covers the part of the image read so far.
				log.Printf("Failed to read %s at offset %d: %v", inputFile, fileOffset, readErr)
			}
			break
		}
		if n == 0 {
			break
		}
	}

	// Shut down in pipeline order: no more carved records, wait for the
	// workers, then no more parsed records, wait for the writer.
	close(carvedChan)
	workerWg.Wait()
	close(parsedChan)
	writerWg.Wait()
	log.Printf("Finished processing %s. Raw records are in %s and JSONL file is %s", inputFile, outDir, jsonlFileName)
}
// main collects the input files named on the command line and processes them
// concurrently.
//
// Each argument is either a file, which is used as is, or a directory, which
// is walked recursively with every non-directory entry added. At most
// runtime.NumCPU() files are processed at the same time. With no arguments a
// usage line is printed and the program exits with status 1.
func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		fmt.Printf("Usage: %s <disk image files or directories>\n", os.Args[0])
		os.Exit(1)
	}

	// Build list of files from provided arguments (recursively if directories).
	var files []string
	for _, arg := range flag.Args() {
		fi, err := os.Stat(arg)
		if err != nil {
			log.Printf("Error stating %s: %v", arg, err)
			continue
		}
		if !fi.IsDir() {
			files = append(files, arg)
			continue
		}
		err = filepath.Walk(arg, func(path string, info os.FileInfo, walkErr error) error {
			if walkErr != nil {
				// Keep walking the rest of the tree, but do not hide the
				// entry that could not be read.
				log.Printf("Error accessing %s: %v", path, walkErr)
				return nil
			}
			if !info.IsDir() {
				files = append(files, path)
			}
			return nil
		})
		if err != nil {
			log.Printf("Error walking directory %s: %v", arg, err)
		}
	}

	totalFiles := len(files)
	if totalFiles == 0 {
		log.Println("No input files found.")
		return
	}
	log.Printf("Found %d files to process.", totalFiles)

	var wg sync.WaitGroup
	concurrentFiles := runtime.NumCPU()
	// sem is a counting semaphore: a slot is taken before a goroutine starts
	// and released when it finishes.
	sem := make(chan struct{}, concurrentFiles)
	for i, file := range files {
		wg.Add(1)
		sem <- struct{}{}
		go func(i int, file string) {
			defer func() { <-sem }()
			log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
			// processImageFile calls wg.Done itself.
			processImageFile(file, &wg)
		}(i, file)
	}
	wg.Wait()
	log.Println("All files processed.")
}