mft/carve_mft.go
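// carve_mft scans raw disk images for NTFS MFT "FILE" records, parses each
// 1024-byte candidate record, and writes one JSON object per record (JSONL).
// Optionally it also dumps every raw carved record to a folder.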
package main

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"time"
	"unicode/utf16"
)
// --------------------
// Data Structures
// --------------------

// FileNameAttribute holds the fields extracted from a $FILE_NAME attribute.
type FileNameAttribute struct {
	Filename  string `json:"filename"`
	ParentRef uint64 `json:"parent_ref"`
	Crtime    string `json:"crtime"`
	Mtime     string `json:"mtime"`
	CtTime    string `json:"ctime"`
	Atime     string `json:"atime"`
}

// DataStream describes a $DATA attribute; its content is captured only when
// the attribute is resident (stored inside the MFT record itself).
type DataStream struct {
	Name          string `json:"name"`
	Resident      bool   `json:"resident"`
	NonResident   bool   `json:"non_resident"`
	ContentBase64 string `json:"content_base64,omitempty"`
}
type MFTRecord struct {
	// Header fields (omitting the "magic" since it's always "FILE")
	UpdateSeqOffset uint16 `json:"update_seq_offset"`
	UpdateSeqSize   uint16 `json:"update_seq_size"`
	LSN             uint64 `json:"lsn"`
	SequenceNumber  uint16 `json:"sequence_number"`
	HardLinkCount   uint16 `json:"hard_link_count"`
	FirstAttrOffset uint16 `json:"first_attr_offset"`
	Flags           uint16 `json:"flags"`
	RealSize        uint32 `json:"real_size"`
	AllocatedSize   uint32 `json:"allocated_size"`
	BaseFileRecord  uint64 `json:"base_file_record"`
	NextAttrId      uint16 `json:"next_attr_id"`
	RecordNumber    uint32 `json:"record_number"`
	// Standard Information attribute timestamps (if present)
	SI_Crtime string `json:"si_crtime,omitempty"`
	SI_Mtime  string `json:"si_mtime,omitempty"`
	SI_CtTime string `json:"si_ctime,omitempty"`
	SI_Atime  string `json:"si_atime,omitempty"`
	// Additional attributes
	ObjectID           string                 `json:"object_id,omitempty"`
	SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
	FileNames          []FileNameAttribute    `json:"file_names,omitempty"`
	DataStreams        []DataStream           `json:"data_streams,omitempty"`
}
// CarvedRecord holds a candidate 1024-byte record and its global offset.
type CarvedRecord struct {
	Offset      int64
	RecordBytes []byte
}

// ParsedRecord is what gets sent to the JSON writer.
type ParsedRecord struct {
	Offset     int64
	RecordJSON map[string]interface{}
}
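// Pipeline overview: the scanner sends CarvedRecord values to a pool of
// parser workers; each worker parses the record and hands a ParsedRecord to
// a single writer goroutine, which serializes one JSON object per line.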
// --------------------
// Helper functions
// --------------------

// safeSlice returns data[start : start+length] and true if the range is
// within bounds; otherwise it returns nil and false.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
	if start < 0 || length < 0 || start+length > len(data) {
		return nil, false
	}
	return data[start : start+length], true
}
// filetimeToString converts a Windows FILETIME (a uint64 count of 100ns
// intervals since 1601-01-01 UTC) into an RFC3339 timestamp string.
func filetimeToString(ft uint64) string {
	const epochDiff = 11644473600 // seconds between 1601-01-01 and 1970-01-01
	secs := int64(ft/10000000) - epochDiff
	nsec := int64(ft%10000000) * 100
	t := time.Unix(secs, nsec).UTC()
	return t.Format(time.RFC3339)
}
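// For example, a FILETIME of 0 maps to 1601-01-01T00:00:00Z, and
// 132223104000000000 (13,222,310,400 seconds after 1601) maps to
// 2020-01-01T00:00:00Z.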
// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
func decodeUTF16String(b []byte) string {
	if len(b)%2 != 0 {
		b = b[:len(b)-1]
	}
	u16 := make([]uint16, len(b)/2)
	for i := 0; i < len(u16); i++ {
		u16[i] = binary.LittleEndian.Uint16(b[i*2:])
	}
	return string(utf16.Decode(u16))
}
// parseZoneIdentifier is a simple parser for Zone.Identifier streams.
func parseZoneIdentifier(content []byte) map[string]string {
	result := make(map[string]string)
	text := string(content)
	lines := strings.Split(text, "\n")
	for _, line := range lines {
		if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
			key := strings.TrimSpace(parts[0])
			value := strings.TrimSpace(parts[1])
			result[key] = value
		}
	}
	return result
}
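// Zone.Identifier ("Mark of the Web") streams are short INI-like texts, e.g.:
//
//	[ZoneTransfer]
//	ZoneId=3
//	HostUrl=https://example.com/file.exe
//
// The "[ZoneTransfer]" header contains no "=" and is skipped, so the result
// here would be {"ZoneId": "3", "HostUrl": "https://example.com/file.exe"}.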
// --------------------
// Attribute Parsing
// --------------------

// parseAttributes walks the attribute list starting at rec.FirstAttrOffset
// and fills in the attribute-derived fields of rec.
func parseAttributes(data []byte, rec *MFTRecord) {
	offset := int(rec.FirstAttrOffset)
	for offset < len(data)-8 {
		attrBytes, ok := safeSlice(data, offset, 4)
		if !ok {
			break
		}
		attrType := binary.LittleEndian.Uint32(attrBytes)
		if attrType == 0xFFFFFFFF { // end-of-attributes marker
			break
		}
		attrLenBytes, ok := safeSlice(data, offset+4, 4)
		if !ok {
			break
		}
		attrLen := binary.LittleEndian.Uint32(attrLenBytes)
		if attrLen < 8 || offset+int(attrLen) > len(data) {
			offset++
			continue
		}
		residentFlag := data[offset+8] // 0 = resident, nonzero = non-resident
		var valLen uint32
		var valOffset uint16
		if residentFlag == 0 {
			vb, ok := safeSlice(data, offset+16, 4)
			if !ok {
				offset += int(attrLen)
				continue
			}
			valLen = binary.LittleEndian.Uint32(vb)
			ob, ok := safeSlice(data, offset+20, 2)
			if !ok {
				offset += int(attrLen)
				continue
			}
			valOffset = binary.LittleEndian.Uint16(ob)
		}
		switch attrType {
		case 0x10: // $STANDARD_INFORMATION
			if residentFlag == 0 {
				if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
					rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
					rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
					rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
					rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
				}
			}
		case 0x30: // $FILE_NAME
			if residentFlag == 0 {
				if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
					var fn FileNameAttribute
					// The parent reference packs a 48-bit record number with a 16-bit sequence number.
					fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
					fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
					fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
					fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
					fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
					// Name length (in UTF-16 code units) is at offset 64; the name starts at 66.
					if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
						filenameLen := filenameLenBytes[0]
						if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
							fn.Filename = decodeUTF16String(nameBytes)
						}
					}
					rec.FileNames = append(rec.FileNames, fn)
				}
			}
		case 0x80: // $DATA
			var ds DataStream
			// The attribute name length is at offset 9; the 16-bit field at
			// offset 10 is the *offset* of the name within the attribute, so
			// the name bytes must be read from there rather than directly at
			// offset 10.
			if nameLenBytes, ok := safeSlice(data, offset+9, 1); ok {
				nameLen := nameLenBytes[0]
				if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok && nameLen > 0 {
					nameOff := binary.LittleEndian.Uint16(nameOffBytes)
					if nameBytes, ok := safeSlice(data, offset+int(nameOff), int(nameLen)*2); ok {
						ds.Name = decodeUTF16String(nameBytes)
					}
				}
			}
			if residentFlag == 0 {
				ds.Resident = true
				ds.NonResident = false
				if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
					ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
					if ds.Name == "Zone.Identifier" {
						zoneInfo := parseZoneIdentifier(content)
						ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
					}
				}
			} else {
				ds.Resident = false
				ds.NonResident = true
			}
			rec.DataStreams = append(rec.DataStreams, ds)
		case 0x40: // $OBJECT_ID
			if residentFlag == 0 {
				if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
					rec.ObjectID = fmt.Sprintf("%x", objData)
				}
			}
		case 0x50: // $SECURITY_DESCRIPTOR
			if residentFlag == 0 {
				if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
					rec.SecurityDescriptor = map[string]interface{}{
						"raw": fmt.Sprintf("%x", secData),
					}
				}
			}
		}
		offset += int(attrLen)
	}
}
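// Only $STANDARD_INFORMATION (0x10), $FILE_NAME (0x30), $OBJECT_ID (0x40),
// $SECURITY_DESCRIPTOR (0x50), and $DATA (0x80) are decoded above; any other
// attribute type (e.g. $ATTRIBUTE_LIST, 0x20, or $INDEX_ROOT, 0x90) is simply
// stepped over by the attrLen advance at the bottom of the loop.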
// parseMFTRecord parses a carved 1024-byte MFT record.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
	if len(data) < 48 {
		return nil, fmt.Errorf("data too short to be a valid record")
	}
	if string(data[:4]) != "FILE" {
		return nil, fmt.Errorf("invalid record header")
	}
	rec := &MFTRecord{
		UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
		UpdateSeqSize:   binary.LittleEndian.Uint16(data[6:8]),
		LSN:             binary.LittleEndian.Uint64(data[8:16]),
		SequenceNumber:  binary.LittleEndian.Uint16(data[16:18]),
		HardLinkCount:   binary.LittleEndian.Uint16(data[18:20]),
		FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
		Flags:           binary.LittleEndian.Uint16(data[22:24]),
		RealSize:        binary.LittleEndian.Uint32(data[24:28]),
		AllocatedSize:   binary.LittleEndian.Uint32(data[28:32]),
		BaseFileRecord:  binary.LittleEndian.Uint64(data[32:40]),
		NextAttrId:      binary.LittleEndian.Uint16(data[40:42]),
		// Bytes 42-44 are alignment padding; the record number (NTFS 3.1+)
		// lives at offset 44, not 42.
		RecordNumber: binary.LittleEndian.Uint32(data[44:48]),
	}
	parseAttributes(data, rec)
	return rec, nil
}
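// The parser above reads attribute data straight from the carved bytes and
// does not undo NTFS update sequence ("fixup") protection: on disk, the last
// two bytes of each 512-byte sector of a FILE record hold the update sequence
// number, and the original bytes live in the update sequence array at
// UpdateSeqOffset. Fields that straddle a sector boundary parse incorrectly
// without this repair. applyFixups is a minimal sketch of that step; the
// helper is our illustrative addition and is not called by parseMFTRecord.
func applyFixups(data []byte, usaOffset, usaSize uint16) bool {
	const sectorSize = 512
	// usaSize counts 16-bit words: the USN itself plus one saved word per sector.
	if usaSize < 2 || int(usaOffset)+int(usaSize)*2 > len(data) {
		return false
	}
	usn := data[int(usaOffset) : int(usaOffset)+2]
	for i := 1; i < int(usaSize); i++ {
		end := i * sectorSize
		if end > len(data) {
			return false
		}
		// Each sector must end with the USN, otherwise the record is torn.
		if !bytes.Equal(data[end-2:end], usn) {
			return false
		}
		// Restore the two original bytes from the update sequence array.
		copy(data[end-2:end], data[int(usaOffset)+i*2:int(usaOffset)+i*2+2])
	}
	return true
}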
// --------------------
// Parallel Processing and Main
// --------------------

var (
	dumpFlag   = flag.Bool("dump", false, "Dump raw MFT records (off by default)")
	jsonlOut   = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")
	dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
)
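// Example invocation (assuming the binary is built as "carve_mft"):
//
//	carve_mft -dump -dump-folder carved -jsonl records.jsonl image.dd
//
// When -jsonl or -dump-folder is omitted, per-input defaults of the form
// "<basename>_<timestamp>.jsonl" and "<basename>_<timestamp>/" are used.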
func processImageFile(inputFile string, wg *sync.WaitGroup) {
	defer wg.Done()
	f, err := os.Open(inputFile)
	if err != nil {
		log.Printf("Failed to open %s: %v", inputFile, err)
		return
	}
	defer f.Close()
	fi, err := f.Stat()
	if err != nil {
		log.Printf("Failed to stat %s: %v", inputFile, err)
		return
	}
	fileSize := fi.Size()
	timestamp := time.Now().Format("20060102150405")
	baseName := filepath.Base(inputFile)
	// Determine the dump folder: use the flag value if provided, else the
	// default "<inputFile>_<timestamp>".
	var outDir string
	if *dumpFolder != "" {
		outDir = *dumpFolder
		// Ensure the folder exists.
		if err := os.MkdirAll(outDir, 0755); err != nil {
			log.Printf("Failed to create dump folder %s: %v", outDir, err)
			return
		}
	} else {
		outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
		if err := os.Mkdir(outDir, 0755); err != nil {
			log.Printf("Failed to create output directory for %s: %v", inputFile, err)
			return
		}
	}
	// Determine the JSONL output file: use the flag value if provided, else
	// the default "<inputFile>_<timestamp>.jsonl".
	var jsonlPath string
	if *jsonlOut != "" {
		jsonlPath = *jsonlOut
	} else {
		jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
	}
	// Open the JSONL file in append mode (create if it doesn't exist).
	jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
		return
	}
	defer jsonlFile.Close()
	carvedChan := make(chan CarvedRecord, 100)
	parsedChan := make(chan ParsedRecord, 100)
	numWorkers := runtime.NumCPU()
	var workerWg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		workerWg.Add(1)
		go func() {
			defer workerWg.Done()
			for carved := range carvedChan {
				// If dumping is enabled, write the raw record file.
				if *dumpFlag {
					recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
					if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
						log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
					}
				}
				mft, err := parseMFTRecord(carved.RecordBytes)
				if err != nil {
					continue
				}
				recordMap := map[string]interface{}{
					"input_image":         inputFile,
					"offset":              carved.Offset,
					"update_seq_offset":   mft.UpdateSeqOffset,
					"update_seq_size":     mft.UpdateSeqSize,
					"lsn":                 mft.LSN,
					"sequence_number":     mft.SequenceNumber,
					"hard_link_count":     mft.HardLinkCount,
					"first_attr_offset":   mft.FirstAttrOffset,
					"flags":               mft.Flags,
					"real_size":           mft.RealSize,
					"allocated_size":      mft.AllocatedSize,
					"base_file_record":    mft.BaseFileRecord,
					"next_attr_id":        mft.NextAttrId,
					"record_number":       mft.RecordNumber,
					"si_crtime":           mft.SI_Crtime,
					"si_mtime":            mft.SI_Mtime,
					"si_ctime":            mft.SI_CtTime,
					"si_atime":            mft.SI_Atime,
					"object_id":           mft.ObjectID,
					"security_descriptor": mft.SecurityDescriptor,
					"file_names":          mft.FileNames,
					"data_streams":        mft.DataStreams,
				}
				parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
			}
		}()
	}
	var writerWg sync.WaitGroup
	writerWg.Add(1)
	go func() {
		defer writerWg.Done()
		encoder := json.NewEncoder(jsonlFile)
		for pr := range parsedChan {
			if err := encoder.Encode(pr.RecordJSON); err != nil {
				log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
			}
		}
	}()
	reader := bufio.NewReader(f)
	const recordSize = 1024
	const chunkSize = 1024 * 1024
	// "FILE" followed by '0' (0x30, the low byte of the usual update sequence
	// offset) cuts down on false positives versus matching "FILE" alone.
	pattern := []byte("FILE0")
	var fileOffset int64 = 0
	var leftover []byte
	chunk := make([]byte, chunkSize)
	lastPrint := time.Now()
	for {
		n, err := reader.Read(chunk)
		if n == 0 {
			if err != nil && err != io.EOF {
				log.Printf("Read error in %s: %v", inputFile, err)
			}
			break
		}
		// Prepend the last len(pattern)-1 bytes of the previous chunk so that
		// matches spanning a chunk boundary are not missed.
		data := append(leftover, chunk[:n]...)
		if time.Since(lastPrint) > 5*time.Second {
			perc := float64(fileOffset) / float64(fileSize) * 100.0
			log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
			lastPrint = time.Now()
		}
		searchLimit := len(data) - len(pattern)
		for i := 0; i <= searchLimit; i++ {
			if bytes.Equal(data[i:i+len(pattern)], pattern) {
				// data[0] sits at file offset fileOffset-len(leftover).
				globalOffset := fileOffset - int64(len(leftover)) + int64(i)
				if globalOffset+recordSize > fileSize {
					continue
				}
				recordBytes := make([]byte, recordSize)
				if _, err := f.ReadAt(recordBytes, globalOffset); err != nil {
					log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
					continue
				}
				carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
			}
		}
		if len(data) >= len(pattern)-1 {
			leftover = data[len(data)-(len(pattern)-1):]
		} else {
			leftover = data
		}
		fileOffset += int64(n)
		if err == io.EOF {
			break
		}
	}
	close(carvedChan)
	workerWg.Wait()
	close(parsedChan)
	writerWg.Wait()
	log.Printf("Finished processing %s. Dumps (if enabled) are in %s and the JSONL file is %s", inputFile, outDir, jsonlPath)
}
func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
		os.Exit(1)
	}
	var files []string
	for _, arg := range flag.Args() {
		fi, err := os.Stat(arg)
		if err != nil {
			log.Printf("Error stating %s: %v", arg, err)
			continue
		}
		if fi.IsDir() {
			err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
				if err != nil {
					return nil
				}
				if !info.IsDir() {
					files = append(files, path)
				}
				return nil
			})
			if err != nil {
				log.Printf("Error walking directory %s: %v", arg, err)
			}
		} else {
			files = append(files, arg)
		}
	}
	totalFiles := len(files)
	if totalFiles == 0 {
		log.Println("No input files found.")
		return
	}
	log.Printf("Found %d files to process.", totalFiles)
	var wg sync.WaitGroup
	// A semaphore limits how many images are processed concurrently.
	concurrentFiles := runtime.NumCPU()
	sem := make(chan struct{}, concurrentFiles)
	for i, file := range files {
		wg.Add(1)
		sem <- struct{}{}
		go func(i int, file string) {
			defer func() { <-sem }()
			log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
			processImageFile(file, &wg) // calls wg.Done when it finishes
		}(i, file)
	}
	wg.Wait()
	log.Println("All files processed.")
}