Add Readme and rename files to something more fitting
This commit is contained in:
499
carve_mft.go
Executable file
499
carve_mft.go
Executable file
@@ -0,0 +1,499 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode/utf16"
|
||||
)
|
||||
|
||||
// --------------------
|
||||
// Data Structures
|
||||
// --------------------
|
||||
|
||||
// FileNameAttribute is one decoded $FILE_NAME (type 0x30) attribute of a record.
// All four timestamps are RFC3339 strings produced by filetimeToString.
type FileNameAttribute struct {
	// Filename is the UTF-16 name decoded from the attribute value.
	Filename string `json:"filename"`
	// ParentRef is the parent directory reference, masked to its low 48 bits.
	ParentRef uint64 `json:"parent_ref"`

	Crtime string `json:"crtime"`
	Mtime  string `json:"mtime"`
	CtTime string `json:"ctime"`
	Atime  string `json:"atime"`
}
|
||||
|
||||
// DataStream describes one $DATA (type 0x80) attribute found in a record.
type DataStream struct {
	// Name is the stream name; it stays empty when no name was decoded.
	// For a resident stream named "Zone.Identifier", parseAttributes appends
	// the parsed key/value map to the name.
	Name string `json:"name"`
	// Resident and NonResident are always set as opposites of each other.
	Resident    bool `json:"resident"`
	NonResident bool `json:"non_resident"`
	// ContentBase64 carries the stream bytes for resident streams only.
	ContentBase64 string `json:"content_base64,omitempty"`
}
|
||||
|
||||
type MFTRecord struct {
|
||||
// Header fields (omitting the "magic" since it's always "FILE")
|
||||
UpdateSeqOffset uint16 `json:"update_seq_offset"`
|
||||
UpdateSeqSize uint16 `json:"update_seq_size"`
|
||||
LSN uint64 `json:"lsn"`
|
||||
SequenceNumber uint16 `json:"sequence_number"`
|
||||
HardLinkCount uint16 `json:"hard_link_count"`
|
||||
FirstAttrOffset uint16 `json:"first_attr_offset"`
|
||||
Flags uint16 `json:"flags"`
|
||||
RealSize uint32 `json:"real_size"`
|
||||
AllocatedSize uint32 `json:"allocated_size"`
|
||||
BaseFileRecord uint64 `json:"base_file_record"`
|
||||
NextAttrId uint16 `json:"next_attr_id"`
|
||||
RecordNumber uint32 `json:"record_number"`
|
||||
|
||||
// Standard Information attribute timestamps (if present)
|
||||
SI_Crtime string `json:"si_crtime,omitempty"`
|
||||
SI_Mtime string `json:"si_mtime,omitempty"`
|
||||
SI_CtTime string `json:"si_ctime,omitempty"`
|
||||
SI_Atime string `json:"si_atime,omitempty"`
|
||||
|
||||
// Additional attributes
|
||||
ObjectID string `json:"object_id,omitempty"`
|
||||
SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`
|
||||
|
||||
FileNames []FileNameAttribute `json:"file_names,omitempty"`
|
||||
DataStreams []DataStream `json:"data_streams,omitempty"`
|
||||
}
|
||||
|
||||
// CarvedRecord is a candidate MFT record lifted out of an image: the raw
// record-sized byte window (1024 bytes as read by processImageFile) together
// with the absolute byte offset in the input file where it starts.
type CarvedRecord struct {
	Offset      int64  // absolute position of the record within the image
	RecordBytes []byte // raw bytes starting at Offset
}
|
||||
|
||||
// ParsedRecord is the unit handed from the parsing workers to the JSONL
// writer goroutine.
type ParsedRecord struct {
	// Offset is the record's position in the image; the writer uses it only
	// in error messages.
	Offset int64
	// RecordJSON is the key/value document that gets encoded as one JSON line.
	RecordJSON map[string]interface{}
}
|
||||
|
||||
// --------------------
|
||||
// Helper functions
|
||||
// --------------------
|
||||
|
||||
// safeSlice returns data[start : start+length] and true when the whole range
// lies inside data. It returns (nil, false) when start or length is negative
// or when the range would run past the end of data.
//
// The bounds test is written as a subtraction (length > len(data)-start) so
// that a huge length cannot overflow start+length and slip past the check.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
	if start < 0 || length < 0 || start > len(data) || length > len(data)-start {
		return nil, false
	}
	return data[start : start+length], true
}
|
||||
|
||||
// filetimeToString renders a Windows FILETIME (a count of 100-nanosecond
// ticks since 1601-01-01 UTC) as an RFC3339 timestamp in UTC. The RFC3339
// layout has no fractional part, so sub-second ticks do not appear in the
// returned string.
func filetimeToString(ft uint64) string {
	const (
		ticksPerSecond = 10000000
		nanosPerTick   = 100
		epochDiff      = 11644473600 // seconds between 1601 and 1970
	)
	wholeSeconds := int64(ft / ticksPerSecond)
	leftoverTicks := int64(ft % ticksPerSecond)
	return time.Unix(wholeSeconds-epochDiff, leftoverTicks*nanosPerTick).UTC().Format(time.RFC3339)
}
|
||||
|
||||
// decodeUTF16String interprets b as little-endian UTF-16 code units and
// returns the decoded Go string. A dangling odd byte at the end of b is
// ignored.
func decodeUTF16String(b []byte) string {
	units := make([]uint16, 0, len(b)/2)
	// pos+1 < len(b) stops before a trailing unpaired byte.
	for pos := 0; pos+1 < len(b); pos += 2 {
		units = append(units, binary.LittleEndian.Uint16(b[pos:pos+2]))
	}
	return string(utf16.Decode(units))
}
|
||||
|
||||
// parseZoneIdentifier extracts key=value pairs from the text of a
// Zone.Identifier stream. The content is split on "\n"; each line is split
// at its first "=", and both halves are whitespace-trimmed (which also drops
// a trailing "\r"). Lines without "=" are skipped, and a repeated key keeps
// its last value. The returned map is never nil.
func parseZoneIdentifier(content []byte) map[string]string {
	pairs := make(map[string]string)
	for _, row := range strings.Split(string(content), "\n") {
		eq := strings.IndexByte(row, '=')
		if eq < 0 {
			continue
		}
		pairs[strings.TrimSpace(row[:eq])] = strings.TrimSpace(row[eq+1:])
	}
	return pairs
}
|
||||
|
||||
// --------------------
|
||||
// Attribute Parsing
|
||||
// --------------------
|
||||
|
||||
func parseAttributes(data []byte, rec *MFTRecord) {
|
||||
offset := int(rec.FirstAttrOffset)
|
||||
for offset < len(data)-8 {
|
||||
if attrBytes, ok := safeSlice(data, offset, 4); !ok {
|
||||
break
|
||||
} else {
|
||||
attrType := binary.LittleEndian.Uint32(attrBytes)
|
||||
if attrType == 0xFFFFFFFF {
|
||||
break
|
||||
}
|
||||
if attrLenBytes, ok := safeSlice(data, offset+4, 4); !ok {
|
||||
break
|
||||
} else {
|
||||
attrLen := binary.LittleEndian.Uint32(attrLenBytes)
|
||||
if attrLen < 8 || offset+int(attrLen) > len(data) {
|
||||
offset++
|
||||
continue
|
||||
}
|
||||
residentFlag := data[offset+8]
|
||||
var valLen uint32
|
||||
var valOffset uint16
|
||||
if residentFlag == 0 {
|
||||
if vb, ok := safeSlice(data, offset+16, 4); ok {
|
||||
valLen = binary.LittleEndian.Uint32(vb)
|
||||
} else {
|
||||
offset += int(attrLen)
|
||||
continue
|
||||
}
|
||||
if vb, ok := safeSlice(data, offset+20, 2); ok {
|
||||
valOffset = binary.LittleEndian.Uint16(vb)
|
||||
} else {
|
||||
offset += int(attrLen)
|
||||
continue
|
||||
}
|
||||
}
|
||||
switch attrType {
|
||||
case 0x10: // $STANDARD_INFORMATION
|
||||
if residentFlag == 0 {
|
||||
if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
|
||||
rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
|
||||
rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
|
||||
rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
|
||||
rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
|
||||
}
|
||||
}
|
||||
case 0x30: // $FILE_NAME
|
||||
if residentFlag == 0 {
|
||||
if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
|
||||
var fn FileNameAttribute
|
||||
fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
|
||||
fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
|
||||
fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
|
||||
fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
|
||||
fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
|
||||
if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
|
||||
filenameLen := filenameLenBytes[0]
|
||||
if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
|
||||
fn.Filename = decodeUTF16String(nameBytes)
|
||||
}
|
||||
}
|
||||
rec.FileNames = append(rec.FileNames, fn)
|
||||
}
|
||||
}
|
||||
case 0x80: // $DATA
|
||||
var ds DataStream
|
||||
if nameInfo, ok := safeSlice(data, offset+9, 1); ok {
|
||||
nameLen := nameInfo[0]
|
||||
if nameBytes, ok := safeSlice(data, offset+10, int(nameLen)*2); ok && nameLen > 0 {
|
||||
ds.Name = decodeUTF16String(nameBytes)
|
||||
}
|
||||
}
|
||||
if residentFlag == 0 {
|
||||
ds.Resident = true
|
||||
ds.NonResident = false
|
||||
if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
|
||||
ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
|
||||
if ds.Name == "Zone.Identifier" {
|
||||
zoneInfo := parseZoneIdentifier(content)
|
||||
ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ds.Resident = false
|
||||
ds.NonResident = true
|
||||
}
|
||||
rec.DataStreams = append(rec.DataStreams, ds)
|
||||
case 0x40: // $OBJECT_ID
|
||||
if residentFlag == 0 {
|
||||
if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
|
||||
rec.ObjectID = fmt.Sprintf("%x", objData)
|
||||
}
|
||||
}
|
||||
case 0x50: // $SECURITY_DESCRIPTOR
|
||||
if residentFlag == 0 {
|
||||
if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
|
||||
rec.SecurityDescriptor = map[string]interface{}{
|
||||
"raw": fmt.Sprintf("%x", secData),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
offset += int(attrLen)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// parseMFTRecord parses a 1024-byte MFT record.
|
||||
func parseMFTRecord(data []byte) (*MFTRecord, error) {
|
||||
if len(data) < 46 {
|
||||
return nil, fmt.Errorf("data too short to be a valid record")
|
||||
}
|
||||
if string(data[:4]) != "FILE" {
|
||||
return nil, fmt.Errorf("invalid record header")
|
||||
}
|
||||
rec := &MFTRecord{
|
||||
UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
|
||||
UpdateSeqSize: binary.LittleEndian.Uint16(data[6:8]),
|
||||
LSN: binary.LittleEndian.Uint64(data[8:16]),
|
||||
SequenceNumber: binary.LittleEndian.Uint16(data[16:18]),
|
||||
HardLinkCount: binary.LittleEndian.Uint16(data[18:20]),
|
||||
FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
|
||||
Flags: binary.LittleEndian.Uint16(data[22:24]),
|
||||
RealSize: binary.LittleEndian.Uint32(data[24:28]),
|
||||
AllocatedSize: binary.LittleEndian.Uint32(data[28:32]),
|
||||
BaseFileRecord: binary.LittleEndian.Uint64(data[32:40]),
|
||||
NextAttrId: binary.LittleEndian.Uint16(data[40:42]),
|
||||
RecordNumber: binary.LittleEndian.Uint32(data[42:46]),
|
||||
}
|
||||
parseAttributes(data, rec)
|
||||
return rec, nil
|
||||
}
|
||||
|
||||
// --------------------
|
||||
// Parallel Processing and Main
|
||||
// --------------------
|
||||
|
||||
// Command-line options. The values are only meaningful after flag.Parse has
// run in main.
var (
	// dumpFlag turns on writing each carved record to its own file.
	dumpFlag = flag.Bool("dump", false, "Dump raw MFT records (off by default)")

	// jsonlOut overrides the per-input default JSONL path.
	jsonlOut = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")

	// dumpFolder overrides the per-input default dump directory.
	dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
)
|
||||
|
||||
func processImageFile(inputFile string, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
f, err := os.Open(inputFile)
|
||||
if err != nil {
|
||||
log.Printf("Failed to open %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
log.Printf("Failed to stat %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
fileSize := fi.Size()
|
||||
|
||||
timestamp := time.Now().Format("20060102150405")
|
||||
baseName := filepath.Base(inputFile)
|
||||
|
||||
// Determine dump folder: if provided, use that; else use default "<inputFile>_<timestamp>"
|
||||
var outDir string
|
||||
if *dumpFolder != "" {
|
||||
outDir = *dumpFolder
|
||||
// Ensure the folder exists.
|
||||
if err := os.MkdirAll(outDir, 0755); err != nil {
|
||||
log.Printf("Failed to create dump folder %s: %v", outDir, err)
|
||||
return
|
||||
}
|
||||
} else {
|
||||
outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
|
||||
if err := os.Mkdir(outDir, 0755); err != nil {
|
||||
log.Printf("Failed to create output directory for %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Determine JSONL output file: if provided, use that; else use default "<inputFile>_<timestamp>.jsonl"
|
||||
var jsonlPath string
|
||||
if *jsonlOut != "" {
|
||||
jsonlPath = *jsonlOut
|
||||
} else {
|
||||
jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
|
||||
}
|
||||
// Open JSONL file in append mode (create if it doesn't exist)
|
||||
jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
|
||||
return
|
||||
}
|
||||
defer jsonlFile.Close()
|
||||
|
||||
carvedChan := make(chan CarvedRecord, 100)
|
||||
parsedChan := make(chan ParsedRecord, 100)
|
||||
|
||||
numWorkers := runtime.NumCPU()
|
||||
var workerWg sync.WaitGroup
|
||||
for i := 0; i < numWorkers; i++ {
|
||||
workerWg.Add(1)
|
||||
go func() {
|
||||
defer workerWg.Done()
|
||||
for carved := range carvedChan {
|
||||
// If dumping is enabled, write the raw record file.
|
||||
if *dumpFlag {
|
||||
recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
|
||||
if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
|
||||
log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
|
||||
}
|
||||
}
|
||||
mft, err := parseMFTRecord(carved.RecordBytes)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
recordMap := map[string]interface{}{
|
||||
"input_image": inputFile,
|
||||
"offset": carved.Offset,
|
||||
"update_seq_offset": mft.UpdateSeqOffset,
|
||||
"update_seq_size": mft.UpdateSeqSize,
|
||||
"lsn": mft.LSN,
|
||||
"sequence_number": mft.SequenceNumber,
|
||||
"hard_link_count": mft.HardLinkCount,
|
||||
"first_attr_offset": mft.FirstAttrOffset,
|
||||
"flags": mft.Flags,
|
||||
"real_size": mft.RealSize,
|
||||
"allocated_size": mft.AllocatedSize,
|
||||
"base_file_record": mft.BaseFileRecord,
|
||||
"next_attr_id": mft.NextAttrId,
|
||||
"record_number": mft.RecordNumber,
|
||||
"si_crtime": mft.SI_Crtime,
|
||||
"si_mtime": mft.SI_Mtime,
|
||||
"si_ctime": mft.SI_CtTime,
|
||||
"si_atime": mft.SI_Atime,
|
||||
"object_id": mft.ObjectID,
|
||||
"security_descriptor": mft.SecurityDescriptor,
|
||||
"file_names": mft.FileNames,
|
||||
"data_streams": mft.DataStreams,
|
||||
}
|
||||
parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
var writerWg sync.WaitGroup
|
||||
writerWg.Add(1)
|
||||
go func() {
|
||||
defer writerWg.Done()
|
||||
encoder := json.NewEncoder(jsonlFile)
|
||||
for pr := range parsedChan {
|
||||
if err := encoder.Encode(pr.RecordJSON); err != nil {
|
||||
log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
reader := bufio.NewReader(f)
|
||||
const recordSize = 1024
|
||||
const chunkSize = 1024 * 1024
|
||||
pattern := []byte("FILE0")
|
||||
var fileOffset int64 = 0
|
||||
var leftover []byte
|
||||
lastPrint := time.Now()
|
||||
|
||||
for {
|
||||
chunk := make([]byte, chunkSize)
|
||||
n, err := reader.Read(chunk)
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
data := append(leftover, chunk[:n]...)
|
||||
if time.Since(lastPrint) > 5*time.Second {
|
||||
perc := float64(fileOffset) / float64(fileSize) * 100.0
|
||||
log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
|
||||
lastPrint = time.Now()
|
||||
}
|
||||
searchLimit := len(data) - len(pattern)
|
||||
for i := 0; i <= searchLimit; i++ {
|
||||
if bytes.Equal(data[i:i+len(pattern)], pattern) {
|
||||
globalOffset := fileOffset - int64(len(leftover)) + int64(i)
|
||||
if globalOffset+recordSize > fileSize {
|
||||
continue
|
||||
}
|
||||
recordBytes := make([]byte, recordSize)
|
||||
_, err := f.ReadAt(recordBytes, globalOffset)
|
||||
if err != nil {
|
||||
log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
|
||||
continue
|
||||
}
|
||||
carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
|
||||
}
|
||||
}
|
||||
if len(data) >= len(pattern)-1 {
|
||||
leftover = data[len(data)-(len(pattern)-1):]
|
||||
} else {
|
||||
leftover = data
|
||||
}
|
||||
fileOffset += int64(n)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
close(carvedChan)
|
||||
workerWg.Wait()
|
||||
close(parsedChan)
|
||||
writerWg.Wait()
|
||||
log.Printf("Finished processing %s. Dumps (if enabled) are in %s and JSONL file is %s", inputFile, outDir, jsonlPath)
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
if flag.NArg() == 0 {
|
||||
fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var files []string
|
||||
for _, arg := range flag.Args() {
|
||||
fi, err := os.Stat(arg)
|
||||
if err != nil {
|
||||
log.Printf("Error stating %s: %v", arg, err)
|
||||
continue
|
||||
}
|
||||
if fi.IsDir() {
|
||||
err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
if !info.IsDir() {
|
||||
files = append(files, path)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
log.Printf("Error walking directory %s: %v", arg, err)
|
||||
}
|
||||
} else {
|
||||
files = append(files, arg)
|
||||
}
|
||||
}
|
||||
|
||||
totalFiles := len(files)
|
||||
if totalFiles == 0 {
|
||||
log.Println("No input files found.")
|
||||
return
|
||||
}
|
||||
log.Printf("Found %d files to process.", totalFiles)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
concurrentFiles := runtime.NumCPU()
|
||||
sem := make(chan struct{}, concurrentFiles)
|
||||
for i, file := range files {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{}
|
||||
go func(i int, file string) {
|
||||
defer func() { <-sem }()
|
||||
log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
|
||||
processImageFile(file, &wg)
|
||||
}(i, file)
|
||||
}
|
||||
wg.Wait()
|
||||
log.Println("All files processed.")
|
||||
}
|
||||
Reference in New Issue
Block a user