package main

import (
	"bufio"
	"bytes"
	"encoding/base64"
	"encoding/binary"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"time"
	"unicode/utf16"
)

// --------------------
// Data Structures
// --------------------

type FileNameAttribute struct {
	Filename  string `json:"filename"`
	ParentRef uint64 `json:"parent_ref"`
	Crtime    string `json:"crtime"`
	Mtime     string `json:"mtime"`
	CtTime    string `json:"ctime"`
	Atime     string `json:"atime"`
}

type DataStream struct {
	Name          string `json:"name"`
	Resident      bool   `json:"resident"`
	NonResident   bool   `json:"non_resident"`
	ContentBase64 string `json:"content_base64,omitempty"`
}

type MFTRecord struct {
	// Header fields (omitting the "magic" since it's always "FILE")
	UpdateSeqOffset uint16 `json:"update_seq_offset"`
	UpdateSeqSize   uint16 `json:"update_seq_size"`
	LSN             uint64 `json:"lsn"`
	SequenceNumber  uint16 `json:"sequence_number"`
	HardLinkCount   uint16 `json:"hard_link_count"`
	FirstAttrOffset uint16 `json:"first_attr_offset"`
	Flags           uint16 `json:"flags"`
	RealSize        uint32 `json:"real_size"`
	AllocatedSize   uint32 `json:"allocated_size"`
	BaseFileRecord  uint64 `json:"base_file_record"`
	NextAttrId      uint16 `json:"next_attr_id"`
	RecordNumber    uint32 `json:"record_number"`

	// Standard Information attribute timestamps (if present)
	SI_Crtime string `json:"si_crtime,omitempty"`
	SI_Mtime  string `json:"si_mtime,omitempty"`
	SI_CtTime string `json:"si_ctime,omitempty"`
	SI_Atime  string `json:"si_atime,omitempty"`

	// Additional attributes
	ObjectID           string                 `json:"object_id,omitempty"`
	SecurityDescriptor map[string]interface{} `json:"security_descriptor,omitempty"`

	FileNames   []FileNameAttribute `json:"file_names,omitempty"`
	DataStreams []DataStream        `json:"data_streams,omitempty"`
}

// CarvedRecord holds a candidate 1024-byte record and its global offset.
type CarvedRecord struct {
	Offset      int64
	RecordBytes []byte
}

// ParsedRecord is what gets sent to the JSON writer.
type ParsedRecord struct {
	Offset     int64
	RecordJSON map[string]interface{}
}

// --------------------
// Helper functions
// --------------------

// safeSlice returns data[start : start+length] if it lies within bounds;
// otherwise it returns false. The negative-length guard keeps a corrupt
// length field from panicking the parser.
func safeSlice(data []byte, start int, length int) ([]byte, bool) {
	if start < 0 || length < 0 || start+length > len(data) {
		return nil, false
	}
	return data[start : start+length], true
}

// filetimeToString converts a Windows FILETIME (a uint64 count of 100 ns
// ticks since 1601-01-01 UTC) into an RFC 3339 timestamp string.
func filetimeToString(ft uint64) string {
	const epochDiff = 11644473600 // seconds between 1601-01-01 and 1970-01-01
	secs := int64(ft/10000000) - epochDiff
	nsec := int64(ft%10000000) * 100
	return time.Unix(secs, nsec).UTC().Format(time.RFC3339)
}
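
// As a sanity check: filetimeToString(0) returns "1601-01-01T00:00:00Z",
// so a zeroed timestamp field in a carved record surfaces as that sentinel
// value rather than as an error.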

// decodeUTF16String converts little-endian UTF-16 bytes to a Go string.
func decodeUTF16String(b []byte) string {
	if len(b)%2 != 0 {
		b = b[:len(b)-1] // drop a trailing odd byte
	}
	u16 := make([]uint16, len(b)/2)
	for i := 0; i < len(u16); i++ {
		u16[i] = binary.LittleEndian.Uint16(b[i*2:])
	}
	return string(utf16.Decode(u16))
}
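
// For example, decodeUTF16String([]byte{0x41, 0x00, 0x42, 0x00}) == "AB".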

// parseZoneIdentifier is a simple key=value parser for Zone.Identifier
// alternate data streams.
func parseZoneIdentifier(content []byte) map[string]string {
	result := make(map[string]string)
	for _, line := range strings.Split(string(content), "\n") {
		if parts := strings.SplitN(line, "=", 2); len(parts) == 2 {
			// TrimSpace also strips the \r left over from CRLF line endings.
			key := strings.TrimSpace(parts[0])
			value := strings.TrimSpace(parts[1])
			result[key] = value
		}
	}
	return result
}
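
// Example: a typical Zone.Identifier stream such as
//
//	[ZoneTransfer]
//	ZoneId=3
//	ReferrerUrl=https://example.com/
//
// parses to {"ZoneId": "3", "ReferrerUrl": "https://example.com/"}; the
// "[ZoneTransfer]" section header contains no "=" and is skipped.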

// --------------------
// Attribute Parsing
// --------------------

// parseAttributes walks the attribute list of a record, starting at
// FirstAttrOffset. Attribute headers are laid out as: type (bytes 0-3),
// total length (4-7), non-resident flag (8), name length (9), name offset
// (10-11), and, for resident attributes, value length (16-19) and value
// offset (20-21).
func parseAttributes(data []byte, rec *MFTRecord) {
	offset := int(rec.FirstAttrOffset)
	for offset < len(data)-8 {
		attrBytes, ok := safeSlice(data, offset, 4)
		if !ok {
			break
		}
		attrType := binary.LittleEndian.Uint32(attrBytes)
		if attrType == 0xFFFFFFFF { // end-of-attributes marker
			break
		}
		attrLenBytes, ok := safeSlice(data, offset+4, 4)
		if !ok {
			break
		}
		attrLen := binary.LittleEndian.Uint32(attrLenBytes)
		if attrLen < 8 || offset+int(attrLen) > len(data) {
			// Implausible length: advance one byte and rescan rather than
			// abandon the record.
			offset++
			continue
		}
		// 0 = resident (value stored inside the record), 1 = non-resident
		// (value stored in external clusters).
		residentFlag := data[offset+8]
		var valLen uint32
		var valOffset uint16
		if residentFlag == 0 {
			vb, ok := safeSlice(data, offset+16, 4)
			if !ok {
				offset += int(attrLen)
				continue
			}
			valLen = binary.LittleEndian.Uint32(vb)
			vo, ok := safeSlice(data, offset+20, 2)
			if !ok {
				offset += int(attrLen)
				continue
			}
			valOffset = binary.LittleEndian.Uint16(vo)
		}
		switch attrType {
		case 0x10: // $STANDARD_INFORMATION
			if residentFlag == 0 {
				if siData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(siData) >= 32 {
					rec.SI_Crtime = filetimeToString(binary.LittleEndian.Uint64(siData[0:8]))
					rec.SI_Mtime = filetimeToString(binary.LittleEndian.Uint64(siData[8:16]))
					rec.SI_CtTime = filetimeToString(binary.LittleEndian.Uint64(siData[16:24]))
					rec.SI_Atime = filetimeToString(binary.LittleEndian.Uint64(siData[24:32]))
				}
			}
		case 0x30: // $FILE_NAME
			if residentFlag == 0 {
				if fnData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(fnData) >= 66 {
					var fn FileNameAttribute
					// The upper 16 bits of the parent reference are a
					// sequence number; keep only the 48-bit record number.
					fn.ParentRef = binary.LittleEndian.Uint64(fnData[0:8]) & 0x0000FFFFFFFFFFFF
					fn.Crtime = filetimeToString(binary.LittleEndian.Uint64(fnData[8:16]))
					fn.Mtime = filetimeToString(binary.LittleEndian.Uint64(fnData[16:24]))
					fn.CtTime = filetimeToString(binary.LittleEndian.Uint64(fnData[24:32]))
					fn.Atime = filetimeToString(binary.LittleEndian.Uint64(fnData[32:40]))
					if filenameLenBytes, ok := safeSlice(fnData, 64, 1); ok {
						filenameLen := filenameLenBytes[0]
						if nameBytes, ok := safeSlice(fnData, 66, int(filenameLen)*2); ok {
							fn.Filename = decodeUTF16String(nameBytes)
						}
					}
					rec.FileNames = append(rec.FileNames, fn)
				}
			}
		case 0x40: // $OBJECT_ID
			if residentFlag == 0 {
				if objData, ok := safeSlice(data, offset+int(valOffset), 16); ok {
					rec.ObjectID = fmt.Sprintf("%x", objData)
				}
			}
		case 0x50: // $SECURITY_DESCRIPTOR
			if residentFlag == 0 {
				if secData, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok && len(secData) >= 20 {
					rec.SecurityDescriptor = map[string]interface{}{
						"raw": fmt.Sprintf("%x", secData),
					}
				}
			}
		case 0x80: // $DATA
			var ds DataStream
			// Bytes 10-11 of the attribute header hold the *offset* of the
			// attribute name (relative to the attribute start), not the name
			// itself, so resolve that offset before decoding the name.
			if nameLenBytes, ok := safeSlice(data, offset+9, 1); ok {
				if nameLen := nameLenBytes[0]; nameLen > 0 {
					if nameOffBytes, ok := safeSlice(data, offset+10, 2); ok {
						nameOff := binary.LittleEndian.Uint16(nameOffBytes)
						if nameBytes, ok := safeSlice(data, offset+int(nameOff), int(nameLen)*2); ok {
							ds.Name = decodeUTF16String(nameBytes)
						}
					}
				}
			}
			if residentFlag == 0 {
				ds.Resident = true
				ds.NonResident = false
				if content, ok := safeSlice(data, offset+int(valOffset), int(valLen)); ok {
					ds.ContentBase64 = base64.StdEncoding.EncodeToString(content)
					if ds.Name == "Zone.Identifier" {
						zoneInfo := parseZoneIdentifier(content)
						ds.Name = fmt.Sprintf("Zone.Identifier %v", zoneInfo)
					}
				}
			} else {
				ds.Resident = false
				ds.NonResident = true
			}
			rec.DataStreams = append(rec.DataStreams, ds)
		}
		offset += int(attrLen)
	}
}

// parseMFTRecord parses a 1024-byte MFT record.
func parseMFTRecord(data []byte) (*MFTRecord, error) {
	if len(data) < 46 {
		return nil, fmt.Errorf("data too short to be a valid record")
	}
	if string(data[:4]) != "FILE" {
		return nil, fmt.Errorf("invalid record header")
	}
	rec := &MFTRecord{
		UpdateSeqOffset: binary.LittleEndian.Uint16(data[4:6]),
		UpdateSeqSize:   binary.LittleEndian.Uint16(data[6:8]),
		LSN:             binary.LittleEndian.Uint64(data[8:16]),
		SequenceNumber:  binary.LittleEndian.Uint16(data[16:18]),
		HardLinkCount:   binary.LittleEndian.Uint16(data[18:20]),
		FirstAttrOffset: binary.LittleEndian.Uint16(data[20:22]),
		Flags:           binary.LittleEndian.Uint16(data[22:24]),
		RealSize:        binary.LittleEndian.Uint32(data[24:28]),
		AllocatedSize:   binary.LittleEndian.Uint32(data[28:32]),
		BaseFileRecord:  binary.LittleEndian.Uint64(data[32:40]),
		NextAttrId:      binary.LittleEndian.Uint16(data[40:42]),
		RecordNumber:    binary.LittleEndian.Uint32(data[42:46]),
	}
	parseAttributes(data, rec)
	return rec, nil
}
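
// applyFixups is a minimal sketch of NTFS update sequence ("fixup")
// handling, which the pipeline above does not perform: on disk, the last
// two bytes of every 512-byte sector of a record are overwritten with the
// update sequence number (USN), and the original bytes are saved in the
// update sequence array. A parser that needs byte-accurate attribute values
// would restore them before calling parseAttributes. This helper is an
// illustration and is not wired into the carving flow.
func applyFixups(data []byte, usaOffset, usaSize uint16) bool {
	// usaSize counts 2-byte entries: the USN itself plus one saved pair per
	// 512-byte sector.
	if usaSize < 2 || int(usaOffset)+int(usaSize)*2 > len(data) {
		return false
	}
	usn := data[usaOffset : usaOffset+2]
	for i := 1; i < int(usaSize); i++ {
		end := i * 512
		if end > len(data) {
			return false
		}
		// Each sector must currently end with the USN; otherwise the record
		// is torn and should not be trusted.
		if !bytes.Equal(data[end-2:end], usn) {
			return false
		}
		copy(data[end-2:end], data[int(usaOffset)+i*2:int(usaOffset)+i*2+2])
	}
	return true
}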

// --------------------
// Parallel Processing and Main
// --------------------

var (
	dumpFlag   = flag.Bool("dump", false, "Dump raw MFT records (off by default)")
	jsonlOut   = flag.String("jsonl", "", "Output JSONL file path (if not provided, a default is used per input file)")
	dumpFolder = flag.String("dump-folder", "", "Folder to dump raw MFT records (if not provided, a default folder is created per input file)")
)
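
// Example invocation (the binary name and paths are hypothetical):
//
//	mftcarve -dump -jsonl results.jsonl disk.img more_images/
//
// carves every "FILE0"-prefixed 1024-byte candidate out of disk.img and out
// of each file found under more_images/, writing one JSON object per record
// to results.jsonl and dumping raw records alongside.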

func processImageFile(inputFile string, wg *sync.WaitGroup) {
	defer wg.Done()

	f, err := os.Open(inputFile)
	if err != nil {
		log.Printf("Failed to open %s: %v", inputFile, err)
		return
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		log.Printf("Failed to stat %s: %v", inputFile, err)
		return
	}
	fileSize := fi.Size()

	timestamp := time.Now().Format("20060102150405")
	baseName := filepath.Base(inputFile)

	// Determine the dump folder: use the flag if provided, else default to
	// "<inputFile>_<timestamp>".
	var outDir string
	if *dumpFolder != "" {
		outDir = *dumpFolder
		// Ensure the folder exists.
		if err := os.MkdirAll(outDir, 0755); err != nil {
			log.Printf("Failed to create dump folder %s: %v", outDir, err)
			return
		}
	} else {
		outDir = fmt.Sprintf("%s_%s", baseName, timestamp)
		if err := os.Mkdir(outDir, 0755); err != nil {
			log.Printf("Failed to create output directory for %s: %v", inputFile, err)
			return
		}
	}

	// Determine the JSONL output file: use the flag if provided, else default
	// to "<inputFile>_<timestamp>.jsonl".
	var jsonlPath string
	if *jsonlOut != "" {
		jsonlPath = *jsonlOut
	} else {
		jsonlPath = fmt.Sprintf("%s_%s.jsonl", baseName, timestamp)
	}
	// Open the JSONL file in append mode (creating it if it doesn't exist).
	jsonlFile, err := os.OpenFile(jsonlPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		log.Printf("Failed to open JSONL file for %s: %v", inputFile, err)
		return
	}
	defer jsonlFile.Close()

	carvedChan := make(chan CarvedRecord, 100)
	parsedChan := make(chan ParsedRecord, 100)

	numWorkers := runtime.NumCPU()
	var workerWg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		workerWg.Add(1)
		go func() {
			defer workerWg.Done()
			for carved := range carvedChan {
				// If dumping is enabled, write the raw record to its own file.
				if *dumpFlag {
					recordFileName := filepath.Join(outDir, fmt.Sprintf("%d.mftrecord", carved.Offset))
					if err := os.WriteFile(recordFileName, carved.RecordBytes, 0644); err != nil {
						log.Printf("Failed to write record at offset %d in %s: %v", carved.Offset, inputFile, err)
					}
				}
				mft, err := parseMFTRecord(carved.RecordBytes)
				if err != nil {
					continue // not a parseable record; skip the false positive
				}
				recordMap := map[string]interface{}{
					"input_image":         inputFile,
					"offset":              carved.Offset,
					"update_seq_offset":   mft.UpdateSeqOffset,
					"update_seq_size":     mft.UpdateSeqSize,
					"lsn":                 mft.LSN,
					"sequence_number":     mft.SequenceNumber,
					"hard_link_count":     mft.HardLinkCount,
					"first_attr_offset":   mft.FirstAttrOffset,
					"flags":               mft.Flags,
					"real_size":           mft.RealSize,
					"allocated_size":      mft.AllocatedSize,
					"base_file_record":    mft.BaseFileRecord,
					"next_attr_id":        mft.NextAttrId,
					"record_number":       mft.RecordNumber,
					"si_crtime":           mft.SI_Crtime,
					"si_mtime":            mft.SI_Mtime,
					"si_ctime":            mft.SI_CtTime,
					"si_atime":            mft.SI_Atime,
					"object_id":           mft.ObjectID,
					"security_descriptor": mft.SecurityDescriptor,
					"file_names":          mft.FileNames,
					"data_streams":        mft.DataStreams,
				}
				parsedChan <- ParsedRecord{Offset: carved.Offset, RecordJSON: recordMap}
			}
		}()
	}

	// A single writer goroutine serializes all JSONL output for this file.
	var writerWg sync.WaitGroup
	writerWg.Add(1)
	go func() {
		defer writerWg.Done()
		encoder := json.NewEncoder(jsonlFile)
		for pr := range parsedChan {
			if err := encoder.Encode(pr.RecordJSON); err != nil {
				log.Printf("Error writing JSON record at offset %d in %s: %v", pr.Offset, inputFile, err)
			}
		}
	}()

	// Scan the image in 1 MiB chunks for the "FILE0" signature. Modern MFT
	// records begin with "FILE" followed by an update-sequence offset of
	// 0x30 stored little-endian, so byte 4 of the record is the ASCII
	// digit '0'.
	reader := bufio.NewReader(f)
	const recordSize = 1024
	const chunkSize = 1024 * 1024
	pattern := []byte("FILE0")
	var fileOffset int64
	var leftover []byte
	lastPrint := time.Now()

	for {
		chunk := make([]byte, chunkSize)
		n, err := reader.Read(chunk)
		if n == 0 {
			break
		}
		// Prepend the tail of the previous chunk so a signature straddling
		// a chunk boundary is still found exactly once.
		data := append(leftover, chunk[:n]...)
		if time.Since(lastPrint) > 5*time.Second {
			perc := float64(fileOffset) / float64(fileSize) * 100.0
			log.Printf("Processing %s: %.2f%% complete", inputFile, perc)
			lastPrint = time.Now()
		}
		searchLimit := len(data) - len(pattern)
		for i := 0; i <= searchLimit; i++ {
			if bytes.Equal(data[i:i+len(pattern)], pattern) {
				globalOffset := fileOffset - int64(len(leftover)) + int64(i)
				if globalOffset+recordSize > fileSize {
					continue
				}
				// Re-read the full 1024-byte record from the file, since it
				// may extend past the end of the current chunk. ReadAt does
				// not disturb the bufio.Reader's position.
				recordBytes := make([]byte, recordSize)
				if _, err := f.ReadAt(recordBytes, globalOffset); err != nil {
					log.Printf("Failed to read record at offset %d in %s: %v", globalOffset, inputFile, err)
					continue
				}
				carvedChan <- CarvedRecord{Offset: globalOffset, RecordBytes: recordBytes}
			}
		}
		// Carry over len(pattern)-1 bytes so boundary-straddling matches are
		// caught on the next iteration without being double-counted.
		if len(data) >= len(pattern)-1 {
			leftover = data[len(data)-(len(pattern)-1):]
		} else {
			leftover = data
		}
		fileOffset += int64(n)
		if err != nil {
			if err != io.EOF {
				log.Printf("Read error in %s at offset %d: %v", inputFile, fileOffset, err)
			}
			break
		}
	}

	close(carvedChan)
	workerWg.Wait()
	close(parsedChan)
	writerWg.Wait()
	log.Printf("Finished processing %s. Dumps (if enabled) are in %s and the JSONL file is %s", inputFile, outDir, jsonlPath)
}

func main() {
	flag.Parse()
	if flag.NArg() == 0 {
		fmt.Printf("Usage: %s [options] <disk image files or directories>\n", os.Args[0])
		os.Exit(1)
	}

	// Expand directory arguments into the individual files they contain.
	var files []string
	for _, arg := range flag.Args() {
		fi, err := os.Stat(arg)
		if err != nil {
			log.Printf("Error stating %s: %v", arg, err)
			continue
		}
		if fi.IsDir() {
			err := filepath.Walk(arg, func(path string, info os.FileInfo, err error) error {
				if err != nil {
					return nil // skip unreadable entries rather than abort the walk
				}
				if !info.IsDir() {
					files = append(files, path)
				}
				return nil
			})
			if err != nil {
				log.Printf("Error walking directory %s: %v", arg, err)
			}
		} else {
			files = append(files, arg)
		}
	}

	totalFiles := len(files)
	if totalFiles == 0 {
		log.Println("No input files found.")
		return
	}
	log.Printf("Found %d files to process.", totalFiles)

	// Process up to NumCPU files concurrently, gated by a semaphore.
	var wg sync.WaitGroup
	concurrentFiles := runtime.NumCPU()
	sem := make(chan struct{}, concurrentFiles)
	for i, file := range files {
		wg.Add(1)
		sem <- struct{}{}
		go func(i int, file string) {
			defer func() { <-sem }()
			log.Printf("Starting file %d/%d: %s", i+1, totalFiles, file)
			processImageFile(file, &wg)
		}(i, file)
	}
	wg.Wait()
	log.Println("All files processed.")
}