Add FOR610 tool/workflow knowledge base and data pipeline

Build a comprehensive malware analysis knowledge base from 3 sources:
- SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes
- REMnux salt-states: 340 packages parsed from GitHub
- REMnux docs: 280+ tools scraped from docs.remnux.org

Master inventory merges all sources into 447 tools with help tiers (rich/standard/basic). Pipeline generates: tools.db (397 entries), 397 cheatsheets with multi-tool recipes, 15 workflow guides, 224 TLDR pages, and coverage reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 17:38:15 +01:00
parent 06ebb09ab0
commit f3ccc09c3d
663 changed files with 36339 additions and 1 deletions
@@ -0,0 +1,402 @@
+# Multi-Tool Analysis Recipes
+# These are pipe chains and multi-step commands that combine tools
+# Each recipe is cross-referenced to all tools it uses
+
+recipes:
+
+  # ============================================================
+  # OFFICE DOCUMENT ANALYSIS
+  # ============================================================
+
+  - id: extract-base64-ps-from-vba
+    name: 'Extract Base64 PowerShell from Office Macro'
+    task: 'Get encoded PowerShell payload hidden in a VBA UserForm stream'
+    tools: [oledump-py, base64dump-py]
+    commands:
+      - '# List streams — find macro (M) and data streams'
+      - 'oledump.py <document>'
+      - '# Extract VBA source to understand what the macro does'
+      - 'oledump.py <document> -s <macro_stream> -v'
+      - '# Scan data stream for Base64 strings'
+      - 'oledump.py <document> -s <data_stream> -d | base64dump.py -n 10'
+      - '# Decode the selected Base64 hit to file (-s takes the item number from the listing)'
+      - 'oledump.py <document> -s <data_stream> -d | base64dump.py -s 1 -d > payload.ps1'
+    lab: '3.4'
+
+  - id: vba-number-string-decode
+    name: 'Decode VBA Number Arrays to Strings'
+    task: 'Convert VBA macros that use Chr() number sequences into readable text'
+    tools: [oledump-py, numbers-to-string-py]
+    commands:
+      - '# Extract VBA and convert number sequences to text'
+      - 'oledump.py <document> -s <stream> -v | numbers-to-string.py -j'
+      - '# Same but with line-break formatting for readability'
+      - "oledump.py <document> -s <stream> -v | numbers-to-string.py -j | sed 's/;/;\\n/g'"
+    lab: '3.3'
+
+  - id: multi-stage-base64-gzip
+    name: 'Decode Base64 + Gzip Payload'
+    task: 'Handle double-encoded payloads: Base64 wrapping gzip-compressed content'
+    tools: [base64dump-py, gunzip]
+    commands:
+      - '# Find Base64 strings in the script'
+      - 'base64dump.py <script.ps1> -n 10'
+      - '# Decode Base64 and decompress gzip in one chain'
+      - 'base64dump.py <script.ps1> -s <selection> -d | gunzip > decoded.ps1'
+    lab: '3.4'
+
+  - id: base64-xor-shellcode
+    name: 'Decode Base64 + XOR Shellcode'
+    task: 'Extract shellcode encoded as Base64 with an XOR key'
+    tools: [base64dump-py, translate-py]
+    commands:
+      - '# Find Base64 strings'
+      - 'base64dump.py <script.ps1> -n 10'
+      - '# Decode Base64, then XOR with key'
+      - "base64dump.py <script.ps1> -s <selection> -d | translate.py 'byte ^ <key>' > shellcode.bin"
+    lab: '3.4'
+
+  - id: office-full-decode-chain
+    name: 'Full Office Macro Decode Chain'
+    task: 'Complete pipeline: Office doc → VBA → Base64 → gunzip → XOR → shellcode'
+    tools: [oledump-py, base64dump-py, gunzip, translate-py, scdbgc]
+    commands:
+      - '# Step 1: List streams and extract VBA'
+      - 'oledump.py <document>'
+      - 'oledump.py <document> -s <macro_stream> -v'
+      - '# Step 2: Extract Base64 from data stream'
+      - 'oledump.py <document> -s <data_stream> -d | base64dump.py -s 1 -d > stage1.ps1'
+      - '# Step 3: Decode second Base64 layer + decompress'
+      - 'base64dump.py stage1.ps1 -s 3 -d | gunzip > stage2.ps1'
+      - '# Step 4: XOR decode the shellcode'
+      - "base64dump.py stage2.ps1 -s 2 -d | translate.py 'byte ^ 35' > shellcode.bin"
+      - '# Step 5: Emulate the shellcode'
+      - 'scdbgc /f shellcode.bin /s -1'
+    lab: '3.4'
+
+  - id: password-protected-office
+    name: 'Decrypt Password-Protected Office Document'
+    task: 'Remove password protection before analysis'
+    tools: [msoffcrypto-tool]
+    commands:
+      - '# Common malware passwords: infected, malware, password, 123456'
+      - 'msoffcrypto-tool -p infected <encrypted.docx> <decrypted.docx>'
+
+  - id: excel-xlm-macros
+    name: 'Deobfuscate Excel 4.0 (XLM) Macros'
+    task: 'Extract and decode legacy Excel macros hidden in sheets'
+    tools: [xlmmacrodeobfuscator]
+    commands:
+      - '# Deobfuscate XLM macros'
+      - 'xlmdeobfuscator --file <spreadsheet.xlsm>'
+
+  - id: vba-pcode-decompile
+    name: 'Recover VBA from p-code (source removed)'
+    task: 'Decompile VBA when source code has been stripped, only p-code remains'
+    tools: [pcode2code, pcodedmp]
+    commands:
+      - '# Decompile p-code back to VBA source'
+      - 'pcode2code <document.docm>'
+      - '# Or disassemble p-code to assembly'
+      - 'pcodedmp <document.docm>'
+
+  # ============================================================
+  # RTF DOCUMENT ANALYSIS
+  # ============================================================
+
+  - id: rtf-shellcode-extraction
+    name: 'Extract Shellcode from RTF Document'
+    task: 'Find and extract embedded shellcode from a malicious RTF file'
+    tools: [rtfdump-py, xorsearch, scdbgc]
+    commands:
+      - '# Scan RTF structure — look for groups with lots of hex data'
+      - 'rtfdump.py <document.rtf>'
+      - '# Extract the hex-heavy group as binary'
+      - 'rtfdump.py <document.rtf> -s <group_num> -H -d > extracted.bin'
+      - '# Scan for shellcode patterns (even XOR-encoded)'
+      - 'XORSearch -W -d 3 extracted.bin'
+      - '# Emulate shellcode at found offset'
+      - 'scdbgc /f extracted.bin /foff <offset> /s -1'
+    lab: '3.5'
+
+  # ============================================================
+  # PDF ANALYSIS
+  # ============================================================
+
+  - id: pdf-object-extraction
+    name: 'Extract Embedded Object from PDF'
+    task: 'Pull out an embedded image, JavaScript, or file from a PDF object'
+    tools: [pdfid-py, pdf-parser-py, feh]
+    commands:
+      - '# Scan for suspicious keywords'
+      - 'pdfid.py <document.pdf>'
+      - '# Find objects containing the keyword'
+      - 'pdf-parser.py <document.pdf> -s /URI'
+      - '# Extract all values for that keyword'
+      - 'pdf-parser.py <document.pdf> -k /URI'
+      - '# Dump a specific object to file'
+      - 'pdf-parser.py <document.pdf> -o <obj_id> -d extracted_object'
+      - '# View extracted image'
+      - 'feh extracted_object &'
+    lab: '3.1'
+
+  - id: pdf-javascript-extraction
+    name: 'Extract JavaScript from PDF'
+    task: 'Find and extract embedded JavaScript from a PDF file'
+    tools: [pdfid-py, pdf-parser-py, peepdf]
+    commands:
+      - '# Check if PDF contains JavaScript'
+      - 'pdfid.py <document.pdf>'
+      - '# Find objects with JavaScript'
+      - 'pdf-parser.py <document.pdf> -s /JavaScript'
+      - '# Interactive analysis with peepdf'
+      - 'peepdf -i <document.pdf>'
+
+  # ============================================================
+  # JAVASCRIPT DEOBFUSCATION
+  # ============================================================
+
+  - id: js-deobfuscation-spidermonkey
+    name: 'Deobfuscate JavaScript with SpiderMonkey'
+    task: 'Execute obfuscated JS safely using SpiderMonkey with API simulation'
+    tools: [js-beautify, spidermonkey]
+    commands:
+      - '# Beautify compressed JavaScript'
+      - 'js-beautify <malicious.js> > readable.js'
+      - '# Execute with objects.js to simulate browser/WScript APIs'
+      - 'js -f /usr/share/remnux/objects.js -f <malicious.js> > decoded.js'
+      - '# If script expects location.href, edit objects.js first:'
+      - 'cp /usr/share/remnux/objects.js .'
+      - "# Edit objects.js to set: location = { href: 'http://expected-url' }"
+      - 'js -f objects.js -f <malicious.js> > decoded.js'
+    lab: '3.6, 3.7'
+
+  - id: js-null-byte-cleanup
+    name: 'Clean Null Bytes from UTF-16 JavaScript'
+    task: 'Remove null byte padding from UTF-16 encoded JavaScript before analysis'
+    tools: [spidermonkey, js-beautify]
+    commands:
+      - '# Check for null bytes (look for 00 in hex)'
+      - 'xxd <script.js> | head -2'
+      - '# Remove null bytes'
+      - "cat <script.js> | tr -d '\\00' > clean.js"
+      - '# Then deobfuscate'
+      - 'js -f /usr/share/remnux/objects.js -f clean.js > decoded.js'
+      - '# Beautify the result'
+      - 'js-beautify decoded.js > final.js'
+    lab: '4.5'
+
+  # ============================================================
+  # SHELLCODE ANALYSIS
+  # ============================================================
+
+  - id: shellcode-emulate-with-offset
+    name: 'Emulate Shellcode at Specific Offset'
+    task: 'Run shellcode that starts at an offset within a larger binary'
+    tools: [scdbgc]
+    commands:
+      - '# Emulate from file start'
+      - 'scdbgc /f <shellcode.bin> /s -1'
+      - '# Emulate from specific offset (hex)'
+      - 'scdbgc /f <shellcode.bin> /foff <hex_offset> /s -1'
+      - '# Emulate with a file handle pre-opened (for exploits)'
+      - 'scdbgc /f <shellcode.bin> /foff <offset> /fopen <carrier.doc> /s -1'
+    lab: '3.5, 4.6'
+
+  - id: cobalt-strike-beacon-parse
+    name: 'Parse Cobalt Strike Beacon Configuration'
+    task: 'Extract C2 config from a Cobalt Strike beacon or shellcode'
+    tools: [1768-py, yara]
+    commands:
+      - '# Scan with YARA for CS signatures'
+      - 'yara-rules <sample>'
+      - '# Extract beacon configuration'
+      - '1768.py <sample_or_shellcode.bin>'
+    lab: '3.4'
+
+  - id: shellcode-to-exe
+    name: 'Convert Shellcode to Executable'
+    task: 'Wrap raw shellcode in a PE for analysis in disassemblers'
+    tools: [shcode2exe]
+    commands:
+      - '# Convert 32-bit shellcode to EXE (output path is given with -o)'
+      - 'shcode2exe -o <output.exe> <shellcode.bin>'
+
+  # ============================================================
+  # STRING DEOBFUSCATION
+  # ============================================================
+
+  - id: xor-key-brute-force
+    name: 'Brute-Force XOR Key'
+    task: 'Find the XOR key used to encode strings in a binary'
+    tools: [brxor-py, bbcrack, xorsearch, xortool]
+    commands:
+      - '# Quick check for XOR-encoded URLs/PE headers'
+      - 'XORSearch <file> http:'
+      - '# Brute-force single-byte XOR keys'
+      - 'brxor.py <file>'
+      - '# Try XOR, ROL, ADD combinations'
+      - 'bbcrack -l 1 <file>'
+      - '# Guess multi-byte XOR key length and value'
+      - 'xortool <file>'
+      - '# Decode with known key'
+      - "xortool-xor -s '<key>' -i <encoded> -o <decoded>"
+    lab: '5.2'
+
+  - id: stack-string-extraction
+    name: 'Extract Stack-Built Strings'
+    task: 'Decode strings assembled byte-by-byte on the stack'
+    tools: [strdeob-pl, floss]
+    commands:
+      - '# Automatic stack string recovery'
+      - 'strdeob.pl <sample>'
+      - '# FLOSS automatic deobfuscation (static + stack + decoded)'
+      - 'floss <sample>'
+      - '# FLOSS skip static strings, only show decoded'
+      - 'floss --no-static -- <sample>'
+    lab: '5.2'
+
+  - id: cyberchef-xor-decode
+    name: 'Visual XOR/Base64 Decode with CyberChef'
+    task: "Use CyberChef's recipe builder for multi-step decoding"
+    tools: [cyberchef]
+    commands:
+      - '# Launch CyberChef'
+      - 'cyberchef'
+      - '# Common recipe: From Hex → XOR (key) → extract strings'
+      - '# Common recipe: From Base64 → Decode text UTF-16LE'
+
+  # ============================================================
+  # MALWARE EMULATION & CAPABILITY ANALYSIS
+  # ============================================================
+
+  - id: speakeasy-emulation-with-json
+    name: 'Emulate Malware and Extract API Calls'
+    task: 'Emulate a Windows binary on Linux and analyze its API usage'
+    tools: [speakeasy, jq]
+    commands:
+      - '# Emulate and capture both JSON report and text log'
+      - 'speakeasy -t <sample> -o report.json 2> report.txt'
+      - '# Extract all API names called'
+      - "jq '.entry_points[].apis[].api_name' report.json"
+      - '# Extract unique API names'
+      - "jq -r '.entry_points[].apis[].api_name' report.json | sort -u"
+    lab: '1.4'
+
+  - id: capa-capability-filter
+    name: 'Filter Capabilities by Technique'
+    task: 'Find specific capabilities in capa output'
+    tools: [capa]
+    commands:
+      - '# Full capabilities report'
+      - 'capa <sample>'
+      - '# Verbose with rule matches'
+      - 'capa -vv <sample>'
+      - '# Filter for specific technique'
+      - "capa -vv <sample> | grep -A7 '<technique_name>'"
+      - '# Find injection-related capabilities'
+      - "capa -vv <sample> | grep -A7 'inject\\|hollow\\|suspend'"
+    lab: '1.4, 5.4'
+
+  # ============================================================
+  # NETWORK ANALYSIS
+  # ============================================================
+
+  - id: pcap-file-carving
+    name: 'Extract Files from Network Capture'
+    task: 'Carve downloaded payloads and exfiltrated data from PCAP'
+    tools: [tcpxtract, tcpflow, networkminer]
+    commands:
+      - '# Carve files using signatures'
+      - 'tcpxtract -f <capture.pcap> -o carved/'
+      - '# Extract individual TCP streams'
+      - 'tcpflow -r <capture.pcap> -o streams/'
+      - '# Or use NetworkMiner for automated extraction'
+      - 'NetworkMiner --pcap <capture.pcap>'
+
+  - id: dns-interception-setup
+    name: 'Set Up DNS + HTTP Interception'
+    task: 'Redirect all malware DNS queries and serve fake HTTP responses'
+    tools: [fakedns, httpd, inetsim]
+    commands:
+      - '# Option A: Simple DNS + HTTP'
+      - 'fakedns &'
+      - 'httpd &'
+      - '# Option B: Full service emulation (HTTP, HTTPS, DNS, FTP, SMTP)'
+      - 'inetsim'
+      - '# Verify DNS is working'
+      - 'nslookup anything.com'
+      - '# Redirect hardcoded IPs too'
+      - 'iptables -t nat -A PREROUTING -i eth0 -j REDIRECT'
+
+  # ============================================================
+  # .NET ANALYSIS
+  # ============================================================
+
+  - id: dotnet-decompile-cli
+    name: 'Decompile .NET on Command Line'
+    task: 'Decompile a .NET assembly to C# source on REMnux'
+    tools: [ilspycmd, de4dot]
+    commands:
+      - '# Decompile to C# source'
+      - 'ilspycmd <assembly.exe> > source.cs'
+      - '# Search for suspicious patterns'
+      - "grep -n 'Assembly.Load\\|WebClient\\|Process.Start' source.cs"
+      - '# If obfuscated, deobfuscate first'
+      - 'de4dot <assembly.exe>'
+      - 'ilspycmd <assembly-cleaned.exe> > source_clean.cs'
+    lab: '4.8'
+
+  # ============================================================
+  # MEMORY FORENSICS
+  # ============================================================
+
+  - id: volatility-quick-triage
+    name: 'Quick Memory Dump Triage'
+    task: 'Fast initial assessment of a memory dump'
+    tools: [volatility3]
+    commands:
+      - '# Identify OS'
+      - 'vol3 -f <dump> windows.info'
+      - '# Process tree (spot anomalies)'
+      - 'vol3 -f <dump> windows.pstree'
+      - '# Network connections'
+      - 'vol3 -f <dump> windows.netscan'
+      - '# Injected code detection'
+      - 'vol3 -f <dump> windows.malfind'
+
+  # ============================================================
+  # ANDROID ANALYSIS
+  # ============================================================
+
+  - id: apk-quick-triage
+    name: 'Quick APK Triage'
+    task: 'Fast initial assessment of a suspicious Android app'
+    tools: [apkid, apktool, jadx]
+    commands:
+      - '# Check for packers/obfuscators'
+      - 'apkid <app.apk>'
+      - '# Decompile to smali + resources'
+      - 'apktool d <app.apk> -o output/'
+      - '# Check permissions'
+      - "grep 'uses-permission' output/AndroidManifest.xml"
+      - '# Decompile to Java source'
+      - 'jadx <app.apk> -d src/'
+
+  # ============================================================
+  # EMAIL ANALYSIS
+  # ============================================================
+
+  - id: email-attachment-extraction
+    name: 'Extract and Triage Email Attachments'
+    task: 'Pull attachments from an email and identify their types'
+    tools: [emldump-py, file, sha256sum]
+    commands:
+      - '# List email structure'
+      - 'emldump.py <email.eml>'
+      - '# Extract all attachments'
+      - 'emldump.py <email.eml> -d'
+      - '# Identify file types'
+      - 'file attachment_*'
+      - '# Compute hashes for lookup'
+      - 'sha256sum attachment_*'