Files
tobias f3ccc09c3d Add FOR610 tool/workflow knowledge base and data pipeline
Build comprehensive malware analysis knowledge base from 3 sources:
- SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes
- REMnux salt-states: 340 packages parsed from GitHub
- REMnux docs: 280+ tools scraped from docs.remnux.org

Master inventory merges all sources into 447 tools with help tiers
(rich/standard/basic). Pipeline generates: tools.db (397 entries),
397 cheatsheets with multi-tool recipes, 15 workflow guides, 224
TLDR pages, and coverage reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 17:38:15 +01:00

403 lines
15 KiB
YAML

# Multi-Tool Analysis Recipes
# These are pipe chains and multi-step commands that combine tools
# Each recipe is cross-referenced to all tools it uses
recipes:
# ============================================================
# OFFICE DOCUMENT ANALYSIS
# ============================================================
# Recipe: dump an OLE data stream with oledump.py and pipe it into
# base64dump.py to locate and decode an embedded PowerShell payload.
# Entries in `commands` that begin with "#" are quoted strings (step
# descriptions stored as data), not YAML comments.
# NOTE(review): the last step says "longest" but passes `-s 1`; confirm
# that item 1 is the intended selection for every document.
- id: extract-base64-ps-from-vba
  name: "Extract Base64 PowerShell from Office Macro"
  task: "Get encoded PowerShell payload hidden in a VBA UserForm stream"
  tools:
  - oledump-py
  - base64dump-py
  commands:
  - "# List streams — find macro (M) and data streams"
  - "oledump.py <document>"
  - "# Extract VBA source to understand what the macro does"
  - "oledump.py <document> -s <macro_stream> -v"
  - "# Scan data stream for Base64 strings"
  - "oledump.py <document> -s <data_stream> -d | base64dump.py -n 10"
  - "# Decode the longest Base64 hit to file"
  - "oledump.py <document> -s <data_stream> -d | base64dump.py -s 1 -d > payload.ps1"
  lab: "3.4"
# Recipe: feed VBA source extracted by oledump.py into
# numbers-to-string.py; the second variant adds a sed pass that inserts
# a line break after every ";".
- id: vba-number-string-decode
  name: "Decode VBA Number Arrays to Strings"
  task: "Convert VBA macros that use Chr() number sequences into readable text"
  tools:
  - oledump-py
  - numbers-to-string-py
  commands:
  - "# Extract VBA and convert number sequences to text"
  - "oledump.py <document> -s <stream> -v | numbers-to-string.py -j"
  - "# Same but with line-break formatting for readability"
  # The doubled backslash is a YAML double-quote escape: sed receives \n.
  - "oledump.py <document> -s <stream> -v | numbers-to-string.py -j | sed 's/;/;\\n/g'"
  lab: "3.3"
# Recipe: list Base64 candidates in a script, then decode the selected
# one and pipe the bytes through gunzip.
- id: multi-stage-base64-gzip
  name: "Decode Base64 + Gzip Payload"
  task: "Handle double-encoded payloads: Base64 wrapping gzip-compressed content"
  tools:
  - base64dump-py
  - gunzip
  commands:
  - "# Find Base64 strings in the script"
  - "base64dump.py <script.ps1> -n 10"
  - "# Decode Base64 and decompress gzip in one chain"
  - "base64dump.py <script.ps1> -s <selection> -d | gunzip > decoded.ps1"
  lab: "3.4"
# Recipe: decode a selected Base64 string and XOR every byte with a
# caller-supplied key via translate.py, writing the result to a file.
- id: base64-xor-shellcode
  name: "Decode Base64 + XOR Shellcode"
  task: "Extract shellcode encoded as Base64 with an XOR key"
  tools:
  - base64dump-py
  - translate-py
  commands:
  - "# Find Base64 strings"
  - "base64dump.py <script.ps1> -n 10"
  - "# Decode Base64, then XOR with key"
  - "base64dump.py <script.ps1> -s <selection> -d | translate.py 'byte ^ <key>' > shellcode.bin"
  lab: "3.4"
# Recipe: five-step chain from an Office document to emulated shellcode.
# Unlike the placeholder-based recipes, steps 3 and 4 hard-code the
# selections (`-s 3`, `-s 2`) and the XOR key (35).
# NOTE(review): presumably these values are specific to the referenced
# lab sample — confirm before reusing on other documents.
- id: office-full-decode-chain
  name: "Full Office Macro Decode Chain"
  task: "Complete pipeline: Office doc → VBA → Base64 → gunzip → XOR → shellcode"
  tools:
  - oledump-py
  - base64dump-py
  - gunzip
  - translate-py
  - scdbgc
  commands:
  - "# Step 1: List streams and extract VBA"
  - "oledump.py <document>"
  - "oledump.py <document> -s <macro_stream> -v"
  - "# Step 2: Extract Base64 from data stream"
  - "oledump.py <document> -s <data_stream> -d | base64dump.py -s 1 -d > stage1.ps1"
  - "# Step 3: Decode second Base64 layer + decompress"
  - "base64dump.py stage1.ps1 -s 3 -d | gunzip > stage2.ps1"
  - "# Step 4: XOR decode the shellcode"
  - "base64dump.py stage2.ps1 -s 2 -d | translate.py 'byte ^ 35' > shellcode.bin"
  - "# Step 5: Emulate the shellcode"
  - "scdbgc /f shellcode.bin /s -1"
  lab: "3.4"
# Recipe: decrypt an encrypted Office file with msoffcrypto-tool, using
# the password "infected" as the example. This entry has no `lab` key.
- id: password-protected-office
  name: "Decrypt Password-Protected Office Document"
  task: "Remove password protection before analysis"
  tools:
  - msoffcrypto-tool
  commands:
  - "# Common malware passwords: infected, malware, password, 123456"
  - "msoffcrypto-tool -p infected <encrypted.docx> <decrypted.docx>"
# Recipe: run xlmdeobfuscator against a spreadsheet.
# The tool id (xlmmacrodeobfuscator) differs from the command name
# (xlmdeobfuscator); both are reproduced as found.
- id: excel-xlm-macros
  name: "Deobfuscate Excel 4.0 (XLM) Macros"
  task: "Extract and decode legacy Excel macros hidden in sheets"
  tools:
  - xlmmacrodeobfuscator
  commands:
  - "# Deobfuscate XLM macros"
  - "xlmdeobfuscator --file <spreadsheet.xlsm>"
# Recipe: two alternatives for documents whose VBA source was stripped:
# pcode2code (decompile) or pcodedmp (disassemble).
- id: vba-pcode-decompile
  name: "Recover VBA from p-code (source removed)"
  task: "Decompile VBA when source code has been stripped, only p-code remains"
  tools:
  - pcode2code
  - pcodedmp
  commands:
  - "# Decompile p-code back to VBA source"
  - "pcode2code <document.docm>"
  - "# Or disassemble p-code to assembly"
  - "pcodedmp <document.docm>"
# ============================================================
# RTF DOCUMENT ANALYSIS
# ============================================================
# Recipe: list RTF groups with rtfdump.py, dump one group as binary,
# scan it with XORSearch, then emulate at the reported offset with scdbgc.
- id: rtf-shellcode-extraction
  name: "Extract Shellcode from RTF Document"
  task: "Find and extract embedded shellcode from a malicious RTF file"
  tools:
  - rtfdump-py
  - xorsearch
  - scdbgc
  commands:
  - "# Scan RTF structure — look for groups with lots of hex data"
  - "rtfdump.py <document.rtf>"
  - "# Extract the hex-heavy group as binary"
  - "rtfdump.py <document.rtf> -s <group_num> -H -d > extracted.bin"
  - "# Scan for shellcode patterns (even XOR-encoded)"
  - "XORSearch -W -d 3 extracted.bin"
  - "# Emulate shellcode at found offset"
  - "scdbgc /f extracted.bin /foff <offset> /s -1"
  lab: "3.5"
# ============================================================
# PDF ANALYSIS
# ============================================================
# Recipe: triage a PDF with pdfid.py, search and extract by keyword with
# pdf-parser.py (the example keyword is /URI), dump one object to a file,
# and open it with feh.
- id: pdf-object-extraction
  name: "Extract Embedded Object from PDF"
  task: "Pull out an embedded image, JavaScript, or file from a PDF object"
  tools:
  - pdfid-py
  - pdf-parser-py
  - feh
  commands:
  - "# Scan for suspicious keywords"
  - "pdfid.py <document.pdf>"
  - "# Find objects containing the keyword"
  - "pdf-parser.py <document.pdf> -s /URI"
  - "# Extract all values for that keyword"
  - "pdf-parser.py <document.pdf> -k /URI"
  - "# Dump a specific object to file"
  - "pdf-parser.py <document.pdf> -o <obj_id> -d extracted_object"
  - "# View extracted image"
  - "feh extracted_object &"
  lab: "3.1"
# Recipe: check a PDF for JavaScript with pdfid.py, locate the objects
# with pdf-parser.py, then open the file in peepdf's interactive mode.
- id: pdf-javascript-extraction
  name: "Extract JavaScript from PDF"
  task: "Find and extract embedded JavaScript from a PDF file"
  tools:
  - pdfid-py
  - pdf-parser-py
  - peepdf
  commands:
  - "# Check if PDF contains JavaScript"
  - "pdfid.py <document.pdf>"
  - "# Find objects with JavaScript"
  - "pdf-parser.py <document.pdf> -s /JavaScript"
  - "# Interactive analysis with peepdf"
  - "peepdf -i <document.pdf>"
# ============================================================
# JAVASCRIPT DEOBFUSCATION
# ============================================================
# Recipe: beautify a script, then run it under the `js` interpreter with
# objects.js loaded first. The second half repeats the run with a local,
# edited copy of objects.js for scripts that read location.href.
- id: js-deobfuscation-spidermonkey
  name: "Deobfuscate JavaScript with SpiderMonkey"
  task: "Execute obfuscated JS safely using SpiderMonkey with API simulation"
  tools:
  - js-beautify
  - spidermonkey
  commands:
  - "# Beautify compressed JavaScript"
  - "js-beautify <malicious.js> > readable.js"
  - "# Execute with objects.js to simulate browser/WScript APIs"
  - "js -f /usr/share/remnux/objects.js -f <malicious.js> > decoded.js"
  - "# If script expects location.href, edit objects.js first:"
  - "cp /usr/share/remnux/objects.js ."
  - "# Edit objects.js to set: location = { href: 'http://expected-url' }"
  - "js -f objects.js -f <malicious.js> > decoded.js"
  lab: "3.6, 3.7"
# Recipe: strip NUL bytes from a script with tr, run the cleaned file
# under the `js` interpreter with objects.js, then beautify the output.
# `tools` lists js-beautify because the final step invokes it; recipes
# are meant to be cross-referenced to every tool they use.
- id: js-null-byte-cleanup
  name: "Clean Null Bytes from UTF-16 JavaScript"
  task: "Remove null byte padding from UTF-16 encoded JavaScript before analysis"
  tools:
  - spidermonkey
  - js-beautify
  commands:
  - "# Check for null bytes (look for 00 in hex)"
  - "xxd <script.js> | head -2"
  - "# Remove null bytes"
  # The doubled backslash is a YAML double-quote escape: tr receives \00.
  - "cat <script.js> | tr -d '\\00' > clean.js"
  - "# Then deobfuscate"
  - "js -f /usr/share/remnux/objects.js -f clean.js > decoded.js"
  - "# Beautify the result"
  - "js-beautify decoded.js > final.js"
  lab: "4.5"
# ============================================================
# SHELLCODE ANALYSIS
# ============================================================
# Recipe: three scdbgc invocations — from the start of the file, from a
# given offset (/foff), and from an offset with a carrier file passed
# via /fopen.
- id: shellcode-emulate-with-offset
  name: "Emulate Shellcode at Specific Offset"
  task: "Run shellcode that starts at an offset within a larger binary"
  tools:
  - scdbgc
  commands:
  - "# Emulate from file start"
  - "scdbgc /f <shellcode.bin> /s -1"
  - "# Emulate from specific offset (hex)"
  - "scdbgc /f <shellcode.bin> /foff <hex_offset> /s -1"
  - "# Emulate with a file handle pre-opened (for exploits)"
  - "scdbgc /f <shellcode.bin> /foff <offset> /fopen <carrier.doc> /s -1"
  lab: "3.5, 4.6"
# Recipe: scan a sample with yara-rules, then extract the beacon
# configuration with 1768.py.
# NOTE(review): `tools` lists "yara" while the command invoked is
# "yara-rules" — confirm which inventory id this should reference.
- id: cobalt-strike-beacon-parse
  name: "Parse Cobalt Strike Beacon Configuration"
  task: "Extract C2 config from a Cobalt Strike beacon or shellcode"
  tools:
  - 1768-py
  - yara
  commands:
  - "# Scan with YARA for CS signatures"
  - "yara-rules <sample>"
  - "# Extract beacon configuration"
  - "1768.py <sample_or_shellcode.bin>"
  lab: "3.4"
# Recipe: wrap raw shellcode in a PE file with shcode2exe.
# shcode2exe accepts a single positional input file; the output path is
# given with -o rather than as a second positional argument.
- id: shellcode-to-exe
  name: "Convert Shellcode to Executable"
  task: "Wrap raw shellcode in a PE for analysis in disassemblers"
  tools:
  - shcode2exe
  commands:
  - "# Convert 32-bit shellcode to EXE"
  - "shcode2exe -o <output.exe> <shellcode.bin>"
# ============================================================
# STRING DEOBFUSCATION
# ============================================================
# Recipe: escalating attempts to recover an XOR key — XORSearch for a
# known plaintext, brxor.py and bbcrack for brute force, xortool for
# multi-byte keys — followed by decoding with the recovered key.
# xortool-xor has no -i/-o options: input is read with -f and the
# result is written to stdout, so the final step redirects to a file.
# -n suppresses the trailing newline xortool-xor otherwise appends.
- id: xor-key-brute-force
  name: "Brute-Force XOR Key"
  task: "Find the XOR key used to encode strings in a binary"
  tools:
  - brxor-py
  - bbcrack
  - xorsearch
  - xortool
  commands:
  - "# Quick check for XOR-encoded URLs/PE headers"
  - "XORSearch <file> http:"
  - "# Brute-force single-byte XOR keys"
  - "brxor.py <file>"
  - "# Try XOR, ROL, ADD combinations"
  - "bbcrack -l 1 <file>"
  - "# Guess multi-byte XOR key length and value"
  - "xortool <file>"
  - "# Decode with known key"
  - "xortool-xor -n -s '<key>' -f <encoded> > <decoded>"
  lab: "5.2"
# Recipe: recover stack-built strings with strdeob.pl or FLOSS.
# FLOSS takes the string types to skip as values of `--no` (here
# `static`); the `--` ends that value list so the sample path is not
# consumed as another type.
- id: stack-string-extraction
  name: "Extract Stack-Built Strings"
  task: "Decode strings assembled byte-by-byte on the stack"
  tools:
  - strdeob-pl
  - floss
  commands:
  - "# Automatic stack string recovery"
  - "strdeob.pl <sample>"
  - "# FLOSS automatic deobfuscation (static + stack + decoded)"
  - "floss <sample>"
  - "# FLOSS skip static strings, only show decoded"
  - "floss --no static -- <sample>"
  lab: "5.2"
# Recipe: launch CyberChef. The last two `commands` entries are
# descriptive strings naming common CyberChef recipes, not shell commands.
- id: cyberchef-xor-decode
  name: "Visual XOR/Base64 Decode with CyberChef"
  task: "Use CyberChef's recipe builder for multi-step decoding"
  tools:
  - cyberchef
  commands:
  - "# Launch CyberChef"
  - "cyberchef"
  - "# Common recipe: From Hex → XOR (key) → extract strings"
  - "# Common recipe: From Base64 → Decode text UTF-16LE"
# ============================================================
# MALWARE EMULATION & CAPABILITY ANALYSIS
# ============================================================
# Recipe: run speakeasy with a JSON report (-o) and stderr redirected to
# a text file, then list API names from the report with jq (raw, then
# de-duplicated through sort -u).
- id: speakeasy-emulation-with-json
  name: "Emulate Malware and Extract API Calls"
  task: "Emulate a Windows binary on Linux and analyze its API usage"
  tools:
  - speakeasy
  - jq
  commands:
  - "# Emulate and capture both JSON report and text log"
  - "speakeasy -t <sample> -o report.json 2> report.txt"
  - "# Extract all API names called"
  - "jq '.entry_points[].apis[].api_name' report.json"
  - "# Extract unique API names"
  - "jq -r '.entry_points[].apis[].api_name' report.json | sort -u"
  lab: "1.4"
# Recipe: run capa at default and -vv verbosity, then grep the verbose
# output (with 7 lines of trailing context) for a technique name or for
# injection-related terms.
- id: capa-capability-filter
  name: "Filter Capabilities by Technique"
  task: "Find specific capabilities in capa output"
  tools:
  - capa
  commands:
  - "# Full capabilities report"
  - "capa <sample>"
  - "# Verbose with rule matches"
  - "capa -vv <sample>"
  - "# Filter for specific technique"
  - "capa -vv <sample> | grep -A7 '<technique_name>'"
  - "# Find injection-related capabilities"
  # Doubled backslashes are YAML escapes: grep receives inject\|hollow\|suspend.
  - "capa -vv <sample> | grep -A7 'inject\\|hollow\\|suspend'"
  lab: "1.4, 5.4"
# ============================================================
# NETWORK ANALYSIS
# ============================================================
# Recipe: three alternatives for pulling content out of a capture —
# tcpxtract (signature carving), tcpflow (per-stream output), and
# NetworkMiner.
- id: pcap-file-carving
  name: "Extract Files from Network Capture"
  task: "Carve downloaded payloads and exfiltrated data from PCAP"
  tools:
  - tcpxtract
  - tcpflow
  - networkminer
  commands:
  - "# Carve files using signatures"
  - "tcpxtract -f <capture.pcap> -o carved/"
  - "# Extract individual TCP streams"
  - "tcpflow -r <capture.pcap> -o streams/"
  - "# Or use NetworkMiner for automated extraction"
  - "NetworkMiner --pcap <capture.pcap>"
# Recipe: two ways to answer malware network traffic locally — fakedns
# plus httpd, or inetsim — followed by a DNS check and an iptables rule.
# The iptables rule has no protocol or port match, so it applies to all
# traffic arriving on eth0.
# NOTE(review): confirm that `httpd` starts the web server when run with
# no arguments, and that iptables is run with sufficient privileges.
- id: dns-interception-setup
  name: "Set Up DNS + HTTP Interception"
  task: "Redirect all malware DNS queries and serve fake HTTP responses"
  tools:
  - fakedns
  - httpd
  - inetsim
  commands:
  - "# Option A: Simple DNS + HTTP"
  - "fakedns &"
  - "httpd &"
  - "# Option B: Full service emulation (HTTP, HTTPS, DNS, FTP, SMTP)"
  - "inetsim"
  - "# Verify DNS is working"
  - "nslookup anything.com"
  - "# Redirect hardcoded IPs too"
  - "iptables -t nat -A PREROUTING -i eth0 -j REDIRECT"
# ============================================================
# .NET ANALYSIS
# ============================================================
# Recipe: decompile an assembly with ilspycmd and grep the output for a
# few API names; for obfuscated input, run de4dot first and decompile
# the "-cleaned" file.
- id: dotnet-decompile-cli
  name: "Decompile .NET on Command Line"
  task: "Decompile a .NET assembly to C# source on REMnux"
  tools:
  - ilspycmd
  - de4dot
  commands:
  - "# Decompile to C# source"
  - "ilspycmd <assembly.exe> > source.cs"
  - "# Search for suspicious patterns"
  # Doubled backslashes are YAML escapes: grep receives \| alternations.
  - "grep -n 'Assembly.Load\\|WebClient\\|Process.Start' source.cs"
  - "# If obfuscated, deobfuscate first"
  - "de4dot <assembly.exe>"
  - "ilspycmd <assembly-cleaned.exe> > source_clean.cs"
  lab: "4.8"
# ============================================================
# MEMORY FORENSICS
# ============================================================
# Recipe: four vol3 plugins run against the same dump — windows.info,
# windows.pstree, windows.netscan, windows.malfind.
- id: volatility-quick-triage
  name: "Quick Memory Dump Triage"
  task: "Fast initial assessment of a memory dump"
  tools:
  - volatility3
  commands:
  - "# Identify OS"
  - "vol3 -f <dump> windows.info"
  - "# Process tree (spot anomalies)"
  - "vol3 -f <dump> windows.pstree"
  - "# Network connections"
  - "vol3 -f <dump> windows.netscan"
  - "# Injected code detection"
  - "vol3 -f <dump> windows.malfind"
# ============================================================
# ANDROID ANALYSIS
# ============================================================
# Recipe: run apkid on the APK, unpack it with apktool into output/,
# grep the manifest for uses-permission entries, and decompile to src/
# with jadx.
- id: apk-quick-triage
  name: "Quick APK Triage"
  task: "Fast initial assessment of a suspicious Android app"
  tools:
  - apkid
  - apktool
  - jadx
  commands:
  - "# Check for packers/obfuscators"
  - "apkid <app.apk>"
  - "# Decompile to smali + resources"
  - "apktool d <app.apk> -o output/"
  - "# Check permissions"
  - "grep 'uses-permission' output/AndroidManifest.xml"
  - "# Decompile to Java source"
  - "jadx <app.apk> -d src/"
# ============================================================
# EMAIL ANALYSIS
# ============================================================
# Recipe: list the parts of an .eml file, dump a chosen part to a file,
# then identify and hash the dumped files.
# emldump.py's -d dumps the part chosen with -s to stdout, so each
# attachment is selected by part number and redirected to an
# attachment_<part_num> file; the later `file` and `sha256sum` globs
# rely on those names.
# NOTE(review): confirm -s/-d behaviour against the installed
# emldump.py version.
- id: email-attachment-extraction
  name: "Extract and Triage Email Attachments"
  task: "Pull attachments from an email and identify their types"
  tools:
  - emldump-py
  - file
  - sha256sum
  commands:
  - "# List email structure"
  - "emldump.py <email.eml>"
  - "# Extract an attachment by part number (repeat for each attachment)"
  - "emldump.py <email.eml> -s <part_num> -d > attachment_<part_num>"
  - "# Identify file types"
  - "file attachment_*"
  - "# Compute hashes for lookup"
  - "sha256sum attachment_*"