Add FOR610 tool/workflow knowledge base and data pipeline
Build a comprehensive malware analysis knowledge base from 3 sources:
- SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes
- REMnux salt-states: 340 packages parsed from GitHub
- REMnux docs: 280+ tools scraped from docs.remnux.org

The master inventory merges all sources into 447 tools with help tiers (rich/standard/basic).

The pipeline generates: tools.db (397 entries), 397 cheatsheets with multi-tool recipes, 15 workflow guides, 224 TLDR pages, and coverage reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,534 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate all help artifacts from the master tool inventory.
|
||||
|
||||
Reads data/remnux/tools-master.yaml and data/for610/workflows.yaml to produce:
|
||||
- data/generated/tools.db (pipe-delimited for find-tool)
|
||||
- data/generated/cheatsheets/*.cheat (per-tool cheat sheets)
|
||||
- data/generated/workflows/*.txt (workflow help files)
|
||||
- data/generated/tldr/*.md (TLDR pages)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
import textwrap
|
||||
|
||||
# Parent of the directory that contains this script; every other path hangs off it.
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
_DATA_DIR = os.path.join(BASE_DIR, "data")

# Input files read by the loaders below.
MASTER = os.path.join(_DATA_DIR, "remnux", "tools-master.yaml")
WORKFLOWS_SRC = os.path.join(_DATA_DIR, "for610", "workflows.yaml")
RECIPES_SRC = os.path.join(_DATA_DIR, "for610", "recipes.yaml")

# Root directory that every generator writes its artifacts into.
GEN_DIR = os.path.join(_DATA_DIR, "generated")
|
||||
|
||||
|
||||
def load_master():
    """Load the master tool inventory from ``MASTER``.

    Returns:
        The parsed YAML document. ``main`` reads its ``"tools"`` key.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Decode explicitly as UTF-8: the default text encoding depends on the
    # platform locale and can mis-decode non-ASCII tool descriptions.
    with open(MASTER, encoding="utf-8") as f:
        return yaml.safe_load(f)
|
||||
|
||||
|
||||
def load_workflows():
    """Load the workflow definitions from ``WORKFLOWS_SRC``.

    Returns:
        The parsed YAML mapping, or ``{"workflows": []}`` when the file is
        empty (``yaml.safe_load`` yields ``None`` for an empty document, and
        callers use ``.get("workflows", [])`` on the result).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(WORKFLOWS_SRC, encoding="utf-8") as f:
        return yaml.safe_load(f) or {"workflows": []}
|
||||
|
||||
|
||||
def load_recipes():
    """Load the optional recipe definitions from ``RECIPES_SRC``.

    Returns:
        The parsed YAML mapping. When the file is missing or empty the
        fallback ``{"recipes": []}`` is returned, so callers can always use
        ``.get("recipes", [])`` on the result.

    Raises:
        yaml.YAMLError: If the file exists but is not valid YAML.
    """
    empty = {"recipes": []}
    if not os.path.exists(RECIPES_SRC):
        return empty

    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(RECIPES_SRC, encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat it like a
        # missing file instead of handing None to the callers.
        return yaml.safe_load(f) or empty
|
||||
|
||||
|
||||
def build_recipe_index(recipes_data):
    """Build a mapping of tool id -> list of recipes that use that tool.

    Each tool id listed in a recipe's ``"tools"`` is indexed verbatim and,
    when it differs, under a normalized key (lower-cased, with ``-`` and
    ``_`` removed) so lookups can tolerate spelling variants.

    Args:
        recipes_data: Mapping with an optional ``"recipes"`` list; each recipe
            is a mapping with an optional ``"tools"`` list of string ids.
            ``None`` in place of either list is treated as empty.

    Returns:
        A dict mapping each key to the recipes that reference it, in input
        order. A recipe appears at most once per key, even when it lists
        several ids that normalize to the same key.
    """
    index = {}
    for recipe in recipes_data.get("recipes") or []:
        for tool_id in recipe.get("tools") or []:
            keys = [tool_id]
            normalized = tool_id.lower().replace("-", "").replace("_", "")
            if normalized != tool_id:
                keys.append(normalized)

            for key in keys:
                bucket = index.setdefault(key, [])
                # Identity check: two tool ids of the same recipe may collapse
                # onto one key; do not list that recipe twice.
                if not any(existing is recipe for existing in bucket):
                    bucket.append(recipe)
    return index
|
||||
|
||||
|
||||
# ============================================================
|
||||
# tools.db generator
|
||||
# ============================================================
|
||||
|
||||
def generate_tools_db(tools):
    """Generate the pipe-delimited ``tools.db`` used by find-tool.

    One line is written per tool whose ``in_remnux`` flag is truthy, in the
    form ``name|description|category|usage|tier``, sorted lexicographically.

    Args:
        tools: Iterable of tool mappings from the master inventory. Each must
            provide ``"name"`` and ``"sources"`` with ``"remnux_docs"`` and
            ``"for610"`` sub-mappings.

    Returns:
        The number of entries written.
    """
    output_path = os.path.join(GEN_DIR, "tools.db")
    # main() runs this generator first, so the output directory may not
    # exist yet; the other generators only create their own subdirectories.
    os.makedirs(GEN_DIR, exist_ok=True)

    lines = []
    for t in tools:
        if not t.get("in_remnux"):
            continue

        name = t["name"]
        sources = t["sources"]
        docs = sources["remnux_docs"]
        f610 = sources["for610"]

        # Pipes and newlines would break the one-record-per-line format.
        desc = (t.get("description") or "").replace("|", "/").replace("\n", " ").strip()[:120]
        if not desc:
            desc = "(no description available)"

        # Category: prefer the REMnux docs, fall back to FOR610.
        cat = ""
        if docs.get("covered"):
            cat = docs.get("category") or ""
        elif f610.get("covered"):
            cat = f610.get("category") or ""
        cat = cat.replace("|", "/").replace("\n", " ").strip()

        # Usage: first FOR610 example if there is one, else a generic hint.
        usage = ""
        if f610.get("covered"):
            usages = f610.get("typical_usage") or []
            if usages:
                usage = usages[0]
        if not usage:
            usage = f"{name} --help"
        usage = usage.replace("|", " ").replace("\n", " ").strip()

        tier = t.get("help_tier", "stub")
        lines.append(f"{name}|{desc}|{cat}|{usage}|{tier}")

    lines.sort()

    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")

    print(f" tools.db: {len(lines)} entries")
    return len(lines)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Cheatsheet generator
|
||||
# ============================================================
|
||||
|
||||
def sanitize_filename(name):
    """Return *name* as a lower-case string usable as a file name.

    Every character other than ASCII letters, digits, ``.``, ``_`` and ``-``
    becomes ``-``; leading and trailing hyphens are then removed.
    """
    hyphenated = re.sub(r'[^a-zA-Z0-9._-]', '-', name)
    trimmed = hyphenated.strip('-')
    return trimmed.lower()
|
||||
|
||||
|
||||
def generate_usage_comment(name, usage, index):
    """Pick a short human-readable label for one usage example.

    Args:
        name: Tool name (accepted for interface compatibility; not consulted).
        usage: The example command line.
        index: Position of the example in the tool's usage list.

    Returns:
        ``"Basic usage"`` for the first example. Otherwise the label of the
        first matching rule below, or ``"Alternative usage"`` if none match.
    """
    lowered = usage.lower()

    if index == 0:
        return "Basic usage"

    # Ordered rules: the first entry with any marker present in the command
    # wins, so more specific flags must stay ahead of generic ones.
    flag_rules = (
        (("-vv", "--verbose"), "Verbose output with details"),
        (("--no-static", "--no static"), "Skip static analysis, focus on dynamic"),
        (("-n ",), "Suppress default output"),
        (("-a ", "--all"), "Show all results"),
        (("-s ",), "Select specific item"),
        (("-d ",), "Dump/extract content"),
        (("-r ",), "Recursive/follow references"),
        (("-k ",), "Extract by keyword"),
        (("-o ",), "Output to file"),
        (("-f ",), "Process input file"),
        (("-i ",), "Case-insensitive search"),
    )
    for markers, label in flag_rules:
        if any(marker in usage for marker in markers):
            return label

    # The grep check is the only case-insensitive one.
    if "grep" in lowered:
        return "Filter output for specific pattern"

    shell_rules = (
        ("--help", "Show help"),
        ("|", "Pipe output for processing"),
        (">", "Save output to file"),
    )
    for token, label in shell_rules:
        if token in usage:
            return label

    return "Alternative usage"
|
||||
|
||||
|
||||
def format_recipes_section(tool_id, recipe_index):
    """Render the "Recipes" section of a cheatsheet for one tool.

    Args:
        tool_id: Id of the tool whose recipes should be listed.
        recipe_index: Mapping of tool key -> list of recipe mappings. Each
            recipe needs ``"id"`` and ``"name"``; ``"commands"`` is optional.

    Returns:
        The section text (leading blank line, header, then one
        ``# >> name`` block per distinct recipe id), or ``""`` when no
        recipe references the tool.
    """
    # Keys are tried in order. The last one is the lower-case, separator-free
    # form; the earlier variants alone cannot produce it for ids that contain
    # upper-case letters or underscores.
    candidates = (
        tool_id,
        tool_id.replace("-py", ""),
        tool_id.replace("-", ""),
        tool_id.lower().replace("-", "").replace("_", ""),
    )
    recipes = []
    for key in candidates:
        recipes = recipe_index.get(key, [])
        if recipes:
            break
    if not recipes:
        return ""

    # A recipe can be listed more than once under a key; keep the first
    # occurrence of each id.
    seen = set()
    unique = []
    for recipe in recipes:
        if recipe["id"] not in seen:
            seen.add(recipe["id"])
            unique.append(recipe)

    lines = ["", "# --- Recipes (multi-tool chains) ---", ""]
    for recipe in unique:
        lines.append(f"# >> {recipe['name']}")
        lines.extend(recipe.get("commands") or [])
        lines.append("")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_cheatsheet_rich(t, recipe_index=None):
    """Generate a rich cheatsheet for a tool with FOR610 coverage.

    Layout: ``#`` header lines (name, description, FOR610 metadata, docs
    URL), a ``%`` tag line, one commented block per usage example, and
    finally the recipes section when the tool appears in ``recipe_index``.

    Args:
        t: Tool mapping; must provide ``"name"``, ``"sources"["for610"]`` and
            ``"sources"["remnux_docs"]`` (plus ``"id"`` when ``recipe_index``
            is given).
        recipe_index: Optional mapping of tool key -> recipes, passed through
            to ``format_recipes_section``.

    Returns:
        The cheatsheet text.
    """
    f610 = t["sources"]["for610"]
    name = t["name"]
    # Keep the description on its one header line; a continuation line would
    # no longer carry the leading "# ".
    desc = (t.get("description") or "").replace("\n", " ").strip()
    labs = f610.get("labs") or []
    sections = f610.get("sections") or []
    tags = f610.get("tags") or []
    usages = f610.get("typical_usage") or []
    author = f610.get("author", "")

    lines = [
        f"# {name}",
        f"# {desc}",
    ]

    meta_parts = []
    if labs:
        # YAML may parse ids such as 1.2 as numbers, so stringify like sections.
        meta_parts.append(f"FOR610 Labs: {', '.join(str(lab) for lab in labs)}")
    if sections:
        meta_parts.append(f"Sections: {', '.join(str(s) for s in sections)}")
    if author:
        meta_parts.append(f"Author: {author}")
    if meta_parts:
        lines.append(f"# {' | '.join(meta_parts)}")

    # REMnux docs URL if available
    docs = t["sources"]["remnux_docs"]
    if docs.get("covered"):
        url = docs.get("docs_url", "")
        if url:
            lines.append(f"# Docs: {url}")

    lines.append("")

    # Tag line: at most eight tags, falling back to the lower-cased name.
    tag_str = ", ".join(str(tag) for tag in tags[:8]) if tags else name.lower()
    lines.append(f"% {tag_str}")
    lines.append("")

    # Usage examples with descriptive comments
    for i, usage in enumerate(usages):
        lines.append(f"# {generate_usage_comment(name, usage, i)}")
        lines.append(usage)
        lines.append("")

    # If no usage examples, add a basic one
    if not usages:
        lines.append("# Show help")
        lines.append(f"{name} --help")
        lines.append("")

    # Append recipes section if this tool participates in any recipes
    if recipe_index:
        recipes_text = format_recipes_section(t["id"], recipe_index)
        if recipes_text:
            lines.append(recipes_text)

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_cheatsheet_standard(t):
    """Generate a standard cheatsheet from the REMnux docs metadata.

    Args:
        t: Tool mapping; must provide ``"name"`` and
            ``"sources"["remnux_docs"]``.

    Returns:
        The cheatsheet text: ``#`` header lines (name, description, optional
        category and docs URL), a ``%`` tag line with the sanitized name, and
        a single ``--help`` example.
    """
    rdocs = t["sources"]["remnux_docs"]
    name = t["name"]
    desc = t.get("description", "") or rdocs.get("description", "") or ""
    # Keep the description on its one header line; a continuation line would
    # no longer carry the leading "# ".
    desc = desc.replace("\n", " ").strip()
    cat = rdocs.get("category", "")
    url = rdocs.get("docs_url", "")

    lines = [
        f"# {name}",
        f"# {desc}" if desc else f"# {name} tool",
    ]
    if cat:
        lines.append(f"# Category: {cat}")
    if url:
        lines.append(f"# Docs: {url}")

    lines += [
        "",
        f"% {sanitize_filename(name)}",
        "",
        f"# Show help for {name}",
        f"{name} --help",
        "",
    ]

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_cheatsheet_basic(t):
    """Build the minimal cheatsheet for a tool described only by salt-states.

    The text consists of the tool name, an "Installed via" header line, a
    ``%`` tag line with the sanitized name, and one ``--help`` example.
    """
    tool = t["name"]
    salt_info = t["sources"]["salt_states"]
    method = salt_info.get("install_method", "unknown")
    package = salt_info.get("package_name", tool)

    header = [f"# {tool}", f"# Installed via: {method} ({package})"]
    tag_block = ["", f"% {sanitize_filename(tool)}", ""]
    help_block = [f"# Show help for {tool}", f"{tool} --help", ""]

    return "\n".join(header + tag_block + help_block)
|
||||
|
||||
|
||||
def generate_cheatsheets(tools, recipe_index=None):
    """Generate one ``.cheat`` file per in-REMnux tool.

    The tool's ``help_tier`` selects the template: ``"rich"`` and
    ``"standard"`` have dedicated generators; anything else gets the basic
    template.

    Args:
        tools: Iterable of tool mappings from the master inventory.
        recipe_index: Optional recipe index forwarded to the rich template.

    Returns:
        The number of files written.
    """
    cheat_dir = os.path.join(GEN_DIR, "cheatsheets")
    os.makedirs(cheat_dir, exist_ok=True)

    count = 0
    for t in tools:
        if not t.get("in_remnux"):
            continue

        tier = t.get("help_tier", "stub")
        # NOTE(review): two names that sanitize to the same file name
        # overwrite each other but are both counted.
        filename = sanitize_filename(t["name"]) + ".cheat"

        if tier == "rich":
            content = generate_cheatsheet_rich(t, recipe_index=recipe_index)
        elif tier == "standard":
            content = generate_cheatsheet_standard(t)
        else:
            content = generate_cheatsheet_basic(t)

        # Explicit UTF-8: descriptions may contain non-ASCII text, and the
        # default encoding depends on the platform locale.
        with open(os.path.join(cheat_dir, filename), "w", encoding="utf-8") as f:
            f.write(content)
        count += 1

    print(f" cheatsheets: {count} .cheat files")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Workflow generator
|
||||
# ============================================================
|
||||
|
||||
def _get_tool_examples(tool_name, master_tools_by_name):
|
||||
"""Get 1-2 example commands for a tool from the master inventory."""
|
||||
tool = master_tools_by_name.get(tool_name)
|
||||
if not tool:
|
||||
# Try kebab-case lookup
|
||||
normalized = tool_name.lower().replace("_", "-")
|
||||
tool = master_tools_by_name.get(normalized)
|
||||
if tool and tool["sources"]["for610"].get("covered"):
|
||||
usages = tool["sources"]["for610"].get("typical_usage", [])
|
||||
return usages[:2]
|
||||
return []
|
||||
|
||||
|
||||
def _build_workflow_tool_lookup(master_tools):
    """Map lower-cased names, ids and lower-cased aliases to tool entries."""
    lookup = {}
    for t in master_tools or []:
        lookup[t["name"].lower()] = t
        lookup[t["id"]] = t
        for alias in t.get("aliases") or []:
            lookup[alias.lower()] = t
    return lookup


def _render_workflow(wf, tools_by_name):
    """Return the full text of one workflow help file."""
    name = wf["name"]
    desc = wf.get("description") or ""
    related_labs = wf.get("related_labs") or []

    lines = [
        f"{'='*60}",
        f" {name}",
        f"{'='*60}",
        "",
        f" {desc}",
        "",
    ]

    if related_labs:
        # Lab ids may be parsed from YAML as numbers; stringify before joining.
        lines.append(f" Related FOR610 Labs: {', '.join(str(lab) for lab in related_labs)}")
        lines.append("")

    lines.append(f"{'─'*60}")
    lines.append("")

    for step in wf.get("steps") or []:
        order = step.get("order", "?")
        step_name = step.get("name", "")
        step_desc = step.get("description", "")
        step_tools = [str(tool) for tool in step.get("tools") or []]

        lines.append(f" Step {order}: {step_name}")
        if step_tools:
            lines.append(f" Tools: {', '.join(step_tools)}")
        if step_desc:
            lines.append(
                textwrap.fill(step_desc, width=56, initial_indent=" ", subsequent_indent=" ")
            )

        # Add one inline command example per tool, preceded by a single
        # blank line before the first example of the step.
        if step_tools and tools_by_name:
            examples_shown = False
            for tool_name in step_tools:
                examples = _get_tool_examples(tool_name, tools_by_name)
                if examples:
                    if not examples_shown:
                        lines.append("")
                    lines.append(f" $ {examples[0]}")
                    examples_shown = True

        lines.append("")

    lines.append(f"{'─'*60}")
    lines.append(" Tip: 'fhelp cheat <tool>' for full examples")
    lines.append(" 'Ctrl+G' for interactive cheatsheet browser")
    lines.append("")

    return "\n".join(lines)


def _render_workflow_index(workflows):
    """Return the text of the index file that lists every workflow."""
    index_lines = [
        f"{'='*60}",
        " Available Analysis Workflows",
        f"{'='*60}",
        "",
    ]
    for wf in workflows:
        wf_id = wf["id"].replace("_", "-")
        desc = wf.get("description") or ""
        index_lines.append(f" {wf_id}")
        index_lines.append(f" {wf['name']}")
        index_lines.append(
            textwrap.fill(desc, width=56, initial_indent=" ", subsequent_indent=" ")
        )
        index_lines.append("")

    index_lines += [
        f"{'─'*60}",
        " Usage: fhelp workflow <name>",
        " Example: fhelp workflow static-analysis",
        "",
    ]
    return "\n".join(index_lines)


def generate_workflows(workflows_data, master_tools=None):
    """Generate readable workflow help files with inline examples.

    Writes ``<workflow-id>.txt`` (underscores in the id become hyphens) for
    every workflow, plus an ``index.txt`` listing all of them, into
    ``GEN_DIR/workflows``.

    Args:
        workflows_data: Mapping with an optional ``"workflows"`` list; each
            workflow needs ``"id"`` and ``"name"``.
        master_tools: Optional iterable of master-inventory tool mappings,
            used to look up example commands for the tools named in each step.

    Returns:
        The number of workflow files written (the index is not counted).
    """
    wf_dir = os.path.join(GEN_DIR, "workflows")
    os.makedirs(wf_dir, exist_ok=True)

    tools_by_name = _build_workflow_tool_lookup(master_tools)
    workflows = workflows_data.get("workflows") or []

    count = 0
    for wf in workflows:
        content = _render_workflow(wf, tools_by_name)
        filename = wf["id"].replace("_", "-") + ".txt"
        # Explicit UTF-8: the separator rule uses U+2500, which a non-UTF-8
        # default locale encoding cannot write.
        with open(os.path.join(wf_dir, filename), "w", encoding="utf-8") as f:
            f.write(content)
        count += 1

    # Also generate an index file
    with open(os.path.join(wf_dir, "index.txt"), "w", encoding="utf-8") as f:
        f.write(_render_workflow_index(workflows))

    print(f" workflows: {count} workflow files + index")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# TLDR generator
|
||||
# ============================================================
|
||||
|
||||
def generate_tldr(tools):
    """Generate TLDR-style Markdown pages into ``GEN_DIR/tldr``.

    A page is written for every in-REMnux tool whose ``help_tier`` is
    ``"rich"`` or ``"standard"``. No check against an upstream TLDR
    collection is made here.

    Args:
        tools: Iterable of tool mappings from the master inventory.

    Returns:
        The number of pages written.
    """
    tldr_dir = os.path.join(GEN_DIR, "tldr")
    os.makedirs(tldr_dir, exist_ok=True)

    count = 0
    for t in tools:
        if not t.get("in_remnux"):
            continue
        if t.get("help_tier", "stub") not in ("rich", "standard"):
            continue

        name = t["name"]
        # Keep the description on the single "> " line of the page.
        desc = (t.get("description") or "").replace("\n", " ").strip() or f"{name} tool"

        # Usage examples: FOR610 ones when covered, else a generic hint.
        usages = []
        f610 = t["sources"]["for610"]
        if f610.get("covered"):
            usages = f610.get("typical_usage") or []
        if not usages:
            usages = [f"{name} --help"]

        # TLDR format
        lines = [
            f"# {name}",
            "",
            f"> {desc}",
            "",
        ]
        # At most four examples per page.
        for usage in usages[:4]:
            lines.append(f"- Run {name}:")
            lines.append("")
            lines.append(f"`{usage}`")
            lines.append("")

        filename = sanitize_filename(name) + ".md"
        # Explicit UTF-8 so output does not depend on the platform locale.
        with open(os.path.join(tldr_dir, filename), "w", encoding="utf-8") as f:
            f.write("\n".join(lines))
        count += 1

    print(f" tldr: {count} pages")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Main
|
||||
# ============================================================
|
||||
|
||||
def main():
    """Load every input file, run the four generators, and print a summary."""
    print("Generating help artifacts from master inventory...")

    # Inputs
    tools = load_master()["tools"]
    workflows_data = load_workflows()
    recipes_data = load_recipes()
    recipe_index = build_recipe_index(recipes_data)

    n_workflows = len(workflows_data.get('workflows', []))
    n_recipes = len(recipes_data.get('recipes', []))
    print(f"\nInput: {len(tools)} tools, {n_workflows} workflows, {n_recipes} recipes")
    print()

    # Generators run in a fixed order; each prints its own progress line.
    db_count = generate_tools_db(tools)
    cheat_count = generate_cheatsheets(tools, recipe_index=recipe_index)
    wf_count = generate_workflows(workflows_data, master_tools=tools)
    tldr_count = generate_tldr(tools)

    summary = (
        f"\nAll artifacts generated in {GEN_DIR}/",
        f" tools.db: {db_count} entries",
        f" cheatsheets/: {cheat_count} files",
        f" workflows/: {wf_count} + index",
        f" tldr/: {tldr_count} pages",
    )
    for row in summary:
        print(row)


# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user