Add FOR610 tool/workflow knowledge base and data pipeline

Build comprehensive malware analysis knowledge base from 3 sources:
- SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes
- REMnux salt-states: 340 packages parsed from GitHub
- REMnux docs: 280+ tools scraped from docs.remnux.org

Master inventory merges all sources into 447 tools with help tiers
(rich/standard/basic). Pipeline generates: tools.db (397 entries),
397 cheatsheets with multi-tool recipes, 15 workflow guides, 224
TLDR pages, and coverage reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
tobias
2026-03-28 17:38:15 +01:00
parent 06ebb09ab0
commit f3ccc09c3d
663 changed files with 36339 additions and 1 deletions
+466
View File
@@ -0,0 +1,466 @@
#!/usr/bin/env python3
"""Build the master tool inventory by merging three sources.
Merges:
1. FOR610 course data (data/for610/tools.yaml)
2. Salt-states installation data (data/remnux/sources/salt-states.yaml)
3. REMnux docs (data/remnux/sources/remnux-docs.yaml)
Output: data/remnux/tools-master.yaml
"""
import os
import re
import yaml
# All paths are resolved relative to the repository root (one level above
# this script's directory).
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
FOR610_TOOLS = os.path.join(BASE_DIR, "data", "for610", "tools.yaml")
SALT_STATES = os.path.join(BASE_DIR, "data", "remnux", "sources", "salt-states.yaml")
REMNUX_DOCS = os.path.join(BASE_DIR, "data", "remnux", "sources", "remnux-docs.yaml")
ENRICHMENTS = os.path.join(BASE_DIR, "data", "remnux", "tool-enrichments.yaml")
OUTPUT = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
# Manual override mapping for tools that have different names across sources
# Format: normalized_key -> canonical_id
# Consulted first by find_match(); an override wins over exact-name matching.
NAME_OVERRIDES = {
    "die": "diec",
    "detect-it-easy": "diec",
    "detect it easy": "diec",
    "js": "spidermonkey",
    "js-patched": "spidermonkey",
    "spidermonkey-patched": "spidermonkey",
    "mozilla-spidermonkey": "spidermonkey",
    "vol": "volatility3",
    "vol-py": "volatility3",
    "volatility-framework": "volatility3",
    "volatility": "volatility3",
    "process-hacker": "system-informer",
    "yara-rules": "yara",
    "yara-forge": "yara",
    "yara-x": "yara-x",
    "jsbeautifier": "js-beautify",
    "js-beautifier": "js-beautify",
    # Identity entries (key == canonical id) pin a name so that the
    # digit-stripping / suffix heuristics in find_match() cannot remap it.
    "ilspycmd": "ilspycmd",
    "ilspy": "ilspy",
    "upx-ucl": "upx",
    "unrar-free": "rar",
    "netcat-openbsd": "netcat",
    "net-tools": "net-tools",
    "oletools": "olevba",
    "pev": "readpe",
    "scdbg": "scdbgc",
    "origamindee": "origami",
    "pdftk-java": "pdftk",
    "fakenet-ng": "fakenet-ng",
    "accept-all-ips": "httpd",
    "7zip": "7zip",
    "7z": "7zip",
    "p7zip": "7zip",
    "info-zip": "unzip",
    "cutter": "cutter",
    "r2pipe": "radare2",
    "r2": "radare2",
    "stpyv8": "spidermonkey",
    "rhino-debugger": "spidermonkey",
    "powershell-core": "powershell",
    "powershell": "powershell",
    "didier-stevens-scripts": "didier-stevens-suite",
    "docker-compose": "docker",
    "docker": "docker",
    "ghidrassist-mcp": "ghidra",
    "remnux-mcp-server": "remnux-mcp-server",
}
def normalize_name(name):
    """Collapse a tool name into a lower-case dashed key used for matching."""
    key = name.lower().strip()
    # Drop a trailing script extension, then squash every non-alphanumeric
    # run into a single dash.
    for ext_pattern in (r'\.py$', r'\.pl$', r'\.bat$'):
        key = re.sub(ext_pattern, '', key)
    key = re.sub(r'[^a-z0-9]+', '-', key)
    return key.strip('-')
def make_id(name):
    """Build a kebab-case identifier; script extensions survive as suffixes."""
    ident = name.lower().strip()
    # Keep .py/.pl/.bat visible in the ID as -py/-pl/-bat.
    for ext_pattern, suffix in (
        (r'\.py$', '-py'),
        (r'\.pl$', '-pl'),
        (r'\.bat$', '-bat'),
    ):
        ident = re.sub(ext_pattern, suffix, ident)
    ident = re.sub(r'[^a-z0-9]+', '-', ident)
    return ident.strip('-')
def load_for610():
    """Read the FOR610 tools YAML and return its tool list."""
    with open(FOR610_TOOLS) as handle:
        parsed = yaml.safe_load(handle)
    return parsed.get("tools", [])
def load_salt_states():
    """Read parsed salt-states YAML; return [] when the file is absent."""
    if not os.path.exists(SALT_STATES):
        print(f" Warning: {SALT_STATES} not found, skipping")
        return []
    with open(SALT_STATES) as handle:
        parsed = yaml.safe_load(handle)
    return parsed.get("tools", [])
def load_remnux_docs():
    """Read scraped REMnux docs YAML; return [] when the file is absent."""
    if not os.path.exists(REMNUX_DOCS):
        print(f" Warning: {REMNUX_DOCS} not found, skipping")
        return []
    with open(REMNUX_DOCS) as handle:
        parsed = yaml.safe_load(handle)
    return parsed.get("tools", [])
def build_lookup_index(master_tools):
    """Map every known key (id, normalized name, aliases) to a canonical id."""
    lookup = {}
    for entry in master_tools:
        canonical = entry["id"]
        keys = [canonical, normalize_name(entry["name"])]
        keys.extend(normalize_name(alias) for alias in entry.get("aliases", []))
        for key in keys:
            lookup[key] = canonical
    return lookup
def find_match(name, index):
    """Resolve *name* to a canonical tool id, or None when nothing matches.

    Lookup order: manual NAME_OVERRIDES, exact normalized key, the key with
    a '-py' suffix, and finally the key with a trailing version number
    stripped (e.g. 'tool-2' -> 'tool').
    """
    key = normalize_name(name)
    # Overrides win; fall back to the override id itself when it is not
    # (yet) present in the index.
    override = NAME_OVERRIDES.get(key)
    if override is not None:
        return index.get(override, override)
    for candidate in (key, key + "-py"):
        if candidate in index:
            return index[candidate]
    bare = re.sub(r'-?\d+$', '', key)
    if bare and bare in index:
        return index[bare]
    return None
def compute_help_tier(tool):
    """Rank documentation coverage: FOR610 > REMnux docs > salt-states > stub."""
    sources = tool.get("sources", {})

    def covered(source_key):
        return sources.get(source_key, {}).get("covered", False)

    # Precedence order matters: the richest source wins.
    for source_key, tier in (
        ("for610", "rich"),
        ("remnux_docs", "standard"),
        ("salt_states", "basic"),
    ):
        if covered(source_key):
            return tier
    return "stub"
def main():
    """Merge FOR610, salt-states and REMnux-docs data into tools-master.yaml.

    FOR610 entries form the base inventory; salt-states and docs entries are
    matched against it (new entries are created when unmatched), manual
    enrichments are applied, derived coverage fields are computed, and the
    merged inventory plus summary metadata is written to OUTPUT.
    """
    print("Building master tool inventory...")
    # --- Step 1: Load FOR610 tools as base ---
    print("\n1. Loading FOR610 tools...")
    for610_tools = load_for610()
    print(f" Loaded {len(for610_tools)} tools")
    master = {}
    for t in for610_tools:
        tid = t["id"]
        entry = {
            "id": tid,
            "name": t["name"],
            "aliases": t.get("aliases", []),
            "description": t.get("description", ""),
            "in_remnux": t.get("in_remnux", False),
            "platform": t.get("platform", "linux"),
            "sources": {
                "for610": {
                    "covered": True,
                    "description": t.get("description", ""),
                    "category": t.get("category", ""),
                    "labs": t.get("labs", []),
                    "sections": t.get("for610_sections", []),
                    "typical_usage": t.get("typical_usage", []),
                    "tags": t.get("tags", []),
                },
                "salt_states": {"covered": False},
                "remnux_docs": {"covered": False},
            },
        }
        if t.get("author"):
            entry["sources"]["for610"]["author"] = t["author"]
        master[tid] = entry
    # --- Step 2: Merge salt-states ---
    print("\n2. Loading salt-states...")
    salt_tools = load_salt_states()
    print(f" Loaded {len(salt_tools)} entries")
    index = build_lookup_index(list(master.values()))
    salt_matched = 0
    salt_new = 0
    for st in salt_tools:
        st_id = st["id"]
        st_names = st.get("package_names", [st_id])
        # Try to match against existing tools by id first, then package names
        matched_id = None
        for name in [st_id] + st_names:
            matched_id = find_match(name, index)
            if matched_id:
                break
        if matched_id and matched_id in master:
            # Enrich existing tool; anything installed by salt is in REMnux
            master[matched_id]["sources"]["salt_states"] = {
                "covered": True,
                "install_method": st.get("install_method", "unknown"),
                "package_name": st_names[0] if st_names else st_id,
                "salt_state_path": st.get("salt_state_path", ""),
            }
            master[matched_id]["in_remnux"] = True
            salt_matched += 1
        else:
            # Create new tool entry
            new_id = make_id(st_id)
            # Check if override maps to something we don't have yet
            if normalize_name(st_id) in NAME_OVERRIDES:
                new_id = NAME_OVERRIDES[normalize_name(st_id)]
            if new_id not in master:
                master[new_id] = {
                    "id": new_id,
                    "name": st_id,
                    "aliases": [n for n in st_names if n != st_id][:3],
                    "description": "",
                    "in_remnux": True,
                    "platform": "linux",
                    "sources": {
                        "for610": {"covered": False},
                        "salt_states": {
                            "covered": True,
                            "install_method": st.get("install_method", "unknown"),
                            "package_name": st_names[0] if st_names else st_id,
                            "salt_state_path": st.get("salt_state_path", ""),
                        },
                        "remnux_docs": {"covered": False},
                    },
                }
                # Update index so later salt entries can match this tool
                index[new_id] = new_id
                index[normalize_name(st_id)] = new_id
                for n in st_names:
                    index[normalize_name(n)] = new_id
                salt_new += 1
            else:
                # Already exists under the override ID
                master[new_id]["sources"]["salt_states"] = {
                    "covered": True,
                    "install_method": st.get("install_method", "unknown"),
                    "package_name": st_names[0] if st_names else st_id,
                    "salt_state_path": st.get("salt_state_path", ""),
                }
                salt_matched += 1
    print(f" Matched: {salt_matched}, New: {salt_new}")
    # --- Step 3: Merge REMnux docs ---
    print("\n3. Loading REMnux docs...")
    doc_tools = load_remnux_docs()
    print(f" Loaded {len(doc_tools)} entries")
    # Rebuild index after salt-states additions
    index = build_lookup_index(list(master.values()))
    docs_matched = 0
    docs_new = 0
    for dt in doc_tools:
        dt_name = dt.get("name", "")
        dt_id = dt.get("id", make_id(dt_name))
        matched_id = find_match(dt_name, index)
        if not matched_id:
            matched_id = find_match(dt_id, index)
        if matched_id and matched_id in master:
            # Enrich existing tool
            doc_entry = {
                "covered": True,
                "category": dt.get("category", ""),
                "description": dt.get("description", ""),
                "docs_url": dt.get("docs_url", ""),
            }
            if dt.get("website"):
                doc_entry["website"] = dt["website"]
            if dt.get("anchor"):
                doc_entry["anchor"] = dt["anchor"]
            master[matched_id]["sources"]["remnux_docs"] = doc_entry
            # Use REMnux docs description if we don't have one
            if not master[matched_id]["description"] and dt.get("description"):
                master[matched_id]["description"] = dt["description"]
            docs_matched += 1
        else:
            # Create new entry
            new_id = make_id(dt_name) if dt_name else dt_id
            if new_id not in master:
                master[new_id] = {
                    "id": new_id,
                    "name": dt_name,
                    "aliases": [],
                    "description": dt.get("description", ""),
                    "in_remnux": True,
                    "platform": "linux",
                    "sources": {
                        "for610": {"covered": False},
                        "salt_states": {"covered": False},
                        "remnux_docs": {
                            "covered": True,
                            "category": dt.get("category", ""),
                            "description": dt.get("description", ""),
                            "docs_url": dt.get("docs_url", ""),
                        },
                    },
                }
                if dt.get("website"):
                    master[new_id]["sources"]["remnux_docs"]["website"] = dt["website"]
                index[new_id] = new_id
                index[normalize_name(dt_name)] = new_id
                docs_new += 1
            else:
                master[new_id]["sources"]["remnux_docs"] = {
                    "covered": True,
                    "category": dt.get("category", ""),
                    "description": dt.get("description", ""),
                    "docs_url": dt.get("docs_url", ""),
                }
                docs_matched += 1
    print(f" Matched: {docs_matched}, New: {docs_new}")
    # --- Step 4: Apply manual enrichments ---
    print("\n4. Applying manual enrichments...")
    if os.path.exists(ENRICHMENTS):
        with open(ENRICHMENTS) as f:
            enrich_data = yaml.safe_load(f)
        enrichments = enrich_data.get("enrichments", {})
        enriched = 0
        for tool_key, enrich in enrichments.items():
            # Find the tool in master by key or normalized name
            matched_id = find_match(tool_key, index)
            if not matched_id:
                matched_id = tool_key
            if matched_id in master:
                tool = master[matched_id]
                # An enrichment description always wins.  (The original
                # if/elif here had two identical branches; collapsed.)
                if enrich.get("description"):
                    tool["description"] = enrich["description"]
                # Add usage examples to for610 source (or create enrichment source)
                if enrich.get("typical_usage"):
                    if not tool["sources"]["for610"].get("covered"):
                        tool["sources"]["for610"]["covered"] = True
                        tool["sources"]["for610"]["typical_usage"] = enrich["typical_usage"]
                        tool["sources"]["for610"]["tags"] = enrich.get("tags", [])
                        tool["sources"]["for610"]["description"] = enrich.get("description", "")
                    else:
                        # Merge usage examples, keeping order and skipping dupes
                        existing = tool["sources"]["for610"].get("typical_usage", [])
                        for u in enrich["typical_usage"]:
                            if u not in existing:
                                existing.append(u)
                        tool["sources"]["for610"]["typical_usage"] = existing
                enriched += 1
            else:
                print(f" Warning: enrichment key '{tool_key}' not found in master")
        print(f" Enriched: {enriched} tools")
    else:
        print(" No enrichments file found, skipping")
    # Rebuild index after enrichments
    index = build_lookup_index(list(master.values()))
    # --- Step 5: Compute derived fields ---
    print("\n5. Computing derived fields...")
    for tool in master.values():
        tool["has_for610_coverage"] = tool["sources"]["for610"].get("covered", False)
        tool["has_remnux_docs"] = tool["sources"]["remnux_docs"].get("covered", False)
        tool["has_salt_state"] = tool["sources"]["salt_states"].get("covered", False)
        tool["help_tier"] = compute_help_tier(tool)
    # --- Step 6: Sort and output ---
    tools_list = sorted(master.values(), key=lambda t: t["id"])
    # Remove windows-only/online tools that aren't in remnux
    # (keep them for reference but flag appropriately)
    tiers = {}
    for t in tools_list:
        tier = t["help_tier"]
        tiers[tier] = tiers.get(tier, 0) + 1
    output = {
        "metadata": {
            "total_tools": len(tools_list),
            "in_remnux_count": sum(1 for t in tools_list if t["in_remnux"]),
            "help_tier_counts": tiers,
            "source_coverage": {
                "for610_only": sum(1 for t in tools_list if t["has_for610_coverage"] and not t["has_remnux_docs"] and not t["has_salt_state"]),
                "remnux_docs_only": sum(1 for t in tools_list if t["has_remnux_docs"] and not t["has_for610_coverage"] and not t["has_salt_state"]),
                "salt_states_only": sum(1 for t in tools_list if t["has_salt_state"] and not t["has_for610_coverage"] and not t["has_remnux_docs"]),
                "all_three": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_remnux_docs"] and t["has_salt_state"]),
                "for610_and_docs": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_remnux_docs"]),
                "for610_and_salt": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_salt_state"]),
                "docs_and_salt": sum(1 for t in tools_list if t["has_remnux_docs"] and t["has_salt_state"]),
                "no_coverage": sum(1 for t in tools_list if not t["has_for610_coverage"] and not t["has_remnux_docs"] and not t["has_salt_state"]),
            },
        },
        "tools": tools_list,
    }
    with open(OUTPUT, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    print(f"\n{'='*50}")
    print(f"MASTER INVENTORY BUILT: {len(tools_list)} tools")
    print(f" In REMnux: {output['metadata']['in_remnux_count']}")
    print(f"\nHelp Tiers:")
    for tier, count in sorted(tiers.items()):
        print(f" {tier}: {count}")
    print(f"\nSource Coverage:")
    for key, val in output["metadata"]["source_coverage"].items():
        print(f" {key}: {val}")
    print(f"\nOutput: {OUTPUT}")
if __name__ == "__main__":
    main()
+122
View File
@@ -0,0 +1,122 @@
#!/usr/bin/env python3
"""Generate coverage report from the master tool inventory.
Reads data/remnux/tools-master.yaml and produces:
- data/generated/coverage-report.md (human-readable)
- data/remnux/coverage-report.yaml (machine-readable)
"""
import os
import yaml
# Input/output locations, resolved relative to the repo root (one level
# above this script's directory).
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
MASTER = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
MD_OUTPUT = os.path.join(BASE_DIR, "data", "generated", "coverage-report.md")
YAML_OUTPUT = os.path.join(BASE_DIR, "data", "remnux", "coverage-report.yaml")
def main():
    """Produce markdown and YAML coverage reports from tools-master.yaml.

    Reads MASTER, buckets tools by help tier and source coverage, then
    writes a human-readable report to MD_OUTPUT and a machine-readable
    summary to YAML_OUTPUT.
    """
    with open(MASTER) as f:
        data = yaml.safe_load(f)
    tools = data["tools"]
    meta = data["metadata"]
    # Classify tools
    remnux_tools = [t for t in tools if t.get("in_remnux")]
    rich = [t for t in tools if t["help_tier"] == "rich"]
    standard = [t for t in tools if t["help_tier"] == "standard"]
    basic = [t for t in tools if t["help_tier"] == "basic"]
    stub = [t for t in tools if t["help_tier"] == "stub"]
    # Tools in REMnux with no good help
    needs_help = [t for t in remnux_tools if t["help_tier"] in ("basic", "stub")]
    needs_help.sort(key=lambda t: t["name"])
    # Tools with FOR610 coverage (richest help)
    for610_covered = [t for t in remnux_tools if t.get("has_for610_coverage")]
    for610_covered.sort(key=lambda t: t["name"])
    # Tools with REMnux docs only (decent help)
    docs_only = [t for t in remnux_tools if t.get("has_remnux_docs") and not t.get("has_for610_coverage")]
    docs_only.sort(key=lambda t: t["name"])
    # Generate markdown report
    lines = [
        "# Tool Coverage Report",
        "",
        "## Summary",
        "",
        f"| Metric | Count |",
        f"|--------|-------|",
        f"| Total tools in master inventory | {len(tools)} |",
        f"| Tools in REMnux container | {len(remnux_tools)} |",
        f"| Rich help (FOR610 coverage) | {len(rich)} |",
        f"| Standard help (REMnux docs) | {len(standard)} |",
        f"| Basic help (salt-states only) | {len(basic)} |",
        f"| Stub (no documentation) | {len(stub)} |",
        "",
        "## Source Overlap",
        "",
        f"| Combination | Count |",
        f"|-------------|-------|",
    ]
    # One table row per source-combination counter from the master metadata.
    for key, val in meta["source_coverage"].items():
        lines.append(f"| {key.replace('_', ' ')} | {val} |")
    lines += [
        "",
        "## Priority: REMnux Tools Needing Help",
        "",
        f"These {len(needs_help)} tools are installed in the container but have minimal or no documentation:",
        "",
    ]
    for t in needs_help:
        # Uppercase STUB to make fully-undocumented tools stand out.
        tier_badge = "basic" if t["help_tier"] == "basic" else "STUB"
        lines.append(f"- `{t['name']}` [{tier_badge}]")
    lines += [
        "",
        f"## Rich Help Tools ({len(for610_covered)} tools with FOR610 coverage)",
        "",
    ]
    for t in for610_covered:
        labs = t["sources"]["for610"].get("labs", [])
        lab_str = f" (Labs: {', '.join(labs)})" if labs else ""
        lines.append(f"- `{t['name']}`{lab_str}")
    lines += [
        "",
        f"## Standard Help Tools ({len(docs_only)} tools with REMnux docs only)",
        "",
    ]
    for t in docs_only:
        cat = t["sources"]["remnux_docs"].get("category", "")
        lines.append(f"- `{t['name']}` — {cat}")
    md_content = "\n".join(lines) + "\n"
    os.makedirs(os.path.dirname(MD_OUTPUT), exist_ok=True)
    with open(MD_OUTPUT, "w") as f:
        f.write(md_content)
    # Machine-readable YAML
    yaml_data = {
        "summary": meta,
        "needs_help": [{"id": t["id"], "name": t["name"], "tier": t["help_tier"]} for t in needs_help],
        "rich_tools": [{"id": t["id"], "name": t["name"]} for t in for610_covered],
        "standard_tools": [{"id": t["id"], "name": t["name"]} for t in docs_only],
    }
    with open(YAML_OUTPUT, "w") as f:
        yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)
    print(f"Coverage report generated:")
    print(f" Markdown: {MD_OUTPUT}")
    print(f" YAML: {YAML_OUTPUT}")
    print(f"\n {len(remnux_tools)} REMnux tools:")
    print(f" {len(rich)} rich, {len(standard)} standard, {len(basic)} basic, {len(stub)} stub")
    print(f" {len(needs_help)} need better documentation")
if __name__ == "__main__":
    main()
+534
View File
@@ -0,0 +1,534 @@
#!/usr/bin/env python3
"""Generate all help artifacts from the master tool inventory.
Reads data/remnux/tools-master.yaml and data/for610/workflows.yaml to produce:
- data/generated/tools.db (pipe-delimited for find-tool)
- data/generated/cheatsheets/*.cheat (per-tool cheat sheets)
- data/generated/workflows/*.txt (workflow help files)
- data/generated/tldr/*.md (TLDR pages)
"""
import os
import re
import yaml
import textwrap
# Inputs (master inventory + FOR610 workflow/recipe YAML) and the generated
# output directory, all resolved relative to the repo root.
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
MASTER = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
WORKFLOWS_SRC = os.path.join(BASE_DIR, "data", "for610", "workflows.yaml")
RECIPES_SRC = os.path.join(BASE_DIR, "data", "for610", "recipes.yaml")
GEN_DIR = os.path.join(BASE_DIR, "data", "generated")
def load_master():
    """Read and return the parsed master tool inventory."""
    with open(MASTER) as handle:
        return yaml.safe_load(handle)
def load_workflows():
    """Read and return the parsed FOR610 workflows YAML."""
    with open(WORKFLOWS_SRC) as handle:
        return yaml.safe_load(handle)
def load_recipes():
    """Read the recipes YAML; return an empty recipe set when it is absent."""
    if not os.path.exists(RECIPES_SRC):
        return {"recipes": []}
    with open(RECIPES_SRC) as handle:
        return yaml.safe_load(handle)
def build_recipe_index(recipes_data):
    """Index recipes by each participating tool id (plus a squashed variant)."""
    by_tool = {}
    for recipe in recipes_data.get("recipes", []):
        for tool_id in recipe.get("tools", []):
            by_tool.setdefault(tool_id, []).append(recipe)
            # Also register a separator-free key so e.g. 'pdf-parser'
            # content can be found under 'pdfparser'.
            squashed = tool_id.lower().replace("-", "").replace("_", "")
            if squashed != tool_id:
                by_tool.setdefault(squashed, []).append(recipe)
    return by_tool
# ============================================================
# tools.db generator
# ============================================================
def generate_tools_db(tools):
    """Write the pipe-delimited tools.db consumed by find-tool.

    One sorted line per in-REMnux tool: name|description|category|usage|tier.
    Returns the number of rows written.
    """
    output_path = os.path.join(GEN_DIR, "tools.db")
    rows = []
    for tool in tools:
        if not tool.get("in_remnux"):
            continue
        name = tool["name"]
        # Pipes and newlines would corrupt the delimited format; cap length.
        summary = tool.get("description", "").replace("|", "/").replace("\n", " ").strip()[:120]
        if not summary:
            summary = "(no description available)"
        docs_src = tool["sources"]["remnux_docs"]
        course_src = tool["sources"]["for610"]
        # Prefer the REMnux-docs category, then the FOR610 one.
        if docs_src.get("covered"):
            category = docs_src.get("category", "")
        elif course_src.get("covered"):
            category = course_src.get("category", "")
        else:
            category = ""
        # Best usage example: first FOR610 typical_usage, else '--help'.
        example = ""
        if course_src.get("covered"):
            known_usages = course_src.get("typical_usage", [])
            if known_usages:
                example = known_usages[0]
        if not example:
            example = f"{name} --help"
        example = example.replace("|", " ").strip()
        tier = tool.get("help_tier", "stub")
        rows.append(f"{name}|{summary}|{category}|{example}|{tier}")
    rows.sort()
    with open(output_path, "w") as handle:
        handle.write("\n".join(rows) + "\n")
    print(f" tools.db: {len(rows)} entries")
    return len(rows)
# ============================================================
# Cheatsheet generator
# ============================================================
def sanitize_filename(name):
    """Lower-case *name* and replace filesystem-unsafe characters with dashes."""
    cleaned = re.sub(r'[^a-zA-Z0-9._-]', '-', name)
    return cleaned.strip('-').lower()
def generate_usage_comment(name, usage, index):
    """Produce a short descriptive comment for one usage example.

    The first example is always labelled "Basic usage"; later ones are
    described from their flags/operators, checked in a fixed priority order.
    (*name* is accepted for interface compatibility but not consulted.)
    """
    if index == 0:
        return "Basic usage"
    # Flag-based rules, checked in order against the raw command text.
    flag_rules = (
        (("-vv", "--verbose"), "Verbose output with details"),
        (("--no-static", "--no static"), "Skip static analysis, focus on dynamic"),
        (("-n ",), "Suppress default output"),
        (("-a ", "--all"), "Show all results"),
        (("-s ",), "Select specific item"),
        (("-d ",), "Dump/extract content"),
        (("-r ",), "Recursive/follow references"),
        (("-k ",), "Extract by keyword"),
        (("-o ",), "Output to file"),
        (("-f ",), "Process input file"),
        (("-i ",), "Case-insensitive search"),
    )
    for needles, comment in flag_rules:
        if any(needle in usage for needle in needles):
            return comment
    if "grep" in usage.lower():
        return "Filter output for specific pattern"
    if "--help" in usage:
        return "Show help"
    if "|" in usage:
        return "Pipe output for processing"
    if ">" in usage:
        return "Save output to file"
    return "Alternative usage"
def format_recipes_section(tool_id, recipe_index):
    """Render the multi-tool recipes section of a cheatsheet ('' when none)."""
    matches = recipe_index.get(tool_id, [])
    if not matches:
        # Fall back to id variants: without a '-py' suffix, then dash-free.
        for alt_id in (tool_id.replace("-py", ""), tool_id.replace("-", "")):
            matches = recipe_index.get(alt_id, [])
            if matches:
                break
    if not matches:
        return ""
    # Keep the first occurrence of each recipe id, preserving order.
    unique, seen_ids = [], set()
    for recipe in matches:
        if recipe["id"] in seen_ids:
            continue
        seen_ids.add(recipe["id"])
        unique.append(recipe)
    out = [
        "",
        "# --- Recipes (multi-tool chains) ---",
        "",
    ]
    for recipe in unique:
        out.append(f"# >> {recipe['name']}")
        out.extend(recipe.get("commands", []))
        out.append("")
    return "\n".join(out)
def generate_cheatsheet_rich(t, recipe_index=None):
    """Generate a rich cheatsheet for a tool with FOR610 coverage.

    Layout: '#' header comments (name, description, FOR610 metadata, docs
    URL), a '%' tag line for the cheatsheet browser, one commented command
    per typical_usage entry, and an optional recipes section.
    """
    f610 = t["sources"]["for610"]
    name = t["name"]
    desc = t.get("description", "")
    labs = f610.get("labs", [])
    sections = f610.get("sections", [])
    tags = f610.get("tags", [])
    usages = f610.get("typical_usage", [])
    author = f610.get("author", "")
    lines = [
        f"# {name}",
        f"# {desc}",
    ]
    # Collect FOR610 metadata into a single ' | '-joined header comment.
    meta_parts = []
    if labs:
        meta_parts.append(f"FOR610 Labs: {', '.join(labs)}")
    if sections:
        meta_parts.append(f"Sections: {', '.join(str(s) for s in sections)}")
    if author:
        meta_parts.append(f"Author: {author}")
    if meta_parts:
        lines.append(f"# {' | '.join(meta_parts)}")
    # REMnux docs URL if available
    if t["sources"]["remnux_docs"].get("covered"):
        url = t["sources"]["remnux_docs"].get("docs_url", "")
        if url:
            lines.append(f"# Docs: {url}")
    lines.append("")
    # Tags (at most 8; fall back to the lower-cased tool name)
    tag_str = ", ".join(tags[:8]) if tags else name.lower()
    lines.append(f"% {tag_str}")
    lines.append("")
    # Usage examples with descriptive comments
    for i, usage in enumerate(usages):
        comment = generate_usage_comment(name, usage, i)
        lines.append(f"# {comment}")
        lines.append(usage)
        lines.append("")
    # If no usage examples, add a basic one
    if not usages:
        lines.append(f"# Show help")
        lines.append(f"{name} --help")
        lines.append("")
    # Append recipes section if this tool participates in any recipes
    if recipe_index:
        recipes_text = format_recipes_section(t["id"], recipe_index)
        if recipes_text:
            lines.append(recipes_text)
    return "\n".join(lines)
def generate_cheatsheet_standard(t):
    """Build a cheatsheet for a tool documented only at docs.remnux.org."""
    docs = t["sources"]["remnux_docs"]
    name = t["name"]
    # Prefer the master description, falling back to the docs one.
    summary = t.get("description", "") or docs.get("description", "")
    header = [f"# {name}"]
    header.append(f"# {summary}" if summary else f"# {name} tool")
    category = docs.get("category", "")
    if category:
        header.append(f"# Category: {category}")
    docs_url = docs.get("docs_url", "")
    if docs_url:
        header.append(f"# Docs: {docs_url}")
    body = [
        "",
        f"% {sanitize_filename(name)}",
        "",
        f"# Show help for {name}",
        f"{name} --help",
        "",
    ]
    return "\n".join(header + body)
def generate_cheatsheet_basic(t):
    """Build a minimal cheatsheet for a tool known only from salt-states."""
    name = t["name"]
    salt = t["sources"]["salt_states"]
    install = salt.get("install_method", "unknown")
    pkg = salt.get("package_name", name)
    parts = [
        f"# {name}",
        f"# Installed via: {install} ({pkg})",
        "",
        f"% {sanitize_filename(name)}",
        "",
        f"# Show help for {name}",
        f"{name} --help",
        "",
    ]
    return "\n".join(parts)
def generate_cheatsheets(tools, recipe_index=None):
    """Write one .cheat file per REMnux tool; returns the number written."""
    cheat_dir = os.path.join(GEN_DIR, "cheatsheets")
    os.makedirs(cheat_dir, exist_ok=True)
    written = 0
    for tool in tools:
        if not tool.get("in_remnux"):
            continue
        # Pick the renderer matching the tool's documentation tier.
        tier = tool.get("help_tier", "stub")
        if tier == "rich":
            content = generate_cheatsheet_rich(tool, recipe_index=recipe_index)
        elif tier == "standard":
            content = generate_cheatsheet_standard(tool)
        else:
            content = generate_cheatsheet_basic(tool)
        target = os.path.join(cheat_dir, sanitize_filename(tool["name"]) + ".cheat")
        with open(target, "w") as handle:
            handle.write(content)
        written += 1
    print(f" cheatsheets: {written} .cheat files")
    return written
# ============================================================
# Workflow generator
# ============================================================
def _get_tool_examples(tool_name, master_tools_by_name):
"""Get 1-2 example commands for a tool from the master inventory."""
tool = master_tools_by_name.get(tool_name)
if not tool:
# Try kebab-case lookup
normalized = tool_name.lower().replace("_", "-")
tool = master_tools_by_name.get(normalized)
if tool and tool["sources"]["for610"].get("covered"):
usages = tool["sources"]["for610"].get("typical_usage", [])
return usages[:2]
return []
def generate_workflows(workflows_data, master_tools=None):
    """Generate readable workflow help files with inline examples.

    Writes one .txt file per workflow plus an index.txt listing all of
    them.  When *master_tools* is given, each step's tools are looked up
    (by name, id, and alias) so one example command per tool can be
    inlined into the step.
    """
    wf_dir = os.path.join(GEN_DIR, "workflows")
    os.makedirs(wf_dir, exist_ok=True)
    # Build tool name lookup for inline examples
    tools_by_name = {}
    if master_tools:
        for t in master_tools:
            tools_by_name[t["name"].lower()] = t
            tools_by_name[t["id"]] = t
            for alias in t.get("aliases", []):
                tools_by_name[alias.lower()] = t
    workflows = workflows_data.get("workflows", [])
    count = 0
    for wf in workflows:
        wf_id = wf["id"]
        name = wf["name"]
        desc = wf.get("description", "")
        steps = wf.get("steps", [])
        related_labs = wf.get("related_labs", [])
        lines = [
            f"{'='*60}",
            f" {name}",
            f"{'='*60}",
            "",
            f" {desc}",
            "",
        ]
        if related_labs:
            lines.append(f" Related FOR610 Labs: {', '.join(related_labs)}")
            lines.append("")
        # NOTE(review): ''*60 evaluates to an empty string, so this appends a
        # blank line — possibly a ruled separator (e.g. '-'*60) was intended;
        # confirm against the rendered output.
        lines.append(f"{''*60}")
        lines.append("")
        for step in steps:
            order = step.get("order", "?")
            step_name = step.get("name", "")
            step_desc = step.get("description", "")
            step_tools = step.get("tools", [])
            lines.append(f" Step {order}: {step_name}")
            if step_tools:
                lines.append(f" Tools: {', '.join(step_tools)}")
            if step_desc:
                wrapped = textwrap.fill(step_desc, width=56, initial_indent=" ", subsequent_indent=" ")
                lines.append(wrapped)
            # Add inline command examples for each tool
            if step_tools and tools_by_name:
                examples_shown = False
                for tool_name in step_tools:
                    examples = _get_tool_examples(tool_name, tools_by_name)
                    if examples:
                        if not examples_shown:
                            lines.append("")
                        for ex in examples[:1]:  # Show 1 example per tool
                            lines.append(f" $ {ex}")
                        examples_shown = True
            lines.append("")
        # NOTE(review): same empty ''*60 separator as above.
        lines.append(f"{''*60}")
        lines.append(f" Tip: 'fhelp cheat <tool>' for full examples")
        lines.append(f" 'Ctrl+G' for interactive cheatsheet browser")
        lines.append("")
        filename = wf_id.replace("_", "-") + ".txt"
        with open(os.path.join(wf_dir, filename), "w") as f:
            f.write("\n".join(lines))
        count += 1
    # Also generate an index file
    index_lines = [
        f"{'='*60}",
        f" Available Analysis Workflows",
        f"{'='*60}",
        "",
    ]
    for wf in workflows:
        wf_id = wf["id"].replace("_", "-")
        name = wf["name"]
        desc = wf.get("description", "")
        index_lines.append(f" {wf_id}")
        index_lines.append(f" {name}")
        wrapped = textwrap.fill(desc, width=56, initial_indent=" ", subsequent_indent=" ")
        index_lines.append(wrapped)
        index_lines.append("")
    index_lines += [
        f"{''*60}",
        f" Usage: fhelp workflow <name>",
        f" Example: fhelp workflow static-analysis",
        "",
    ]
    with open(os.path.join(wf_dir, "index.txt"), "w") as f:
        f.write("\n".join(index_lines))
    print(f" workflows: {count} workflow files + index")
    return count
# ============================================================
# TLDR generator
# ============================================================
def generate_tldr(tools):
    """Write TLDR-format pages for rich/standard REMnux tools; returns count."""
    tldr_dir = os.path.join(GEN_DIR, "tldr")
    os.makedirs(tldr_dir, exist_ok=True)
    written = 0
    for tool in tools:
        if not tool.get("in_remnux"):
            continue
        # Only tools with real documentation get a TLDR page.
        if tool.get("help_tier", "stub") not in ("rich", "standard"):
            continue
        name = tool["name"]
        summary = tool.get("description", "") or f"{name} tool"
        # Usage examples: FOR610 typical_usage, else a '--help' placeholder.
        examples = []
        if tool["sources"]["for610"].get("covered"):
            examples = tool["sources"]["for610"].get("typical_usage", [])
        if not examples:
            examples = [f"{name} --help"]
        page = [
            f"# {name}",
            "",
            f"> {summary}",
            "",
        ]
        for example in examples[:4]:
            page.append(f"- Run {name}:")
            page.append("")
            page.append(f"`{example}`")
            page.append("")
        target = os.path.join(tldr_dir, sanitize_filename(name) + ".md")
        with open(target, "w") as handle:
            handle.write("\n".join(page))
        written += 1
    print(f" tldr: {written} pages")
    return written
# ============================================================
# Main
# ============================================================
def main():
    """Drive generation of every help artifact from the master inventory."""
    print("Generating help artifacts from master inventory...")
    inventory = load_master()
    tool_list = inventory["tools"]
    wf_data = load_workflows()
    recipe_data = load_recipes()
    recipe_idx = build_recipe_index(recipe_data)
    print(f"\nInput: {len(tool_list)} tools, {len(wf_data.get('workflows', []))} workflows, {len(recipe_data.get('recipes', []))} recipes")
    print()
    n_db = generate_tools_db(tool_list)
    n_cheat = generate_cheatsheets(tool_list, recipe_index=recipe_idx)
    n_wf = generate_workflows(wf_data, master_tools=tool_list)
    n_tldr = generate_tldr(tool_list)
    print(f"\nAll artifacts generated in {GEN_DIR}/")
    print(f" tools.db: {n_db} entries")
    print(f" cheatsheets/: {n_cheat} files")
    print(f" workflows/: {n_wf} + index")
    print(f" tldr/: {n_tldr} pages")
if __name__ == "__main__":
    main()
+202
View File
@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""Parse REMnux salt-states repository to extract all installed tools/packages.
Fetches the salt-states repo tree from GitHub, parses .sls files to identify
what gets installed, and outputs data/remnux/sources/salt-states.yaml.
"""
import json
import re
import urllib.request
import yaml
import os
# GitHub endpoints for the REMnux/salt-states repository.
GITHUB_API = "https://api.github.com/repos/REMnux/salt-states"
RAW_BASE = "https://raw.githubusercontent.com/REMnux/salt-states/master"
# Output YAML lives under data/remnux/sources, relative to the repo root.
OUTPUT_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "remnux", "sources", "salt-states.yaml")
def fetch_json(url):
    """GET *url* with a custom User-Agent and decode the body as JSON."""
    request = urllib.request.Request(url, headers={"User-Agent": "remnux-tool-parser"})
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read()
    return json.loads(payload.decode())
def fetch_text(url):
    """GET *url* and return the body as text, or None on any fetch error."""
    request = urllib.request.Request(url, headers={"User-Agent": "remnux-tool-parser"})
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            return response.read().decode()
    except Exception as exc:
        # Best-effort fetch: report the failure and let the caller skip it.
        print(f" Warning: could not fetch {url}: {exc}")
        return None
def get_sls_files():
    """Return the paths of every .sls blob in the salt-states repository."""
    # The recursive tree listing contains every path in the repo in one call.
    listing = fetch_json(f"{GITHUB_API}/git/trees/master?recursive=1")
    return [entry["path"] for entry in listing["tree"]
            if entry["path"].endswith(".sls") and entry["type"] == "blob"]
def classify_sls_path(path):
    """Classify the install method from the directory structure of *path*."""
    lowered = path.lower()
    # First matching substring wins; more specific directory names come first
    # so that e.g. "python3-packages" maps to pip before the generic
    # "package" -> apt rule fires.
    rules = (
        ("python3-package", "pip"),
        ("python-package", "pip"),
        ("pip", "pip"),
        ("rubygem", "gem"),
        ("npm", "npm"),
        ("node", "npm"),
        ("perl-package", "perl"),
        ("package", "apt"),
        ("tools", "manual"),
        ("script", "script"),
    )
    for needle, method in rules:
        if needle in lowered:
            return method
    return "unknown"
def extract_tool_name_from_path(path):
    """Extract a human-readable tool name from the .sls file path.

    Returns None for orchestration/grouping files that do not correspond
    to a single tool.
    """
    non_tool_basenames = frozenset({
        "init", "addon", "cloud", "dedicated", "theme", "remnux-config",
        "apt-transport-https", "packages", "python3-packages", "python-packages",
        "rubygems", "perl-packages", "node-packages", "tools", "scripts",
    })
    stem = os.path.basename(path).replace(".sls", "")
    return None if stem in non_tool_basenames else stem
def parse_sls_content(content, path):
    """Parse a .sls file and extract package/tool information.

    Returns a list with at most one entry dict describing the tool the
    state installs; empty list when the file is unfetchable or not a tool.
    """
    if not content:
        return []
    tool_id = extract_tool_name_from_path(path)
    if not tool_id:
        return []
    method = classify_sls_path(path)

    # Collect candidate package names from several salt-state patterns.
    candidates = []
    # State IDs immediately followed by a pip/pkg/gem/npm .installed function.
    candidates.extend(
        m.group(1)
        for m in re.finditer(r'(\w[\w.-]+):\s*\n\s+(?:pip|pkg|gem|npm)\.installed', content)
    )
    # Explicit "- name: <pkg>" entries; skip jinja templates and absolute paths.
    for m in re.finditer(r'-\s+name:\s+([^\s#\n]+)', content):
        candidate = m.group(1).strip("'\"")
        if candidate and not candidate.startswith(('{', '/')):
            candidates.append(candidate)
    # Filenames downloaded via wget/curl (manual installs); ignore key files.
    for m in re.finditer(r'(?:wget|curl)\s+.*?/([^/\s"]+?)(?:\s|"|$)', content):
        filename = m.group(1)
        if '.' in filename and not filename.endswith('.key'):
            candidates.append(filename)
    # Scripts/binaries deployed into /usr/local/bin.
    candidates.extend(
        m.group(1) for m in re.finditer(r'/usr/local/bin/([^:\s]+)', content)
    )

    # Case-insensitive dedup preserving first-seen spelling and order.
    seen_lower = set()
    names = []
    for candidate in candidates:
        candidate = candidate.strip().strip("'\"")
        if candidate and len(candidate) > 1 and candidate.lower() not in seen_lower:
            seen_lower.add(candidate.lower())
            names.append(candidate)

    entry = {
        "id": tool_id,
        "package_names": names if names else [tool_id],
        "install_method": method,
        "salt_state_path": path,
    }
    # Heuristic: a literal False near onlyif/unless hints at a conditional install.
    if "False" in content and ("onlyif" in content.lower() or "unless" in content.lower()):
        entry["possibly_conditional"] = True
    return [entry]
def main():
    """Fetch the salt-states repo, parse every .sls file, and write the YAML inventory."""
    print("Fetching salt-states repository tree...")
    sls_files = get_sls_files()
    print(f"Found {len(sls_files)} .sls files")
    # Filter to relevant paths (skip top-level orchestration files)
    relevant = [f for f in sls_files if f.startswith("remnux/")]
    print(f" {len(relevant)} under remnux/")
    all_tools = []
    categories_seen = set()
    for i, path in enumerate(relevant):
        # Progress indicator: one status line per 20 files fetched.
        if i % 20 == 0:
            print(f" Processing {i}/{len(relevant)}...")
        # Derive category from path
        parts = path.split("/")
        if len(parts) >= 3:
            category_dir = parts[1]  # e.g., "python3-packages", "tools", "packages"
            categories_seen.add(category_dir)
        # One HTTP request per state file; fetch_text returns None on failure
        # and parse_sls_content treats None as "no tools".
        content = fetch_text(f"{RAW_BASE}/{path}")
        tools = parse_sls_content(content, path)
        all_tools.extend(tools)
    # Deduplicate by id
    seen_ids = set()
    unique_tools = []
    for t in all_tools:
        if t["id"] not in seen_ids:
            seen_ids.add(t["id"])
            unique_tools.append(t)
    # Sort by id
    unique_tools.sort(key=lambda t: t["id"])
    output = {
        "metadata": {
            "source": "https://github.com/REMnux/salt-states",
            "branch": "master",
            "total_sls_files": len(relevant),
            "total_tools_extracted": len(unique_tools),
            "install_method_counts": {},
            "salt_directories": sorted(categories_seen),
        },
        "tools": unique_tools,
    }
    # Count install methods
    for t in unique_tools:
        m = t["install_method"]
        output["metadata"]["install_method_counts"][m] = \
            output["metadata"]["install_method_counts"].get(m, 0) + 1
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    print(f"\nDone! Extracted {len(unique_tools)} tools")
    for method, count in sorted(output["metadata"]["install_method_counts"].items()):
        print(f" {method}: {count}")
    print(f"Output: {OUTPUT_PATH}")
if __name__ == "__main__":
    main()
+226
View File
@@ -0,0 +1,226 @@
#!/usr/bin/env python3
"""Scrape REMnux documentation to extract all documented tools.
Fetches docs.remnux.org tool listing pages and extracts tool names,
descriptions, categories, and URLs. Outputs data/remnux/sources/remnux-docs.yaml.
"""
import re
import urllib.request
import yaml
import os
import time
BASE_URL = "https://docs.remnux.org/discover-the-tools"
OUTPUT_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "remnux", "sources", "remnux-docs.yaml")
# All known category pages from docs.remnux.org
CATEGORY_PAGES = [
# Examine Static Properties
("Examine Static Properties > General", "examine+static+properties/general"),
("Examine Static Properties > PE Files", "examine+static+properties/pe-files"),
("Examine Static Properties > ELF Files", "examine+static+properties/elf-files"),
("Examine Static Properties > .NET", "examine+static+properties/.net"),
("Examine Static Properties > Go", "examine+static+properties/go"),
("Examine Static Properties > Deobfuscation", "examine+static+properties/deobfuscation"),
# Statically Analyze Code
("Statically Analyze Code > General", "statically+analyze+code/general"),
("Statically Analyze Code > Unpacking", "statically+analyze+code/unpacking"),
("Statically Analyze Code > PE Files", "statically+analyze+code/pe-files"),
("Statically Analyze Code > Python", "statically+analyze+code/python"),
("Statically Analyze Code > Scripts", "statically+analyze+code/scripts"),
("Statically Analyze Code > Java", "statically+analyze+code/java"),
("Statically Analyze Code > .NET", "statically+analyze+code/.net"),
("Statically Analyze Code > Android", "statically+analyze+code/android"),
# Dynamically Reverse-Engineer Code
("Dynamically Reverse-Engineer Code > General", "dynamically+reverse-engineer+code/general"),
("Dynamically Reverse-Engineer Code > Shellcode", "dynamically+reverse-engineer+code/shellcode"),
("Dynamically Reverse-Engineer Code > Scripts", "dynamically+reverse-engineer+code/scripts"),
("Dynamically Reverse-Engineer Code > ELF Files", "dynamically+reverse-engineer+code/elf-files"),
# Memory Forensics
("Perform Memory Forensics", "perform+memory+forensics"),
# Network Interactions
("Explore Network Interactions > Monitoring", "explore+network+interactions/monitoring"),
("Explore Network Interactions > Connecting", "explore+network+interactions/connecting"),
("Explore Network Interactions > Services", "explore+network+interactions/services"),
# System Interactions
("Investigate System Interactions", "investigate+system+interactions"),
# Documents
("Analyze Documents > General", "analyze+documents/general"),
("Analyze Documents > PDF", "analyze+documents/pdf"),
("Analyze Documents > Microsoft Office", "analyze+documents/microsoft+office"),
("Analyze Documents > Email Messages", "analyze+documents/email+messages"),
# AI
("Use Artificial Intelligence", "use+artificial+intelligence"),
# Data
("Gather and Analyze Data", "gather+and+analyze+data"),
# View/Edit
("View or Edit Files", "view+or+edit+files"),
# Utilities
("General Utilities", "general+utilities"),
]
def fetch_page(url):
    """Fetch *url* and return its text content, or None if the request fails."""
    request = urllib.request.Request(url, headers={
        "User-Agent": "Mozilla/5.0 (remnux-doc-scraper)",
        "Accept": "text/html,application/xhtml+xml",
    })
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            # Replace undecodable bytes rather than aborting the scrape.
            return response.read().decode("utf-8", errors="replace")
    except Exception as e:
        print(f" Warning: could not fetch {url}: {e}")
        return None
def normalize_id(name):
    """Convert a tool display name to a normalized kebab-case ID."""
    normalized = name.lower().strip()
    # Keep script-type suffixes visible in the ID (e.g. pdfid.py -> pdfid-py);
    # at most one of these anchored patterns can match.
    for suffix_pattern, replacement in (
        (r'\.py$', '-py'),
        (r'\.pl$', '-pl'),
        (r'\.bat$', '-bat'),
    ):
        normalized = re.sub(suffix_pattern, replacement, normalized)
    # Collapse every remaining non-alphanumeric run into a single hyphen.
    normalized = re.sub(r'[^a-z0-9]+', '-', normalized)
    return normalized.strip('-')
def extract_tools_from_html(html, category, category_path):
    """Extract tool entries from a docs page HTML.

    Args:
        html: raw HTML of one docs.remnux.org category page.
        category: human-readable category label (e.g. "Analyze Documents > PDF").
        category_path: URL path fragment of the page under BASE_URL.

    Returns:
        List of dicts with name/id/category/category_path/description/docs_url
        (plus anchor and optionally website when found via headings).
    """
    tools = []
    # GitBook pages use specific patterns for tool headings
    # Pattern 1: <h2> or <h3> headings with tool names
    # Pattern 2: Bold text followed by description
    # The docs use a pattern like: **Tool Name** description text
    # Try to find tool sections - GitBook uses specific div/section patterns
    # Look for heading patterns with tool names
    heading_pattern = re.compile(
        r'<h[23][^>]*id="([^"]*)"[^>]*>.*?<a[^>]*>.*?</a>\s*(.*?)\s*</h[23]>',
        re.DOTALL | re.IGNORECASE
    )
    # Also try plain text patterns
    # GitBook often renders as: tool-name followed by description
    bold_pattern = re.compile(
        r'<strong>(.*?)</strong>\s*[-:]\s*(.*?)(?=<(?:br|p|div|strong|h[23])|$)',
        re.DOTALL | re.IGNORECASE
    )
    # Find headings first
    for match in heading_pattern.finditer(html):
        anchor_id = match.group(1)
        heading_text = re.sub(r'<[^>]+>', '', match.group(2)).strip()
        # Length cap filters out page-level headings that are not tool names.
        if heading_text and len(heading_text) < 80:
            # Get description from content after heading
            # (a fixed 500-char window after the heading, tags stripped)
            pos = match.end()
            desc_chunk = html[pos:pos+500]
            desc_chunk = re.sub(r'<[^>]+>', ' ', desc_chunk)
            desc_chunk = re.sub(r'\s+', ' ', desc_chunk).strip()
            # Take first sentence
            desc = desc_chunk.split('.')[0].strip() + '.' if desc_chunk else ""
            if len(desc) > 200:
                desc = desc[:197] + "..."
            # Try to find website URL near this section
            # (first external link within 2000 chars; excludes docs.remnux links)
            website_chunk = html[pos:pos+2000]
            website_match = re.search(r'href="(https?://(?!docs\.remnux)[^"]+)"', website_chunk)
            website = website_match.group(1) if website_match else ""
            tool = {
                "name": heading_text,
                "id": normalize_id(heading_text),
                "category": category,
                "category_path": category_path,
                "description": desc,
                "docs_url": f"{BASE_URL}/{category_path}",
                "anchor": anchor_id,
            }
            if website:
                tool["website"] = website
            tools.append(tool)
    # If we got nothing from headings, try the bold pattern
    if not tools:
        for match in bold_pattern.finditer(html):
            name = re.sub(r'<[^>]+>', '', match.group(1)).strip()
            desc = re.sub(r'<[^>]+>', ' ', match.group(2)).strip()
            desc = re.sub(r'\s+', ' ', desc).strip()
            if name and len(name) < 80 and len(name) > 1:
                if len(desc) > 200:
                    desc = desc[:197] + "..."
                tools.append({
                    "name": name,
                    "id": normalize_id(name),
                    "category": category,
                    "category_path": category_path,
                    "description": desc,
                    "docs_url": f"{BASE_URL}/{category_path}",
                })
    return tools
def main():
    """Scrape every known docs.remnux.org category page and write the YAML inventory."""
    print("Scraping REMnux documentation...")
    all_tools = []
    for category, path in CATEGORY_PAGES:
        url = f"{BASE_URL}/{path}"
        print(f" Fetching: {category}")
        html = fetch_page(url)
        # fetch_page returns None on failure; skip the page rather than abort.
        if not html:
            print(f" Skipped (fetch failed)")
            continue
        tools = extract_tools_from_html(html, category, path)
        print(f" Found {len(tools)} tools")
        all_tools.extend(tools)
        time.sleep(0.3) # Be polite
    # Deduplicate by id (same tool can appear in multiple categories)
    seen = {}
    for t in all_tools:
        tid = t["id"]
        if tid not in seen:
            seen[tid] = t
        else:
            # Tool appears in multiple categories - track both
            existing = seen[tid]
            if "additional_categories" not in existing:
                existing["additional_categories"] = []
            existing["additional_categories"].append(t["category"])
    unique_tools = sorted(seen.values(), key=lambda t: t["id"])
    output = {
        "metadata": {
            "source": "https://docs.remnux.org/discover-the-tools",
            "categories_scraped": len(CATEGORY_PAGES),
            "total_tools_extracted": len(unique_tools),
            "category_counts": {},
        },
        "tools": unique_tools,
    }
    # Count per category
    # (counts pre-dedup occurrences, so a tool listed twice counts in both)
    for t in all_tools:
        cat = t["category"]
        output["metadata"]["category_counts"][cat] = \
            output["metadata"]["category_counts"].get(cat, 0) + 1
    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
    print(f"\nDone! Extracted {len(unique_tools)} unique tools from {len(CATEGORY_PAGES)} category pages")
    print(f"Output: {OUTPUT_PATH}")
if __name__ == "__main__":
    main()
+360
View File
@@ -0,0 +1,360 @@
#!/usr/bin/env python3
"""Comprehensive verification of generated help artifacts.
Tests:
1. All FOR610 tools with in_remnux=true have cheatsheets
2. All cheatsheet content matches researched data
3. All workflows are generated and contain correct tool references
4. tools.db entries match master inventory
5. No orphaned references (tools in labs but missing from master)
6. Rich-tier cheatsheets have usage examples from FOR610
7. REMnux docs tools have correct descriptions
"""
import os
import sys
import yaml
import glob
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
def load_yaml(path):
    """Read *path* and return its parsed YAML content."""
    with open(path) as handle:
        return yaml.safe_load(handle)
def test_master_inventory():
    """Verify master inventory integrity.

    Checks that every tool carries the required fields (id, name, sources,
    help_tier) and that no two tools share an id.  Returns the list of
    error strings found (empty when the inventory is clean).
    """
    from collections import Counter

    print("=" * 60)
    print("TEST 1: Master Inventory Integrity")
    print("=" * 60)
    errors = []
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    tools = master["tools"]
    # Check all tools have required fields
    for t in tools:
        tid = t.get("id", "MISSING")
        if not t.get("id"):
            errors.append(f"Tool missing id: {t}")
        if not t.get("name"):
            errors.append(f"Tool {tid} missing name")
        if "sources" not in t:
            errors.append(f"Tool {tid} missing sources")
        if "help_tier" not in t:
            errors.append(f"Tool {tid} missing help_tier")
    # Check no duplicate IDs.  Counter is O(n); the previous
    # ids.count(x)-per-element approach was accidentally O(n^2).
    id_counts = Counter(t["id"] for t in tools)
    dupes = {tid for tid, count in id_counts.items() if count > 1}
    if dupes:
        errors.append(f"Duplicate IDs: {dupes}")
    print(f" Total tools: {len(tools)}")
    print(f" Errors: {len(errors)}")
    for e in errors[:10]:
        print(f" ! {e}")
    return errors
def test_for610_coverage():
    """Verify all FOR610 in_remnux tools appear in master and have cheatsheets.

    Only tools missing from the master inventory count as errors; missing
    cheatsheets and missing usage examples are reported informationally.
    """
    print("\n" + "=" * 60)
    print("TEST 2: FOR610 Tool Coverage")
    print("=" * 60)
    errors = []
    for610 = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    master_ids = {t["id"] for t in master["tools"]}
    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")
    cheat_files = {os.path.basename(f).replace(".cheat", "")
                   for f in glob.glob(os.path.join(cheat_dir, "*.cheat"))}
    for610_remnux = [t for t in for610["tools"] if t.get("in_remnux")]
    for610_all = for610["tools"]
    # Check all FOR610 in_remnux tools are in master
    missing_from_master = []
    for t in for610_remnux:
        if t["id"] not in master_ids:
            missing_from_master.append(t["id"])
            errors.append(f"FOR610 tool '{t['id']}' ({t['name']}) not in master inventory")
    # Check all FOR610 in_remnux tools have cheatsheets; sheets may be filed
    # under the kebab-cased display name, the id, or the lowercase name.
    missing_cheats = []
    for t in for610_remnux:
        name_variants = [
            t["name"].lower().replace(" ", "-"),
            t["id"],
            t["name"].lower(),
        ]
        if not any(v in cheat_files for v in name_variants):
            missing_cheats.append(t["name"])
    # Check rich-tier cheatsheets have usage examples
    rich_without_examples = []
    for t in for610_remnux:
        usages = t.get("typical_usage", [])
        cheat_path = os.path.join(cheat_dir, t["name"].lower().replace(" ", "-") + ".cheat")
        if not os.path.exists(cheat_path):
            cheat_path = os.path.join(cheat_dir, t["id"] + ".cheat")
        if os.path.exists(cheat_path):
            # Context manager closes the handle (the old open().read() leaked it).
            with open(cheat_path) as fh:
                content = fh.read()
            # Only the first documented usage example is required to appear.
            if usages and not any(u in content for u in usages[:1]):
                rich_without_examples.append(t["name"])
    print(f" FOR610 tools (all): {len(for610_all)}")
    print(f" FOR610 in REMnux: {len(for610_remnux)}")
    print(f" Missing from master: {len(missing_from_master)}")
    print(f" Missing cheatsheets: {len(missing_cheats)}")
    if missing_cheats:
        for m in missing_cheats[:5]:
            print(f" ! {m}")
    print(f" Rich without examples: {len(rich_without_examples)}")
    if rich_without_examples:
        for m in rich_without_examples[:5]:
            print(f" ! {m}")
    print(f" Errors: {len(errors)}")
    return errors
def test_tools_db():
    """Verify tools.db matches master inventory.

    All findings here are informational (printed); the returned error list
    stays empty so this check never fails the run on its own.
    """
    print("\n" + "=" * 60)
    print("TEST 3: tools.db Consistency")
    print("=" * 60)
    errors = []
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    remnux_tools = {t["name"]: t for t in master["tools"] if t.get("in_remnux")}
    db_path = os.path.join(BASE_DIR, "data/generated/tools.db")
    # tools.db is a pipe-delimited flat file: name|description|category|usage|tier
    db_entries = {}
    with open(db_path) as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            fields = raw.split("|")
            if len(fields) < 5:
                continue
            name, description, category, usage, tier = fields[:5]
            db_entries[name] = {
                "name": name,
                "description": description,
                "category": category,
                "usage": usage,
                "tier": tier,
            }
    # Every REMnux tool in the master inventory should have a DB row.
    missing_from_db = [name for name in remnux_tools if name not in db_entries]
    # Rows that fell back to the placeholder description.
    empty_descs = [entry["name"] for entry in db_entries.values()
                   if entry["description"] == "(no description available)"]
    # Help tier recorded in the DB must match the master inventory.
    tier_mismatches = []
    for name, entry in db_entries.items():
        master_tool = remnux_tools.get(name)
        if master_tool is None:
            continue
        expected_tier = master_tool.get("help_tier", "stub")
        if entry["tier"] != expected_tier:
            tier_mismatches.append(f"{name}: db={entry['tier']} vs master={expected_tier}")
    print(f" tools.db entries: {len(db_entries)}")
    print(f" REMnux tools in master: {len(remnux_tools)}")
    print(f" Missing from DB: {len(missing_from_db)}")
    if missing_from_db:
        for m in missing_from_db[:5]:
            print(f" ! {m}")
    print(f" Empty descriptions: {len(empty_descs)}")
    if empty_descs:
        for m in empty_descs[:5]:
            print(f" ! {m}")
    print(f" Tier mismatches: {len(tier_mismatches)}")
    return errors
def test_workflows():
    """Verify all workflow files are generated and contain valid tool references.

    Returns error strings for missing workflow files, a missing index, and
    suspiciously short workflow files.
    """
    print("\n" + "=" * 60)
    print("TEST 4: Workflow Files")
    print("=" * 60)
    errors = []
    wf_src = load_yaml(os.path.join(BASE_DIR, "data/for610/workflows.yaml"))
    wf_dir = os.path.join(BASE_DIR, "data/generated/workflows")
    expected_workflows = wf_src.get("workflows", [])
    generated = glob.glob(os.path.join(wf_dir, "*.txt"))
    generated_names = {os.path.basename(f).replace(".txt", "") for f in generated}
    # Check all workflows generated (workflow ids use underscores, files use dashes)
    for wf in expected_workflows:
        wf_id = wf["id"].replace("_", "-")
        if wf_id not in generated_names:
            errors.append(f"Missing workflow file: {wf_id}.txt")
    # Check index file exists
    if "index" not in generated_names:
        errors.append("Missing workflow index.txt")
    # Check each workflow file has content
    for f in generated:
        # Context manager closes the handle (the old open().read() leaked it).
        with open(f) as fh:
            content = fh.read()
        if len(content) < 50:
            errors.append(f"Workflow file too short: {os.path.basename(f)}")
    print(f" Expected workflows: {len(expected_workflows)}")
    print(f" Generated files: {len(generated)} (including index)")
    print(f" Errors: {len(errors)}")
    for e in errors:
        print(f" ! {e}")
    return errors
def test_lab_tool_references():
    """Verify all tools referenced in labs exist in the FOR610 and master inventories.

    A lab tool missing from FOR610 is a hard error.  A lab tool that exists
    in FOR610 but not in the merged master inventory is reported
    informationally (the merge step may have renamed or dropped it) without
    failing the run, preserving the original pass/fail behavior.
    """
    print("\n" + "=" * 60)
    print("TEST 5: Lab-Tool Cross-References")
    print("=" * 60)
    errors = []
    labs = load_yaml(os.path.join(BASE_DIR, "data/for610/labs.yaml"))
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    master_ids = {t["id"] for t in master["tools"]}
    for610_tools = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    for610_ids = {t["id"] for t in for610_tools["tools"]}
    # Check all tool_ids in labs exist in FOR610 (hard error).
    missing = set()
    # Fix: master_ids was previously loaded but never consulted, despite the
    # docstring claiming a master-inventory check.
    not_in_master = set()
    for lab in labs["labs"]:
        for tu in lab.get("tools_used", []):
            tid = tu["tool_id"]
            if tid not in for610_ids:
                missing.add(f"Lab {lab['id']}: tool '{tid}'")
                errors.append(f"Lab {lab['id']} references unknown tool: {tid}")
            elif tid not in master_ids:
                not_in_master.add(tid)
    print(f" Labs: {len(labs['labs'])}")
    print(f" Missing tool references: {len(missing)}")
    for m in sorted(missing)[:5]:
        print(f" ! {m}")
    if not_in_master:
        print(f" Lab tools absent from master inventory: {len(not_in_master)}")
        for m in sorted(not_in_master)[:5]:
            print(f" ! {m}")
    return errors
def test_remnux_docs_coverage():
    """Check how many REMnux-documented tools have help content.

    Purely informational: prints coverage statistics and always returns an
    empty error list.
    """
    print("\n" + "=" * 60)
    print("TEST 6: REMnux Docs Coverage in Help")
    print("=" * 60)
    errors = []
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")
    docs_tools = [t for t in master["tools"]
                  if t["sources"]["remnux_docs"].get("covered") and t.get("in_remnux")]
    docs_with_cheat = 0
    docs_without_cheat = []
    for t in docs_tools:
        # A cheatsheet may be filed under the kebab-cased display name or the
        # tool id.  (Removed the unused `variants` local, which listed a
        # third spelling the check never actually consulted.)
        kebab_name = t["name"].lower().replace(" ", "-")
        found = any(os.path.exists(os.path.join(cheat_dir, v + ".cheat"))
                    for v in (kebab_name, t["id"]))
        if found:
            docs_with_cheat += 1
        else:
            docs_without_cheat.append(t["name"])
    print(f" REMnux-documented tools: {len(docs_tools)}")
    print(f" With cheatsheets: {docs_with_cheat}")
    print(f" Without cheatsheets: {len(docs_without_cheat)}")
    if docs_without_cheat:
        for m in docs_without_cheat[:5]:
            print(f" ! {m}")
    return errors
def test_cheatsheet_quality():
    """Spot-check cheatsheet content for key tools.

    Each key tool must have a cheatsheet containing a fixed set of expected
    command strings; misses are returned as errors.
    """
    print("\n" + "=" * 60)
    print("TEST 7: Cheatsheet Quality Spot-Checks")
    print("=" * 60)
    errors = []
    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")
    # Key tools that MUST have good cheatsheets
    key_tools = {
        "pdfid.py": ["pdfid.py", "document.pdf"],
        "pdf-parser.py": ["pdf-parser.py", "-a", "-s"],
        "oledump.py": ["oledump.py", "-s", "-v"],
        "capa": ["capa", "specimen"],
        "speakeasy": ["speakeasy", "-t"],
        "ghidra": ["ghidra"],
        "wireshark": ["wireshark"],
        "floss": ["floss"],
        "scdbgc": ["scdbgc", "/f"],
        "rtfdump.py": ["rtfdump.py"],
    }
    for tool, expected_strings in key_tools.items():
        cheat_path = os.path.join(cheat_dir, tool + ".cheat")
        if not os.path.exists(cheat_path):
            # Try without .py (sheets may use the "-py" naming convention)
            alt = tool.replace(".py", "-py") + ".cheat"
            cheat_path = os.path.join(cheat_dir, alt)
        if not os.path.exists(cheat_path):
            errors.append(f"Key tool {tool} has no cheatsheet")
            print(f" ! {tool}: NO CHEATSHEET")
            continue
        # Context manager closes the handle (the old open().read() leaked it).
        with open(cheat_path) as fh:
            content = fh.read()
        missing_strings = [s for s in expected_strings if s not in content]
        if missing_strings:
            errors.append(f"{tool} cheatsheet missing: {missing_strings}")
            print(f" ! {tool}: missing {missing_strings}")
        else:
            print(f" + {tool}: OK")
    return errors
def main():
    """Run every verification suite and exit nonzero when any issue is found."""
    suites = (
        test_master_inventory,
        test_for610_coverage,
        test_tools_db,
        test_workflows,
        test_lab_tool_references,
        test_remnux_docs_coverage,
        test_cheatsheet_quality,
    )
    all_errors = []
    for suite in suites:
        all_errors.extend(suite())
    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    if all_errors:
        print(f"\n Total issues found: {len(all_errors)}")
        for e in all_errors:
            print(f" - {e}")
        sys.exit(1)
    else:
        print("\n All tests passed!")
        sys.exit(0)
if __name__ == "__main__":
    main()