#!/usr/bin/env python3
"""Comprehensive verification of generated help artifacts.

Checks performed (in the order they run):
1. Master inventory integrity: required fields present, no duplicate IDs.
2. FOR610 coverage: in_remnux tools appear in the master inventory; missing
   cheatsheets and cheatsheets lacking the first usage example are reported.
3. tools.db consistency with the master inventory (report only).
4. Workflow files: every workflow and the index are generated and non-trivial.
5. Lab cross-references: every tool referenced by a lab is a known FOR610 tool.
6. REMnux docs coverage: documented tools with/without cheatsheets (report only).
7. Cheatsheet spot-checks: key tools contain the expected strings.

The process exits with status 1 if any check recorded an error, else 0.
"""

import glob
import os
import sys

import yaml

# Repository root, resolved relative to this script's directory.
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")


def load_yaml(path):
    """Parse the YAML file at *path* with ``yaml.safe_load`` and return it."""
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f)


def _read_text(path):
    """Return the full text of *path*, closing the file handle promptly."""
    with open(path, encoding="utf-8") as f:
        return f.read()


def _stem(path):
    """Return the file name of *path* without its final extension."""
    return os.path.splitext(os.path.basename(path))[0]


def _print_header(title, leading_blank=True):
    """Print a 60-column banner around *title*."""
    rule = "=" * 60
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def test_master_inventory():
    """Verify master inventory integrity.

    Every tool must have ``id``, ``name``, ``sources`` and ``help_tier``, and
    IDs must be unique.

    Returns:
        list[str]: one message per problem found.
    """
    _print_header("TEST 1: Master Inventory Integrity", leading_blank=False)
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    tools = master["tools"]

    # Check all tools have required fields
    for t in tools:
        tid = t.get("id", "MISSING")
        if not t.get("id"):
            errors.append(f"Tool missing id: {t}")
        if not t.get("name"):
            errors.append(f"Tool {tid} missing name")
        if "sources" not in t:
            errors.append(f"Tool {tid} missing sources")
        if "help_tier" not in t:
            errors.append(f"Tool {tid} missing help_tier")

    # Check no duplicate IDs. Tools without an id were already reported
    # above, so skip them here instead of raising KeyError.
    seen = set()
    dupes = set()
    for t in tools:
        tool_id = t.get("id")
        if not tool_id:
            continue
        if tool_id in seen:
            dupes.add(tool_id)
        else:
            seen.add(tool_id)
    if dupes:
        errors.append(f"Duplicate IDs: {dupes}")

    print(f" Total tools: {len(tools)}")
    print(f" Errors: {len(errors)}")
    # Only the first ten are echoed here; the full list is returned.
    for e in errors[:10]:
        print(f" ! {e}")
    return errors


def test_for610_coverage():
    """Verify FOR610 in_remnux tools appear in master and have cheatsheets.

    Only tools missing from the master inventory are recorded as errors.
    Missing cheatsheets and cheatsheets that do not contain the tool's first
    ``typical_usage`` entry are printed but do not fail the run.

    Returns:
        list[str]: one message per FOR610 tool absent from the master list.
    """
    _print_header("TEST 2: FOR610 Tool Coverage")
    errors = []

    for610 = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    # .get() so a master entry without an id (reported by test 1) cannot
    # crash this test.
    master_ids = {t.get("id") for t in master["tools"]}

    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")
    cheat_files = {_stem(f) for f in glob.glob(os.path.join(cheat_dir, "*.cheat"))}

    for610_all = for610["tools"]
    for610_remnux = [t for t in for610_all if t.get("in_remnux")]

    # Check all FOR610 in_remnux tools are in master
    missing_from_master = []
    for t in for610_remnux:
        if t["id"] not in master_ids:
            missing_from_master.append(t["id"])
            errors.append(
                f"FOR610 tool '{t['id']}' ({t['name']}) not in master inventory"
            )

    # Check all FOR610 in_remnux tools have cheatsheets, trying each of the
    # file-name spellings a cheatsheet might use.
    missing_cheats = []
    for t in for610_remnux:
        name_variants = (
            t["name"].lower().replace(" ", "-"),
            t["id"],
            t["name"].lower(),
        )
        if not any(v in cheat_files for v in name_variants):
            missing_cheats.append(t["name"])

    # Check cheatsheets contain the first documented usage example
    rich_without_examples = []
    for t in for610_remnux:
        usages = t.get("typical_usage", [])
        if not usages:
            continue
        cheat_path = os.path.join(
            cheat_dir, t["name"].lower().replace(" ", "-") + ".cheat"
        )
        if not os.path.exists(cheat_path):
            cheat_path = os.path.join(cheat_dir, t["id"] + ".cheat")
        if not os.path.exists(cheat_path):
            continue
        if usages[0] not in _read_text(cheat_path):
            rich_without_examples.append(t["name"])

    print(f" FOR610 tools (all): {len(for610_all)}")
    print(f" FOR610 in REMnux: {len(for610_remnux)}")
    print(f" Missing from master: {len(missing_from_master)}")
    print(f" Missing cheatsheets: {len(missing_cheats)}")
    for m in missing_cheats[:5]:
        print(f" ! {m}")
    print(f" Rich without examples: {len(rich_without_examples)}")
    for m in rich_without_examples[:5]:
        print(f" ! {m}")
    print(f" Errors: {len(errors)}")
    return errors


def test_tools_db():
    """Compare tools.db against the master inventory.

    tools.db is read as ``|``-separated lines with at least five fields:
    name, description, category, usage, tier. Lines with fewer fields are
    ignored.

    NOTE(review): every finding here is printed but none is appended to
    ``errors``, so this check can never fail the run. That matches the other
    report-only counters in this script and is preserved; confirm it is
    intended.

    Returns:
        list[str]: always empty.
    """
    _print_header("TEST 3: tools.db Consistency")
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    remnux_tools = {t["name"]: t for t in master["tools"] if t.get("in_remnux")}

    db_path = os.path.join(BASE_DIR, "data/generated/tools.db")
    db_entries = {}
    with open(db_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) >= 5:
                db_entries[parts[0]] = {
                    "name": parts[0],
                    "description": parts[1],
                    "category": parts[2],
                    "usage": parts[3],
                    "tier": parts[4],
                }

    # Check all REMnux tools are in DB
    missing_from_db = [name for name in remnux_tools if name not in db_entries]

    # Check no empty descriptions (the generator's placeholder text)
    empty_descs = [
        e["name"]
        for e in db_entries.values()
        if e["description"] == "(no description available)"
    ]

    # Check tier consistency
    tier_mismatches = []
    for name, entry in db_entries.items():
        if name in remnux_tools:
            expected_tier = remnux_tools[name].get("help_tier", "stub")
            if entry["tier"] != expected_tier:
                tier_mismatches.append(
                    f"{name}: db={entry['tier']} vs master={expected_tier}"
                )

    print(f" tools.db entries: {len(db_entries)}")
    print(f" REMnux tools in master: {len(remnux_tools)}")
    print(f" Missing from DB: {len(missing_from_db)}")
    for m in missing_from_db[:5]:
        print(f" ! {m}")
    print(f" Empty descriptions: {len(empty_descs)}")
    for m in empty_descs[:5]:
        print(f" ! {m}")
    print(f" Tier mismatches: {len(tier_mismatches)}")
    return errors


def test_workflows():
    """Verify that all workflow files are generated and non-trivial.

    Each workflow id (underscores replaced by hyphens) must have a matching
    ``.txt`` file, ``index.txt`` must exist, and every generated file must
    hold at least 50 characters.

    Returns:
        list[str]: one message per missing or too-short file.
    """
    _print_header("TEST 4: Workflow Files")
    errors = []

    wf_src = load_yaml(os.path.join(BASE_DIR, "data/for610/workflows.yaml"))
    wf_dir = os.path.join(BASE_DIR, "data/generated/workflows")

    expected_workflows = wf_src.get("workflows", [])
    generated = glob.glob(os.path.join(wf_dir, "*.txt"))
    generated_names = {_stem(f) for f in generated}

    # Check all workflows generated
    for wf in expected_workflows:
        wf_id = wf["id"].replace("_", "-")
        if wf_id not in generated_names:
            errors.append(f"Missing workflow file: {wf_id}.txt")

    # Check index file exists
    if "index" not in generated_names:
        errors.append("Missing workflow index.txt")

    # Check each workflow file has content
    for f in generated:
        if len(_read_text(f)) < 50:
            errors.append(f"Workflow file too short: {os.path.basename(f)}")

    print(f" Expected workflows: {len(expected_workflows)}")
    print(f" Generated files: {len(generated)} (including index)")
    print(f" Errors: {len(errors)}")
    for e in errors:
        print(f" ! {e}")
    return errors


def test_lab_tool_references():
    """Verify every tool referenced by a lab is a known FOR610 tool.

    Each ``tool_id`` under a lab's ``tools_used`` is looked up in
    data/for610/tools.yaml.

    Returns:
        list[str]: one message per unknown reference (a lab that references
        the same unknown tool twice yields two messages).
    """
    _print_header("TEST 5: Lab-Tool Cross-References")
    errors = []

    labs = load_yaml(os.path.join(BASE_DIR, "data/for610/labs.yaml"))
    for610_tools = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    for610_ids = {t["id"] for t in for610_tools["tools"]}

    # Check all tool_ids in labs exist in FOR610
    missing = set()
    for lab in labs["labs"]:
        # "or []" tolerates a tools_used key that is present but empty.
        for tu in lab.get("tools_used") or []:
            tid = tu["tool_id"]
            if tid not in for610_ids:
                missing.add(f"Lab {lab['id']}: tool '{tid}'")
                errors.append(f"Lab {lab['id']} references unknown tool: {tid}")

    print(f" Labs: {len(labs['labs'])}")
    print(f" Missing tool references: {len(missing)}")
    for m in sorted(missing)[:5]:
        print(f" ! {m}")
    return errors


def test_remnux_docs_coverage():
    """Report how many REMnux-documented tools have a cheatsheet.

    A tool counts as documented when it is ``in_remnux`` and its
    ``sources.remnux_docs.covered`` value is truthy. This check is
    informational: nothing is appended to ``errors``.

    Returns:
        list[str]: always empty.
    """
    _print_header("TEST 6: REMnux Docs Coverage in Help")
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")

    # Tools lacking "sources" or "remnux_docs" are treated as not covered
    # rather than raising KeyError; test 1 already reports missing sources.
    docs_tools = []
    for t in master["tools"]:
        remnux_docs = (t.get("sources") or {}).get("remnux_docs") or {}
        if remnux_docs.get("covered") and t.get("in_remnux"):
            docs_tools.append(t)

    docs_with_cheat = 0
    docs_without_cheat = []
    for t in docs_tools:
        name = t["name"].lower().replace(" ", "-")
        found = any(
            os.path.exists(os.path.join(cheat_dir, v + ".cheat"))
            for v in (name, t["id"])
        )
        if found:
            docs_with_cheat += 1
        else:
            docs_without_cheat.append(t["name"])

    print(f" REMnux-documented tools: {len(docs_tools)}")
    print(f" With cheatsheets: {docs_with_cheat}")
    print(f" Without cheatsheets: {len(docs_without_cheat)}")
    for m in docs_without_cheat[:5]:
        print(f" ! {m}")
    return errors


def test_cheatsheet_quality():
    """Spot-check cheatsheet content for key tools.

    Each key tool must have a cheatsheet containing every expected substring.

    Returns:
        list[str]: one message per missing cheatsheet or missing substring set.
    """
    _print_header("TEST 7: Cheatsheet Quality Spot-Checks")
    errors = []

    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")

    # Key tools that MUST have good cheatsheets
    key_tools = {
        "pdfid.py": ["pdfid.py", "document.pdf"],
        "pdf-parser.py": ["pdf-parser.py", "-a", "-s"],
        "oledump.py": ["oledump.py", "-s", "-v"],
        "capa": ["capa", "specimen"],
        "speakeasy": ["speakeasy", "-t"],
        "ghidra": ["ghidra"],
        "wireshark": ["wireshark"],
        "floss": ["floss"],
        "scdbgc": ["scdbgc", "/f"],
        "rtfdump.py": ["rtfdump.py"],
    }

    for tool, expected_strings in key_tools.items():
        cheat_path = os.path.join(cheat_dir, tool + ".cheat")
        if not os.path.exists(cheat_path):
            # Fall back to the spelling where ".py" is written as "-py"
            alt = tool.replace(".py", "-py") + ".cheat"
            cheat_path = os.path.join(cheat_dir, alt)
        if not os.path.exists(cheat_path):
            errors.append(f"Key tool {tool} has no cheatsheet")
            print(f" ! {tool}: NO CHEATSHEET")
            continue

        content = _read_text(cheat_path)
        missing_strings = [s for s in expected_strings if s not in content]
        if missing_strings:
            errors.append(f"{tool} cheatsheet missing: {missing_strings}")
            print(f" ! {tool}: missing {missing_strings}")
        else:
            print(f" + {tool}: OK")

    return errors


def main():
    """Run every check, print a summary, and exit 1 on any error, else 0."""
    checks = (
        test_master_inventory,
        test_for610_coverage,
        test_tools_db,
        test_workflows,
        test_lab_tool_references,
        test_remnux_docs_coverage,
        test_cheatsheet_quality,
    )
    all_errors = []
    for check in checks:
        all_errors.extend(check())

    _print_header("SUMMARY")

    if all_errors:
        print(f"\n Total issues found: {len(all_errors)}")
        for e in all_errors:
            print(f" - {e}")
        sys.exit(1)
    else:
        print("\n All tests passed!")
        sys.exit(0)


if __name__ == "__main__":
    main()