f3ccc09c3d
Build comprehensive malware analysis knowledge base from 3 sources: - SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes - REMnux salt-states: 340 packages parsed from GitHub - REMnux docs: 280+ tools scraped from docs.remnux.org Master inventory merges all sources into 447 tools with help tiers (rich/standard/basic). Pipeline generates: tools.db (397 entries), 397 cheatsheets with multi-tool recipes, 15 workflow guides, 224 TLDR pages, and coverage reports. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
123 lines
4.2 KiB
Python
123 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Generate coverage report from the master tool inventory.
|
|
|
|
Reads data/remnux/tools-master.yaml and produces:
|
|
- data/generated/coverage-report.md (human-readable)
|
|
- data/remnux/coverage-report.yaml (machine-readable)
|
|
"""
|
|
|
|
import os
|
|
import yaml
|
|
|
|
# Parent of the directory that holds this script; every data path below is
# resolved relative to it.
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")

# Directory that holds both the master inventory (input) and the
# machine-readable report (output).
_REMNUX_DATA_DIR = os.path.join(BASE_DIR, "data", "remnux")

# Input: master tool inventory read by main().
MASTER = os.path.join(_REMNUX_DATA_DIR, "tools-master.yaml")

# Outputs: human-readable Markdown report and machine-readable YAML report.
MD_OUTPUT = os.path.join(
    os.path.join(BASE_DIR, "data", "generated"),
    "coverage-report.md",
)
YAML_OUTPUT = os.path.join(_REMNUX_DATA_DIR, "coverage-report.yaml")
|
|
|
|
|
|
def _by_name(tools):
    """Return the given tool entries as a new list sorted by their ``name``."""
    return sorted(tools, key=lambda t: t["name"])


def _source_entry(tool, source):
    """Return ``tool["sources"][source]`` or ``{}`` when it is missing/null.

    A tool can carry a coverage flag without the matching ``sources``
    sub-entry; falling back to an empty dict keeps report generation from
    aborting on such an entry.
    """
    return (tool.get("sources") or {}).get(source) or {}


def _render_markdown(tools, remnux_tools, tier_counts, source_coverage,
                     needs_help, for610_covered, docs_only):
    """Build the Markdown coverage report and return it as one string.

    Args:
        tools: All tool entries from the master inventory.
        remnux_tools: Entries whose ``in_remnux`` flag is truthy.
        tier_counts: Mapping of help tier name to number of tools in it.
        source_coverage: Mapping rendered row by row in "Source Overlap";
            underscores in keys are shown as spaces.
        needs_help: Name-sorted REMnux tools with tier ``basic`` or ``stub``.
        for610_covered: Name-sorted REMnux tools flagged with FOR610 coverage.
        docs_only: Name-sorted REMnux tools with REMnux docs but no FOR610
            coverage.

    Returns:
        The report text, terminated by a single newline.
    """
    lines = [
        "# Tool Coverage Report",
        "",
        "## Summary",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Total tools in master inventory | {len(tools)} |",
        f"| Tools in REMnux container | {len(remnux_tools)} |",
        f"| Rich help (FOR610 coverage) | {tier_counts['rich']} |",
        f"| Standard help (REMnux docs) | {tier_counts['standard']} |",
        f"| Basic help (salt-states only) | {tier_counts['basic']} |",
        f"| Stub (no documentation) | {tier_counts['stub']} |",
        "",
        "## Source Overlap",
        "",
        "| Combination | Count |",
        "|-------------|-------|",
    ]
    lines.extend(
        f"| {key.replace('_', ' ')} | {val} |"
        for key, val in source_coverage.items()
    )

    lines += [
        "",
        "## Priority: REMnux Tools Needing Help",
        "",
        f"These {len(needs_help)} tools are installed in the container but have minimal or no documentation:",
        "",
    ]
    for t in needs_help:
        tier_badge = "basic" if t["help_tier"] == "basic" else "STUB"
        lines.append(f"- `{t['name']}` [{tier_badge}]")

    lines += [
        "",
        f"## Rich Help Tools ({len(for610_covered)} tools with FOR610 coverage)",
        "",
    ]
    for t in for610_covered:
        labs = _source_entry(t, "for610").get("labs", [])
        # Lab ids are stringified first: an unquoted YAML scalar such as
        # 1.1 is loaded as a float, which str.join() would reject.
        lab_str = f" (Labs: {', '.join(str(lab) for lab in labs)})" if labs else ""
        lines.append(f"- `{t['name']}`{lab_str}")

    lines += [
        "",
        f"## Standard Help Tools ({len(docs_only)} tools with REMnux docs only)",
        "",
    ]
    for t in docs_only:
        cat = _source_entry(t, "remnux_docs").get("category", "")
        lines.append(f"- `{t['name']}` — {cat}")

    return "\n".join(lines) + "\n"


def main():
    """Generate the Markdown and YAML coverage reports from the inventory.

    Loads ``MASTER``, classifies its ``tools`` by ``help_tier`` and by the
    ``in_remnux`` / ``has_for610_coverage`` / ``has_remnux_docs`` flags,
    writes the Markdown report to ``MD_OUTPUT`` and the YAML report to
    ``YAML_OUTPUT``, then prints a short summary to stdout.

    Raises:
        OSError: If the inventory cannot be read or a report cannot be
            written.
        KeyError: If the inventory lacks ``tools``/``metadata``, if
            ``metadata`` lacks ``source_coverage``, or if a tool lacks
            ``help_tier``, ``name`` or (for listed tools) ``id``.
    """
    # Files are opened as UTF-8 explicitly: the Markdown report contains an
    # em dash, so relying on the locale's default encoding is not portable.
    with open(MASTER, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    tools = data["tools"]
    meta = data["metadata"]

    # Classify tools
    remnux_tools = [t for t in tools if t.get("in_remnux")]
    tier_counts = {
        tier: sum(1 for t in tools if t["help_tier"] == tier)
        for tier in ("rich", "standard", "basic", "stub")
    }

    # Tools in REMnux with no good help
    needs_help = _by_name(
        t for t in remnux_tools if t["help_tier"] in ("basic", "stub")
    )
    # Tools with FOR610 coverage (richest help)
    for610_covered = _by_name(
        t for t in remnux_tools if t.get("has_for610_coverage")
    )
    # Tools with REMnux docs only (decent help)
    docs_only = _by_name(
        t for t in remnux_tools
        if t.get("has_remnux_docs") and not t.get("has_for610_coverage")
    )

    # Generate markdown report
    md_content = _render_markdown(
        tools, remnux_tools, tier_counts, meta["source_coverage"],
        needs_help, for610_covered, docs_only,
    )
    os.makedirs(os.path.dirname(MD_OUTPUT), exist_ok=True)
    with open(MD_OUTPUT, "w", encoding="utf-8") as f:
        f.write(md_content)

    # Machine-readable YAML
    yaml_data = {
        "summary": meta,
        "needs_help": [
            {"id": t["id"], "name": t["name"], "tier": t["help_tier"]}
            for t in needs_help
        ],
        "rich_tools": [{"id": t["id"], "name": t["name"]} for t in for610_covered],
        "standard_tools": [{"id": t["id"], "name": t["name"]} for t in docs_only],
    }
    # Same directory guarantee as for the Markdown report.
    os.makedirs(os.path.dirname(YAML_OUTPUT), exist_ok=True)
    with open(YAML_OUTPUT, "w", encoding="utf-8") as f:
        yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

    print("Coverage report generated:")
    print(f" Markdown: {MD_OUTPUT}")
    print(f" YAML: {YAML_OUTPUT}")
    print(f"\n {len(remnux_tools)} REMnux tools:")
    # NOTE(review): the tier counts below are computed over the whole
    # inventory, not only the REMnux subset named on the line above —
    # confirm which is intended.
    print(
        f" {tier_counts['rich']} rich, {tier_counts['standard']} standard, "
        f"{tier_counts['basic']} basic, {tier_counts['stub']} stub"
    )
    print(f" {len(needs_help)} need better documentation")
|
|
|
|
|
|
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|