Files
docker_file_analysis/scripts/generate-coverage-report.py
tobias f3ccc09c3d Add FOR610 tool/workflow knowledge base and data pipeline
Build comprehensive malware analysis knowledge base from 3 sources:
- SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes
- REMnux salt-states: 340 packages parsed from GitHub
- REMnux docs: 280+ tools scraped from docs.remnux.org

Master inventory merges all sources into 447 tools with help tiers
(rich/standard/basic). Pipeline generates: tools.db (397 entries),
397 cheatsheets with multi-tool recipes, 15 workflow guides, 224
TLDR pages, and coverage reports.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 17:38:15 +01:00

123 lines
4.2 KiB
Python

#!/usr/bin/env python3
"""Generate coverage report from the master tool inventory.
Reads data/remnux/tools-master.yaml and produces:
- data/generated/coverage-report.md (human-readable)
- data/remnux/coverage-report.yaml (machine-readable)
"""
import os
import yaml
# Parent of the directory that contains this script; every path below hangs
# off it.
BASE_DIR = os.path.join(os.path.dirname(__file__), os.pardir)

# Input: the master tool inventory.
MASTER = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")

# Output: human-readable Markdown report.
MD_OUTPUT = os.path.join(BASE_DIR, "data", "generated", "coverage-report.md")

# Output: machine-readable YAML report.
YAML_OUTPUT = os.path.join(BASE_DIR, "data", "remnux", "coverage-report.yaml")
def main():
    """Generate the Markdown and YAML coverage reports.

    Loads the inventory at ``MASTER``, groups its tools by ``help_tier`` and
    by the ``in_remnux`` / ``has_for610_coverage`` / ``has_remnux_docs``
    flags, writes the human-readable report to ``MD_OUTPUT`` and the
    machine-readable one to ``YAML_OUTPUT``, then prints a short summary.

    Raises:
        OSError: if the inventory cannot be read or a report cannot be
            written.
        KeyError: if the inventory lacks a field the report indexes directly
            (``tools``, ``metadata``, ``source_coverage``, ``help_tier``,
            ``name``, ``id``, ``sources``).
    """
    # Explicit UTF-8: do not let the platform locale decide how the
    # inventory is decoded.
    with open(MASTER, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    tools = data["tools"]
    meta = data["metadata"]

    # Classify tools
    remnux_tools = [t for t in tools if t.get("in_remnux")]
    rich, standard, basic, stub = [
        [t for t in tools if t["help_tier"] == tier]
        for tier in ("rich", "standard", "basic", "stub")
    ]

    def by_name(tool):
        return tool["name"]

    # Tools in REMnux with no good help
    needs_help = sorted(
        (t for t in remnux_tools if t["help_tier"] in ("basic", "stub")),
        key=by_name,
    )

    # Tools with FOR610 coverage (richest help)
    for610_covered = sorted(
        (t for t in remnux_tools if t.get("has_for610_coverage")),
        key=by_name,
    )

    # Tools with REMnux docs only (decent help)
    docs_only = sorted(
        (
            t
            for t in remnux_tools
            if t.get("has_remnux_docs") and not t.get("has_for610_coverage")
        ),
        key=by_name,
    )

    # Generate markdown report
    lines = [
        "# Tool Coverage Report",
        "",
        "## Summary",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Total tools in master inventory | {len(tools)} |",
        f"| Tools in REMnux container | {len(remnux_tools)} |",
        f"| Rich help (FOR610 coverage) | {len(rich)} |",
        f"| Standard help (REMnux docs) | {len(standard)} |",
        f"| Basic help (salt-states only) | {len(basic)} |",
        f"| Stub (no documentation) | {len(stub)} |",
        "",
        "## Source Overlap",
        "",
        "| Combination | Count |",
        "|-------------|-------|",
    ]
    for key, val in meta["source_coverage"].items():
        lines.append(f"| {key.replace('_', ' ')} | {val} |")

    lines += [
        "",
        "## Priority: REMnux Tools Needing Help",
        "",
        f"These {len(needs_help)} tools are installed in the container but have minimal or no documentation:",
        "",
    ]
    for t in needs_help:
        tier_badge = "basic" if t["help_tier"] == "basic" else "STUB"
        lines.append(f"- `{t['name']}` [{tier_badge}]")

    lines += [
        "",
        f"## Rich Help Tools ({len(for610_covered)} tools with FOR610 coverage)",
        "",
    ]
    for t in for610_covered:
        labs = t["sources"]["for610"].get("labs", [])
        # YAML may parse unquoted lab ids (e.g. 1.2) as numbers, so coerce
        # each entry to str before joining.
        lab_str = f" (Labs: {', '.join(str(lab) for lab in labs)})" if labs else ""
        lines.append(f"- `{t['name']}`{lab_str}")

    lines += [
        "",
        f"## Standard Help Tools ({len(docs_only)} tools with REMnux docs only)",
        "",
    ]
    for t in docs_only:
        cat = t["sources"]["remnux_docs"].get("category", "")
        lines.append(f"- `{t['name']}` — {cat}")

    md_content = "\n".join(lines) + "\n"
    os.makedirs(os.path.dirname(MD_OUTPUT), exist_ok=True)
    # The report contains a non-ASCII em dash; write it as UTF-8 regardless
    # of the locale's default encoding.
    with open(MD_OUTPUT, "w", encoding="utf-8") as f:
        f.write(md_content)

    # Machine-readable YAML
    yaml_data = {
        "summary": meta,
        "needs_help": [
            {"id": t["id"], "name": t["name"], "tier": t["help_tier"]}
            for t in needs_help
        ],
        "rich_tools": [{"id": t["id"], "name": t["name"]} for t in for610_covered],
        "standard_tools": [{"id": t["id"], "name": t["name"]} for t in docs_only],
    }
    # Create the YAML target directory too, mirroring the Markdown output.
    os.makedirs(os.path.dirname(YAML_OUTPUT), exist_ok=True)
    with open(YAML_OUTPUT, "w", encoding="utf-8") as f:
        yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

    print("Coverage report generated:")
    print(f" Markdown: {MD_OUTPUT}")
    print(f" YAML: {YAML_OUTPUT}")
    print(f"\n {len(remnux_tools)} REMnux tools:")
    print(f" {len(rich)} rich, {len(standard)} standard, {len(basic)} basic, {len(stub)} stub")
    print(f" {len(needs_help)} need better documentation")
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()