Add FOR610 tool/workflow knowledge base and data pipeline
Build comprehensive malware analysis knowledge base from 3 sources: - SANS FOR610 course: 120 tools, 47 labs, 15 workflows, 27 recipes - REMnux salt-states: 340 packages parsed from GitHub - REMnux docs: 280+ tools scraped from docs.remnux.org Master inventory merges all sources into 447 tools with help tiers (rich/standard/basic). Pipeline generates: tools.db (397 entries), 397 cheatsheets with multi-tool recipes, 15 workflow guides, 224 TLDR pages, and coverage reports. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,466 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build the master tool inventory by merging three sources.
|
||||
|
||||
Merges:
|
||||
1. FOR610 course data (data/for610/tools.yaml)
|
||||
2. Salt-states installation data (data/remnux/sources/salt-states.yaml)
|
||||
3. REMnux docs (data/remnux/sources/remnux-docs.yaml)
|
||||
|
||||
Output: data/remnux/tools-master.yaml
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
|
||||
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
|
||||
FOR610_TOOLS = os.path.join(BASE_DIR, "data", "for610", "tools.yaml")
|
||||
SALT_STATES = os.path.join(BASE_DIR, "data", "remnux", "sources", "salt-states.yaml")
|
||||
REMNUX_DOCS = os.path.join(BASE_DIR, "data", "remnux", "sources", "remnux-docs.yaml")
|
||||
ENRICHMENTS = os.path.join(BASE_DIR, "data", "remnux", "tool-enrichments.yaml")
|
||||
OUTPUT = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
|
||||
|
||||
# Manual override mapping for tools that have different names across sources
# (FOR610 course data, salt-states packages, REMnux docs).
# Format: normalized_key -> canonical_id
# NOTE: self-referential entries (e.g. "7zip": "7zip") deliberately pin a key
# to itself so find_match() short-circuits before its fuzzier fallbacks
# (the "-py" suffix and trailing-digit-stripping heuristics).
NAME_OVERRIDES = {
    # Detect It Easy ships as the "diec" CLI binary
    "die": "diec",
    "detect-it-easy": "diec",
    "detect it easy": "diec",
    # SpiderMonkey JS engine and its wrappers/alternatives
    "js": "spidermonkey",
    "js-patched": "spidermonkey",
    "spidermonkey-patched": "spidermonkey",
    "mozilla-spidermonkey": "spidermonkey",
    # Volatility memory-forensics framework (v3 is canonical)
    "vol": "volatility3",
    "vol-py": "volatility3",
    "volatility-framework": "volatility3",
    "volatility": "volatility3",
    "process-hacker": "system-informer",
    # YARA rule sets fold into the main yara entry; yara-x stays separate
    "yara-rules": "yara",
    "yara-forge": "yara",
    "yara-x": "yara-x",
    "jsbeautifier": "js-beautify",
    "js-beautifier": "js-beautify",
    "ilspycmd": "ilspycmd",
    "ilspy": "ilspy",
    # Package names vs. command names
    "upx-ucl": "upx",
    "unrar-free": "rar",
    "netcat-openbsd": "netcat",
    "net-tools": "net-tools",
    "oletools": "olevba",
    "pev": "readpe",
    "scdbg": "scdbgc",
    "origamindee": "origami",
    "pdftk-java": "pdftk",
    "fakenet-ng": "fakenet-ng",
    "accept-all-ips": "httpd",
    # Archive tools collapse into canonical entries
    "7zip": "7zip",
    "7z": "7zip",
    "p7zip": "7zip",
    "info-zip": "unzip",
    # radare2 ecosystem
    "cutter": "cutter",
    "r2pipe": "radare2",
    "r2": "radare2",
    "stpyv8": "spidermonkey",
    "rhino-debugger": "spidermonkey",
    "powershell-core": "powershell",
    "powershell": "powershell",
    "didier-stevens-scripts": "didier-stevens-suite",
    "docker-compose": "docker",
    "docker": "docker",
    "ghidrassist-mcp": "ghidra",
    "remnux-mcp-server": "remnux-mcp-server",
}
|
||||
|
||||
|
||||
def normalize_name(name):
    """Return a canonical matching key: lowercase, script-extension
    stripped, and all non-alphanumeric runs collapsed to single hyphens."""
    key = name.lower().strip()
    # Drop a trailing script extension so "oledump.py" matches "oledump".
    for ext_pattern in (r'\.py$', r'\.pl$', r'\.bat$'):
        key = re.sub(ext_pattern, '', key)
    key = re.sub(r'[^a-z0-9]+', '-', key)
    return key.strip('-')
|
||||
|
||||
|
||||
def make_id(name):
    """Create a kebab-case ID from a name.

    Unlike normalize_name(), script extensions are preserved as ID
    suffixes (``foo.py`` -> ``foo-py``) so distinct variants keep
    distinct IDs.
    """
    ident = name.lower().strip()
    # Keep .py/.pl/.bat as -py/-pl/-bat in the ID
    for ext_pattern, suffix in ((r'\.py$', '-py'), (r'\.pl$', '-pl'), (r'\.bat$', '-bat')):
        ident = re.sub(ext_pattern, suffix, ident)
    ident = re.sub(r'[^a-z0-9]+', '-', ident)
    return ident.strip('-')
|
||||
|
||||
|
||||
def load_for610():
    """Load FOR610 tools from data/for610/tools.yaml.

    Returns:
        The ``tools`` list from the YAML document, or [] when the
        document is empty (yaml.safe_load yields None for an empty
        file, which the previous ``data.get`` call would crash on).
    """
    with open(FOR610_TOOLS) as f:
        data = yaml.safe_load(f)
    return (data or {}).get("tools", [])
|
||||
|
||||
|
||||
def load_salt_states():
    """Load salt-states parsed data.

    Returns:
        The ``tools`` list, or [] when the file is missing or parses
        to an empty document (guards against ``yaml.safe_load`` -> None).
    """
    if not os.path.exists(SALT_STATES):
        print(f" Warning: {SALT_STATES} not found, skipping")
        return []
    with open(SALT_STATES) as f:
        data = yaml.safe_load(f)
    return (data or {}).get("tools", [])
|
||||
|
||||
|
||||
def load_remnux_docs():
    """Load REMnux docs scraped data.

    Returns:
        The ``tools`` list, or [] when the file is missing or parses
        to an empty document (guards against ``yaml.safe_load`` -> None).
    """
    if not os.path.exists(REMNUX_DOCS):
        print(f" Warning: {REMNUX_DOCS} not found, skipping")
        return []
    with open(REMNUX_DOCS) as f:
        data = yaml.safe_load(f)
    return (data or {}).get("tools", [])
|
||||
|
||||
|
||||
def build_lookup_index(master_tools):
    """Build a lookup index mapping several keys to each canonical tool id.

    Every tool is reachable by its id, its normalized name, and each of
    its normalized aliases; later writers win on key collisions.
    """
    lookup = {}
    for entry in master_tools:
        canonical = entry["id"]
        lookup[canonical] = canonical
        lookup[normalize_name(entry["name"])] = canonical
        for alt in entry.get("aliases", []):
            lookup[normalize_name(alt)] = canonical
    return lookup
|
||||
|
||||
|
||||
def find_match(name, index):
    """Resolve *name* to a canonical tool id via *index*, or None.

    Resolution order: manual NAME_OVERRIDES, exact normalized match,
    script-variant match ("-py" suffix), then a trailing-digit-stripped
    match (so e.g. "volatility3" can find "volatility").
    """
    key = normalize_name(name)

    # Manual overrides always win.
    override = NAME_OVERRIDES.get(key)
    if override is not None:
        # Prefer whatever the index maps the override to, else the raw id.
        return index.get(override, override)

    if key in index:
        return index[key]

    # Script-style variant, e.g. "oledump" -> "oledump-py".
    suffixed = key + "-py"
    if suffixed in index:
        return index[suffixed]

    # Version-number-stripped variant.
    bare = re.sub(r'-?\d+$', '', key)
    if bare and bare in index:
        return index[bare]

    return None
|
||||
|
||||
|
||||
def compute_help_tier(tool):
    """Determine the help tier from source coverage.

    Precedence: FOR610 coverage -> "rich", REMnux docs -> "standard",
    salt-states only -> "basic", nothing -> "stub".
    """
    sources = tool.get("sources", {})

    def covered(source_key):
        return sources.get(source_key, {}).get("covered", False)

    if covered("for610"):
        return "rich"
    if covered("remnux_docs"):
        return "standard"
    if covered("salt_states"):
        return "basic"
    return "stub"
|
||||
|
||||
|
||||
def main():
    """Build data/remnux/tools-master.yaml by merging all three sources.

    Pipeline:
      1. FOR610 course tools seed the master inventory.
      2. Salt-states entries are merged in (matched by normalized
         name/alias; unmatched entries become new tools marked in_remnux).
      3. REMnux docs entries are merged the same way.
      4. Manual enrichments (descriptions, usage examples) are applied.
      5. Derived coverage flags and help tiers are computed.
      6. The sorted inventory plus coverage metadata is written to OUTPUT.
    """
    print("Building master tool inventory...")

    # --- Step 1: Load FOR610 tools as base ---
    print("\n1. Loading FOR610 tools...")
    for610_tools = load_for610()
    print(f" Loaded {len(for610_tools)} tools")

    master = {}
    for t in for610_tools:
        tid = t["id"]
        entry = {
            "id": tid,
            "name": t["name"],
            "aliases": t.get("aliases", []),
            "description": t.get("description", ""),
            "in_remnux": t.get("in_remnux", False),
            "platform": t.get("platform", "linux"),
            "sources": {
                "for610": {
                    "covered": True,
                    "description": t.get("description", ""),
                    "category": t.get("category", ""),
                    "labs": t.get("labs", []),
                    "sections": t.get("for610_sections", []),
                    "typical_usage": t.get("typical_usage", []),
                    "tags": t.get("tags", []),
                },
                "salt_states": {"covered": False},
                "remnux_docs": {"covered": False},
            },
        }
        if t.get("author"):
            entry["sources"]["for610"]["author"] = t["author"]
        master[tid] = entry

    # --- Step 2: Merge salt-states ---
    print("\n2. Loading salt-states...")
    salt_tools = load_salt_states()
    print(f" Loaded {len(salt_tools)} entries")

    index = build_lookup_index(list(master.values()))
    salt_matched = 0
    salt_new = 0

    def salt_source(st, st_names, st_id):
        # The salt_states source record; identical at every merge site
        # below (previously duplicated three times inline).
        return {
            "covered": True,
            "install_method": st.get("install_method", "unknown"),
            "package_name": st_names[0] if st_names else st_id,
            "salt_state_path": st.get("salt_state_path", ""),
        }

    for st in salt_tools:
        st_id = st["id"]
        st_names = st.get("package_names", [st_id])

        # Try to match against existing tools: the salt id first,
        # then each package name.
        matched_id = None
        for name in [st_id] + st_names:
            matched_id = find_match(name, index)
            if matched_id:
                break

        if matched_id and matched_id in master:
            # Enrich existing tool; a salt state implies container presence.
            master[matched_id]["sources"]["salt_states"] = salt_source(st, st_names, st_id)
            master[matched_id]["in_remnux"] = True
            salt_matched += 1
        else:
            # Create new tool entry
            new_id = make_id(st_id)
            # Check if an override maps to something we don't have yet
            if normalize_name(st_id) in NAME_OVERRIDES:
                new_id = NAME_OVERRIDES[normalize_name(st_id)]

            if new_id not in master:
                master[new_id] = {
                    "id": new_id,
                    "name": st_id,
                    "aliases": [n for n in st_names if n != st_id][:3],
                    "description": "",
                    "in_remnux": True,
                    "platform": "linux",
                    "sources": {
                        "for610": {"covered": False},
                        "salt_states": salt_source(st, st_names, st_id),
                        "remnux_docs": {"covered": False},
                    },
                }
                # Update the index so later salt entries can match this tool.
                index[new_id] = new_id
                index[normalize_name(st_id)] = new_id
                for n in st_names:
                    index[normalize_name(n)] = new_id
                salt_new += 1
            else:
                # Already exists under the override ID
                master[new_id]["sources"]["salt_states"] = salt_source(st, st_names, st_id)
                salt_matched += 1

    print(f" Matched: {salt_matched}, New: {salt_new}")

    # --- Step 3: Merge REMnux docs ---
    print("\n3. Loading REMnux docs...")
    doc_tools = load_remnux_docs()
    print(f" Loaded {len(doc_tools)} entries")

    # Rebuild index after salt-states additions
    index = build_lookup_index(list(master.values()))
    docs_matched = 0
    docs_new = 0

    for dt in doc_tools:
        dt_name = dt.get("name", "")
        dt_id = dt.get("id", make_id(dt_name))

        matched_id = find_match(dt_name, index)
        if not matched_id:
            matched_id = find_match(dt_id, index)

        if matched_id and matched_id in master:
            # Enrich existing tool
            doc_entry = {
                "covered": True,
                "category": dt.get("category", ""),
                "description": dt.get("description", ""),
                "docs_url": dt.get("docs_url", ""),
            }
            if dt.get("website"):
                doc_entry["website"] = dt["website"]
            if dt.get("anchor"):
                doc_entry["anchor"] = dt["anchor"]

            master[matched_id]["sources"]["remnux_docs"] = doc_entry

            # Use the REMnux docs description if we don't have one yet
            if not master[matched_id]["description"] and dt.get("description"):
                master[matched_id]["description"] = dt["description"]

            docs_matched += 1
        else:
            # Create new entry
            new_id = make_id(dt_name) if dt_name else dt_id
            if new_id not in master:
                master[new_id] = {
                    "id": new_id,
                    "name": dt_name,
                    "aliases": [],
                    "description": dt.get("description", ""),
                    "in_remnux": True,
                    "platform": "linux",
                    "sources": {
                        "for610": {"covered": False},
                        "salt_states": {"covered": False},
                        "remnux_docs": {
                            "covered": True,
                            "category": dt.get("category", ""),
                            "description": dt.get("description", ""),
                            "docs_url": dt.get("docs_url", ""),
                        },
                    },
                }
                if dt.get("website"):
                    master[new_id]["sources"]["remnux_docs"]["website"] = dt["website"]
                index[new_id] = new_id
                index[normalize_name(dt_name)] = new_id
                docs_new += 1
            else:
                # Already present (e.g. created via override) — attach docs.
                master[new_id]["sources"]["remnux_docs"] = {
                    "covered": True,
                    "category": dt.get("category", ""),
                    "description": dt.get("description", ""),
                    "docs_url": dt.get("docs_url", ""),
                }
                docs_matched += 1

    print(f" Matched: {docs_matched}, New: {docs_new}")

    # --- Step 4: Apply manual enrichments ---
    print("\n4. Applying manual enrichments...")
    if os.path.exists(ENRICHMENTS):
        with open(ENRICHMENTS) as f:
            enrich_data = yaml.safe_load(f)
        enrichments = enrich_data.get("enrichments", {})
        enriched = 0
        for tool_key, enrich in enrichments.items():
            # Find the tool in master by key or normalized name
            matched_id = find_match(tool_key, index)
            if not matched_id:
                matched_id = tool_key
            if matched_id in master:
                tool = master[matched_id]
                # Enrichment descriptions always win (the previous
                # if/elif here assigned the same value on both branches).
                if enrich.get("description"):
                    tool["description"] = enrich["description"]
                # Add usage examples to the for610 source (creating the
                # coverage record when the course never mentioned the tool)
                if enrich.get("typical_usage"):
                    if not tool["sources"]["for610"].get("covered"):
                        tool["sources"]["for610"]["covered"] = True
                        tool["sources"]["for610"]["typical_usage"] = enrich["typical_usage"]
                        tool["sources"]["for610"]["tags"] = enrich.get("tags", [])
                        tool["sources"]["for610"]["description"] = enrich.get("description", "")
                    else:
                        # Merge usage examples, preserving order, no dupes
                        existing = tool["sources"]["for610"].get("typical_usage", [])
                        for u in enrich["typical_usage"]:
                            if u not in existing:
                                existing.append(u)
                        tool["sources"]["for610"]["typical_usage"] = existing
                enriched += 1
            else:
                print(f" Warning: enrichment key '{tool_key}' not found in master")
        print(f" Enriched: {enriched} tools")
    else:
        print(" No enrichments file found, skipping")

    # Rebuild index after enrichments
    index = build_lookup_index(list(master.values()))

    # --- Step 5: Compute derived fields ---
    print("\n5. Computing derived fields...")
    for tool in master.values():
        tool["has_for610_coverage"] = tool["sources"]["for610"].get("covered", False)
        tool["has_remnux_docs"] = tool["sources"]["remnux_docs"].get("covered", False)
        tool["has_salt_state"] = tool["sources"]["salt_states"].get("covered", False)
        tool["help_tier"] = compute_help_tier(tool)

    # --- Step 6: Sort and output ---
    tools_list = sorted(master.values(), key=lambda t: t["id"])

    # Windows-only/online tools that aren't in remnux are kept for
    # reference; in_remnux / platform flag them appropriately.

    tiers = {}
    for t in tools_list:
        tier = t["help_tier"]
        tiers[tier] = tiers.get(tier, 0) + 1

    output = {
        "metadata": {
            "total_tools": len(tools_list),
            "in_remnux_count": sum(1 for t in tools_list if t["in_remnux"]),
            "help_tier_counts": tiers,
            "source_coverage": {
                "for610_only": sum(1 for t in tools_list if t["has_for610_coverage"] and not t["has_remnux_docs"] and not t["has_salt_state"]),
                "remnux_docs_only": sum(1 for t in tools_list if t["has_remnux_docs"] and not t["has_for610_coverage"] and not t["has_salt_state"]),
                "salt_states_only": sum(1 for t in tools_list if t["has_salt_state"] and not t["has_for610_coverage"] and not t["has_remnux_docs"]),
                "all_three": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_remnux_docs"] and t["has_salt_state"]),
                "for610_and_docs": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_remnux_docs"]),
                "for610_and_salt": sum(1 for t in tools_list if t["has_for610_coverage"] and t["has_salt_state"]),
                "docs_and_salt": sum(1 for t in tools_list if t["has_remnux_docs"] and t["has_salt_state"]),
                "no_coverage": sum(1 for t in tools_list if not t["has_for610_coverage"] and not t["has_remnux_docs"] and not t["has_salt_state"]),
            },
        },
        "tools": tools_list,
    }

    with open(OUTPUT, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    print(f"\n{'='*50}")
    print(f"MASTER INVENTORY BUILT: {len(tools_list)} tools")
    print(f" In REMnux: {output['metadata']['in_remnux_count']}")
    print("\nHelp Tiers:")
    for tier, count in sorted(tiers.items()):
        print(f" {tier}: {count}")
    print("\nSource Coverage:")
    for key, val in output["metadata"]["source_coverage"].items():
        print(f" {key}: {val}")
    print(f"\nOutput: {OUTPUT}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,122 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate coverage report from the master tool inventory.
|
||||
|
||||
Reads data/remnux/tools-master.yaml and produces:
|
||||
- data/generated/coverage-report.md (human-readable)
|
||||
- data/remnux/coverage-report.yaml (machine-readable)
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
|
||||
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
|
||||
MASTER = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
|
||||
MD_OUTPUT = os.path.join(BASE_DIR, "data", "generated", "coverage-report.md")
|
||||
YAML_OUTPUT = os.path.join(BASE_DIR, "data", "remnux", "coverage-report.yaml")
|
||||
|
||||
|
||||
def main():
    """Produce human- and machine-readable coverage reports.

    Reads the master inventory, classifies tools by help tier and
    container presence, and writes a markdown report (MD_OUTPUT) plus
    a YAML summary (YAML_OUTPUT). Static table rows are plain strings
    now — the originals were f-strings with no placeholders.
    """
    with open(MASTER) as f:
        data = yaml.safe_load(f)

    tools = data["tools"]
    meta = data["metadata"]

    # Classify tools
    remnux_tools = [t for t in tools if t.get("in_remnux")]
    rich = [t for t in tools if t["help_tier"] == "rich"]
    standard = [t for t in tools if t["help_tier"] == "standard"]
    basic = [t for t in tools if t["help_tier"] == "basic"]
    stub = [t for t in tools if t["help_tier"] == "stub"]

    # Tools in REMnux with no good help
    needs_help = [t for t in remnux_tools if t["help_tier"] in ("basic", "stub")]
    needs_help.sort(key=lambda t: t["name"])

    # Tools with FOR610 coverage (richest help)
    for610_covered = [t for t in remnux_tools if t.get("has_for610_coverage")]
    for610_covered.sort(key=lambda t: t["name"])

    # Tools with REMnux docs only (decent help)
    docs_only = [t for t in remnux_tools if t.get("has_remnux_docs") and not t.get("has_for610_coverage")]
    docs_only.sort(key=lambda t: t["name"])

    # Generate markdown report
    lines = [
        "# Tool Coverage Report",
        "",
        "## Summary",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Total tools in master inventory | {len(tools)} |",
        f"| Tools in REMnux container | {len(remnux_tools)} |",
        f"| Rich help (FOR610 coverage) | {len(rich)} |",
        f"| Standard help (REMnux docs) | {len(standard)} |",
        f"| Basic help (salt-states only) | {len(basic)} |",
        f"| Stub (no documentation) | {len(stub)} |",
        "",
        "## Source Overlap",
        "",
        "| Combination | Count |",
        "|-------------|-------|",
    ]
    for key, val in meta["source_coverage"].items():
        lines.append(f"| {key.replace('_', ' ')} | {val} |")

    lines += [
        "",
        "## Priority: REMnux Tools Needing Help",
        "",
        f"These {len(needs_help)} tools are installed in the container but have minimal or no documentation:",
        "",
    ]
    for t in needs_help:
        # Upper-case STUB so no-doc tools stand out in the list.
        tier_badge = "basic" if t["help_tier"] == "basic" else "STUB"
        lines.append(f"- `{t['name']}` [{tier_badge}]")

    lines += [
        "",
        f"## Rich Help Tools ({len(for610_covered)} tools with FOR610 coverage)",
        "",
    ]
    for t in for610_covered:
        labs = t["sources"]["for610"].get("labs", [])
        lab_str = f" (Labs: {', '.join(labs)})" if labs else ""
        lines.append(f"- `{t['name']}`{lab_str}")

    lines += [
        "",
        f"## Standard Help Tools ({len(docs_only)} tools with REMnux docs only)",
        "",
    ]
    for t in docs_only:
        cat = t["sources"]["remnux_docs"].get("category", "")
        lines.append(f"- `{t['name']}` — {cat}")

    md_content = "\n".join(lines) + "\n"

    os.makedirs(os.path.dirname(MD_OUTPUT), exist_ok=True)
    with open(MD_OUTPUT, "w") as f:
        f.write(md_content)

    # Machine-readable YAML
    yaml_data = {
        "summary": meta,
        "needs_help": [{"id": t["id"], "name": t["name"], "tier": t["help_tier"]} for t in needs_help],
        "rich_tools": [{"id": t["id"], "name": t["name"]} for t in for610_covered],
        "standard_tools": [{"id": t["id"], "name": t["name"]} for t in docs_only],
    }
    with open(YAML_OUTPUT, "w") as f:
        yaml.dump(yaml_data, f, default_flow_style=False, sort_keys=False)

    print("Coverage report generated:")
    print(f" Markdown: {MD_OUTPUT}")
    print(f" YAML: {YAML_OUTPUT}")
    print(f"\n {len(remnux_tools)} REMnux tools:")
    print(f" {len(rich)} rich, {len(standard)} standard, {len(basic)} basic, {len(stub)} stub")
    print(f" {len(needs_help)} need better documentation")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,534 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Generate all help artifacts from the master tool inventory.
|
||||
|
||||
Reads data/remnux/tools-master.yaml and data/for610/workflows.yaml to produce:
|
||||
- data/generated/tools.db (pipe-delimited for find-tool)
|
||||
- data/generated/cheatsheets/*.cheat (per-tool cheat sheets)
|
||||
- data/generated/workflows/*.txt (workflow help files)
|
||||
- data/generated/tldr/*.md (TLDR pages)
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
import textwrap
|
||||
|
||||
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
|
||||
MASTER = os.path.join(BASE_DIR, "data", "remnux", "tools-master.yaml")
|
||||
WORKFLOWS_SRC = os.path.join(BASE_DIR, "data", "for610", "workflows.yaml")
|
||||
RECIPES_SRC = os.path.join(BASE_DIR, "data", "for610", "recipes.yaml")
|
||||
GEN_DIR = os.path.join(BASE_DIR, "data", "generated")
|
||||
|
||||
|
||||
def load_master():
    """Parse and return the master tool inventory YAML document."""
    with open(MASTER) as fh:
        text = fh.read()
    return yaml.safe_load(text)
|
||||
|
||||
|
||||
def load_workflows():
    """Parse and return the FOR610 workflows YAML document."""
    with open(WORKFLOWS_SRC) as fh:
        text = fh.read()
    return yaml.safe_load(text)
|
||||
|
||||
|
||||
def load_recipes():
    """Load optional multi-tool recipes.

    Returns:
        The parsed recipes document, or ``{"recipes": []}`` when the
        file is missing OR parses to an empty document — previously an
        empty-but-present file returned None (yaml.safe_load's result),
        breaking callers that expect a "recipes" key.
    """
    if os.path.exists(RECIPES_SRC):
        with open(RECIPES_SRC) as f:
            data = yaml.safe_load(f)
        return data or {"recipes": []}
    return {"recipes": []}
|
||||
|
||||
|
||||
def build_recipe_index(recipes_data):
    """Map tool_id -> list of recipes that reference that tool.

    Each tool is also indexed under a squashed key (lowercased, hyphens
    and underscores removed) so loose spellings still match.
    """
    index = {}
    for recipe in recipes_data.get("recipes", []):
        for tool_id in recipe.get("tools", []):
            index.setdefault(tool_id, []).append(recipe)
            squashed = tool_id.lower().replace("-", "").replace("_", "")
            if squashed != tool_id:
                index.setdefault(squashed, []).append(recipe)
    return index
|
||||
|
||||
|
||||
# ============================================================
|
||||
# tools.db generator
|
||||
# ============================================================
|
||||
|
||||
def generate_tools_db(tools):
    """Generate the pipe-delimited tools.db consumed by find-tool.

    One line per in-REMnux tool: ``name|description|category|usage|tier``,
    sorted. Pipe characters inside fields are replaced so the delimiter
    stays unambiguous. Returns the number of lines written.
    """
    output_path = os.path.join(GEN_DIR, "tools.db")
    lines = []

    for t in tools:
        if not t.get("in_remnux"):
            continue

        name = t["name"]
        # Single-line description, pipes neutralized, capped at 120 chars.
        desc = t.get("description", "").replace("|", "/").replace("\n", " ").strip()[:120]
        if not desc:
            # Plain string — was an f-string with no placeholders.
            desc = "(no description available)"

        # Category: prefer REMnux docs, fall back to FOR610.
        cat = ""
        if t["sources"]["remnux_docs"].get("covered"):
            cat = t["sources"]["remnux_docs"].get("category", "")
        elif t["sources"]["for610"].get("covered"):
            cat = t["sources"]["for610"].get("category", "")

        # Usage: first FOR610 example, else a generic --help stub.
        usage = ""
        if t["sources"]["for610"].get("covered"):
            usages = t["sources"]["for610"].get("typical_usage", [])
            if usages:
                usage = usages[0]
        if not usage:
            usage = f"{name} --help"
        usage = usage.replace("|", " ").strip()

        tier = t.get("help_tier", "stub")

        lines.append(f"{name}|{desc}|{cat}|{usage}|{tier}")

    lines.sort()

    with open(output_path, "w") as f:
        f.write("\n".join(lines) + "\n")

    print(f" tools.db: {len(lines)} entries")
    return len(lines)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Cheatsheet generator
|
||||
# ============================================================
|
||||
|
||||
def sanitize_filename(name):
    """Convert a tool name to a safe, lowercase filename component."""
    safe = re.sub(r'[^a-zA-Z0-9._-]', '-', name)
    return safe.strip('-').lower()
|
||||
|
||||
|
||||
def generate_usage_comment(name, usage, index):
    """Generate a descriptive comment for a usage example.

    Args:
        name: Tool name (kept for interface stability; currently unused).
        usage: The example command line.
        index: Position of the example; index 0 is always "Basic usage".

    Returns:
        A short human-readable description inferred from the flags in
        *usage*. NOTE(review): flag checks match against the raw command
        (case-sensitive); only the "grep" check uses the lowercased copy —
        presumably intentional since flags are case-significant.
    """
    usage_lower = usage.lower()

    if index == 0:
        return "Basic usage"

    # Try to describe based on flags (first match wins, so order matters)
    if "-vv" in usage or "--verbose" in usage:
        return "Verbose output with details"
    if "--no-static" in usage or "--no static" in usage:
        return "Skip static analysis, focus on dynamic"
    if "-n " in usage:
        return "Suppress default output"
    if "-a " in usage or "--all" in usage:
        return "Show all results"
    if "-s " in usage:
        return "Select specific item"
    if "-d " in usage:
        return "Dump/extract content"
    if "-r " in usage:
        return "Recursive/follow references"
    if "-k " in usage:
        return "Extract by keyword"
    if "-o " in usage:
        return "Output to file"
    if "-f " in usage:
        return "Process input file"
    if "-i " in usage:
        return "Case-insensitive search"
    if "grep" in usage_lower:
        return "Filter output for specific pattern"
    if "--help" in usage:
        return "Show help"
    if "|" in usage:
        return "Pipe output for processing"
    if ">" in usage:
        return "Save output to file"

    return "Alternative usage"
|
||||
|
||||
|
||||
def format_recipes_section(tool_id, recipe_index):
    """Render the multi-tool recipes block for a cheatsheet.

    Looks up *tool_id*, then two loose variants (without "-py", without
    hyphens). Returns "" when the tool appears in no recipe.
    """
    candidates = recipe_index.get(tool_id, [])
    if not candidates:
        for variant in (tool_id.replace("-py", ""), tool_id.replace("-", "")):
            candidates = recipe_index.get(variant, [])
            if candidates:
                break
    if not candidates:
        return ""

    # Keep only the first occurrence of each recipe id, preserving order.
    seen_ids = set()
    unique = []
    for rec in candidates:
        rid = rec["id"]
        if rid in seen_ids:
            continue
        seen_ids.add(rid)
        unique.append(rec)

    out = [
        "",
        "# --- Recipes (multi-tool chains) ---",
        "",
    ]
    for rec in unique:
        out.append(f"# >> {rec['name']}")
        out.extend(rec.get("commands", []))
        out.append("")

    return "\n".join(out)
|
||||
|
||||
|
||||
def generate_cheatsheet_rich(t, recipe_index=None):
    """Generate a rich cheatsheet for a tool with FOR610 coverage.

    Layout: comment header (name, description, labs/sections/author,
    docs URL), a blank line, a "%"-prefixed tag line, then one commented
    command per FOR610 typical_usage entry. When *recipe_index* is given
    and the tool appears in recipes, a recipes section is appended.
    Returns the cheatsheet as a single string.
    """
    f610 = t["sources"]["for610"]
    name = t["name"]
    desc = t.get("description", "")
    labs = f610.get("labs", [])
    sections = f610.get("sections", [])
    tags = f610.get("tags", [])
    usages = f610.get("typical_usage", [])
    author = f610.get("author", "")

    lines = [
        f"# {name}",
        f"# {desc}",
    ]

    # Single metadata line joining labs / sections / author with " | ".
    meta_parts = []
    if labs:
        meta_parts.append(f"FOR610 Labs: {', '.join(labs)}")
    if sections:
        # Sections may be non-strings in the YAML; stringify for join.
        meta_parts.append(f"Sections: {', '.join(str(s) for s in sections)}")
    if author:
        meta_parts.append(f"Author: {author}")
    if meta_parts:
        lines.append(f"# {' | '.join(meta_parts)}")

    # REMnux docs URL if available
    if t["sources"]["remnux_docs"].get("covered"):
        url = t["sources"]["remnux_docs"].get("docs_url", "")
        if url:
            lines.append(f"# Docs: {url}")

    lines.append("")

    # Tag line ("%" prefix); falls back to the lowercased tool name.
    # At most 8 tags are emitted to keep the line short.
    tag_str = ", ".join(tags[:8]) if tags else name.lower()
    lines.append(f"% {tag_str}")
    lines.append("")

    # Usage examples, each preceded by a generated descriptive comment
    for i, usage in enumerate(usages):
        comment = generate_usage_comment(name, usage, i)
        lines.append(f"# {comment}")
        lines.append(usage)
        lines.append("")

    # If no usage examples exist, emit a minimal --help entry instead
    if not usages:
        lines.append(f"# Show help")
        lines.append(f"{name} --help")
        lines.append("")

    # Append the recipes section if this tool participates in any recipes
    if recipe_index:
        recipes_text = format_recipes_section(t["id"], recipe_index)
        if recipes_text:
            lines.append(recipes_text)

    return "\n".join(lines)
|
||||
|
||||
|
||||
def generate_cheatsheet_standard(t):
    """Generate a standard cheatsheet from REMnux docs metadata.

    Header comments carry description/category/docs URL; the body is a
    tag line plus a single generic ``--help`` example.
    """
    rdocs = t["sources"]["remnux_docs"]
    name = t["name"]
    desc = t.get("description", "") or rdocs.get("description", "")

    header = [
        f"# {name}",
        f"# {desc}" if desc else f"# {name} tool",
    ]
    category = rdocs.get("category", "")
    if category:
        header.append(f"# Category: {category}")
    docs_url = rdocs.get("docs_url", "")
    if docs_url:
        header.append(f"# Docs: {docs_url}")

    body = [
        "",
        f"% {sanitize_filename(name)}",
        "",
        f"# Show help for {name}",
        f"{name} --help",
        "",
    ]
    return "\n".join(header + body)
|
||||
|
||||
|
||||
def generate_cheatsheet_basic(t):
    """Generate a minimal cheatsheet for a tool known only from salt-states."""
    name = t["name"]
    salt = t["sources"]["salt_states"]
    install_method = salt.get("install_method", "unknown")
    package = salt.get("package_name", name)

    sheet = [
        f"# {name}",
        f"# Installed via: {install_method} ({package})",
        "",
        f"% {sanitize_filename(name)}",
        "",
        f"# Show help for {name}",
        f"{name} --help",
        "",
    ]
    return "\n".join(sheet)
|
||||
|
||||
|
||||
def generate_cheatsheets(tools, recipe_index=None):
    """Generate per-tool cheatsheet files.

    Writes one ``<sanitized-name>.cheat`` per in-REMnux tool into
    data/generated/cheatsheets/, choosing the template by help tier:
    rich -> FOR610 template, standard -> REMnux-docs template,
    basic/stub -> minimal "--help" template. Returns the file count.
    """
    cheat_dir = os.path.join(GEN_DIR, "cheatsheets")
    os.makedirs(cheat_dir, exist_ok=True)

    count = 0
    for t in tools:
        # Only ship cheatsheets for tools actually in the container.
        if not t.get("in_remnux"):
            continue

        tier = t.get("help_tier", "stub")
        name = t["name"]
        filename = sanitize_filename(name) + ".cheat"

        if tier == "rich":
            content = generate_cheatsheet_rich(t, recipe_index=recipe_index)
        elif tier == "standard":
            content = generate_cheatsheet_standard(t)
        else:
            # Covers both "basic" and "stub" tiers.
            content = generate_cheatsheet_basic(t)

        with open(os.path.join(cheat_dir, filename), "w") as f:
            f.write(content)
        count += 1

    print(f" cheatsheets: {count} .cheat files")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Workflow generator
|
||||
# ============================================================
|
||||
|
||||
def _get_tool_examples(tool_name, master_tools_by_name):
|
||||
"""Get 1-2 example commands for a tool from the master inventory."""
|
||||
tool = master_tools_by_name.get(tool_name)
|
||||
if not tool:
|
||||
# Try kebab-case lookup
|
||||
normalized = tool_name.lower().replace("_", "-")
|
||||
tool = master_tools_by_name.get(normalized)
|
||||
if tool and tool["sources"]["for610"].get("covered"):
|
||||
usages = tool["sources"]["for610"].get("typical_usage", [])
|
||||
return usages[:2]
|
||||
return []
|
||||
|
||||
|
||||
def generate_workflows(workflows_data, master_tools=None):
    """Generate readable workflow help files with inline examples.

    Writes one fixed-width (60-col) text file per workflow into
    GEN_DIR/workflows/, plus an index.txt listing all workflows.
    When *master_tools* is supplied, one example command per step tool
    is inlined (sourced from FOR610 typical_usage via _get_tool_examples).
    Returns the number of workflow files written (index not counted).
    """
    wf_dir = os.path.join(GEN_DIR, "workflows")
    os.makedirs(wf_dir, exist_ok=True)

    # Build tool name lookup for inline examples.
    # Keyed by lowercased name, raw id, and each lowercased alias so that
    # step tool references resolve regardless of which form a workflow uses.
    tools_by_name = {}
    if master_tools:
        for t in master_tools:
            tools_by_name[t["name"].lower()] = t
            tools_by_name[t["id"]] = t
            for alias in t.get("aliases", []):
                tools_by_name[alias.lower()] = t

    workflows = workflows_data.get("workflows", [])
    count = 0

    for wf in workflows:
        wf_id = wf["id"]
        name = wf["name"]
        desc = wf.get("description", "")
        steps = wf.get("steps", [])
        related_labs = wf.get("related_labs", [])

        # Banner: title framed by '=' rules, then the description.
        lines = [
            f"{'='*60}",
            f" {name}",
            f"{'='*60}",
            "",
            f" {desc}",
            "",
        ]

        if related_labs:
            lines.append(f" Related FOR610 Labs: {', '.join(related_labs)}")
            lines.append("")

        lines.append(f"{'─'*60}")
        lines.append("")

        for step in steps:
            # "?" placeholder keeps the layout stable if a step lacks an order.
            order = step.get("order", "?")
            step_name = step.get("name", "")
            step_desc = step.get("description", "")
            step_tools = step.get("tools", [])

            lines.append(f" Step {order}: {step_name}")
            if step_tools:
                lines.append(f" Tools: {', '.join(step_tools)}")
            if step_desc:
                wrapped = textwrap.fill(step_desc, width=56, initial_indent=" ", subsequent_indent=" ")
                lines.append(wrapped)

            # Add inline command examples for each tool
            if step_tools and tools_by_name:
                # Blank separator is emitted only once, before the first example.
                examples_shown = False
                for tool_name in step_tools:
                    examples = _get_tool_examples(tool_name, tools_by_name)
                    if examples:
                        if not examples_shown:
                            lines.append("")
                        for ex in examples[:1]:  # Show 1 example per tool
                            lines.append(f" $ {ex}")
                        examples_shown = True

            lines.append("")

        # Footer with navigation tips.
        lines.append(f"{'─'*60}")
        lines.append(f" Tip: 'fhelp cheat <tool>' for full examples")
        lines.append(f" 'Ctrl+G' for interactive cheatsheet browser")
        lines.append("")

        # Workflow ids use underscores in YAML but dashes on disk.
        filename = wf_id.replace("_", "-") + ".txt"
        with open(os.path.join(wf_dir, filename), "w") as f:
            f.write("\n".join(lines))
        count += 1

    # Also generate an index file
    index_lines = [
        f"{'='*60}",
        f" Available Analysis Workflows",
        f"{'='*60}",
        "",
    ]
    for wf in workflows:
        wf_id = wf["id"].replace("_", "-")
        name = wf["name"]
        desc = wf.get("description", "")
        index_lines.append(f" {wf_id}")
        index_lines.append(f" {name}")
        wrapped = textwrap.fill(desc, width=56, initial_indent=" ", subsequent_indent=" ")
        index_lines.append(wrapped)
        index_lines.append("")

    index_lines += [
        f"{'─'*60}",
        f" Usage: fhelp workflow <name>",
        f" Example: fhelp workflow static-analysis",
        "",
    ]

    with open(os.path.join(wf_dir, "index.txt"), "w") as f:
        f.write("\n".join(index_lines))

    print(f" workflows: {count} workflow files + index")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# TLDR generator
|
||||
# ============================================================
|
||||
|
||||
def generate_tldr(tools):
    """Generate TLDR pages for tools missing from upstream.

    Only rich/standard-tier tools present on REMnux get a page; stub-tier
    tools carry too little data for a useful TLDR. Example commands come
    from FOR610 typical_usage when the tool is covered there, otherwise a
    generic ``--help`` invocation. Returns the number of pages written.
    """
    tldr_dir = os.path.join(GEN_DIR, "tldr")
    os.makedirs(tldr_dir, exist_ok=True)

    count = 0
    for t in tools:
        if not t.get("in_remnux"):
            continue

        tier = t.get("help_tier", "stub")
        if tier not in ("rich", "standard"):
            continue

        name = t["name"]
        desc = t.get("description", "") or f"{name} tool"

        # Prefer curated FOR610 usage examples when available.
        usages = []
        if t["sources"]["for610"].get("covered"):
            usages = t["sources"]["for610"].get("typical_usage", [])

        if not usages:
            usages = [f"{name} --help"]

        # TLDR page format: H1 title, blockquote description, then examples.
        lines = [
            f"# {name}",
            "",
            f"> {desc}",
            "",
        ]

        # Cap at 4 examples per page; the index was unused, so iterate directly.
        for usage in usages[:4]:
            lines.append(f"- Run {name}:")
            lines.append("")
            lines.append(f"`{usage}`")
            lines.append("")

        filename = sanitize_filename(name) + ".md"
        with open(os.path.join(tldr_dir, filename), "w") as f:
            f.write("\n".join(lines))
        count += 1

    print(f" tldr: {count} pages")
    return count
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Main
|
||||
# ============================================================
|
||||
|
||||
def main():
    """Run every generator against the master inventory and report counts."""
    print("Generating help artifacts from master inventory...")

    inventory = load_master()
    all_tools = inventory["tools"]
    wf_data = load_workflows()
    rec_data = load_recipes()
    rec_index = build_recipe_index(rec_data)

    n_tools = len(all_tools)
    n_wf = len(wf_data.get('workflows', []))
    n_rec = len(rec_data.get('recipes', []))
    print(f"\nInput: {n_tools} tools, {n_wf} workflows, {n_rec} recipes")
    print()

    # Each generator writes its artifacts and returns a count for the summary.
    n_db = generate_tools_db(all_tools)
    n_cheat = generate_cheatsheets(all_tools, recipe_index=rec_index)
    n_workflows = generate_workflows(wf_data, master_tools=all_tools)
    n_tldr = generate_tldr(all_tools)

    print(f"\nAll artifacts generated in {GEN_DIR}/")
    print(f" tools.db: {n_db} entries")
    print(f" cheatsheets/: {n_cheat} files")
    print(f" workflows/: {n_workflows} + index")
    print(f" tldr/: {n_tldr} pages")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Parse REMnux salt-states repository to extract all installed tools/packages.
|
||||
|
||||
Fetches the salt-states repo tree from GitHub, parses .sls files to identify
|
||||
what gets installed, and outputs data/remnux/sources/salt-states.yaml.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import urllib.request
|
||||
import yaml
|
||||
import os
|
||||
|
||||
# GitHub REST API root for the REMnux salt-states repository (tree listing).
GITHUB_API = "https://api.github.com/repos/REMnux/salt-states"
# Raw-content base URL used to download individual .sls files.
RAW_BASE = "https://raw.githubusercontent.com/REMnux/salt-states/master"
# Destination YAML produced by this script.
OUTPUT_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "remnux", "sources", "salt-states.yaml")
|
||||
|
||||
|
||||
def fetch_json(url):
    """Fetch *url* and decode its body as JSON; network errors propagate."""
    request = urllib.request.Request(url, headers={"User-Agent": "remnux-tool-parser"})
    with urllib.request.urlopen(request, timeout=30) as response:
        payload = response.read()
    return json.loads(payload.decode())
|
||||
|
||||
|
||||
def fetch_text(url):
    """Fetch *url* as text; on any failure print a warning and return None."""
    request = urllib.request.Request(url, headers={"User-Agent": "remnux-tool-parser"})
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            body = response.read()
    except Exception as e:
        # Best-effort: a single unfetchable .sls file should not abort the run.
        print(f" Warning: could not fetch {url}: {e}")
        return None
    return body.decode()
|
||||
|
||||
|
||||
def get_sls_files():
    """Return the path of every .sls blob in the salt-states repo tree."""
    listing = fetch_json(f"{GITHUB_API}/git/trees/master?recursive=1")
    paths = []
    for node in listing["tree"]:
        # Only real files (blobs); directories appear as "tree" entries.
        if node["type"] == "blob" and node["path"].endswith(".sls"):
            paths.append(node["path"])
    return paths
|
||||
|
||||
|
||||
def classify_sls_path(path):
    """Infer the install method from an .sls file's directory path.

    Rules are checked in order: the more specific directory names
    (e.g. "perl-package") must win over the generic "package".
    """
    lowered = path.lower()
    rules = (
        (("python3-package", "python-package", "pip"), "pip"),
        (("rubygem",), "gem"),
        (("npm", "node"), "npm"),
        (("perl-package",), "perl"),
        (("package",), "apt"),
        (("tools",), "manual"),
        (("script",), "script"),
    )
    for needles, method in rules:
        if any(needle in lowered for needle in needles):
            return method
    return "unknown"
|
||||
|
||||
|
||||
def extract_tool_name_from_path(path):
    """Extract a human-readable tool name from the .sls file path.

    Returns None for orchestration/meta files that do not correspond to a
    single tool (init files, package-list aggregates, etc.).
    """
    basename = os.path.basename(path)
    # Strip only a trailing ".sls" extension. The previous str.replace()
    # also removed ".sls" occurring mid-name, mangling names like "my.slstool.sls".
    if basename.endswith(".sls"):
        basename = basename[: -len(".sls")]
    # Skip non-tool files
    skip = {"init", "addon", "cloud", "dedicated", "theme", "remnux-config",
            "apt-transport-https", "packages", "python3-packages", "python-packages",
            "rubygems", "perl-packages", "node-packages", "tools", "scripts"}
    if basename in skip:
        return None
    return basename
|
||||
|
||||
|
||||
def parse_sls_content(content, path):
    """Parse a .sls file and extract package/tool information.

    Returns a list with at most one entry dict (id, package_names,
    install_method, salt_state_path, optional possibly_conditional flag),
    or [] when the content is empty or the path maps to no tool.
    """
    if not content:
        return []

    results = []
    tool_name = extract_tool_name_from_path(path)
    if not tool_name:
        return []

    install_method = classify_sls_path(path)

    # Try to find the actual package name from the content.
    # Several heuristics are applied in turn; any of them may add candidates.
    package_names = []

    # Match pip.installed, pkg.installed, gem.installed, npm.installed
    # (state ID on its own line, the *.installed directive indented below it).
    for match in re.finditer(r'(\w[\w.-]+):\s*\n\s+(?:pip|pkg|gem|npm)\.installed', content):
        package_names.append(match.group(1))

    # Match "- name: package_name" in pip/pkg states
    for match in re.finditer(r'-\s+name:\s+([^\s#\n]+)', content):
        name = match.group(1).strip("'\"")
        # Skip Jinja expressions ("{...") and filesystem paths ("/...").
        if name and not name.startswith('{') and not name.startswith('/'):
            package_names.append(name)

    # Match wget/curl downloads (manual installs): take the URL's last
    # path segment, requiring a dot (looks like a filename, not a dir)
    # and excluding apt signing keys.
    for match in re.finditer(r'(?:wget|curl)\s+.*?/([^/\s"]+?)(?:\s|"|$)', content):
        fname = match.group(1)
        if '.' in fname and not fname.endswith('.key'):
            package_names.append(fname)

    # Match file.managed targets (scripts/binaries being deployed)
    for match in re.finditer(r'/usr/local/bin/([^:\s]+)', content):
        package_names.append(match.group(1))

    # Deduplicate and clean (case-insensitive; first spelling wins,
    # single-character matches are discarded as noise).
    seen = set()
    clean_names = []
    for n in package_names:
        n = n.strip().strip("'\"")
        if n and n.lower() not in seen and len(n) > 1:
            seen.add(n.lower())
            clean_names.append(n)

    entry = {
        "id": tool_name,
        # Fall back to the file-derived name when no heuristic matched.
        "package_names": clean_names if clean_names else [tool_name],
        "install_method": install_method,
        "salt_state_path": path,
    }

    # Try to detect if it's enabled/disabled.
    # NOTE(review): crude heuristic — a literal "False" plus an onlyif/unless
    # clause anywhere in the file; may over- or under-flag. Confirm against
    # actual salt-states usage.
    if "False" in content and ("onlyif" in content.lower() or "unless" in content.lower()):
        entry["possibly_conditional"] = True

    results.append(entry)
    return results
|
||||
|
||||
|
||||
def main():
    """Fetch every remnux/ .sls file, extract tool entries, write the YAML.

    Network-bound: one GitHub API call for the tree listing plus one raw
    fetch per .sls file. Output goes to OUTPUT_PATH.
    """
    print("Fetching salt-states repository tree...")
    sls_files = get_sls_files()
    print(f"Found {len(sls_files)} .sls files")

    # Filter to relevant paths (skip top-level orchestration files)
    relevant = [f for f in sls_files if f.startswith("remnux/")]
    print(f" {len(relevant)} under remnux/")

    all_tools = []
    categories_seen = set()

    for i, path in enumerate(relevant):
        # Progress indicator every 20 files.
        if i % 20 == 0:
            print(f" Processing {i}/{len(relevant)}...")

        # Derive category from path; two-segment paths (remnux/x.sls)
        # carry no category directory and are skipped here.
        parts = path.split("/")
        if len(parts) >= 3:
            category_dir = parts[1]  # e.g., "python3-packages", "tools", "packages"
            categories_seen.add(category_dir)

        # fetch_text returns None on failure; parse_sls_content tolerates that.
        content = fetch_text(f"{RAW_BASE}/{path}")
        tools = parse_sls_content(content, path)
        all_tools.extend(tools)

    # Deduplicate by id (first occurrence wins)
    seen_ids = set()
    unique_tools = []
    for t in all_tools:
        if t["id"] not in seen_ids:
            seen_ids.add(t["id"])
            unique_tools.append(t)

    # Sort by id
    unique_tools.sort(key=lambda t: t["id"])

    output = {
        "metadata": {
            "source": "https://github.com/REMnux/salt-states",
            "branch": "master",
            "total_sls_files": len(relevant),
            "total_tools_extracted": len(unique_tools),
            "install_method_counts": {},  # filled in below
            "salt_directories": sorted(categories_seen),
        },
        "tools": unique_tools,
    }

    # Count install methods
    for t in unique_tools:
        m = t["install_method"]
        output["metadata"]["install_method_counts"][m] = \
            output["metadata"]["install_method_counts"].get(m, 0) + 1

    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    print(f"\nDone! Extracted {len(unique_tools)} tools")
    for method, count in sorted(output["metadata"]["install_method_counts"].items()):
        print(f" {method}: {count}")
    print(f"Output: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Scrape REMnux documentation to extract all documented tools.
|
||||
|
||||
Fetches docs.remnux.org tool listing pages and extracts tool names,
|
||||
descriptions, categories, and URLs. Outputs data/remnux/sources/remnux-docs.yaml.
|
||||
"""
|
||||
|
||||
import re
|
||||
import urllib.request
|
||||
import yaml
|
||||
import os
|
||||
import time
|
||||
|
||||
# Root of the "Discover the Tools" section on docs.remnux.org.
BASE_URL = "https://docs.remnux.org/discover-the-tools"
# Destination YAML produced by this scraper.
OUTPUT_PATH = os.path.join(os.path.dirname(__file__), "..", "data", "remnux", "sources", "remnux-docs.yaml")

# All known category pages from docs.remnux.org.
# Each entry is (human-readable category label, URL path under BASE_URL).
# NOTE(review): paths use '+' as the word separator — confirm this matches
# the live site's URL scheme if scraping starts returning empty pages.
CATEGORY_PAGES = [
    # Examine Static Properties
    ("Examine Static Properties > General", "examine+static+properties/general"),
    ("Examine Static Properties > PE Files", "examine+static+properties/pe-files"),
    ("Examine Static Properties > ELF Files", "examine+static+properties/elf-files"),
    ("Examine Static Properties > .NET", "examine+static+properties/.net"),
    ("Examine Static Properties > Go", "examine+static+properties/go"),
    ("Examine Static Properties > Deobfuscation", "examine+static+properties/deobfuscation"),
    # Statically Analyze Code
    ("Statically Analyze Code > General", "statically+analyze+code/general"),
    ("Statically Analyze Code > Unpacking", "statically+analyze+code/unpacking"),
    ("Statically Analyze Code > PE Files", "statically+analyze+code/pe-files"),
    ("Statically Analyze Code > Python", "statically+analyze+code/python"),
    ("Statically Analyze Code > Scripts", "statically+analyze+code/scripts"),
    ("Statically Analyze Code > Java", "statically+analyze+code/java"),
    ("Statically Analyze Code > .NET", "statically+analyze+code/.net"),
    ("Statically Analyze Code > Android", "statically+analyze+code/android"),
    # Dynamically Reverse-Engineer Code
    ("Dynamically Reverse-Engineer Code > General", "dynamically+reverse-engineer+code/general"),
    ("Dynamically Reverse-Engineer Code > Shellcode", "dynamically+reverse-engineer+code/shellcode"),
    ("Dynamically Reverse-Engineer Code > Scripts", "dynamically+reverse-engineer+code/scripts"),
    ("Dynamically Reverse-Engineer Code > ELF Files", "dynamically+reverse-engineer+code/elf-files"),
    # Memory Forensics
    ("Perform Memory Forensics", "perform+memory+forensics"),
    # Network Interactions
    ("Explore Network Interactions > Monitoring", "explore+network+interactions/monitoring"),
    ("Explore Network Interactions > Connecting", "explore+network+interactions/connecting"),
    ("Explore Network Interactions > Services", "explore+network+interactions/services"),
    # System Interactions
    ("Investigate System Interactions", "investigate+system+interactions"),
    # Documents
    ("Analyze Documents > General", "analyze+documents/general"),
    ("Analyze Documents > PDF", "analyze+documents/pdf"),
    ("Analyze Documents > Microsoft Office", "analyze+documents/microsoft+office"),
    ("Analyze Documents > Email Messages", "analyze+documents/email+messages"),
    # AI
    ("Use Artificial Intelligence", "use+artificial+intelligence"),
    # Data
    ("Gather and Analyze Data", "gather+and+analyze+data"),
    # View/Edit
    ("View or Edit Files", "view+or+edit+files"),
    # Utilities
    ("General Utilities", "general+utilities"),
]
|
||||
|
||||
|
||||
def fetch_page(url):
    """Download *url* and return its body as text, or None on failure."""
    headers = {
        "User-Agent": "Mozilla/5.0 (remnux-doc-scraper)",
        "Accept": "text/html,application/xhtml+xml",
    }
    request = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(request, timeout=30) as response:
            raw = response.read()
    except Exception as e:
        # Best-effort: a single failed page should not abort the whole scrape.
        print(f" Warning: could not fetch {url}: {e}")
        return None
    return raw.decode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def normalize_id(name):
    """Normalize a display name into a kebab-case identifier."""
    ident = name.strip().lower()
    # Fold known script extensions into the ID instead of dropping them
    # (e.g. "pdfid.py" -> "pdfid-py"), keeping the display name untouched.
    for ext in ("py", "pl", "bat"):
        ident = re.sub(rf'\.{ext}$', f'-{ext}', ident)
    # Collapse every run of non-alphanumerics into a single dash.
    ident = re.sub(r'[^a-z0-9]+', '-', ident).strip('-')
    return ident
|
||||
|
||||
|
||||
def extract_tools_from_html(html, category, category_path):
    """Extract tool entries from a docs page HTML.

    Returns a list of dicts with name, id, category, category_path,
    description, docs_url, and (heading matches only) anchor/website.
    Heading-based extraction is tried first; the bold-text pattern is a
    fallback used only when headings yield nothing.
    """
    tools = []

    # GitBook pages use specific patterns for tool headings
    # Pattern 1: <h2> or <h3> headings with tool names
    # Pattern 2: Bold text followed by description
    # The docs use a pattern like: **Tool Name** description text

    # Try to find tool sections - GitBook uses specific div/section patterns
    # Look for heading patterns with tool names
    # (captures the heading's id attribute and the text after the anchor link)
    heading_pattern = re.compile(
        r'<h[23][^>]*id="([^"]*)"[^>]*>.*?<a[^>]*>.*?</a>\s*(.*?)\s*</h[23]>',
        re.DOTALL | re.IGNORECASE
    )

    # Also try plain text patterns
    # GitBook often renders as: tool-name followed by description
    bold_pattern = re.compile(
        r'<strong>(.*?)</strong>\s*[-:]\s*(.*?)(?=<(?:br|p|div|strong|h[23])|$)',
        re.DOTALL | re.IGNORECASE
    )

    # Find headings first
    for match in heading_pattern.finditer(html):
        anchor_id = match.group(1)
        heading_text = re.sub(r'<[^>]+>', '', match.group(2)).strip()
        # Very long "headings" are prose, not tool names — skip them.
        if heading_text and len(heading_text) < 80:
            # Get description from content after heading: strip tags from the
            # next 500 chars and keep the first sentence.
            pos = match.end()
            desc_chunk = html[pos:pos+500]
            desc_chunk = re.sub(r'<[^>]+>', ' ', desc_chunk)
            desc_chunk = re.sub(r'\s+', ' ', desc_chunk).strip()
            # Take first sentence
            desc = desc_chunk.split('.')[0].strip() + '.' if desc_chunk else ""
            if len(desc) > 200:
                desc = desc[:197] + "..."

            # Try to find website URL near this section — first external
            # (non-docs.remnux) link within the following 2000 chars.
            website_chunk = html[pos:pos+2000]
            website_match = re.search(r'href="(https?://(?!docs\.remnux)[^"]+)"', website_chunk)
            website = website_match.group(1) if website_match else ""

            tool = {
                "name": heading_text,
                "id": normalize_id(heading_text),
                "category": category,
                "category_path": category_path,
                "description": desc,
                "docs_url": f"{BASE_URL}/{category_path}",
                "anchor": anchor_id,
            }
            if website:
                tool["website"] = website
            tools.append(tool)

    # If we got nothing from headings, try the bold pattern
    if not tools:
        for match in bold_pattern.finditer(html):
            name = re.sub(r'<[^>]+>', '', match.group(1)).strip()
            desc = re.sub(r'<[^>]+>', ' ', match.group(2)).strip()
            desc = re.sub(r'\s+', ' ', desc).strip()
            # Plausible tool names only: non-empty, 2-79 chars.
            if name and len(name) < 80 and len(name) > 1:
                if len(desc) > 200:
                    desc = desc[:197] + "..."
                tools.append({
                    "name": name,
                    "id": normalize_id(name),
                    "category": category,
                    "category_path": category_path,
                    "description": desc,
                    "docs_url": f"{BASE_URL}/{category_path}",
                })

    return tools
|
||||
|
||||
|
||||
def main():
    """Scrape every category page, deduplicate tools by id, write the YAML.

    Network-bound: one HTTP fetch per CATEGORY_PAGES entry with a short
    politeness delay between requests. Output goes to OUTPUT_PATH.
    """
    print("Scraping REMnux documentation...")
    all_tools = []

    for category, path in CATEGORY_PAGES:
        url = f"{BASE_URL}/{path}"
        print(f" Fetching: {category}")
        html = fetch_page(url)

        if not html:
            print(f" Skipped (fetch failed)")
            continue

        tools = extract_tools_from_html(html, category, path)
        print(f" Found {len(tools)} tools")
        all_tools.extend(tools)

        time.sleep(0.3)  # Be polite

    # Deduplicate by id (same tool can appear in multiple categories)
    seen = {}
    for t in all_tools:
        tid = t["id"]
        if tid not in seen:
            seen[tid] = t
        else:
            # Tool appears in multiple categories - track both
            existing = seen[tid]
            if "additional_categories" not in existing:
                existing["additional_categories"] = []
            existing["additional_categories"].append(t["category"])

    unique_tools = sorted(seen.values(), key=lambda t: t["id"])

    output = {
        "metadata": {
            "source": "https://docs.remnux.org/discover-the-tools",
            "categories_scraped": len(CATEGORY_PAGES),
            "total_tools_extracted": len(unique_tools),
            "category_counts": {},  # filled in below
        },
        "tools": unique_tools,
    }

    # Count per category (counts pre-dedup occurrences, so a tool listed in
    # several categories is counted once per category)
    for t in all_tools:
        cat = t["category"]
        output["metadata"]["category_counts"][cat] = \
            output["metadata"]["category_counts"].get(cat, 0) + 1

    os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
    with open(OUTPUT_PATH, "w") as f:
        yaml.dump(output, f, default_flow_style=False, sort_keys=False, allow_unicode=True)

    print(f"\nDone! Extracted {len(unique_tools)} unique tools from {len(CATEGORY_PAGES)} category pages")
    print(f"Output: {OUTPUT_PATH}")


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,360 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Comprehensive verification of generated help artifacts.
|
||||
|
||||
Tests:
|
||||
1. All FOR610 tools with in_remnux=true have cheatsheets
|
||||
2. All cheatsheet content matches researched data
|
||||
3. All workflows are generated and contain correct tool references
|
||||
4. tools.db entries match master inventory
|
||||
5. No orphaned references (tools in labs but missing from master)
|
||||
6. Rich-tier cheatsheets have usage examples from FOR610
|
||||
7. REMnux docs tools have correct descriptions
|
||||
"""
|
||||
|
||||
import glob
import os
import sys
from collections import Counter

import yaml
|
||||
|
||||
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")
|
||||
|
||||
def load_yaml(path):
    """Parse the YAML document at *path* and return the resulting objects."""
    with open(path) as fh:
        text = fh.read()
    return yaml.safe_load(text)
|
||||
|
||||
|
||||
def test_master_inventory():
    """Verify master inventory integrity.

    Checks that every tool carries the required fields (id, name, sources,
    help_tier) and that tool IDs are unique. Returns the list of error
    strings; an empty list means the test passed.
    """
    print("=" * 60)
    print("TEST 1: Master Inventory Integrity")
    print("=" * 60)
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    tools = master["tools"]

    # Check all tools have required fields
    for t in tools:
        tid = t.get("id", "MISSING")
        if not t.get("id"):
            errors.append(f"Tool missing id: {t}")
        if not t.get("name"):
            errors.append(f"Tool {tid} missing name")
        if "sources" not in t:
            errors.append(f"Tool {tid} missing sources")
        if "help_tier" not in t:
            errors.append(f"Tool {tid} missing help_tier")

    # Check no duplicate IDs. Counter is a single O(n) pass; the previous
    # list.count() inside a comprehension was O(n^2) over 400+ tools.
    id_counts = Counter(t["id"] for t in tools)
    dupes = {tid for tid, n in id_counts.items() if n > 1}
    if dupes:
        errors.append(f"Duplicate IDs: {dupes}")

    print(f" Total tools: {len(tools)}")
    print(f" Errors: {len(errors)}")
    for e in errors[:10]:
        print(f" ! {e}")
    return errors
|
||||
|
||||
|
||||
def test_for610_coverage():
    """Verify all FOR610 in_remnux tools appear in master and have cheatsheets.

    Also spot-checks that tools with typical_usage examples actually carry
    at least one of them in their cheatsheet. Returns the error list.
    """
    print("\n" + "=" * 60)
    print("TEST 2: FOR610 Tool Coverage")
    print("=" * 60)
    errors = []

    for610 = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    master_ids = {t["id"] for t in master["tools"]}

    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")
    cheat_files = {os.path.basename(f).replace(".cheat", "")
                   for f in glob.glob(os.path.join(cheat_dir, "*.cheat"))}

    for610_remnux = [t for t in for610["tools"] if t.get("in_remnux")]
    for610_all = for610["tools"]

    # Check all FOR610 in_remnux tools are in master
    missing_from_master = []
    for t in for610_remnux:
        if t["id"] not in master_ids:
            missing_from_master.append(t["id"])
            errors.append(f"FOR610 tool '{t['id']}' ({t['name']}) not in master inventory")

    # Check all FOR610 in_remnux tools have cheatsheets; accept any of the
    # filename spellings the generator might have used.
    missing_cheats = []
    for t in for610_remnux:
        name_variants = [
            t["name"].lower().replace(" ", "-"),
            t["id"],
            t["name"].lower(),
        ]
        if not any(v in cheat_files for v in name_variants):
            missing_cheats.append(t["name"])

    # Check rich-tier cheatsheets have usage examples
    rich_without_examples = []
    for t in for610_remnux:
        usages = t.get("typical_usage", [])
        cheat_path = os.path.join(cheat_dir, t["name"].lower().replace(" ", "-") + ".cheat")
        if not os.path.exists(cheat_path):
            cheat_path = os.path.join(cheat_dir, t["id"] + ".cheat")
        if os.path.exists(cheat_path):
            # Use a context manager: the previous open(...).read() leaked
            # a file handle per tool.
            with open(cheat_path) as fh:
                content = fh.read()
            if usages and not any(u in content for u in usages[:1]):
                rich_without_examples.append(t["name"])

    print(f" FOR610 tools (all): {len(for610_all)}")
    print(f" FOR610 in REMnux: {len(for610_remnux)}")
    print(f" Missing from master: {len(missing_from_master)}")
    print(f" Missing cheatsheets: {len(missing_cheats)}")
    if missing_cheats:
        for m in missing_cheats[:5]:
            print(f" ! {m}")
    print(f" Rich without examples: {len(rich_without_examples)}")
    if rich_without_examples:
        for m in rich_without_examples[:5]:
            print(f" ! {m}")
    print(f" Errors: {len(errors)}")
    return errors
|
||||
|
||||
|
||||
def test_tools_db():
    """Verify tools.db matches master inventory.

    Checks every in_remnux master tool appears in tools.db and that help
    tiers agree. Missing tools and tier mismatches are recorded in the
    returned error list (previously they were only printed, so failures
    never propagated to the caller); empty descriptions stay informational.
    """
    print("\n" + "=" * 60)
    print("TEST 3: tools.db Consistency")
    print("=" * 60)
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    remnux_tools = {t["name"]: t for t in master["tools"] if t.get("in_remnux")}

    # tools.db is a pipe-delimited flat file: name|description|category|usage|tier
    db_path = os.path.join(BASE_DIR, "data/generated/tools.db")
    db_entries = {}
    with open(db_path) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) >= 5:
                db_entries[parts[0]] = {
                    "name": parts[0],
                    "description": parts[1],
                    "category": parts[2],
                    "usage": parts[3],
                    "tier": parts[4],
                }

    # Check all REMnux tools are in DB
    missing_from_db = []
    for name in remnux_tools:
        if name not in db_entries:
            missing_from_db.append(name)
            errors.append(f"REMnux tool missing from tools.db: {name}")

    # Check no empty descriptions (reported but not treated as an error)
    empty_descs = [e["name"] for e in db_entries.values()
                   if e["description"] == "(no description available)"]

    # Check tier consistency
    tier_mismatches = []
    for name, entry in db_entries.items():
        if name in remnux_tools:
            expected_tier = remnux_tools[name].get("help_tier", "stub")
            if entry["tier"] != expected_tier:
                tier_mismatches.append(f"{name}: db={entry['tier']} vs master={expected_tier}")
                errors.append(f"Tier mismatch for {name}: db={entry['tier']} vs master={expected_tier}")

    print(f" tools.db entries: {len(db_entries)}")
    print(f" REMnux tools in master: {len(remnux_tools)}")
    print(f" Missing from DB: {len(missing_from_db)}")
    if missing_from_db:
        for m in missing_from_db[:5]:
            print(f" ! {m}")
    print(f" Empty descriptions: {len(empty_descs)}")
    if empty_descs:
        for m in empty_descs[:5]:
            print(f" ! {m}")
    print(f" Tier mismatches: {len(tier_mismatches)}")
    return errors
|
||||
|
||||
|
||||
def test_workflows():
    """Verify all workflow files are generated and contain valid tool references.

    Confirms each workflow in the source YAML has a generated .txt file, the
    index exists, and no generated file is suspiciously short. Returns errors.
    """
    print("\n" + "=" * 60)
    print("TEST 4: Workflow Files")
    print("=" * 60)
    errors = []

    wf_src = load_yaml(os.path.join(BASE_DIR, "data/for610/workflows.yaml"))
    wf_dir = os.path.join(BASE_DIR, "data/generated/workflows")

    expected_workflows = wf_src.get("workflows", [])
    generated = glob.glob(os.path.join(wf_dir, "*.txt"))
    generated_names = {os.path.basename(f).replace(".txt", "") for f in generated}

    # Check all workflows generated (ids use underscores, filenames dashes)
    for wf in expected_workflows:
        wf_id = wf["id"].replace("_", "-")
        if wf_id not in generated_names:
            errors.append(f"Missing workflow file: {wf_id}.txt")

    # Check index file exists
    if "index" not in generated_names:
        errors.append("Missing workflow index.txt")

    # Check each workflow file has content; use a context manager so file
    # handles are closed promptly (open(f).read() leaked one per file).
    for f in generated:
        with open(f) as fh:
            content = fh.read()
        if len(content) < 50:
            errors.append(f"Workflow file too short: {os.path.basename(f)}")

    print(f" Expected workflows: {len(expected_workflows)}")
    print(f" Generated files: {len(generated)} (including index)")
    print(f" Errors: {len(errors)}")
    for e in errors:
        print(f" ! {e}")
    return errors
|
||||
|
||||
|
||||
def test_lab_tool_references():
    """Verify all tools referenced in labs exist in the FOR610 tool list.

    Note: the check is against FOR610 tool IDs (labs are a FOR610 artifact);
    the previous version also loaded the master inventory into a set that
    was never used, so that dead I/O has been dropped.
    """
    print("\n" + "=" * 60)
    print("TEST 5: Lab-Tool Cross-References")
    print("=" * 60)
    errors = []

    labs = load_yaml(os.path.join(BASE_DIR, "data/for610/labs.yaml"))

    for610_tools = load_yaml(os.path.join(BASE_DIR, "data/for610/tools.yaml"))
    for610_ids = {t["id"] for t in for610_tools["tools"]}

    # Check all tool_ids in labs exist in FOR610
    missing = set()
    for lab in labs["labs"]:
        for tu in lab.get("tools_used", []):
            tid = tu["tool_id"]
            if tid not in for610_ids:
                missing.add(f"Lab {lab['id']}: tool '{tid}'")
                errors.append(f"Lab {lab['id']} references unknown tool: {tid}")

    print(f" Labs: {len(labs['labs'])}")
    print(f" Missing tool references: {len(missing)}")
    for m in sorted(missing)[:5]:
        print(f" ! {m}")
    return errors
|
||||
|
||||
|
||||
def test_remnux_docs_coverage():
    """Check how many REMnux-documented tools have help content.

    Counts the REMnux-documented, in-distro tools from the master
    inventory that have a generated cheatsheet, and prints the ones
    that are missing one.  Informational only: nothing is appended to
    ``errors``, so the suite never fails on coverage gaps.

    Returns:
        list[str]: always empty (kept for a uniform test interface).
    """
    print("\n" + "=" * 60)
    print("TEST 6: REMnux Docs Coverage in Help")
    print("=" * 60)
    errors = []

    master = load_yaml(os.path.join(BASE_DIR, "data/remnux/tools-master.yaml"))
    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")

    docs_tools = [t for t in master["tools"]
                  if t["sources"]["remnux_docs"].get("covered") and t.get("in_remnux")]
    docs_with_cheat = 0
    docs_without_cheat = []

    for t in docs_tools:
        name = t["name"].lower().replace(" ", "-")
        # A cheatsheet may be keyed by the slugified display name or by
        # the tool id.  (The original built an unused ``variants`` list
        # containing a bogus "<name>.cheat" entry; removed as dead code.)
        candidates = (name, t["id"])
        found = any(os.path.exists(os.path.join(cheat_dir, c + ".cheat"))
                    for c in candidates)
        if found:
            docs_with_cheat += 1
        else:
            docs_without_cheat.append(t["name"])

    print(f" REMnux-documented tools: {len(docs_tools)}")
    print(f" With cheatsheets: {docs_with_cheat}")
    print(f" Without cheatsheets: {len(docs_without_cheat)}")
    if docs_without_cheat:
        # Show at most five examples to keep console output readable.
        for m in docs_without_cheat[:5]:
            print(f" ! {m}")
    return errors
|
||||
|
||||
|
||||
def test_cheatsheet_quality():
    """Spot-check cheatsheet content for key tools.

    Each tool in ``key_tools`` must have a generated ``.cheat`` file
    (either "<tool>.cheat" or the sanitized ".py" -> "-py" variant)
    containing every expected command string.

    Returns:
        list[str]: one error string per missing cheatsheet or missing
        expected string (empty when every spot-check passes).
    """
    print("\n" + "=" * 60)
    print("TEST 7: Cheatsheet Quality Spot-Checks")
    print("=" * 60)
    errors = []

    cheat_dir = os.path.join(BASE_DIR, "data/generated/cheatsheets")

    # Key tools that MUST have good cheatsheets, mapped to strings
    # their cheatsheet is expected to contain.
    key_tools = {
        "pdfid.py": ["pdfid.py", "document.pdf"],
        "pdf-parser.py": ["pdf-parser.py", "-a", "-s"],
        "oledump.py": ["oledump.py", "-s", "-v"],
        "capa": ["capa", "specimen"],
        "speakeasy": ["speakeasy", "-t"],
        "ghidra": ["ghidra"],
        "wireshark": ["wireshark"],
        "floss": ["floss"],
        "scdbgc": ["scdbgc", "/f"],
        "rtfdump.py": ["rtfdump.py"],
    }

    for tool, expected_strings in key_tools.items():
        cheat_path = os.path.join(cheat_dir, tool + ".cheat")
        if not os.path.exists(cheat_path):
            # Try without .py (generator sanitizes ".py" to "-py").
            alt = tool.replace(".py", "-py") + ".cheat"
            cheat_path = os.path.join(cheat_dir, alt)

        if not os.path.exists(cheat_path):
            errors.append(f"Key tool {tool} has no cheatsheet")
            print(f" ! {tool}: NO CHEATSHEET")
            continue

        # Context manager closes the handle promptly; the original
        # leaked one open file object per checked tool.
        with open(cheat_path) as fh:
            content = fh.read()
        missing_strings = [s for s in expected_strings if s not in content]
        if missing_strings:
            errors.append(f"{tool} cheatsheet missing: {missing_strings}")
            print(f" ! {tool}: missing {missing_strings}")
        else:
            print(f" + {tool}: OK")

    return errors
|
||||
|
||||
|
||||
def main():
    """Run every validation test, print a summary, and set the exit code.

    Exits with status 1 when any test reported errors, 0 otherwise,
    so this suite can gate a CI pipeline.
    """
    all_errors = []

    all_errors.extend(test_master_inventory())
    all_errors.extend(test_for610_coverage())
    all_errors.extend(test_tools_db())
    all_errors.extend(test_workflows())
    all_errors.extend(test_lab_tool_references())
    all_errors.extend(test_remnux_docs_coverage())
    all_errors.extend(test_cheatsheet_quality())

    print("\n" + "=" * 60)
    print("SUMMARY")
    print("=" * 60)
    if all_errors:
        print(f"\n Total issues found: {len(all_errors)}")
        for e in all_errors:
            print(f" - {e}")
        sys.exit(1)
    else:
        # Plain string: the original used an f-string with no placeholders.
        print("\n All tests passed!")
        sys.exit(0)
|
||||
|
||||
|
||||
# Run the full validation suite when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user