Files
docker_file_analysis/scripts/generate-wiki.py
T
tobias e62a14dafc Add markdown wiki with 473 pages and zk browser
Generate interlinked wiki from master inventory: 397 tool pages,
15 workflow pages, 27 recipe pages, 33 category pages, plus index.
All pages use [[wiki-links]] for cross-navigation between tools,
workflows, recipes, and categories (1782 links total).

Install zk for interactive browsing with fzf search, tag filtering,
and backlink discovery. Add 'fhelp wiki' command and Makefile target.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 19:50:36 +01:00

501 lines
16 KiB
Python

#!/usr/bin/env python3
"""Generate a markdown wiki with [[wiki-links]] from the master tool inventory.
Produces interlinked markdown pages for tools, workflows, recipes, and categories.
Designed for browsing with zk (https://github.com/zk-org/zk).
Output: data/generated/wiki/
"""
import os
import re
import yaml
# Project root: the parent of the directory that contains this script.
BASE_DIR = os.path.join(os.path.dirname(__file__), "..")


def _data_path(*parts):
    """Join *parts* onto the ``data`` directory under BASE_DIR."""
    return os.path.join(BASE_DIR, "data", *parts)


# YAML inputs read by the generator.
MASTER = _data_path("remnux", "tools-master.yaml")
WORKFLOWS_SRC = _data_path("for610", "workflows.yaml")
RECIPES_SRC = _data_path("for610", "recipes.yaml")
CATEGORIES_SRC = _data_path("remnux", "categories-master.yaml")

# Directory that receives the generated markdown pages.
WIKI_DIR = _data_path("generated", "wiki")
def slug(name):
    """Turn a display name into a lowercase, hyphen-separated filename slug.

    The name is lowercased and stripped, a trailing ``.py`` and then a
    trailing ``.pl`` are dropped, every run of characters outside ``a-z0-9``
    collapses into a single hyphen, and leading/trailing hyphens are removed.
    """
    text = name.lower().strip()
    # Order matters: ".py" is removed before ".pl" is looked for.
    for suffix_pattern in (r'\.py$', r'\.pl$'):
        text = re.sub(suffix_pattern, '', text)
    hyphenated = re.sub(r'[^a-z0-9]+', '-', text)
    return hyphenated.strip('-')
def wiki_link(path, label=None):
    """Return ``[[path]]``, or ``[[path|label]]`` when *label* is truthy."""
    if not label:
        return f"[[{path}]]"
    return f"[[{path}|{label}]]"
def load_yaml(path):
    """Load the YAML document stored at *path*.

    Returns the parsed document, or an empty dict when the file does not
    exist or the document is empty/falsy.
    """
    if not os.path.exists(path):
        return {}
    # Decode explicitly as UTF-8 so parsing does not depend on the locale's
    # default encoding.
    with open(path, encoding="utf-8") as f:
        return yaml.safe_load(f) or {}
# ============================================================
# Build cross-reference indexes
# ============================================================
def build_indexes(tools, workflows, recipes):
    """Build the lookup tables used to cross-link wiki pages.

    Args:
        tools: Tool records. Each record needs an ``id``; the category is read
            from ``sources.remnux_docs`` or ``sources.for610`` when that
            source is marked ``covered``. Missing ``sources`` sub-records are
            treated as empty.
        workflows: Workflow records with ``id``, ``name`` and optional
            ``steps``; each step may list ``tools`` and carry ``order`` and
            ``name``.
        recipes: Recipe records with ``id``, ``name`` and optional ``tools``.

    Returns:
        A 4-tuple ``(tool_to_workflows, tool_to_recipes, category_to_tools,
        tool_related)``:

        * ``tool_to_workflows`` maps each tool entry listed in a workflow step
          to a list of ``{"workflow_id", "workflow_name", "step_order",
          "step_name"}`` dicts.
        * ``tool_to_recipes`` maps each tool entry listed in a recipe to a
          list of ``{"recipe_id", "recipe_name"}`` dicts.
        * ``category_to_tools`` maps a category name to its tool records.
        * ``tool_related`` maps a tool id to at most five other tool records
          from the same category.
    """
    # Tool entry -> workflow steps in which it appears.
    tool_to_workflows = {}
    for wf in workflows:
        for step in wf.get("steps") or []:
            for tool_name in step.get("tools") or []:
                tool_to_workflows.setdefault(tool_name, []).append({
                    "workflow_id": wf["id"],
                    "workflow_name": wf["name"],
                    # Same fallbacks as generate_workflow_page, so a step that
                    # renders there does not crash the index build here.
                    "step_order": step.get("order", "?"),
                    "step_name": step.get("name", ""),
                })

    # Tool entry -> recipes in which it appears.
    tool_to_recipes = {}
    for recipe in recipes:
        for tool_id in recipe.get("tools") or []:
            tool_to_recipes.setdefault(tool_id, []).append({
                "recipe_id": recipe["id"],
                "recipe_name": recipe["name"],
            })

    # Category -> tool records. The REMnux docs category wins over FOR610.
    category_to_tools = {}
    for t in tools:
        sources = t.get("sources") or {}
        remnux_docs = sources.get("remnux_docs") or {}
        for610 = sources.get("for610") or {}
        cat = ""
        if remnux_docs.get("covered"):
            cat = remnux_docs.get("category", "")
        elif for610.get("covered"):
            cat = for610.get("category", "")
        if cat:
            category_to_tools.setdefault(cat, []).append(t)

    # Tool id -> up to five other tools sharing its category.
    tool_related = {}
    for cat_tools in category_to_tools.values():
        for t in cat_tools:
            others = [rt for rt in cat_tools if rt["id"] != t["id"]]
            tool_related[t["id"]] = others[:5]

    return tool_to_workflows, tool_to_recipes, category_to_tools, tool_related
# ============================================================
# Generate tool pages
# ============================================================
def generate_tool_page(t, tool_to_workflows, tool_to_recipes, tool_related):
    """Render the markdown wiki page for a single tool.

    Args:
        t: Tool record. ``name`` and ``id`` are required; ``description``,
            ``help_tier`` and the ``sources.remnux_docs`` / ``sources.for610``
            sub-records are optional and treated as empty when absent.
        tool_to_workflows: Maps a tool key to workflow-step references (dicts
            with ``workflow_id``, ``workflow_name``, ``step_order``,
            ``step_name``).
        tool_to_recipes: Maps a tool key to recipe references (dicts with
            ``recipe_id`` and ``recipe_name``).
        tool_related: Maps a tool id to a list of related tool records.

    Returns:
        The page content as one newline-joined string.
    """
    name = t["name"]
    desc = t.get("description") or ""
    tier = t.get("help_tier") or "basic"
    tid = t["id"]

    sources = t.get("sources") or {}
    remnux_docs = sources.get("remnux_docs") or {}
    for610 = sources.get("for610") or {}

    # Determine category: the REMnux docs category wins over FOR610.
    cat = ""
    if remnux_docs.get("covered"):
        cat = remnux_docs.get("category", "")
    elif for610.get("covered"):
        cat = for610.get("category", "")
    cat_slug_val = slug(cat) if cat else ""

    # Unknown tiers are shown verbatim.
    tier_label = {
        "rich": "Rich (FOR610)",
        "standard": "Standard (docs)",
        "basic": "Basic",
    }.get(tier, tier)

    lines = [f"# {name}"]
    if desc:
        lines.append(f"> {desc}")
    lines.append("")

    # Metadata
    meta_parts = []
    if cat:
        cat_link = wiki_link(f"categories/{cat_slug_val}", cat)
        meta_parts.append(f"**Category:** {cat_link}")
    meta_parts.append(f"**Tier:** {tier_label}")
    author = for610.get("author")
    if author:
        meta_parts.append(f"**Author:** {author}")
    lines.append(" | ".join(meta_parts))

    # Docs link
    docs_url = remnux_docs.get("docs_url")
    if docs_url:
        lines.append(f"**Docs:** [{docs_url}]({docs_url})")
    lines.append("")

    # Usage examples
    usages = for610.get("typical_usage") or []
    if usages:
        lines.append("## Usage")
        lines.append("```bash")
        lines.extend(usages)
        lines.append("```")
        lines.append("")

    # Recipes (each recipe listed once even if it references the tool twice)
    recipe_refs = tool_to_recipes.get(tid, [])
    if recipe_refs:
        lines.append("## Recipes")
        seen = set()
        for r in recipe_refs:
            rid = r["recipe_id"]
            if rid in seen:
                continue
            seen.add(rid)
            recipe_link = wiki_link(f"recipes/{slug(rid)}", r["recipe_name"])
            lines.append(f"- {recipe_link}")
        lines.append("")

    # Workflows (only the first matching step of each workflow is shown)
    # NOTE(review): the lookup uses the tool id; this assumes workflow steps
    # list tools by id rather than by display name -- confirm against the data.
    wf_refs = tool_to_workflows.get(tid, [])
    if wf_refs:
        lines.append("## Workflows")
        seen = set()
        for w in wf_refs:
            if w["workflow_id"] in seen:
                continue
            seen.add(w["workflow_id"])
            wf_slug = w["workflow_id"].replace("_", "-")
            wf_link = wiki_link(f"workflows/{wf_slug}", w["workflow_name"])
            lines.append(f"- {wf_link} — Step {w['step_order']}: {w['step_name']}")
        lines.append("")

    # Related tools
    related = tool_related.get(tid, [])
    if related:
        lines.append("## Related Tools")
        for rt in related:
            rt_name = rt["name"]
            rt_desc = (rt.get("description") or "")[:60]
            entry = f"- {wiki_link(f'tools/{slug(rt_name)}', rt_name)}"
            # Separate link and description; previously they ran together.
            if rt_desc:
                entry += f" — {rt_desc}"
            lines.append(entry)
        lines.append("")

    # FOR610 labs
    labs = for610.get("labs") or []
    sections = for610.get("sections") or []
    if labs or sections:
        lines.append("## FOR610")
        if labs:
            lines.append(f"**Labs:** {', '.join(str(lab) for lab in labs)}")
        if sections:
            lines.append(f"**Sections:** {', '.join(str(s) for s in sections)}")
        lines.append("")

    # Tags: explicit FOR610 tags, otherwise fall back to the category slug.
    tags = for610.get("tags") or []
    if tags:
        lines.append(" ".join(f"#{tag}" for tag in tags))
    elif cat:
        lines.append(f"#{slug(cat)}")
    lines.append("")

    return "\n".join(lines)
# ============================================================
# Generate workflow pages
# ============================================================
def generate_workflow_page(wf, tools_by_id):
    """Render the markdown wiki page for a workflow.

    Args:
        wf: Workflow record. ``name`` is required; ``description``, ``steps``,
            ``related_labs`` and ``tags`` are optional.
        tools_by_id: Lookup from tool key to tool record, used to pull one
            usage example per step tool. Each step tool is looked up by its
            raw entry first, then by its slug.

    Returns:
        The page content as one newline-joined string.
    """
    name = wf["name"]
    desc = wf.get("description") or ""
    steps = wf.get("steps") or []
    related_labs = wf.get("related_labs") or []
    tags = wf.get("tags") or []

    lines = [f"# {name}"]
    # Skip the blockquote entirely rather than emit a dangling "> " line.
    if desc:
        lines.append(f"> {desc}")
    lines.append("")

    if related_labs:
        lines.append(f"**FOR610 Labs:** {', '.join(str(lab) for lab in related_labs)}")
        lines.append("")

    lines.append("## Steps")
    lines.append("")

    for step in steps:
        order = step.get("order", "?")
        step_name = step.get("name", "")
        step_desc = step.get("description", "")
        step_tools = step.get("tools") or []

        lines.append(f"### Step {order}: {step_name}")

        if step_tools:
            tool_links = [
                wiki_link(f"tools/{slug(tool_name)}", tool_name)
                for tool_name in step_tools
            ]
            lines.append(f"**Tools:** {', '.join(tool_links)}")

        if step_desc:
            lines.append(f"\n{step_desc}")

        # Inline examples: the first usage line of each known step tool,
        # capped at three per step.
        examples = []
        for tool_name in step_tools:
            tool = tools_by_id.get(tool_name) or tools_by_id.get(slug(tool_name))
            if not tool:
                continue
            for610 = (tool.get("sources") or {}).get("for610") or {}
            usages = for610.get("typical_usage") or []
            if usages:
                examples.append(usages[0])
        if examples:
            lines.append("\n```bash")
            lines.extend(examples[:3])
            lines.append("```")

        lines.append("")

    # Always tag the page as a workflow so tag filtering finds it even when
    # the record defines no tags of its own.
    lines.append(" ".join([f"#{tag}" for tag in tags] + ["#workflow"]))
    lines.append("")

    return "\n".join(lines)
# ============================================================
# Generate recipe pages
# ============================================================
def generate_recipe_page(recipe):
    """Render the markdown wiki page for a recipe.

    Args:
        recipe: Recipe record. ``name`` is required; ``task``, ``tools``,
            ``commands`` and ``lab`` are optional.

    Returns:
        The page content as one newline-joined string.
    """
    name = recipe["name"]
    task = recipe.get("task") or ""
    tools = recipe.get("tools") or []
    commands = recipe.get("commands") or []
    lab = recipe.get("lab") or ""

    lines = [f"# {name}"]
    # Skip the blockquote entirely rather than emit a dangling "> " line.
    if task:
        lines.append(f"> {task}")
    lines.append("")

    if tools:
        tool_links = [wiki_link(f"tools/{slug(t)}", t) for t in tools]
        lines.append(f"**Tools:** {', '.join(tool_links)}")
    if lab:
        lines.append(f"**FOR610 Lab:** {lab}")
    lines.append("")

    # Omit the section instead of rendering an empty code fence.
    if commands:
        lines.append("## Commands")
        lines.append("```bash")
        lines.extend(commands)
        lines.append("```")
        lines.append("")

    # Joining a list avoids a trailing space when the recipe lists no tools.
    lines.append(" ".join(["#recipe"] + [f"#{slug(t)}" for t in tools]))
    lines.append("")

    return "\n".join(lines)
# ============================================================
# Generate category pages
# ============================================================
def generate_category_page(cat_name, cat_tools):
    """Render the markdown wiki page for a category.

    Tools are grouped by ``help_tier`` in the order rich, standard, basic and
    sorted by name within each group. A tool without a tier is listed under
    Basic (the same default the tool page uses). Tools with any other tier
    value are listed under a heading that shows that value, so every tool
    counted in the header also appears in the listing.

    Args:
        cat_name: Category display name.
        cat_tools: Tool records belonging to the category.

    Returns:
        The page content as one newline-joined string.
    """
    lines = [
        f"# {cat_name}",
        "",
        f"**{len(cat_tools)} tools** in this category.",
        "",
        "## Tools",
        "",
    ]

    known_tiers = [
        ("rich", "Rich (FOR610)"),
        ("standard", "Standard"),
        ("basic", "Basic"),
    ]

    # Bucket the tools by tier, treating a missing/empty tier as "basic".
    by_tier = {}
    for t in cat_tools:
        by_tier.setdefault(t.get("help_tier") or "basic", []).append(t)

    known_names = {tier_name for tier_name, _ in known_tiers}
    extra_tiers = sorted(
        (tier for tier in by_tier if tier not in known_names), key=str
    )
    tier_order = known_tiers + [(tier, str(tier)) for tier in extra_tiers]

    for tier_name, tier_label in tier_order:
        tier_tools = by_tier.get(tier_name)
        if not tier_tools:
            continue
        lines.append(f"### {tier_label}")
        for t in sorted(tier_tools, key=lambda x: x["name"]):
            tname = t["name"]
            desc = (t.get("description") or "")[:60]
            entry = f"- {wiki_link(f'tools/{slug(tname)}', tname)}"
            # Separate link and description; previously they ran together.
            if desc:
                entry += f" — {desc}"
            lines.append(entry)
        lines.append("")

    lines.append(f"#{slug(cat_name)} #category")
    lines.append("")

    return "\n".join(lines)
# ============================================================
# Generate index page
# ============================================================
def generate_index_page(category_to_tools, workflows, recipes, total_tools):
    """Render the wiki landing page.

    The page shows the overall counts, then links to every category (sorted
    by name, with its tool count), every workflow and every recipe (both in
    input order), followed by a fixed block of zk command examples.
    """
    page = [
        "# REMnux Analysis Wiki",
        "",
        f"> {total_tools} tools | {len(workflows)} workflows | {len(recipes)} recipes",
        "",
        "## Browse by Category",
        "",
    ]

    for category in sorted(category_to_tools):
        member_count = len(category_to_tools[category])
        category_link = wiki_link(f"categories/{slug(category)}", category)
        page.append(f"- {category_link} ({member_count} tools)")

    page += ["", "## Workflows", ""]
    for workflow in workflows:
        workflow_slug = workflow["id"].replace("_", "-")
        workflow_link = wiki_link(f"workflows/{workflow_slug}", workflow["name"])
        page.append(f"- {workflow_link}")

    page += ["", "## Recipes", ""]
    for entry in recipes:
        recipe_link = wiki_link(f"recipes/{slug(entry['id'])}", entry["name"])
        page.append(f"- {recipe_link}")

    # Static tail: zk usage examples and the page tags.
    page += [
        "",
        "## Quick Access",
        "",
        "```bash",
        "# Browse interactively",
        "zk list --interactive",
        "",
        "# Search by tag",
        "zk list --tag pdf",
        "",
        "# Find what links to a tool",
        "zk list --mention pdfid",
        "```",
        "",
        "#index #wiki",
        "",
    ]

    return "\n".join(page)
# ============================================================
# Main
# ============================================================
def _write_text(path, content):
    """Write *content* to *path* as UTF-8, replacing any existing file."""
    # The generated pages contain non-ASCII characters (e.g. "—"), so the
    # encoding must not depend on the locale default.
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)


def main():
    """Generate the whole wiki under WIKI_DIR and print a summary.

    Loads the tool inventory, workflows and recipes, keeps only tools flagged
    ``in_remnux``, writes one page per tool, workflow, recipe and category
    plus ``index.md``, and writes a zk configuration to ``.zk/config.toml``.
    """
    print("Generating wiki pages...")

    # Load data
    master = load_yaml(MASTER)
    tools = master.get("tools") or []
    remnux_tools = [t for t in tools if t.get("in_remnux")]
    workflows = load_yaml(WORKFLOWS_SRC).get("workflows") or []
    recipes = load_yaml(RECIPES_SRC).get("recipes") or []
    print(f" Input: {len(remnux_tools)} tools, {len(workflows)} workflows, {len(recipes)} recipes")

    # Build indexes
    tool_to_workflows, tool_to_recipes, category_to_tools, tool_related = \
        build_indexes(remnux_tools, workflows, recipes)

    # Build tool lookup by id and name variants
    tools_by_id = {}
    for t in remnux_tools:
        tools_by_id[t["id"]] = t
        tools_by_id[slug(t["name"])] = t
        for alias in t.get("aliases") or []:
            tools_by_id[slug(alias)] = t

    # Create output directories
    for d in ["tools", "workflows", "recipes", "categories", ".zk"]:
        os.makedirs(os.path.join(WIKI_DIR, d), exist_ok=True)

    # Generate tool pages
    tool_count = 0
    for t in remnux_tools:
        filename = slug(t["name"]) + ".md"
        content = generate_tool_page(t, tool_to_workflows, tool_to_recipes, tool_related)
        _write_text(os.path.join(WIKI_DIR, "tools", filename), content)
        tool_count += 1

    # Generate workflow pages
    wf_count = 0
    for wf in workflows:
        filename = wf["id"].replace("_", "-") + ".md"
        content = generate_workflow_page(wf, tools_by_id)
        _write_text(os.path.join(WIKI_DIR, "workflows", filename), content)
        wf_count += 1

    # Generate recipe pages
    recipe_count = 0
    for recipe in recipes:
        filename = slug(recipe["id"]) + ".md"
        content = generate_recipe_page(recipe)
        _write_text(os.path.join(WIKI_DIR, "recipes", filename), content)
        recipe_count += 1

    # Generate category pages
    cat_count = 0
    for cat_name, cat_tools in category_to_tools.items():
        filename = slug(cat_name) + ".md"
        content = generate_category_page(cat_name, cat_tools)
        _write_text(os.path.join(WIKI_DIR, "categories", filename), content)
        cat_count += 1

    # Generate index page
    content = generate_index_page(category_to_tools, workflows, recipes, len(remnux_tools))
    _write_text(os.path.join(WIKI_DIR, "index.md"), content)

    # Generate zk config (plain string, so the braces are written literally)
    zk_config = """[note]
default-title = "untitled"
filename = "{{slug title}}"
extension = "md"
[tool]
fzf-preview = "cat {-1}"
[format.markdown]
link-format = "wiki"
"""
    _write_text(os.path.join(WIKI_DIR, ".zk", "config.toml"), zk_config)

    total = tool_count + wf_count + recipe_count + cat_count + 1  # +1 for index
    print(f"\nWiki generated: {total} pages")
    print(f" tools: {tool_count}")
    print(f" workflows: {wf_count}")
    print(f" recipes: {recipe_count}")
    print(f" categories: {cat_count}")
    print(" index: 1")
    print(f"\nOutput: {WIKI_DIR}/")


if __name__ == "__main__":
    main()