Major repository cleanup and enhancement

- Reorganize documentation: moved old docs to docs/ directory - Add comprehensive README.md with build options and usage guide - Add detailed CONTRIBUTING.md with help content management guide - Create Makefile for automated building and testing - Add Dockerfile.scratch for building from Ubuntu 20.04 base - Enhance all Dockerfiles with PowerShell + PSScriptAnalyzer - Add modern shells: zsh (with plugins) and fish (with config) - Add modern CLI tools: fd-find, ripgrep, fzf - Create comprehensive help system with cheat/TLDR/fish completions - Add helper scripts for help content management and coverage checking - Fix Dockerfile.remnux script references - Support three build variants: upstream (REMnux), scratch (Ubuntu), kali Build options: - make build-upstream: Fast, uses REMnux upstream (recommended) - make build-scratch: Full control, builds from Ubuntu 20.04 - make build-kali: Legacy Kali Linux base Features: - PowerShell with PSScriptAnalyzer module - Modern shells (zsh, fish) with custom configurations - Enhanced help system (cheat sheets, TLDR pages, fish completions) - Help coverage checking and bulk import tools - Comprehensive documentation for users and contributors
2025-10-01 11:45:56 +02:00
parent 6bfcfd7935
commit b98aaee3e0
27 changed files with 5000 additions and 62 deletions
--- a/scripts/convert-remnux-cheats.py
+++ b/scripts/convert-remnux-cheats.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+
+"""
+convert-remnux-cheats.py
+
+Parse a consolidated markdown cheatsheet (e.g., remnux-tldr-cheatsheet.md)
+and generate cheat sheets and TLDR pages for tools missing from the system.
+
+Heuristics:
+- A tool section is detected by a heading line (## or ###). The heading text
+  is the tool display name. If parentheses provide aliases, the first alias is
+  used as the canonical filename (e.g., "7-Zip (7z, 7za)" => 7z).
+- Within a section, bullet lines ("- " or "* ") are treated as descriptions.
+  Inline code `...` on those lines is extracted as commands.
+- Fenced code blocks (``` ... ```) are extracted; each non-empty line becomes a command entry.
+- TLDR entries: produce simple bullets with the command in backticks and the description when available.
+- Cheat entries: print description as a comment line, followed by the command.
+
+By default, only missing files are created. Existing cheat or TLDR files are left intact
+unless --overwrite is provided.
+"""
+
+import argparse
+import os
+import re
+from pathlib import Path
+
+# Level-2 or level-3 markdown heading ("## x" / "### x"). group(1) holds the
+# hash marks, group(2) the heading text without trailing whitespace.
+HDR_RE = re.compile(r"^(#{2,3})\s+(.+?)\s*$")
+# A line that starts (at column 0) with a triple backtick: used for both the
+# opening and the closing fence. group(1) is whatever follows the backticks.
+FENCE_RE = re.compile(r"^```(.*)$")
+# Inline code span delimited by single backticks; group(1) is the code text.
+INLINE_CODE_RE = re.compile(r"`([^`]+)`")
+
+
+def choose_canonical_name(heading: str) -> str:
+    """Derive a filename-safe command name from a section heading.
+
+    If the heading carries a parenthesised alias list, e.g.
+    "7-Zip (7z, 7za, 7zr, 7zz)", the first usable alias ("7z") is chosen.
+    Otherwise the parenthesised parts and anything after " - " are dropped
+    and the remaining text is sanitised.
+
+    Every candidate is sanitised: characters outside ``[a-zA-Z0-9._+-]``
+    become "-" and leading/trailing "-" are trimmed, so the result never
+    contains a path separator. The names "." and ".." are rejected because
+    they would resolve to a directory when joined onto an output path.
+
+    Args:
+        heading: Heading text without the leading "#" markers.
+
+    Returns:
+        The chosen name with its letter case preserved (no lowercasing is
+        done here), or "" when nothing usable remains.
+    """
+
+    def _sanitize(text: str) -> str:
+        # Map anything unusual for a command/file name to "-".
+        cleaned = re.sub(r"[^a-zA-Z0-9._+-]", "-", text).strip("-")
+        return "" if cleaned in {".", ".."} else cleaned
+
+    text = heading.strip()
+
+    # Prefer an alias from the first parenthesised group; tokens are
+    # separated by commas and/or whitespace.
+    aliases = re.search(r"\(([^)]+)\)", text)
+    if aliases:
+        for token in re.split(r"[\s,]+", aliases.group(1)):
+            name = _sanitize(token)
+            if name:
+                return name
+
+    # No usable alias: drop parenthesised parts and a "Tool - description"
+    # tail, then sanitise what is left.
+    text = re.sub(r"\(.*?\)", "", text)
+    text = text.split(" - ")[0]
+    return _sanitize(text)
+
+
+def parse_sections(lines):
+    """Split cheatsheet lines into per-tool sections.
+
+    A new section starts at every line matching HDR_RE that is not inside a
+    fenced code block. Within a section:
+
+    - each non-blank line of a fenced block is stored (stripped) in "cmds";
+    - each inline code span on a bullet or plain text line is stored in
+      "desc_cmds" together with that line's text, in which every code span
+      has been replaced by the literal "{}" and, for bullets, the leading
+      bullet marker removed.
+
+    Text before the first heading is ignored, and the content of a fence
+    that is still open at the end of the input is discarded.
+
+    Args:
+        lines: Iterable of text lines; a trailing "\\n" on a line is removed.
+
+    Returns:
+        A list of dicts with the keys "display" (heading text), "name"
+        (result of choose_canonical_name), "desc_cmds" (list of
+        ``(description, command)`` tuples) and "cmds" (list of commands
+        taken from code fences).
+    """
+    bullet_re = re.compile(r"^\s*[-*]\s+")  # compiled once, used per line
+
+    sections = []
+    current = None
+    in_fence = False
+    fence_accum = []
+
+    for raw in lines:
+        line = raw.rstrip("\n")
+
+        # Headings only count outside code fences.
+        if not in_fence:
+            h = HDR_RE.match(line)
+            if h:
+                if current:
+                    sections.append(current)
+                hdr_text = h.group(2).strip()
+                current = {
+                    "display": hdr_text,
+                    "name": choose_canonical_name(hdr_text),
+                    "desc_cmds": [],  # list of (desc, cmd)
+                    "cmds": [],       # list of commands (from code fences)
+                }
+                continue
+
+        # A fence line toggles the state; closing a fence flushes its lines.
+        if FENCE_RE.match(line):
+            if in_fence and current:
+                current["cmds"].extend(
+                    stripped
+                    for stripped in (cmdline.strip() for cmdline in fence_accum)
+                    if stripped
+                )
+            in_fence = not in_fence
+            fence_accum = []
+            continue
+
+        if in_fence:
+            fence_accum.append(line)
+            continue
+
+        # Bullets and plain text are handled alike: every inline code span
+        # becomes a command paired with the line's text.
+        codes = INLINE_CODE_RE.findall(line)
+        if current is None or not codes:
+            continue
+
+        desc = INLINE_CODE_RE.sub("{}", line)
+        if bullet_re.match(line):
+            desc = bullet_re.sub("", desc)
+        desc = desc.strip()
+        for code in codes:
+            current["desc_cmds"].append((desc, code.strip()))
+
+    if current:
+        sections.append(current)
+    return sections
+
+
+def ensure_dir(p: Path):
+    """Create directory *p* and any missing parents; an existing one is kept."""
+    p.mkdir(exist_ok=True, parents=True)
+
+
+def write_cheat(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
+    """Write the cheat sheet for one section to ``out_dir / tool_name``.
+
+    The file has no extension. It starts with the section's display name as
+    a "#" comment, followed by each (description, command) pair and then each
+    fenced-block command, with a blank line after every entry.
+
+    Returns:
+        False if the file already exists and *overwrite* is false (nothing is
+        written), True once the file has been written.
+    """
+    target = out_dir / tool_name
+    if target.exists() and not overwrite:
+        return False
+
+    body = [f"# {section['display']}", ""]
+
+    # Described commands: the description (when present) is a comment line.
+    for desc, cmd in section.get("desc_cmds", []):
+        if desc:
+            body.append(f"# {desc}")
+        body.extend((cmd, ""))
+
+    # Commands taken from code fences carry no description.
+    for cmd in section.get("cmds", []):
+        body.extend((cmd, ""))
+
+    content = "\n".join(body).rstrip() + "\n"
+    target.write_text(content, encoding="utf-8")
+    return True
+
+
+def write_tldr(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
+    """Write the TLDR page for one section to ``out_dir / <lowercased name>.md``.
+
+    The page consists of a "# tool_name" title, a fixed "generated" note, and
+    one "- label:" / backticked-command pair per command. Commands without a
+    description, and all commands from code fences, are labelled "Example".
+
+    Returns:
+        False if the page already exists and *overwrite* is false (nothing is
+        written), True once the page has been written.
+    """
+    page = out_dir / f"{tool_name.lower()}.md"
+    if page.exists() and not overwrite:
+        return False
+
+    def _entry(label, cmd):
+        # One TLDR example: label line, blank, command in backticks, blank.
+        return [f"- {label}:", "", f"`{cmd}`", ""]
+
+    md = [
+        f"# {tool_name}",
+        "",
+        "> Generated from remnux-tldr-cheatsheet.md. Review for accuracy.",
+        "",
+    ]
+
+    # Described commands come first, then the fenced-block commands.
+    for desc, cmd in section.get("desc_cmds", []):
+        md.extend(_entry(desc if desc else "Example", cmd))
+    for cmd in section.get("cmds", []):
+        md.extend(_entry("Example", cmd))
+
+    page.write_text("\n".join(md).rstrip() + "\n", encoding="utf-8")
+    return True
+
+
+def main():
+    """Command-line entry point: convert a markdown cheatsheet into files.
+
+    Reads the file given by --in, splits it into sections with
+    parse_sections, and writes one cheat sheet and one TLDR page per section
+    into --cheat-dir and --tldr-dir (both created if missing).
+
+    Existing files are left intact unless --overwrite is given; --only-missing
+    is accepted for compatibility and selects that same default. Sections
+    whose derived name is empty are skipped, since such a name would point at
+    the output directory itself. A summary of created/skipped counts is
+    printed at the end.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--in", dest="infile", required=True, help="Path to remnux-tldr-cheatsheet.md")
+    ap.add_argument("--cheat-dir", required=True, help="Output directory for cheat sheets")
+    ap.add_argument("--tldr-dir", required=True, help="Output directory for TLDR pages")
+    ap.add_argument("--only-missing", action="store_true", help="Only create files that don't already exist (default)")
+    ap.add_argument("--overwrite", action="store_true", help="Overwrite existing files")
+    args = ap.parse_args()
+
+    if args.overwrite and args.only_missing:
+        print("[!] --overwrite and --only-missing are mutually exclusive; using --overwrite")
+        args.only_missing = False
+
+    # Overwriting must be requested explicitly. Deriving it from
+    # "not --only-missing" would clobber existing files whenever no flag is
+    # passed at all.
+    overwrite = args.overwrite
+
+    text = Path(args.infile).read_text(encoding="utf-8", errors="replace").splitlines()
+    sections = parse_sections(text)
+
+    cheat_dir = Path(args.cheat_dir)
+    tldr_dir = Path(args.tldr_dir)
+    ensure_dir(cheat_dir)
+    ensure_dir(tldr_dir)
+
+    created_cheat = created_tldr = 0
+    skipped_cheat = skipped_tldr = 0
+
+    for sec in sections:
+        tool_name = sec["name"]
+        if not tool_name:
+            # An empty name joins to the output directory itself, which
+            # cannot be written as a file.
+            print(f"[!] Skipping section with no usable name: {sec['display']!r}")
+            skipped_cheat += 1
+            skipped_tldr += 1
+            continue
+
+        if write_cheat(tool_name, sec, cheat_dir, overwrite=overwrite):
+            created_cheat += 1
+        else:
+            skipped_cheat += 1
+
+        if write_tldr(tool_name, sec, tldr_dir, overwrite=overwrite):
+            created_tldr += 1
+        else:
+            skipped_tldr += 1
+
+    print(f"✅ Conversion complete: cheat created={created_cheat}, skipped={skipped_cheat}; tldr created={created_tldr}, skipped={skipped_tldr}")
+
+
+# Run the converter only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()