Files
docker_file_analysis/scripts/convert-remnux-cheats.py
Tobias Kessels b98aaee3e0 Major repository cleanup and enhancement
- Reorganize documentation: moved old docs to docs/ directory
- Add comprehensive README.md with build options and usage guide
- Add detailed CONTRIBUTING.md with help content management guide
- Create Makefile for automated building and testing
- Add Dockerfile.scratch for building from Ubuntu 20.04 base
- Enhance all Dockerfiles with PowerShell + PSScriptAnalyzer
- Add modern shells: zsh (with plugins) and fish (with config)
- Add modern CLI tools: fd-find, ripgrep, fzf
- Create comprehensive help system with cheat/TLDR/fish completions
- Add helper scripts for help content management and coverage checking
- Fix Dockerfile.remnux script references
- Support three build variants: upstream (REMnux), scratch (Ubuntu), kali

Build options:
  - make build-upstream: Fast, uses REMnux upstream (recommended)
  - make build-scratch: Full control, builds from Ubuntu 20.04
  - make build-kali: Legacy Kali Linux base

Features:
  - PowerShell with PSScriptAnalyzer module
  - Modern shells (zsh, fish) with custom configurations
  - Enhanced help system (cheat sheets, TLDR pages, fish completions)
  - Help coverage checking and bulk import tools
  - Comprehensive documentation for users and contributors
2025-10-01 11:45:56 +02:00

244 lines
8.1 KiB
Python

#!/usr/bin/env python3
"""
convert-remnux-cheats.py
Parse a consolidated markdown cheatsheet (e.g., remnux-tldr-cheatsheet.md)
and generate cheat sheets and TLDR pages for tools missing from the system.
Heuristics:
- A tool section is detected by a heading line (## or ###). The heading text
is the tool display name. If parentheses provide aliases, the first alias is
used as the canonical filename (e.g., "7-Zip (7z, 7za)" => 7z).
- Within a section, bullet lines ("- " or "* ") are treated as descriptions.
Inline code `...` on those lines is extracted as commands.
- Fenced code blocks (``` ... ```) are extracted; each non-empty line becomes a command entry.
- TLDR entries: produce simple bullets with the command in backticks and the description when available.
- Cheat entries: print description as a comment line, followed by the command.
By default, only missing files are created. Existing cheat or TLDR files are left intact
unless --overwrite is provided.
"""
import argparse
import os
import re
from pathlib import Path
# Matches a level-2 or level-3 markdown heading ("## ..." / "### ...").
# group(1) is the run of '#' characters, group(2) the heading text without
# trailing whitespace.
HDR_RE = re.compile(r"^(#{2,3})\s+(.+?)\s*$")
# Matches a line that begins (at column 0) with a ``` fence marker.
# group(1) is whatever follows the marker on that line.
FENCE_RE = re.compile(r"^```(.*)$")
# Matches one inline code span; group(1) is the text between the backticks.
INLINE_CODE_RE = re.compile(r"`([^`]+)`")
def choose_canonical_name(heading: str) -> str:
    """Derive a filesystem-safe tool name from a markdown heading.

    If the heading contains a parenthesised alias list, e.g.
    "7-Zip (7z, 7za, 7zr, 7zz)", the first alias ("7z") is used. Otherwise the
    parenthesised part and anything after a " - " separator are dropped and
    the remaining heading text is used.

    In both cases every character outside ``[a-zA-Z0-9._+-]`` is replaced by a
    hyphen and leading/trailing hyphens are removed, so the result never
    contains a path separator, whitespace or backticks. Case is preserved.

    Args:
        heading: Heading text without the leading ``#`` markers.

    Returns:
        The sanitized name. May be an empty string when the heading contains
        no usable characters; callers should treat that as "no name".
    """

    def _sanitize(raw: str) -> str:
        # Replace anything not typical for a command name (spaces, slashes,
        # backticks, ...) so the result is always a single path component.
        cleaned = re.sub(r"[^a-zA-Z0-9._+-]", "-", raw).strip("-")
        # A dots-only name ("." or "..") would resolve to a directory.
        return cleaned if cleaned.strip(".") else ""

    text = heading.strip()

    # Prefer the first alias listed in parentheses, if it survives sanitizing.
    alias_match = re.search(r"\(([^)]+)\)", text)
    if alias_match:
        first_alias = re.split(r"[\s,]+", alias_match.group(1).strip())[0]
        alias = _sanitize(first_alias)
        if alias:
            return alias

    # Fall back to the heading itself: drop parentheses and a trailing
    # " - description" part, as in "Tool - description".
    text = re.sub(r"\(.*?\)", "", text)
    text = text.split(" - ")[0]
    return _sanitize(text.strip())
def parse_sections(lines):
    """Split a consolidated markdown cheatsheet into per-tool sections.

    A new section starts at every ``##``/``###`` heading that is not inside a
    fenced code block. Content that appears before the first heading is
    ignored.

    Args:
        lines: Iterable of text lines; a trailing newline on a line is
            tolerated.

    Returns:
        A list of dicts in document order, one per heading, with the keys:
        ``display``   - the heading text,
        ``name``      - the result of ``choose_canonical_name`` for it,
        ``desc_cmds`` - ``(description, command)`` tuples, one per inline code
                        span; the description is the source line with each
                        code span replaced by ``{}`` (and the bullet marker
                        removed for bullet lines),
        ``cmds``      - stripped, non-empty lines from fenced code blocks.
    """
    bullet_re = re.compile(r"^\s*[-*]\s+")  # hoisted: used once per line

    def _flush(section, buffered):
        # Move the buffered fence lines into the section as commands.
        if section is None:
            return
        for cmdline in buffered:
            cmdline = cmdline.strip()
            if cmdline:
                section["cmds"].append(cmdline)

    sections = []
    current = None
    in_fence = False
    fence_accum = []

    for raw in lines:
        line = raw.rstrip("\n")

        # Headings inside a fence are code (e.g. shell comments), not sections.
        h = HDR_RE.match(line)
        if h and not in_fence:
            if current:
                sections.append(current)
            hdr_text = h.group(2).strip()
            current = {
                "display": hdr_text,
                "name": choose_canonical_name(hdr_text),
                "desc_cmds": [],  # list of (desc, cmd)
                "cmds": [],  # list of commands (from code fences)
            }
            continue

        # A fence marker toggles fence state; closing it flushes the buffer.
        if FENCE_RE.match(line):
            if in_fence:
                _flush(current, fence_accum)
            in_fence = not in_fence
            fence_accum = []
            continue

        if in_fence:
            fence_accum.append(line)
            continue

        # Outside fences only lines carrying inline code produce entries.
        codes = INLINE_CODE_RE.findall(line)
        if not codes or current is None:
            continue
        desc = INLINE_CODE_RE.sub("{}", line)
        if bullet_re.match(line):
            desc = bullet_re.sub("", desc)
        desc = desc.strip()
        for code in codes:
            current["desc_cmds"].append((desc, code.strip()))

    # An unterminated fence runs to the end of the document; keep its
    # commands instead of silently dropping them.
    if in_fence:
        _flush(current, fence_accum)

    if current:
        sections.append(current)
    return sections
def ensure_dir(p: Path):
    """Create directory ``p`` together with any missing parents.

    An already existing directory is not an error.
    """
    os.makedirs(p, exist_ok=True)
def write_cheat(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
    """Render one parsed section as a cheat sheet in ``out_dir``.

    The file is named exactly ``tool_name`` (no extension). It starts with the
    section's display heading as a ``#`` comment, followed by one entry per
    ``desc_cmds`` pair (optional ``# description`` line, then the command) and
    one entry per ``cmds`` item, each entry separated by a blank line.

    Returns:
        True when the file was written, False when it already existed and
        ``overwrite`` is false.
    """
    target = out_dir / tool_name
    if target.exists() and not overwrite:
        return False

    # Heading first, as a comment line followed by a blank line.
    body = [f"# {section['display']}", ""]

    # Entries that came from inline code: description (if any) then command.
    for description, command in section.get("desc_cmds", []):
        if description:
            body.append(f"# {description}")
        body += [command, ""]

    # Entries that came from fenced code blocks carry no description.
    for command in section.get("cmds", []):
        body += [command, ""]

    rendered = "\n".join(body).rstrip() + "\n"
    target.write_text(rendered, encoding="utf-8")
    return True
def write_tldr(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
    """Render one parsed section as a TLDR page in ``out_dir``.

    The page is stored as ``<tool_name lowercased>.md``. It has a ``# tool_name``
    title, a fixed "generated" note, then one bullet per command: entries from
    ``desc_cmds`` use their description (or "Example" when it is empty) and
    entries from ``cmds`` are always labelled "Example".

    Returns:
        True when the file was written, False when it already existed and
        ``overwrite`` is false.
    """
    target = out_dir / f"{tool_name.lower()}.md"
    if target.exists() and not overwrite:
        return False

    page = [
        f"# {tool_name}",
        "",
        "> Generated from remnux-tldr-cheatsheet.md. Review for accuracy.",
        "",
    ]

    # Described commands first, then fenced commands as description-less pairs.
    entries = list(section.get("desc_cmds", []))
    entries.extend(("", fenced) for fenced in section.get("cmds", []))

    for description, command in entries:
        label = f"- {description}:" if description else "- Example:"
        page += [label, "", f"`{command}`", ""]

    target.write_text("\n".join(page).rstrip() + "\n", encoding="utf-8")
    return True
def main():
    """Command-line entry point: convert a consolidated cheatsheet to files.

    Reads the markdown file given with ``--in``, parses it into sections and
    writes one cheat sheet (``--cheat-dir``) and one TLDR page (``--tldr-dir``)
    per section. Existing files are left untouched unless ``--overwrite`` is
    given; ``--only-missing`` names that default explicitly and is kept for
    backward compatibility. A summary line with created/skipped counts is
    printed at the end.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="infile", required=True, help="Path to remnux-tldr-cheatsheet.md")
    ap.add_argument("--cheat-dir", required=True, help="Output directory for cheat sheets")
    ap.add_argument("--tldr-dir", required=True, help="Output directory for TLDR pages")
    ap.add_argument("--only-missing", action="store_true", help="Only create files that don't already exist (default)")
    ap.add_argument("--overwrite", action="store_true", help="Overwrite existing files")
    args = ap.parse_args()

    if args.overwrite and args.only_missing:
        print("[!] --overwrite and --only-missing are mutually exclusive; using --overwrite")
        args.only_missing = False

    # Only an explicit --overwrite may replace existing files. Deriving this
    # from "not --only-missing" made the default run clobber existing content.
    overwrite = args.overwrite

    text = Path(args.infile).read_text(encoding="utf-8", errors="replace").splitlines()
    sections = parse_sections(text)

    cheat_dir = Path(args.cheat_dir)
    tldr_dir = Path(args.tldr_dir)
    ensure_dir(cheat_dir)
    ensure_dir(tldr_dir)

    created_cheat = created_tldr = 0
    skipped_cheat = skipped_tldr = 0
    for sec in sections:
        tool_name = sec["name"]
        if not tool_name:
            # An empty name would make the target path the output directory
            # itself, so writing it would fail.
            print(f"[!] Skipping section without a usable tool name: {sec['display']!r}")
            continue

        if write_cheat(tool_name, sec, cheat_dir, overwrite=overwrite):
            created_cheat += 1
        else:
            skipped_cheat += 1

        if write_tldr(tool_name, sec, tldr_dir, overwrite=overwrite):
            created_tldr += 1
        else:
            skipped_tldr += 1

    print(f"✅ Conversion complete: cheat created={created_cheat}, skipped={skipped_cheat}; tldr created={created_tldr}, skipped={skipped_tldr}")
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()