#!/usr/bin/env python3
"""
convert-remnux-cheats.py

Parse a consolidated markdown cheatsheet (e.g., remnux-tldr-cheatsheet.md) and
generate cheat sheets and TLDR pages for tools missing from the system.

Heuristics:
- A tool section is detected by a heading line (## or ###). The heading text is
  the tool display name. If parentheses provide aliases, the first alias is
  used as the canonical filename (e.g., "7-Zip (7z, 7za)" => 7z).
- Within a section, bullet lines ("- " or "* ") are treated as descriptions.
  Inline code `...` on those lines is extracted as commands.
- Fenced code blocks (``` ... ```) are extracted; each non-empty line becomes
  a command entry.
- TLDR entries: produce simple bullets with the command in backticks and the
  description when available.
- Cheat entries: print description as a comment line, followed by the command.

By default, only missing files are created. Existing cheat or TLDR files are
left intact unless --overwrite is provided.
"""

import argparse
import os
import re
from pathlib import Path

HDR_RE = re.compile(r"^(#{2,3})\s+(.+?)\s*$")
FENCE_RE = re.compile(r"^```(.*)$")
INLINE_CODE_RE = re.compile(r"`([^`]+)`")

# Leading markdown bullet marker ("- " or "* ", optionally indented).
_BULLET_RE = re.compile(r"^\s*[-*]\s+")
# Anything that is not typical for a command / file name.
_UNSAFE_CHARS_RE = re.compile(r"[^a-zA-Z0-9._+-]")


def _sanitize(text: str) -> str:
    """Replace characters unsuitable for a file name with '-' and trim dashes."""
    return _UNSAFE_CHARS_RE.sub("-", text).strip("-")


def choose_canonical_name(heading: str) -> str:
    """Derive a file-name-safe tool name from a section heading.

    From a heading like "7-Zip (7z, 7za, 7zr, 7zz)" the first alias inside the
    parentheses is chosen. Without a usable alias, the parenthesised part and
    anything after " - " are dropped and the remaining heading text is used.

    In both cases the result is sanitized (characters outside
    ``[a-zA-Z0-9._+-]`` become "-"), so it never contains a path separator.
    Case is preserved; the TLDR writer lowercases on its own.

    Returns:
        The sanitized name, which may be an empty string if nothing usable
        remains. Callers must handle that case.
    """
    text = heading.strip()

    # If parentheses include aliases, pick the first alias.
    m = re.search(r"\(([^)]+)\)", text)
    if m:
        # Aliases are separated by commas and/or whitespace.
        cand = re.split(r"[\s,]+", m.group(1).strip())[0]
        # The alias becomes a file name, so it is sanitized like heading text.
        cand = _sanitize(cand)
        if cand:
            return cand

    # Otherwise drop parentheses and anything after "Tool - description".
    text = re.sub(r"\(.*?\)", "", text)
    text = text.split(" - ")[0].strip()
    return _sanitize(text)


def parse_sections(lines):
    """Split markdown lines into per-tool sections.

    Args:
        lines: iterable of text lines (a trailing "\\n" on each is tolerated).

    Returns:
        A list of dicts, one per ``##``/``###`` heading, in document order:
        ``display`` (heading text), ``name`` (see ``choose_canonical_name``),
        ``desc_cmds`` (list of ``(description, command)`` tuples taken from
        inline code spans) and ``cmds`` (non-empty lines from fenced blocks).

    Content before the first heading is ignored. Lines of a fence that is
    never closed are discarded.
    """
    sections = []
    current = None
    in_fence = False
    fence_accum = []

    for raw in lines:
        line = raw.rstrip("\n")

        # A heading starts a new section, unless we are inside a code fence
        # where "## ..." is just content (e.g. a shell comment).
        h = HDR_RE.match(line)
        if h and not in_fence:
            if current:
                sections.append(current)
            hdr_text = h.group(2).strip()
            current = {
                "display": hdr_text,
                "name": choose_canonical_name(hdr_text),
                "desc_cmds": [],  # list of (desc, cmd)
                "cmds": [],  # list of commands (from code fences)
            }
            continue

        # Fence handling: the same marker both opens and closes a block.
        if FENCE_RE.match(line):
            if not in_fence:
                in_fence = True
                fence_accum = []
            else:
                # Closing fence: flush accumulated commands.
                in_fence = False
                if current:
                    stripped = (cmdline.strip() for cmdline in fence_accum)
                    current["cmds"].extend(c for c in stripped if c)
                fence_accum = []
            continue

        if in_fence:
            fence_accum.append(line)
            continue

        # Outside a section there is nowhere to record commands.
        if current is None:
            continue

        # Bullets and plain text lines are handled alike: every inline code
        # span becomes a command, and the line with the spans replaced by
        # "{}" (and any bullet marker removed) becomes its description.
        codes = INLINE_CODE_RE.findall(line)
        if not codes:
            continue
        desc = INLINE_CODE_RE.sub("{}", line)
        desc = _BULLET_RE.sub("", desc).strip()
        for code in codes:
            current["desc_cmds"].append((desc, code.strip()))

    if current:
        sections.append(current)
    return sections


def ensure_dir(p: Path):
    """Create directory *p* (and missing parents); no error if it exists."""
    p.mkdir(parents=True, exist_ok=True)


def write_cheat(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
    """Write a cheat sheet for one section to ``out_dir / tool_name``.

    The file has no extension. It starts with the display name as a comment,
    followed by each described command (description as a ``#`` comment line,
    then the command) and then each fenced-block command, separated by blank
    lines.

    Returns:
        True if the file was written, False if it already existed and
        *overwrite* is false.
    """
    cheat_path = out_dir / tool_name
    if cheat_path.exists() and not overwrite:
        return False

    # Header as comment.
    lines = [f"# {section['display']}", ""]

    # From desc_cmds (description + single command).
    for desc, cmd in section.get("desc_cmds", []):
        if desc:
            lines.append(f"# {desc}")
        lines.append(cmd)
        lines.append("")

    # From cmds (code fences).
    for cmd in section.get("cmds", []):
        lines.append(cmd)
        lines.append("")

    cheat_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
    return True


def write_tldr(tool_name: str, section, out_dir: Path, overwrite: bool) -> bool:
    """Write a TLDR page for one section to ``out_dir / <tool_name.lower()>.md``.

    Described commands are emitted first ("- <description>:" then the command
    in backticks). Fenced-block commands follow as generic "- Example:"
    entries.

    Returns:
        True if the file was written, False if it already existed and
        *overwrite* is false.
    """
    # TLDR pages expect lowercase filenames with .md
    tldr_path = out_dir / f"{tool_name.lower()}.md"
    if tldr_path.exists() and not overwrite:
        return False

    md = [
        f"# {tool_name}",
        "",
        "> Generated from remnux-tldr-cheatsheet.md. Review for accuracy.",
        "",
    ]

    # Emit desc_cmds first.
    for desc, cmd in section.get("desc_cmds", []):
        md.append(f"- {desc}:" if desc else "- Example:")
        md.append("")
        md.append(f"`{cmd}`")
        md.append("")

    # Emit code-fence commands as generic examples.
    for cmd in section.get("cmds", []):
        md.append("- Example:")
        md.append("")
        md.append(f"`{cmd}`")
        md.append("")

    tldr_path.write_text("\n".join(md).rstrip() + "\n", encoding="utf-8")
    return True


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Existing files are left untouched unless ``--overwrite`` is given, as the
    module docstring promises. ``--only-missing`` is accepted as an explicit
    spelling of that default; if both flags are given, ``--overwrite`` wins.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--in", dest="infile", required=True, help="Path to remnux-tldr-cheatsheet.md")
    ap.add_argument("--cheat-dir", required=True, help="Output directory for cheat sheets")
    ap.add_argument("--tldr-dir", required=True, help="Output directory for TLDR pages")
    ap.add_argument("--only-missing", action="store_true", help="Only create files that don't already exist (default)")
    ap.add_argument("--overwrite", action="store_true", help="Overwrite existing files")
    args = ap.parse_args(argv)

    if args.overwrite and args.only_missing:
        print("[!] --overwrite and --only-missing are mutually exclusive; using --overwrite")
        args.only_missing = False

    # Overwriting must be opted into; deriving it from "not only_missing"
    # would clobber existing files whenever no flag is passed.
    overwrite = args.overwrite

    text = Path(args.infile).read_text(encoding="utf-8", errors="replace").splitlines()
    sections = parse_sections(text)

    cheat_dir = Path(args.cheat_dir)
    tldr_dir = Path(args.tldr_dir)
    ensure_dir(cheat_dir)
    ensure_dir(tldr_dir)

    created_cheat = created_tldr = 0
    skipped_cheat = skipped_tldr = 0

    for sec in sections:
        tool_name = sec["name"]

        # An empty or dot-only name would make the output path resolve to a
        # directory instead of a file, so such sections cannot be written.
        if tool_name in ("", ".", ".."):
            print(f"[!] Skipping section with unusable name: {sec['display']!r}")
            skipped_cheat += 1
            skipped_tldr += 1
            continue

        if write_cheat(tool_name, sec, cheat_dir, overwrite=overwrite):
            created_cheat += 1
        else:
            skipped_cheat += 1

        if write_tldr(tool_name, sec, tldr_dir, overwrite=overwrite):
            created_tldr += 1
        else:
            skipped_tldr += 1

    print(f"✅ Conversion complete: cheat created={created_cheat}, skipped={skipped_cheat}; tldr created={created_tldr}, skipped={skipped_tldr}")


if __name__ == "__main__":
    main()