From acecd18173cbb15ae9db4f89402ae1bd06dcb413 Mon Sep 17 00:00:00 2001
From: tobias
Date: Fri, 6 Feb 2026 16:17:30 +0100
Subject: [PATCH] fixed usage help and added -h to command

---
 README.md        |  8 ++++----
 pdf_sanatizer.py | 22 +++++++++++++++++-----
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 15fc11a..eb9c8ef 100644
--- a/README.md
+++ b/README.md
@@ -19,12 +19,13 @@ Removes personalized SANS “Licensed To …” watermarks (names, emails, hashe
    ```
 
 3. **Run the sanitizer (auto-creates `_clean.pdf`):**
-4.
+
    ```bash
-   # Recommended latest script:
-   python enhanced_sanitize_pdf.py INPUT_unlocked.pdf
+   python pdf_sanatizer.py INPUT_unlocked.pdf
    ```
 
+   Use `python pdf_sanatizer.py -h` for CLI help and options.
+
 ## Notes
 
 * The tool targets common SANS watermark patterns:
@@ -32,4 +33,3 @@ Removes personalized SANS “Licensed To …” watermarks (names, emails, hashe
   * Rotated diagonal overlay using −25-pt fonts,
   * Footer lines at 10/18-pt.
 * If your course PDFs use different fonts/sizes, adjust the regex patterns inside the script.
-
diff --git a/pdf_sanatizer.py b/pdf_sanatizer.py
index 9f2c0ee..4ce62e2 100644
--- a/pdf_sanatizer.py
+++ b/pdf_sanatizer.py
@@ -37,11 +37,11 @@ number of watermark segments removed.
 This script requires the PyMuPDF (``fitz``) package.
 """
 
+import argparse
 import sys
 import os
 import re
 import getpass
-from typing import Tuple, Optional
 
 import fitz  # type: ignore[import]
 
@@ -162,11 +162,23 @@ def sanitize_pdf(path: str) -> None:
     print(f"Removed {removed} watermark segment(s). Cleaned PDF saved as '{out_path}'.")
 
 
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        description="Remove SANS-style watermark overlays from the provided PDF. "
+        "Encrypted PDFs are supported and will trigger a password prompt."
+    )
+    parser.add_argument(
+        "input_pdf",
+        metavar="INPUT.pdf",
+        help="Path to the PDF you want sanitized."
+    )
+    return parser
+
+
 def main(argv: list[str]) -> None:
-    if len(argv) != 2:
-        print("Usage: python enhanced_sanitize_pdf.py input.pdf")
-        return
-    sanitize_pdf(argv[1])
+    parser = build_parser()
+    args = parser.parse_args(argv[1:])
+    sanitize_pdf(args.input_pdf)
 
 
 if __name__ == "__main__":