From a8381c1fa543e80fff9b6d872d5dec262cf2f6b2 Mon Sep 17 00:00:00 2001
From: tobias
Date: Fri, 6 Feb 2026 15:54:08 +0100
Subject: [PATCH] fix decryption not working

---
 pdf_sanatizer.py | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/pdf_sanatizer.py b/pdf_sanatizer.py
index 6f0b298..9f2c0ee 100644
--- a/pdf_sanatizer.py
+++ b/pdf_sanatizer.py
@@ -40,29 +40,42 @@ This script requires the PyMuPDF (``fitz``) package.
 import sys
 import os
 import re
+import getpass
 from typing import Tuple, Optional
 
 import fitz # type: ignore[import]
 
 
-def open_with_password(path: str) -> fitz.Document:
-    """Open a PDF file, prompting the user for a password if necessary.
+def open_with_password(path: str, max_attempts: int = 3) -> fitz.Document:
+    """Open ``path`` and prompt for a password when the PDF is encrypted.
 
-    If the file is encrypted, PyMuPDF raises a ``RuntimeError``. In that
-    case the user is prompted to enter the password. If authentication
-    fails, a ``ValueError`` is raised and the program aborts.
+    PyMuPDF happily opens encrypted PDFs without immediately failing, but it
+    marks them as needing authentication via ``doc.needs_pass``. The original
+    implementation relied on ``fitz.open`` raising a ``RuntimeError`` to detect
+    encryption, which meant passwords were never requested and the caller
+    would hit authorization errors later. We now inspect ``doc.needs_pass``
+    directly and allow up to ``max_attempts`` attempts at entering the password
+    before aborting.
     """
+
     try:
-        return fitz.open(path)
-    except RuntimeError:
-        # Document appears to be encrypted
-        print(f"The document '{path}' is encrypted.")
-        password = input("Please enter the password: ")
         doc = fitz.open(path)
-        if not doc.authenticate(password):
-            raise ValueError("Incorrect password provided. Unable to open the PDF.")
+    except RuntimeError as exc:
+        raise RuntimeError(f"Unable to open '{path}': {exc}") from exc
+
+    if not doc.needs_pass:
         return doc
 
+    print(f"The document '{path}' is encrypted.")
+    for attempt in range(1, max_attempts + 1):
+        password = getpass.getpass("Please enter the password: ")
+        if doc.authenticate(password):
+            return doc
+        print("Incorrect password. Try again." if attempt < max_attempts else "Incorrect password.")
+
+    doc.close()
+    raise ValueError("Failed to authenticate after multiple attempts. Unable to open the PDF.")
+
 
 def remove_watermark_segments(doc: fitz.Document) -> int:
     """Remove known watermark text blocks from every page of the document.
@@ -157,4 +170,4 @@ def main(argv: list[str]) -> None:
 
 
 if __name__ == "__main__":
-    main(sys.argv)
\ No newline at end of file
+    main(sys.argv)