fix decryption not working

This commit is contained in:
tobias
2026-02-06 15:54:08 +01:00
parent bd42e2f7e3
commit a8381c1fa5

View File

@@ -40,29 +40,42 @@ This script requires the PyMuPDF (``fitz``) package.
import sys import sys
import os import os
import re import re
import getpass
from typing import Tuple, Optional from typing import Tuple, Optional
import fitz # type: ignore[import] import fitz # type: ignore[import]
def open_with_password(path: str, max_attempts: int = 3) -> fitz.Document:
    """Open ``path`` and prompt for a password when the PDF is encrypted.

    PyMuPDF opens encrypted PDFs without immediately failing, but it marks
    them as needing authentication via ``doc.needs_pass``. This function
    inspects that flag and, when it is set, asks the user for the password
    (without echoing it) up to ``max_attempts`` times.

    Args:
        path: Location of the PDF file to open.
        max_attempts: Maximum number of password prompts before giving up.

    Returns:
        The opened document; already authenticated if it was encrypted.

    Raises:
        RuntimeError: If ``fitz.open`` fails to open ``path``.
        ValueError: If no entered password authenticates the document.
    """
    try:
        doc = fitz.open(path)
    except RuntimeError as exc:
        raise RuntimeError(f"Unable to open '{path}': {exc}") from exc

    if not doc.needs_pass:
        return doc

    print(f"The document '{path}' is encrypted.")
    authenticated = False
    try:
        for attempt in range(1, max_attempts + 1):
            password = getpass.getpass("Please enter the password: ")
            if doc.authenticate(password):
                authenticated = True
                return doc
            print("Incorrect password. Try again." if attempt < max_attempts else "Incorrect password.")
        raise ValueError("Failed to authenticate after multiple attempts. Unable to open the PDF.")
    finally:
        # Release the handle on every non-success exit, including an
        # interrupted prompt (EOFError / KeyboardInterrupt), so the open
        # document is not leaked.
        if not authenticated:
            doc.close()
def remove_watermark_segments(doc: fitz.Document) -> int: def remove_watermark_segments(doc: fitz.Document) -> int:
"""Remove known watermark text blocks from every page of the document. """Remove known watermark text blocks from every page of the document.