fix decryption not working

This commit is contained in:
tobias
2026-02-06 15:54:08 +01:00
parent bd42e2f7e3
commit a8381c1fa5

View File

@@ -40,29 +40,42 @@ This script requires the PyMuPDF (``fitz``) package.
import sys
import os
import re
import getpass
from typing import Tuple, Optional
import fitz # type: ignore[import]
def open_with_password(path: str, max_attempts: int = 3) -> fitz.Document:
    """Open ``path`` and prompt for a password when the PDF is encrypted.

    PyMuPDF opens encrypted PDFs without immediately failing and instead
    marks them as needing authentication via ``doc.needs_pass``. An earlier
    implementation relied on ``fitz.open`` raising a ``RuntimeError`` to
    detect encryption, which meant the password was never requested and the
    caller hit authorization errors later. This function inspects
    ``doc.needs_pass`` directly and allows up to ``max_attempts`` password
    entries before giving up.

    Args:
        path: Location of the PDF file to open.
        max_attempts: Maximum number of password prompts shown to the user
            when the document is encrypted.

    Returns:
        The opened document; already authenticated if it was encrypted.

    Raises:
        RuntimeError: If ``fitz.open`` fails to open ``path``.
        ValueError: If no correct password was entered within
            ``max_attempts`` attempts. The document is closed first.
    """
    try:
        doc = fitz.open(path)
    except RuntimeError as exc:
        raise RuntimeError(f"Unable to open '{path}': {exc}") from exc

    if not doc.needs_pass:
        return doc

    print(f"The document '{path}' is encrypted.")
    try:
        for attempt in range(1, max_attempts + 1):
            # getpass keeps the password from being echoed to the terminal.
            password = getpass.getpass("Please enter the password: ")
            if doc.authenticate(password):
                return doc
            if attempt < max_attempts:
                print("Incorrect password. Try again.")
            else:
                print("Incorrect password.")
    except BaseException:
        # The prompt can be interrupted (Ctrl-C, EOF on stdin). Release the
        # still-locked document before propagating so it does not leak.
        doc.close()
        raise

    # Every attempt failed: do not hand a locked document back to the caller.
    doc.close()
    raise ValueError("Failed to authenticate after multiple attempts. Unable to open the PDF.")
def remove_watermark_segments(doc: fitz.Document) -> int:
"""Remove known watermark text blocks from every page of the document.