fix decryption not working
This commit is contained in:
@@ -40,29 +40,42 @@ This script requires the PyMuPDF (``fitz``) package.
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import getpass
|
||||||
from typing import Tuple, Optional
|
from typing import Tuple, Optional
|
||||||
|
|
||||||
import fitz # type: ignore[import]
|
import fitz # type: ignore[import]
|
||||||
|
|
||||||
|
|
||||||
def open_with_password(path: str) -> fitz.Document:
|
def open_with_password(path: str, max_attempts: int = 3) -> fitz.Document:
|
||||||
"""Open a PDF file, prompting the user for a password if necessary.
|
"""Open ``path`` and prompt for a password when the PDF is encrypted.
|
||||||
|
|
||||||
If the file is encrypted, PyMuPDF raises a ``RuntimeError``. In that
|
PyMuPDF happily opens encrypted PDFs without immediately failing, but it
|
||||||
case the user is prompted to enter the password. If authentication
|
marks them as needing authentication via ``doc.needs_pass``. The original
|
||||||
fails, a ``ValueError`` is raised and the program aborts.
|
implementation relied on ``fitz.open`` raising a ``RuntimeError`` to detect
|
||||||
|
encryption, which meant passwords were never requested and the caller
|
||||||
|
would hit authorization errors later. We now inspect ``doc.needs_pass``
|
||||||
|
directly and allow up to ``max_attempts`` attempts at entering the password
|
||||||
|
before aborting.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return fitz.open(path)
|
|
||||||
except RuntimeError:
|
|
||||||
# Document appears to be encrypted
|
|
||||||
print(f"The document '{path}' is encrypted.")
|
|
||||||
password = input("Please enter the password: ")
|
|
||||||
doc = fitz.open(path)
|
doc = fitz.open(path)
|
||||||
if not doc.authenticate(password):
|
except RuntimeError as exc:
|
||||||
raise ValueError("Incorrect password provided. Unable to open the PDF.")
|
raise RuntimeError(f"Unable to open '{path}': {exc}") from exc
|
||||||
|
|
||||||
|
if not doc.needs_pass:
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
print(f"The document '{path}' is encrypted.")
|
||||||
|
for attempt in range(1, max_attempts + 1):
|
||||||
|
password = getpass.getpass("Please enter the password: ")
|
||||||
|
if doc.authenticate(password):
|
||||||
|
return doc
|
||||||
|
print("Incorrect password. Try again." if attempt < max_attempts else "Incorrect password.")
|
||||||
|
|
||||||
|
doc.close()
|
||||||
|
raise ValueError("Failed to authenticate after multiple attempts. Unable to open the PDF.")
|
||||||
|
|
||||||
|
|
||||||
def remove_watermark_segments(doc: fitz.Document) -> int:
|
def remove_watermark_segments(doc: fitz.Document) -> int:
|
||||||
"""Remove known watermark text blocks from every page of the document.
|
"""Remove known watermark text blocks from every page of the document.
|
||||||
@@ -157,4 +170,4 @@ def main(argv: list[str]) -> None:
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main(sys.argv)
|
main(sys.argv)
|
||||||
|
|||||||
Reference in New Issue
Block a user