docker_file_analysis/data/generated/cheatsheets/pdf-parser.py.cheat

# pdf-parser.py
# Parse PDF structure, locate objects, extract content, and search for strings
# FOR610 Labs: 3.1 | Sections: 1, 3 | Author: Didier Stevens
# Docs: https://docs.remnux.org/discover-the-tools/analyze+documents/pdf

% pdf, static-analysis, object-extraction, didier-stevens

# Show document statistics (object counts grouped by type)
pdf-parser.py document.pdf -a

# Search indirect objects (streams excluded) for a string
pdf-parser.py document.pdf -s /URI

# List the values of a dictionary key (e.g. every /URI target)
pdf-parser.py document.pdf -k /URI

# Dump the stream of object 6 to a file (add -f to decode stream filters such as FlateDecode)
pdf-parser.py document.pdf -o 6 -d object6.jpg


# --- Recipes (multi-tool chains) ---

# >> Extract Embedded Object from PDF
# Scan for suspicious keywords
pdfid.py <document.pdf>
# Find objects containing the keyword
pdf-parser.py <document.pdf> -s /URI
# Extract all values for that keyword
pdf-parser.py <document.pdf> -k /URI
# Dump a specific object to file
pdf-parser.py <document.pdf> -o <obj_id> -d extracted_object
# View extracted image
feh extracted_object &

# >> Extract JavaScript from PDF
# Check if PDF contains JavaScript
pdfid.py <document.pdf>
# Find objects with JavaScript
pdf-parser.py <document.pdf> -s /JavaScript
# Interactive analysis with peepdf
peepdf -i <document.pdf>