docker_file_analysis/scripts/create-enhanced-cheatsheets.sh

#!/bin/bash

# Create enhanced cheat sheets by combining existing ones with file analysis examples
# This leverages existing resources and adds specific malware/PDF analysis use cases

# Stop at the first command that exits non-zero.
set -o errexit

# Destination directories, one per cheat-sheet front end (cheat, navi, tldr).
# NOTE(review): TLDR_CACHE sits under /root, while the cheat and navi configs
# written near the end of this script go to /home/remnux — confirm which
# user's tldr client is expected to read this cache.
CHEAT_DIR="/opt/cheatsheets"
NAVI_DIR="/opt/navi-cheats"
TLDR_CACHE="/root/.local/share/tldr"

printf '%s\n' "📚 Creating enhanced cheat sheet collection..."

# All three destinations must exist before anything is written into them.
mkdir -p \
    "$CHEAT_DIR" \
    "$NAVI_DIR" \
    "$TLDR_CACHE"

#######################################
# Fetch the community cheat sheet for one tool, or start an empty sheet when
# none can be downloaded.
# Globals:   CHEAT_DIR (read) - directory that receives "<tool>.cheat"
# Arguments: $1 - tool name; used as the upstream file name and as the
#                 local file stem
# Outputs:   progress messages on stdout, usage error on stderr
# Returns:   0 on success, 1 when no tool name is given or no temp file
#            could be created
#######################################
download_and_enhance() {
    local tool="${1:-}"
    local base_url="https://raw.githubusercontent.com/cheat/cheatsheets/master"
    local output_file="$CHEAT_DIR/$tool.cheat"
    local tmp_file

    # An empty name would otherwise produce a stray "$CHEAT_DIR/.cheat".
    if [[ -z "$tool" ]]; then
        echo "download_and_enhance: missing tool name" >&2
        return 1
    fi

    echo "Processing $tool..."

    # Private temp file instead of a predictable /tmp/<tool>_base.cheat name.
    tmp_file="$(mktemp)" || return 1

    # --fail (-f) makes curl exit non-zero on HTTP errors. Without it the body
    # of a 404 page is written to disk, passes the size check, and is
    # installed as if it were a real cheat sheet.
    if curl -fsL "$base_url/$tool" -o "$tmp_file" && [[ -s "$tmp_file" ]]; then
        echo "✓ Found existing cheat sheet for $tool"
        cp "$tmp_file" "$output_file"
    else
        echo "⚠ No existing cheat sheet for $tool, creating from scratch"
        # Heading line followed by one blank line.
        printf '# %s\n\n' "$tool" > "$output_file"
    fi

    rm -f -- "$tmp_file"
}

# Tools whose upstream cheat sheets seed the collection.
TOOLS=("pdftk" "7z" "tar" "unzip" "exiftool")

# cheat expects extension-less files inside a subdirectory, so each
# "<tool>.cheat" is mirrored into personal/ under the bare tool name.
mkdir -p "$CHEAT_DIR/personal"

for tool in "${TOOLS[@]}"; do
    download_and_enhance "$tool"
    # Nothing to mirror if no sheet file exists for this tool.
    [[ -f "$CHEAT_DIR/${tool}.cheat" ]] || continue
    cp "$CHEAT_DIR/${tool}.cheat" "$CHEAT_DIR/personal/${tool}"
done

# Append file-analysis specific examples to the pdftk sheet.
# The heredoc delimiter is quoted, so the text is written literally.
cat >> "$CHEAT_DIR/pdftk.cheat" << 'EOF'

# === FILE ANALYSIS ENHANCEMENTS ===

# Flatten PDF to prevent JavaScript execution (SECURITY)
pdftk suspicious.pdf output safe.pdf flatten

# Remove form fields and JavaScript (SECURITY)
pdftk malicious.pdf output cleaned.pdf flatten dont_ask

# Extract pages for isolated analysis
pdftk document.pdf cat 1-3 output first_pages.pdf

# Combine with metadata removal workflow
# (exiftool rewrites temp.pdf in place; without -overwrite_original the
#  untouched input is what gets kept as temp.pdf_original)
pdftk input.pdf output temp.pdf flatten
exiftool -all= -overwrite_original temp.pdf
mv temp.pdf analysis_ready.pdf

# Decompress PDF streams for analysis
pdftk compressed.pdf output uncompressed.pdf uncompress

EOF

# personal/pdftk was copied before the text above was appended, and personal/
# is the only cheatpath configured for cheat. Refresh the copy so
# `cheat pdftk` actually shows the enhancements.
cp "$CHEAT_DIR/pdftk.cheat" "$CHEAT_DIR/personal/pdftk"

# Write the PDF analysis workflow sheet (cheat format: YAML front matter with
# tags, then "# description" / command pairs). The quoted delimiter keeps the
# text literal.
# Corrections relative to the earlier text:
#   - pdf-parser's --extract option takes a filename of its own, so
#     "--extract document.pdf" left the tool without an input file; embedded
#     files are now located by type and dumped by object id.
#   - the OCR example pins page 0, because converting a multi-page PDF emits
#     page-0.png, page-1.png, ... and never the page.png tesseract was given.
#   - the keyword grep is anchored on the leading "/" of PDF names; the bare
#     case-insensitive "aa" / "js" alternatives matched almost every line.
cat > "$CHEAT_DIR/pdf-analysis.cheat" << 'EOF'
---
tags: [ pdf, malware, analysis, security ]
---

# PDF Analysis Workflow

# Quick PDF overview and suspicious element detection
pdfid.py document.pdf

# Detailed PDF structure analysis
pdf-parser.py document.pdf

# Interactive analysis with JavaScript detection
peepdf -i document.pdf

# Extract metadata
exiftool document.pdf

# Remove passwords for analysis
qpdf --password=PASSWORD --decrypt encrypted.pdf decrypted.pdf

# Flatten PDF to remove interactive elements (SECURITY)
pdftk suspicious.pdf output safe.pdf flatten

# Extract embedded files (list /EmbeddedFile objects, then dump one by id)
pdf-parser.py --type /EmbeddedFile document.pdf
pdf-parser.py --object OBJECT_ID --filter --dump embedded.bin document.pdf

# Convert PDF to images for safe viewing
convert document.pdf[0-2] page-%02d.png

# OCR text from the first PDF page
convert document.pdf[0] page.png && tesseract page.png output

# Check for embedded JavaScript
peepdf -s extract_js document.pdf

# Analyze PDF with Origami (if available)
ruby -e "require 'origami'; pdf = Origami::PDF.read('document.pdf'); puts pdf.each_object.select{|o| o.is_a? Origami::Stream}.count"

# Extract strings that reference JavaScript or auto-run actions
strings document.pdf | grep -iE "/(javascript|js|openaction|aa)\b"

# Hexdump analysis of PDF structure
hexdump -C document.pdf | head -50

EOF

# Write the malware analysis workflow sheet (cheat format: YAML front matter
# with tags, then "# description" / command pairs). The quoted delimiter keeps
# the text literal.
# Correction relative to the earlier text: the tshark display filter now
# quotes its string literal (contains ".exe"); an unquoted .exe is not a
# string literal in display-filter syntax.
cat > "$CHEAT_DIR/malware-analysis.cheat" << 'EOF'
---
tags: [ malware, analysis, security, forensics ]
---

# Malware Analysis Workflow

# File type identification
file suspicious.exe

# Extract readable strings
strings -n 8 malware.bin

# Detect capabilities with CAPA
capa malware.exe

# Analyze JavaScript in sandbox
box-js --output-dir=/tmp/js_analysis suspicious.js

# Office document analysis
oledump.py document.doc

# RTF document analysis
rtfdump.py document.rtf

# Email analysis
emldump.py message.eml

# Extract base64 content
base64dump.py document.txt

# Binary analysis and extraction
binwalk malware.bin

# File carving
foremost -t exe,dll,pdf -i disk.img

# Hex analysis
xxd malware.exe | head -20

# Extract metadata from any file
exiftool malware.exe

# Archive analysis
7z l suspicious.zip
unzip -l suspicious.zip

# Network capture analysis (if pcap available)
tshark -r capture.pcap -Y 'http.request.uri contains ".exe"'

EOF

# Write the navi sheet for PDF analysis. navi format: "% tag" header,
# "# description" / command pairs with <placeholders>, and "$ name: command"
# lines that generate the suggestions for each placeholder.
# Correction relative to the earlier text: the keyword grep is anchored on the
# leading "/" of PDF names; the bare case-insensitive "aa" / "js" alternatives
# matched almost every line of strings output.
cat > "$NAVI_DIR/pdf-analysis.cheat" << 'EOF'
% pdf analysis

# Quick PDF info and suspicious elements
pdfid.py <pdf_file>

# Detailed PDF structure analysis
pdf-parser.py <pdf_file>

# Interactive PDF analysis
peepdf -i <pdf_file>

# Flatten PDF for security (remove JavaScript/forms)
pdftk <input_pdf> output <output_pdf> flatten

# Decrypt password-protected PDF
qpdf --password=<password> --decrypt <encrypted_pdf> <output_pdf>

# Extract PDF metadata
exiftool <pdf_file>

# Convert PDF pages to images
convert <pdf_file>[<page_range>] <output_pattern>

# Extract strings with JavaScript keywords
strings <pdf_file> | grep -iE "/(javascript|js|openaction|aa)\b"

$ pdf_file: find . -name "*.pdf" -type f
$ input_pdf: find . -name "*.pdf" -type f
$ encrypted_pdf: find . -name "*.pdf" -type f
$ page_range: echo "0-2"
$ output_pattern: echo "page-%02d.png"
$ password: echo "password123"
$ output_pdf: echo "output.pdf"

EOF

# Write the navi sheet for malware analysis. navi format: "% tag" header,
# "# description" / command pairs with <placeholders>, and "$ name: command"
# lines that generate the suggestions for each placeholder.
# Correction relative to the earlier text: the file_with_base64 and disk_image
# generators now group their -name alternatives in \( ... \). In
# `-name A -o -name B -type f` the implicit AND binds tighter than -o, so
# -type f applied only to the second pattern and directories named like the
# first could be offered.
cat > "$NAVI_DIR/malware-analysis.cheat" << 'EOF'
% malware analysis

# Identify file type
file <suspicious_file>

# Extract printable strings
strings -n <min_length> <binary_file>

# Detect malware capabilities
capa <executable_file>

# Analyze JavaScript in sandbox
box-js --output-dir=<output_dir> <js_file>

# Analyze Office documents
oledump.py <office_file>

# Analyze RTF documents
rtfdump.py <rtf_file>

# Extract and analyze base64 content
base64dump.py <file_with_base64>

# Binary analysis and file extraction
binwalk <binary_file>

# File carving from disk image
foremost -t <file_types> -i <disk_image>

# Hex dump analysis
xxd <binary_file> | head -<num_lines>

$ suspicious_file: find . -type f ! -name "*.txt" ! -name "*.md"
$ binary_file: find . -type f \( -name "*.exe" -o -name "*.dll" -o -name "*.bin" \)
$ executable_file: find . -type f \( -name "*.exe" -o -name "*.dll" \)
$ js_file: find . -name "*.js" -type f
$ office_file: find . -type f \( -name "*.doc" -o -name "*.xls" -o -name "*.ppt" \)
$ rtf_file: find . -name "*.rtf" -type f
$ file_with_base64: find . -type f \( -name "*.txt" -o -name "*.log" \)
$ disk_image: find . -type f \( -name "*.img" -o -name "*.dd" \)
$ min_length: echo "8"
$ output_dir: echo "/tmp/analysis"
$ file_types: echo "exe,dll,pdf,jpg"
$ num_lines: echo "20"

EOF

# Custom tldr pages cover tools that are missing from the standard tldr
# collection; they are stored under pages/common inside the cache directory.
mkdir -p "$TLDR_CACHE/pages/common"

# tldr page for pdfid.py, written literally (quoted heredoc delimiter).
# NOTE(review): the option names shown in this page are not checked anywhere
# in this script — in particular confirm that -k/--keyword exists and that
# -v/--verbose and -c/--csv behave as described, against `pdfid.py --help`.
cat << 'EOF' > "$TLDR_CACHE/pages/common/pdfid.py.md"
# pdfid.py

> Analyze PDF files and identify potentially suspicious elements.
> Part of Didier Stevens' PDF analysis toolkit.
> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.

- Analyze a PDF file for suspicious elements:

`pdfid.py {{path/to/document.pdf}}`

- Show detailed analysis including object counts:

`pdfid.py {{[-v|--verbose]}} {{path/to/document.pdf}}`

- Analyze all PDF files in a directory:

`pdfid.py {{path/to/directory/*.pdf}}`

- Output results in CSV format:

`pdfid.py {{[-c|--csv]}} {{path/to/document.pdf}}`

- Scan for specific keywords in PDF:

`pdfid.py {{[-k|--keyword]}} {{javascript}} {{path/to/document.pdf}}`

EOF

# tldr page for pdf-parser.py, written literally (quoted heredoc delimiter).
# Correction relative to the earlier text: --dump takes the output filename
# as its argument, so the dump example now selects an object, passes its
# stream through the filters and names the file to write. The previous
# "--dump --object N" form made "--object" the dump filename.
cat > "$TLDR_CACHE/pages/common/pdf-parser.py.md" << 'EOF'
# pdf-parser.py

> Parse and analyze PDF file structure, extract objects and streams.
> Part of Didier Stevens' PDF analysis toolkit.
> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.

- Parse PDF structure and show all objects:

`pdf-parser.py {{path/to/document.pdf}}`

- Extract a specific object by number:

`pdf-parser.py {{[-o|--object]}} {{object_number}} {{path/to/document.pdf}}`

- Search for objects containing specific text:

`pdf-parser.py {{[-s|--search]}} {{javascript}} {{path/to/document.pdf}}`

- Extract and decode streams:

`pdf-parser.py {{[-f|--filter]}} {{path/to/document.pdf}}`

- Dump the decoded stream of an object to a file:

`pdf-parser.py {{[-o|--object]}} {{object_number}} {{[-f|--filter]}} {{[-d|--dump]}} {{path/to/output.bin}} {{path/to/document.pdf}}`

- Generate statistics about PDF structure:

`pdf-parser.py {{[-a|--stats]}} {{path/to/document.pdf}}`

EOF

# tldr page for peepdf, written literally (quoted heredoc delimiter).
# NOTE(review): the option names and descriptions in this page are not checked
# anywhere in this script — confirm -f/--force-mode, -s/--script,
# -p/--password and the meaning of -u/--update against `peepdf --help` for the
# peepdf build that is actually installed.
cat << 'EOF' > "$TLDR_CACHE/pages/common/peepdf.md"
# peepdf

> Interactive PDF analysis framework with JavaScript analysis capabilities.
> More information: <https://eternal-todo.com/tools/peepdf-pdf-analysis-tool>.

- Analyze PDF file interactively:

`peepdf {{[-i|--interactive]}} {{path/to/document.pdf}}`

- Analyze PDF and automatically extract suspicious elements:

`peepdf {{[-f|--force-mode]}} {{path/to/document.pdf}}`

- Load PDF and execute peepdf script:

`peepdf {{[-s|--script]}} {{script.txt}} {{path/to/document.pdf}}`

- Analyze PDF with specific password:

`peepdf {{[-p|--password]}} {{password}} {{path/to/document.pdf}}`

- Generate XML report:

`peepdf {{[-x|--xml]}} {{path/to/document.pdf}}`

- Update malicious URL database:

`peepdf {{[-u|--update]}}`

EOF

# tldr page for capa, written literally (quoted heredoc delimiter).
# Corrections relative to the earlier text:
#   - --tag selects rules by their metadata; it is not a confidence threshold,
#     so the description of that example was rewritten.
#   - raw shellcode is selected with the format values sc32 / sc64;
#     "shellcode" is not one of capa's --format values.
cat > "$TLDR_CACHE/pages/common/capa.md" << 'EOF'
# capa

> Detect malware capabilities using the MITRE ATT&CK framework.
> Analyzes executables and maps them to threat behaviors.
> More information: <https://github.com/mandiant/capa>.

- Analyze an executable for capabilities:

`capa {{path/to/malware.exe}}`

- Show verbose analysis with detailed explanations:

`capa {{[-v|--verbose]}} {{path/to/malware.exe}}`

- Output results in JSON format:

`capa {{[-j|--json]}} {{path/to/malware.exe}}`

- Analyze with custom rules directory:

`capa {{[-r|--rules]}} {{path/to/rules}} {{path/to/malware.exe}}`

- Run only the rules whose metadata matches a tag:

`capa {{[-t|--tag]}} {{communication}} {{path/to/malware.exe}}`

- Analyze raw 32-bit or 64-bit shellcode instead of an executable file:

`capa {{[-f|--format]}} {{sc32|sc64}} {{path/to/shellcode.bin}}`

EOF

# Point cheat and navi, for the remnux user, at the sheet directories this
# script writes to. The heredoc delimiters are unquoted on purpose:
# $CHEAT_DIR and $NAVI_DIR are expanded while the files are written, so the
# configs cannot drift from the directories that were actually populated.
remnux_config="/home/remnux/.config"
mkdir -p "$remnux_config/cheat" "$remnux_config/navi"

# cheat: a single writable cheatpath holding the extension-less sheets.
cat > "$remnux_config/cheat/conf.yml" << EOF
---
cheatpaths:
  - name: personal
    path: $CHEAT_DIR/personal
    tags: [personal]
    readonly: false
EOF

# navi: explicit finder and shell settings, intended to prevent fzf preview
# errors.
# NOTE(review): the key names below are not validated by this script —
# confirm them (finder.preview_window in particular) against the installed
# navi version.
cat > "$remnux_config/navi/config.yaml" << EOF
cheats:
  paths:
    - $NAVI_DIR
    - $CHEAT_DIR

finder:
  command: fzf
  preview_window: right:50%

shell:
  command: bash
EOF

# One recursive ownership pass after both files exist; the files are created
# by whoever runs this script but belong to remnux.
chown -R remnux:remnux "$remnux_config/"

# Mirror the two custom workflow sheets into the extension-less personal/
# layout that cheat reads. This stays best-effort (a failed copy does not
# abort the run), but the failure is reported on stderr instead of being
# discarded.
for sheet_name in pdf-analysis malware-analysis; do
    cp "$CHEAT_DIR/${sheet_name}.cheat" "$CHEAT_DIR/personal/${sheet_name}" \
        || echo "⚠ Could not install ${sheet_name} into $CHEAT_DIR/personal" >&2
done

echo "✅ Enhanced cheat sheet collection created!"
echo ""
echo "📊 Summary:"
echo "  📁 Cheat sheets: $CHEAT_DIR"
echo "  🎯 Navi cheats: $NAVI_DIR"
echo "  📚 TLDR cache: $TLDR_CACHE"
echo ""
echo "Available cheat sheets:"
# List via a glob rather than `ls | sed ... || echo`: the exit status of that
# pipeline was sed's, so the fallback message could never be printed. An
# unmatched glob stays a literal pattern, which the -e test below rejects.
installed_sheets=("$CHEAT_DIR/personal"/*)
if [[ -e "${installed_sheets[0]}" ]]; then
    # Strip the directory part so only the sheet names are shown.
    printf '  • %s\n' "${installed_sheets[@]##*/}"
else
    echo "  • (checking directory structure...)"
fi
echo ""
echo "Usage examples:"
echo "  cheat pdftk              # Show pdftk cheat sheet"
echo "  navi                     # Interactive cheat browser"
echo "  tldr pdfid.py            # Quick examples for pdfid.py"
echo "  fhelp cheat pdf          # File analysis PDF workflow"