Major repository cleanup and enhancement

- Reorganize documentation: move old docs to the docs/ directory
- Add comprehensive README.md with build options and usage guide
- Add detailed CONTRIBUTING.md with help content management guide
- Create Makefile for automated building and testing
- Add Dockerfile.scratch for building from Ubuntu 20.04 base
- Enhance all Dockerfiles with PowerShell + PSScriptAnalyzer
- Add modern shells: zsh (with plugins) and fish (with config)
- Add modern CLI tools: fd-find, ripgrep, fzf
- Create comprehensive help system with cheat/TLDR/fish completions
- Add helper scripts for help content management and coverage checking
- Fix Dockerfile.remnux script references
- Support three build variants: upstream (REMnux), scratch (Ubuntu), kali

Build options:
- make build-upstream: Fast, uses REMnux upstream (recommended)
- make build-scratch: Full control, builds from Ubuntu 20.04
- make build-kali: Legacy Kali Linux base

Features:
- PowerShell with PSScriptAnalyzer module
- Modern shells (zsh, fish) with custom configurations
- Enhanced help system (cheat sheets, TLDR pages, fish completions)
- Help coverage checking and bulk import tools
- Comprehensive documentation for users and contributors
2025-10-01 11:45:56 +02:00
parent 6bfcfd7935
commit b98aaee3e0
27 changed files with 5000 additions and 62 deletions
--- a/scripts/create-enhanced-cheatsheets.sh
+++ b/scripts/create-enhanced-cheatsheets.sh
@@ -0,0 +1,439 @@
+#!/bin/bash
+
+# Create enhanced cheat sheets by combining existing ones with file analysis examples
+# This leverages existing resources and adds specific malware/PDF analysis use cases
+
+set -e
+
+CHEAT_DIR="/opt/cheatsheets"
+NAVI_DIR="/opt/navi-cheats"
+TLDR_CACHE="/root/.local/share/tldr"
+
+echo "📚 Creating enhanced cheat sheet collection..."
+
+# Create directories
+mkdir -p "$CHEAT_DIR" "$NAVI_DIR" "$TLDR_CACHE"
+
+# Fetch the community cheat sheet for tool $1 into $CHEAT_DIR/<tool>.cheat, or
+# seed a stub holding only a "# <tool>" header when no upstream sheet exists.
+download_and_enhance() {
+    local tool="$1"
+    local base_url="https://raw.githubusercontent.com/cheat/cheatsheets/master"
+    local output_file="$CHEAT_DIR/$tool.cheat"
+    local tmp_file="/tmp/${tool}_base.cheat"
+    echo "Processing $tool..."
+    # -f makes curl fail on HTTP errors: without it a 404 still exits 0 and the
+    # "404: Not Found" body is saved, passing the -s (non-empty) check below.
+    if curl -fsL "$base_url/$tool" -o "$tmp_file" && [ -s "$tmp_file" ]; then
+        echo "✓ Found existing cheat sheet for $tool"
+        cp "$tmp_file" "$output_file"
+    else
+        echo "⚠ No existing cheat sheet for $tool, creating from scratch"
+        printf '# %s\n\n' "$tool" > "$output_file"
+    fi
+}
+
+# Download base cheat sheets for key tools and fix directory structure
+TOOLS=("pdftk" "7z" "tar" "unzip" "exiftool")
+
+# Create proper cheat directory structure (cheat expects files without extensions in subdirs)
+mkdir -p "$CHEAT_DIR/personal"
+
+for tool in "${TOOLS[@]}"; do
+    download_and_enhance "$tool"
+    # Copy to proper cheat structure (without .cheat extension)
+    if [[ -f "$CHEAT_DIR/${tool}.cheat" ]]; then
+        cp "$CHEAT_DIR/${tool}.cheat" "$CHEAT_DIR/personal/${tool}"
+    fi
+done
+
+# Enhance pdftk with file analysis specific examples
+cat >> "$CHEAT_DIR/pdftk.cheat" << 'EOF'
+
+# === FILE ANALYSIS ENHANCEMENTS ===
+
+# Flatten PDF to prevent JavaScript execution (SECURITY)
+pdftk suspicious.pdf output safe.pdf flatten
+
+# Remove form fields and JavaScript (SECURITY) 
+pdftk malicious.pdf output cleaned.pdf flatten dont_ask
+
+# Extract pages for isolated analysis
+pdftk document.pdf cat 1-3 output first_pages.pdf
+
+# Combine with metadata removal workflow
+pdftk input.pdf output temp.pdf flatten
+exiftool -all= temp.pdf
+mv temp.pdf_original analysis_ready.pdf
+
+# Decompress PDF streams for analysis
+pdftk compressed.pdf output uncompressed.pdf uncompress
+
+EOF
+
+# Write the PDF analysis workflow sheet from scratch (no upstream sheet is downloaded for it); quoted 'EOF' keeps the body literal
+cat > "$CHEAT_DIR/pdf-analysis.cheat" << 'EOF'
+---
+tags: [ pdf, malware, analysis, security ]
+---
+
+# PDF Analysis Workflow
+
+# Quick PDF overview and suspicious element detection
+pdfid.py document.pdf
+
+# Detailed PDF structure analysis
+pdf-parser.py document.pdf
+
+# Interactive analysis with JavaScript detection
+peepdf -i document.pdf
+
+# Extract metadata
+exiftool document.pdf
+
+# Remove passwords for analysis
+qpdf --password=PASSWORD --decrypt encrypted.pdf decrypted.pdf
+
+# Flatten PDF to remove interactive elements (SECURITY)
+pdftk suspicious.pdf output safe.pdf flatten
+
+# Extract embedded files
+pdf-parser.py --extract document.pdf
+
+# Convert PDF to images for safe viewing
+convert document.pdf[0-2] page-%02d.png
+
+# OCR text from PDF images  
+convert document.pdf page.png && tesseract page.png output
+
+# Check for embedded JavaScript
+peepdf -s extract_js document.pdf
+
+# Analyze PDF with Origami (if available)
+ruby -e "require 'origami'; pdf = Origami::PDF.read('document.pdf'); puts pdf.each_object.select{|o| o.is_a? Origami::Stream}.count"
+
+# Extract strings from PDF
+strings document.pdf | grep -i "javascript\|openaction\|aa\|js"
+
+# Hexdump analysis of PDF structure
+hexdump -C document.pdf | head -50
+
+EOF
+
+# Create malware analysis workflow cheat sheet
+cat > "$CHEAT_DIR/malware-analysis.cheat" << 'EOF'
+---
+tags: [ malware, analysis, security, forensics ]
+---
+
+# Malware Analysis Workflow
+
+# File type identification
+file suspicious.exe
+
+# Extract readable strings
+strings -n 8 malware.bin
+
+# Detect capabilities with CAPA
+capa malware.exe
+
+# Analyze JavaScript in sandbox
+box-js --output-dir=/tmp/js_analysis suspicious.js
+
+# Office document analysis
+oledump.py document.doc
+
+# RTF document analysis  
+rtfdump.py document.rtf
+
+# Email analysis
+emldump.py message.eml
+
+# Extract base64 content
+base64dump.py document.txt
+
+# Binary analysis and extraction
+binwalk malware.bin
+
+# File carving
+foremost -t exe,dll,pdf -i disk.img
+
+# Hex analysis
+xxd malware.exe | head -20
+
+# Extract metadata from any file
+exiftool malware.exe
+
+# Archive analysis
+7z l suspicious.zip
+unzip -l suspicious.zip
+
+# Network capture analysis (if pcap available)
+tshark -r capture.pcap -Y "http.request.uri contains .exe"
+
+EOF
+
+# Create navi cheat sheets (interactive format): "% tag" header, "# description" + command pairs, and "$ name: command" lines feeding the <name> placeholders
+cat > "$NAVI_DIR/pdf-analysis.cheat" << 'EOF'
+% pdf analysis
+
+# Quick PDF info and suspicious elements
+pdfid.py <pdf_file>
+
+# Detailed PDF structure analysis  
+pdf-parser.py <pdf_file>
+
+# Interactive PDF analysis
+peepdf -i <pdf_file>
+
+# Flatten PDF for security (remove JavaScript/forms)
+pdftk <input_pdf> output <output_pdf> flatten
+
+# Decrypt password-protected PDF
+qpdf --password=<password> --decrypt <encrypted_pdf> <output_pdf>
+
+# Extract PDF metadata
+exiftool <pdf_file>
+
+# Convert PDF pages to images
+convert <pdf_file>[<page_range>] <output_pattern>
+
+# Extract strings with JavaScript keywords
+strings <pdf_file> | grep -i "javascript\|openaction\|aa\|js"
+
+$ pdf_file: find . -name "*.pdf" -type f
+$ input_pdf: find . -name "*.pdf" -type f  
+$ encrypted_pdf: find . -name "*.pdf" -type f
+$ page_range: echo "0-2"
+$ output_pattern: echo "page-%02d.png"
+$ password: echo "password123"
+$ output_pdf: echo "output.pdf"
+
+EOF
+# navi sheet for malware triage; every find-based suggestion groups its -name tests so -type f applies to all of them
+cat > "$NAVI_DIR/malware-analysis.cheat" << 'EOF'
+% malware analysis
+
+# Identify file type
+file <suspicious_file>
+
+# Extract printable strings
+strings -n <min_length> <binary_file>
+
+# Detect malware capabilities
+capa <executable_file>
+
+# Analyze JavaScript in sandbox
+box-js --output-dir=<output_dir> <js_file>
+
+# Analyze Office documents
+oledump.py <office_file>
+
+# Analyze RTF documents
+rtfdump.py <rtf_file>
+
+# Extract and analyze base64 content
+base64dump.py <file_with_base64>
+
+# Binary analysis and file extraction
+binwalk <binary_file>
+
+# File carving from disk image
+foremost -t <file_types> -i <disk_image>
+
+# Hex dump analysis
+xxd <binary_file> | head -<num_lines>
+
+$ suspicious_file: find . -type f ! -name "*.txt" ! -name "*.md"
+$ binary_file: find . -type f \( -name "*.exe" -o -name "*.dll" -o -name "*.bin" \)
+$ executable_file: find . -type f \( -name "*.exe" -o -name "*.dll" \)
+$ js_file: find . -name "*.js" -type f
+$ office_file: find . -type f \( -name "*.doc" -o -name "*.xls" -o -name "*.ppt" \)
+$ rtf_file: find . -name "*.rtf" -type f
+$ file_with_base64: find . -type f \( -name "*.txt" -o -name "*.log" \)
+$ disk_image: find . -type f \( -name "*.img" -o -name "*.dd" \)
+$ min_length: echo "8"
+$ output_dir: echo "/tmp/analysis"
+$ file_types: echo "exe,dll,pdf,jpg"
+$ num_lines: echo "20"
+
+EOF
+
+# Create custom tldr pages for tools missing from standard tldr
+mkdir -p "$TLDR_CACHE/pages/common"
+
+cat > "$TLDR_CACHE/pages/common/pdfid.py.md" << 'EOF'
+# pdfid.py
+
+> Analyze PDF files and identify potentially suspicious elements.
+> Part of Didier Stevens' PDF analysis toolkit.
+> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.
+
+- Analyze a PDF file for suspicious elements:
+
+`pdfid.py {{path/to/document.pdf}}`
+
+- Show detailed analysis including object counts:
+
+`pdfid.py {{[-v|--verbose]}} {{path/to/document.pdf}}`
+
+- Analyze all PDF files in a directory:
+
+`pdfid.py {{path/to/directory/*.pdf}}`
+
+- Output results in CSV format:
+
+`pdfid.py {{[-c|--csv]}} {{path/to/document.pdf}}`
+
+- Scan for specific keywords in PDF:
+
+`pdfid.py {{[-k|--keyword]}} {{javascript}} {{path/to/document.pdf}}`
+
+EOF
+# tldr page for pdf-parser.py; --dump takes an output filename, so the dump example names one explicitly
+cat > "$TLDR_CACHE/pages/common/pdf-parser.py.md" << 'EOF'
+# pdf-parser.py
+
+> Parse and analyze PDF file structure, extract objects and streams.
+> Part of Didier Stevens' PDF analysis toolkit.
+> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.
+
+- Parse PDF structure and show all objects:
+
+`pdf-parser.py {{path/to/document.pdf}}`
+
+- Extract a specific object by number:
+
+`pdf-parser.py {{[-o|--object]}} {{object_number}} {{path/to/document.pdf}}`
+
+- Search for objects containing specific text:
+
+`pdf-parser.py {{[-s|--search]}} {{javascript}} {{path/to/document.pdf}}`
+
+- Extract and decode streams:
+
+`pdf-parser.py {{[-f|--filter]}} {{path/to/document.pdf}}`
+
+- Dump the decoded stream of a specific object to a file:
+
+`pdf-parser.py {{[-o|--object]}} {{object_number}} {{[-f|--filter]}} {{[-d|--dump]}} {{path/to/output.bin}} {{path/to/document.pdf}}`
+
+- Generate statistics about PDF structure:
+
+`pdf-parser.py {{[-a|--stats]}} {{path/to/document.pdf}}`
+
+EOF
+
+cat > "$TLDR_CACHE/pages/common/peepdf.md" << 'EOF'
+# peepdf
+
+> Interactive PDF analysis framework with JavaScript analysis capabilities.
+> More information: <https://eternal-todo.com/tools/peepdf-pdf-analysis-tool>.
+
+- Analyze PDF file interactively:
+
+`peepdf {{[-i|--interactive]}} {{path/to/document.pdf}}`
+
+- Analyze PDF and automatically extract suspicious elements:
+
+`peepdf {{[-f|--force-mode]}} {{path/to/document.pdf}}`
+
+- Load PDF and execute peepdf script:
+
+`peepdf {{[-s|--script]}} {{script.txt}} {{path/to/document.pdf}}`
+
+- Analyze PDF with specific password:
+
+`peepdf {{[-p|--password]}} {{password}} {{path/to/document.pdf}}`
+
+- Generate XML report:
+
+`peepdf {{[-x|--xml]}} {{path/to/document.pdf}}`
+
+- Update malicious URL database:
+
+`peepdf {{[-u|--update]}}`
+
+EOF
+# tldr page for capa; --tag filters rules by meta field value and --format takes sc32/sc64 for shellcode
+cat > "$TLDR_CACHE/pages/common/capa.md" << 'EOF'
+# capa
+
+> Detect malware capabilities using the MITRE ATT&CK framework.
+> Analyzes executables and maps them to threat behaviors.
+> More information: <https://github.com/mandiant/capa>.
+
+- Analyze an executable for capabilities:
+
+`capa {{path/to/malware.exe}}`
+
+- Show verbose analysis with detailed explanations:
+
+`capa {{[-v|--verbose]}} {{path/to/malware.exe}}`
+
+- Output results in JSON format:
+
+`capa {{[-j|--json]}} {{path/to/malware.exe}}`
+
+- Analyze with custom rules directory:
+
+`capa {{[-r|--rules]}} {{path/to/rules}} {{path/to/malware.exe}}`
+
+- Only apply rules whose meta fields match a given value (e.g. a tag):
+
+`capa {{[-t|--tag]}} {{communication}} {{path/to/malware.exe}}`
+
+- Analyze 32-bit or 64-bit shellcode instead of a PE file:
+
+`capa {{[-f|--format]}} {{sc32|sc64}} {{path/to/shellcode.bin}}`
+
+EOF
+
+# Configure cheat and navi for the remnux user. The heredocs are deliberately
+# unquoted so the paths follow CHEAT_DIR/NAVI_DIR instead of repeating literals.
+REMNUX_CONFIG="/home/remnux/.config"
+mkdir -p "$REMNUX_CONFIG/cheat" "$REMNUX_CONFIG/navi"
+cat > "$REMNUX_CONFIG/cheat/conf.yml" << EOF
+---
+cheatpaths:
+  - name: personal
+    path: $CHEAT_DIR/personal
+    tags: [personal]
+    readonly: false
+EOF
+# Fix navi configuration to prevent fzf preview errors
+cat > "$REMNUX_CONFIG/navi/config.yaml" << EOF
+cheats:
+  paths:
+    - $NAVI_DIR
+    - $CHEAT_DIR
+
+finder:
+  command: fzf
+  preview_window: right:50%
+
+shell:
+  command: bash
+EOF
+# A single recursive chown after both files exist covers everything created above
+chown -R remnux:remnux "$REMNUX_CONFIG/"
+
+# Copy custom cheat sheets to proper cheat structure
+cp "$CHEAT_DIR/pdf-analysis.cheat" "$CHEAT_DIR/personal/pdf-analysis" 2>/dev/null || true
+cp "$CHEAT_DIR/malware-analysis.cheat" "$CHEAT_DIR/personal/malware-analysis" 2>/dev/null || true
+
+echo "✅ Enhanced cheat sheet collection created!"
+echo ""
+echo "📊 Summary:"
+echo "  📁 Cheat sheets: $CHEAT_DIR"
+echo "  🎯 Navi cheats: $NAVI_DIR" 
+echo "  📚 TLDR cache: $TLDR_CACHE"
+echo ""
+echo "Available cheat sheets:"
+ls -1 "$CHEAT_DIR/personal/" 2>/dev/null | sed 's/^/  • /' || echo "  • (checking directory structure...)"
+echo ""
+echo "Usage examples:"
+echo "  cheat pdftk              # Show pdftk cheat sheet"
+echo "  navi                     # Interactive cheat browser"  
+echo "  tldr pdfid.py            # Quick examples for pdfid.py"
+echo "  fhelp cheat pdf          # File analysis PDF workflow"