Major repository cleanup and enhancement

- Reorganize documentation: move old docs to docs/ directory
- Add comprehensive README.md with build options and usage guide
- Add detailed CONTRIBUTING.md with help content management guide
- Create Makefile for automated building and testing
- Add Dockerfile.scratch for building from Ubuntu 20.04 base
- Enhance all Dockerfiles with PowerShell + PSScriptAnalyzer
- Add modern shells: zsh (with plugins) and fish (with config)
- Add modern CLI tools: fd-find, ripgrep, fzf
- Create comprehensive help system with cheat/TLDR/fish completions
- Add helper scripts for help content management and coverage checking
- Fix Dockerfile.remnux script references
- Support three build variants: upstream (REMnux), scratch (Ubuntu), kali

Build options:
- make build-upstream: Fast, uses REMnux upstream (recommended)
- make build-scratch: Full control, builds from Ubuntu 20.04
- make build-kali: Legacy Kali Linux base

Features:
- PowerShell with PSScriptAnalyzer module
- Modern shells (zsh, fish) with custom configurations
- Enhanced help system (cheat sheets, TLDR pages, fish completions)
- Help coverage checking and bulk import tools
- Comprehensive documentation for users and contributors
2025-10-01 11:45:56 +02:00
parent 6bfcfd7935
commit b98aaee3e0
27 changed files with 5000 additions and 62 deletions
--- a/scripts/create-offline-help-system.sh
+++ b/scripts/create-offline-help-system.sh
@@ -0,0 +1,492 @@
+#!/bin/bash
+
+# Create comprehensive offline help system focused on tldr and cheat
+# Removes navi complexity and ensures reliable offline operation
+
+set -e
+
+CHEAT_DIR="/opt/cheatsheets"
+TLDR_CACHE="/home/remnux/.local/share/tldr"
+TOOLS_DB="/opt/remnux-docs/tools.db"
+
+echo "📚 Creating streamlined offline help system..."
+
+# Create directories
+mkdir -p "$CHEAT_DIR/personal" "$TLDR_CACHE/pages/common" "/opt/remnux-docs"
+
+# Configure cheat properly for remnux user
+mkdir -p /home/remnux/.config/cheat
+cat > /home/remnux/.config/cheat/conf.yml << 'EOF'
+---
+cheatpaths:
+  - name: personal
+    path: /opt/cheatsheets/personal
+    tags: [personal]
+    readonly: false
+EOF
+
+# Download upstream cheat sheets into $CHEAT_DIR/personal
+echo "📥 Downloading existing cheat sheets..."
+TOOLS=("tar" "7z" "unzip" "grep" "find" "awk" "sed")
+# curl -f fails on HTTP errors (e.g. 404) instead of saving the error page as a sheet; any failure falls back to a stub
+for tool in "${TOOLS[@]}"; do
+    sheet="$CHEAT_DIR/personal/$tool"
+    echo "  • Downloading $tool cheat sheet..."
+    if curl -fsS "https://raw.githubusercontent.com/cheat/cheatsheets/master/$tool" -o "$sheet" && [ -s "$sheet" ]; then
+        echo "    ✅ Downloaded $tool"
+    else
+        echo "    ⚠ Failed to download $tool, creating basic version"
+        printf '# %s\n# Basic usage examples for %s\n' "$tool" "$tool" > "$sheet"
+    fi
+done
+
+# Create comprehensive cheat sheets for your original analysis tools
+echo "📋 Creating analysis-focused cheat sheets..."
+
+# PDF Analysis cheat sheet
+cat > "$CHEAT_DIR/personal/pdf-analysis" << 'EOF'
+# PDF Analysis Workflow
+
+# Quick PDF overview and suspicious element detection
+pdfid.py document.pdf
+
+# Detailed PDF structure analysis
+pdf-parser.py document.pdf
+
+# Interactive analysis with JavaScript detection
+peepdf -i document.pdf
+
+# Extract metadata
+exiftool document.pdf
+
+# Remove passwords for analysis
+qpdf --password=PASSWORD --decrypt encrypted.pdf decrypted.pdf
+
+# Flatten PDF to remove interactive elements (SECURITY)
+pdftk suspicious.pdf output safe.pdf flatten
+
+# Extract embedded files
+pdf-parser.py --extract document.pdf
+
+# Convert PDF to images for safe viewing
+convert document.pdf[0-2] page-%02d.png
+
+# OCR text from PDF images  
+convert document.pdf page.png && tesseract page.png output
+
+# Check for embedded JavaScript
+peepdf -s extract_js document.pdf
+
+# Extract strings from PDF
+strings document.pdf | grep -i "javascript\|openaction\|aa\|js"
+
+# Hexdump analysis of PDF structure
+hexdump -C document.pdf | head -50
+EOF
+
+# Individual tool cheat sheets
+cat > "$CHEAT_DIR/personal/pdfid" << 'EOF'
+# pdfid.py - PDF Analysis Tool
+
+# Basic PDF analysis
+pdfid.py document.pdf
+
+# Verbose output with detailed object counts
+pdfid.py -a document.pdf
+
+# Scan all PDFs in directory
+pdfid.py *.pdf
+
+# Output in CSV format (requires plugins)
+pdfid.py -c document.pdf
+
+# Force analysis of potentially corrupted PDFs
+pdfid.py -f document.pdf
+EOF
+
+cat > "$CHEAT_DIR/personal/pdf-parser" << 'EOF'
+# pdf-parser.py - PDF Structure Analysis
+
+# Parse PDF structure
+pdf-parser.py document.pdf
+
+# Extract specific object by number
+pdf-parser.py -o 5 document.pdf
+
+# Search for objects containing text
+pdf-parser.py -s javascript document.pdf
+
+# Extract and decode streams
+pdf-parser.py -f document.pdf
+
+# Dump raw object content
+pdf-parser.py -d -o 5 document.pdf
+
+# Generate statistics
+pdf-parser.py -a document.pdf
+
+# Extract all objects to files
+pdf-parser.py --extract document.pdf
+EOF
+
+cat > "$CHEAT_DIR/personal/peepdf" << 'EOF'
+# peepdf - Interactive PDF Analysis
+
+# Interactive analysis mode
+peepdf -i document.pdf
+
+# Force mode for suspicious PDFs
+peepdf -f document.pdf
+
+# Load PDF with script
+peepdf -s script.txt document.pdf
+
+# Analyze with specific password
+peepdf -p password document.pdf
+
+# Generate XML report
+peepdf -x document.pdf
+
+# Update malicious URL database
+peepdf -u
+
+# Check for vulnerabilities
+peepdf -C document.pdf
+EOF
+
+cat > "$CHEAT_DIR/personal/pdftk" << 'EOF'
+# pdftk - PDF Manipulation
+
+# Flatten PDF (remove JavaScript/forms) - SECURITY
+pdftk suspicious.pdf output safe.pdf flatten
+
+# Concatenate PDFs
+pdftk file1.pdf file2.pdf cat output combined.pdf
+
+# Extract specific pages
+pdftk document.pdf cat 1-3 output pages1-3.pdf
+
+# Split PDF into single pages
+pdftk document.pdf burst
+
+# Decrypt password-protected PDF
+pdftk encrypted.pdf input_pw password output decrypted.pdf
+
+# Add password to PDF
+pdftk document.pdf output protected.pdf user_pw password
+
+# Decompress PDF streams for analysis
+pdftk compressed.pdf output uncompressed.pdf uncompress
+EOF
+
+# OLE document analysis sheet (stored under the tool's real name, oledump.py)
+cat > "$CHEAT_DIR/personal/oledump.py" << 'EOF'
+# oledump.py - Analyze OLE files (Office documents)
+
+# List all streams (basic overview)
+oledump.py document.doc
+
+# Dump the content of a specific stream (e.g., 8) to a file
+oledump.py -s 8 -d document.doc > stream8.bin
+
+# Analyze a specific stream (e.g., 8) in detail
+oledump.py -s 8 document.doc
+EOF
+
+# Also install the sheet under the bare name "oledump"; a failed copy is deliberately ignored
+cp "$CHEAT_DIR/personal/oledump.py" "$CHEAT_DIR/personal/oledump" 2>/dev/null || true
+
+# Malware analysis workflow sheet
+cat > "$CHEAT_DIR/personal/malware-analysis" << 'EOF'
+# Malware Analysis Workflow
+
+# File identification
+file suspicious.exe
+exiftool suspicious.exe
+
+# String analysis
+strings -n 8 malware.bin
+strings -e l malware.bin  # little-endian 16-bit
+strings -e b malware.bin  # big-endian 16-bit
+
+# Capability detection
+capa malware.exe
+
+# JavaScript analysis
+box-js --output-dir=/tmp/js_analysis suspicious.js
+
+# Office document analysis
+oledump.py document.doc
+rtfdump.py document.rtf
+emldump.py message.eml
+
+# Base64 content extraction
+base64dump.py document.txt
+
+# Binary analysis
+binwalk malware.bin
+hexdump -C malware.exe | head -20
+
+# File carving
+foremost -t exe,dll,pdf -i disk.img
+EOF
+
+cat > "$CHEAT_DIR/personal/capa" << 'EOF'
+# capa - Malware Capability Detection
+
+# Basic capability analysis
+capa malware.exe
+
+# Verbose output with detailed explanations
+capa -v malware.exe
+
+# Output in JSON format
+capa -j malware.exe
+
+# Use custom rules directory
+capa -r /path/to/rules malware.exe
+
+# Show only specific capability tags
+capa -t communication malware.exe
+
+# Analyze 32-bit shellcode
+capa -f sc32 shellcode.bin
+
+# Analyze 64-bit shellcode
+capa -f sc64 shellcode.bin
+EOF
+
+# Create comprehensive TLDR pages
+echo "📖 Creating TLDR pages for analysis tools..."
+
+cat > "$TLDR_CACHE/pages/common/pdfid.py.md" << 'EOF'
+# pdfid.py
+
+> Analyze PDF files and identify potentially suspicious elements.
+> Part of Didier Stevens' PDF analysis toolkit.
+> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.
+
+- Analyze a PDF file for suspicious elements:
+
+`pdfid.py {{path/to/document.pdf}}`
+
+- Show detailed analysis with object counts:
+
+`pdfid.py {{-a|--all}} {{path/to/document.pdf}}`
+
+- Analyze all PDF files in current directory:
+
+`pdfid.py {{*.pdf}}`
+
+- Output results in CSV format (requires plugins):
+
+`pdfid.py {{-c|--csv}} {{path/to/document.pdf}}`
+
+- Force analysis of potentially corrupted PDF:
+
+`pdfid.py {{-f|--force}} {{path/to/document.pdf}}`
+EOF
+
+cat > "$TLDR_CACHE/pages/common/pdf-parser.py.md" << 'EOF'
+# pdf-parser.py
+
+> Parse and analyze PDF file structure, extract objects and streams.
+> Part of Didier Stevens' PDF analysis toolkit.
+> More information: <https://blog.didierstevens.com/programs/pdf-tools/>.
+
+- Parse PDF structure and show all objects:
+
+`pdf-parser.py {{path/to/document.pdf}}`
+
+- Extract a specific object by number:
+
+`pdf-parser.py {{-o|--object}} {{object_number}} {{path/to/document.pdf}}`
+
+- Search for objects containing specific text:
+
+`pdf-parser.py {{-s|--search}} {{javascript}} {{path/to/document.pdf}}`
+
+- Extract and decode streams:
+
+`pdf-parser.py {{-f|--filter}} {{path/to/document.pdf}}`
+
+- Dump raw object content to file:
+
+`pdf-parser.py {{-d|--dump}} {{-o|--object}} {{object_number}} {{path/to/document.pdf}}`
+
+- Generate statistics about PDF structure:
+
+`pdf-parser.py {{-a|--stats}} {{path/to/document.pdf}}`
+
+- Extract all objects to separate files:
+
+`pdf-parser.py {{--extract}} {{path/to/document.pdf}}`
+EOF
+
+cat > "$TLDR_CACHE/pages/common/peepdf.md" << 'EOF'  # TLDR page for peepdf, stored verbatim
+# peepdf
+
+> Interactive PDF analysis framework with JavaScript analysis capabilities.
+> More information: <https://eternal-todo.com/tools/peepdf-pdf-analysis-tool>.
+
+- Analyze PDF file interactively:
+
+`peepdf {{-i|--interactive}} {{path/to/document.pdf}}`
+
+- Analyze PDF and force processing of suspicious elements:
+
+`peepdf {{-f|--force-mode}} {{path/to/document.pdf}}`
+
+- Load PDF and execute peepdf script:
+
+`peepdf {{-s|--load-script}} {{script.txt}} {{path/to/document.pdf}}`
+
+- Parse in loose mode to catch malformed objects:
+
+`peepdf {{-l|--loose-mode}} {{path/to/document.pdf}}`
+
+- Generate XML analysis report:
+
+`peepdf {{-x|--xml}} {{path/to/document.pdf}}`
+
+- Run a console command without entering interactive mode:
+
+`peepdf {{-C|--command}} {{"tree"}} {{path/to/document.pdf}}`
+
+- Update peepdf from its repository:
+
+`peepdf {{-u|--update}}`
+EOF
+
+cat > "$TLDR_CACHE/pages/common/capa.md" << 'EOF'  # TLDR page for capa, stored verbatim
+# capa
+
+> Detect malware capabilities using the MITRE ATT&CK framework.
+> Analyzes executables and maps them to threat behaviors.
+> More information: <https://github.com/mandiant/capa>.
+
+- Analyze an executable for capabilities:
+
+`capa {{path/to/malware.exe}}`
+
+- Show verbose analysis with detailed explanations:
+
+`capa {{-v|--verbose}} {{path/to/malware.exe}}`
+
+- Output results in JSON format:
+
+`capa {{-j|--json}} {{path/to/malware.exe}}`
+
+- Analyze with custom rules directory:
+
+`capa {{-r|--rules}} {{path/to/rules}} {{path/to/malware.exe}}`
+
+- Show only capabilities matching specific tag:
+
+`capa {{-t|--tag}} {{communication}} {{path/to/malware.exe}}`
+
+- Analyze 32-bit shellcode instead of a PE file:
+
+`capa {{-f|--format}} {{sc32}} {{path/to/shellcode.bin}}`
+
+- Analyze 64-bit shellcode:
+
+`capa {{-f|--format}} {{sc64}} {{path/to/shellcode.bin}}`
+EOF
+
+cat > "$TLDR_CACHE/pages/common/box-js.md" << 'EOF'
+# box-js
+
+> JavaScript sandbox for malware analysis and deobfuscation.
+> More information: <https://github.com/CapacitorSet/box-js>.
+
+- Analyze JavaScript file in sandbox:
+
+`box-js {{suspicious.js}}`
+
+- Analyze with custom output directory:
+
+`box-js {{--output-dir}} {{/tmp/analysis}} {{suspicious.js}}`
+
+- Enable verbose logging:
+
+`box-js {{--verbose}} {{suspicious.js}}`
+
+- Analyze with timeout (in seconds):
+
+`box-js {{--timeout}} {{30}} {{suspicious.js}}`
+
+- Download and analyze JavaScript from URL:
+
+`box-js {{--download}} {{http://example.com/malicious.js}}`
+
+- Analyze with custom user agent:
+
+`box-js {{--user-agent}} {{"Custom Agent"}} {{suspicious.js}}`
+EOF
+
+# Create accurate tools database
+cat > "$TOOLS_DB" << 'EOF'
+# REMnux Analysis Tools Database
+# Format: TOOL_NAME|DESCRIPTION|CATEGORY|USAGE_EXAMPLE
+
+# PDF Analysis Tools (Your Original Focus)
+pdfid.py|Identify suspicious elements in PDF files|pdf analysis|pdfid.py document.pdf
+pdf-parser.py|Parse and analyze PDF structure and objects|pdf analysis|pdf-parser.py document.pdf
+peepdf|Interactive PDF analysis framework with JavaScript detection|pdf analysis|peepdf -i malicious.pdf
+pdftk|Manipulate PDF files (merge, split, flatten)|pdf analysis|pdftk input.pdf output output.pdf flatten
+qpdf|PDF manipulation tool (decrypt, merge, convert)|pdf analysis|qpdf --decrypt encrypted.pdf output.pdf
+
+# Malware Analysis Tools
+capa|Detect malware capabilities using MITRE ATT&CK|malware analysis|capa malware.exe
+box-js|JavaScript sandbox for malware analysis|malware analysis|box-js --output-dir=/tmp suspicious.js
+oledump.py|Analyze OLE files (Office documents)|malware analysis|oledump.py document.xls
+rtfdump.py|Analyze RTF documents|malware analysis|rtfdump.py document.rtf
+emldump.py|Analyze EML email files|malware analysis|emldump.py message.eml
+base64dump.py|Extract and decode base64 strings|malware analysis|base64dump.py document.txt
+
+# File Analysis Tools
+strings|Extract printable strings from files|file analysis|strings -n 10 binary.exe
+hexdump|Display file content in hexadecimal|file analysis|hexdump -C binary.dat
+file|Determine file type|file analysis|file suspicious.dat
+exiftool|Extract metadata from files|file analysis|exiftool document.pdf
+binwalk|Analyze and extract firmware/binary images|file analysis|binwalk firmware.bin
+
+# Archive and Compression
+7z|7-Zip archiver with high compression|utilities|7z x archive.7z
+unzip|Extract ZIP archives|utilities|unzip archive.zip
+tar|Archive files with compression|utilities|tar -xzf archive.tar.gz
+
+# Text Processing and Search
+grep|Search text using patterns|text processing|grep -r "pattern" directory/
+find|Find files and directories|file search|find /path -name "*.pdf"
+awk|Text processing and pattern scanning|text processing|awk '{print $1}' file.txt
+sed|Stream editor for text manipulation|text processing|sed 's/old/new/g' file.txt
+EOF
+
+# Initialize tldr cache and ensure offline functionality
+echo "🔄 Initializing tldr cache for offline use..."
+export HOME=/home/remnux
+su - remnux -c "tldr --update" 2>/dev/null || echo "⚠ TLDR update attempted"
+
+# Set proper ownership
+chown -R remnux:remnux /home/remnux/.config /home/remnux/.local "$CHEAT_DIR" 2>/dev/null || true
+
+# Final report: output locations, example help commands, then resource counts
+printf '%s\n' \
+    "✅ Streamlined offline help system created!" \
+    "" \
+    "📊 Summary:" \
+    "  📁 Cheat sheets: $CHEAT_DIR/personal/" \
+    "  📚 TLDR cache: $TLDR_CACHE/pages/common/" \
+    "  🔍 Tools database: $TOOLS_DB" \
+    "" \
+    "Available help:" \
+    "  fhelp cheat pdf-analysis    # PDF analysis workflow" \
+    "  fhelp cheat pdfid           # pdfid.py examples" \
+    "  tldr pdfid.py               # Quick pdfid.py reference" \
+    "  tldr capa                   # Quick capa reference" \
+    ""
+n_cheats=$(find "$CHEAT_DIR/personal" -type f 2>/dev/null | wc -l)  # every regular file counts as a sheet
+n_tldr=$(find "$TLDR_CACHE/pages/common" -name "*.md" 2>/dev/null | wc -l)  # only *.md pages are counted
+printf '📈 Resources: %s cheat sheets, %s TLDR pages\n' "$n_cheats" "$n_tldr"