diff --git a/Dockerfile.remnux b/Dockerfile.remnux
new file mode 100644
index 0000000..d6a8029
--- /dev/null
+++ b/Dockerfile.remnux
@@ -0,0 +1,52 @@
+# Pin the base to the focal tag (matches remnux-dockerfile-focal.txt) instead of the moving :latest tag.
+FROM remnux/remnux-distro:focal
+LABEL maintainer="tabledevil"
+
+USER root
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Europe/Berlin
+
+# Install additional system packages that REMnux doesn't include
+RUN apt-get update && apt-get install -y \
+    busybox \
+    catdoc \
+    docx2txt \
+    mc \
+    pipx \
+    unrtf \
+    && rm -rf /var/lib/apt/lists/*
+
+# Configure pip
+ENV PYTHONDONTWRITEBYTECODE=1
+COPY pip.conf /etc/pip.conf
+
+# Install Mandiant CAPA for malware analysis
+RUN wget -O- https://github.com/mandiant/capa/releases/download/v7.4.0/capa-v7.4.0-linux.zip | busybox unzip -d /usr/bin - \
+    && chmod +x /usr/bin/capa
+
+# Install JavaScript sandbox
+RUN npm install box-js --global --production
+
+# Install unfurl & dependencies via pipx (for URL analysis)
+RUN PIPX_HOME=/opt/pipx PIPX_BIN_DIR=/usr/local/bin pipx install --include-deps dfir-unfurl \
+    && PIPX_HOME=/opt/pipx PIPX_BIN_DIR=/usr/local/bin pipx inject dfir-unfurl requests six maclookup
+
+# Install visidata via pipx (for data exploration)
+RUN PIPX_HOME=/opt/pipx PIPX_BIN_DIR=/usr/local/bin pipx install --include-deps visidata
+
+# Create data directory and set permissions
+RUN mkdir -p /data \
+    && chown remnux:remnux /data
+
+# Add German documentation files
+COPY files/README /opt/README
+COPY files/command_help /opt/command_help
+RUN echo 'cat /opt/README' >> /etc/bash.bashrc
+
+# Switch to remnux user (REMnux default user)
+USER remnux
+ENV LANG=en_US.UTF-8
+ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/home/remnux/.local/bin
+WORKDIR /data
+
+CMD ["/bin/bash"]
diff --git a/README.md b/README.md
index 30352df..706c841 100644
--- a/README.md
+++ b/README.md
@@ -1,31 +1,49 @@
 # docker_file_analysis
+
+## REMnux-Based File Analysis Container
+
+This container is now based on the REMnux malware analysis toolkit, providing a
comprehensive set of tools for file analysis, especially PDFs and malware samples.
+
 ## Usage
-```
+```bash
+# REMnux-based version
+docker build -f Dockerfile.remnux -t tabledevil/file-analysis:remnux .
+docker run -it --rm -v "$(pwd):/data" tabledevil/file-analysis:remnux
+
+# Original Kali-based version (legacy)
 docker run -it --rm -v "$(pwd):/data" tabledevil/file-analysis
 ```
-## Included Tools
-* mraptor
-* peepdf
-* imagemagick
-* pdftk
-* docx2txt
-* catdoc
-* mpack
-* exiftool
-* origami - pdf analysis
-  * pdfcop
-  * pdfextract
-  * pdfmetadata
-* pdfexplode
-* vipermonkey (vmonkey)
-* didierstevens-suit
-  * pdf-parser.py
-  * pdfid.py
+## Included Tools (REMnux Base + Additional)
+
+### PDF Analysis Suite (from REMnux)
+* **peepdf** - PDF analysis framework with JavaScript detection
+* **pdf-parser.py** - Extract and analyze PDF elements (Didier Stevens)
+* **pdfid.py** - Quick PDF structure overview (Didier Stevens)
+* **origami** - Ruby gem suite (pdfcop, pdfextract, pdfmetadata)
+* **pdftk-java** - PDF manipulation and flattening
+* **qpdf** - PDF manipulation (merge, convert, transform)
+* **pdfresurrect** - Extract previous versions from PDFs
+* **pdftool** - Analyze PDF incremental updates
+
+### Malware Analysis (Additional)
+* **capa** - Malware capability detection (Mandiant)
+* **box-js** - JavaScript sandbox analysis
+* **oletools** and **Didier Stevens suite** - Office document analysis tools
   * oledump.py
   * rtfdump.py
   * emldump.py
-  * jpegdump.py
-  * zipdump.py
-  * and many more in /opt/didierstevensuite/
+  * and more
+
+### Data Analysis & Utilities (Additional)
+* **visidata** - Data exploration and analysis
+* **unfurl** - URL and data analysis (DFIR)
+* **base64dump** - Base64 decoder (Didier Stevens)
+* **tesseract** - OCR text extraction
+* **exiftool** - Metadata extraction
+
+### System Tools
+* **mc** - Midnight Commander file manager
+* **p7zip-full** - Archive utilities
+* All standard REMnux tools and utilities
\ No newline at end of file
diff --git a/WARP.md
b/WARP.md new file mode 100644 index 0000000..dae5ede --- /dev/null +++ b/WARP.md @@ -0,0 +1,106 @@ +# WARP.md + +This file provides guidance to WARP (warp.dev) when working with code in this repository. + +## Project Overview + +This repository contains a Docker-based file analysis toolkit, primarily focused on PDF and malware analysis. It packages multiple security analysis tools into a Kali Linux-based container that can be run on any system with Docker. + +The main image (`tabledevil/file-analysis`) is published to Docker Hub and provides a consistent environment for file analysis tasks. + +## Core Architecture + +- **Base Image**: Kali Linux rolling release +- **Primary Use Case**: Analyzing potentially malicious files (PDFs, Office docs, executables) +- **Execution Model**: Container runs with mounted host directory (`/data`) for file access +- **User Security**: Runs as non-privileged `nonroot` user (UID 1001) for security isolation + +## Development Commands + +### Building the Container +```bash +docker build -t tabledevil/file-analysis . 
+``` + +### Running the Container +```bash +# Standard usage - mounts current directory +docker run -it --rm -v "$(pwd):/data" tabledevil/file-analysis + +# Run specific command without interactive shell +docker run --rm -v "$(pwd):/data" tabledevil/file-analysis pdfid.py suspicious.pdf +``` + +### Testing Container Functionality +```bash +# Verify installed tools are accessible +docker run --rm tabledevil/file-analysis which pdfid.py +docker run --rm tabledevil/file-analysis which peepdf +docker run --rm tabledevil/file-analysis capa --version +``` + +## Key Tools and Usage Patterns + +The container includes specialized analysis tools: + +**PDF Analysis Suite:** +- `pdfid.py` - Quick PDF structure overview +- `pdf-parser.py` - Extract and analyze PDF elements +- `peepdf` - Interactive PDF analysis with JavaScript detection +- `pdftk` - PDF manipulation and flattening +- Origami suite (`pdfcop`, `pdfextract`, `pdfmetadata`) + +**Malware Analysis:** +- `capa` - Malware capability detection +- `box-js` - JavaScript sandbox analysis +- `oledump.py`, `rtfdump.py`, `emldump.py` - Office document analysis +- `visidata` - Data exploration and analysis + +**File Format Tools:** +- `exiftool` - Metadata extraction +- `catdoc`, `docx2txt` - Document conversion +- `unrtf` - RTF processing +- ImageMagick - Image processing (PDF policy modified for read/write) + +## Environment Configuration + +- **Timezone**: Europe/Berlin +- **Python**: Uses `--break-system-packages` for pip installations due to Kali base +- **PATH**: Extended to include `/opt/didierstevenssuite/` and pypy binaries +- **Working Directory**: `/data` (expected mount point) + +## Development Guidelines + +### Docker Best Practices Applied +- Multi-stage approach with dependency installation +- Non-root user execution +- Minimal layer count optimization +- Proper cleanup of package caches + +### Tool Integration +- Didier Stevens suite tools are cloned from GitHub and made executable +- Python tools installed via 
both system pip and pipx for isolation +- Ruby gems (Origami) installed system-wide +- npm packages installed globally for JavaScript analysis + +### Security Considerations +- Container runs as unprivileged user +- ImageMagick PDF policy relaxed only for necessary operations +- File analysis happens in isolated container environment + +## File Structure + +- `Dockerfile` - Main container build configuration +- `files/README` - German language tool documentation for container users +- `files/command_help` - Detailed usage examples for PDF analysis tools +- `pip.conf` - Python package installation optimization settings + +## Common Workflow + +1. Place suspicious files in a directory +2. Run container with that directory mounted to `/data` +3. Use appropriate analysis tools based on file type +4. Extract results and artifacts to the mounted directory +5. Container automatically cleans up on exit + +The container is designed for security researchers and incident response teams who need a standardized, portable environment for file analysis without installing potentially dangerous tools on their host systems. \ No newline at end of file diff --git a/remnux-dockerfile-focal.txt b/remnux-dockerfile-focal.txt new file mode 100644 index 0000000..c79805b --- /dev/null +++ b/remnux-dockerfile-focal.txt @@ -0,0 +1,53 @@ +# This Docker image encapsulates the REMnux v7 distro on Ubuntu 20.04 (focal). +# For details about REMnux, including how you can run it on a physical system +# or as a virtual machine, see https://REMnux.org. 
+#
+# You can run this image as a container using a command such as:
+#
+#  docker run --rm -it remnux/remnux-distro /bin/bash
+#
+# To map a local directory into the container's /home/remnux/files directory,
+# you could use a command like this by supplying the appropriate directory name:
+#
+#  docker run --rm -it -v <local_directory>:/home/remnux/files remnux/remnux-distro /bin/bash
+#
+# If you'd like to access the container using SSH, you can invoke it like this by
+# mapping your local TCP port 22 to the container's internal TCP port 22. In this example,
+# the container will remain active in the background:
+#
+#  docker run -d -p 22:22 remnux/remnux-distro
+#
+# If you're going to run this container in a remote cloud, be sure to change the default
+# password and otherwise harden the system according to your requirements.
+#
+# If you're planning to use Cutter inside the container, you'll need to include the
+# --privileged parameter when invoking Docker.
+#
+
+FROM ubuntu:20.04
+
+LABEL description="REMnux® is a Linux toolkit for reverse-engineering and analyzing malicious software."
+LABEL maintainer="Lenny Zeltser (@lennyzeltser, zeltser.com)"
+LABEL version="v2025.30.1"
+ARG CAST_VER=0.16.22
+
+USER root
+
+WORKDIR /tmp
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y wget gnupg git && \
+    wget https://github.com/ekristen/cast/releases/download/v${CAST_VER}/cast-v${CAST_VER}-linux-amd64.deb && \
+    dpkg -i /tmp/cast-v${CAST_VER}-linux-amd64.deb && \
+    cast install --mode cloud --user remnux remnux && \
+    rm -rf /root/.cache/* /tmp/cast-v${CAST_VER}-linux-amd64.deb && \
+    unset DEBIAN_FRONTEND
+
+# The cast .deb is deleted inside the RUN above; a separate RUN rm would still leave it in the earlier layer.
+
+ENV TERM=linux
+WORKDIR /home/remnux
+
+RUN mkdir /var/run/sshd
+EXPOSE 22
+CMD ["/usr/sbin/sshd", "-D"]
diff --git a/test-containers.sh b/test-containers.sh
new file mode 100755
index 0000000..6ff4323
--- /dev/null
+++ b/test-containers.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+# Test runner script to compare original Kali container vs new REMnux container
+# This ensures we don't lose any functionality in the migration
+
+set -e
+
+echo "=== Container Testing Suite ==="
+echo "Comparing original Kali container vs new REMnux container"
+echo
+
+# Build the REMnux-based container first
+echo "🔧 Building REMnux-based container..."
+if docker build -f Dockerfile.remnux -t tabledevil/file-analysis:remnux . >/dev/null 2>&1; then
+    echo "✅ REMnux container built successfully"
+else
+    echo "❌ Failed to build REMnux container"
+    exit 1
+fi
+
+echo
+
+# Test the original Kali-based container (if available)
+echo "🧪 Testing original Kali-based container..."
+if docker image inspect tabledevil/file-analysis:latest >/dev/null 2>&1; then
+    echo "Running tests on original container..."
+    if docker run --rm -v "$(pwd):/workspace" tabledevil/file-analysis:latest bash /workspace/test-tools.sh; then
+        echo "✅ Original container: All tests passed"
+        ORIGINAL_PASSED=true
+    else
+        echo "⚠️ Original container: Some tests failed"
+        ORIGINAL_PASSED=false
+    fi
+else
+    echo "⚠️ Original container not found, skipping tests"
+    ORIGINAL_PASSED="N/A"
+fi
+
+echo
+
+# Test the new REMnux-based container
+echo "🧪 Testing new REMnux-based container..."
+echo "Running tests on REMnux container..."
+if docker run --rm -v "$(pwd):/workspace" tabledevil/file-analysis:remnux bash /workspace/test-tools.sh; then
+    echo "✅ REMnux container: All tests passed"
+    REMNUX_PASSED=true
+else
+    echo "❌ REMnux container: Some tests failed"
+    REMNUX_PASSED=false
+fi
+
+echo
+echo "=== FINAL COMPARISON ==="
+echo "Original Kali container: $ORIGINAL_PASSED"
+echo "REMnux container: $REMNUX_PASSED"
+echo
+
+if [ "$REMNUX_PASSED" = true ]; then
+    if [ "$ORIGINAL_PASSED" = true ] || [ "$ORIGINAL_PASSED" = "N/A" ]; then
+        echo "🎉 Migration successful! REMnux container has all required tools."
+        exit 0
+    else
+        echo "✨ REMnux container is working better than the original!"
+        exit 0
+    fi
+else
+    echo "💥 Migration needs work. REMnux container is missing some tools."
+    echo
+    echo "🔍 To debug, run:"
+    echo " docker run -it --rm -v \"\$(pwd):/workspace\" tabledevil/file-analysis:remnux bash"
+    echo " Then manually run: bash /workspace/test-tools.sh"
+    exit 1
+fi
\ No newline at end of file
diff --git a/test-tools.sh b/test-tools.sh
new file mode 100755
index 0000000..e257927
--- /dev/null
+++ b/test-tools.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+# Test script to verify all required analysis tools are present
+# This tests the tools we specifically added to our original container
+
+echo "=== File Analysis Container Tool Test ==="
+echo "Testing for required tools..."
+echo + +FAILED_TOOLS=() +PASSED_TOOLS=() + +# Function to test if a command exists and works +test_tool() { + local tool_name="$1" + local test_command="$2" + local expected_pattern="$3" + + echo -n "Testing $tool_name... " + + if command -v "${tool_name}" >/dev/null 2>&1; then + if [ -n "$test_command" ]; then + # Run the test command and check output + if output=$(eval "$test_command" 2>&1) && [[ "$output" =~ $expected_pattern ]]; then + echo "โœ“ PASS" + PASSED_TOOLS+=("$tool_name") + else + echo "โœ— FAIL (exists but test failed)" + FAILED_TOOLS+=("$tool_name") + fi + else + echo "โœ“ PASS" + PASSED_TOOLS+=("$tool_name") + fi + else + echo "โœ— FAIL (not found)" + FAILED_TOOLS+=("$tool_name") + fi +} + +# Test PDF Analysis Tools (our core tools) +echo "--- PDF Analysis Tools ---" +test_tool "pdfid.py" "pdfid.py 2>&1 | head -1" "PDFiD" +test_tool "pdf-parser.py" "pdf-parser.py 2>&1 | head -1" "Usage" +test_tool "peepdf" "peepdf --help 2>&1 | head -1" "" +test_tool "pdftk" "pdftk --version 2>&1" "pdftk" + +# Test Ruby Origami suite +echo "--- Ruby Origami Suite ---" +test_tool "pdfcop" "pdfcop --version 2>/dev/null || pdfcop --help 2>&1 | head -1" "" +test_tool "pdfextract" "pdfextract --version 2>/dev/null || pdfextract --help 2>&1 | head -1" "" +test_tool "pdfmetadata" "pdfmetadata --version 2>/dev/null || pdfmetadata --help 2>&1 | head -1" "" + +# Test Malware Analysis Tools (our additions) +echo "--- Malware Analysis Tools ---" +test_tool "capa" "capa --version 2>&1" "capa" +test_tool "box-js" "box-js --version 2>&1" "[0-9]" + +# Test Data Analysis Tools (our additions) +echo "--- Data Analysis Tools ---" +test_tool "vd" "visidata --version 2>&1" "" +test_tool "unfurl_cli.py" "unfurl_cli.py --help 2>&1 | head -1" "usage" + +# Test File Format Tools +echo "--- File Format Tools ---" +test_tool "exiftool" "exiftool -ver 2>&1" "[0-9]" +test_tool "catdoc" "catdoc -V 2>&1" "" +test_tool "docx2txt" "docx2txt --version 2>/dev/null || echo 'docx2txt exists'" "" 
+test_tool "unrtf" "unrtf --version 2>&1" ""
+
+# Test System Tools
+echo "--- System Tools ---"
+test_tool "mc" "mc --version 2>&1" "GNU Midnight Commander"
+test_tool "busybox" "busybox --help 2>&1 | head -1" "BusyBox"
+test_tool "7z" "7z 2>&1 | head -2 | tail -1" "7-Zip"
+
+# Test Didier Stevens dump scripts (oledump.py etc. are not part of oletools) - REMnux has these differently
+echo "--- Python Packages ---"
+test_tool "oledump.py" "oledump.py --help 2>&1 | head -1" ""
+test_tool "rtfdump.py" "rtfdump.py --help 2>&1 | head -1" ""
+test_tool "emldump.py" "emldump.py --help 2>&1 | head -1" ""
+
+# Summary
+echo
+echo "=== TEST SUMMARY ==="
+echo "Passed tools: ${#PASSED_TOOLS[@]}"
+echo "Failed tools: ${#FAILED_TOOLS[@]}"
+
+if [ ${#FAILED_TOOLS[@]} -gt 0 ]; then
+    echo
+    echo "FAILED TOOLS:"
+    printf '%s\n' "${FAILED_TOOLS[@]}"
+    echo
+    echo "❌ Some tools are missing or not working properly"
+    exit 1
+else
+    echo
+    echo "✅ All tools are present and working!"
+    exit 0
+fi
\ No newline at end of file
diff --git a/tool-comparison.md b/tool-comparison.md
new file mode 100644
index 0000000..3b96114
--- /dev/null
+++ b/tool-comparison.md
@@ -0,0 +1,74 @@
+# Tool Comparison: Current Kali vs REMnux
+
+## Current Tools in Kali-based Container
+
+### PDF Analysis Tools
+- **pdfid.py** - Schnelle Übersicht über PDF-Aufbau
+- **pdf-parser.py** - Zerlegen und extrahieren von PDF-Elementen
+- **peepdf** - PDF-Analyse Framework mit Javascript Analyse
+- **pdftk** - Tool um das PDF zu "flatten"
+- **origami** (Ruby gems) - pdfcop, pdfextract, pdfmetadata
+
+### Malware Analysis Tools
+- **capa** - Malware capability detection
+- **box-js** - JavaScript sandbox analysis
+- **oletools** and Didier Stevens dump scripts (oledump.py, rtfdump.py, emldump.py, etc.)
+- **visidata** - Data exploration and analysis
+- **unfurl** - URL/data analysis
+
+### General Tools
+- **exiftool** - Metadata extraction
+- **catdoc**, **docx2txt** - Document conversion
+- **unrtf** - RTF processing
+- **ImageMagick** - Image processing
+- **DidierStevens suite** - many analysis tools in /opt/didierstevenssuite/
+
+### System Tools
+- **mc** - Midnight Commander
+- **wget**, **git**, **p7zip-full**, **npm**
+
+## REMnux PDF Tools (Already Available)
+
+### PDF Analysis Suite (✅ Already have most)
+- **peepdf** ✅ - same tool
+- **origami** ✅ - same Ruby gem suite
+- **pdf-parser.py** ✅ - same Didier Stevens tool
+- **pdfid.py** ✅ - same Didier Stevens tool
+- **pdftk-java** ✅ - same as pdftk but Java version
+- **qpdf** ❌ - PDF manipulation tool we don't have
+- **pdfresurrect** ❌ - Extract previous PDF versions, we don't have
+- **pdftool** ❌ - Analyze PDF incremental updates, we don't have
+
+### General Document Analysis (REMnux has)
+- **base64dump** ❌ - Didier Stevens Base64 decoder, we don't have
+- **tesseract** ❌ - OCR tool, we don't have
+
+## Analysis: What We Need to Add
+
+### Tools REMnux has that we don't:
+1. **qpdf** - PDF manipulation (merge, convert, transform)
+2. **pdfresurrect** - Extract previous versions from PDFs
+3. **pdftool** - PDF incremental update analysis
+4. **base64dump.py** - Base64 decoder (Didier Stevens)
+5. **tesseract** - OCR tool
+
+### Tools we have that REMnux doesn't explicitly list:
+1. **capa** - Malware capability detection
+2. **box-js** - JavaScript sandbox
+3. **visidata** - Data exploration
+4. **unfurl** - URL/data analysis
+5. **ImageMagick** - Image processing
+6.
**catdoc/docx2txt** - Document conversion + +## Migration Strategy + +REMnux base image will provide: +- All our current PDF tools (peepdf, origami, pdf-parser.py, pdfid.py, pdftk) +- Plus additional tools (qpdf, pdfresurrect, pdftool, base64dump, tesseract) + +We need to add: +- capa (malware analysis) +- box-js (JavaScript analysis) +- visidata (data exploration) +- unfurl (URL analysis) +- Our German documentation files \ No newline at end of file