moved pdf-analysis from gists to own repo
renamed pdf-analysis to file-analysis, moved files to subfolder
This commit is contained in:
54
file-analysis.dockerfile
Normal file
54
file-analysis.dockerfile
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# Base image for the PDF/file analysis toolbox.
FROM ubuntu:16.04
# MAINTAINER is deprecated; the author is recorded as an OCI image label instead.
LABEL org.opencontainers.image.authors="tabledevil"

# The package installs and source builds that follow need root privileges.
USER root
|
||||||
|
|
||||||
|
# git is needed to fetch the analysis tools below.
# Recommended packages are left enabled on purpose: presumably the CA
# certificates required for the HTTPS clones arrive that way — verify before
# adding --no-install-recommends.
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*

# Shallow clones: only the current tree is needed in the image, not the
# complete project history.
# NOTE(review): both clones track the default branch head, so rebuilds are not
# reproducible — consider pinning a tag or commit.
RUN git clone --depth 1 https://github.com/jesparza/peepdf /opt/peepdf
RUN git clone --depth 1 https://github.com/DidierStevens/DidierStevensSuite /opt/didierstevenssuite
|
||||||
|
|
||||||
|
# Analysis tools (pdftk, imagemagick), Python packages and the build toolchain
# (autoconf, libtool, pkg-config; the boost headers are presumably for the
# pyv8 build — confirm).
# The cleanup is chained with && instead of ';' so that a failed install
# aborts the build instead of being masked by a successful rm.
# NOTE(review): python3-lxml is a Python 3 package while the other Python
# packages here (python-pil, python-pip) are Python 2 — confirm whether
# python-lxml was intended.
RUN apt-get update && apt-get install -y \
      autoconf \
      imagemagick \
      libboost-python-dev \
      libboost-thread-dev \
      libemu2 \
      libtool \
      pdftk \
      pkg-config \
      python-pil \
      python-pip \
      python3-lxml \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Build and install PyV8 from source, then remove the checkout in the same
# layer. Every step is chained with && so a failed clone or cd stops the build.
RUN git clone https://github.com/buffer/pyv8.git \
    && cd pyv8 \
    && python setup.py build \
    && python setup.py install \
    && cd .. \
    && rm -rf pyv8

# Build libemu from source and install it under /opt/libemu.
# The clone creates the directory "libemu", so that is the directory that has
# to be removed afterwards (removing "libemu2" left the source tree in the image).
RUN git clone https://github.com/buffer/libemu.git \
    && cd libemu \
    && autoreconf -v -i \
    && ./configure --prefix=/opt/libemu \
    && make install \
    && cd .. \
    && rm -rf libemu

# Install pylibemu from PyPI; --no-cache-dir keeps pip's download cache out of
# the layer.
RUN pip install --no-cache-dir pylibemu
|
||||||
|
|
||||||
|
# Install exiftool. The cleanup is chained with && so that a failed install
# fails the build instead of being hidden by the exit status of rm.
RUN apt-get update && apt-get install -y exiftool && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Make the analysis scripts callable and relax the ImageMagick PDF policy:
#  - mark every *py file of the Didier Stevens suite as executable
#  - link peepdf.py into /bin and make it executable
#  - open /opt/peepdf/ to all users
#  - on policy.xml lines mentioning PDF, replace "none" with "read|write"
# NOTE(review): mode 777 on /opt/peepdf/ is very broad; presumably peepdf has
# to write there as the unprivileged user — confirm and tighten if possible.
RUN chmod +x /opt/didierstevenssuite/*py \
    && ln --symbolic /opt/peepdf/peepdf.py /bin/peepdf.py \
    && chmod +x /bin/peepdf.py \
    && chmod --recursive 777 /opt/peepdf/ \
    && sed --in-place '/PDF/s/"none"/"read|write"/' /etc/ImageMagick-6/policy.xml
|
||||||
|
|
||||||
|
# Put the Didier Stevens scripts on PATH so they can be called by name.
ENV PATH="/opt/didierstevenssuite/:${PATH}"

# Plain local files: COPY is the explicit instruction for this; ADD's extra
# archive/URL handling is not needed here.
COPY files/README /opt/README
COPY files/command_help /opt/command_help

# Print the README whenever bash reads /etc/bash.bashrc.
RUN echo 'cat /opt/README' >> /etc/bash.bashrc
|
||||||
|
|
||||||
|
# Account "user" (UID/GID 1000): system group and system user without a login
# shell; the home directory is created by hand and handed over to the account.
RUN groupadd --system --gid 1000 user \
    && useradd --system \
         --uid 1000 \
         --gid user \
         --home-dir /home/user \
         --shell /sbin/nologin \
         --comment "Nonroot User" \
         user \
    && mkdir /home/user \
    && chown -R user:user /home/user
|
||||||
|
|
||||||
|
# Account "nonroot" (UID/GID 1001): system group and system user without a
# login shell; the home directory is created by hand and handed over to the
# account.
RUN groupadd --system --gid 1001 nonroot \
    && useradd --system \
         --uid 1001 \
         --gid nonroot \
         --home-dir /home/nonroot \
         --shell /sbin/nologin \
         --comment "Nonroot User" \
         nonroot \
    && mkdir /home/nonroot \
    && chown -R nonroot:nonroot /home/nonroot
|
||||||
|
|
||||||
|
# Start in the unprivileged user's home directory and drop root privileges.
# (WORKDIR was previously declared twice with the same path; once is enough.)
WORKDIR /home/nonroot/
USER nonroot

# Exec form: bash is started directly instead of being wrapped in `/bin/sh -c`,
# so it runs as PID 1 and receives signals from `docker stop`.
CMD ["/bin/bash"]
|
||||||
11
files/README
Normal file
11
files/README
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
README - pdfanalysis
|
||||||
|
Dieser Container enthält Tools, um PDFs zu analysieren:
|
||||||
|
|
||||||
|
pdfid.py - Schnelle Übersicht über PDF-Aufbau.
|
||||||
|
pdf-parser.py - Zerlegen und Extrahieren von PDF-Elementen
|
||||||
|
peepdf.py - PDF-Analyse-Framework mit JavaScript-Analyse
|
||||||
|
pdftk - Tool, um das PDF zu "flatten"
|
||||||
|
convert - ImageMagick-Tool zum Konvertieren
|
||||||
|
|
||||||
|
Für Kommandobeispiele /opt/command_help lesen.
|
||||||
|
Der Nutzer innerhalb des Containers braucht Schreibrechte auf das gemountete Verzeichnis.
|
||||||
54
files/command_help
Normal file
54
files/command_help
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
## pdfid.py
|
||||||
|
#Überblick über Inhalt eines PDFs
|
||||||
|
pdfid.py <pdf-file>
|
||||||
|
|
||||||
|
#Um einen ganzen Ordner mit PDFs zu analysieren
|
||||||
|
pdfid.py -s <dir>
|
||||||
|
|
||||||
|
#Verarbeitung erzwingen, auch wenn Datei defekt ist
|
||||||
|
pdfid.py -f <pdf-file>
|
||||||
|
|
||||||
|
|
||||||
|
## peepdf.py
|
||||||
|
#Überblick ähnlich pdfid.py
|
||||||
|
peepdf.py <pdf-file>
|
||||||
|
|
||||||
|
#Verarbeitung erzwingen, auch wenn Datei defekt ist, z. B. bei der Fehlermeldung:
|
||||||
|
# Error: An error has occurred while parsing an indirect object!!
|
||||||
|
peepdf.py -f <pdf-file>
|
||||||
|
|
||||||
|
#Starten der interaktiven peepdf-Shell (empfohlen)
|
||||||
|
peepdf.py -f -i <pdf-file>
|
||||||
|
|
||||||
|
# In der Shell zeigt die Eingabe von "help" weitere Informationen an
|
||||||
|
|
||||||
|
|
||||||
|
## pdf-parser.py
|
||||||
|
#Überblick über Struktur des Dokuments
|
||||||
|
pdf-parser.py <pdf-file>
|
||||||
|
|
||||||
|
#Auswahl eines Elements
|
||||||
|
pdf-parser.py -o <obj-ID> <pdf-file>
|
||||||
|
|
||||||
|
#Hashen der Elemente
|
||||||
|
pdf-parser.py -H <pdf-file>
|
||||||
|
pdf-parser.py -H -o <obj-id> <pdf-file>
|
||||||
|
|
||||||
|
#Exportieren eines Objekts (z. B. word.doc)
|
||||||
|
pdf-parser.py -d <extracted-filename> -f -o <obj-id> <pdf-file>
|
||||||
|
|
||||||
|
|
||||||
|
## pdftk
|
||||||
|
#Dateien extrahieren
|
||||||
|
pdftk <pdf-file> unpack_files
|
||||||
|
|
||||||
|
#PDF plätten (JavaScript und andere Objekte entfernen)
|
||||||
|
#Dieser Prozess ist anschließend zu überprüfen
|
||||||
|
pdftk <pdf-file> cat output flattened_<original_filename>
|
||||||
|
|
||||||
|
#PDF - reparieren
|
||||||
|
pdftk <pdf-file> output fixed_<original_filename>
|
||||||
|
|
||||||
|
## convert
|
||||||
|
#PDF in TIFF plätten
|
||||||
|
convert <pdf-file> <original_filename>.tiff
|
||||||
Reference in New Issue
Block a user