Rework to CIRCL hashlookup offline bloom (SHA-1)

Replace the self-built 2021 NSRL RDS md5 bloom with CIRCL's offline
hashlookup-full.bloom (SHA-1, NSRL + more), downloaded at build. Old
single-hash CLI preserved (now SHA-1); 'analyse -d <dir>' runs
hashlookup-forensic-analyser against the bundled bloom.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
tabledevil
2026-06-10 13:38:48 +02:00
parent 0d374d6bdb
commit 09ab281881
6 changed files with 97 additions and 246 deletions
+32 -13
View File
@@ -1,19 +1,38 @@
FROM alpine AS builder
# tabledevil/nsrl — known-file hash filter, now backed by CIRCL hashlookup.
#
# Replaces the old self-built NSRL RDS md5 bloom (frozen at RDS 2.72 / 2021)
# with CIRCL's hashlookup-full.bloom: SHA-1, NSRL + many more known-good
# sources, refreshed upstream. Downloaded at build time (~1 GB) so lookups
# are fully offline; the bot rebuilds on a monthly cadence.
#
# # single hashes (old CLI preserved, now SHA-1):
# docker run --rm tabledevil/nsrl <sha1> [<sha1> ...]
# cat sha1s.txt | docker run --rm -i tabledevil/nsrl -s -0 # only misses
#
# # analyse a whole directory tree (hashlookup-forensic-analyser):
# docker run --rm -v /evidence:/data:ro tabledevil/nsrl analyse -d /data
FROM python:3.12-slim
COPY nsrl /nsrl
RUN apk add -U tini alpine-sdk python3 python3-dev py3-pip p7zip \
&& python3 -m pip install git+https://github.com/jaybaird/python-bloomfilter/ \
&& /nsrl/shrink_nsrl.sh \
&& apk del --purge alpine-sdk py3-pip python3-dev p7zip \
&& rm -rf /tmp/* /root/.cache /var/cache/apk/* /nsrl/shrink_nsrl.sh
# OS packages: ca-certificates + curl for the HTTPS bloom download, git for the
# analyser checkout, libmagic1 presumably as the native library behind the
# filemagic Python package (confirm). The apt lists are removed in the same
# layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libmagic1 \
    && rm -rf /var/lib/apt/lists/*
FROM alpine
LABEL maintainer="tabledevil"
COPY --from=builder / /
# Install the Python packages (no pip cache kept in the layer), then take a
# shallow (depth 1) clone of hashlookup-forensic-analyser into /opt/hfa.
# NOTE(review): neither the packages nor the clone are pinned to a version.
RUN pip install --no-cache-dir \
        filemagic \
        flor \
        pytz \
        requests \
    && git clone --depth=1 \
        https://github.com/hashlookup/hashlookup-forensic-analyser \
        /opt/hfa
# The bloom filter is the data payload — fetched fresh every (monthly) rebuild.
#
# /nsrl/bloom.info records where the filter came from plus the upstream
# Last-Modified header. The header lookup is best effort: if the HEAD request
# fails or the header is absent, the build still succeeds and only the
# "source" line is written. HTTP header lines end in CRLF, so the carriage
# return is stripped before the line lands in bloom.info.
RUN url=https://cra.circl.lu/hashlookup/hashlookup-full.bloom \
    && mkdir -p /nsrl \
    && curl -fsSL -o /nsrl/hashlookup-full.bloom "$url" \
    && { echo "source = $url"; \
         curl -fsSI "$url" \
           | grep -i '^last-modified' \
           | tr -d '\r' || true; } > /nsrl/bloom.info
# Lookup script and container entrypoint. --chmod sets the executable bit
# during the copy itself (requires BuildKit), so no separate chmod layer that
# would store a second copy of the file is created.
COPY nsrl/search.py /nsrl/search.py
COPY --chmod=0755 entrypoint.sh /entrypoint.sh
WORKDIR /nsrl
ENTRYPOINT ["/sbin/tini","--","python3","/nsrl/search.py"]
# Exec-form entrypoint: every argument given to `docker run` is passed to
# /entrypoint.sh. With no arguments the default "-h" is passed instead
# (presumably prints usage — confirm against entrypoint.sh).
ENTRYPOINT ["/entrypoint.sh"]
CMD ["-h"]