# tabledevil/nsrl — known-file hash filter, now backed by CIRCL hashlookup.
#
# Replaces the old self-built NSRL RDS md5 bloom (frozen at RDS 2.72 / 2021)
# with CIRCL's hashlookup-full.bloom: SHA-1, NSRL + many more known-good
# sources, refreshed upstream. Downloaded at build time (~1 GB) so lookups
# are fully offline; the bot rebuilds on a monthly cadence.
#
#   # single hashes (old CLI preserved, now SHA-1):
#   docker run --rm tabledevil/nsrl <sha1> [<sha1> ...]
#   cat sha1s.txt | docker run --rm -i tabledevil/nsrl -s -0   # only misses
#
#   # analyse a whole directory tree (hashlookup-forensic-analyser):
#   docker run --rm -v /evidence:/data:ro tabledevil/nsrl analyse -d /data

# Base image: the slim Python 3.12 variant; OS packages are added with apt-get.
FROM python:3.12-slim

# OS-level dependencies, one per line in alphabetical order:
#   ca-certificates  trust store for the HTTPS fetches done later in the build
#   curl             downloads the bloom filter
#   git              clones hashlookup-forensic-analyser
#   libmagic1        presumably the shared library behind the `filemagic`
#                    Python package — confirm against the analyser's imports
# The apt package lists are removed in the same layer so they never reach the
# final image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        git \
        libmagic1 \
    && rm -rf /var/lib/apt/lists/*

# Python packages plus a shallow (history-free) checkout of
# hashlookup-forensic-analyser, placed in /opt/hfa. pip's download cache is
# disabled so it does not end up in the layer.
RUN pip install --no-cache-dir \
        filemagic \
        flor \
        pytz \
        requests \
    && git clone --depth 1 \
        https://github.com/hashlookup/hashlookup-forensic-analyser \
        /opt/hfa

# The bloom filter is the data payload — fetched fresh every (monthly) rebuild.
# BLOOM_URL may be overridden with --build-arg (e.g. to use a mirror); the
# default is the upstream CIRCL location.
ARG BLOOM_URL=https://cra.circl.lu/hashlookup/hashlookup-full.bloom

# Download the filter and write /nsrl/bloom.info: a "source = <url>" line
# followed by the Last-Modified response header when the server sends one.
#
# The header is taken from the response that actually delivered the file
# (--dump-header) instead of a separate HEAD request, so the recorded
# timestamp always describes the bytes stored in the image, and the build
# makes one request rather than two. Because -L follows redirects (and
# --retry may repeat the transfer), the header dump can contain several
# response blocks; `tail -n 1` keeps the Last-Modified of the final one.
# Recording the header stays best-effort: a missing header never fails the
# build. The temporary header dump is removed in the same layer.
RUN mkdir -p /nsrl \
    && curl -fsSL --retry 3 \
         --dump-header /tmp/bloom.headers \
         -o /nsrl/hashlookup-full.bloom \
         "${BLOOM_URL}" \
    && { echo "source = ${BLOOM_URL}"; \
         grep -i '^last-modified' /tmp/bloom.headers | tail -n 1 || true; \
       } > /nsrl/bloom.info \
    && rm -f /tmp/bloom.headers

# Application files from the build context: the lookup script is placed in
# /nsrl (the same directory as the downloaded bloom filter) and the entrypoint
# wrapper at the image root.
COPY ["nsrl/search.py", "/nsrl/search.py"]
COPY ["entrypoint.sh", "/entrypoint.sh"]

# Set the executable bit explicitly so the image does not depend on the file
# mode recorded in the build context.
RUN chmod +x /entrypoint.sh

# Runtime configuration.
# Start in /nsrl, the directory holding search.py, the bloom filter and
# bloom.info.
WORKDIR /nsrl
# Exec-form entrypoint: the wrapper script is started directly (no /bin/sh -c
# in between) and receives the `docker run` arguments as its argv.
ENTRYPOINT ["/entrypoint.sh"]
# Default argument used when `docker run` supplies none; presumably makes the
# wrapper print usage help — confirm in entrypoint.sh.
CMD ["-h"]
