Files
docker_nsrl/nsrl/search.py
T
tabledevil 09ab281881 Rework to CIRCL hashlookup offline bloom (SHA-1)
Replace the self-built 2021 NSRL RDS md5 bloom with CIRCL's offline
hashlookup-full.bloom (SHA-1, NSRL + more), downloaded at build. Old
single-hash CLI preserved (now SHA-1); 'analyse -d <dir>' runs
hashlookup-forensic-analyser against the bundled bloom.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-10 13:38:48 +02:00

70 lines
2.3 KiB
Python
Executable File

#!/usr/bin/env python3
# Known-file SHA-1 lookup against the offline CIRCL hashlookup bloom filter.
# CLI is compatible with the old NSRL md5 search.py (hashes as args or -s
# stdin; -0/-1 to suppress hits/misses; -v verbose) — but hashes are SHA-1.
import argparse
import re
import sys
from flor import BloomFilter
# Bloom filter file that main() loads before performing any lookup.
BLOOM_PATH = "/nsrl/hashlookup-full.bloom"
# Text file whose contents main() echoes to stderr in verbose mode.
INFO_PATH = "/nsrl/bloom.info"
# Exactly 40 hexadecimal digits, either case. Anchored with \Z rather than $:
# "$" also matches just before a trailing newline, so "<40 hex>\n" would pass
# validation and then be looked up with the newline attached (a false miss).
SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}\Z")
def main():
    """Look up SHA-1 hashes in the offline bloom filter and report each one.

    Hashes are taken from the positional arguments or, with ``-s``, from
    stdin (one per line; surrounding whitespace is stripped and blank lines
    are skipped). The two input sources are mutually exclusive.

    Output on stdout, per well-formed hash:
      * ``-v``: ``<hash>:True`` / ``<hash>:False`` (``-0``/``-1`` are ignored).
      * neither ``-0`` nor ``-1``: ``+:<hash>`` for known, ``-:<hash>`` for
        unknown.
      * exactly one of ``-0``/``-1``: the bare hash, for the kind that is not
        suppressed.
      * both ``-0`` and ``-1``: nothing.

    Inputs that are not 40 hex digits are reported on stderr as
    ``!:<input> (not a sha1)`` and skipped.

    Returns:
        0 if every input was a well-formed SHA-1, 2 if at least one was not.

    Raises:
        OSError: if the bloom file at BLOOM_PATH cannot be opened.
    """
    parser = argparse.ArgumentParser(
        prog="nsrl",
        description="Offline known-file lookup (CIRCL hashlookup bloom, SHA-1).",
    )
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Display verbose output")
    parser.add_argument("-0", "--no-hits", action="store_true",
                        help="Suppress output of matching (known) hashes")
    parser.add_argument("-1", "--no-misses", action="store_true",
                        help="Suppress output of unknown hashes")
    inputs = parser.add_mutually_exclusive_group(required=True)
    # The explicit default keeps this positional optional, which argparse
    # requires for members of a mutually exclusive group.
    inputs.add_argument("hash", metavar="<sha1>", type=str, nargs="*",
                        default=[], help="SHA-1 hash(es) to look up")
    inputs.add_argument("-s", "--stdin", action="store_true",
                        help="Read hashes from stdin (one per line)")
    args = parser.parse_args()

    if args.verbose:
        # The info banner is best-effort: a missing or unreadable file must
        # not prevent lookups. Decoding errors are replaced rather than
        # raised so a stray byte cannot abort the run either.
        try:
            with open(INFO_PATH, encoding="utf-8", errors="replace") as info:
                sys.stderr.write(info.read())
        except OSError:
            pass

    bf = BloomFilter()
    with open(BLOOM_PATH, "rb") as fh:
        bf.read(fh)

    if args.stdin:
        # Stream stdin lazily so large hash lists are not held in memory.
        hashes = (entry for entry in map(str.strip, sys.stdin) if entry)
    else:
        hashes = args.hash

    # With exactly one kind suppressed, every printed line is of the other
    # kind, so the +/- prefix is dropped and the bare hash is printed.
    bare_output = args.no_hits != args.no_misses

    rc = 0
    for hash_hex in hashes:
        # fullmatch (not match) so an input with a trailing newline is
        # rejected instead of being looked up with the newline attached.
        if not SHA1_RE.fullmatch(hash_hex):
            print(f"!:{hash_hex} (not a sha1)", file=sys.stderr)
            rc = 2
            continue

        # hashlookup blooms store uppercase-hex SHA-1 strings
        is_known = hash_hex.upper().encode() in bf

        if args.verbose:
            print(f"{hash_hex}:{is_known}")
            continue

        suppressed = args.no_hits if is_known else args.no_misses
        if suppressed:
            continue

        if bare_output:
            print(hash_hex)
        else:
            print(f"{'+' if is_known else '-'}:{hash_hex}")
    return rc
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())