Rework to CIRCL hashlookup offline bloom (SHA-1)

Replace the self-built 2021 NSRL RDS md5 bloom with CIRCL's offline
hashlookup-full.bloom (SHA-1, NSRL + more), downloaded at build. Old
single-hash CLI preserved (now SHA-1); 'analyse -d <dir>' runs
hashlookup-forensic-analyser against the bundled bloom.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
tabledevil
2026-06-10 13:38:48 +02:00
parent 0d374d6bdb
commit 09ab281881
6 changed files with 97 additions and 246 deletions
+52 -47
View File
@@ -1,64 +1,69 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Known-file SHA-1 lookup against the offline CIRCL hashlookup bloom filter.
# CLI is compatible with the old NSRL md5 search.py (hashes as args or -s
# stdin; -0/-1 to suppress hits/misses; -v verbose) — but hashes are SHA-1.
import argparse
import binascii
import configparser
import re
import sys
from pybloom import BloomFilter
from flor import BloomFilter
# Bloom filter file that main() opens in binary mode and loads for lookups.
BLOOM_PATH = "/nsrl/hashlookup-full.bloom"
# Text file whose contents main() echoes to stderr in verbose mode
# (best effort: an OSError while reading it is ignored).
INFO_PATH = "/nsrl/bloom.info"
# A well-formed hex SHA-1: exactly 40 hex digits, upper or lower case.
SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}$")
def main(argv=None):
    """Look up SHA-1 hashes in the offline CIRCL hashlookup bloom filter.

    Hashes are taken from the positional arguments or, with ``-s``, from
    stdin (one per line, blank lines skipped). Inputs that are not 40 hex
    digits are reported on stderr as ``!:<input> (not a sha1)`` and skipped.

    Output on stdout, one line per well-formed hash that is not suppressed:

    * ``-v``: ``<hash>:<True|False>``
    * exactly one of ``-0`` / ``-1``: the bare hash
    * otherwise: ``+:<hash>`` for a known hash, ``-:<hash>`` for an unknown one

    ``-0`` suppresses known hashes and ``-1`` suppresses unknown hashes in
    every output mode, including ``-v``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            use ``sys.argv[1:]``.

    Returns:
        0 if every input was a well-formed SHA-1, 2 if at least one was not.

    Raises:
        OSError: If the bloom filter file at ``BLOOM_PATH`` cannot be opened.
    """
    parser = argparse.ArgumentParser(
        prog="nsrl",
        description="Offline known-file lookup (CIRCL hashlookup bloom, SHA-1).",
    )
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Display verbose output")
    parser.add_argument("-0", "--no-hits", action="store_true",
                        help="Suppress output of matching (known) hashes")
    parser.add_argument("-1", "--no-misses", action="store_true",
                        help="Suppress output of unknown hashes")
    # Hashes come either from the command line or from stdin, never both.
    inputs = parser.add_mutually_exclusive_group(required=True)
    inputs.add_argument("hash", metavar="<sha1>", type=str, nargs="*",
                        default=[], help="SHA-1 hash(es) to look up")
    inputs.add_argument("-s", "--stdin", action="store_true",
                        help="Read hashes from stdin (one per line)")
    args = parser.parse_args(argv)

    if args.verbose:
        # The info file is optional: failing to read it must not prevent
        # lookups, so an OSError is deliberately ignored.
        try:
            with open(INFO_PATH) as info_fh:
                sys.stderr.write(info_fh.read())
        except OSError:
            pass

    bf = BloomFilter()
    with open(BLOOM_PATH, "rb") as fh:
        bf.read(fh)

    if args.stdin:
        hashlist = [line.strip() for line in sys.stdin if line.strip()]
    else:
        hashlist = args.hash

    rc = 0
    for hash_hex in hashlist:
        if not SHA1_RE.match(hash_hex):
            print(f"!:{hash_hex} (not a sha1)", file=sys.stderr)
            rc = 2
            continue
        # hashlookup blooms store uppercase-hex SHA-1 strings
        is_known = hash_hex.upper().encode() in bf
        # Apply -0/-1 before choosing the output format so that the
        # suppression flags are honoured in verbose mode as well.
        suppressed = args.no_hits if is_known else args.no_misses
        if suppressed:
            continue
        if args.verbose:
            print(f"{hash_hex}:{is_known}")
        elif args.no_hits != args.no_misses:
            # Only one class of result is shown, so the +/- marker is redundant.
            print(hash_hex)
        else:
            print(f"{'+' if is_known else '-'}:{hash_hex}")
    return rc
if __name__ == "__main__":
    # Run the CLI exactly once and use main()'s return value as the process
    # exit status; unexpected exceptions propagate instead of being printed
    # and turned into a successful exit.
    sys.exit(main())