09ab281881
Replace the self-built 2021 NSRL RDS md5 bloom with CIRCL's offline hashlookup-full.bloom (SHA-1, NSRL + more), downloaded at build. Old single-hash CLI preserved (now SHA-1); 'analyse -d <dir>' runs hashlookup-forensic-analyser against the bundled bloom. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
70 lines
2.3 KiB
Python
Executable File
70 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Known-file SHA-1 lookup against the offline CIRCL hashlookup bloom filter.
|
|
# CLI is compatible with the old NSRL md5 search.py (hashes as args or -s
|
|
# stdin; -0/-1 to suppress hits/misses; -v verbose) — but hashes are SHA-1.
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
|
|
from flor import BloomFilter
|
|
|
|
# Location of the bloom filter file; main() opens it in binary mode and loads
# it with BloomFilter.read().
BLOOM_PATH = "/nsrl/hashlookup-full.bloom"
|
|
# Text file whose contents main() copies to stderr in verbose mode; an OSError
# while opening/reading it is ignored there.
INFO_PATH = "/nsrl/bloom.info"
|
|
# Pattern for a SHA-1 digest written as 40 hexadecimal digits (upper or lower
# case); main() uses it to validate every input before the bloom lookup.
SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}$")
|
|
|
|
|
|
def main() -> int:
    """Look up SHA-1 hashes in the offline CIRCL hashlookup bloom filter.

    Hashes are taken from the positional arguments or, with ``-s``, from
    stdin (one per line, surrounding whitespace stripped, blank lines
    skipped). Exactly one of the two input sources must be given.

    Output on stdout, one line per well-formed hash:

    * ``-v``: ``<hash>:True`` or ``<hash>:False`` for every hash
      (``-0``/``-1`` are ignored in this mode).
    * neither ``-0`` nor ``-1``: ``+:<hash>`` for a known hash,
      ``-:<hash>`` for an unknown one.
    * exactly one of ``-0``/``-1``: the bare hash, for the class that is
      not suppressed.
    * both ``-0`` and ``-1``: nothing.

    Inputs that are not 40 hex digits are reported on stderr as
    ``!:<input> (not a sha1)`` and skipped.

    Returns:
        0 if every input was a well-formed SHA-1, 2 if at least one was not.

    Raises:
        OSError: if the bloom filter file at ``BLOOM_PATH`` cannot be opened.
    """
    parser = argparse.ArgumentParser(
        prog="nsrl",
        description="Offline known-file lookup (CIRCL hashlookup bloom, SHA-1).",
    )
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Display verbose output")
    parser.add_argument("-0", "--no-hits", action="store_true",
                        help="Suppress output of matching (known) hashes")
    parser.add_argument("-1", "--no-misses", action="store_true",
                        help="Suppress output of unknown hashes")
    inputs = parser.add_mutually_exclusive_group(required=True)
    inputs.add_argument("hash", metavar="<sha1>", type=str, nargs="*",
                        default=[], help="SHA-1 hash(es) to look up")
    inputs.add_argument("-s", "--stdin", action="store_true",
                        help="Read hashes from stdin (one per line)")
    args = parser.parse_args()

    if args.verbose:
        # Best-effort banner: a missing/unreadable info file must not block
        # lookups, and undecodable bytes are replaced rather than raising.
        try:
            with open(INFO_PATH, errors="replace") as info_fh:
                sys.stderr.write(info_fh.read())
        except OSError:
            pass

    bf = BloomFilter()
    with open(BLOOM_PATH, "rb") as fh:
        bf.read(fh)

    if args.stdin:
        # Lazily strip each line and drop the empty ones, so large inputs
        # are streamed instead of being held in memory.
        hashes = filter(None, map(str.strip, sys.stdin))
    else:
        hashes = args.hash

    # With exactly one class suppressed the +/- prefix carries no
    # information, so the bare hash is printed instead.
    bare_output = args.no_hits != args.no_misses

    rc = 0
    for hash_hex in hashes:
        # fullmatch, not match: the pattern's "$" also matches just before a
        # trailing newline, which would otherwise let "<40 hex>\n" through
        # and then be looked up with the newline included.
        if not SHA1_RE.fullmatch(hash_hex):
            print(f"!:{hash_hex} (not a sha1)", file=sys.stderr)
            rc = 2
            continue

        # hashlookup blooms store uppercase-hex SHA-1 strings
        is_known = hash_hex.upper().encode() in bf

        if args.verbose:
            print(f"{hash_hex}:{is_known}")
            continue

        suppressed = args.no_hits if is_known else args.no_misses
        if suppressed:
            continue
        if bare_output:
            print(hash_hex)
        else:
            print(f"{'+' if is_known else '-'}:{hash_hex}")
    return rc
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|