Rework to CIRCL hashlookup offline bloom (SHA-1)
Replace the self-built 2021 NSRL RDS MD5 bloom filter with CIRCL's offline hashlookup-full.bloom (SHA-1, NSRL + more), which is downloaded at build time. The old single-hash CLI is preserved (now SHA-1); 'analyse -d <dir>' runs hashlookup-forensic-analyser against the bundled bloom filter. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
+52
-47
@@ -1,64 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
# Known-file SHA-1 lookup against the offline CIRCL hashlookup bloom filter.
|
||||
# CLI is compatible with the old NSRL md5 search.py (hashes as args or -s
|
||||
# stdin; -0/-1 to suppress hits/misses; -v verbose) — but hashes are SHA-1.
|
||||
|
||||
import argparse
|
||||
import binascii
|
||||
import configparser
|
||||
import re
|
||||
import sys
|
||||
|
||||
from pybloom import BloomFilter
|
||||
from flor import BloomFilter
|
||||
|
||||
BLOOM_PATH = "/nsrl/hashlookup-full.bloom"
|
||||
INFO_PATH = "/nsrl/bloom.info"
|
||||
SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}$")
|
||||
|
||||
|
||||
def main():
    """Look up SHA-1 hashes in the offline CIRCL hashlookup bloom filter.

    Hashes come either from positional arguments or, with ``-s``, from stdin
    (one per line, blank lines skipped).  For every syntactically valid hash
    one line is printed to stdout, unless suppressed by ``-0`` (hide known
    hashes) or ``-1`` (hide unknown hashes):

    * ``-v``: ``<hash>:<True|False>``
    * exactly one of ``-0`` / ``-1``: the bare hash
    * otherwise: ``+:<hash>`` for a known hash, ``-:<hash>`` for an unknown one

    Inputs that are not 40 hex digits are reported on stderr and skipped.

    Returns:
        int: 0 if every input was a well-formed SHA-1, 2 if at least one
        input was rejected.  Intended to be passed to ``sys.exit``.

    Raises:
        OSError: if the bloom filter file at ``BLOOM_PATH`` cannot be opened.
    """
    parser = argparse.ArgumentParser(
        prog="nsrl",
        description="Offline known-file lookup (CIRCL hashlookup bloom, SHA-1).",
    )
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Display verbose output")
    parser.add_argument("-0", "--no-hits", action="store_true",
                        help="Suppress output of matching (known) hashes")
    parser.add_argument("-1", "--no-misses", action="store_true",
                        help="Suppress output of unknown hashes")
    # Each input source is registered exactly once; registering "-s" twice on
    # the same parser makes argparse raise a conflicting-option error.
    inputs = parser.add_mutually_exclusive_group(required=True)
    inputs.add_argument("hash", metavar="<sha1>", type=str, nargs="*",
                        default=[], help="SHA-1 hash(es) to look up")
    inputs.add_argument("-s", "--stdin", action="store_true",
                        help="Read hashes from stdin (one per line)")
    args = parser.parse_args()

    if args.verbose:
        # Best effort: the info file is optional metadata, so a missing or
        # unreadable file is ignored.  It goes to stderr to keep stdout
        # limited to lookup results.
        try:
            with open(INFO_PATH) as info:
                sys.stderr.write(info.read())
        except OSError:
            pass

    bf = BloomFilter()
    with open(BLOOM_PATH, "rb") as fh:
        bf.read(fh)

    if args.stdin:
        hashlist = [line.strip() for line in sys.stdin if line.strip()]
    else:
        hashlist = args.hash

    # With exactly one of -0/-1 every printed line has the same verdict, so
    # the "+:" / "-:" prefix is dropped.
    bare_output = args.no_hits != args.no_misses

    rc = 0
    for hash_hex in hashlist:
        if not SHA1_RE.match(hash_hex):
            print(f"!:{hash_hex} (not a sha1)", file=sys.stderr)
            rc = 2
            continue

        # hashlookup blooms store uppercase-hex SHA-1 strings
        is_known = hash_hex.upper().encode() in bf

        # -0 / -1 apply in every output mode, including -v, as in the
        # original search.py CLI.
        if (is_known and args.no_hits) or (not is_known and args.no_misses):
            continue

        if args.verbose:
            print(f"{hash_hex}:{is_known}")
        elif bare_output:
            print(hash_hex)
        else:
            print(f"{'+' if is_known else '-'}:{hash_hex}")
    return rc
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI exactly once and hand its return code to the shell.
    # Errors are no longer caught and printed to stdout here, because that
    # made a failed run exit as if it had succeeded.
    sys.exit(main())
|
||||
|
||||
Reference in New Issue
Block a user