#!/usr/bin/env python3
# Known-file SHA-1 lookup against the offline CIRCL hashlookup bloom filter.
# CLI is compatible with the old NSRL md5 search.py (hashes as args or -s
# stdin; -0/-1 to suppress hits/misses; -v verbose) — but hashes are SHA-1.
import argparse
import re
import sys

from flor import BloomFilter

BLOOM_PATH = "/nsrl/hashlookup-full.bloom"
INFO_PATH = "/nsrl/bloom.info"
SHA1_RE = re.compile(r"^[0-9a-fA-F]{40}$")


def _build_parser():
    """Return the argument parser describing the nsrl command line."""
    parser = argparse.ArgumentParser(
        prog="nsrl",
        description="Offline known-file lookup (CIRCL hashlookup bloom, SHA-1).",
    )
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Display verbose output")
    parser.add_argument("-0", "--no-hits", action="store_true",
                        help="Suppress output of matching (known) hashes")
    parser.add_argument("-1", "--no-misses", action="store_true",
                        help="Suppress output of unknown hashes")

    # Exactly one input source must be given: positional hashes or -s.
    # default=[] matters here: with no positional hashes argparse then hands
    # back the default object itself, so the positional does not count as
    # "supplied" and does not conflict with -s.
    inputs = parser.add_mutually_exclusive_group(required=True)
    # NOTE(review): the empty metavar leaves the positional unnamed in the
    # usage/help text; confirm this is intended.
    inputs.add_argument("hash", metavar="", type=str, nargs="*", default=[],
                        help="SHA-1 hash(es) to look up")
    inputs.add_argument("-s", "--stdin", action="store_true",
                        help="Read hashes from stdin (one per line)")
    return parser


def main():
    """Look up SHA-1 hashes in the bloom filter and print the results.

    Hashes come from the positional arguments or, with -s, from stdin (one
    per line; surrounding whitespace is stripped and blank lines skipped).

    Output on stdout, one line per valid hash:
      * -v:                  "<hash>:True" / "<hash>:False" for every hash
                             (-0/-1 are not consulted in this mode).
      * exactly one of -0/-1: the bare hash, for the class not suppressed.
      * neither -0 nor -1:   "+:<hash>" for known, "-:<hash>" for unknown.
      * both -0 and -1:      nothing.
    Inputs that are not 40 hex digits are reported on stderr as
    "!:<input> (not a sha1)" and skipped.

    Returns:
        0 if every input was a well-formed SHA-1, 2 if at least one was not.

    Raises:
        OSError: if the bloom filter file at BLOOM_PATH cannot be opened.
    """
    args = _build_parser().parse_args()

    if args.verbose:
        # Best effort: the info file is optional, so a missing or unreadable
        # file is deliberately ignored.
        try:
            with open(INFO_PATH) as info_fh:
                sys.stderr.write(info_fh.read())
        except OSError:
            pass

    bf = BloomFilter()
    with open(BLOOM_PATH, "rb") as fh:
        bf.read(fh)

    if args.stdin:
        # Stream stdin lazily instead of materialising every line up front.
        candidates = (text for text in map(str.strip, sys.stdin) if text)
    else:
        candidates = args.hash

    rc = 0
    for hash_hex in candidates:
        # fullmatch, not match: "$" also matches just before a trailing
        # newline, which would let "<40 hex>\n" through validation.
        if not SHA1_RE.fullmatch(hash_hex):
            print(f"!:{hash_hex} (not a sha1)", file=sys.stderr)
            rc = 2
            continue

        # hashlookup blooms store uppercase-hex SHA-1 strings
        is_known = hash_hex.upper().encode() in bf

        if args.verbose:
            print(f"{hash_hex}:{is_known}")
            continue

        suppressed = args.no_hits if is_known else args.no_misses
        if suppressed:
            continue

        if args.no_hits != args.no_misses:
            # Only one class is being shown, so the +/- prefix is redundant.
            print(hash_hex)
        else:
            print(f"{'+' if is_known else '-'}:{hash_hex}")

    return rc


if __name__ == "__main__":
    sys.exit(main())