From 6c75a0c3923f2e609a107a6c20775346f4cf91c6 Mon Sep 17 00:00:00 2001 From: TKE Date: Fri, 17 Apr 2020 11:29:19 +0200 Subject: [PATCH] added argparse support to build --- nsrl/build.py | 65 ++++++++++++++++++++++++++------------------- nsrl/nsrl.conf | 1 + nsrl/shrink_nsrl.sh | 2 +- 3 files changed, 39 insertions(+), 29 deletions(-) mode change 100755 => 100644 nsrl/build.py diff --git a/nsrl/build.py b/nsrl/build.py old mode 100755 new mode 100644 index 02b28bd..a41f877 --- a/nsrl/build.py +++ b/nsrl/build.py @@ -1,24 +1,13 @@ # !/usr/bin/env python # -*- coding: utf-8 -*- -""" -build.py -~~~~~~~~ - -This module builds a bloomfilter from the NSRL Whitelist Database. - -:copyright: (c) 2014 by Josh "blacktop" Maine. -:license: MIT -:improved_by: https://github.com/kost -""" import binascii import os -import sys - from pybloom import BloomFilter -nsrl_path = '/nsrl/NSRLFile.txt' -error_rate = 0.01 + +import argparse + # reference - http://stackoverflow.com/a/9631635 @@ -30,30 +19,50 @@ def blocks(this_file, size=65536): yield b -def main(argv): - if argv: - error_rate = float(argv[0]) +def main(): + parser = argparse.ArgumentParser(prog='build.py') + parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False) + config = parser.add_mutually_exclusive_group() + config.add_argument('-f', "--config", default="/nsrl/nsrl.conf",help='Config file with all settings') + settings = config.add_argument_group() + settings.add_argument('-e','--error-rate', type=float, default=0.01 ,help="Error Rate for False-Positives") + settings.add_argument('-n','--hashcount',type=int, help="Provide the hashcount") + settings.add_argument('-c','--column', type=int, default=1 ,help="Which Column of inputfile should be processed (0,1,...)") + settings.add_argument('-l','--label', default="MD5",help="What kind of Data is beeing processed (MD5,filenames,...)") + settings.add_argument('-d','--delimiter', default=',' , help="Which char is used to delimit columns in inputfile") + settings.add_argument('-i','--inputfile', default='/nsrl/NSRLFile.txt' , help="Path of input file") + + + args = parser.parse_args() + + nsrl_path = args.inputfile + error_rate = args.error_rate + print "[BUILDING] Using error-rate: {}".format(error_rate) if os.path.isfile(nsrl_path): print "[BUILDING] Reading in NSRL Database" - with open(nsrl_path) as f_line: - # Strip off header - _ = f_line.readline() - print "[BUILDING] Calculating number of hashes in NSRL..." - num_lines = sum(bl.count("\n") for bl in blocks(f_line)) - print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines + if args.hashcount is None: + with open(nsrl_path) as f_line: + # Strip off header + _ = f_line.readline() + print "[BUILDING] Calculating number of hashes in NSRL..." + num_lines = sum(bl.count("\n") for bl in blocks(f_line)) + else: + num_lines=args.hashcount + print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines with open(nsrl_path) as f_nsrl: # Strip off header _ = f_nsrl.readline() print "[BUILDING] Creating bloomfilter" bf = BloomFilter(num_lines, error_rate) print "[BUILDING] Inserting hashes into bloomfilter" + # sha1 hash is in column 0 for line in f_nsrl: - md5_hash = line.split(",")[1].strip('"') - if md5_hash: + hashline = line.split(args.delimiter)[args.column].strip('"') + if hashline: try: - md5 = binascii.unhexlify(md5_hash) - bf.add(md5) + hash = binascii.unhexlify(hashline) + bf.add(hash) except Exception as e: print "[ERROR] %s" % e print "[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf)) @@ -67,4 +76,4 @@ def main(argv): if __name__ == "__main__": - main(sys.argv[1:]) + main() diff --git a/nsrl/nsrl.conf b/nsrl/nsrl.conf index d805f9c..7578c39 100644 --- a/nsrl/nsrl.conf +++ b/nsrl/nsrl.conf @@ -3,6 +3,7 @@ rds_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/rds_modernm.zip rds_name = Reduced Modern version_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/version.txt hashfile_name = NSRLFile.txt +hashfile_path = /nsrl/NSRLFile.txt hashfile_type = md5 hashfile_column = 1 hashfile_delimiter = , diff --git a/nsrl/shrink_nsrl.sh b/nsrl/shrink_nsrl.sh index 3bfb9c1..90208b7 100755 --- a/nsrl/shrink_nsrl.sh +++ b/nsrl/shrink_nsrl.sh @@ -40,7 +40,7 @@ let hash_count=$(cat "/nsrl/${hashfile_name}"|wc -l )-1 echo "[INFO] /nsrl/${hashfile_name} contains ${hash_count} Hashes" echo "[INFO] Build bloomfilter from NSRL Database ..." -cd /nsrl && python /nsrl/build.py $error_rate +cd /nsrl && python /nsrl/build.py -e "${error_rate}" -n "${hash_count}" echo "[INFO] Listing created files ..." ls -lah /nsrl