Added argparse support to build.py

This commit is contained in:
TKE
2020-04-17 11:29:19 +02:00
parent fa146c51a7
commit 6c75a0c392
3 changed files with 39 additions and 29 deletions

53
nsrl/build.py Executable file → Normal file
View File

@@ -1,24 +1,13 @@
# !/usr/bin/env python # !/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""
build.py
~~~~~~~~
This module builds a bloomfilter from the NSRL Whitelist Database.
:copyright: (c) 2014 by Josh "blacktop" Maine.
:license: MIT
:improved_by: https://github.com/kost
"""
import binascii import binascii
import os import os
import sys
from pybloom import BloomFilter from pybloom import BloomFilter
nsrl_path = '/nsrl/NSRLFile.txt'
error_rate = 0.01 import argparse
# reference - http://stackoverflow.com/a/9631635 # reference - http://stackoverflow.com/a/9631635
@@ -30,17 +19,36 @@ def blocks(this_file, size=65536):
yield b yield b
def main(argv): def main():
if argv: parser = argparse.ArgumentParser(prog='build.py')
error_rate = float(argv[0]) parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
config = parser.add_mutually_exclusive_group()
config.add_argument('-f', "--config", default="/nsrl/nsrl.conf",help='Config file with all settings')
settings = config.add_argument_group()
settings.add_argument('-e','--error-rate', type=float, default=0.01 ,help="Error Rate for False-Positives")
settings.add_argument('-n','--hashcount',type=int, help="Provide the hashcount")
settings.add_argument('-c','--column', type=int, default=1 ,help="Which Column of inputfile should be processed (0,1,...)")
settings.add_argument('-l','--label', default="MD5",help="What kind of Data is beeing processed (MD5,filenames,...)")
settings.add_argument('-d','--delimiter', default=',' , help="Which char is used to delimit columns in inputfile")
settings.add_argument('-i','--inputfile', default='/nsrl/NSRLFile.txt' , help="Path of input file")
args = parser.parse_args()
nsrl_path = args.inputfile
error_rate = args.error_rate
print "[BUILDING] Using error-rate: {}".format(error_rate) print "[BUILDING] Using error-rate: {}".format(error_rate)
if os.path.isfile(nsrl_path): if os.path.isfile(nsrl_path):
print "[BUILDING] Reading in NSRL Database" print "[BUILDING] Reading in NSRL Database"
if args.hashcount is None:
with open(nsrl_path) as f_line: with open(nsrl_path) as f_line:
# Strip off header # Strip off header
_ = f_line.readline() _ = f_line.readline()
print "[BUILDING] Calculating number of hashes in NSRL..." print "[BUILDING] Calculating number of hashes in NSRL..."
num_lines = sum(bl.count("\n") for bl in blocks(f_line)) num_lines = sum(bl.count("\n") for bl in blocks(f_line))
else:
num_lines=args.hashcount
print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines
with open(nsrl_path) as f_nsrl: with open(nsrl_path) as f_nsrl:
# Strip off header # Strip off header
@@ -48,12 +56,13 @@ def main(argv):
print "[BUILDING] Creating bloomfilter" print "[BUILDING] Creating bloomfilter"
bf = BloomFilter(num_lines, error_rate) bf = BloomFilter(num_lines, error_rate)
print "[BUILDING] Inserting hashes into bloomfilter" print "[BUILDING] Inserting hashes into bloomfilter"
# sha1 hash is in column 0
for line in f_nsrl: for line in f_nsrl:
md5_hash = line.split(",")[1].strip('"') hashline = line.split(args.delimiter)[args.column].strip('"')
if md5_hash: if hashline:
try: try:
md5 = binascii.unhexlify(md5_hash) hash = binascii.unhexlify(hashline)
bf.add(md5) bf.add(hash)
except Exception as e: except Exception as e:
print "[ERROR] %s" % e print "[ERROR] %s" % e
print "[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf)) print "[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf))
@@ -67,4 +76,4 @@ def main(argv):
if __name__ == "__main__": if __name__ == "__main__":
main(sys.argv[1:]) main()

View File

@@ -3,6 +3,7 @@ rds_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/rds_modernm.zip
rds_name = Reduced Modern rds_name = Reduced Modern
version_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/version.txt version_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/version.txt
hashfile_name = NSRLFile.txt hashfile_name = NSRLFile.txt
hashfile_path = /nsrl/NSRLFile.txt
hashfile_type = md5 hashfile_type = md5
hashfile_column = 1 hashfile_column = 1
hashfile_delimiter = , hashfile_delimiter = ,

View File

@@ -40,7 +40,7 @@ let hash_count=$(cat "/nsrl/${hashfile_name}"|wc -l )-1
echo "[INFO] /nsrl/${hashfile_name} contains ${hash_count} Hashes" echo "[INFO] /nsrl/${hashfile_name} contains ${hash_count} Hashes"
echo "[INFO] Build bloomfilter from NSRL Database ..." echo "[INFO] Build bloomfilter from NSRL Database ..."
cd /nsrl && python /nsrl/build.py $error_rate cd /nsrl && python /nsrl/build.py -e "${error_rate}" -n "${hash_count}"
echo "[INFO] Listing created files ..." echo "[INFO] Listing created files ..."
ls -lah /nsrl ls -lah /nsrl