added argparse support to build
This commit is contained in:
65
nsrl/build.py
Executable file → Normal file
65
nsrl/build.py
Executable file → Normal file
@@ -1,24 +1,13 @@
|
|||||||
# !/usr/bin/env python
|
# !/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
|
||||||
build.py
|
|
||||||
~~~~~~~~
|
|
||||||
|
|
||||||
This module builds a bloomfilter from the NSRL Whitelist Database.
|
|
||||||
|
|
||||||
:copyright: (c) 2014 by Josh "blacktop" Maine.
|
|
||||||
:license: MIT
|
|
||||||
:improved_by: https://github.com/kost
|
|
||||||
"""
|
|
||||||
|
|
||||||
import binascii
|
import binascii
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
|
|
||||||
from pybloom import BloomFilter
|
from pybloom import BloomFilter
|
||||||
|
|
||||||
nsrl_path = '/nsrl/NSRLFile.txt'
|
|
||||||
error_rate = 0.01
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# reference - http://stackoverflow.com/a/9631635
|
# reference - http://stackoverflow.com/a/9631635
|
||||||
@@ -30,30 +19,50 @@ def blocks(this_file, size=65536):
|
|||||||
yield b
|
yield b
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
def main():
|
||||||
if argv:
|
parser = argparse.ArgumentParser(prog='build.py')
|
||||||
error_rate = float(argv[0])
|
parser.add_argument("-v", "--verbose", help="Display verbose output message", action="store_true", required=False)
|
||||||
|
config = parser.add_mutually_exclusive_group()
|
||||||
|
config.add_argument('-f', "--config", default="/nsrl/nsrl.conf",help='Config file with all settings')
|
||||||
|
settings = config.add_argument_group()
|
||||||
|
settings.add_argument('-e','--error-rate', type=float, default=0.01 ,help="Error Rate for False-Positives")
|
||||||
|
settings.add_argument('-n','--hashcount',type=int, help="Provide the hashcount")
|
||||||
|
settings.add_argument('-c','--column', type=int, default=1 ,help="Which Column of inputfile should be processed (0,1,...)")
|
||||||
|
settings.add_argument('-l','--label', default="MD5",help="What kind of Data is being processed (MD5,filenames,...)")
|
||||||
|
settings.add_argument('-d','--delimiter', default=',' , help="Which char is used to delimit columns in inputfile")
|
||||||
|
settings.add_argument('-i','--inputfile', default='/nsrl/NSRLFile.txt' , help="Path of input file")
|
||||||
|
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
nsrl_path = args.inputfile
|
||||||
|
error_rate = args.error_rate
|
||||||
|
|
||||||
print "[BUILDING] Using error-rate: {}".format(error_rate)
|
print "[BUILDING] Using error-rate: {}".format(error_rate)
|
||||||
if os.path.isfile(nsrl_path):
|
if os.path.isfile(nsrl_path):
|
||||||
print "[BUILDING] Reading in NSRL Database"
|
print "[BUILDING] Reading in NSRL Database"
|
||||||
with open(nsrl_path) as f_line:
|
if args.hashcount is None:
|
||||||
# Strip off header
|
with open(nsrl_path) as f_line:
|
||||||
_ = f_line.readline()
|
# Strip off header
|
||||||
print "[BUILDING] Calculating number of hashes in NSRL..."
|
_ = f_line.readline()
|
||||||
num_lines = sum(bl.count("\n") for bl in blocks(f_line))
|
print "[BUILDING] Calculating number of hashes in NSRL..."
|
||||||
print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines
|
num_lines = sum(bl.count("\n") for bl in blocks(f_line))
|
||||||
|
else:
|
||||||
|
num_lines=args.hashcount
|
||||||
|
print "[BUILDING] There are %s hashes in the NSRL Database" % num_lines
|
||||||
with open(nsrl_path) as f_nsrl:
|
with open(nsrl_path) as f_nsrl:
|
||||||
# Strip off header
|
# Strip off header
|
||||||
_ = f_nsrl.readline()
|
_ = f_nsrl.readline()
|
||||||
print "[BUILDING] Creating bloomfilter"
|
print "[BUILDING] Creating bloomfilter"
|
||||||
bf = BloomFilter(num_lines, error_rate)
|
bf = BloomFilter(num_lines, error_rate)
|
||||||
print "[BUILDING] Inserting hashes into bloomfilter"
|
print "[BUILDING] Inserting hashes into bloomfilter"
|
||||||
|
# sha1 hash is in column 0
|
||||||
for line in f_nsrl:
|
for line in f_nsrl:
|
||||||
md5_hash = line.split(",")[1].strip('"')
|
hashline = line.split(args.delimiter)[args.column].strip('"')
|
||||||
if md5_hash:
|
if hashline:
|
||||||
try:
|
try:
|
||||||
md5 = binascii.unhexlify(md5_hash)
|
hash = binascii.unhexlify(hashline)
|
||||||
bf.add(md5)
|
bf.add(hash)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print "[ERROR] %s" % e
|
print "[ERROR] %s" % e
|
||||||
print "[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf))
|
print "[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf))
|
||||||
@@ -67,4 +76,4 @@ def main(argv):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main(sys.argv[1:])
|
main()
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ rds_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/rds_modernm.zip
|
|||||||
rds_name = Reduced Modern
|
rds_name = Reduced Modern
|
||||||
version_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/version.txt
|
version_url = https://s3.amazonaws.com/rds.nsrl.nist.gov/RDS/current/version.txt
|
||||||
hashfile_name = NSRLFile.txt
|
hashfile_name = NSRLFile.txt
|
||||||
|
hashfile_path = /nsrl/NSRLFile.txt
|
||||||
hashfile_type = md5
|
hashfile_type = md5
|
||||||
hashfile_column = 1
|
hashfile_column = 1
|
||||||
hashfile_delimiter = ,
|
hashfile_delimiter = ,
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ let hash_count=$(cat "/nsrl/${hashfile_name}"|wc -l )-1
|
|||||||
echo "[INFO] /nsrl/${hashfile_name} contains ${hash_count} Hashes"
|
echo "[INFO] /nsrl/${hashfile_name} contains ${hash_count} Hashes"
|
||||||
|
|
||||||
echo "[INFO] Build bloomfilter from NSRL Database ..."
|
echo "[INFO] Build bloomfilter from NSRL Database ..."
|
||||||
cd /nsrl && python /nsrl/build.py $error_rate
|
cd /nsrl && python /nsrl/build.py -e "${error_rate}" -n "${hash_count}"
|
||||||
echo "[INFO] Listing created files ..."
|
echo "[INFO] Listing created files ..."
|
||||||
ls -lah /nsrl
|
ls -lah /nsrl
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user