#!/usr/bin/env python
# -*- coding: utf-8 -*-
|
|
|
|
import binascii
|
|
import os
|
|
import ConfigParser
|
|
from pybloom import BloomFilter
|
|
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
# reference - http://stackoverflow.com/a/9631635
|
|
def blocks(this_file, size=65536):
    """Yield successive chunks read from ``this_file`` until it is exhausted.

    Args:
        this_file: An open file-like object exposing ``read(size)``.
        size: Maximum amount of data requested per ``read`` call.

    Yields:
        Every non-empty chunk returned by ``this_file.read(size)``, in order.
    """
    while True:
        chunk = this_file.read(size)
        if not chunk:
            # An empty read means the end of the file has been reached.
            break
        yield chunk
|
|
|
|
|
|
# Section of the config file that holds every build setting.
_CONFIG_SECTION = "config"
# Config file that is always read first and rewritten at the end of a run.
_DEFAULT_CONFIG_FILE = '/nsrl/nsrl.conf'
# Bloomfilter file written when no --outputfile is given.
_DEFAULT_OUTPUT_FILE = 'nsrl.bloom'


def _build_parser():
    """Create the command-line parser for the bloomfilter build script."""
    parser = argparse.ArgumentParser(prog='build.py')
    parser.add_argument("-v", "--verbose", action="store_true", required=False,
                        help="Display verbose output message")
    parser.add_argument('-f', "--config", help='Config file with all settings')
    # A plain argument group: argparse does not support nesting a group inside
    # a mutually exclusive group, and doing so hides these options from --help.
    settings = parser.add_argument_group('settings')
    settings.add_argument('-e', '--error-rate', type=float,
                          help="Error Rate for False-Positives")
    settings.add_argument('-n', '--hashcount', type=int,
                          help="Provide the hashcount")
    settings.add_argument('-c', '--column', type=int,
                          help="Which Column of inputfile should be processed (0,1,...)")
    settings.add_argument('-l', '--label',
                          help="What kind of Data is beeing processed (MD5,filenames,...)")
    settings.add_argument('-d', '--delimiter',
                          help="Which char is used to delimit columns in inputfile")
    settings.add_argument('-i', '--inputfile', help="Path of input file")
    settings.add_argument('-o', '--outputfile',
                          help="Path of output file (default: %s)" % _DEFAULT_OUTPUT_FILE)
    return parser


def _load_config(args):
    """Read the default config file plus the optional user config file."""
    config_files = [_DEFAULT_CONFIG_FILE]
    if args.config is not None:
        if os.path.isfile(args.config):
            config_files.append(args.config)
        else:
            print("[WARNING] Config file not found, ignoring: %s" % args.config)
    conf = ConfigParser.ConfigParser()
    conf.read(config_files)
    # Without this, every later conf.set() raises NoSectionError when no
    # config file provided a [config] section.
    if not conf.has_section(_CONFIG_SECTION):
        conf.add_section(_CONFIG_SECTION)
    return conf


def _apply_overrides(conf, args):
    """Store the settings given on the command line in the config section."""
    overrides = (
        ("error_rate", args.error_rate),
        ("hash_count", args.hashcount),
        ("hashfile_type", args.label),
        ("hashfile_delimiter", args.delimiter),
        ("hashfile_path", args.inputfile),
    )
    for option, value in overrides:
        if value:
            conf.set(_CONFIG_SECTION, option, str(value))
    # Column 0 is a valid choice, so test against None instead of truthiness.
    if args.column is not None:
        conf.set(_CONFIG_SECTION, "hashfile_column", str(args.column))


def _get_setting(conf, option, default):
    """Return the configured value of ``option`` or ``default`` if it is unset."""
    if conf.has_option(_CONFIG_SECTION, option):
        return conf.get(_CONFIG_SECTION, option)
    return default


def _count_entries(path):
    """Count the newline characters in ``path`` after its header line."""
    with open(path) as handle:
        # Strip off header
        handle.readline()
        return sum(chunk.count("\n") for chunk in blocks(handle))


def _fill_filter(bloom, path, delimiter, column):
    """Add the unhexlified ``column`` value of every data line to ``bloom``."""
    with open(path) as handle:
        # Strip off header
        handle.readline()
        for line in handle:
            try:
                # Strip whitespace first so a value in the last column does
                # not keep its trailing newline and closing quote.
                field = line.split(delimiter)[column].strip().strip('"')
                if field:
                    bloom.add(binascii.unhexlify(field))
            except (TypeError, ValueError, IndexError, binascii.Error) as err:
                # Report the bad line and keep going with the rest of the file.
                print("[ERROR] %s" % err)


def main():
    """Build a bloomfilter file from one column of a delimited input file.

    Settings are read from the default config file, then from the file given
    with ``--config`` (if it exists), and finally overridden by the individual
    command-line options. Options missing everywhere fall back to built-in
    defaults. The number of entries is taken from the ``hash_count`` setting
    or, when that is absent, counted from the input file and stored in the
    configuration. The filter is written to ``--outputfile`` (default
    ``nsrl.bloom``), and the resulting configuration is written back to the
    default config file whether or not the input file was found.

    ``--verbose`` is accepted but currently has no effect.
    """
    args = _build_parser().parse_args()
    conf = _load_config(args)
    _apply_overrides(conf, args)

    nsrl_path = _get_setting(conf, "hashfile_path", '/nsrl/NSRLFile.txt')
    error_rate = float(_get_setting(conf, "error_rate", 0.01))
    hashfile_delimiter = _get_setting(conf, "hashfile_delimiter", ',')
    hashfile_column = int(_get_setting(conf, "hashfile_column", 0))
    hashfile_type = _get_setting(conf, "hashfile_type", 'Hash')
    output_path = args.outputfile or _DEFAULT_OUTPUT_FILE

    print("[BUILDING] Using error-rate: {}".format(error_rate))
    if os.path.isfile(nsrl_path):
        print("[BUILDING] Reading in NSRL Database")
        if conf.has_option(_CONFIG_SECTION, "hash_count"):
            num_lines = conf.getint(_CONFIG_SECTION, "hash_count")
        else:
            print("[BUILDING] Calculating number of entries in Inputfile...")
            num_lines = _count_entries(nsrl_path)
            conf.set(_CONFIG_SECTION, "hash_count", str(num_lines))
        print("[BUILDING] There are {} {}s in the Database".format(num_lines, hashfile_type))

        print("[BUILDING] Creating bloomfilter")
        bf = BloomFilter(num_lines, error_rate)
        print("[BUILDING] Inserting {} into bloomfilter".format(hashfile_type))
        _fill_filter(bf, nsrl_path, hashfile_delimiter, hashfile_column)
        print("[BUILDING] NSRL bloomfilter contains {} items.".format(len(bf)))
        with open(output_path, 'wb') as nb:
            bf.tofile(nb)
        print("[BUILDING] Complete")
    else:
        print("[ERROR] No such file or directory: %s" % nsrl_path)

    # save config
    with open(_DEFAULT_CONFIG_FILE, 'w') as configfile:
        conf.write(configfile)
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the build only when executed as a script, not when imported.
    main()
|