Add validation functionality to scatterhash

This commit is contained in:
TKE
2020-12-23 14:11:54 +01:00
parent 3b2585abe5
commit 953152f0e4

View File

@@ -26,17 +26,18 @@ def get_offsets(blocksize, blockcount,blocks_to_hash):
offset = int(blocksize*i) offset = int(blocksize*i)
yield offset yield offset
def get_hash(file,hashalgo,spread,maxsize): def get_hash(file,hashalgo,spread=-1,maxsize=-1,blocks_to_hash=-1):
h=hashlib.new(hashalgo) h=hashlib.new(hashalgo)
filesize = os.path.getsize(file.name) filesize = os.path.getsize(file.name)
blocksize = h.block_size*65535 blocksize = h.block_size*65535
blockcount = math.ceil(filesize/blocksize) blockcount = math.ceil(filesize/blocksize)
blocks_to_hash = math.ceil(blockcount*spread/100) if blocks_to_hash == -1 :
if (blocks_to_hash * blocksize) > maxsize: blocks_to_hash = math.ceil(blockcount*spread/100)
blocks_to_hash = math.ceil(maxsize/blocksize) if (blocks_to_hash * blocksize) > maxsize:
blocks_to_hash = math.ceil(maxsize/blocksize)
if filesize>blocksize: if filesize>blocksize:
for of in get_offsets(blocksize,blockcount,blocks_to_hash): for of in get_offsets(blocksize,blockcount,blocks_to_hash):
infile.seek(of) file.seek(of)
h.update(file.read(blocksize)) h.update(file.read(blocksize))
else: else:
h.update(file.read(blocksize)) h.update(file.read(blocksize))
@@ -49,11 +50,36 @@ parser.add_argument('-p',metavar='N', action="store",dest="spread",type=int, nar
parser.add_argument('-s',metavar='N', action="store",dest="size",type=int, nargs='?',default=10,help='maximum amount of data per file in MB') parser.add_argument('-s',metavar='N', action="store",dest="size",type=int, nargs='?',default=10,help='maximum amount of data per file in MB')
parser.add_argument('-c', action="store",dest="hashalgo",nargs='?',default="md5",help='select an hashalgorithm (default=md5)') parser.add_argument('-c', action="store",dest="hashalgo",nargs='?',default="md5",help='select an hashalgorithm (default=md5)')
parser.add_argument('file', type=argparse.FileType('rb'), nargs='+') parser.add_argument('file', type=argparse.FileType('rb'), nargs='+')
parser.add_argument('-v', default=False, dest="validate", action='store_true', help='read output-file of previous run and validate hashes')
parser.add_argument('-1', default=True, dest="mismatches", action='store_false', help='suppress mismatches')
parser.add_argument('-0', default=True, dest="matches", action='store_false', help='suppress matches')
args = parser.parse_args() args = parser.parse_args()
# Script entry point: either hash the given files, or (with -v) re-check the
# records written by a previous run.
if not args.validate:
    # Hash mode: print one result line per input file.
    hashalgo = args.hashalgo
    spread = args.spread
    maxsize = args.size * 1024 * 1024  # -s is given in MB
    for infile in args.file:
        print(get_hash(infile, hashalgo, spread, maxsize))
else:
    print("validating")
    # Only the first positional file is read as the record list.
    # Each record is "hash;blocks_hashed;filesize;hashalgo;file".
    for raw_line in args.file[0]:
        line = raw_line.decode().strip()
        if not line:
            # Tolerate blank lines instead of failing on the unpack below.
            continue
        # The file name is the last field; limiting the split keeps any ';'
        # inside the name intact.
        expected_hash, blocks_hashed, filesize, hashalgo, path = line.split(';', 4)
        blocks_hashed = int(blocks_hashed)
        filesize = int(filesize)
        if not os.path.isfile(path):
            result = "FILE_NOT_FOUND"
        elif os.path.getsize(path) != filesize:
            # Size differs, so re-hashing is skipped.
            result = "BAD_SIZE"
        else:
            # Re-hash with the recorded block count; the context manager
            # closes the handle once hashing is done.
            with open(path, 'rb') as target:
                rehash = get_hash(target, hashalgo, blocks_to_hash=blocks_hashed)
            # NOTE(review): assumes the first ';'-separated field of
            # get_hash's return value is the digest - confirm against get_hash.
            if expected_hash == rehash.split(";")[0]:
                result = "OK"
            else:
                result = "BAD_HASH"
        # -0 suppresses matches, -1 suppresses mismatches.
        if result == "OK":
            if args.matches:
                print("{};{}".format(result, line))
        elif args.mismatches:
            print("{};{}".format(result, line))