Update scatterhash to honor a maxsize
This commit is contained in:
@@ -26,29 +26,34 @@ def get_offsets(blocksize, blockcount,blocks_to_hash):
|
|||||||
offset = int(blocksize*i)
|
offset = int(blocksize*i)
|
||||||
yield offset
|
yield offset
|
||||||
|
|
||||||
def get_hash(file, hashalgo, spread, maxsize):
    """Sparsely hash an open binary file and return a summary record.

    Only a fraction of the file is actually fed to the hash: roughly
    ``spread`` percent of its blocks, further capped so that no more than
    ``maxsize`` bytes are read in total.

    Args:
        file: an open binary file object (must expose ``.name``, ``.seek``
            and ``.read``).
        hashalgo: algorithm name accepted by ``hashlib.new`` (e.g. "md5").
        spread: percentage (0-100) of the file's blocks to hash.
        maxsize: upper bound, in bytes, on the total data hashed per file.

    Returns:
        ``"<hexdigest>;<blocks_to_hash>;<filesize>;<hashalgo>;<filename>"``.
    """
    h = hashlib.new(hashalgo)
    filesize = os.path.getsize(file.name)
    # Hash in large chunks: a multiple of the algorithm's internal block size.
    blocksize = h.block_size * 65535
    blockcount = math.ceil(filesize / blocksize)
    blocks_to_hash = math.ceil(blockcount * spread / 100)
    # Honor the per-file byte budget: never hash more than maxsize bytes.
    if (blocks_to_hash * blocksize) > maxsize:
        blocks_to_hash = math.ceil(maxsize / blocksize)
    if filesize > blocksize:
        for of in get_offsets(blocksize, blockcount, blocks_to_hash):
            # BUG FIX: was ``infile.seek(of)`` — that referenced the
            # module-level loop variable, not this function's parameter.
            file.seek(of)
            h.update(file.read(blocksize))
    else:
        # File fits in a single block: hash it whole.
        h.update(file.read(blocksize))
    result = "{};{};{};{};{}".format(
        h.hexdigest(), blocks_to_hash, filesize, hashalgo, file.name)
    return result
|
|
||||||
# ---------------------------------------------------------------------------
# Command-line interface: sparsely fingerprint one or more large files.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    description='Sparsely hash large files. Only a given percentage of the file is actually hashed.')

parser.add_argument('-p', metavar='N', action="store", dest="spread", type=int, nargs='?',
                    default=10,
                    help='percentage of file to hash. 0 < N < 100 (default=10)')
parser.add_argument('-s', metavar='N', action="store", dest="size", type=int, nargs='?',
                    default=10,
                    help='maximum amount of data per file in MB (default=10)')
parser.add_argument('-c', action="store", dest="hashalgo", nargs='?', default="md5",
                    help='select a hash algorithm (default=md5)')
parser.add_argument('file', type=argparse.FileType('rb'), nargs='+')

args = parser.parse_args()

hashalgo = args.hashalgo
spread = args.spread
maxsize = args.size * 1024 * 1024  # CLI option is in MB; hashing budget is in bytes

for infile in args.file:
    try:
        hashvalue = get_hash(infile, hashalgo, spread, maxsize)
        print(hashvalue)
    finally:
        # argparse.FileType opened these handles for us; close each one so a
        # long file list does not leak file descriptors.
        infile.close()
|
|||||||
Reference in New Issue
Block a user