first version of scatterhash
This commit is contained in:
49
scatterhash.py
Executable file
49
scatterhash.py
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/python3
|
||||
import sys
|
||||
import hashlib
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
def even_select(N, M):
|
||||
if M > N/2:
|
||||
cut = np.zeros(N, dtype=int)
|
||||
q, r = divmod(N, N-M)
|
||||
indices = [q*i + min(i, r) for i in range(N-M)]
|
||||
cut[indices] = True
|
||||
else:
|
||||
cut = np.ones(N, dtype=int)
|
||||
q, r = divmod(N, M)
|
||||
indices = [q*i + min(i, r) for i in range(M)]
|
||||
cut[indices] = False
|
||||
|
||||
return cut
|
||||
|
||||
def get_offsets(chunksize, spread):
|
||||
selection=even_select(100,spread)
|
||||
for i in range(0,100):
|
||||
if selection[i]==0:
|
||||
offset=int(chunksize*i)
|
||||
yield offset
|
||||
|
||||
def get_blocks(filename,spread,blocksize):
|
||||
filesize=os.path.getsize(filename)
|
||||
chunksize=filesize/100
|
||||
with open(filename,'rb') as infile:
|
||||
for of in get_offsets(chunksize,spread):
|
||||
infile.seek(of)
|
||||
tohashsize=chunksize
|
||||
while tohashsize > 0:
|
||||
yield infile.read(blocksize)
|
||||
tohashsize-=h.block_size
|
||||
|
||||
|
||||
|
||||
hashalgo="md5"
|
||||
filename=sys.argv[2]
|
||||
spread=int(sys.argv[1]) #percentage of hash
|
||||
h=hashlib.new(hashalgo)
|
||||
|
||||
blocksize=h.block_size*4
|
||||
for block in get_blocks(filename,spread,blocksize):
|
||||
h.update(block)
|
||||
print(h.hexdigest())
|
||||
Reference in New Issue
Block a user