initial
This commit is contained in:
commit
6c186bd3fb
|
@ -0,0 +1,96 @@
|
|||
import os
|
||||
import hashlib
|
||||
|
||||
|
||||
CHUNK_SIZE = 4 * 1024 ** 2  # default chunk size in bytes (4 MiB)
|
||||
|
||||
|
||||
def hash_chunk(data):
    """Return the hexadecimal MD5 digest of the bytes-like object ``data``."""
    return hashlib.md5(data).hexdigest()
|
||||
|
||||
|
||||
class BaseChunkClient(object):
    """
    Interface for accessing a file as a sequence of fixed-size chunks.

    This base class only stores the configuration; every operation raises
    NotImplementedError and is meant to be overridden by a subclass.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        """
        :param fpath: path of the file this client operates on
        :param chunk_size: size of one chunk in bytes, defaults to CHUNK_SIZE
        """
        self.chunk_size = chunk_size
        self.fpath = fpath

    def get_hashes(self):
        """
        yield a stream of hashes of file chunks. The returned format is tuples of (chunk_number, chunk_hash, )
        """
        raise NotImplementedError()

    def get_chunk(self, chunk_number):
        """
        return the data of chunk chunk_number as bytes (up to chunk_size bytes long)
        """
        raise NotImplementedError()

    def put_chunk(self, chunk_number, contents):
        """
        write contents (a bytes-like object, not a file-like object) at chunk_number's position within the file
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LocalChunkClient(BaseChunkClient):
    """
    Chunk client backed by a file on the local filesystem.

    A read-only handle to the file is opened on construction and reused by
    get_chunk. Release it with close(), or use the client as a context manager
    (``with LocalChunkClient(path) as client: ...``).
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        """
        :param fpath: path of an existing file; it is opened for reading immediately
        :param chunk_size: size of one chunk in bytes, defaults to CHUNK_SIZE
        """
        super().__init__(fpath, chunk_size)
        # One shared handle serves every get_chunk call instead of doing lots
        # of open/close.
        self.file = open(self.fpath, "rb")

    def close(self):
        """
        close the shared read handle; calling it more than once is harmless
        """
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def get_hashes(self):
        """
        yield (chunk_number, chunk_hash) tuples for each chunk of the file, in order

        The file is read through a separate handle, so the position of the
        shared handle used by get_chunk is not disturbed. An empty file
        yields nothing.
        """
        chunk_number = 0
        with open(self.fpath, "rb") as f:
            while True:
                data = f.read(self.chunk_size)
                if not data:
                    break
                yield (chunk_number, hash_chunk(data))
                chunk_number += 1

    def get_chunk(self, chunk_number):
        """
        return the bytes of chunk chunk_number; the final chunk of the file may be shorter than chunk_size

        Raises Exception when the chunk starts at or after the end of the file.
        """
        position = chunk_number * self.chunk_size
        # A chunk that starts exactly at EOF contains no data (get_hashes never
        # yields it), so the boundary itself must be rejected: >= rather than >.
        if position >= os.path.getsize(self.fpath):
            raise Exception("requested chunk {} is beyond EOF".format(chunk_number))
        self.file.seek(position)  # TODO not thread safe: the handle's position is shared
        return self.file.read(self.chunk_size)

    def put_chunk(self, chunk_number, contents):
        """
        write contents (a bytes-like object) into the file starting at chunk_number's position

        The file is opened in "rb+" mode, so it must already exist and is
        overwritten in place without being truncated.
        """
        position = chunk_number * self.chunk_size
        with open(self.fpath, "rb+") as f:
            f.seek(position)
            f.write(contents)
|
||||
|
||||
|
||||
def main(src_path="test.zip", dest_path="dest.zip"):
    """
    Print the number of every chunk of src_path that dest_path is missing or holds with a different hash.

    Nothing is copied; this only reports what a sync would have to transfer.

    :param src_path: file to compare from
    :param dest_path: file to compare against; it must already exist
    """
    src = LocalChunkClient(src_path)
    try:
        src_hashes = list(src.get_hashes())
    finally:
        # get_hashes reads through its own handle; release the one the client
        # opened on construction.
        src.file.close()

    dest = LocalChunkClient(dest_path)
    try:
        dest_hashes = list(dest.get_hashes())
    finally:
        dest.file.close()

    # Each entry is a (chunk_number, chunk_hash) tuple, and chunk numbers
    # double as indexes into the hash lists.
    for chunk_number, chunk_hash in src_hashes:
        # >= so that chunks past the end of a shorter destination are reported
        # instead of indexing out of range.
        if chunk_number >= len(dest_hashes) or chunk_hash != dest_hashes[chunk_number][1]:
            print("would copy chunk", chunk_number)
|
||||
|
||||
|
||||
# Run the chunk comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue