This commit is contained in:
dave 2021-12-23 14:58:04 -08:00
commit 6c186bd3fb
2 changed files with 96 additions and 0 deletions

96
blobsend/__init__.py Normal file
View File

@@ -0,0 +1,96 @@
import os
import hashlib
CHUNK_SIZE = 4 * 1024 ** 2  # default chunk length in bytes (4 MiB)
def hash_chunk(data):
    """
    return the MD5 digest of data (a bytes-like object) as a lowercase hex string
    """
    return hashlib.md5(data).hexdigest()
class BaseChunkClient:
    """
    Interface for accessing a file as a sequence of fixed-size chunks.

    This base class only records the target path and the chunk size; every
    operation raises NotImplementedError and is meant to be overridden.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        # fpath: location of the file this client works on
        # chunk_size: length of one chunk in bytes
        self.fpath = fpath
        self.chunk_size = chunk_size

    def get_hashes(self):
        """
        yield one (chunk_number, chunk_hash) tuple per chunk of the file
        """
        raise NotImplementedError

    def get_chunk(self, chunk_number):
        """
        return the data stored at chunk_number's position (up to chunk_size bytes)

        NOTE(review): this was originally described as returning a file handle, but
        LocalChunkClient returns the bytes themselves - confirm the intended type.
        """
        raise NotImplementedError

    def put_chunk(self, chunk_number, contents):
        """
        store contents at chunk_number's position within the file

        NOTE(review): contents was originally described as a file-like object, but
        LocalChunkClient writes it directly as bytes - confirm the intended type.
        """
        raise NotImplementedError
class LocalChunkClient(BaseChunkClient):
    """
    Chunk client for a file on the local filesystem.

    A single read handle is opened at construction and reused by get_chunk.
    Call close(), or use the client in a `with` statement, to release it.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        super().__init__(fpath, chunk_size)
        # shared read handle for get_chunk, instead of an open/close per call
        self.file = open(self.fpath, "rb")

    def close(self):
        """
        close the shared read handle; calling it more than once is harmless
        """
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def get_hashes(self):
        """
        yield (chunk_number, chunk_hash) tuples for the file, in order from chunk 0

        The file is read through its own handle, so iterating does not move the
        position of the shared handle used by get_chunk.
        """
        i = 0
        with open(self.fpath, "rb") as f:
            while True:
                data = f.read(self.chunk_size)
                if not data:
                    break
                yield (i, hash_chunk(data))
                i += 1

    def get_chunk(self, chunk_number):
        """
        return the bytes of chunk chunk_number (the final chunk may be shorter than chunk_size)

        Raises Exception if the chunk would start at or past the end of the file.
        """
        position = chunk_number * self.chunk_size
        # a chunk that starts exactly at EOF contains no data, so the boundary is >=
        if position >= os.path.getsize(self.fpath):
            raise Exception("requested chunk {} is beyond EOF".format(chunk_number))
        # TODO not thread safe: the seek position of self.file is shared by all callers
        # NOTE(review): put_chunk writes through a separate handle; this buffered
        # handle may still hold older bytes for a region just rewritten - confirm
        self.file.seek(position)
        return self.file.read(self.chunk_size)

    def put_chunk(self, chunk_number, contents):
        """
        write contents (a bytes-like object) at chunk_number's position within the file

        The file must already exist. There is deliberately no EOF check: writing
        at or past the current end is how a shorter file grows.
        """
        position = chunk_number * self.chunk_size
        with open(self.fpath, "rb+") as f:
            f.seek(position)
            f.write(contents)
def main():
    """
    Dry run: compare test.zip with dest.zip chunk by chunk and print which chunks would be copied.

    A chunk is reported when dest.zip has no chunk at that position or when the
    two chunk hashes differ. Nothing is written to either file.
    """
    src = LocalChunkClient("test.zip")
    src_hashes = list(src.get_hashes())

    dest = LocalChunkClient("dest.zip")
    # key dest hashes by chunk number so that a chunk missing from a shorter
    # dest looks up as None (never equal to a hex digest) instead of raising
    dest_hashes = dict(dest.get_hashes())

    for chunk_number, chunk_hash in src_hashes:
        if dest_hashes.get(chunk_number) != chunk_hash:
            print("would copy chunk", chunk_number)
# run the comparison only when executed as a script, not when imported
if __name__ == "__main__":
    main()

0
setup.py Normal file
View File