basic tool
This commit is contained in:
parent
6c186bd3fb
commit
1130138f3e
@ -1,4 +1,3 @@
|
||||
import os
|
||||
import hashlib
|
||||
|
||||
|
||||
@ -9,88 +8,3 @@ def hash_chunk(data):
|
||||
h = hashlib.md5()
|
||||
h.update(data)
|
||||
return h.hexdigest()
|
||||
|
||||
|
||||
class BaseChunkClient:
    """
    Interface for chunk-wise access to a single file. Every operation raises
    NotImplementedError here; subclasses provide the real storage access.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        self.fpath = fpath
        self.chunk_size = chunk_size

    def get_hashes(self):
        """
        Yield one (chunk_number, chunk_hash) tuple per chunk of the file.
        """
        raise NotImplementedError()

    def get_chunk(self, chunk_number):
        """
        Return the data stored at chunk_number's position. The local-file
        implementation in this file returns the bytes themselves (at most
        chunk_size of them) rather than a file handle.
        """
        raise NotImplementedError()

    def put_chunk(self, chunk_number, contents):
        """
        Store contents at chunk_number's position within the file. The
        local-file implementation in this file writes contents directly, so it
        expects bytes rather than a file-like object.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class LocalChunkClient(BaseChunkClient):
    """
    Chunk client backed by a file on the local filesystem.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        """
        :param fpath: path of an existing file; it is opened for reading immediately
        :param chunk_size: number of bytes per chunk
        """
        super().__init__(fpath, chunk_size)
        # One long-lived read handle serves every get_chunk call instead of
        # opening and closing the file once per chunk.
        self.file = open(self.fpath, "rb")

    def get_hashes(self):
        """
        Yield (chunk_number, chunk_hash) tuples for each chunk of the file, in
        order. A private handle is used so the shared get_chunk handle's
        position is left alone.
        """
        with open(self.fpath, "rb") as f:
            # read() returns b"" at EOF, which ends the iteration
            chunks = iter(lambda: f.read(self.chunk_size), b"")
            for i, data in enumerate(chunks):
                yield (i, hash_chunk(data))

    def get_chunk(self, chunk_number):
        """
        Return the bytes of chunk chunk_number (at most chunk_size bytes; the
        final chunk of the file may be shorter).

        :raises Exception: if the chunk starts at or past the end of the file
        """
        position = chunk_number * self.chunk_size
        # A chunk that starts exactly at EOF contains no data, so it is out of
        # range as well (hence >= rather than >).
        if position >= os.path.getsize(self.fpath):
            raise Exception("requested chunk {} is beyond EOF".format(chunk_number))
        self.file.seek(position)  # TODO not thread safe
        return self.file.read(self.chunk_size)

    def put_chunk(self, chunk_number, contents):
        """
        Write contents (bytes) into the file starting at chunk_number's
        position, overwriting whatever is already there.
        """
        position = chunk_number * self.chunk_size
        with open(self.fpath, "rb+") as f:
            f.seek(position)
            f.write(contents)
|
||||
|
||||
|
||||
def main():
    """
    Compare test.zip against dest.zip chunk by chunk and print the number of
    every source chunk that is missing from, or differs in, the destination.
    Nothing is copied.
    """
    src = LocalChunkClient("test.zip")
    dest = LocalChunkClient("dest.zip")

    # The destination hashes are indexed by chunk number below, so they are
    # collected into a list; the source hashes can simply be streamed.
    dest_hashes = list(dest.get_hashes())

    for chunk_number, chunk_hash in src.get_hashes():
        # >= because the valid indexes of dest_hashes stop at len - 1
        missing = chunk_number >= len(dest_hashes)
        if missing or chunk_hash != dest_hashes[chunk_number][1]:
            print("would copy chunk", chunk_number)
|
||||
|
||||
|
||||
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
|
51
blobsend/cli.py
Normal file
51
blobsend/cli.py
Normal file
@ -0,0 +1,51 @@
|
||||
import argparse
|
||||
from urllib.parse import urlparse
|
||||
from blobsend.client_file import FileChunkClient
|
||||
|
||||
|
||||
# Maps a uri scheme to the client class that handles uris of that scheme.
SCHEMES = {"file": FileChunkClient}
|
||||
|
||||
|
||||
def get_args(argv=None):
    """
    Parse the command line.

    :param argv: list of argument strings to parse; None (the default) makes
                 argparse read sys.argv[1:]
    :return: tuple of (parsed argument namespace, the parser that produced it)
    """
    parser = argparse.ArgumentParser(description="file blob copy utility")
    parser.add_argument("src", help="source file uri")
    parser.add_argument("dest", help="dest file uri")

    return parser.parse_args(argv), parser
|
||||
|
||||
|
||||
def get_client(uri):
    """
    Instantiate the chunk client responsible for the given parsed uri.

    :param uri: result of urllib.parse.urlparse; a uri without a scheme is
                treated as a "file" uri
    :raises KeyError: if no client is registered in SCHEMES for the scheme
    """
    scheme = uri.scheme or "file"
    try:
        clss = SCHEMES[scheme]
    except KeyError:
        # Same exception type as the plain dict lookup, but with a message that
        # tells the user what went wrong and what is accepted.
        raise KeyError("unsupported uri scheme {!r} (supported: {})".format(
            scheme, ", ".join(sorted(SCHEMES)))) from None
    return clss.from_uri(uri)
|
||||
|
||||
|
||||
def main():
    """
    Bring the dest uri's file in line with the src uri's file: walk both chunk
    hash streams in step, copy every source chunk whose hash differs from (or
    is absent in) the destination, then set the destination length to the
    source length.
    """
    args, parser = get_args()
    print(args)

    src = get_client(urlparse(args.src))
    dest = get_client(urlparse(args.dest))

    remaining_dest = dest.get_hashes()
    for number, digest in src.get_hashes():
        # (None, None) stands in once the destination has run out of chunks
        dest_number, dest_digest = next(remaining_dest, (None, None))

        if dest_number is not None and number != dest_number:
            raise Exception("sequence mismatch?")

        if digest == dest_digest:
            continue  # chunk already identical on the destination

        blob = src.get_chunk(number)
        dest.put_chunk(number, blob)

    dest.set_length(src.get_length())
|
||||
|
||||
|
||||
# Allow running this module directly as well as through the console script.
if __name__ == '__main__':
    main()
|
44
blobsend/client_base.py
Normal file
44
blobsend/client_base.py
Normal file
@ -0,0 +1,44 @@
|
||||
from blobsend import CHUNK_SIZE
|
||||
|
||||
|
||||
class BaseChunkClient:
    """
    Interface for chunk-wise access to a single file. Every operation raises
    NotImplementedError here; subclasses provide the real storage access.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        self.fpath = fpath
        self.chunk_size = chunk_size

    def get_hashes(self):
        """
        Yield one (chunk_number, chunk_hash) tuple per chunk of the file.
        """
        raise NotImplementedError()

    def get_chunk(self, chunk_number):
        """
        Return the data stored at chunk_number's position. The file-backed
        client in this project returns the bytes themselves (at most chunk_size
        of them) rather than a file handle.
        """
        raise NotImplementedError()

    def put_chunk(self, chunk_number, contents):
        """
        Store contents at chunk_number's position within the file. The
        file-backed client in this project writes contents directly, so it
        expects bytes rather than a file-like object.
        """
        raise NotImplementedError()

    def get_length(self):
        """
        Return the size of the file.
        """
        raise NotImplementedError()

    def set_length(self, length):
        """
        Truncate or extend the file to length.
        """
        raise NotImplementedError()

    @staticmethod
    def from_uri(uri):
        """
        Build a client instance from the given uri.
        """
        raise NotImplementedError()
|
62
blobsend/client_file.py
Normal file
62
blobsend/client_file.py
Normal file
@ -0,0 +1,62 @@
|
||||
import os
|
||||
from blobsend.client_base import BaseChunkClient
|
||||
from blobsend import CHUNK_SIZE, hash_chunk
|
||||
|
||||
|
||||
class FileChunkClient(BaseChunkClient):
    """
    Chunk client backed by a file on the local filesystem.
    """

    def __init__(self, fpath, chunk_size=CHUNK_SIZE):
        """
        :param fpath: path of the file; it is created empty if it does not exist
        :param chunk_size: number of bytes per chunk
        """
        super().__init__(fpath, chunk_size)
        # One long-lived handle serves get_chunk / get_length / set_length
        # instead of opening and closing the file per operation. "ab+" creates
        # a missing file without truncating an existing one; append mode starts
        # at the end of the file, hence the rewind.
        self.file = open(self.fpath, "ab+")
        self.file.seek(0)

    def get_hashes(self):
        """
        Yield (chunk_number, chunk_hash) tuples for each chunk of the file, in
        order. A private read-only handle is used so the shared handle's
        position is left alone.
        """
        with open(self.fpath, "rb") as f:
            # read() returns b"" at EOF, which ends the iteration
            chunks = iter(lambda: f.read(self.chunk_size), b"")
            for i, data in enumerate(chunks):
                yield (i, hash_chunk(data))

    def get_chunk(self, chunk_number):
        """
        Return the bytes of chunk chunk_number (at most chunk_size bytes; the
        final chunk of the file may be shorter).

        :raises Exception: if the chunk starts at or past the end of the file
        """
        position = chunk_number * self.chunk_size
        # A chunk that starts exactly at EOF contains no data, so it is out of
        # range as well (hence >= rather than >).
        if position >= os.path.getsize(self.fpath):
            raise Exception("requested chunk {} is beyond EOF".format(chunk_number))
        self.file.seek(position)  # TODO not thread safe
        return self.file.read(self.chunk_size)

    def put_chunk(self, chunk_number, contents):
        """
        Write contents (bytes) into the file starting at chunk_number's
        position, overwriting whatever is already there.
        """
        position = chunk_number * self.chunk_size
        # A separate "rb+" handle is used because the shared handle is in
        # append mode, where writes always land at the end of the file.
        with open(self.fpath, "rb+") as f:
            f.seek(position)
            f.write(contents)

    def get_length(self):
        """
        Return the file size in bytes. Moves the shared handle to the end of
        the file as a side effect.
        """
        self.file.seek(0, os.SEEK_END)
        return self.file.tell()

    def set_length(self, length):
        """
        Truncate the file to length bytes if it is currently longer. A shorter
        file is left untouched: put_chunk is what grows the file.
        """
        if length < self.get_length():
            self.file.truncate(length)

    @staticmethod
    def from_uri(uri):
        """
        Instantiate a client for the path component of the given parsed uri.
        """
        return FileChunkClient(uri.path)
|
22
setup.py
22
setup.py
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
__version__ = "0.0.0"


# Package metadata; installs the `blobsend` console command, which runs blobsend.cli:main.
setup(
    name="blobsend",
    version=__version__,
    description="tool for updating sub-portions of large files",
    url="",
    author="dpedu",
    author_email="dave@davepedu.com",
    packages=["blobsend"],
    install_requires=[],
    entry_points={
        "console_scripts": [
            "blobsend = blobsend.cli:main",
        ],
    },
    zip_safe=False,
)
|
Loading…
Reference in New Issue
Block a user