From 3d156e891662193dd34b0555bab25d7c799bd6ff Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 23 Dec 2021 17:24:53 -0800 Subject: [PATCH] add ssh scheme --- blobsend/cli.py | 23 +++++++---- blobsend/client_base.py | 14 ++++--- blobsend/client_file.py | 11 ++--- blobsend/client_ssh.py | 76 +++++++++++++++++++++++++++++++++++ blobsend/client_ssh_remote.py | 32 +++++++++++++++ requirements.txt | 7 ++++ setup.py | 1 + 7 files changed, 146 insertions(+), 18 deletions(-) create mode 100644 blobsend/client_ssh.py create mode 100644 blobsend/client_ssh_remote.py create mode 100644 requirements.txt diff --git a/blobsend/cli.py b/blobsend/cli.py index 730b263..7dd2ebe 100644 --- a/blobsend/cli.py +++ b/blobsend/cli.py @@ -1,13 +1,21 @@ import argparse from urllib.parse import urlparse +from blobsend import CHUNK_SIZE from blobsend.client_file import FileChunkClient +from blobsend.client_ssh import SshChunkClient SCHEMES = { "file": FileChunkClient, + "ssh": SshChunkClient, } +def get_client(uri, is_src): + clss = SCHEMES[uri.scheme or "file"] + return clss.from_uri(uri, is_src) + + def get_args(): parser = argparse.ArgumentParser(description="file blob copy utility") parser.add_argument("src", help="source file uri") @@ -16,17 +24,14 @@ def get_args(): return parser.parse_args(), parser -def get_client(uri): - clss = SCHEMES[uri.scheme or "file"] - return clss.from_uri(uri) - - def main(): args, parser = get_args() print(args) - src = get_client(urlparse(args.src)) - dest = get_client(urlparse(args.dest)) + src = get_client(urlparse(args.src), True) + dest = get_client(urlparse(args.dest), False) + + num_chunks = src.get_length() // CHUNK_SIZE dest_hashes_iter = dest.get_hashes() for src_chunk_number, src_chunk_hash in src.get_hashes(): @@ -41,11 +46,15 @@ def main(): raise Exception("sequence mismatch?") if src_chunk_hash != dest_chunk_hash: + print("Copying chunk", src_chunk_number, "/", num_chunks) blob = src.get_chunk(src_chunk_number) dest.put_chunk(src_chunk_number, blob) dest.set_length(src.get_length()) + src.close() + dest.close() + if __name__ == '__main__': main() diff --git a/blobsend/client_base.py b/blobsend/client_base.py index 1b72516..a888f99 100644 --- a/blobsend/client_base.py +++ b/blobsend/client_base.py @@ -1,10 +1,6 @@ -from blobsend import CHUNK_SIZE - - class BaseChunkClient(object): - def __init__(self, fpath, chunk_size=CHUNK_SIZE): + def __init__(self, chunk_size): self.chunk_size = chunk_size - self.fpath = fpath def get_hashes(self): """ @@ -36,8 +32,14 @@ class BaseChunkClient(object): """ raise NotImplementedError() + def close(self): + """ + truncate or extend the file + """ + raise NotImplementedError() + @staticmethod - def from_uri(uri): + def from_uri(uri, is_src): """ instantiate a client from the given uri """ diff --git a/blobsend/client_file.py b/blobsend/client_file.py index e71f168..7cfa802 100644 --- a/blobsend/client_file.py +++ b/blobsend/client_file.py @@ -5,7 +5,8 @@ from blobsend import CHUNK_SIZE, hash_chunk class FileChunkClient(BaseChunkClient): def __init__(self, fpath, chunk_size=CHUNK_SIZE): - super().__init__(fpath, chunk_size) + super().__init__(chunk_size) + self.fpath = fpath self.file = open(self.fpath, "ab+") # for get chunk operations, this generic file is used instead of doing lots of open/close self.file.seek(0) @@ -34,9 +35,6 @@ class FileChunkClient(BaseChunkClient): insert the data for chunk_number's position within the file, the content given by contents (which is a file-like object) lol not actually """ position = chunk_number * self.chunk_size - # if position > self.fsize:#TODO not sure if > or >= - # raise Exception("requested chunk {} is beyond EOF".format(chunk_number)) - with open(self.fpath, "rb+") as f: f.seek(position) f.write(contents) @@ -54,8 +52,11 @@ class FileChunkClient(BaseChunkClient): # do nothing for the case of extending the file # put_chunk handles it + def close(self): + self.file.close() + @staticmethod - def from_uri(uri): + def from_uri(uri, is_src): """ instantiate a client from the given uri """ diff --git a/blobsend/client_ssh.py b/blobsend/client_ssh.py new file mode 100644 index 0000000..c456e48 --- /dev/null +++ b/blobsend/client_ssh.py @@ -0,0 +1,76 @@ +import paramiko +from blobsend.client_base import BaseChunkClient +from blobsend import CHUNK_SIZE, hash_chunk + + +""" +ssh client +- assumes this utility (blobcopy) is installed on the remote end +""" + +REMOTE_UTILITY = "/Users/dave/code/blobsend/testenv/bin/_blobsend_ssh_remote"# + + +class SshChunkClient(BaseChunkClient): + def __init__(self, server, username, password, fpath, is_src, chunk_size=CHUNK_SIZE): + super().__init__(chunk_size) + self.fpath = fpath + self.ssh = paramiko.SSHClient() + self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + self.ssh.connect(hostname=server, + username=username, + password=password) + + self.sftp = self.ssh.open_sftp() + + # If the file doesnt exist and we are the destination, create it + if not is_src: + try: + with self.sftp.open(self.fpath, "r"): + pass + except FileNotFoundError: + with self.sftp.open(self.fpath, "wb") as f: + pass + + # it seems like mode "ab+" doesn't work the same way under paramiko + # it refuses to seek before the open point (which is the end of the file) + self.file = self.sftp.open(self.fpath, "r+") + + def get_hashes(self): + stdin, stdout, stderr = self.ssh.exec_command("{} chunks {}".format(REMOTE_UTILITY, self.fpath))#TODO safe arg escapes + stdin.close() + for line in iter(lambda: stdout.readline(1024), ""): + chunk_number, chunk_hash = line.strip().split(" ") + yield (int(chunk_number), chunk_hash, ) + + def get_chunk(self, chunk_number): + position = chunk_number * self.chunk_size + if position > self.get_length(): + raise Exception("requested chunk {} is beyond EOF".format(chunk_number)) + self.file.seek(position)#TODO not thread safe + return self.file.read(self.chunk_size) + + def put_chunk(self, chunk_number, contents): + position = chunk_number * self.chunk_size + self.file.seek(position) + self.file.write(contents) + + def get_length(self): + self.file.seek(0, 2) # seek to end + return self.file.tell() + + def set_length(self, length): + if length < self.get_length(): + self.file.truncate(length) + # do nothing for the case of extending the file + # put_chunk handles it + + def close(self): + self.file.close() + + @staticmethod + def from_uri(uri, is_src): + """ + instantiate a client from the given uri + """ + return SshChunkClient(uri.hostname, uri.username, uri.password, uri.path, is_src) diff --git a/blobsend/client_ssh_remote.py b/blobsend/client_ssh_remote.py new file mode 100644 index 0000000..b6c366d --- /dev/null +++ b/blobsend/client_ssh_remote.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import sys +import argparse +from blobsend.client_file import FileChunkClient + + +def cmd_chunks(args, parser): + c = FileChunkClient(args.fpath) + for chunk_number, chunk_hash in c.get_hashes(): + print(chunk_number, chunk_hash) + sys.stdout.flush() + + +def get_args(): + parser = argparse.ArgumentParser(description="blob copy ssh helper tool") + sp_action = parser.add_subparsers(dest="action", help="action to take") + + p_getchunks = sp_action.add_parser("chunks", help="get file chunks") + p_getchunks.add_argument("fpath", help="file path") + p_getchunks.set_defaults(function=cmd_chunks) + + return parser.parse_args(), parser + + +def main(): + args, parser = get_args() + args.function(args, parser) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e4af3dc --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +bcrypt==3.2.0 +cffi==1.15.0 +cryptography==36.0.1 +paramiko==2.9.0 +pycparser==2.21 +PyNaCl==1.4.0 +six==1.16.0 diff --git a/setup.py b/setup.py index b4b0fa3..54767d1 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ setup(name='blobsend', entry_points={ "console_scripts": [ "blobsend = blobsend.cli:main", + "_blobsend_ssh_remote = blobsend.client_ssh_remote:main" ] }, zip_safe=False)