basic uploading
parent 25b282c856
commit 7ee79b4455

backupdb2/boto.py
@@ -0,0 +1,18 @@
import boto3
from botocore.client import Config as BotoConfig


def get_s3(url):
    # set up s3 client
    s3args = {"config": BotoConfig(signature_version='s3v4')}

    endpoint_url = f"{url.scheme}://{url.hostname}"
    if url.port:
        endpoint_url += f":{url.port}"
    s3args["endpoint_url"] = endpoint_url

    if url.username and url.password:
        s3args["aws_access_key_id"] = url.username
        s3args["aws_secret_access_key"] = url.password

    return boto3.client('s3', **s3args)
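For reference, a minimal usage sketch (the endpoint URL and credentials below are hypothetical): get_s3() reads only the scheme, hostname, port, and userinfo from the parsed URL, so a local MinIO instance can be targeted like so:

from urllib.parse import urlparse
from backupdb2.boto import get_s3

# hypothetical MinIO endpoint; the credentials ride in the URL's userinfo,
# and the path component ("/backups") is ignored by get_s3 itself
s3 = get_s3(urlparse("http://minioadmin:minioadmin@127.0.0.1:9000/backups"))
print(s3.list_buckets()["Buckets"])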

backupdb2/common.py
@@ -0,0 +1,4 @@
MAX_QUEUED_CHUNKS = 5  # max size of pre-upload file chunk queue
MAX_PARALLEL_UPLOADS = 10  # max number of uploads happening in parallel
# memory usage will be the sum of the above numbers times the chunk size
CHUNK_SIZE = 1024 * 1024 * 10  # 10 MB
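Worked out with these defaults, the worst-case buffered payload is (5 queued + 10 uploading in parallel) × 10 MB = 150 MB, plus the one chunk the HTTP handler is currently reading.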

backupdb2/server.py
@@ -0,0 +1,153 @@
import json
import argparse
import cherrypy
import logging
import signal
import uuid
import hashlib
from datetime import datetime
from urllib.parse import urlparse
from queue import Queue

from backupdb2.common import MAX_QUEUED_CHUNKS, CHUNK_SIZE
from backupdb2.upload import S3UploadQueueThread, Chunk
from backupdb2.boto import get_s3


class BackupdbHttp(object):
    def __init__(self, bucket, s3conn):
        self.bucket = bucket
        self.s3 = s3conn

        self.api = self  # alias so the same handlers also resolve at /api/... and /api/v1/...
        self.v1 = self

    @cherrypy.expose
    def index(self):
        yield "TODO list of backups/namespaces etc"

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def upload(self, name, namespace="default"):
        # TODO validate name & namespace
        # cherrypy.response.timeout = 3600
        now = datetime.now()
        uid = str(uuid.uuid4())
        sha = hashlib.sha256()
        queue = Queue(MAX_QUEUED_CHUNKS)
        background = S3UploadQueueThread(queue, self.bucket, self.s3)
        background.start()

        total_bytes = 0
        sequence = 0

        while True:
            # read the request body one CHUNK_SIZE block at a time
            data = b''
            while len(data) < CHUNK_SIZE:
                readmax = CHUNK_SIZE - len(data)
                bit = cherrypy.request.body.read(readmax)
                if not bit:
                    break
                data += bit
            if not data:
                break
            total_bytes += len(data)
            sha.update(data)
            queue.put(Chunk(sequence, data, uid, name, namespace))
            sequence += 1
            logging.debug(f"total uploaded: {total_bytes}")

        queue.put(None)  # signals that there are no more items to be processed
        shasum = sha.hexdigest()
        metadata = {
            "date": now.isoformat(),
            "uid": uid,
            "chunks": sequence,
            "size": total_bytes,
            "sha256": shasum
        }

        logging.debug("read all chunks, joining uploader")
        background.join()

        logging.debug("upload complete, writing metadata")
        meta_response = self.s3.put_object(
            Bucket=self.bucket,
            Key=f"{namespace}/{name}/tmp/{uid}/meta.json",
            Body=json.dumps(metadata, indent=4, sort_keys=True)
        )
        if meta_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
            cherrypy.response.status = 500
            return {"errors": "backend upload failed: " + str(meta_response["ResponseMetadata"])}

        if background.success():
            logging.debug("upload success")
            return metadata
        else:
            cherrypy.response.status = 500
            logging.error(f"uploader failed: {background.errors}")
            return {"errors": background.errors}

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def get_latest(self, name, namespace="default"):
        pass


def run_http(args):
    s3url = urlparse(args.s3_url)
    bucket = s3url.path[1:]
    s3 = get_s3(s3url)

    # ensure bucket exists
    if bucket not in [b['Name'] for b in s3.list_buckets()['Buckets']]:
        print("Creating bucket")
        s3.create_bucket(Bucket=bucket)

    web = BackupdbHttp(bucket, s3)

    cherrypy.tree.mount(web, '/', {'/': {}})

    # General config options
    cherrypy.config.update({
        'request.show_tracebacks': True,
        'server.thread_pool': 1,
        'server.socket_host': "0.0.0.0",
        'server.socket_port': args.port,
        'server.show_tracebacks': True,
        'log.screen': False,
        'engine.autoreload.on': args.debug,
        'server.max_request_body_size': 0,  # disable the request size cap; uploads may be arbitrarily large
    })

    def signal_handler(signum, stack):
        logging.critical('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        cherrypy.engine.exit()


def get_args():
    p = argparse.ArgumentParser()
    p.add_argument("-p", "--port", default=8080, type=int, help="listen port for http server")
    p.add_argument("-s", "--s3-url", required=True, help="minio server address")
    p.add_argument("--debug", action="store_true", help="debug mode")
    return p.parse_args()


def main():
    args = get_args()
    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s"
    )
    logging.getLogger("botocore").setLevel(logging.ERROR)
    logging.getLogger("urllib3").setLevel(logging.ERROR)
    run_http(args)
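A minimal client sketch, for illustration only (host, port, and file name are hypothetical; requests is already pinned in requirements.txt). The raw request body is streamed and read in CHUNK_SIZE blocks by the handler above, and the name/namespace arrive as query parameters:

import requests

# stream a local archive to the upload endpoint; /api/v1/ works because of the self-aliasing in __init__
with open("backup.tar.gz", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8080/api/v1/upload",
        params={"name": "plexmain", "namespace": "default"},
        data=f,  # sent as a streamed request body
    )
print(resp.json())  # metadata dict: date, uid, chunks, size, sha256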

backupdb2/upload.py
@@ -0,0 +1,68 @@
import logging
import traceback
from threading import Thread, Semaphore
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass

from backupdb2.common import MAX_PARALLEL_UPLOADS


@dataclass
class Chunk:
    """
    Object containing one chunk of data to be uploaded
    """
    sequence: int  # order of the chunk in the file
    data: bytes
    uid: str  # uid of the backup
    name: str  # name of the backup e.g. "plexmain"
    namespace: str  # namespace of the backup

    @property
    def path(self) -> str:
        return f"{self.namespace}/{self.name}/tmp/{self.uid}/backup.tar.gz.{self.sequence:08d}"


class S3UploadQueueThread(Thread):
    def __init__(self, queue, bucket, s3conn):
        super().__init__()
        self.queue = queue
        self.bucket = bucket
        self.s3 = s3conn
        self.lock = Semaphore(MAX_PARALLEL_UPLOADS)
        self.errors = []

    def run(self):
        with ThreadPoolExecutor(max_workers=MAX_PARALLEL_UPLOADS) as executor:
            futures = []
            while True:
                chunk = self.queue.get()
                if chunk is None:
                    break
                self.lock.acquire()  # cap how many chunks are held by the executor at once
                futures.append(executor.submit(self.upload, chunk))

            for future in as_completed(futures):
                logging.debug("upload future completed")
                try:
                    future.result()
                except Exception:
                    self.errors.append(traceback.format_exc())

    def upload(self, chunk):
        try:
            self.do_upload(chunk)
        finally:
            self.lock.release()

    def do_upload(self, chunk):
        logging.debug(f"uploading chunk {chunk.sequence} of len {len(chunk.data)} to {chunk.path}")
        meta_response = self.s3.put_object(
            Bucket=self.bucket,
            Key=chunk.path,
            Body=chunk.data
        )
        assert meta_response["ResponseMetadata"]["HTTPStatusCode"] == 200, f"Upload failed: {meta_response}"  # TODO
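Design note: ThreadPoolExecutor's internal work queue is unbounded, so without the Semaphore the submit loop could drain the bounded Queue faster than uploads finish and buffer an unbounded number of 10 MB chunks; the Semaphore is what makes the memory bound in common.py hold. A standalone sketch of the queue protocol (hypothetical endpoint, bucket, and uid): the producer puts Chunk objects followed by a None sentinel, then joins the thread and checks success():

from queue import Queue
from urllib.parse import urlparse

from backupdb2.boto import get_s3
from backupdb2.common import MAX_QUEUED_CHUNKS
from backupdb2.upload import S3UploadQueueThread, Chunk

# hypothetical MinIO endpoint and bucket name
s3 = get_s3(urlparse("http://minioadmin:minioadmin@127.0.0.1:9000/backups"))
queue = Queue(MAX_QUEUED_CHUNKS)
uploader = S3UploadQueueThread(queue, "backups", s3)
uploader.start()

queue.put(Chunk(0, b"hello world", "0000-demo-uid", "demo", "default"))
queue.put(None)  # sentinel: no more chunks
uploader.join()
print(uploader.success(), uploader.errors)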

requirements.txt
@@ -0,0 +1,22 @@
boto3==1.17.82
botocore==1.20.82
certifi==2020.12.5
chardet==4.0.0
cheroot==8.5.2
CherryPy==18.6.0
idna==2.10
jaraco.classes==3.2.1
jaraco.collections==3.3.0
jaraco.functools==3.3.0
jaraco.text==3.5.0
jmespath==0.10.0
more-itertools==8.8.0
portend==2.7.1
python-dateutil==2.8.1
pytz==2021.1
requests==2.25.1
s3transfer==0.4.2
six==1.16.0
tempora==4.0.2
urllib3==1.26.5
zc.lockfile==2.0

setup.py
@@ -11,17 +11,18 @@ with open("requirements.txt") as f:
     deps = f.read().split()


-setup(name='package',
+setup(name='backupdb2',
       version=__version__,
-      description='description',
+      description='backup server and cli',
       url='',
       author='dpedu',
       author_email='dave@davepedu.com',
-      packages=['package'],
+      packages=['backupdb2'],
       install_requires=deps,
       entry_points={
           "console_scripts": [
-              "packagecli = package.cli:main",
+              "backupdb2 = backupdb2.cli:main",
+              "backupdbserver = backupdb2.server:main",
           ]
       },
       zip_safe=False)
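After this change, installing the package (e.g. pip install -e .) exposes two console scripts: backupdb2 (backupdb2.cli:main, not part of this commit) and backupdbserver (backupdb2.server:main), so the server can be launched as, for example, `backupdbserver -s http://minioadmin:minioadmin@127.0.0.1:9000/backups --debug` (hypothetical credentials and host; the flags come from get_args above).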