basic uploading
This commit is contained in:
parent
25b282c856
commit
7ee79b4455
|
@ -0,0 +1,18 @@
|
||||||
|
import boto3
|
||||||
|
from botocore.client import Config as BotoConfig
|
||||||
|
|
||||||
|
|
||||||
|
def get_s3(url):
    """Build a boto3 S3 client from a parsed URL.

    The endpoint is reassembled from the URL's scheme/host/port, and
    credentials are taken from the URL's userinfo section when both a
    username and a password are present. Signing uses S3 signature v4.
    """
    client_kwargs = {"config": BotoConfig(signature_version='s3v4')}

    endpoint = f"{url.scheme}://{url.hostname}"
    if url.port:
        endpoint = f"{endpoint}:{url.port}"
    client_kwargs["endpoint_url"] = endpoint

    if url.username and url.password:
        client_kwargs.update(
            aws_access_key_id=url.username,
            aws_secret_access_key=url.password,
        )

    return boto3.client('s3', **client_kwargs)
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Tuning knobs for the chunked upload pipeline.
MAX_QUEUED_CHUNKS = 5  # max size of pre-upload file chunk queue
MAX_PARALLEL_UPLOADS = 10  # max number of uploads happening in parallel

# memory usage will be the sum of the above numbers times the chunk size
CHUNK_SIZE = 1024 * 1024 * 10  # 10 MB
|
|
@ -0,0 +1,153 @@
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import cherrypy
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
import uuid
|
||||||
|
import hashlib
|
||||||
|
from datetime import datetime
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from queue import Queue
|
||||||
|
|
||||||
|
from backupdb2.common import MAX_QUEUED_CHUNKS, CHUNK_SIZE
|
||||||
|
from backupdb2.upload import S3UploadQueueThread, Chunk
|
||||||
|
from backupdb2.boto import get_s3
|
||||||
|
|
||||||
|
|
||||||
|
class BackupdbHttp(object):
    """CherryPy web application exposing the backup upload API.

    The instance aliases itself as ``self.api`` and ``self.v1`` so the
    default dispatcher exposes the same handlers under /, /api/..., and
    /api/v1/... paths.
    """

    def __init__(self, bucket, s3conn):
        self.bucket = bucket  # name of the S3 bucket backups are written to
        self.s3 = s3conn  # boto3 S3 client (see backupdb2.boto.get_s3)

        # alias self so handlers are also reachable at /api/ and /api/v1/
        self.api = self
        self.v1 = self

    @cherrypy.expose
    def index(self):
        # placeholder root page
        yield "TODO list of backups/namespaces etc"

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def upload(self, name, namespace="default"):
        """Receive a backup file streamed in the request body.

        The body is read in CHUNK_SIZE pieces; each piece is handed to a
        background S3UploadQueueThread via a bounded queue (backpressure
        caps memory use), while a running sha256 and byte count are kept.
        After all chunks are queued a ``meta.json`` describing the upload
        is written to S3. Returns the metadata dict on success; on failure
        sets HTTP 500 and returns ``{"errors": ...}``.
        """
        #TODO validate name & namespace
        # cherrypy.response.timeout = 3600
        now = datetime.now()
        uid = str(uuid.uuid4())  # unique id for this upload attempt
        sha = hashlib.sha256()  # running hash of the full request body
        # bounded queue: producer blocks when the uploader falls behind
        queue = Queue(MAX_QUEUED_CHUNKS)
        background = S3UploadQueueThread(queue, self.bucket, self.s3)
        background.start()

        total_bytes = 0
        sequence = 0  # ordinal of the next chunk

        while True:
            # accumulate a full CHUNK_SIZE chunk (body.read may return
            # short reads), or whatever remains at end of stream
            data = b''
            while len(data) < CHUNK_SIZE:
                readmax = CHUNK_SIZE - len(data)
                bit = cherrypy.request.body.read(readmax)
                if not bit:
                    break
                data += bit
            if not data:
                break
            total_bytes += len(data)
            sha.update(data)
            queue.put(Chunk(sequence, data, uid, name, namespace))
            sequence += 1
            logging.debug(f"total uploaded: {total_bytes}")

        queue.put(None)  # signals that there are no more items to be processed
        shasum = sha.hexdigest()
        metadata = {
            "date": now.isoformat(),
            "uid": uid,
            "chunks": sequence,
            "size": total_bytes,
            "sha256": shasum
        }

        logging.debug("read all chunks, joining uploader")
        background.join()

        logging.debug("upload complete, writing metadata")
        meta_response = self.s3.put_object(
            Bucket=self.bucket,
            Key=f"{namespace}/{name}/tmp/{uid}/meta.json",
            Body=json.dumps(metadata, indent=4, sort_keys=True)
        )
        if meta_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
            cherrypy.response.status = 500
            return {"errors": "backend upload failed: " + str(meta_response["ResponseMetadata"])}

        if background.success():
            logging.debug("upload success")
            return metadata
        else:
            cherrypy.response.status = 500
            logging.error(f"uploader failed: {background.errors}")
            return {"errors": background.errors}

    @cherrypy.expose
    @cherrypy.tools.json_out()
    def get_latest(self, name, namespace="default"):
        # TODO not implemented yet
        pass
|
||||||
|
|
||||||
|
|
||||||
|
def run_http(args):
    """Configure and run the CherryPy server until terminated.

    Connects to the S3 endpoint named by ``args.s3_url`` (the URL path is
    the bucket name), creating the bucket if it does not already exist,
    then mounts the web app and blocks until a SIGINT/SIGTERM arrives.
    """
    parsed = urlparse(args.s3_url)
    bucket_name = parsed.path[1:]  # drop the leading "/" of the url path
    s3conn = get_s3(parsed)

    # ensure bucket exists
    existing = [b['Name'] for b in s3conn.list_buckets()['Buckets']]
    if bucket_name not in existing:
        print("Creating bucket")
        s3conn.create_bucket(Bucket=bucket_name)

    app = BackupdbHttp(bucket_name, s3conn)
    cherrypy.tree.mount(app, '/', {'/': {}})

    # General config options
    cherrypy.config.update({
        'request.show_tracebacks': True,
        'server.thread_pool': 1,
        'server.socket_host': "0.0.0.0",
        'server.socket_port': args.port,
        'server.show_tracebacks': True,
        'log.screen': False,
        'engine.autoreload.on': args.debug,
        'server.max_request_body_size': 0,  # 0 = unlimited; bodies stream
    })

    def signal_handler(signum, stack):
        # shut the engine down cleanly on SIGINT/SIGTERM
        logging.critical('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        cherrypy.engine.exit()
|
||||||
|
|
||||||
|
|
||||||
|
def get_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-p", "--port", default=8080, type=int, help="listen port for http server")
    parser.add_argument("-s", "--s3-url", required=True, help="minio server address")
    parser.add_argument("--debug", action="store_true", help="debug mode")
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entrypoint: parse arguments, set up logging, run the server."""
    args = get_args()
    level = logging.DEBUG if args.debug else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s"
    )
    # quiet down chatty third-party loggers
    for noisy in ("botocore", "urllib3"):
        logging.getLogger(noisy).setLevel(logging.ERROR)
    run_http(args)
|
|
@ -0,0 +1,68 @@
|
||||||
|
import logging
|
||||||
|
import traceback
|
||||||
|
from threading import Thread, Semaphore
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from backupdb2.common import MAX_PARALLEL_UPLOADS
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Chunk:
    """One piece of an in-progress backup upload."""
    sequence: int  # order of the chunk in the file
    data: bytes  # raw chunk payload
    uid: str  # uid of the backup
    name: str  # name of the backup e.g. "plexmain"
    namespace: str  # namespace of the backup

    @property
    def path(self) -> str:
        """S3 key for this chunk; sequence is zero-padded so keys sort."""
        prefix = f"{self.namespace}/{self.name}/tmp/{self.uid}"
        return f"{prefix}/backup.tar.gz.{self.sequence:08d}"
|
||||||
|
|
||||||
|
|
||||||
|
class S3UploadQueueThread(Thread):
    """Background thread that drains a queue of Chunk objects and uploads
    them to S3 with bounded parallelism.

    The producer puts Chunk objects on ``queue`` and a single ``None`` as
    the end-of-stream sentinel. Failures are collected as traceback
    strings in ``self.errors``; callers check ``success()`` after join().
    """

    def __init__(self, queue, bucket, s3conn):
        super().__init__()
        self.queue = queue  # Queue of Chunk objects; None terminates
        self.bucket = bucket  # destination S3 bucket name
        self.s3 = s3conn  # boto3 S3 client
        # bounds how many chunks are submitted to (and held by) the
        # executor at once, capping memory alongside the bounded queue
        self.lock = Semaphore(MAX_PARALLEL_UPLOADS)
        self.errors = []  # traceback strings from failed uploads

    def run(self):
        with ThreadPoolExecutor(max_workers=MAX_PARALLEL_UPLOADS) as executor:
            futures = []
            while True:
                chunk = self.queue.get()
                if chunk is None:  # end-of-stream sentinel
                    break
                self.lock.acquire()
                futures.append(executor.submit(self.upload, chunk))

            for future in as_completed(futures):
                logging.debug("upload future completed")
                try:
                    future.result()
                # was a bare `except:` — narrowed so KeyboardInterrupt /
                # SystemExit are not silently swallowed into self.errors
                except Exception:
                    self.errors.append(traceback.format_exc())

    def upload(self, chunk):
        """Upload one chunk, always releasing the submission semaphore."""
        try:
            self.do_upload(chunk)
        finally:
            self.lock.release()

    def do_upload(self, chunk):
        """Perform the S3 put for one chunk; raises on a non-200 response."""
        logging.debug(f"uploading chunk {chunk.sequence} of len {len(chunk.data)} to {chunk.path}")
        meta_response = self.s3.put_object(
            Bucket=self.bucket,
            Key=chunk.path,
            Body=chunk.data
        )
        # explicit raise instead of assert so the check survives `python -O`
        if meta_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
            raise RuntimeError(f"Upload failed: {meta_response}")  #TODO retry

    def success(self):
        """Return True when every queued chunk uploaded without error."""
        return not self.errors
|
|
@ -0,0 +1,22 @@
|
||||||
|
boto3==1.17.82
|
||||||
|
botocore==1.20.82
|
||||||
|
certifi==2020.12.5
|
||||||
|
chardet==4.0.0
|
||||||
|
cheroot==8.5.2
|
||||||
|
CherryPy==18.6.0
|
||||||
|
idna==2.10
|
||||||
|
jaraco.classes==3.2.1
|
||||||
|
jaraco.collections==3.3.0
|
||||||
|
jaraco.functools==3.3.0
|
||||||
|
jaraco.text==3.5.0
|
||||||
|
jmespath==0.10.0
|
||||||
|
more-itertools==8.8.0
|
||||||
|
portend==2.7.1
|
||||||
|
python-dateutil==2.8.1
|
||||||
|
pytz==2021.1
|
||||||
|
requests==2.25.1
|
||||||
|
s3transfer==0.4.2
|
||||||
|
six==1.16.0
|
||||||
|
tempora==4.0.2
|
||||||
|
urllib3==1.26.5
|
||||||
|
zc.lockfile==2.0
|
9
setup.py
9
setup.py
|
@ -11,17 +11,18 @@ with open("requirements.txt") as f:
|
||||||
deps = f.read().split()
|
deps = f.read().split()
|
||||||
|
|
||||||
|
|
||||||
setup(name='package',
|
setup(name='backupdb2',
|
||||||
version=__version__,
|
version=__version__,
|
||||||
description='description',
|
description='backup server and cli',
|
||||||
url='',
|
url='',
|
||||||
author='dpedu',
|
author='dpedu',
|
||||||
author_email='dave@davepedu.com',
|
author_email='dave@davepedu.com',
|
||||||
packages=['package'],
|
packages=['backupdb2'],
|
||||||
install_requires=deps,
|
install_requires=deps,
|
||||||
entry_points={
|
entry_points={
|
||||||
"console_scripts": [
|
"console_scripts": [
|
||||||
"packagecli = package.cli:main",
|
"backupdb2 = backupdb2.cli:main",
|
||||||
|
"backupdbserver = backupdb2.server:main",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
zip_safe=False)
|
zip_safe=False)
|
||||||
|
|
Loading…
Reference in New Issue