commit 0f9f4a7142 (parent 7ee79b4455)
Author: dave
Date:   2021-05-29 19:12:35 -07:00

2 changed files with 151 additions and 10 deletions


@@ -14,17 +14,156 @@ from backupdb2.upload import S3UploadQueueThread, Chunk
from backupdb2.boto import get_s3
-class BackupdbHttp(object):
+class BackupManager(object):
"""
Client for listing/reading backups
"""
def __init__(self, bucket, s3conn):
self.bucket = bucket
self.s3 = s3conn
self.api = self
self.v1 = self
def list_namespaces(self):
return self.list_prefix()
def list_backups(self, namespace="default"):
return self.list_prefix(f"{namespace}/")
def list_dates(self, backup, namespace="default"):
return self.list_prefix(f"{namespace}/{backup}/backups/") #TODO technically we should only list those with a meta.json as that signals completeness
def list_prefix(self, prefix=""):
r = self.s3.list_objects(
Bucket=self.bucket,
Delimiter="/",
Prefix=prefix,
)
if r["ResponseMetadata"]["HTTPStatusCode"] != 200:
raise Exception(str(r["ResponseMetadata"]))
plen = len(prefix)
return [
o["Prefix"][plen:].rstrip("/") for o in r.get("CommonPrefixes", [])
] + [
o["Key"][plen:].rstrip("/") for o in r.get("Contents", [])
]
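# Note (illustrative, not part of this commit): with Delimiter="/", S3 reports
# immediate "sub-directories" under the prefix as CommonPrefixes and plain keys
# as Contents. So for hypothetical keys like "default/plexmain/backups/...",
# list_prefix("") -> ["default"] and list_prefix("default/") -> ["plexmain"].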
def get_metadata(self, backup, date, namespace="default"):
return json.loads(
self.s3.get_object(
Bucket=self.bucket,
Key=f"{namespace}/{backup}/backups/{date}/meta.json"
)["Body"].read().decode("utf-8")
)
def get_stream(self, backup, date, namespace="default"):
prefix = f"{namespace}/{backup}/backups/{date}/"
chunks = self.list_prefix(prefix)
chunks.sort()
for chunk in chunks:
if not chunk.startswith("backup.tar.gz."):
continue # ignore metadata etc
yield from self.s3.get_object(
Bucket=self.bucket,
Key=f"{prefix}{chunk}"
)["Body"].iter_chunks(chunk_size=1024 * 1024)
class WebBase(object):
"""
Base class for web components that provides common tools
"""
def __init__(self, bucket, s3conn):
self.bucket = bucket
self.s3 = s3conn
self.mgr = BackupManager(bucket, s3conn)
class BackupdbHttp(WebBase):
"""
Absolute bare minimum html browser web interface
"""
def __init__(self, bucket, s3conn):
super().__init__(bucket, s3conn)
self.api = BackupdbApiV1(bucket, s3conn)
@cherrypy.expose
def index(self):
yield "TODO list of backups/namespaces etc"
yield "<h1>Namespaces</h1><hr>"
for ns in self.mgr.list_namespaces():
yield f'<a href="backups?namespace={ns}">{ns}</a><br />'# TODO lol injection
@cherrypy.expose
def backups(self, namespace="default"):
yield f'<h1>Backups for namespace: <em>{namespace}</em></h1><hr>'# TODO lol injection
for backup in self.mgr.list_backups(namespace=namespace):
yield f'<a href="dates?namespace={namespace}&backup={backup}">{backup}</a><br />'
@cherrypy.expose
def dates(self, backup, namespace="default"):
yield f'<h1>Dates for backup: <em>{backup}</em> in namespace: <em>{namespace}</em></h1><hr>'# TODO lol injection
for date in self.mgr.list_dates(backup, namespace=namespace):
yield f'<a href="api/v1/download?namespace={namespace}&backup={backup}&date={date}">{date}</a> (<a href="api/v1/download?namespace={namespace}&backup={backup}&date={date}&meta=1">meta</a>)<br />'
class BackupdbApiV1(WebBase):
"""
V1 json api
/api/v1/namespaces -> list of namespaces -> ["default"]
/api/v1/backups -> list of backup names -> ["testbackup"]
/api/v1/dates -> list of backup dates ?backup=testbackup -> ["2021-05-27T20:41:32.833886"]
/api/v1/download -> stream of tar.gz data ?backup=testbackup&date=<date> -> (data)
/api/v1/download -> json metadata       same params as above plus &meta=1
Param `namespace` is optional on all endpoints except /namespaces
"""
def __init__(self, bucket, s3conn):
super().__init__(bucket, s3conn)
self.v1 = self
@cherrypy.expose
def index(self, name=None, namespace="default"):
yield '<a href="namespaces">namespaces</a><br />'
yield '<a href="backups">backups</a><br />'
yield '<a href="dates">dates</a><br />'
yield '<a href="download">download</a><br />'
@cherrypy.expose
@cherrypy.tools.json_out()
def namespaces(self):
return self.mgr.list_namespaces()
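# (cherrypy.tools.json_out serializes the handler's return value to JSON and
# sets Content-Type: application/json on the response)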
@cherrypy.expose
@cherrypy.tools.json_out()
def backups(self, namespace="default"):
return self.mgr.list_backups(namespace)
@cherrypy.expose
@cherrypy.tools.json_out()
def dates(self, backup, namespace="default"):
return self.mgr.list_dates(backup, namespace=namespace)
@cherrypy.expose
def download(self, date, backup, meta=False, namespace="default"):
metadata = self.mgr.get_metadata(backup, date, namespace=namespace)
if meta:
cherrypy.response.headers["Content-Type"] = "application/json"
else:
cherrypy.response.headers["Content-Type"] = "application/gzip"
cherrypy.response.headers["Content-Length"] = metadata["size"]
cherrypy.response.headers["Content-Disposition"] = f'filename="{namespace}-{backup}-{date}.tar.gz"'# TODO lol injection
def download():
if meta:
yield json.dumps(metadata).encode('utf-8')
return
yield from self.mgr.get_stream(backup, date, namespace=namespace)
return download()
download._cp_config = {'response.stream': True}
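# Note: 'response.stream': True makes CherryPy deliver the generator's chunks
# as they are produced instead of buffering the full body first, so large
# backup downloads start immediately and use constant memory.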
@cherrypy.expose
@cherrypy.tools.json_out()
@@ -53,7 +192,7 @@ class BackupdbHttp(object):
break
total_bytes += len(data)
sha.update(data)
-queue.put(Chunk(sequence, data, uid, name, namespace))
+queue.put(Chunk(sequence, data, uid, now, name, namespace))
sequence += 1
logging.debug(f"total uploaded: {total_bytes}")
@@ -73,7 +212,7 @@ class BackupdbHttp(object):
logging.debug("upload complete, writing metadata")
meta_response = self.s3.put_object(
Bucket=self.bucket,
Key=f"{namespace}/{name}/tmp/{uid}/meta.json",
Key=f"{namespace}/{name}/backups/{now.isoformat()}/meta.json",
Body=json.dumps(metadata, indent=4, sort_keys=True)
)
if meta_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
@@ -101,7 +240,7 @@ def run_http(args):
# ensure bucket exists
if bucket not in [b['Name'] for b in s3.list_buckets()['Buckets']]:
-print("Creating bucket")
+logging.warning("Creating bucket")
s3.create_bucket(Bucket=bucket)
web = BackupdbHttp(bucket, s3)
@@ -111,7 +250,7 @@ def run_http(args):
# General config options
cherrypy.config.update({
'request.show_tracebacks': True,
-'server.thread_pool': 1,
+'server.thread_pool': 5,
'server.socket_host': "0.0.0.0",
'server.socket_port': args.port,
'server.show_tracebacks': True,


@@ -1,5 +1,6 @@
import logging
import traceback
+from datetime import datetime
from threading import Thread, Semaphore
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
@@ -14,13 +15,14 @@ class Chunk:
"""
sequence: int # order of the chunk in the file
data: bytes
-uid: str  # uid of the backup
+uid: str  # uid label
+date: datetime  # date label
name: str # name of the backup e.g. "plexmain"
namespace: str # namespace of the backup
@property
def path(self) -> str:
return f"{self.namespace}/{self.name}/tmp/{self.uid}/backup.tar.gz.{self.sequence:08d}"
return f"{self.namespace}/{self.name}/backups/{self.date.isoformat()}/backup.tar.gz.{self.sequence:08d}"
class S3UploadQueueThread(Thread):