Compare commits

...

2 Commits

Author SHA1 Message Date
dave dbe87c00f6 separate emit function 2021-03-18 13:02:46 -07:00
dave 132824295a begin non-cgi version 2021-03-12 16:02:46 -08:00
6 changed files with 254 additions and 2 deletions

backupdb/__init__.py (Normal file, +0)

backupdb/server.py (Normal file, +211)

@@ -0,0 +1,211 @@
import logging
import os
import tempfile
from datetime import datetime
from urllib.parse import urlparse

import boto3
import cherrypy
from botocore.client import Config as BotoConfig


DIR_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"


class AppWeb(object):
    def __init__(self, s3, bucket_name):
        self.s3 = s3
        self.s3_root = "repos"
        self.bucket_name = bucket_name
        # alias self so the same handlers also answer under /api/v1/
        self.api = self
        self.v1 = self

    @cherrypy.expose
    def index(self):
        yield 'Backup DB'

    def get_repo_path(self, repo_name):
        return "/".join([self.s3_root, "repos", repo_name, "data"])

    def get_backup_path(self, repo_name, backup_datestr):
        return "/".join([self.get_repo_path(repo_name), backup_datestr])

    @cherrypy.expose
    def new_backup(self, name, proto, keep):
        """
        Accept a new backup.
        :param name: backup repository name
        :param proto: protocol name, archive or rsync
        :param keep: how many backups to trim to after accepting this one
        """
        # TODO validate name
        # TODO rsync
        # TODO trim old backups down to `keep` after accepting this one
        if proto != "archive":
            yield "unsupported protocol: {}".format(proto)
            return
        date = datetime.now().strftime(DIR_TIMESTAMP_FORMAT)
        chunk_size = 32 * 1024 * 1024
        part = 0
        # TODO improve this such that it uploads chunks to S3 while still downloading from the client
        while True:
            with tempfile.TemporaryFile() as tmp:
                # TODO: background task to later combine .part files into one large object under /data/backup.tar.gz
                backup_key = self.get_backup_path(name, date) + "/upload/backup.tar.gz.part{}".format(part)
                # TODO dont read it all at once
                chunk = cherrypy.request.body.read(chunk_size)
                if not chunk:
                    break
                tmp.write(chunk)
                tmp.seek(0)
                self.s3.put_object(Body=tmp,
                                   Bucket=self.bucket_name,
                                   Key=backup_key)
                part += 1
        yield "OK - {}".format(backup_key)

    @cherrypy.expose
    def get_backup(self, name, which=None):
        """
        Returns .tar.gz data to the browser
        """
        repo_path = self.get_repo_path(name) + "/"
        if not which:
            backups = self.s3.list_objects(Bucket=self.bucket_name,
                                           Prefix=repo_path,
                                           Delimiter="/")  # TODO pagination
            backups = [i["Prefix"] for i in backups["CommonPrefixes"]]
            # each entry is a path like: 'repos/repos/testing_archive/data/2021-03-12T13:31:04.845826/'
            backups = [i.split(repo_path)[1][0:-1] for i in backups]
            # each entry now is a backup id like: '2021-03-12T13:31:04.845826'
            backups.sort(reverse=True)
            selected_backup = backups[0]
        else:
            selected_backup = which  # TODO normalize string
        backup_path = self.get_backup_path(name, selected_backup)
        elements = self.s3.list_objects(Bucket=self.bucket_name,
                                        Prefix=backup_path)  # TODO pagination
        keys = []
        size = 0
        for key in elements['Contents']:
            keys.append(key['Key'])
            size += key['Size']
        # keys is a sorted list of bucket paths to emit; size is the total across
        # parts, so Content-Length is correct for the concatenated stream
        keys.sort()
        cherrypy.response.headers.update({"Content-Length": str(size),
                                          "Content-Type": 'application/x-gzip',
                                          "Content-Disposition": 'attachment; filename="backup.tar.gz"'})
        return self.emit(keys)
    get_backup._cp_config = {'response.stream': True}

    def emit(self, keys):
        # stream each object to the client in 256 KiB chunks, in key order
        for key in keys:
            o = self.s3.get_object(Bucket=self.bucket_name, Key=key)
            while True:
                chunk = o["Body"].read(256 * 1024)
                if not chunk:
                    break
                yield chunk


def main():
    import argparse
    import signal

    parser = argparse.ArgumentParser(description="backupdb server")
    parser.add_argument('-p', '--port', default=8080, type=int, help="http port to listen on")
    parser.add_argument('-s', '--s3', help="http:// or https:// connection string",
                        default=os.environ.get("S3_URL"))
    parser.add_argument('--debug', action="store_true", help="enable development options")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    if not args.s3:
        parser.error("--s3 or S3_URL required")

    # set up s3 client; credentials, endpoint and bucket all come from the
    # connection string, e.g. http://key:secret@host:port/bucketname
    s3url = urlparse(args.s3)
    s3args = {"config": BotoConfig(signature_version='s3v4')}
    endpoint_url = f"{s3url.scheme}://{s3url.hostname}"
    if s3url.port:
        endpoint_url += f":{s3url.port}"
    s3args["endpoint_url"] = endpoint_url
    if s3url.username and s3url.password:
        s3args["aws_access_key_id"] = s3url.username
        s3args["aws_secret_access_key"] = s3url.password
    s3 = boto3.client('s3', **s3args)
    bucket = s3url.path[1:]

    # ensure bucket exists
    if bucket not in [b['Name'] for b in s3.list_buckets()['Buckets']]:
        print("Creating bucket")
        s3.create_bucket(Bucket=bucket)

    # set up main web screen
    web = AppWeb(s3, bucket)
    cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False}})
    cherrypy.config.update({
        'tools.sessions.on': False,
        'request.show_tracebacks': True,
        'server.socket_port': args.port,
        'server.thread_pool': 5,
        'server.socket_host': '0.0.0.0',
        'server.show_tracebacks': True,
        'log.screen': False,
        'engine.autoreload.on': args.debug,
        'server.max_request_body_size': 0,  # 0 = unlimited; backups can be huge
        'server.socket_timeout': 7200,
        'response.timeout': 7200,
    })

    def signal_handler(signum, stack):
        logging.warning('Got sig {}, exiting...'.format(signum))
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        cherrypy.engine.exit()


if __name__ == '__main__':
    main()
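
For context, a minimal client sketch showing how the two endpoints above fit together. This is not part of the changeset: the host, port, repo name, and filenames are assumptions, and it uses the third-party requests library.

import requests

BASE = "http://localhost:8080"  # assumed local dev instance

# upload: the raw .tar.gz stream is the POST body; the query args match
# new_backup()'s signature
with open("backup.tar.gz", "rb") as f:
    r = requests.post(BASE + "/new_backup",
                      params={"name": "testing_archive", "proto": "archive", "keep": "5"},
                      data=f)
    # e.g. "OK - repos/repos/testing_archive/data/<timestamp>/upload/backup.tar.gz.part0"
    print(r.text)

# download: omitting `which` selects the newest backup
with requests.get(BASE + "/get_backup", params={"name": "testing_archive"}, stream=True) as r:
    with open("restored.tar.gz", "wb") as out:
        for chunk in r.iter_content(256 * 1024):
            out.write(chunk)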
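
The TODO in new_backup (upload to S3 while still receiving from the client, and end up with one object instead of .part files to combine later) maps naturally onto S3's multipart upload API. A rough boto3 sketch of that idea; this is an assumption about a possible follow-up, not the author's implementation:

def stream_to_s3(s3, bucket, key, body, chunk_size=32 * 1024 * 1024):
    """Stream a file-like body (e.g. cherrypy.request.body) into one S3 object."""
    mpu = s3.create_multipart_upload(Bucket=bucket, Key=key)
    parts = []
    num = 1
    while True:
        chunk = body.read(chunk_size)  # parts must be >= 5 MB except the last
        if not chunk:
            break
        resp = s3.upload_part(Bucket=bucket, Key=key, UploadId=mpu["UploadId"],
                              PartNumber=num, Body=chunk)
        parts.append({"ETag": resp["ETag"], "PartNumber": num})
        num += 1
    if parts:
        s3.complete_multipart_upload(Bucket=bucket, Key=key, UploadId=mpu["UploadId"],
                                     MultipartUpload={"Parts": parts})
    else:
        # nothing was received; don't leave a dangling upload
        s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=mpu["UploadId"])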


@@ -11,6 +11,7 @@ server {
    fastcgi_temp_path /data/data/datadb/tmp/;
    proxy_temp_path /data/data/datadb/tmp/;
    #auth_basic "Restricted";
    #auth_basic_user_file /data/htpasswd;
@@ -25,9 +26,10 @@ server {
        fastcgi_pass unix:/tmp/fcgiwrap.socket;
        include /etc/nginx/fastcgi_params;
        fastcgi_param SCRIPT_FILENAME /usr/share/backupdb$fastcgi_script_name;
-       fastcgi_read_timeout 600s;
-       fastcgi_send_timeout 600s;
+       fastcgi_read_timeout 7200s;
+       fastcgi_send_timeout 7200s;
        client_max_body_size 0;
        fastcgi_buffering off;
    }

    location /api/ {

requirements.txt (Normal file, +18)

@@ -0,0 +1,18 @@
boto3==1.17.26
botocore==1.20.26
cheroot==8.5.2
CherryPy==18.6.0
jaraco.classes==3.2.1
jaraco.collections==3.2.0
jaraco.functools==3.2.1
jaraco.text==3.5.0
jmespath==0.10.0
more-itertools==8.7.0
portend==2.7.1
python-dateutil==2.8.1
pytz==2021.1
s3transfer==0.3.4
six==1.15.0
tempora==4.0.1
urllib3==1.26.3
zc.lockfile==2.0


@@ -16,6 +16,7 @@ from random import randint
from time import time
from hashlib import md5
from glob import iglob
import logging
import json
@@ -225,6 +226,7 @@ def handle_req():
if __name__ == "__main__":
    try:
        #logging.basicConfig(level=logging.WARNING)
        handle_req()
    except Exception as e:
        start_response(status_code=("500", "Internal server error"))

setup.py (Normal file, +19)

@@ -0,0 +1,19 @@
#!/usr/bin/env python3

from setuptools import setup

__version__ = "0.0.0"

setup(name='backupdb',
      version=__version__,
      description='Backupdb client + server',
      url='',
      author='dpedu',
      author_email='dave@davepedu.com',
      packages=['backupdb'],
      entry_points={
          "console_scripts": [
              "backupdbserver = backupdb.server:main",
          ]
      },
      zip_safe=False)
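
Assuming a standard setuptools workflow (not shown in this diff), the server would be installed and started with something like pip install -e . followed by backupdbserver -s http://key:secret@localhost:9000/bucketname, where the S3 URL is an example value in the format expected by the argument parser in backupdb/server.py.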