From 51b3910177237e4a1f567ee1cb0ea5efad021cb4 Mon Sep 17 00:00:00 2001
From: dave
Date: Sun, 7 Oct 2018 15:17:08 -0700
Subject: [PATCH] base on new nexus image

---
 Dockerfile                 |   3 +-
 README.md                  |  15 ++-
 makedirs                   |   8 +-
 scripts/common/__init__.py |   0
 scripts/common/datadb.py   |  42 +++++++
 scripts/get_backup         |  85 ++++++++++++++
 scripts/new_backup         | 233 +++++++++++++++++++++++++++++++++++++
 scripts/test               |  25 ++++
 8 files changed, 404 insertions(+), 7 deletions(-)
 create mode 100644 scripts/common/__init__.py
 create mode 100644 scripts/common/datadb.py
 create mode 100755 scripts/get_backup
 create mode 100755 scripts/new_backup
 create mode 100755 scripts/test

diff --git a/Dockerfile b/Dockerfile
index a9ac38c..0fbf6d3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,4 @@
 FROM apps2reg:5000/dpedu/nexus
-ADD default /etc/nginx/sites-available/default
 ADD makedirs /start.d/
-ADD backupdb-scripts /usr/share/backupdb
+ADD scripts/ /data/scripts/
diff --git a/README.md b/README.md
index 71369f7..f8ef01e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,14 @@
-For more info see: http://gitlab.davepedu.com/dave/datadb-cli
+# docker-backupdb
+
+**Nexus-based service for backup centralization**
+
+
+## Running the image
+
+Persist the dirs/files:
+
+* `/data/data/`: primary storage location of backed up data
+* `/data/keys/`: ssh server host keys to persist
+* `/data/nexus_authorized_keys`: file containing authorized ssh keys for the nexus user
+
+Expose ports 22 and 80.
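For reference, a docker run invocation along these lines would satisfy the persistence and port requirements described in the new README. The image tag, host paths, and published host ports below are placeholders and are not defined by this patch:

    docker run -d \
        --name backupdb \
        -v /srv/backupdb/data:/data/data \
        -v /srv/backupdb/keys:/data/keys \
        -v /srv/backupdb/nexus_authorized_keys:/data/nexus_authorized_keys \
        -p 2222:22 \
        -p 8080:80 \
        apps2reg:5000/dpedu/backupdb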
diff --git a/makedirs b/makedirs
index 6b708d6..5177c58 100755
--- a/makedirs
+++ b/makedirs
@@ -1,8 +1,8 @@
 #!/bin/sh
 
-mkdir -p /nexus/datadb/tmp /nexus/datadb/backups
+mkdir -p /data/data/datadb/tmp /data/data/datadb/backups
 
 chown nexus:nexus \
-    /nexus/datadb \
-    /nexus/datadb/tmp \
-    /nexus/datadb/backups
+    /data/data/datadb \
+    /data/data/datadb/tmp \
+    /data/data/datadb/backups
diff --git a/scripts/common/__init__.py b/scripts/common/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/common/datadb.py b/scripts/common/datadb.py
new file mode 100644
index 0000000..1b71029
--- /dev/null
+++ b/scripts/common/datadb.py
@@ -0,0 +1,42 @@
+import os
+from datetime import datetime
+from os.path import join as pathjoin
+from os.path import exists
+
+
+DATADB_ROOT = "/data/data/datadb/backups/"
+DATADB_TMP = "/data/data/datadb/tmp/"
+
+DATADB_DIR_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"  # Same as isoformat(), but we need to parse it back
+
+
+class NoBackupException(Exception):
+    pass
+
+
+def get_backup_dir(backup_name):
+    """
+    Returns path to this profile's backup base dir. The base dir contains the 'data' directory
+    """
+    return pathjoin(DATADB_ROOT, backup_name)
+
+
+def get_latest_backup(backup_name):
+    """
+    Get the absolute local path to a backup or raise an exception if none exists. When getting a backup, sort folder
+    names (they're timestamps) and return the newest.
+    :returns: str absolute path to the newest backup's data dir
+    """
+    backups_dir = pathjoin(get_backup_dir(backup_name), 'data')
+
+    if not exists(backups_dir):
+        raise NoBackupException("Backup {} does not exist".format(backup_name))
+
+    dirs = os.listdir(backups_dir)
+
+    if not dirs:
+        raise NoBackupException("No backups exist for {}".format(backup_name))
+
+    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in dirs])
+
+    return pathjoin(backups_dir, dirs[-1].strftime(DATADB_DIR_TIMESTAMP_FORMAT), 'data')
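To make the path handling above concrete, the on-disk layout that get_backup_dir() and get_latest_backup() operate on looks roughly like this; the profile name "myhost" and the timestamps are illustrative only:

    /data/data/datadb/backups/myhost/                                        <- get_backup_dir("myhost")
    /data/data/datadb/backups/myhost/data/2018-10-06T15:01:02.000000/data/
    /data/data/datadb/backups/myhost/data/2018-10-07T15:01:02.000000/data/   <- returned by get_latest_backup("myhost")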
diff --git a/scripts/get_backup b/scripts/get_backup
new file mode 100755
index 0000000..9431ed3
--- /dev/null
+++ b/scripts/get_backup
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+
+import traceback
+import os
+from sys import exit, stdout
+from os.path import join as pathjoin
+from os.path import getsize
+from nexus.cgi import parse_qs, parse_auth, start_response
+from common.datadb import get_latest_backup
+
+
+def handle_head(backup_name):
+    try:
+        get_latest_backup(backup_name)  # raises if no backup exists
+        # TODO appropriate content-length for HEAD
+        start_response(extra_headers=['Content-length: 0'])
+    except Exception:
+        start_response(status_code=("404", "Not Found",), extra_headers=['Content-length: 0'])
+    exit(0)
+
+
+def handle_get_rsync(backup_name):
+    """
+    Prints the absolute path an rsync backup should pull from
+    """
+    backup_path = get_latest_backup(backup_name)
+
+    start_response()
+    print(backup_path + '/')
+
+
+def handle_get_archive(backup_name):
+    """
+    Returns .tar.gz data to the browser
+    """
+    backup_path = pathjoin(get_latest_backup(backup_name), 'backup.tar.gz')
+
+    with open(backup_path, 'rb') as f:
+        start_response(content_type="application/x-gzip",
+                       extra_headers=["Content-length: %s" % getsize(backup_path),
+                                      "Content-Disposition: attachment; filename=\"backup.tar.gz\""])
+        while True:
+            data = f.read(8192)
+            if not data:
+                break
+            stdout.buffer.write(data)
+    exit(0)
+
+
+def handle_req():
+    """
+    Parse http query parameters and act accordingly.
+    """
+    params = parse_qs()
+
+    for param_name in ["proto", "name"]:
+        if param_name not in params:
+            raise Exception("Missing parameter: %s" % param_name)
+
+    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
+        # Should return absolute local path to latest backup dir
+        handle_get_rsync(params["name"])
+
+    elif os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "archive":
+        # Should respond by transferring tar.gz data
+        handle_get_archive(params["name"])
+
+    elif os.environ['REQUEST_METHOD'] == "HEAD":
+        # Respond with 200 or 404 depending on whether the backup exists
+        # TODO: deeper inspection so the headers can be fleshed out
+        handle_head(params["name"])
+
+    else:
+        raise Exception("Invalid request. Params: %s" % params)
+
+
+if __name__ == "__main__":
+    try:
+        handle_req()
+    except Exception as e:
+        start_response(status_code=("500", "Internal server error"))
+
+        tb = traceback.format_exc()
+        print(tb)
+
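Assuming the nexus image serves these scripts as CGI over HTTP (the URL prefix, hostname, and any authentication are not defined by this patch; http://backupdb/cgi-bin/ below is a placeholder), retrieving a backup would look roughly like this:

    # ask where the newest backup for profile "myhost" lives, then pull it via rsync over ssh
    SRC=$(curl -s 'http://backupdb/cgi-bin/get_backup?proto=rsync&name=myhost')
    rsync -avz "nexus@backupdb:$SRC" ./restore/

    # download the newest archive-mode backup as a tarball
    curl -s -o backup.tar.gz 'http://backupdb/cgi-bin/get_backup?proto=archive&name=myhost'

    # check whether any backup exists (HTTP 200 vs 404)
    curl -s -o /dev/null -w '%{http_code}\n' -I 'http://backupdb/cgi-bin/get_backup?proto=rsync&name=myhost'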
diff --git a/scripts/new_backup b/scripts/new_backup
new file mode 100755
index 0000000..9938e8f
--- /dev/null
+++ b/scripts/new_backup
@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import traceback
+from os import mkdir, rename, unlink, rmdir, utime
+from os.path import exists
+from os.path import join as pathjoin
+from nexus.cgi import parse_qs, parse_auth, start_response
+from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
+    NoBackupException
+from datetime import datetime
+from shutil import rmtree, move
+from subprocess import Popen, PIPE
+from random import randint
+from time import time
+from hashlib import md5
+from glob import iglob
+import json
+
+
+def rotate_backups(backup_dir, max_backups=5):
+    """
+    In the backup dir, cascade backups. List the backup dir and parse folder timestamps. Sort and delete old
+    backups beyond max_backups, then create a fresh timestamped dir for the incoming backup.
+    :param backup_dir: absolute path to dir containing the timestamped dirs we will be rotating
+    :param max_backups: Max number of dirs to keep
+    :returns: Full path of new data dir
+    """
+
+    # Path to this profile's backup data dir
+    # profile_base_path = pathjoin(DATADB_ROOT, backup_name, 'data')
+
+    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in os.listdir(backup_dir)])
+    dirs.reverse()
+    # now we have the list of dirs sorted newest to oldest
+
+    if len(dirs) > max_backups:
+        for dirname in dirs[max_backups:]:
+            rmtree(pathjoin(backup_dir, dirname.strftime(DATADB_DIR_TIMESTAMP_FORMAT)))
+
+    return prepare_new_backup_dir(backup_dir)
+
+
+def prepare_new_backup_dir(backup_dir):
+    # Create the new backup dir
+    new_backup_path = pathjoin(backup_dir, datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT))
+    mkdir(new_backup_path)
+    mkdir(pathjoin(new_backup_path, "data"))
+    return new_backup_path + '/data/'
+
+
+def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
+    """
+    Check and create dirs where backups under this name will go
+    :param backup_name: name of backup profile
+    :returns: absolute path to the backup data dir to write into
+    """
+    # print("prepare_backup(%s, %s)" % (backup_name, proto))
+
+    # Ensure the profile's base dir and its 'data' subdir exist
+    backup_base_path = get_backup_dir(backup_name)
+    if not exists(backup_base_path):
+        mkdir(backup_base_path)
+
+    backup_data_path = pathjoin(backup_base_path, 'data')
+    if not exists(backup_data_path):
+        mkdir(backup_data_path)
+
+    if not rotate:
+        # Get the path to the latest backup if using in place mode
+        # If no backup is found, we'll call the rotate function anyway to get one created
+        try:
+            return get_latest_backup(backup_name)
+        except NoBackupException:
+            pass
+
+    return rotate_backups(backup_data_path, max_backups=max_backups)
+
+
+def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
+    """
+    Prepare a temp dest dir for an incoming rsync backup
+    :param backup_name: name of backup profile
+    :param sync_prev: disk copy the previous backup that will be rsynced on top of, to save bandwidth
+    :param force_existing: force using existing backups (ideal for single in-place backups of very large things)
+    """
+
+    if force_existing:
+        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
+        # touch the backup dir
+        utime(get_backup_dir(backup_name))
+        start_response()
+        print(json.dumps([backup_0, None]))
+        sys.exit(0)
+
+    # generate random token
+    now = int(time())
+    token = md5()
+    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
+    token = "{}.{}".format(token.hexdigest(), now)
+
+    # create tmpdir using token
+    backup_dir = pathjoin(DATADB_TMP, token)
+    os.mkdir(backup_dir)
+
+    if sync_prev:
+        # seed the new dir with a copy of the previous backup, if any;
+        # this should save some network time rsyncing later
+        try:
+            prev_path = get_latest_backup(backup_name)
+        except NoBackupException:
+            prev_path = None
+        if prev_path:
+            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
+                       stdout=PIPE, stderr=PIPE)
+            cp.communicate()
+
+    # return both to requester
+    start_response()
+    print(json.dumps([backup_dir, token]))
+
+    sys.exit(0)
+
+
+def handle_put_rsync(backup_name, tmp_token, max_backups):
+    """
+    Requested after rsync has completed successfully on the client end. Moves
+    files from the tmp dir identified by tmp_token to a final location prepared by
+    rotating backups
+    """
+    # Prepare new dir
+    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)
+
+    # find tmp dir
+    tmp_dir = pathjoin(DATADB_TMP, tmp_token)
+
+    # move its contents
+    contents = iglob(pathjoin(tmp_dir, '*'))
+    for f in contents:
+        # chop off leading path that iglob adds
+        f = f[len(tmp_dir)+1:]
+
+        move(
+            pathjoin(tmp_dir, f),
+            pathjoin(new_target_dir, f)
+        )
+
+    # delete temp dir
+    rmdir(tmp_dir)
+
+    # touch the backup dir
+    utime(get_backup_dir(backup_name))
+
+    # Print confirmation
+    start_response()
+    print("OK")
+    sys.exit(0)
+
+
+def handle_put_archive(backup_name, fileStream, max_backups):
+    """
+    Prepare and accept a new archive backup - a single tar.gz archive.
+    :param backup_name: profile the new file will be added to
+    :param fileStream: file-like object to read archive data from and stream to disk
+    """
+
+    # Temp file we will store data in as it is uploaded
+    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())
+
+    # Track uploaded data size
+    bk_size = 0
+    with open(tmp_fname, 'wb') as f:
+        while True:
+            data = fileStream.read(8192)
+            if not data:
+                break
+            bk_size += len(data)
+            f.write(data)
+
+    # No data = assume something failed
+    if bk_size == 0:
+        unlink(tmp_fname)
+        raise Exception("No file uploaded...")
+
+    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)
+
+    # Move backup into place
+    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))
+
+    # touch the backup dir
+    utime(get_backup_dir(backup_name))
+
+    # Done
+    start_response()  # send 200 response code
+    sys.exit(0)
+
+
+def handle_req():
+    """
+    Parse http query parameters and act accordingly.
+    """
+    params = parse_qs()
+
+    for param_name in ["proto", "name"]:
+        if param_name not in params:
+            raise Exception("Missing parameter: %s" % param_name)
+
+    max_backups = int(params["keep"]) if "keep" in params else 5
+    assert max_backups > 0, "Must keep at least one backup"
+
+    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
+        # Rsync prepare is GET
+        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)
+
+    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
+        # Rsync finalize is PUT
+        handle_put_rsync(params["name"], params["token"], max_backups)
+
+    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
+        # Archive mode PUTs a file
+        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)
+
+    else:
+        raise Exception("Invalid request. Params: %s" % params)
+
+
+if __name__ == "__main__":
+    try:
+        handle_req()
+    except Exception as e:
+        start_response(status_code=("500", "Internal server error"))
+
+        tb = traceback.format_exc()
+        print(tb)
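The push side, under the same placeholder URL assumption (source paths are likewise examples), is a three-step flow for rsync mode (prepare via GET, rsync into the returned temp dir, finalize via PUT with the returned token) and a single PUT for archive mode:

    # rsync mode: prepare, transfer, finalize
    RESP=$(curl -s 'http://backupdb/cgi-bin/new_backup?proto=rsync&name=myhost&keep=5')
    DEST=$(echo "$RESP" | python3 -c 'import sys, json; print(json.load(sys.stdin)[0])')
    TOKEN=$(echo "$RESP" | python3 -c 'import sys, json; print(json.load(sys.stdin)[1])')
    rsync -az --one-file-system /srv/important-data/ "nexus@backupdb:$DEST"
    curl -s -X PUT "http://backupdb/cgi-bin/new_backup?proto=rsync&name=myhost&keep=5&token=$TOKEN"

    # archive mode: stream a tarball in a single request
    tar -czf - /srv/important-data | curl -s -X PUT --data-binary @- \
        'http://backupdb/cgi-bin/new_backup?proto=archive&name=myhost&keep=5'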
diff --git a/scripts/test b/scripts/test
new file mode 100755
index 0000000..ed6e51b
--- /dev/null
+++ b/scripts/test
@@ -0,0 +1,25 @@
+#!/usr/bin/env python3
+
+import os
+from urllib.parse import parse_qs
+
+
+def start_response(content_type="text/html", status_code=("200", "OK",)):
+    print('Status: %s %s' % status_code)
+    print("Content-Type: %s" % content_type)
+    print()
+
+
+if __name__ == "__main__":
+    try:
+
+        data = parse_qs(os.environ["QUERY_STRING"])
+
+        assert "yo" in data
+
+        start_response()
+        print("you passed: ?yo=%s" % data["yo"][0])
+
+    except Exception as e:
+        start_response(status_code=('500', "you fucked up"))
+        print(str(e))
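With the same placeholder URL, the test script gives a quick way to confirm the CGI plumbing works end to end:

    curl -s 'http://backupdb/cgi-bin/test?yo=hello'
    # expected body: you passed: ?yo=hello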