#!/usr/bin/env python3
import os
import sys
import traceback
from os import mkdir, rename, unlink, rmdir, utime
from os.path import exists
from os.path import join as pathjoin
from common.cgi import parse_qs, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
    NoBackupException
from datetime import datetime
from shutil import rmtree, move
from subprocess import Popen, PIPE
from random import randint
from time import time
from hashlib import md5
from glob import iglob
import json


def rotate_backups(backup_dir, max_backups=5):
    """
    Cascade the backups in the backup dir: list it, parse the folder names as timestamps, sort them, and
    delete any old dirs beyond max_backups. Then create and return a fresh timestamped data dir.
    :param backup_dir: absolute path to dir containing the timestamped dirs we will be rotating
    :param max_backups: max number of old dirs to keep
    :returns: full path of new data dir
    """
    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in os.listdir(backup_dir)])
    dirs.reverse()
    # now we have the list of dirs sorted newest to oldest

    if len(dirs) > max_backups:
        for dirname in dirs[max_backups:]:
            rmtree(pathjoin(backup_dir, dirname.strftime(DATADB_DIR_TIMESTAMP_FORMAT)))

    return prepare_new_backup_dir(backup_dir)


def prepare_new_backup_dir(backup_dir):
    # Create the new backup dir, named for the current timestamp
    new_backup_path = pathjoin(backup_dir, datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT))
    mkdir(new_backup_path)
    mkdir(pathjoin(new_backup_path, "data"))
    return new_backup_path + '/data/'


def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check and create the dirs where backups under this name will go
    :param backup_name: name of backup profile
    :returns: absolute path to the backup data dir to write into
    """
    # Ensure the following dir exists: <DATADB_ROOT>/<backup_name>/data/
    backup_base_path = get_backup_dir(backup_name)
    if not exists(backup_base_path):
        mkdir(backup_base_path)

    backup_data_path = pathjoin(backup_base_path, 'data')
    if not exists(backup_data_path):
        mkdir(backup_data_path)

    if not rotate:
        # Get the path to the latest backup if using in-place mode.
        # If no backup is found, fall through and rotate anyway to get one created.
        try:
            return get_latest_backup(backup_name)
        except NoBackupException:
            pass

    return rotate_backups(backup_data_path, max_backups=max_backups)


def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a temp dest dir for an incoming rsync backup
    :param backup_name: name of backup profile
    :param sync_prev: disk-copy the previous backup into the new dir, so rsyncing on top of it saves bandwidth
    :param force_existing: reuse the existing backup dir (ideal for single in-place backups of very large things)
    """
    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir so its mtime reflects this backup
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        exit(0)

    # generate a random token of the form <md5hex>.<unixtime>
    now = int(time())
    token = md5()
    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
    token = "{}.{}".format(token.hexdigest(), now)

    # create a tmpdir named after the token
    backup_dir = pathjoin(DATADB_TMP, token)
    mkdir(backup_dir)

    if sync_prev:
        # Seed the empty temp dir with a local copy of the latest existing backup;
        # this should save some network time rsyncing later
        try:
            prev_path = get_latest_backup(backup_name)
        except NoBackupException:
            prev_path = None
        if prev_path and exists(prev_path):
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to the requester
    start_response()
    print(json.dumps([backup_dir, token]))
    exit(0)
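

# Illustrative end-to-end flow for proto=rsync, assuming this script is mounted
# on a CGI-capable web server (the exact URL is deployment-specific and
# hypothetical here):
#
#   1. client: GET  ?proto=rsync&name=myhost
#      server: handle_get_rsync() above replies with JSON ["<DATADB_TMP>/<token>", "<token>"]
#   2. client: rsyncs its data into the returned temp dir on this host
#   3. client: PUT  ?proto=rsync&name=myhost&token=<token>&keep=5
#      server: handle_put_rsync() below rotates and moves the temp dir into place, replies "OK"
#
# With ?inplace=1 on the GET, step 1 instead returns the existing backup dir and
# a null token, and no finalizing PUT is needed.
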

def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves files from the tmp dir
    identified by tmp_token to a final location prepared by rotating backups.
    """
    # Prepare the new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # find the tmp dir
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)

    # move its contents
    for f in iglob(pathjoin(tmp_dir, '*')):
        # chop off the leading path that iglob adds
        f = f[len(tmp_dir) + 1:]
        move(pathjoin(tmp_dir, f), pathjoin(new_target_dir, f))

    # delete the now-empty temp dir
    rmdir(tmp_dir)

    # touch the backup dir so its mtime reflects this backup
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    exit(0)


def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.
    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read archive data from and write to disk
    """
    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    # Track uploaded data size
    bk_size = 0

    with open(tmp_fname, 'wb') as f:
        while True:
            data = fileStream.read(8192)
            if not data:
                break
            bk_size += len(data)
            f.write(data)

    # No data = assume something failed
    if bk_size == 0:
        unlink(tmp_fname)
        raise Exception("No file uploaded...")

    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Move the backup into place
    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))

    # touch the backup dir so its mtime reflects this backup
    utime(get_backup_dir(backup_name))

    # Done; send a 200 response code with no body
    start_response()
    exit(0)


def handle_req():
    """
    Parse http query parameters and act accordingly.
    """
    params = parse_qs()

    for param_name in ["proto", "name"]:
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    assert max_backups > 0, "Must keep at least one backup"

    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
        # Rsync prepare is a GET
        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
        # Rsync finalize is a PUT
        if "token" not in params:
            raise Exception("Missing parameter: token")
        handle_put_rsync(params["name"], params["token"], max_backups)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
        # Archive mode PUTs a single file
        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)

    else:
        raise Exception("Invalid request. Params: %s" % params)


if __name__ == "__main__":
    try:
        handle_req()
    except Exception:
        start_response(status_code=("500", "Internal server error"))
        tb = traceback.format_exc()
        print(tb)
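

# Illustrative archive-mode upload (hypothetical URL, as above; the CGI layer
# delivers the request body on sys.stdin.buffer):
#
#   curl -X PUT --data-binary @backup.tar.gz \
#       'http://backuphost/cgi-bin/new_backup?proto=archive&name=myhost&keep=7'
#
# handle_put_archive() streams the body to a temp file, then rotation places it
# at roughly <DATADB_ROOT>/myhost/data/<timestamp>/data/backup.tar.gz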