#!/usr/bin/env python3

import os
import sys
import traceback
import json
from os import mkdir, rename, unlink, rmdir, utime, makedirs
from os.path import exists
from os.path import join as pathjoin
from common.cgi import parse_qs, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP
from shutil import rmtree, move
from subprocess import Popen, PIPE
from random import randint
from time import time
from hashlib import md5
from glob import iglob


def get_backup_dir(backup_name):
    """
    Return the path to this profile's backup base dir. The base dir contains
    the 'data' directory.
    """
    return pathjoin(DATADB_ROOT, backup_name)


def rotate_backups(backup_dir, max_backups=5):
    """
    In the backup dir, cascade the numbered backups (/1/ becomes /2/, /0/ becomes /1/, etc)
    :param backup_dir: absolute path to dir containing the numbered dirs we will be rotating
    :param max_backups: max number of dirs to keep
    :returns: full path of the new data dir
    """
    dirs = os.listdir(backup_dir)

    if dirs:
        # If there are some dirs, rotate them. Assume all items are dirs and
        # all dirs are named numerically; sort newest-first, e.g. [4, 3, 2, 1, 0]
        dirs = sorted((int(d) for d in dirs), reverse=True)
        for item in dirs:
            if (item + 1) >= max_backups:
                # Rotating this dir would exceed max_backups; delete it instead
                rmtree(pathjoin(backup_dir, str(item)))
                continue
            # Rotate each backup up one slot
            rename(
                pathjoin(backup_dir, str(item)),
                pathjoin(backup_dir, str(item + 1))
            )

    # Create the new backup dir at slot 0
    new_backup_path = pathjoin(backup_dir, "0")
    mkdir(new_backup_path)
    mkdir(pathjoin(new_backup_path, "data"))

    return new_backup_path + '/data/'


def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check and create the dirs where backups under this name will go
    :param backup_name: name of backup profile
    :returns: absolute path to the newly created backup dir (slot 0)
    """
    # Ensure the following dir exists: <DATADB_ROOT>/<backup_name>/data/
    backup_base_path = get_backup_dir(backup_name)
    if not exists(backup_base_path):
        mkdir(backup_base_path)

    backup_data_path = pathjoin(backup_base_path, 'data')
    if not exists(backup_data_path):
        mkdir(backup_data_path)

    if rotate:
        # Should always return <backup_name>/data/0/data/
        new_path = rotate_backups(backup_data_path, max_backups=max_backups)
    else:
        new_path = pathjoin(backup_data_path, '0', 'data') + '/'
        if not exists(new_path):
            makedirs(new_path)

    return new_path
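# Resulting on-disk layout (an illustrative sketch; the profile name "web1" is
# hypothetical): after three backups with max_backups=5, the tree would be:
#
#   <DATADB_ROOT>/web1/data/0/data/   <- newest backup
#   <DATADB_ROOT>/web1/data/1/data/
#   <DATADB_ROOT>/web1/data/2/data/   <- oldest backup
#
# Each rotation renames 1 -> 2 and 0 -> 1, then creates a fresh 0/data/; any
# dir whose new number would reach max_backups is deleted via rmtree().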
def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a temp dest dir for an incoming rsync backup
    :param backup_name: name of backup profile
    :param sync_prev: disk-copy the previous backup into the new dir so rsync
        runs on top of it, saving bandwidth
    :param force_existing: reuse the existing backup dir in place (ideal for
        single in-place backups of very large things)
    """
    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        sys.exit(0)

    # Generate a random token
    now = int(time())
    token = md5()
    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
    token = "{}.{}".format(token.hexdigest(), now)

    # Create a tmpdir named after the token
    backup_dir = pathjoin(DATADB_TMP, token)
    mkdir(backup_dir)

    if sync_prev:
        prev_path = pathjoin(get_backup_dir(backup_name), 'data', '0', 'data')
        if exists(prev_path):
            # Copy the previous backup into the empty new dir; rsyncing on top
            # of it later should save some network time
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # Return the tmpdir path and token to the requester
    start_response()
    print(json.dumps([backup_dir, token]))
    sys.exit(0)


def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from the tmp dir identified by tmp_token to a final location
    prepared by rotating the existing backups.
    """
    # Prepare the new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Find the tmp dir
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)

    # Move its contents
    for f in iglob(pathjoin(tmp_dir, '*')):
        # Chop off the leading path that iglob adds
        f = f[len(tmp_dir) + 1:]
        move(
            pathjoin(tmp_dir, f),
            pathjoin(new_target_dir, f)
        )

    # Delete the now-empty temp dir
    rmdir(tmp_dir)

    # Touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    sys.exit(0)


def handle_put_archive(backup_name, file_stream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.
    :param backup_name: profile the new file will be added to
    :param file_stream: file-like object to read the archive data from
    """
    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    # Track uploaded data size
    bk_size = 0

    with open(tmp_fname, 'wb') as f:
        while True:
            data = file_stream.read(8192)
            if not data:
                break
            bk_size += len(data)
            f.write(data)

    # No data: assume something failed
    if bk_size == 0:
        unlink(tmp_fname)
        raise Exception("No file uploaded...")

    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Move the backup into place
    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))

    # Touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done; send a 200 response code
    start_response()
    sys.exit(0)


def handle_req():
    """
    Parse http query parameters and act accordingly.
    """
    params = parse_qs()

    for param_name in ["proto", "name"]:
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    assert max_backups > 0, "Must keep at least one backup"

    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
        # Rsync prepare is a GET
        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
        # Rsync finalize is a PUT
        handle_put_rsync(params["name"], params["token"], max_backups)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
        # Archive mode PUTs a single file
        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)

    else:
        raise Exception("Invalid request. Params: %s" % params)


if __name__ == "__main__":
    try:
        handle_req()
    except Exception:
        start_response(status_code=("500", "Internal server error"))
        print(traceback.format_exc())
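# ---------------------------------------------------------------------------
# Example client usage (a sketch; the host "datadb.example.com" and the
# script path "/cgi-bin/backup.py" are hypothetical, not defined here):
#
# Archive mode - PUT a single tar.gz, keeping 7 rotations:
#   curl -X PUT --data-binary @backup.tar.gz \
#       'http://datadb.example.com/cgi-bin/backup.py?proto=archive&name=myprofile&keep=7'
#
# Rsync mode - prepare, sync, then finalize:
#   1) GET  ...?proto=rsync&name=myprofile
#      -> prints a JSON pair: [tmp_dir, token]
#   2) rsync the local data into tmp_dir on the server
#   3) PUT  ...?proto=rsync&name=myprofile&token=<token>
#      -> rotates the numbered backups and moves tmp_dir's contents into slot 0
# ---------------------------------------------------------------------------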