#!/usr/bin/env python3

import os
import sys
import traceback
from os import mkdir, rename, unlink, rmdir, utime
from os.path import exists
from os.path import join as pathjoin
from common.cgi import parse_qs, parse_auth, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
    NoBackupException
from datetime import datetime
from shutil import rmtree, move
from subprocess import Popen, PIPE
from random import randint
from time import time
from hashlib import md5
from glob import iglob
import json
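

# On-disk layout assumed by the helpers below (inferred from the code; the exact
# timestamp folder name comes from DATADB_DIR_TIMESTAMP_FORMAT in common.datadb):
#
#   <backup profile root>/        # as returned by get_backup_dir(backup_name)
#       data/
#           <timestamp>/          # one rotated backup
#               data/             # the backed-up files themselves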
def rotate_backups(backup_dir, max_backups=5):
    """
    In the backup dir, cascade backups: list the backup dir, parse the folder-name timestamps,
    sort them newest to oldest, and delete any backups beyond max_backups. Then create a fresh
    timestamped dir for the incoming backup.
    :param backup_dir: absolute path to dir containing the timestamped dirs we will be rotating
    :param max_backups: max number of dirs to keep
    :returns: full path of new data dir
    """

    # Path to this profile's backup data dir
    # profile_base_path = pathjoin(DATADB_ROOT, backup_name, 'data')

    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in os.listdir(backup_dir)])
    dirs.reverse()
    # now dirs is sorted newest to oldest

    if len(dirs) > max_backups:
        for dirname in dirs[max_backups:]:
            rmtree(pathjoin(backup_dir, dirname.strftime(DATADB_DIR_TIMESTAMP_FORMAT)))

    return prepare_new_backup_dir(backup_dir)


def prepare_new_backup_dir(backup_dir):
    # Create the new backup dir
    new_backup_path = pathjoin(backup_dir, datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT))
    mkdir(new_backup_path)
    mkdir(pathjoin(new_backup_path, "data"))
    return new_backup_path + '/data/'
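
# Note: the path returned above has the shape <backup_dir>/<timestamp>/data/, where <timestamp>
# is formatted per DATADB_DIR_TIMESTAMP_FORMAT; callers write the incoming backup into it.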


def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check for and create the dirs where backups under this name will go
    :param backup_name: name of backup profile
    :param max_backups: max number of rotated backups to keep
    :param rotate: if False, reuse the latest existing backup dir instead of rotating in a new one
    :returns: absolute path to the backup data dir to write into
    """
    # print("prepare_backup(%s, %s)" % (backup_name, proto))

    # Ensure the following dir exists: <DATADB_ROOT>/<backup_name>/data/
    backup_base_path = get_backup_dir(backup_name)
    if not exists(backup_base_path):
        mkdir(backup_base_path)

    backup_data_path = pathjoin(backup_base_path, 'data')
    if not exists(backup_data_path):
        mkdir(backup_data_path)

    if not rotate:
        # Get the path to the latest backup when using in-place mode.
        # If no backup is found, fall through and call the rotate function anyway to get one created.
        try:
            return get_latest_backup(backup_name)
        except NoBackupException:
            pass

    return rotate_backups(backup_data_path, max_backups=max_backups)


def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a temp dest dir for an incoming rsync backup
    :param backup_name: name of backup profile
    :param sync_prev: copy the previous backup into the temp dir so the incoming rsync only has
                      to transfer changes, saving bandwidth
    :param force_existing: force using the existing backup in place (ideal for single in-place
                           backups of very large things)
    """

    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        exit(0)

    # generate random token
    now = int(time())
    token = md5()
    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
    token = "{}.{}".format(token.hexdigest(), now)

    # create tmpdir using token
    backup_dir = pathjoin(DATADB_TMP, token)
    os.mkdir(backup_dir)

    if sync_prev:
        prev_path = pathjoin(get_backup_dir(backup_name), 'data', '0', 'data')
        if exists(prev_path):
            # if a previous backup exists, copy it into the empty new dir.
            # this should save some network time rsyncing later
            # copytree(prev_backup_path, new_backup_path)
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to requester
    start_response()
    print(json.dumps([backup_dir, token]))

    exit(0)
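
# Rough rsync flow (as implemented above and in handle_put_rsync below):
#   1. Client sends GET with proto=rsync; the server prepares a temp dir under DATADB_TMP
#      (optionally pre-populated with the previous backup) and prints [backup_dir, token].
#   2. Client rsyncs its files into backup_dir.
#   3. Client sends PUT with proto=rsync and the token; handle_put_rsync moves the temp dir's
#      contents into a freshly rotated backup dir.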


def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from the tmp dir identified by tmp_token to a final location prepared
    by rotating backups.
    """
    # Prepare new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # find tmp dir
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)

    # move its contents
    contents = iglob(pathjoin(tmp_dir, '*'))
    for f in contents:
        # chop off leading path that iglob adds
        f = f[len(tmp_dir) + 1:]

        move(
            pathjoin(tmp_dir, f),
            pathjoin(new_target_dir, f)
        )

    # delete temp dir
    rmdir(tmp_dir)

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    exit(0)


def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.
    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read the archive data from (it is streamed to disk)
    """

    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    # Track uploaded data size
    bk_size = 0
    with open(tmp_fname, 'wb') as f:
        while True:
            data = fileStream.read(8192)
            if not data:
                break
            bk_size += len(data)
            f.write(data)

    # No data = assume something failed
    if bk_size == 0:
        unlink(tmp_fname)
        raise Exception("No file uploaded...")

    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Move backup into place
    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done
    start_response()  # send 200 response code
    exit(0)
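
# With the layout above, an uploaded archive ends up at
# <backup profile root>/data/<timestamp>/data/backup.tar.gz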


def handle_req():
    """
    Parse http query parameters and act accordingly.
    """
    params = parse_qs()

    for param_name in ["proto", "name"]:
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    assert max_backups > 0, "Must keep at least one backup"

    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
        # Rsync prepare is GET
        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
        # Rsync finalize is PUT
        handle_put_rsync(params["name"], params["token"], max_backups)

    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
        # Archive mode PUTs a file
        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)

    else:
        raise Exception("Invalid request. Params: %s" % params)
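
# Illustrative request shapes handled above (query-string parameters only, as parsed by parse_qs):
#   GET  ?proto=rsync&name=<profile>[&inplace][&keep=N]      -> prepare rsync dest, prints [dir, token]
#   PUT  ?proto=rsync&name=<profile>&token=<token>[&keep=N]  -> finalize an rsync backup
#   PUT  ?proto=archive&name=<profile>[&keep=N]              -> request body is the tar.gz archive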


if __name__ == "__main__":
    try:
        handle_req()
    except Exception as e:
        start_response(status_code=("500", "Internal server error"))
        tb = traceback.format_exc()
        print(tb)