datadb-scripts/new_backup

237 lines
7.5 KiB
Plaintext
Raw Normal View History

2015-12-26 21:38:18 -08:00
#!/usr/bin/env python3
import os,sys,cgi
2015-12-26 21:41:51 -08:00
import traceback
2016-07-02 19:35:55 -07:00
from os import mkdir,rename,unlink,rmdir,utime,makedirs
2015-12-26 21:38:18 -08:00
from os.path import exists
from os.path import join as pathjoin
2015-12-26 21:41:51 -08:00
from common.cgi import parse_qs,parse_auth,start_response
2017-05-24 23:05:52 -07:00
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT
from datetime import datetime
from shutil import rmtree, move
2015-12-26 21:38:18 -08:00
from subprocess import Popen,PIPE
from random import randint
2015-12-26 21:38:18 -08:00
from time import time
from hashlib import md5
from glob import iglob
import json
2015-12-26 21:41:51 -08:00
2017-05-24 23:05:52 -07:00
def get_backup_dir(backup_name):
    """
    Build the path of the base directory belonging to a backup profile.

    :param backup_name: name of the backup profile
    :returns: DATADB_ROOT joined with backup_name; the profile's 'data' directory lives inside it
    """
    profile_base = pathjoin(DATADB_ROOT, backup_name)
    return profile_base
2015-12-26 21:38:18 -08:00
2017-05-24 23:05:52 -07:00
def rotate_backups(backup_dir, max_backups=5):
    """
    Prune old backups in backup_dir, then create the directory for a new one.

    Entry names in backup_dir are parsed as timestamps using
    DATADB_DIR_TIMESTAMP_FORMAT. The newest max_backups of them are kept and
    every older one is deleted recursively. Entries whose name does not parse
    as a timestamp are ignored and left on disk, so a stray file or a dir from
    another layout no longer aborts the whole rotation with a ValueError.

    :param backup_dir: absolute path to dir containing the timestamped dirs we will be rotating
    :param max_backups: number of already existing timestamped dirs to keep; the
                        new dir is created in addition to these
    :returns: Full path of new data dir, as returned by prepare_new_backup_dir
    """
    stamped = []
    for entry in os.listdir(backup_dir):
        try:
            parsed = datetime.strptime(entry, DATADB_DIR_TIMESTAMP_FORMAT)
        except ValueError:
            # Not a timestamped backup dir; never touch what we cannot identify
            continue
        # Remember the real on-disk name so deletion does not depend on
        # strftime reproducing it exactly
        stamped.append((parsed, entry))

    # Newest first: everything past index max_backups has expired
    stamped.sort(reverse=True)
    for _, entry in stamped[max_backups:]:
        rmtree(pathjoin(backup_dir, entry))

    return prepare_new_backup_dir(backup_dir)
def prepare_new_backup_dir(backup_dir):
    """
    Create a fresh backup directory, named after the current time, inside backup_dir.

    :param backup_dir: directory that will contain the new timestamped dir
    :returns: path of the 'data' dir created inside the new timestamped dir, with a trailing slash
    """
    stamp = datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT)
    stamped_dir = pathjoin(backup_dir, stamp)
    # Timestamped dir first, then the 'data' child that receives the backup contents
    for path in (stamped_dir, pathjoin(stamped_dir, "data")):
        mkdir(path)
    return "{}/data/".format(stamped_dir)
2016-07-02 19:35:55 -07:00
def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check and create dirs where backups under this name will go

    Ensures <DATADB_ROOT>/<backup_name>/data/ exists, then creates a new
    timestamped backup dir inside it.

    :param backup_name: name of backup profile
    :param max_backups: number of existing backups to keep; only used when rotate is True
    :param rotate: when True, old backups are pruned through rotate_backups before
                   the new dir is created; when False the new dir is created
                   without deleting anything
    :returns: path to the newly created backup data dir
    """
    backup_base_path = get_backup_dir(backup_name)
    backup_data_path = pathjoin(backup_base_path, 'data')
    # Parent first, then its 'data' child
    for path in (backup_base_path, backup_data_path):
        if not exists(path):
            mkdir(path)

    if rotate:
        return rotate_backups(backup_data_path, max_backups=max_backups)

    # The result of this call used to be discarded, which made the final
    # return fail with UnboundLocalError whenever rotate was False.
    # NOTE(review): nothing is pruned on this path, so every call adds one
    # more timestamped dir -- confirm that is acceptable for in-place backups.
    return prepare_new_backup_dir(backup_data_path)
2016-07-02 19:35:55 -07:00
def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a dest dir for an incoming rsync backup, print it as JSON and exit.

    The response body is a JSON list [dest_dir, token]. The token identifies
    the temp dir and is None when force_existing is used.

    :param backup_name: name of backup profile
    :param sync_prev: disk copy the previous backup into the temp dir so the
                      client's rsync only has to transfer what changed
    :param force_existing: skip the temp dir and hand out a dir from
                           prepare_backup_dirs(rotate=False) directly
                           (ideal for single in-place backups of very large things)
    """
    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        sys.exit(0)

    # generate random token; the trailing ".<epoch>" records when it was issued
    now = int(time())
    token = md5()
    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
    token = "{}.{}".format(token.hexdigest(), now)

    # create tmpdir using token
    backup_dir = pathjoin(DATADB_TMP, token)
    os.mkdir(backup_dir)

    if sync_prev:
        prev_path = _find_previous_backup(backup_name)
        if prev_path is not None:
            # Seed the empty temp dir with the previous backup. This is a
            # best-effort optimisation, so rsync's exit status is not checked.
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to requester
    start_response()
    print(json.dumps([backup_dir, token]))
    sys.exit(0)


def _find_previous_backup(backup_name):
    """
    Locate the data dir of the most recent backup stored for a profile.

    Backups live in <profile>/data/<timestamp>/data, with <timestamp> formatted
    per DATADB_DIR_TIMESTAMP_FORMAT. The hard-coded <profile>/data/0/data path
    used previously is still accepted as a fallback.

    :param backup_name: name of backup profile
    :returns: path of the newest existing backup data dir, or None if there is none
    """
    data_root = pathjoin(get_backup_dir(backup_name), 'data')
    if not exists(data_root):
        return None

    newest = None
    for entry in os.listdir(data_root):
        try:
            stamp = datetime.strptime(entry, DATADB_DIR_TIMESTAMP_FORMAT)
        except ValueError:
            # not a timestamped backup dir
            continue
        if newest is None or stamp > newest[0]:
            newest = (stamp, entry)

    candidates = []
    if newest is not None:
        candidates.append(pathjoin(data_root, newest[1], 'data'))
    candidates.append(pathjoin(data_root, '0', 'data'))

    for candidate in candidates:
        if exists(candidate):
            return candidate
    return None
def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from the tmp dir identified by tmp_token to a final location prepared
    by rotating backups, then prints "OK" and exits.

    :param backup_name: name of backup profile
    :param tmp_token: token naming the temp dir inside DATADB_TMP
    :param max_backups: number of existing backups to keep when rotating
    :raises Exception: if tmp_token is not a plain dir name or no such temp dir exists
    """
    # The token arrives with the request: it must be a single path component,
    # otherwise it could point this handler at a dir outside DATADB_TMP.
    if not tmp_token or tmp_token in ('.', '..') or os.path.basename(tmp_token) != tmp_token:
        raise Exception("Invalid token: %s" % tmp_token)

    # find tmp dir, and do so before rotating: a bad token must not cost us
    # an old backup
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)
    if not os.path.isdir(tmp_dir):
        raise Exception("Unknown token: %s" % tmp_token)

    # Prepare new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # move its contents. listdir is used instead of a '*' glob because the
    # glob skips names starting with a dot, which left dotfiles behind and
    # made the rmdir below fail on a non-empty dir.
    for entry in os.listdir(tmp_dir):
        move(pathjoin(tmp_dir, entry), pathjoin(new_target_dir, entry))

    # delete temp dir
    rmdir(tmp_dir)

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    sys.exit(0)
def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.

    The upload is spooled to a temp file in DATADB_TMP, then moved into a newly
    prepared backup dir as 'backup.tar.gz'. On success a response is started
    and the process exits.

    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read archive data from, to disk
    :param max_backups: number of existing backups to keep when rotating
    :raises Exception: if no data at all was uploaded
    """
    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    try:
        # Track uploaded data size
        bk_size = 0
        with open(tmp_fname, 'wb') as f:
            while True:
                data = fileStream.read(8192)
                if not data:
                    break
                bk_size += len(data)
                f.write(data)

        # No data = assume something failed
        if bk_size == 0:
            raise Exception("No file uploaded...")

        new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

        # Move backup into place. shutil.move falls back to copy+delete when
        # the temp dir and the backup root are on different filesystems,
        # where a plain rename would fail.
        move(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))
    except Exception:
        # Do not leave a partial upload behind in the temp dir
        if exists(tmp_fname):
            unlink(tmp_fname)
        raise

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done
    start_response()  # send 200 response code
    sys.exit(0)
def handle_req():
    """
    Parse http query parameters and act accordingly.

    Recognised parameters:
      proto   - "rsync" or "archive" (required)
      name    - backup profile name (required)
      keep    - number of backups to keep, defaults to 5
      token   - temp dir token, required for the rsync PUT
      inplace - if present, the rsync GET hands out a dir without a temp dir

    :raises Exception: on missing/invalid parameters or an unsupported
                       method/proto combination
    """
    params = parse_qs()

    for param_name in ("proto", "name"):
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    # The name becomes a directory under the backup root, so it must be a
    # single path component (assumes parse_qs yields plain strings).
    name = params["name"]
    if not name or name in ('.', '..') or os.path.basename(name) != name:
        raise Exception("Invalid backup name: %s" % name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    # Explicit raise rather than assert: asserts are stripped under python -O
    if max_backups <= 0:
        raise Exception("Must keep at least one backup")

    method = os.environ['REQUEST_METHOD']
    proto = params["proto"]

    if method == "GET" and proto == "rsync":
        # Rsync prepare is GET
        handle_get_rsync(name, sync_prev=True, force_existing="inplace" in params)

    elif method == "PUT" and proto == "rsync":
        # Rsync finalize is PUT
        if "token" not in params:
            raise Exception("Missing parameter: token")
        handle_put_rsync(name, params["token"], max_backups)

    elif method == "PUT" and proto == "archive":
        # Archive mode PUTs a file
        handle_put_archive(name, sys.stdin.buffer, max_backups)

    else:
        raise Exception("Invalid request. Params: %s" % params)
2015-12-26 21:41:51 -08:00
2015-12-26 21:38:18 -08:00
if __name__ == "__main__":
    # CGI entry point. Handlers finish by exiting the process themselves, so
    # only failures reach the except clause below.
    try:
        handle_req()
    except Exception:
        # Report the failure as a 500 with the traceback as the response body
        start_response(status_code=("500", "Internal server error"))
        print(traceback.format_exc())