datadb-scripts/new_backup

#!/usr/bin/env python3
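"""
CGI endpoint that prepares and accepts new backups for the datadb scripts.

Two protocols are supported (see handle_req below): "rsync", where the client first
GETs a temp dir and token, rsyncs its data into that dir, then PUTs the token to
finalize; and "archive", where the client PUTs a single tar.gz as the request body.
"""
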
import os
import sys
import traceback
from os import mkdir, rename, unlink, rmdir, utime
from os.path import exists
from os.path import join as pathjoin
from common.cgi import parse_qs, parse_auth, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
    NoBackupException
from datetime import datetime
from shutil import rmtree, move
from subprocess import Popen, PIPE
from random import randint
from time import time
from hashlib import md5
from glob import iglob
import json


def rotate_backups(backup_dir, max_backups=5):
    """
    In the backup dir, cascade backups: list the backup dir, parse the folder timestamps, sort them, and delete
    any backups beyond the retention limit. Then create a fresh timestamped dir for the incoming backup.
    :param backup_dir: absolute path to dir containing the timestamped dirs we will be rotating
    :param max_backups: max number of dirs to keep
    :returns: full path of new data dir
    """

    # Path to this profile's backup data dir
    # profile_base_path = pathjoin(DATADB_ROOT, backup_name, 'data')

    # Existing backup dirs, parsed to datetimes and sorted newest to oldest
    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in os.listdir(backup_dir)], reverse=True)

    if len(dirs) > max_backups:
        for dirname in dirs[max_backups:]:
            rmtree(pathjoin(backup_dir, dirname.strftime(DATADB_DIR_TIMESTAMP_FORMAT)))

    return prepare_new_backup_dir(backup_dir)


def prepare_new_backup_dir(backup_dir):
    # Create the new backup dir
    new_backup_path = pathjoin(backup_dir, datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT))
    mkdir(new_backup_path)
    mkdir(pathjoin(new_backup_path, "data"))

    return new_backup_path + '/data/'


def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check and create dirs where backups under this name will go
    :param backup_name: name of backup profile
    :param max_backups: max number of backup dirs to keep when rotating
    :param rotate: if False, reuse the latest existing backup dir instead of rotating in a new one
    :returns: absolute path to the backup data dir the client should write into
    """
    # print("prepare_backup(%s, %s)" % (backup_name, proto))

    # Ensure the following dir exists: <DATADB_ROOT>/<backup_name>/data/
    backup_base_path = get_backup_dir(backup_name)
    if not exists(backup_base_path):
        mkdir(backup_base_path)

    backup_data_path = pathjoin(backup_base_path, 'data')
    if not exists(backup_data_path):
        mkdir(backup_data_path)

    if not rotate:
        # Get the path to the latest backup if using in-place mode
        # If no backup is found, we'll call the rotate function anyway to get one created
        try:
            return get_latest_backup(backup_name)
        except NoBackupException:
            pass

    return rotate_backups(backup_data_path, max_backups=max_backups)


def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
"""
Prepare a temp dest dir for an incoming rsync backup
:param backup_name: name of backup profile
:param sync_prev: disk copy the previous backup that will be rsynced on top of to save bandwidth
2016-07-02 19:35:55 -07:00
:param force_existing: force using existing backups (ideal for single in-place backups of very large things)
"""
2016-07-02 19:35:55 -07:00
if force_existing:
backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
# touch the backup dir
utime(get_backup_dir(backup_name))
start_response()
print(json.dumps([backup_0, None]))
exit(0)
# generate random token
now = int(time())
token = md5()
token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
token = "{}.{}".format(token.hexdigest(), now)
# create tmpdir using token
backup_dir = pathjoin(DATADB_TMP, token)
os.mkdir(backup_dir)
    if sync_prev:
        # Seed the tmp dir with the newest existing backup, if there is one, so the
        # client's rsync only has to transfer changed files
        try:
            prev_path = get_latest_backup(backup_name)
        except NoBackupException:
            prev_path = None
        if prev_path and exists(prev_path):
            # if we're using rsync let's cp -r the previous backup to the empty new dir.
            # this should save some network time rsyncing later
            # copytree(prev_backup_path, new_backup_path)
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to requester
    start_response()
    print(json.dumps([backup_dir, token]))
    exit(0)


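# The rsync flow is two-phase: handle_get_rsync (above) hands the client a temp dir
# and a token, the client rsyncs its data into that dir out-of-band, and then
# handle_put_rsync (below) moves the uploaded files into a freshly rotated backup dir.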
def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from the tmp dir identified by tmp_token to a final location prepared
    by rotating backups.
    """
    # Prepare new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # find tmp dir
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)

    # move its contents
    contents = iglob(pathjoin(tmp_dir, '*'))
    for f in contents:
        # chop off leading path that iglob adds
        f = f[len(tmp_dir) + 1:]
        move(
            pathjoin(tmp_dir, f),
            pathjoin(new_target_dir, f)
        )

    # delete temp dir
    rmdir(tmp_dir)

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    exit(0)


def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.
    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read archive data from, to disk
    :param max_backups: max number of archives to keep when rotating
    """

    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    # Track uploaded data size
    bk_size = 0
    with open(tmp_fname, 'wb') as f:
        while True:
            data = fileStream.read(8192)
            if not data:
                break
            bk_size += len(data)
            f.write(data)

    # No data = assume something failed
    if bk_size == 0:
        unlink(tmp_fname)
        raise Exception("No file uploaded...")

    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Move backup into place
    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done
    start_response()  # send 200 response code
    exit(0)


def handle_req():
"""
Parse http query parameters and act accordingly.
"""
params = parse_qs()
2017-05-27 12:04:55 -07:00
2015-12-26 21:38:18 -08:00
for param_name in ["proto", "name"]:
2017-05-27 12:08:31 -07:00
if param_name not in params:
2015-12-26 21:38:18 -08:00
raise Exception("Missing parameter: %s" % param_name)
2017-05-27 12:04:55 -07:00
2015-12-26 21:38:18 -08:00
max_backups = int(params["keep"]) if "keep" in params else 5
assert max_backups > 0, "Must keep at least one backup"
2017-05-27 12:04:55 -07:00
2015-12-26 21:38:18 -08:00
if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
# Rsync prepare is GET
2016-07-02 19:35:55 -07:00
handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)
2017-05-27 12:04:55 -07:00
elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
# Rsync finalize is PUT
handle_put_rsync(params["name"], params["token"], max_backups)
2017-05-27 12:04:55 -07:00
2015-12-26 21:38:18 -08:00
elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
# Archive mode PUTs a file
handle_put_archive(params["name"], sys.stdin.buffer, max_backups)
2017-05-27 12:04:55 -07:00
2015-12-26 21:38:18 -08:00
else:
raise Exception("Invalid request. Params: %s" % params)
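# Illustrative query strings handle_req accepts ("myhost" is a hypothetical profile
# name; the CGI URL itself depends on the web server configuration):
#   GET  ?proto=rsync&name=myhost[&inplace=1]           -> prints JSON [dest_dir, token]
#   PUT  ?proto=rsync&name=myhost&token=<token>&keep=5  -> finalizes an rsync upload
#   PUT  ?proto=archive&name=myhost&keep=5              -> request body is a tar.gz archive
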
if __name__ == "__main__":
    try:
        handle_req()
    except Exception:
        start_response(status_code=("500", "Internal server error"))

        tb = traceback.format_exc()
        print(tb)