datadb-scripts/new_backup

#!/usr/bin/env python3

import os
import sys
import traceback
from os import mkdir, rename, unlink, rmdir, utime
from os.path import exists
from os.path import join as pathjoin
from common.cgi import parse_qs, parse_auth, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
    NoBackupException
from datetime import datetime
from shutil import rmtree, move
from subprocess import Popen, PIPE
from random import randint
from time import time
from hashlib import md5
from glob import iglob
import json
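
# Rough shape of the HTTP interface this CGI script serves, per handle_req() below.
# Query parameter names come from the code; the exact URL the script is mounted at
# is deployment-specific and not defined in this file:
#
#   GET  ?proto=rsync&name=<profile>[&inplace]                prepare a tmp dir; responds with [path, token]
#   PUT  ?proto=rsync&name=<profile>&token=<token>[&keep=N]   finalize: rotate backups, move tmp contents into place
#   PUT  ?proto=archive&name=<profile>[&keep=N]               upload a single tar.gz as the request body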


def rotate_backups(backup_dir, max_backups=5):
    """
    In the backup dir, cascade backups. List the backup dir and parse folder timestamps. Sort and delete old.
    Create a symlink pointing to the newest backup
    :param backup_dir: absolute path to dir containing the numbered dirs we will be rotating
    :param max_backups: Max number of dirs to keep
    :returns: Full path of new data dir
    """
    # Path to this profile's backup data dir
    # profile_base_path = pathjoin(DATADB_ROOT, backup_name, 'data')

    dirs = sorted([datetime.strptime(d, DATADB_DIR_TIMESTAMP_FORMAT) for d in os.listdir(backup_dir)])
    dirs.reverse()

    # We now have the list of dirs sorted newest to oldest
    if len(dirs) > max_backups:
        for dirname in dirs[max_backups:]:
            rmtree(pathjoin(backup_dir, dirname.strftime(DATADB_DIR_TIMESTAMP_FORMAT)))

    return prepare_new_backup_dir(backup_dir)
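
# Example (a sketch only; '/opt/datadb/myhost/data' is a made-up path and the real
# directory names depend on DATADB_DIR_TIMESTAMP_FORMAT):
#   rotate_backups('/opt/datadb/myhost/data', max_backups=3)
#   -> rmtree()s all but the 3 newest timestamped dirs, then creates and returns a
#      fresh '<timestamp>/data/' dir via prepare_new_backup_dir()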


def prepare_new_backup_dir(backup_dir):
    # Create the new backup dir
    new_backup_path = pathjoin(backup_dir, datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT))
    mkdir(new_backup_path)
    mkdir(pathjoin(new_backup_path, "data"))
    return new_backup_path + '/data/'


def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Check and create dirs where backups under this name will go
    :param backup_name: name of backup profile
    :returns: absolute path to newly created backup dir (0)
    """
    # print("prepare_backup(%s, %s)" % (backup_name, proto))
    # Ensure the following dir exists: <DATADB_ROOT>/<backup_name>/data/
    backup_base_path = get_backup_dir(backup_name)
    if not exists(backup_base_path):
        mkdir(backup_base_path)

    backup_data_path = pathjoin(backup_base_path, 'data')
    if not exists(backup_data_path):
        mkdir(backup_data_path)

    if not rotate:
        # Get the path to the latest backup if using in place mode
        # If no backup is found, we'll call the rotate function anyway to get one created
        try:
            return get_latest_backup(backup_name)
        except NoBackupException:
            pass

    return rotate_backups(backup_data_path, max_backups=max_backups)
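
# Resulting layout, based on what prepare_new_backup_dir() creates (and assuming
# get_backup_dir() maps a profile name to <DATADB_ROOT>/<backup_name>, as the
# commented-out profile_base_path line in rotate_backups() suggests):
#   <DATADB_ROOT>/<backup_name>/data/<timestamp>/data/   <- path returned to callers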


def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a temp dest dir for an incoming rsync backup
    :param backup_name: name of backup profile
    :param sync_prev: disk copy the previous backup that will be rsynced on top of to save bandwidth
    :param force_existing: force using existing backups (ideal for single in-place backups of very large things)
    """
    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        exit(0)

    # generate random token
    now = int(time())
    token = md5()
    token.update("{}{}{}".format(now, backup_name, randint(0, 999999999)).encode("UTF-8"))
    token = "{}.{}".format(token.hexdigest(), now)

    # create tmpdir using token
    backup_dir = pathjoin(DATADB_TMP, token)
    os.mkdir(backup_dir)

    if sync_prev:
        prev_path = pathjoin(get_backup_dir(backup_name), 'data', '0', 'data')
        if exists(prev_path):
            # if we're using rsync let's cp -r the previous backup to the empty new dir.
            # this should save some network time rsyncing later
            # copytree(prev_backup_path, new_backup_path)
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to requester
    start_response()
    print(json.dumps([backup_dir, token]))
    exit(0)
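
# The JSON bodies printed above look roughly like the following (hash and epoch are
# invented for illustration; the token is md5("<epoch><name><random>") + ".<epoch>"):
#   ["<DATADB_TMP>/5f4dcc3b5aa765d61d8327deb882cf99.1700000000",
#    "5f4dcc3b5aa765d61d8327deb882cf99.1700000000"]
# or [<existing backup path>, null] when force_existing is set.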


def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from the tmp dir identified by tmp_token to a final location prepared by
    rotating backups
    """
    # Prepare new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # find tmp dir
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)

    # move its contents
    contents = iglob(pathjoin(tmp_dir, '*'))
    for f in contents:
        # chop off leading path that iglob adds
        f = f[len(tmp_dir) + 1:]
        move(
            pathjoin(tmp_dir, f),
            pathjoin(new_target_dir, f)
        )

    # delete temp dir
    rmdir(tmp_dir)

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    exit(0)
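
# Finalize call as a client might issue it after rsyncing into the tmp dir returned
# by the GET step (a sketch; the hostname and /cgi-bin/new_backup URL are assumptions
# about deployment, not defined in this script):
#   curl -X PUT 'http://backuphost/cgi-bin/new_backup?proto=rsync&name=myhost&token=<token>'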


def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive.
    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read the archive data from; it is written to disk
    """
    # Temp file we will store data in as it is uploaded
    tmp_fname = pathjoin(DATADB_TMP, "%s.tar.gz" % time())

    # Track uploaded data size
    bk_size = 0

    with open(tmp_fname, 'wb') as f:
        while True:
            data = fileStream.read(8192)
            if not data:
                break
            bk_size += len(data)
            f.write(data)

    # No data = assume something failed
    if bk_size == 0:
        unlink(tmp_fname)
        raise Exception("No file uploaded...")

    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # Move backup into place
    rename(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done
    start_response()  # send 200 response code
    exit(0)
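
# Archive upload as a client might issue it (a sketch; the URL is an assumption
# about deployment):
#   curl -X PUT --data-binary @backup.tar.gz \
#        'http://backuphost/cgi-bin/new_backup?proto=archive&name=myhost&keep=7'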


def handle_req():
    """
    Parse http query parameters and act accordingly.
    """
    params = parse_qs()

    for param_name in ["proto", "name"]:
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    assert max_backups > 0, "Must keep at least one backup"

    if os.environ['REQUEST_METHOD'] == "GET" and params["proto"] == "rsync":
        # Rsync prepare is GET
        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)
    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "rsync":
        # Rsync finalize is PUT
        handle_put_rsync(params["name"], params["token"], max_backups)
    elif os.environ['REQUEST_METHOD'] == "PUT" and params["proto"] == "archive":
        # Archive mode PUTs a file
        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)
    else:
        raise Exception("Invalid request. Params: %s" % params)


if __name__ == "__main__":
    try:
        handle_req()
    except Exception as e:
        start_response(status_code=("500", "Internal server error"))
        tb = traceback.format_exc()
        print(tb)
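
# Local smoke test outside a web server (a sketch; it sets the CGI environment by
# hand and assumes common.cgi's parse_qs reads QUERY_STRING, which this file does
# not show):
#   REQUEST_METHOD=GET QUERY_STRING='proto=rsync&name=myhost' ./new_backup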