CGI backend for datadb
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

234 lines
7.4 KiB

6 years ago
#!/usr/bin/env python3
5 years ago
import os
import sys
6 years ago
import traceback
5 years ago
from os import mkdir, rename, unlink, rmdir, utime
6 years ago
from os.path import exists
from os.path import join as pathjoin
5 years ago
from common.cgi import parse_qs, parse_auth, start_response
from common.datadb import DATADB_ROOT, DATADB_TMP, DATADB_DIR_TIMESTAMP_FORMAT, get_backup_dir, get_latest_backup, \
NoBackupException
from datetime import datetime
from shutil import rmtree, move
5 years ago
from subprocess import Popen, PIPE
from random import randint
6 years ago
from time import time
from hashlib import md5
from glob import iglob
import json
6 years ago
def rotate_backups(backup_dir, max_backups=5):
    """
    Prune old backups in backup_dir, then create a fresh dir for the next backup.

    Entries of backup_dir whose names parse with DATADB_DIR_TIMESTAMP_FORMAT are ordered newest to oldest and
    everything past the newest max_backups is deleted. Entries whose names do not parse are left untouched. The new
    dir is created after pruning, so up to max_backups + 1 timestamped dirs can exist when this returns.

    :param backup_dir: path to dir containing the timestamp-named dirs we will be rotating
    :param max_backups: Max number of already existing dirs to keep
    :returns: Full path of new data dir
    """
    backups = []
    for name in os.listdir(backup_dir):
        try:
            stamp = datetime.strptime(name, DATADB_DIR_TIMESTAMP_FORMAT)
        except ValueError:
            # Not a timestamp-named backup dir (stray file, lost+found, ...). One foreign entry must not abort
            # every future backup, and we must never delete something we did not recognise.
            continue
        backups.append((stamp, name))

    # Newest first. The on-disk name travels with its timestamp so we delete exactly what listdir() reported
    # instead of a name re-rendered through strftime().
    backups.sort(reverse=True)

    for _, name in backups[max_backups:]:
        rmtree(pathjoin(backup_dir, name))

    return prepare_new_backup_dir(backup_dir)
def prepare_new_backup_dir(backup_dir):
    """
    Create a new dir in backup_dir named after the current time, with an empty "data" dir inside it.

    :param backup_dir: dir the new timestamp-named dir is created in
    :returns: path of the new "data" dir, ending in a slash
    """
    stamp = datetime.now().strftime(DATADB_DIR_TIMESTAMP_FORMAT)
    new_backup_path = pathjoin(backup_dir, stamp)

    # Outer dir first, then the data dir inside it
    for path in (new_backup_path, pathjoin(new_backup_path, "data")):
        mkdir(path)

    return new_backup_path + '/data/'
6 years ago
def prepare_backup_dirs(backup_name, max_backups=5, rotate=True):
    """
    Make sure the dirs for a backup profile exist and pick the dir the next backup will be written to.

    :param backup_name: name of backup profile
    :param max_backups: handed to rotate_backups when a new backup dir gets created
    :param rotate: when False (in place mode), reuse whatever get_latest_backup returns instead of rotating. If that
                   raises NoBackupException, a new dir is created through rotate_backups anyway
    :returns: path of the backup dir to use
    """
    # Ensure the following dir exists: <profile dir>/data/
    profile_dir = get_backup_dir(backup_name)
    data_dir = pathjoin(profile_dir, 'data')
    for needed in (profile_dir, data_dir):
        if not exists(needed):
            mkdir(needed)

    in_place = not rotate
    if in_place:
        try:
            return get_latest_backup(backup_name)
        except NoBackupException:
            # Nothing to reuse yet: fall through and let the rotation create the first backup dir
            pass

    return rotate_backups(data_dir, max_backups=max_backups)
6 years ago
def handle_get_rsync(backup_name, sync_prev=False, force_existing=False):
    """
    Prepare a dest dir for an incoming rsync backup, reply with a JSON list of [dest dir, token] and exit.

    :param backup_name: name of backup profile
    :param sync_prev: disk copy the previous backup that will be rsynced on top of to save bandwidth
    :param force_existing: force using existing backups (ideal for single in-place backups of very large things).
                           In this mode no temp dir is made and the token in the reply is null
    """
    if force_existing:
        backup_0 = prepare_backup_dirs(backup_name, max_backups=1, rotate=False)
        # touch the backup dir
        utime(get_backup_dir(backup_name))
        start_response()
        print(json.dumps([backup_0, None]))
        exit(0)

    # Token keeps its "<32 hex chars>.<unix time>" shape. It names the temp dir and is later accepted back from the
    # client, so the hex part is derived from OS randomness rather than the guessable time/name/randint mix.
    now = int(time())
    token = "{}.{}".format(md5(os.urandom(32)).hexdigest(), now)

    # create tmpdir using token
    backup_dir = pathjoin(DATADB_TMP, token)
    os.mkdir(backup_dir)

    if sync_prev:
        # Backup dirs are named by timestamp, so the previously hard-coded <profile>/data/0/data never existed and
        # the pre-seed below never ran. Ask get_latest_backup instead.
        # NOTE(review): assumes get_latest_backup returns the latest backup's data dir, as its use in
        # prepare_backup_dirs implies - confirm against common.datadb.
        prev_path = None
        if exists(pathjoin(get_backup_dir(backup_name), 'data')):
            try:
                prev_path = get_latest_backup(backup_name)
            except NoBackupException:
                # First backup of this profile: nothing to pre-seed from
                pass

        if prev_path and exists(prev_path):
            # Copy the previous backup into the empty temp dir so the client's rsync only transfers differences.
            # This is a best-effort optimisation; a failed copy only means more data goes over the network.
            cp = Popen(['rsync', '-avr', '--one-file-system', prev_path.rstrip('/') + '/', backup_dir + '/'],
                       stdout=PIPE, stderr=PIPE)
            cp.communicate()

    # return both to requester
    start_response()
    print(json.dumps([backup_dir, token]))
    exit(0)
def handle_put_rsync(backup_name, tmp_token, max_backups):
    """
    Requested after rsync has completed successfully on the client end. Moves
    files from tmp dir identified by tmp_token, to a final location prepared by
    rotating backups. Replies "OK" and exits on success.

    :param backup_name: name of backup profile
    :param tmp_token: token naming the temp dir under DATADB_TMP, as handed out by handle_get_rsync
    :param max_backups: Max number of backups to keep, passed on to the rotation
    :raises ValueError: if tmp_token is not a plain dir name or does not name an existing temp dir
    """
    # tmp_token comes straight from the request and is joined into a path whose contents get moved and whose dir
    # gets removed, so it must be a single plain path component (no separators, no "." / "..").
    if (not isinstance(tmp_token, str) or not tmp_token or "\0" in tmp_token
            or tmp_token in (os.curdir, os.pardir) or os.path.basename(tmp_token) != tmp_token):
        raise ValueError("Invalid token: %r" % (tmp_token,))

    # find tmp dir - before rotating, so a bogus token cannot delete old backups and leave an empty new one
    tmp_dir = pathjoin(DATADB_TMP, tmp_token)
    if not os.path.isdir(tmp_dir):
        raise ValueError("Unknown token: %r" % (tmp_token,))

    # Prepare new dir
    new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

    # move its contents. listdir() rather than a '*' glob: the glob skips dotfiles, which would then be left
    # behind and make the rmdir() below fail
    for entry in os.listdir(tmp_dir):
        move(pathjoin(tmp_dir, entry), pathjoin(new_target_dir, entry))

    # delete temp dir
    rmdir(tmp_dir)

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Print confirmation
    start_response()
    print("OK")
    exit(0)
def handle_put_archive(backup_name, fileStream, max_backups):
    """
    Prepare and accept a new archive backup - a single tar.gz archive. Sends the response and exits on success.

    :param backup_name: profile the new file will be added to
    :param fileStream: file-like object to read archive data from, to disk
    :param max_backups: Max number of backups to keep, passed on to the rotation
    :raises Exception: if the stream yields no data at all
    """
    # Read the first chunk before touching the disk. No data = assume something failed
    data = fileStream.read(8192)
    if not data:
        raise Exception("No file uploaded...")

    # Temp file we will store data in as it is uploaded. The pid keeps two uploads that start at the same
    # instant from writing into the same file
    tmp_fname = pathjoin(DATADB_TMP, "%s.%s.tar.gz" % (time(), os.getpid()))

    try:
        with open(tmp_fname, 'wb') as f:
            while data:
                f.write(data)
                data = fileStream.read(8192)

        new_target_dir = prepare_backup_dirs(backup_name, max_backups=max_backups)

        # Move backup into place. move() instead of rename() so this also works when the temp dir and the backup
        # dir live on different filesystems, same as the rsync finalize path
        move(tmp_fname, pathjoin(new_target_dir, 'backup.tar.gz'))
    except Exception:
        # Don't leave a partial upload behind in the temp dir
        if exists(tmp_fname):
            unlink(tmp_fname)
        raise

    # touch the backup dir
    utime(get_backup_dir(backup_name))

    # Done
    start_response()  # send 200 response code
    exit(0)
def handle_req():
    """
    Parse http query parameters and act accordingly.

    Dispatches on the REQUEST_METHOD environment variable and the "proto" parameter:
    GET + rsync prepares a dest dir, PUT + rsync finalizes an rsync backup, PUT + archive stores an uploaded file.
    "proto" and "name" are required; "keep" (default 5) is the number of backups to keep; "inplace" selects in
    place mode for rsync prepare; "token" is required for rsync finalize.

    :raises Exception: on missing parameters or an unsupported method/proto combination
    :raises ValueError: if "keep" is not a positive integer
    """
    params = parse_qs()

    for param_name in ["proto", "name"]:
        if param_name not in params:
            raise Exception("Missing parameter: %s" % param_name)

    max_backups = int(params["keep"]) if "keep" in params else 5
    # Explicit raise rather than assert: asserts are stripped under `python -O`, and keep=0 would make the
    # rotation delete every existing backup
    if max_backups <= 0:
        raise ValueError("Must keep at least one backup")

    method = os.environ.get('REQUEST_METHOD')
    proto = params["proto"]

    if method == "GET" and proto == "rsync":
        # Rsync prepare is GET
        handle_get_rsync(params["name"], sync_prev=True, force_existing="inplace" in params)

    elif method == "PUT" and proto == "rsync":
        # Rsync finalize is PUT
        if "token" not in params:
            raise Exception("Missing parameter: token")
        handle_put_rsync(params["name"], params["token"], max_backups)

    elif method == "PUT" and proto == "archive":
        # Archive mode PUTs a file
        handle_put_archive(params["name"], sys.stdin.buffer, max_backups)

    else:
        raise Exception("Invalid request. Params: %s" % params)
6 years ago
6 years ago
if __name__ == "__main__":
    # Script entry point: any error raised while handling the request becomes a 500 response whose body is
    # the traceback
    try:
        handle_req()
    except Exception:
        start_response(status_code=("500", "Internal server error"))
        print(traceback.format_exc())