Implement basic backup restore support in the client CLI and server API

This commit is contained in:
dave 2021-06-08 20:35:16 -07:00
parent feacc535f0
commit 9bebaab13a
3 changed files with 77 additions and 14 deletions

View File

@ -2,6 +2,7 @@ import os
import sys
import logging
import argparse
import hashlib
import logging
import subprocess
import requests
@ -48,7 +49,16 @@ class BackupdbClient(object):
def upload(self, stream, backup, namespace="default"):
    """
    Upload *stream* as a new backup under *namespace*/*backup*.

    The stream is wrapped so it is sha256-hashed as requests reads it,
    letting us hash and upload in a single pass.

    :param stream: readable binary file-like object containing the archive
    :param backup: backup name to store the archive under
    :param namespace: parent namespace to upload into
    :return: tuple of (server response, sha256 hexdigest of uploaded bytes)
    """
    # NOTE(review): the diff showed both the old and new ordering of this
    # call; this is the post-commit form (data= before params=).
    hasher = WrappedStdout(stream)
    response = self.post("upload", data=hasher, params=dict(namespace=namespace, name=backup))
    return response, hasher.sha256()
def get_meta(self, backup, namespace="default", date=None):
    """
    Fetch the metadata document for a backup and return it as a dict.

    :param backup: backup name to look up
    :param namespace: parent namespace the backup lives in
    :param date: specific backup date, or None for the server default
    """
    query = dict(namespace=namespace, name=backup, date=date, meta=True)
    return self.get("download", params=query).json()
def download(self, backup, namespace="default", date=None):
    """
    Stream a backup archive from the server.

    Fetches the backup taken on *date*, or the latest one when *date*
    is not supplied. Returns the streaming response object.
    """
    query = dict(namespace=namespace, name=backup, date=date)
    return self.get("download", params=query, stream=True)
# def create_user(self, username, password):
# return self.post("user", data={"username": username,
def cmd_restore(args, parser, config, client):
    """
    Restore a backup.

    Downloads the requested backup archive, streams it through tar for
    extraction while hashing it, then verifies the sha256 against the
    server-side metadata.

    :return: 1 if the restore is refused (lockfile present, no --force),
        otherwise None on success.
    :raises Exception: if the downloaded data fails hash verification or
        tar exits nonzero.
    """
    backup_config = config["backups"][args.backup]
    dest = os.path.normpath(args.output_dir or backup_config["dir"])
    os.makedirs(dest, exist_ok=True)
    original_perms = os.stat(dest)

    # Refuse to restore if a lockfile created by this cli is already
    # present at the *destination* (fix: the old code checked the
    # configured dir even when -o/--output-dir pointed elsewhere).
    if os.path.exists(os.path.join(dest, LOCKFILE)) and not args.force:
        # Fix: the lockfile means data already EXISTS at the destination;
        # the old message claimed data was "missing".
        print("Error: data already exists at destination (use --force to overwrite)")
        return 1

    meta = client.get_meta(args.backup, args.namespace, args.date)
    # TODO: catch 404 here and exit cleanly like the old tool used to do
    response = client.download(args.backup, args.namespace, args.date)

    args_tar = [get_tarcmd(), 'zxv', '-C', dest + '/']
    print("Tar restore call: {}".format(args_tar))
    extract = subprocess.Popen(args_tar, stdin=subprocess.PIPE)

    # Stream the archive into tar while hashing it, so the whole backup
    # is never held in memory.
    hasher = hashlib.sha256()
    for chunk in response.iter_content(WrappedStdout.BUFFSIZE):
        extract.stdin.write(chunk)
        hasher.update(chunk)
    extract.stdin.close()
    extract.wait()

    sha256 = hasher.hexdigest()
    if sha256 != meta["sha256"]:
        # Fix: Exception() does not %-format its arguments the way
        # logging does; interpolate explicitly.
        raise Exception("Downloaded archive (%s) didn't match expected hash (%s)"
                        % (sha256, meta["sha256"]))
    if extract.returncode != 0:
        raise Exception("Could not extract archive")

    # Restore original ownership/permissions on the data dir, but only
    # when restoring to the configured location (not an -o override).
    if not args.output_dir:
        os.chmod(dest, original_perms.st_mode)
        os.chown(dest, original_perms.st_uid, original_perms.st_gid)
def get_args():
@ -232,8 +287,9 @@ def get_args():
p_restore.add_argument("--force", help="force restore operation if destination data already exists",
action="store_true", )
p_restore.add_argument("-n", "--namespace", default="default", help="parent namespace download from")
p_restore.add_argument("-o", "--output-dir", help="override restore path")
p_restore.add_argument("backup", help="backup to download")
p_restore.add_argument("date", help="date of backup to download")
p_restore.add_argument("date", nargs="?", help="date of backup to download")
return parser.parse_args(), parser

View File

@ -193,20 +193,20 @@ class WrappedStdout(object):
BUFFSIZE = 1024 * 1024
"""
Requests will call tell() on the file-like stdout stream if the tell attribute exists. However subprocess'
Requests will call tell() on the file-like readable stream if the tell attribute exists. However subprocess'
stdout stream (_io.BufferedReader) does not support this (raises OSError: [Errno 29] Illegal seek).
If the tell attribute is missing, requests will fall back to simply iterating on the file-like object,
so, we support only the iterable interface
"""
def __init__(self, stdout):
self.stdout = stdout
def __init__(self, readable):
self.readable = readable
self.hash = hashlib.sha256()
def __iter__(self):
return self
def __next__(self):
data = self.stdout.read(self.BUFFSIZE)
data = self.readable.read(self.BUFFSIZE)
if not data:
logging.info("end of stream")
raise StopIteration()
@ -214,7 +214,7 @@ class WrappedStdout(object):
return data
def close(self):
self.stdout.close()
self.readable.close()
def sha256(self):
return self.hash.hexdigest()

View File

@ -65,9 +65,11 @@ class BackupManager(object):
for chunk in chunks:
if not chunk.startswith("backup.tar.gz."):
continue # ignore metadata etc
chunk_key = f"{prefix}{chunk}"
logging.info("fetching chunk %s", chunk_key)
yield from self.s3.get_object(
Bucket=self.bucket,
Key=f"{prefix}{chunk}"
Key=chunk_key,
)["Body"].iter_chunks(chunk_size=1024 * 1024)
@ -147,20 +149,25 @@ class BackupdbApiV1(WebBase):
return self.mgr.list_dates(backup, namespace=namespace)
@cherrypy.expose
def download(self, name, date=None, meta=False, namespace="default"):
    """
    Stream a backup archive, or its metadata document when *meta* is truthy.

    When *date* is not supplied, the most recent backup for *name* in
    *namespace* is served. The backup date actually served is echoed in
    the X-Backupdb-Date response header.
    """
    if not date:
        # Dates sort lexicographically; the last entry is the newest.
        # NOTE(review): raises IndexError if no backups exist — presumably
        # list_dates is non-empty for known names; confirm against callers.
        date = sorted(self.mgr.list_dates(name, namespace))[-1]
    metadata = self.mgr.get_metadata(name, date, namespace=namespace)
    if meta:
        cherrypy.response.headers["Content-Type"] = "application/json"
    else:
        cherrypy.response.headers["Content-Type"] = "application/gzip"
        cherrypy.response.headers["Content-Length"] = metadata["size"]
        # Fix for the "lol injection" TODO: strip quotes and CR/LF so a
        # user-supplied name/namespace/date cannot break out of the quoted
        # filename parameter or inject additional response headers.
        safe = "".join(c for c in f"{namespace}-{name}-{date}" if c not in '"\r\n')
        cherrypy.response.headers["Content-Disposition"] = f'filename="{safe}.tar.gz"'
    cherrypy.response.headers["X-Backupdb-Date"] = date

    def download():
        if meta:
            yield json.dumps(metadata).encode('utf-8')
        else:
            yield from self.mgr.get_stream(name, date, namespace=namespace)
    return download()
download._cp_config = {'response.stream': True}