commit 49ff19b088 (parent 0596899040)
dave, 2017-04-22 00:06:12 -07:00

2 changed files with 128 additions and 120 deletions

View File

@@ -7,13 +7,15 @@ from os.path import normpath, join, exists
from os import chmod, chown, stat, environ
from enum import Enum
import subprocess
from requests import get, put, head


SSH_KEY_PATH = environ["DATADB_KEYPATH"] if "DATADB_KEYPATH" in environ else '/root/.ssh/datadb.key'
RSYNC_DEFAULT_ARGS = ['rsync', '-avzr', '--exclude=.datadb.lock', '--whole-file', '--one-file-system', '--delete', '-e',
                      'ssh -i {} -p 4874 -o StrictHostKeyChecking=no'.format(SSH_KEY_PATH)]
DATADB_HTTP_API = 'http://datadb.services.davepedu.com:4875/cgi-bin/'


class SyncStatus(Enum):
    "Data is on local disk"
    DATA_AVAILABLE = 1
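
Note: once a source and destination are appended, RSYNC_DEFAULT_ARGS with the default key path expands to a command of roughly the following shape (the host and paths here are illustrative placeholders, not values from this commit):

    rsync -avzr --exclude=.datadb.lock --whole-file --one-file-system --delete \
        -e 'ssh -i /root/.ssh/datadb.key -p 4874 -o StrictHostKeyChecking=no' \
        nexus@<server>:<remote path>/ <local dir>/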
@@ -21,58 +23,58 @@ class SyncStatus(Enum):
    DATA_MISSING = 2


def restore(profile, conf, force=False):  # remote_uri, local_dir, identity='/root/.ssh/datadb.key'
    """
    Restore data from datadb
    """
    # Sanity check: If the lockfile exists we assume the data is already there, so we wouldn't want to call rsync again
    # as it would wipe out local changes. This can be overridden with --force
    assert (status(profile, conf) == SyncStatus.DATA_MISSING) or force, "Data already exists (Use --force?)"

    original_perms = stat(conf["dir"])
    dest = urlparse(conf["uri"])

    status_code = head(DATADB_HTTP_API + 'get_backup', params={'proto': dest.scheme, 'name': profile}).status_code
    if status_code == 404:
        print("Connected to datadb, but datasource '{}' doesn't exist. Exiting".format(profile))
        # TODO: special exit code >1 to indicate this?
        return

    if dest.scheme == 'rsync':
        args = RSYNC_DEFAULT_ARGS[:]

        # Request backup server to prepare the backup, the returned dir is what we sync from
        rsync_path = get(DATADB_HTTP_API + 'get_backup', params={'proto': 'rsync', 'name': profile}).text.rstrip()

        # Add rsync source path
        args.append('nexus@{}:{}'.format(dest.netloc, normpath(rsync_path) + '/'))

        # Add local dir
        args.append(normpath(conf["dir"]) + '/')

        print("Rsync restore call: {}".format(' '.join(args)))
        subprocess.check_call(args)

    elif dest.scheme == 'archive':
        # http request backup server
        # download tarball
        args_curl = ['curl', '-s', '-v', '-XGET', '{}get_backup?proto=archive&name={}'.format(DATADB_HTTP_API, profile)]
        # unpack
        args_tar = ['tar', 'zxv', '-C', normpath(conf["dir"]) + '/']

        print("Tar restore call: {} | {}".format(' '.join(args_curl), ' '.join(args_tar)))

        dl = subprocess.Popen(args_curl, stdout=subprocess.PIPE)
        extract = subprocess.Popen(args_tar, stdin=dl.stdout)
        dl.wait()
        extract.wait()
        # TODO: convert to pure python?
        assert dl.returncode == 0, "Could not download archive"
        assert extract.returncode == 0, "Could not extract archive"

    # Restore original permissions on data dir
    # TODO store these in conf file
    chmod(conf["dir"], original_perms.st_mode)
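
The archive branch shells out to curl and tar; the TODO above asks about a pure-Python version. A minimal sketch, assuming the same get_backup endpoint and a gzipped tar response (not part of this commit):

    import tarfile

    def restore_archive_pure(profile, dest_dir):
        # Stream the tarball over HTTP and unpack it as it downloads
        resp = get(DATADB_HTTP_API + 'get_backup', params={'proto': 'archive', 'name': profile}, stream=True)
        resp.raise_for_status()
        with tarfile.open(fileobj=resp.raw, mode='r|gz') as tar:
            tar.extractall(dest_dir)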
@@ -84,55 +86,56 @@ def backup(profile, conf, force=False):
    """
    Backup data to datadb
    """
    # Sanity check: If the lockfile doesn't exist we assume the data is missing, so we wouldn't want to call rsync
    # again as it would wipe out the backup.
    assert (status(profile, conf) == SyncStatus.DATA_AVAILABLE) or force, "Data is missing (Use --force?)"

    dest = urlparse(conf["uri"])

    if dest.scheme == 'rsync':
        args = RSYNC_DEFAULT_ARGS[:]

        # Excluded paths
        if conf["exclude"]:
            for exclude_path in conf["exclude"].split(","):
                if not exclude_path == "":
                    args.append("--exclude")
                    args.append(exclude_path)

        # Add local dir
        args.append(normpath(conf["dir"]) + '/')

        new_backup_params = {'proto': 'rsync',
                             'name': profile,
                             'keep': conf["keep"]}
        if conf["inplace"]:
            new_backup_params["inplace"] = 1

        # Hit backupdb via http to retrieve the absolute path of the rsync destination on the remote server
        rsync_path, token = get(DATADB_HTTP_API + 'new_backup', params=new_backup_params).json()

        # Add rsync source path
        args.append(normpath('nexus@{}:{}'.format(dest.netloc, rsync_path)) + '/')

        # print("Rsync backup call: {}".format(' '.join(args)))
        try:
            subprocess.check_call(args)
        except subprocess.CalledProcessError as cpe:
            if cpe.returncode not in [0, 24]:  # ignore partial transfer due to vanishing files on our end
                raise

        # confirm completion if backup wasn't already in place
        if not conf["inplace"]:
            put(DATADB_HTTP_API + 'new_backup', params={'proto': 'rsync', 'name': profile, 'token': token,
                                                        'keep': conf["keep"]})

    elif dest.scheme == 'archive':
        # CD to local source dir
        # create tarball
        # http PUT file to backup server
        args_tar = ['tar', '--exclude=.datadb.lock']

        # Excluded paths
        if conf["exclude"]:
            for exclude_path in conf["exclude"].split(","):
@@ -141,17 +144,18 @@ def backup(profile, conf, force=False):
                    args_tar.append(exclude_path)

        args_tar += ['-zcv', './']
        args_curl = ['curl', '-v', '-XPUT', '--data-binary', '@-', '{}new_backup?proto=archive&name={}&keep={}'.
                     format(DATADB_HTTP_API, profile, conf["keep"])]

        print("Tar backup call: {} | {}".format(' '.join(args_tar), ' '.join(args_curl)))

        compress = subprocess.Popen(args_tar, stdout=subprocess.PIPE, cwd=normpath(conf["dir"]) + '/')
        upload = subprocess.Popen(args_curl, stdin=compress.stdout)
        compress.wait()
        upload.wait()
        # TODO: convert to pure python?
        assert compress.returncode == 0, "Could not create archive"
        assert upload.returncode == 0, "Could not upload archive"
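
As with restore, the TODO suggests replacing the tar-into-curl pipeline with pure Python. A rough sketch, assuming the same new_backup endpoint accepts the tarball as the PUT body (exclusions omitted for brevity; buffering in memory would be a poor fit for very large datasets):

    import io
    import tarfile

    def backup_archive_pure(profile, src_dir, keep):
        # Build a gzipped tarball in memory, then PUT it to the backup server
        buf = io.BytesIO()
        with tarfile.open(fileobj=buf, mode='w:gz') as tar:
            tar.add(src_dir, arcname='.')
        buf.seek(0)
        put(DATADB_HTTP_API + 'new_backup',
            params={'proto': 'archive', 'name': profile, 'keep': keep},
            data=buf)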
@@ -160,9 +164,9 @@ def status(profile, conf):
    """
    Check status of local dir - if the lock file is in place, we assume the data is there
    """
    lockfile = join(conf["dir"], '.datadb.lock')

    if exists(lockfile):
        return SyncStatus.DATA_AVAILABLE

    return SyncStatus.DATA_MISSING
@@ -179,7 +183,7 @@ def shell_exec(cmd, workdir='/tmp/'):
def main():
    """
    Expects a config file at /etc/datadb.ini. Example:

    ----------------------------
    [gyfd]
    uri=
@@ -192,56 +196,59 @@ def main():
    export_postexec=
    exclude=
    ----------------------------

    Each [section] defines one backup task.

    Fields:

    *uri*: Destination/source for this instance's data. Always fits the following format:
        <protocol>://<server>/<backup name>
        Valid protocols:
            rsync - rsync executed over SSH. The local dir will be synced with the remote backup dir using rsync.
            archive - tar archives transported over HTTP. The local dir will be tarred and PUT to the backup server's
                remote dir via http.
    *dir*: Local dir for this backup
    *keep*: Currently unused. Number of historical copies to keep on the remote server
    *auth*: Currently unused. Username:password string to use while contacting the datadb via HTTP.
    *restore_preexec*: Shell command to exec before pulling/restoring data
    *restore_postexec*: Shell command to exec after pulling/restoring data
    *export_preexec*: Shell command to exec before pushing data
    *export_postexec*: Shell command to exec after pushing data
    *exclude*: If the underlying transport method supports excluding paths, a comma-separated list of paths to exclude.
        Applies to backup operations only.
    *inplace*: rsync only. If enabled, the server will keep only a single copy that you will rsync over. Intended for
        single copies of LARGE datasets. Overrides "keep".
    """
    required_conf_params = ['dir', 'uri']

    conf_params = {'export_preexec': None,
                   'exclude': None,
                   'keep': 5,
                   'restore_preexec': None,
                   'restore_postexec': None,
                   'auth': '',
                   'export_postexec': None,
                   'inplace': False}

    conf_path = environ["DATADB_CONF"] if "DATADB_CONF" in environ else "/etc/datadb.ini"

    # Load profiles
    config = ConfigParser()
    config.read(conf_path)
    config = {section: {k: config[section][k] for k in config[section]} for section in config.sections()}

    for conf_k, conf_dict in config.items():
        for expect_param, expect_default in conf_params.items():
            if expect_param not in conf_dict.keys():
@@ -251,57 +258,59 @@ def main():
                raise Exception("Required parameter {} missing for profile {}".format(expect_param, conf_k))

    parser = argparse.ArgumentParser(description="Backupdb Agent depends on config: /etc/datadb.ini")

    parser.add_argument('-f', '--force', default=False, action='store_true',
                        help='force restore operation if destination data already exists')
    parser.add_argument('-n', '--no-exec', default=False, action='store_true', help='don\'t run pre/post-exec commands')
    parser.add_argument('-b', '--no-pre-exec', default=False, action='store_true', help='don\'t run pre-exec commands')
    parser.add_argument('-m', '--no-post-exec', default=False, action='store_true',
                        help='don\'t run post-exec commands')

    parser.add_argument('profile', type=str, choices=config.keys(), help='Profile to restore')

    # parser.add_argument('-i', '--identity',
    #                     help='Ssh keyfile to use', type=str, default='/root/.ssh/datadb.key')
    # parser.add_argument('-r', '--remote',
    #                     help='Remote server (rsync://...)', type=str, required=True)
    # parser.add_argument('-l', '--local_dir',
    #                     help='Local path', type=str, required=True)

    subparser_modes = parser.add_subparsers(dest='mode', help='modes (only "rsync")')
    subparser_backup = subparser_modes.add_parser('backup', help='backup to datastore')  # NOQA
    subparser_restore = subparser_modes.add_parser('restore', help='restore from datastore')  # NOQA
    subparser_status = subparser_modes.add_parser('status', help='get info for profile')  # NOQA

    args = parser.parse_args()

    if args.no_exec:
        args.no_pre_exec = True
        args.no_post_exec = True

    if args.mode == 'restore':
        if not args.no_pre_exec and config[args.profile]['restore_preexec']:
            shell_exec(config[args.profile]['restore_preexec'])

        restore(args.profile, config[args.profile], force=args.force)

        if not args.no_post_exec and config[args.profile]['restore_postexec']:
            shell_exec(config[args.profile]['restore_postexec'])

    elif args.mode == 'backup':
        if not args.no_pre_exec and config[args.profile]['export_preexec']:
            shell_exec(config[args.profile]['export_preexec'])

        backup(args.profile, config[args.profile])

        if not args.no_post_exec and config[args.profile]['export_postexec']:
            shell_exec(config[args.profile]['export_postexec'])

    elif args.mode == 'status':
        info = status(args.profile, config[args.profile])
        print(SyncStatus(info))

    else:
        parser.print_usage()
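
Taken together, the docstring and argument parser imply invocation of the form datadb <profile> <mode>. A minimal sketch of a working setup, with a hypothetical hostname and paths (only the field names come from the code above):

    # /etc/datadb.ini
    [gyfd]
    uri=rsync://backupserver.example.com/gyfd
    dir=/srv/gyfd
    keep=5

    # then, for example:
    datadb gyfd backup
    datadb gyfd status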

View File

@@ -4,11 +4,10 @@ from setuptools import setup
from datadb import __version__

setup(name='datadb',
      version=__version__,
      description='datadb cli module',
      url='http://gitlab.xmopx.net/dave/datadb-cli',
      author='dpedu',
      author_email='dave@davepedu.com',
      packages=['datadb'],
      scripts=['bin/datadb'])
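
Since the package declares scripts=['bin/datadb'], a standard setuptools install (an assumption; no install docs are part of this commit) puts the datadb command on PATH:

    pip install .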