commit 4c147219a7bb323807c4469f476daf03d79e35bd Author: dave Date: Sat Dec 26 22:12:36 2015 -0800 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..496bc8e --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +testenv +build +datadb.egg-info +dist diff --git a/README.md b/README.md new file mode 100644 index 0000000..3db0913 --- /dev/null +++ b/README.md @@ -0,0 +1,88 @@ +# datadb + +Effortless program data deployment and backup + +## Installation + +* Clone: `git clone http://gitlab.xmopx.net/dave/datadb-cli.git datadb-cli` +* Install prereqs: `cd datadb-cli ; pip3 install -r requirements.txt` +* Install: `python3 setup.py install` + +## Requirements + +Just python3 and [requests](http://python-requests.org/). + +## Usage + +### Setup + +For one, this is beta and some things are hard-coded. In datadb.py it is recommended to change the DATADB_HTTP_API URL. +This URL should be the cgi-bin path of an http server running [datadb-scripts](http://gitlab.xmopx.net/dave/datadb-scripts). + +Next, a config file must be created for each directory to be restored/backed up. It lives at /etc/datadb.ini and contains +many entires of this format: + +``` +[profile_name] +uri=:/// +dir=/local/path +keep=5 +auth=username:password +restore_preexec= +restore_postexec= +export_preexec= +export_postexec= +``` + +Each [section] defines one backup task. At present, all fields must be there even if their value is blank. + +Fields: + +**uri**: Destination/source for this instance's data. Must be this format: `:///` + +Valid protocols: + +* rsync - rsync executed over SSH. The local dir will be synced with the remote backup dir using rsync. Vice-versa for restores. +* archive - tar.gz data streamed over HTTP. The local dir will be tarred and PUT to the backup server's remote dir via http. Vice-versa for restores. Recommended only for smaller datasets. 
+ +**dir**: Local dir for this backup/restore + +**keep**: Number of historical copies to keep on remote server + +**auth**: Not implemented. Username:password string to use while contacting the datadb via HTTP. + +**restore_preexec**: Not implemented. Shell command to exec before pulling/restoring data. + +**restore_postexec**: Not implemented. Shell command to exec after pulling/restoring data. For example, loading a mysql dump + +**export_preexec**: Not implemented. Shell command to exec before pushing data. For example, dumping a mysql database to a file in the backup dir. + +**export_postexec**: Not implemented. Shell command to exec after pushing data + +### Assumptions + +Datadb makes some assumptions about it's environment. + +* `rsync`, `ssh`, `tar`, and `curl` commands are assumed to be in $PATH +* For rsync operations, the ssh private key file at `/root/.ssh/datadb.key` is used. + +### CLI Usage + +* Restore from backup: `datadb [--force] restore` + +Restore operations have a degree of sanity checking. Upon a successful restore, a file named *.datadb.lock* will be created in the local dir. Datadb checks for this file before doing restore operations, to prevent overwriting live data with an old backup. This check can be overridden with the `--force` command line option. + +* Backup to remote server: `datadb backup` +* Check status: `datadb status` + +Command line usage is agnostic to the underlying transport protocol used. 
+
+## TODO
+
+* Fix hard coded stuff mentioned above
+* Support config file-less usage
+* Sync all command
+* Option to override config path
+* Nicer config parsing
+* Implement security
+* Implement pre/post exec functions
\ No newline at end of file
diff --git a/bin/datadb b/bin/datadb
new file mode 100755
index 0000000..c35d3ca
--- /dev/null
+++ b/bin/datadb
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+from datadb import datadb
+
+if __name__ == '__main__':
+    datadb.main()
diff --git a/datadb/__init__.py b/datadb/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/datadb/datadb.py b/datadb/datadb.py
new file mode 100755
index 0000000..51b3279
--- /dev/null
+++ b/datadb/datadb.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python3
+
+import argparse
+from configparser import ConfigParser
+from urllib.parse import urlparse
+from os.path import normpath, join, exists
+from os import chmod, chown, stat
+from enum import Enum
+import subprocess
+from requests import get, put
+
+# Deployment settings. Hard-coded for now - see README TODO.
+SSH_KEY_PATH = '/root/.ssh/datadb.key'
+RSYNC_DEFAULT_ARGS = ['rsync', '-avzr', '--exclude=.datadb.lock', '--whole-file', '--one-file-system',
+                      '--delete', '-e', 'ssh -i {} -p 4874 -o StrictHostKeyChecking=no'.format(SSH_KEY_PATH)]
+DATADB_HTTP_API = 'http://datadb.services.davepedu.com:4875/cgi-bin/'
+# Presence of this file in a profile's local dir marks the data as restored/live (see status())
+LOCKFILE_NAME = '.datadb.lock'
+
+
+class SyncStatus(Enum):
+    # Data is on local disk
+    DATA_AVAILABLE = 1
+    # Data is not on local disk
+    DATA_MISSING = 2
+
+
+def restore(profile, conf, force=False):
+    """
+    Restore a profile's data from datadb onto the local disk.
+
+    :param profile: name of the config section being restored
+    :param conf: dict of settings from that section (uri, dir, keep, ...)
+    :param force: restore even if local data already appears to exist
+    """
+    # Sanity check: if the lockfile exists we assume the data is already there, and calling rsync
+    # again would wipe out local changes. This can be overridden with --force.
+    assert (status(profile, conf) == SyncStatus.DATA_MISSING) or force, "Data already exists (Use --force?)"
+
+    original_perms = stat(conf["dir"])
+    dest = urlparse(conf["uri"])
+
+    if dest.scheme == 'rsync':
+        args = RSYNC_DEFAULT_ARGS[:]
+
+        # Ask the backup server to prepare the backup; the returned dir is what we sync from
+        rsync_path = get(DATADB_HTTP_API + 'get_backup', params={'proto': 'rsync', 'name': profile}).text.rstrip()
+
+        # rsync source (remote) and destination (local) paths; trailing '/' syncs dir contents
+        args.append('nexus@{}:{}'.format(dest.netloc, normpath(rsync_path) + '/'))
+        args.append(normpath(conf["dir"]) + '/')
+
+        print("Rsync restore call: {}".format(' '.join(args)))
+        subprocess.check_call(args)
+
+    elif dest.scheme == 'archive':
+        # Download a tarball over http and unpack it into the local dir
+        args_curl = ['curl', '-s', '-v', '-XGET', '{}get_backup?proto=archive&name={}'.format(DATADB_HTTP_API, profile)]
+        args_tar = ['tar', 'zxv', '-C', normpath(conf["dir"]) + '/']
+
+        print("Tar restore call: {} | {}".format(' '.join(args_curl), ' '.join(args_tar)))
+
+        dl = subprocess.Popen(args_curl, stdout=subprocess.PIPE)
+        extract = subprocess.Popen(args_tar, stdin=dl.stdout)
+
+        dl.wait()
+        extract.wait()
+        # TODO: convert to pure python?
+
+        assert dl.returncode == 0, "Could not download archive"
+        assert extract.returncode == 0, "Could not extract archive"
+
+    else:
+        # Fix: an unknown scheme previously fell through silently and "restored" nothing
+        raise ValueError("Unsupported protocol: {}".format(dest.scheme))
+
+    # Restore original permissions on data dir
+    # TODO store these in conf file
+    chmod(conf["dir"], original_perms.st_mode)
+    chown(conf["dir"], original_perms.st_uid, original_perms.st_gid)
+    # TODO apply other permissions
+
+    # Fix: mark the restore as complete (checked by status()) so a later restore requires --force.
+    # Previously nothing ever created this file, despite the README documenting it.
+    open(join(conf["dir"], LOCKFILE_NAME), 'a').close()
+
+
+def backup(profile, conf, force=False):
+    """
+    Push a profile's local data to datadb.
+
+    :param profile: name of the config section being backed up
+    :param conf: dict of settings from that section (uri, dir, keep, ...)
+    :param force: back up even if the local data appears to be missing
+    """
+    # Sanity check: if the lockfile doesn't exist we assume the data is missing, and backing up
+    # an empty dir would wipe out the remote copy. This can be overridden with --force.
+    assert (status(profile, conf) == SyncStatus.DATA_AVAILABLE) or force, "Data is missing (Use --force?)"
+
+    dest = urlparse(conf["uri"])
+
+    if dest.scheme == 'rsync':
+        args = RSYNC_DEFAULT_ARGS[:]
+
+        # Local source dir
+        args.append(normpath(conf["dir"]) + '/')
+
+        # Hit backupdb via http to retrieve absolute path of rsync destination on the remote server
+        rsync_path = get(DATADB_HTTP_API + 'new_backup',
+                         params={'proto': 'rsync', 'name': profile, 'keep': conf["keep"]}).text.rstrip()
+
+        # Fix: normpath only the path portion, as restore() does; normpath over the whole
+        # 'user@host:/path' spec could mangle the remote prefix.
+        args.append('nexus@{}:{}'.format(dest.netloc, normpath(rsync_path) + '/'))
+
+        print("Rsync backup call: {}".format(' '.join(args)))
+        subprocess.check_call(args)
+
+    elif dest.scheme == 'archive':
+        # Tar the local dir (cwd) and stream the tarball to the backup server via http PUT
+        args_tar = ['tar', '--exclude=.datadb.lock', '-zcv', './']
+        args_curl = ['curl', '-v', '-XPUT', '--data-binary', '@-',
+                     '{}new_backup?proto=archive&name={}&keep={}'.format(DATADB_HTTP_API, profile, conf["keep"])]
+
+        print("Tar backup call: {} | {}".format(' '.join(args_tar), ' '.join(args_curl)))
+
+        compress = subprocess.Popen(args_tar, stdout=subprocess.PIPE, cwd=normpath(conf["dir"]) + '/')
+        upload = subprocess.Popen(args_curl, stdin=compress.stdout)
+
+        compress.wait()
+        upload.wait()
+        # TODO: convert to pure python?
+
+        assert compress.returncode == 0, "Could not create archive"
+        assert upload.returncode == 0, "Could not upload archive"
+
+    else:
+        # Fix: an unknown scheme previously fell through silently and backed up nothing
+        raise ValueError("Unsupported protocol: {}".format(dest.scheme))
+
+
+def status(profile, conf):
+    """
+    Report whether the profile's data is present on the local disk.
+
+    The lockfile is created after a successful restore; if it is in place we assume the data is there.
+    """
+    lockfile = join(conf["dir"], LOCKFILE_NAME)
+    return SyncStatus.DATA_AVAILABLE if exists(lockfile) else SyncStatus.DATA_MISSING
+
+
+def main():
+    """
+    Expects a config file at /etc/datadb.ini. Example:
+
+    ----------------------------
+    [gyfd]
+    uri=
+    dir=
+    keep=
+    auth=
+    restore_preexec=
+    restore_postexec=
+    export_preexec=
+    export_postexec=
+    ----------------------------
+
+    Each [section] defines one backup task.
+
+    Fields:
+
+    *uri*: Destination/source for this instance's data. Always fits the following format:
+
+        <protocol>://<server>/<backup_name>
+
+    Valid protocols:
+
+    rsync - rsync executed over SSH. The local dir will be synced with the remote backup dir using rsync.
+    archive - tar archives transported over HTTP. The local dir will be tarred and PUT to the backup server's remote dir via http.
+
+    *dir*: Local dir for this backup
+
+    *keep*: Currently unused. Number of historical copies to keep on remote server
+
+    *auth*: Currently unused. Username:password string to use while contacting the datadb via HTTP.
+
+    *restore_preexec*: Shell command to exec before pulling/restoring data
+
+    *restore_postexec*: Shell command to exec after pulling/restoring data
+
+    *export_preexec*: Shell command to exec before pushing data
+
+    *export_postexec*: Shell command to exec after pushing data
+    """
+    # Load profiles and flatten to a plain dict-of-dicts
+    config = ConfigParser()
+    config.read("/etc/datadb.ini")
+    config = {section: {k: config[section][k] for k in config[section]} for section in config.sections()}
+
+    parser = argparse.ArgumentParser(description="Backupdb Agent depends on config: /etc/datadb.ini")
+    parser.add_argument('--force', default=False, action='store_true',
+                        help='force restore operation if destination data already exists')
+    parser.add_argument('profile', type=str, choices=config.keys(), help='Profile to restore')
+
+    subparser_modes = parser.add_subparsers(dest='mode', help='modes (only "rsync")')
+    subparser_modes.add_parser('backup', help='backup to datastore')
+    subparser_modes.add_parser('restore', help='restore from datastore')
+    subparser_modes.add_parser('status', help='get info for profile')
+
+    args = parser.parse_args()
+
+    if args.mode == 'restore':
+        restore(args.profile, config[args.profile], force=args.force)
+    elif args.mode == 'backup':
+        # Fix: pass --force through; previously it was parsed but ignored for backups
+        backup(args.profile, config[args.profile], force=args.force)
+    elif args.mode == 'status':
+        # status() already returns a SyncStatus; no need to re-wrap it
+        print(status(args.profile, config[args.profile]))
+    else:
+        parser.print_usage()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ca0dee4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+requests==2.9.1
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..f57f417
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+from setuptools import setup
+
+__version__ = "0.0.0"
+
+setup(name='datadb',
+      version=__version__,
+      description='datadb cli module',
+      url='http://gitlab.xmopx.net/dave/datadb-cli',
+      author='dpedu',
+      author_email='dave@davepedu.com',
+      packages=['datadb'],
+      scripts=['bin/datadb']
+      )