Command line client for automated backups
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

283 lines
11 KiB

  1. #!/usr/bin/env python3
  2. import argparse
  3. from configparser import ConfigParser
  4. from urllib.parse import urlparse
  5. from os.path import normpath, join, exists
  6. from os import chmod, chown, stat, environ
  7. from enum import Enum
  8. import subprocess
  9. from requests import get,put,head
  10. import json
  11. SSH_KEY_PATH = environ["DATADB_KEYPATH"] if "DATADB_KEYPATH" in environ else '/root/.ssh/datadb.key'
  12. RSYNC_DEFAULT_ARGS = ['rsync', '-avzr', '--exclude=.datadb.lock', '--whole-file', '--one-file-system', '--delete', '-e', 'ssh -i {} -p 4874 -o StrictHostKeyChecking=no'.format(SSH_KEY_PATH)]
  13. DATADB_HTTP_API = 'http://datadb.services.davepedu.com:4875/cgi-bin/'
  14. class SyncStatus(Enum):
  15. "Data is on local disk"
  16. DATA_AVAILABLE = 1
  17. "Data is not on local disk"
  18. DATA_MISSING = 2
def restore(profile, conf, force=False):
    """
    Restore a profile's data from datadb onto the local disk.

    :param profile: name of the config [section] being restored
    :param conf: dict of that section's options; reads "uri" and "dir"
    :param force: when True, skip the lockfile sanity check and let the
                  transport overwrite whatever is already in conf["dir"]
    :raises AssertionError: if data already exists locally (without force),
                            or if the archive download/extract fails
    """
    # Sanity check: If the lockfile exists we assume the data is already there, so we wouldn't want to call rsync again
    # as it would wipe out local changes. This can be overridden with --force
    assert (status(profile, conf) == SyncStatus.DATA_MISSING) or force, "Data already exists (Use --force?)"
    # Capture mode/ownership of the destination dir so they can be put back
    # after rsync/tar have written into it.
    # NOTE(review): stat() raises if conf["dir"] does not exist yet - confirm
    # callers always pre-create the directory.
    original_perms = stat(conf["dir"])
    dest = urlparse(conf["uri"])
    # Probe the HTTP API first so a missing datasource is reported cleanly
    # instead of failing mid-transfer.
    status_code = head(DATADB_HTTP_API+'get_backup', params={'proto':dest.scheme, 'name':profile}).status_code
    if status_code == 404:
        print("Connected to datadb, but datasource '{}' doesn't exist. Exiting".format(profile))
        # TODO: special exit code >1 to indicate this?
        return
    if dest.scheme == 'rsync':
        args = RSYNC_DEFAULT_ARGS[:]
        # Request backup server to prepare the backup, the returned dir is what we sync from
        rsync_path = get(DATADB_HTTP_API+'get_backup', params={'proto':'rsync', 'name':profile}).text.rstrip()
        # Add rsync source path (trailing slash: sync contents, not the dir itself)
        args.append('nexus@{}:{}'.format(dest.netloc, normpath(rsync_path)+'/'))
        # Add local dir
        args.append(normpath(conf["dir"])+'/')
        print("Rsync restore call: {}".format(' '.join(args)))
        subprocess.check_call(args)
    elif dest.scheme == 'archive':
        # http request backup server
        # download tarball
        args_curl = ['curl', '-s', '-v', '-XGET', '{}get_backup?proto=archive&name={}'.format(DATADB_HTTP_API, profile)]
        # unpack into the local data dir
        args_tar = ['tar', 'zxv', '-C', normpath(conf["dir"])+'/']
        print("Tar restore call: {} | {}".format(' '.join(args_curl), ' '.join(args_tar)))
        # Stream curl's stdout directly into tar's stdin (curl ... | tar ...)
        dl = subprocess.Popen(args_curl, stdout=subprocess.PIPE)
        extract = subprocess.Popen(args_tar, stdin=dl.stdout)
        dl.wait()
        extract.wait()
        # TODO: convert to pure python?
        assert dl.returncode == 0, "Could not download archive"
        assert extract.returncode == 0, "Could not extract archive"
    # Restore original permissions on data dir
    # TODO store these in conf file
    chmod(conf["dir"], original_perms.st_mode)
    chown(conf["dir"], original_perms.st_uid, original_perms.st_gid)
    # TODO apply other permissions
  62. def backup(profile, conf, force=False):
  63. """
  64. Backup data to datadb
  65. """
  66. # Sanity check: If the lockfile doesn't exist we assume the data is missing, so we wouldn't want to call rsync
  67. # again as it would wipe out the backup.
  68. assert (status(profile, conf) == SyncStatus.DATA_AVAILABLE) or force, "Data is missing (Use --force?)"
  69. dest = urlparse(conf["uri"])
  70. if dest.scheme == 'rsync':
  71. args = RSYNC_DEFAULT_ARGS[:]
  72. # Excluded paths
  73. for exclude_path in conf["exclude"].split(","):
  74. if not exclude_path == "":
  75. args.append("--exclude")
  76. args.append(exclude_path)
  77. # Add local dir
  78. args.append(normpath(conf["dir"])+'/')
  79. # Hit backupdb via http to retreive absolute path of rsync destination of remote server
  80. rsync_path, token = get(DATADB_HTTP_API+'new_backup', params={'proto':'rsync', 'name':profile, 'keep':conf["keep"]}).json()
  81. # Add rsync source path
  82. args.append(normpath('nexus@{}:{}'.format(dest.netloc, rsync_path))+'/')
  83. #print("Rsync backup call: {}".format(' '.join(args)))
  84. try:
  85. subprocess.check_call(args)
  86. except subprocess.CalledProcessError as cpe:
  87. if cpe.returncode not in [0,24]: # ignore partial transfer due to vanishing files on our end
  88. raise
  89. # confirm completion
  90. put(DATADB_HTTP_API+'new_backup', params={'proto':'rsync', 'name':profile, 'token': token, 'keep':conf["keep"]})
  91. elif dest.scheme == 'archive':
  92. # CD to local source dir
  93. # create tarball
  94. # http PUT file to backup server
  95. args_tar = ['tar', '--exclude=.datadb.lock']
  96. # Excluded paths
  97. for exclude_path in conf["exclude"].split(","):
  98. if not exclude_path == "":
  99. args_tar.append("--exclude")
  100. args_tar.append(exclude_path)
  101. args_tar += ['-zcv', './']
  102. args_curl = ['curl', '-v', '-XPUT', '--data-binary', '@-', '{}new_backup?proto=archive&name={}&keep={}'.format(DATADB_HTTP_API, profile, conf["keep"])]
  103. print("Tar backup call: {} | {}".format(' '.join(args_tar), ' '.join(args_curl)))
  104. compress = subprocess.Popen(args_tar, stdout=subprocess.PIPE, cwd=normpath(conf["dir"])+'/')
  105. upload = subprocess.Popen(args_curl, stdin=compress.stdout)
  106. compress.wait()
  107. upload.wait()
  108. # TODO: convert to pure python?
  109. assert compress.returncode == 0, "Could not create archive"
  110. assert upload.returncode == 0, "Could not upload archive"
  111. def status(profile, conf):
  112. """
  113. Check status of local dir - if the lock file is in place, we assume the data is there
  114. """
  115. lockfile = join(conf["dir"], '.datadb.lock')
  116. if exists(lockfile):
  117. return SyncStatus.DATA_AVAILABLE
  118. return SyncStatus.DATA_MISSING
  119. def shell_exec(cmd, workdir='/tmp/'):
  120. """
  121. Execute a command in shell, wait for exit.
  122. """
  123. print("Calling: {}".format(cmd))
  124. subprocess.Popen(cmd, shell=True, cwd=workdir).wait()
def main():
    """
    Expects a config file at /etc/datadb.ini (path overridable via the
    DATADB_CONF environment variable). Example:

    ----------------------------
    [gyfd]
    uri=
    dir=
    keep=
    auth=
    restore_preexec=
    restore_postexec=
    export_preexec=
    export_postexec=
    exclude=
    ----------------------------

    Each [section] defines one backup task.

    Fields:

    *uri*: Destination/source for this instance's data. Always fits the following format:

        <procotol>://<server>/<backup name>

    Valid protocols:

        rsync - rsync executed over SSH. The local dir will be synced with the remote backup dir using rsync.
        archive - tar archives transported over HTTP. The local dir will be tarred and PUT to the backup server's remote dir via http.

    *dir*: Local dir for this backup
    *keep*: Currently unused. Number of historical copies to keep on remote server
    *auth*: Currently unused. Username:password string to use while contacting the datadb via HTTP.
    *restore_preexec*: Shell command to exec before pulling/restoring data
    *restore_postexec*: Shell command to exec after pulling/restoring data
    *export_preexec*: Shell command to exec before pushing data
    *export_postexec*: Shell command to exec after pushing data
    *exclude*: if the underlying transport method supports excluding paths, a comma separated list of paths to exclude. Applies to backup operations only.
    """
    conf_path = environ["DATADB_CONF"] if "DATADB_CONF" in environ else "/etc/datadb.ini"
    # Load profiles
    config = ConfigParser()
    config.read(conf_path)
    # Flatten the ConfigParser into a plain dict-of-dicts: {section: {key: value}}
    config = {section:{k:config[section][k] for k in config[section]} for section in config.sections()}
    parser = argparse.ArgumentParser(description="Backupdb Agent depends on config: /etc/datadb.ini")
    parser.add_argument('-f', '--force', default=False, action='store_true', help='force restore operation if destination data already exists')
    parser.add_argument('-n', '--no-exec', default=False, action='store_true', help='don\'t run pre/post-exec commands')
    parser.add_argument('-b', '--no-pre-exec', default=False, action='store_true', help='don\'t run pre-exec commands')
    parser.add_argument('-m', '--no-post-exec', default=False, action='store_true', help='don\'t run post-exec commands')
    # Profile must be one of the [section] names found in the config file
    parser.add_argument('profile', type=str, choices=config.keys(), help='Profile to restore')
    #parser.add_argument('-i', '--identity',
    #                    help='Ssh keyfile to use', type=str, default='/root/.ssh/datadb.key')
    #parser.add_argument('-r', '--remote',
    #                    help='Remote server (rsync://...)', type=str, required=True)
    #parser.add_argument('-l', '--local_dir',
    #                    help='Local path', type=str, required=True)
    subparser_modes = parser.add_subparsers(dest='mode', help='modes (only "rsync")')
    subparser_backup = subparser_modes.add_parser('backup', help='backup to datastore')
    subparser_restore = subparser_modes.add_parser('restore', help='restore from datastore')
    subparser_status = subparser_modes.add_parser('status', help='get info for profile')
    args = parser.parse_args()
    # --no-exec implies both --no-pre-exec and --no-post-exec
    if args.no_exec:
        args.no_pre_exec = True
        args.no_post_exec = True
    if args.mode == 'restore':
        # Run hooks around the restore unless suppressed and configured non-empty
        if not args.no_pre_exec and config[args.profile]['restore_preexec']:
            shell_exec(config[args.profile]['restore_preexec'])
        restore(args.profile, config[args.profile], force=args.force)
        if not args.no_post_exec and config[args.profile]['restore_postexec']:
            shell_exec(config[args.profile]['restore_postexec'])
    elif args.mode == 'backup':
        if not args.no_pre_exec and config[args.profile]['export_preexec']:
            shell_exec(config[args.profile]['export_preexec'])
        # NOTE(review): --force is not forwarded to backup() even though it
        # accepts a force kwarg - confirm this is intentional (-f's help text
        # only mentions restore).
        backup(args.profile, config[args.profile])
        if not args.no_post_exec and config[args.profile]['export_postexec']:
            shell_exec(config[args.profile]['export_postexec'])
    elif args.mode == 'status':
        info = status(args.profile, config[args.profile])
        print(SyncStatus(info))
    else:
        # No subcommand supplied
        parser.print_usage()
  198. if __name__ == '__main__':
  199. main()