import logging
import os
import sqlite3
import sys
from contextlib import closing

from b2 import exception as b2exception
from b2.api import B2Api
from b2.download_dest import DownloadDestLocalFile

from b2mirror.base import Provider, Reciever
from b2mirror.common import Result, FileInfo


class B2Provider(Provider):
    """
    Iterates files in bucket.

    Not implemented: constructing an instance always raises NotImplementedError.
    """

    def __init__(self, accountId, appKey, bucketId, bucketBasePath):
        super(B2Provider, self).__init__()
        # NotImplemented is a comparison sentinel and is not callable; the exception type
        # that signals "this is a stub" is NotImplementedError.
        raise NotImplementedError()


class B2Reciever(Reciever):
    """
    Uploads files to a B2 bucket and tracks what has been uploaded in a sqlite database.

    The tracking database is downloaded from the bucket on construction and uploaded back by
    teardown(). Files not seen during a run can be removed from the bucket with purge().
    """

    # NOTE(review): not referenced anywhere in the visible code - confirm whether the base class uses it
    max_chunk_size = 256 * 1024

    def __init__(self, bucket, path, account_id, app_key, workers=10, compare_method='mtime'):
        """
        Authorize against B2, look up the bucket and prepare the local tracking database.

        :param bucket: Name of the bucket to upload into
        :param path: Base path inside the bucket. Leading slashes are stripped
        :param account_id: B2 account id used for authorization
        :param app_key: B2 application key used for authorization
        :param workers: Passed to B2Api as max_upload_workers
        :param compare_method: "mtime" or "size"; selects how xfer_file decides whether a file must be
                               uploaded. Any other value raises KeyError
        """
        super(B2Reciever, self).__init__()
        self.log = logging.getLogger("B2Reciever")
        self.bucket_name = bucket
        self.path = path.lstrip('/')
        self.account_id = account_id
        self.app_key = app_key

        self.api = B2Api(max_upload_workers=workers)
        self.api.authorize_account('production', self.account_id, self.app_key)
        self.bucket = self.api.get_bucket_by_name(self.bucket_name)

        self.db = None
        self._db_setup()

        # The receiver is responsible for determining if a file needs to be uploaded or not
        self.should_transfer = {
            "mtime": self._should_transfer_mtime,
            "size": self._should_transfer_size
        }[compare_method]

    def _db_setup(self, db_path=None):
        """
        This plugin uses a sqlite database to track the contents of what is on the remote B2 bucket. Why? It's
        simply faster than using B2's quite limited API to perform the same action. The sqlite DB is stored on
        the bucket. This method:
        - Downloads the DB
        - if none present, creates a new db file
        - Initializes/updates tables in the db

        :param db_path: Local path for the database file. Defaults to a pid-specific file under /tmp
        """
        if not db_path:
            db_path = '/tmp/b2mirror.{}.db'.format(os.getpid())
        self.db_path = db_path

        fetch_success = self._fetch_remote_db(db_path)
        self._open_db()

        if not fetch_success:
            # no db was downloaded and the handle above is empty. initialize it.
            self._init_db_contents()
            self.log.info("Initialized database")

        # Mark all files as unseen
        # Files will be marked as seen as they are processed
        # Later, unseen files will be purged
        with closing(self.db.cursor()) as c:
            c.execute("UPDATE 'files' SET seen=0;")

    def _open_db(self):
        """
        Open self.db_path as self.db. The connection is in autocommit mode (isolation_level=None), may be
        used from threads other than the creating one, and returns rows as sqlite3.Row
        """
        self.db = sqlite3.connect(self.db_path, check_same_thread=False, isolation_level=None)
        self.db.row_factory = sqlite3.Row

    def _init_db_contents(self):
        """
        Init the sqlite database. Creates missing tables.
        """
        def table_exists(cursor, table_name):
            # True when sqlite's schema table already lists a table with this name
            cursor.execute("SELECT * FROM SQLITE_MASTER WHERE `type`='table' AND `name`=?", (table_name,))
            return bool(cursor.fetchall())

        tables = {
            "files": """
            CREATE TABLE `files` (
                `path` varchar(4096) PRIMARY KEY,
                `mtime` INTEGER,
                `size` INTEGER,
                `seen` BOOLEAN
            );"""
        }

        with closing(self.db.cursor()) as c:
            for table_name, table_create_query in tables.items():
                if not table_exists(c, table_name):
                    c.execute(table_create_query)

    def _fetch_remote_db(self, db_path):
        """
        Download the tracking database from the bucket to a local file.

        :param db_path: Local path the database is written to
        :return: True if the database was downloaded, False if the bucket reported it as not found
        :raises b2exception.UnknownError: for any failure other than "404 not_found"
        """
        db_bucket_path = os.path.join(self.path, ".b2mirror.db")
        self.log.info("Fetching tracking db from bucket ({}) to {}".format(db_bucket_path, db_path))
        try:
            self.bucket.download_file_by_name(db_bucket_path, DownloadDestLocalFile(db_path))
        except b2exception.UnknownError as e:
            # Exceptions have no .message attribute on Python 3 unless the class defines one, so
            # fall back to the string form instead of failing with AttributeError.
            message = getattr(e, 'message', None) or str(e)
            if '404 not_found' in message:
                return False
            raise
        return True

    def teardown(self):
        """
        Place the DB file back onto the remote, then delete the local copy
        """
        self.db.close()
        sqlite_finfo = FileInfo(self.db_path,
                                ".b2mirror.db",
                                os.path.getsize(self.db_path),
                                int(os.path.getmtime(self.db_path)))
        self.put_file(sqlite_finfo, purge_historics=True)
        os.unlink(self.db_path)

    def _should_transfer_mtime(self, row, f):
        """
        Transfer when the file is untracked or its tracked mtime is older than the local mtime
        """
        return not row or row['mtime'] < f.mtime

    def _should_transfer_size(self, row, f):
        """
        Transfer when the file is untracked or its tracked size differs from the local size
        """
        return not row or row['size'] != f.size

    def xfer_file(self, f):
        """
        Future-called function that handles a single file. The file is checked against the database, using the
        configured compare method, to see if the file has new content that should be uploaded or is untouched
        since the last sync

        :param f: File to process; rel_path, mtime and size are read here
        :return: Result of put_file if the file was uploaded, Result.skipped if it was left alone
        :raises Exception: whatever put_file raised; the failure is logged first
        """
        result = Result.failed

        with closing(self.db.cursor()) as c:
            row = c.execute("SELECT * FROM 'files' WHERE `path` = ?;", (f.rel_path,)).fetchone()

            if self.should_transfer(row, f):
                self.log.info("Uploading: %s", f.rel_path)
                try:
                    # upload the file. if a row existed it means there may be historic copies of the file
                    # already there
                    result = self.put_file(f, purge_historics=row is not None)
                except Exception:
                    # The messages need a %s placeholder, otherwise logging cannot format the arguments
                    self.log.error("Failed: %s", f.rel_path)
                    self.log.error("Unexpected error: %s", sys.exc_info()[0])
                    raise

                # The file was uploaded, commit it to the db
                c.execute("REPLACE INTO 'files' VALUES(?, ?, ?, ?);", (f.rel_path, f.mtime, f.size, 1))
            else:
                c.execute("UPDATE 'files' SET seen=1 WHERE `path` = ?;", (f.rel_path,))
                result = Result.skipped

        return result

    def put_file(self, file_info, purge_historics=False):
        """
        Upload a local file to the bucket beneath self.path

        :param file_info: File to upload; abs_path is the local source, rel_path the name below self.path
        :param purge_historics: If True, delete every version of the destination path except the first one
                                listed after the upload
        :return: Result.ok
        """
        dest_path = os.path.join(self.path, file_info.rel_path).lstrip('/')
        upload_result = self.bucket.upload_local_file(file_info.abs_path, dest_path)  # NOQA

        if purge_historics:
            self.delete_by_path(dest_path, skip=1)

        return Result.ok

    def purge(self):
        """
        Delete files on the remote that were not found when scanning the local tree. This assumes an upload
        phase has already been done using ***THIS B2Reciever INSTANCE***.
        """
        with closing(self.db.cursor()) as c:
            with closing(self.db.cursor()) as c_del:
                for purge_file in c.execute("SELECT * FROM 'files' WHERE seen=0;"):
                    self.log.info("Delete on remote: %s", purge_file["path"])
                    self.purge_file(purge_file["path"])
                    c_del.execute("DELETE FROM 'files' WHERE path=?;", (purge_file["path"],))

    def purge_file(self, file_path):
        """
        Remove a file and all historical copies from the bucket

        :param file_path: File path relative to the source tree to delete. This should NOT include self.path
        """
        dest_path = os.path.join(self.path, file_path).lstrip('/')
        self.delete_by_path(dest_path)

    def delete_by_path(self, file_path, skip=0, max_entries=100):
        """
        List all versions of a file and delete some or all of them

        :param file_path: Bucket path to delete
        :param skip: How many files to skip before starting deletion. 5 means keep 5 historical copies. Using
                     a value of 0 will delete a file and all its revisions
        :param max_entries: Passed to list_file_versions as max_entries; versions beyond what that single
                            listing returns are not examined
        """
        for f in self.bucket.list_file_versions(start_filename=file_path, max_entries=max_entries)["files"]:
            if f["fileName"] == file_path:
                if skip == 0:
                    self.api.delete_file_version(f["fileId"], f["fileName"])
                else:
                    skip -= 1
            else:
                # The first entry with a different name ends processing.
                # NOTE(review): relies on all versions of file_path being listed before other names - confirm
                return