diff --git a/b2mirror/mirror.py b/b2mirror/mirror.py index 52fcc5a..2108160 100644 --- a/b2mirror/mirror.py +++ b/b2mirror/mirror.py @@ -29,7 +29,7 @@ Dest will upload the file, and inform the manager it was completed class B2SyncManager(object): - def __init__(self, source_module, dest_module, exclude_res=None, workers=10): + def __init__(self, source_module, dest_module, exclude_res=None, workers=10, compare_method="mtime"): """ :param source_module: subclass instance of b2mirror.base.Provider acting as a file source :param dest_module: subclass of b2mirror.base.Receiver acting as a file destination @@ -45,11 +45,17 @@ class B2SyncManager(object): self.exclude_res = [ re.compile(r'.*\.(DS_Store|pyc|dropbox)$'), re.compile(r'.*__pycache__.*'), - re.compile(r'.*\.dropbox\.cache.*') + re.compile(r'.*\.dropbox\.cache.*'), + re.compile(r'.*\.AppleDouble.*') ] + (exclude_res if exclude_res else []) self.workers = workers self._init_db() + self.should_transfer = { + "mtime": self._should_transfer_mtime, + "size": self._should_transfer_size + }[compare_method] + @staticmethod def dict_factory(cursor, row): d = {} @@ -141,7 +147,10 @@ class B2SyncManager(object): row = c.execute("SELECT * FROM 'files' WHERE `path` = ?;", (f.rel_path,)).fetchone() - if not row or row['mtime'] < f.mtime: + if self.should_transfer(row, f): + + # The file was uploaded, commit it to the db + c.execute("REPLACE INTO 'files' VALUES(?, ?, ?, ?);", (f.rel_path, f.mtime, f.size, 1)) print("Uploading:", f.rel_path) try: @@ -150,11 +159,6 @@ class B2SyncManager(object): print("Failed:", f.rel_path) print("Unexpected error:", sys.exc_info()[0]) raise - # print("Ok: ", f.rel_path) - - # The file was uploaded, commit it to the db - c.execute("REPLACE INTO 'files' VALUES(?, ?, ?, ?);", (f.rel_path, f.mtime, f.size, 1)) - # print("Done: ", f.rel_path) else: c.execute("UPDATE 'files' SET seen=1 WHERE `path` = ?;", (f.rel_path,)).fetchone() @@ -165,6 +169,12 @@ class B2SyncManager(object): return result + def _should_transfer_mtime(self, row, f): + return not row or row['mtime'] < f.mtime + + def _should_transfer_size(self, row, f): + return not row or row['size'] != f.size + def purge_remote(self): """ Delete files on the remote that were not found when scanning the local tree. @@ -181,7 +191,7 @@ class B2SyncManager(object): c.close() -def sync(source_uri, dest_uri, account_id, app_key, workers=10, exclude=[]): +def sync(source_uri, dest_uri, account_id, app_key, workers=10, exclude=[], compare_method="mtime"): source = urlparse(source_uri) dest = urlparse(dest_uri) @@ -202,5 +212,5 @@ def sync(source_uri, dest_uri, account_id, app_key, workers=10, exclude=[]): assert source_provider is not None assert dest_receiver is not None - syncer = B2SyncManager(source_provider, dest_receiver, workers=workers, exclude_res=exclude) + syncer = B2SyncManager(source_provider, dest_receiver, workers=workers, exclude_res=exclude, compare_method=compare_method) syncer.sync() diff --git a/bin/b2mirror b/bin/b2mirror index 9017827..6a10d47 100755 --- a/bin/b2mirror +++ b/bin/b2mirror @@ -9,6 +9,7 @@ from b2mirror import mirror def main(): parser = argparse.ArgumentParser(description="Sync data to/from B2") + parser.add_argument("-i", "--size", help="Compare by size instead of mtime", action="store_true", default=False) parser.add_argument("-w", "--workers", help="Maximum parallel uploads", type=int, default=10) parser.add_argument("-s", "--source", required=True, help="Source URI") @@ -17,13 +18,21 @@ def main(): parser.add_argument("-a", "--account-id", required=True, help="Backblaze account ID") parser.add_argument("-k", "--app-key", required=True, help="Backblaze application key") - parser.add_argument("--exclude", nargs="+", help="Regexes to exclude from transfer") + parser.add_argument("--exclude", nargs="+", help="List of regexes to exclude from transfer") args = parser.parse_args() ignore_res = [re.compile(i) for i in args.exclude] - mirror.sync(args.source, args.dest, args.account_id, args.app_key, workers=args.workers, exclude=ignore_res) + mirror.sync( + args.source, + args.dest, + args.account_id, + args.app_key, + workers=args.workers, + exclude=ignore_res, + compare_method="size" if args.size else "mtime" + ) if __name__ == '__main__': main()