photolib/photoapp/migrate.py
2019-07-13 15:47:59 -07:00

211 lines
5.7 KiB
Python

import os
import json
import sqlalchemy
from photoapp.dbutils import get_db_engine
from photoapp.types import Photo, PhotoSet, generate_storage_path
from photoapp.storage import uri_to_storage
from photoapp.image import get_exif_data_fobj
import shutil
from contextlib import closing
from concurrent.futures import ThreadPoolExecutor, as_completed
STORAGE_URI = "file://./library"
DB_URI = "sqlite:///photos.db"
"""
Before this application had support for multiple file storage backends, it only supported the filesystem, and it did so
in a way that wasn't abstracted. This script contains the steps for migrating an old database and file tree to the
modern one.
SQLite to MySQL Steps:
======================
Migrating from sqlite to mysql is NOT required. Follow this part only if you're converting from sqlite to mysql.
If you're not migrating from sqlite to mysql, you just need to do the equivalent of step 4 to your sqlite database.
1) Export the old sqlite database's contents. We need the data only, not the schema. First dump everything:
```
$ sqlite3 photos_old.db
sqlite> .output old.sql
sqlite> .dump
sqlite> .exit
```
Then using your favorite editor format all the INSERT statements like:
```
START TRANSACTION;
SET FOREIGN_KEY_CHECKS=0;
(many insert statements here)
SET FOREIGN_KEY_CHECKS=1;
COMMIT;
```
2) Populate a mysql database with the app's schema
You can just start and stop the app with it pointed at mysql like a normal person would.
3) Modify the mysql schema to play nice with our data:
```
mysql> alter table files drop fname;
alter table files modify column path varchar(1024);
```
3) Import the sqlite data dump into mysql
4) Put the mysql schema back
```
mysql> alter table files add fname varchar(256);
```
Now, continue with the steps below. When finished, it's safest to do a data-only dump from mysql, delete and recreate
your schema, and import the mysql data dump.
Filesystem migration
====================
This part is required, the layout of your library directory must be migrated. If you're moving to s3, that is done
here too.
1) Run this script pointed at your new database (edit the connection uri and main function to run part1 below)
It will produce a renames.json in the current dir, which is needed in later steps.
2) Run this script pointed at your new storage (edit the storage uri and main function to run part2 below)
This copies your photos to the new library.
"""
def getstorage():
    """
    Build the storage backend described by the module-level STORAGE_URI.

    :return: whatever uri_to_storage produces for STORAGE_URI
    """
    backend = uri_to_storage(STORAGE_URI)
    return backend
def getsession():
    """
    Open a new database session bound to an engine created from the module-level DB_URI.

    The session autoflushes and does not autocommit, so callers must call commit() themselves.

    :return: a new session produced by a sqlalchemy sessionmaker
    """
    db_engine = get_db_engine(DB_URI)
    session_factory = sqlalchemy.orm.sessionmaker(bind=db_engine, autoflush=True, autocommit=False)
    return session_factory()
def part1():
    """
    Filesystem migration, step 1: rewrite every Photo row to use the new storage path layout.

    For each photo, the basename of its current path is saved in `fname` and `path` is replaced with the result of
    generate_storage_path. The list of (old path, new path) pairs is dumped to renames.json in the current directory
    and the database changes are then committed.

    :raises Exception: if renames.json already exists (a rerun would overwrite the recorded rename map)
    """
    session = getsession()

    if os.path.exists("renames.json"):
        raise Exception("dont run me twice!!")

    renames = []
    for photo in session.query(Photo).all():
        old_path = photo.path
        basename = os.path.basename(old_path)
        photo.fname = basename

        # everything after the last dot; a name without a dot yields the whole name
        extension = basename.rsplit(".", 1)[-1]
        new_path = generate_storage_path(photo.set.date_real, photo.hash, extension)
        assert old_path != new_path

        renames.append((old_path, new_path))
        photo.path = new_path

    with open("renames.json", "w") as out:
        json.dump(renames, out)

    session.commit()
    # session.rollback()
def part2():
    """
    Filesystem migration, step 2: copy every file listed in renames.json into the configured storage.

    The renames file path is hard-coded below and has to be edited to point at the file written by part1. Copies
    are performed by rename_set on a pool of 8 worker threads. Progress is printed after each completed copy; the
    first failure seen is reported and re-raised.
    """
    with open("path/to/renames.json", "r") as f:
        renames = json.load(f)

    library_storage = getstorage()

    with ThreadPoolExecutor(max_workers=8) as pool:
        # map each submitted copy back to its [old path, new path] entry for error reporting
        pending = {}
        for entry in renames:
            pending[pool.submit(rename_set, library_storage, entry[0], entry[1])] = entry
        print("Working...")

        for completed, future in enumerate(as_completed(pending), 1):
            entry = pending[future]
            error = future.exception()
            if error:
                print("Screwed up on:", entry)
                raise error
            print("Done:", completed)

    print("Done!")
def rename_set(storage, src_path, dest_path):
    """
    Copy one file from the local filesystem into the storage backend.

    :param storage: storage object; its open(path, mode) must return a writable file-like object with close()
    :param src_path: path of the existing file on the local filesystem
    :param dest_path: path to write to within the storage backend
    :raises OSError: if the source file cannot be opened; storage.open is not called in that case
    """
    # open the source first so a missing or unreadable source never creates or truncates the destination
    with open(src_path, 'rb') as sf:
        with closing(storage.open(dest_path, 'wb')) as df:
            shutil.copyfileobj(sf, df)
"""
At one point, the cli contained errors causing it to not submit photo gps data. Running migrate_gpsfix rescans files for
said gps data and updates the database.
"""
def migrate_gpsfix():
    """
    Rescan stored JPEGs for GPS coordinates and fill them in on PhotoSets that lack them.

    Only PhotoSets whose lat is 0 or NULL are examined. Sets containing a Canon CR2 file are skipped, as are sets
    without any JPEG. The first JPEG whose EXIF data yields usable coordinates supplies the set's lat/lon; sets
    where no JPEG yields usable coordinates are left untouched. Each fix is committed as soon as it is found.
    """
    session = getsession()
    storage = getstorage()

    missing_gps = sqlalchemy.or_(PhotoSet.lat == 0,
                                 PhotoSet.lat == None)  # NOQA: E711

    # iterate all photosets lacking coordinates, printing a running count as progress
    for done, p in enumerate(session.query(PhotoSet).filter(missing_gps).all(), 1):
        print(done)

        # defensive re-check of the loaded row; the query above already restricts on lat
        if p.lat and p.lon:
            continue

        # just bail if there's a CR2, the paired jpg is known not to have gps data in my dataset :)
        if any(i.format == "image/x-canon-cr2" for i in p.files):
            continue

        # scan the set's jpegs until one yields usable gps data
        found = None
        for img in p.files:
            if img.format != "image/jpeg":
                continue
            with closing(storage.open(img.path, 'rb')) as fsrc:
                _, gpsdata, _, _ = get_exif_data_fobj(fsrc)  # (datetime, (decimal, decimal), (width, height), rotation)
            if gpsdata and gpsdata[0]:
                found = gpsdata
                break

        # only a result that passed the check above counts; a leftover unusable value from the last jpeg
        # scanned must not be written to the row
        if found is None:
            continue

        print(p.uuid, "->", found)
        p.lat, p.lon = found[0], found[1]
        # update the db
        session.commit()
# __name__ == '__main__' and part1()
# __name__ == '__main__' and part2()
# __name__ == '__main__' and migrate_gpsfix()