From 58a99cd74af110b30191f61f92ba3a31d8f8cda2 Mon Sep 17 00:00:00 2001
From: dave
Date: Sat, 8 Sep 2018 15:49:16 -0700
Subject: [PATCH] initial commit, basic library ingest

---
Reviewed revision of the original patch. The blob ids on the index lines were
not recomputed for the changed file contents.

 photoapp/__init__.py |   0
 photoapp/image.py    | 111 +++++++++++++++++++++++++++++++++++++++++++
 photoapp/ingest.py   | 106 +++++++++++++++++++++++++++++++++++++++++
 photoapp/library.py  |  67 ++++++++++++++++++++++++++
 photoapp/types.py    |  40 ++++++++++++++++
 requirements.txt     |   3 ++
 setup.py             |  23 +++++++++
 7 files changed, 350 insertions(+)
 create mode 100644 photoapp/__init__.py
 create mode 100644 photoapp/image.py
 create mode 100644 photoapp/ingest.py
 create mode 100644 photoapp/library.py
 create mode 100644 photoapp/types.py
 create mode 100644 requirements.txt
 create mode 100644 setup.py

diff --git a/photoapp/__init__.py b/photoapp/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/photoapp/image.py b/photoapp/image.py
new file mode 100644
index 0000000..191c3b6
--- /dev/null
+++ b/photoapp/image.py
@@ -0,0 +1,111 @@
+from datetime import datetime
+from PIL import Image, ExifTags
+from decimal import Decimal
+from hashlib import sha256
+import os
+import magic
+from photoapp.types import Photo, PhotoSet
+
+
+def get_jpg_info(fpath):
+    """
+    Given the path to an image, return a PhotoSet describing it.
+
+    The date comes from the exif data when available, otherwise from the file's modification time. The coordinates
+    are 0,0 when the file has no gps data.
+    """
+    date, gps = get_exif_data(fpath)
+
+    if not date:
+        # No exif date, fall back to file modification date
+        date = get_mtime(fpath)
+
+    # gps is set to 0,0 if unavailable
+    lat, lon = gps or [0, 0]
+
+    mime = magic.from_file(fpath, mime=True)
+
+    photo = Photo(hash=get_hash(fpath), path=fpath, format=mime)
+
+    return PhotoSet(date=date, lat=lat, lon=lon, files=[photo])
+
+
+def get_mtime(fpath):
+    """
+    Return the file's modification time as a datetime
+    """
+    return datetime.fromtimestamp(os.stat(fpath).st_mtime)
+
+
+def get_hash(path):
+    """
+    Return the sha256 hex digest of the file's contents
+    """
+    hasher = sha256()
+    with open(path, 'rb') as f:
+        # Read in pieces so large files are never held in memory all at once
+        for piece in iter(lambda: f.read(1024 * 256), b''):
+            hasher.update(piece)
+    return hasher.hexdigest()
+
+
+def get_exif_data(path):
+    """
+    Return a (datetime, (decimal, decimal)) tuple describing the photo's exif date and gps coordinates. Either item
+    is None when the file does not provide it.
+    """
+    # Close the file handle as soon as the exif data has been read
+    with Image.open(path) as img:
+        if img.format != "JPEG":
+            return None, None
+        exif_data = img._getexif()
+    if not exif_data:
+        return None, None
+    exif = {
+        ExifTags.TAGS[k]: v
+        for k, v in exif_data.items()
+        if k in ExifTags.TAGS
+    }
+
+    dateinfo = None
+    # Prefer the original capture tags over the generic DateTime tag, and stop at the first tag that parses
+    for key in ["DateTimeOriginal", "DateTimeDigitized", "DateTime"]:
+        if key not in exif:
+            continue
+        try:
+            dateinfo = datetime.strptime(exif[key], "%Y:%m:%d %H:%M:%S")
+        except (TypeError, ValueError):
+            # Unusable value, try the next tag. The caller falls back to the file date if none parse
+            continue
+        break
+
+    gpsinfo = None
+    gps = exif.get("GPSInfo")
+    # Keys 1-4 hold the latitude/longitude refs and values read below. GPSInfo can be present without them
+    if gps and all(tag in gps for tag in (1, 2, 3, 4)):
+        # see https://gis.stackexchange.com/a/273402
+        gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
+        gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
+        if gps[1] == 'S':
+            gps_y *= -1
+        if gps[3] == 'W':
+            gps_x *= -1
+        gpsinfo = (gps_y, gps_x)
+
+    return dateinfo, gpsinfo
+
+
+def rational64u_to_hms(values):
+    """
+    Convert three (numerator, denominator) pairs into a list of three Decimals
+    """
+    return [Decimal(values[0][0]) / Decimal(values[0][1]),
+            Decimal(values[1][0]) / Decimal(values[1][1]),
+            Decimal(values[2][0]) / Decimal(values[2][1])]
+
+
+def hms_to_decimal(values):
+    """
+    Convert [degrees, minutes, seconds] into decimal degrees
+    """
+    return values[0] + values[1] / 60 + values[2] / 3600
diff --git a/photoapp/ingest.py b/photoapp/ingest.py
new file mode 100644
index 0000000..253f24d
--- /dev/null
+++ b/photoapp/ingest.py
@@ -0,0 +1,106 @@
+import magic
+import argparse
+from photoapp.library import PhotoLibrary
+from photoapp.image import get_jpg_info, get_hash, get_mtime
+from itertools import chain
+from photoapp.types import Photo, PhotoSet
+import os
+
+"""
+Photo sorting rules:
+
+jpeg
+    exif date
+    file modification date
+raw
+    group with exif date of jpeg with same name
+    file modification date
+mov, video, or other
+    modification date
+"""
+
+known_extensions = ["jpg", "png", "cr2", "xmp", "mp4", "mov"]
+regular_images = ["jpg", "png"]
+files_raw = ["cr2", "xmp"]
+files_video = ["mp4", "mov"]
+
+
+def batch_ingest(library, files):
+    """
+    Scan the passed file paths, group them into PhotoSets, and add each PhotoSet to the library.
+
+    :param library: PhotoLibrary the files are added to
+    :param files: list of file paths. Paths that are not files or have no known extension are skipped
+    """
+    # group by extension
+    byext = {k: [] for k in known_extensions}
+
+    print("Pre-sorting files")
+    for item in files:
+        if not os.path.isfile(item):
+            print("Skipping due to not a file: {}".format(item))
+            continue
+        # Split the file name only, so a dot in a parent directory is not mistaken for an extension
+        name_parts = os.path.basename(item).rsplit(".", 1)
+        if len(name_parts) < 2:
+            print("Skipping due to no extension: {}".format(item))
+            continue
+        extension = name_parts[-1].lower()
+        if extension == "jpeg":
+            extension = "jpg"
+        if extension not in known_extensions:
+            print("Skipping due to unknown extension: {}".format(item))
+            continue
+        byext[extension].append(item)
+
+    print("Scanning images")
+    photos = []
+    # Lowercase jpeg file name without extension -> PhotoSet, used below to attach RAWs to their jpeg
+    jpegs = {}
+    # process regular images first.
+    for ext in regular_images:
+        for item in byext[ext]:
+            photoset = get_jpg_info(item)
+            photos.append(photoset)
+            if ext == "jpg":
+                # When several jpegs share a name, RAWs are attached to the first one scanned
+                jpegs.setdefault(os.path.basename(item).rsplit(".", 1)[0].lower(), photoset)
+
+    print("Scanning RAWs")
+    # process raws
+    for item in chain(*[byext[ext] for ext in files_raw]):
+        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
+        jpeg_set = jpegs.get(os.path.basename(item).rsplit(".", 1)[0].lower())
+        if jpeg_set is not None:
+            jpeg_set.files.append(itemmeta)
+        else:
+            photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))
+
+    # TODO prune any xmp without an associated regular image or cr2
+
+    print("Scanning other files")
+    # process all other formats
+    for item in chain(*[byext[ext] for ext in files_video]):
+        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
+        photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))
+
+    print("Updating database")
+    for photoset in photos:
+        library.add_photoset(photoset)
+
+
+def main():
+    """
+    Command line entry point: ingest the files named on the command line into the library
+    """
+    parser = argparse.ArgumentParser(description="Library ingestion tool")
+    parser.add_argument("files", nargs="+")
+    args = parser.parse_args()
+
+    library = PhotoLibrary("photos.db", "./library/")
+
+    batch_ingest(library, args.files)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/photoapp/library.py b/photoapp/library.py
new file mode 100644
index 0000000..97ff208
--- /dev/null
+++ b/photoapp/library.py
@@ -0,0 +1,67 @@
+import os
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from photoapp.types import Base, Photo, PhotoSet
+from sqlalchemy.exc import IntegrityError
+
+
+class PhotoLibrary(object):
+    """
+    A photo library: a directory tree holding the files and a sqlite database describing them
+    """
+    def __init__(self, db_path, lib_path):
+        """
+        :param db_path: path to the sqlite database file
+        :param lib_path: directory the library's files are moved into
+        """
+        self.path = lib_path
+        self.engine = create_engine('sqlite:///{}'.format(db_path), echo=False)
+        Base.metadata.create_all(self.engine)
+        self.session = sessionmaker()
+        self.session.configure(bind=self.engine)
+
+    def add_photoset(self, photoset):
+        """
+        Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
+        as the file is moved to the library path.
+
+        If moving a file or committing fails, files already moved are put back and the error is re-raised.
+        """
+
+        # Create target directory
+        path = os.path.join(self.path, self.get_datedir_path(photoset.date))
+        os.makedirs(path, exist_ok=True)
+
+        moves = []  # Track files moved. If the sql transaction fails, we'll undo these
+
+        s = self.session()
+        try:
+            for file in photoset.files:
+                fname, ext = os.path.splitext(os.path.basename(file.path))
+                dest = os.path.join(path, fname + ext)
+
+                # Check if the name is already in use, rename new file if needed
+                dupe_rename = 1
+                while os.path.exists(dest):
+                    dest = os.path.join(path, "{}_{}{}".format(fname, dupe_rename, ext))
+                    dupe_rename += 1
+                os.rename(file.path, dest)
+                moves.append((file, file.path, dest))
+                # Store the path relative to the library. str.lstrip() strips characters, not a prefix
+                file.path = os.path.relpath(dest, self.path)
+
+            s.add(photoset)
+            s.commit()
+        except Exception:
+            # Move or commit failed, undo the moves
+            s.rollback()
+            for file, original, dest in moves:
+                os.rename(dest, original)
+                file.path = original
+            raise
+
+    def get_datedir_path(self, date):
+        """
+        Return a path like 2018/3/31 given a datetime object representing the same date
+        """
+        return os.path.join(str(date.year), str(date.month), str(date.day))
diff --git a/photoapp/types.py b/photoapp/types.py
new file mode 100644
index 0000000..57f9a25
--- /dev/null
+++ b/photoapp/types.py
@@ -0,0 +1,40 @@
+from sqlalchemy import Column, Integer, String, DateTime, Unicode, DECIMAL, ForeignKey
+from sqlalchemy.orm import relationship
+from sqlalchemy.ext.declarative import declarative_base
+
+import uuid
+
+
+Base = declarative_base()
+
+
+class PhotoSet(Base):
+    """
+    A group of files (e.g. a jpeg and its RAW) sharing one date and location
+    """
+    __tablename__ = 'photos'
+
+    id = Column(Integer, primary_key=True)
+    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))
+    date = Column(DateTime)
+    lat = Column(DECIMAL(precision=11))
+    lon = Column(DECIMAL(precision=11))
+
+    files = relationship("Photo", back_populates="set")
+
+
+class Photo(Base):
+    """
+    A single file belonging to a PhotoSet
+    """
+    __tablename__ = 'files'
+
+    id = Column(Integer, primary_key=True)
+    set_id = Column(Integer, ForeignKey("photos.id"))
+    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))
+
+    set = relationship("PhotoSet", back_populates="files", foreign_keys=[set_id])
+
+    hash = Column(String(length=64), unique=True)  # sha256 hex digest of the file contents
+    path = Column(Unicode)
+    format = Column(String(length=64))  # TODO how long can a mime string be
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d488134
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+Pillow==5.2.0
+python-magic==0.4.15
+SQLAlchemy==1.2.11
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..c97cd4e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+
+from setuptools import setup
+
+
+__version__ = "0.0.0"
+
+
+setup(name='photoapp',
+      version=__version__,
+      description='Photo library application',
+      url='',
+      author='dpedu',
+      author_email='dave@davepedu.com',
+      packages=['photoapp'],
+      install_requires=[],
+      entry_points={
+          "console_scripts": [
+              # NOTE(review): photoapp.daemon is not added by this patch - confirm it exists before shipping photoappd
+              "photoappd = photoapp.daemon:main",
+              "photoimport = photoapp.ingest:main"
+          ]
+      })