initial commit, basic library ingest

This commit is contained in:
dave 2018-09-08 15:49:16 -07:00
commit 58a99cd74a
7 changed files with 316 additions and 0 deletions

0
photoapp/__init__.py Normal file
View File

103
photoapp/image.py Normal file
View File

@ -0,0 +1,103 @@
from datetime import datetime
from PIL import Image, ExifTags
from decimal import Decimal
from hashlib import sha256
import os
import magic
from photoapp.types import Photo, PhotoSet
def get_jpg_info(fpath):
    """
    Build a PhotoSet holding a single Photo entry for the image file at fpath.

    The set's date is the exif date when one is available and the file modification date otherwise. Coordinates
    come from the exif gps data and are recorded as 0,0 when it is unavailable.
    """
    exif_date, exif_gps = get_exif_data(fpath)
    # Prefer the exif date; fall back to the file modification date
    taken = exif_date if exif_date else get_mtime(fpath)
    # Missing gps data is stored as 0,0
    latitude, longitude = exif_gps if exif_gps else [0, 0]
    mime_type = magic.from_file(fpath, mime=True)
    entry = Photo(hash=get_hash(fpath), path=fpath, format=mime_type)
    return PhotoSet(date=taken, lat=latitude, lon=longitude, files=[entry])
def get_mtime(fpath):
    """
    Return the modification time of the file at fpath as a datetime
    """
    modified = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified)
def get_hash(path):
    """
    Return the hex-encoded sha256 digest of the file at path, reading it in 256 KiB pieces
    """
    chunk_size = 1024 * 256
    digest = sha256()
    with open(path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. end of file
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def get_exif_data(path):
    """
    Return a (datetime, (decimal, decimal)) tuple describing the photo's exif date and gps coordinates

    Returns (None, None) when the file is not a JPEG or carries no exif data. The coordinate element is None when
    the exif data has no usable gps position (missing latitude/longitude tags or a zero denominator).

    Raises Exception when exif data is present but contains none of the accepted date tags.
    """
    # Open the image as a context manager so its file handle is released as soon as the exif block is read
    with Image.open(path) as img:
        if img.format != "JPEG":
            return None, None
        exif_data = img._getexif()
    if not exif_data:
        return None, None

    # Re-key the raw exif dict by tag name, dropping tags Pillow has no name for
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    datestr = None
    # Every present tag overwrites the previous one, so the LAST present tag in this list wins
    # NOTE(review): confirm this priority is intended rather than "first present tag wins"
    acceptable = ["DateTime", "DateTimeOriginal", "DateTimeDigitized"]
    for key in acceptable:
        if key in exif:
            datestr = exif[key]

    if datestr is None:
        print(exif.keys())
        raise Exception("{} has no DateTime".format(path))  # TODO how often do we hit this

    dateinfo = datetime.strptime(datestr, "%Y:%m:%d %H:%M:%S")

    gpsinfo = None
    gps = exif.get("GPSInfo")
    # Tags 1-4 are the latitude ref, latitude, longitude ref and longitude read below; a gps block lacking any
    # of them carries no usable position
    if gps and all(tag in gps for tag in (1, 2, 3, 4)):
        try:
            # see https://gis.stackexchange.com/a/273402
            gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
            gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
        except ArithmeticError:
            # A rational with a zero denominator cannot be converted; treat the position as unavailable
            pass
        else:
            if gps[1] == 'S':
                gps_y *= -1
            if gps[3] == 'W':
                gps_x *= -1
            gpsinfo = (gps_y, gps_x)

    return dateinfo, gpsinfo
def rational64u_to_hms(values):
    """
    Convert the first three (numerator, denominator) pairs in values to a list of three Decimal quotients
    """
    return [Decimal(values[idx][0]) / Decimal(values[idx][1]) for idx in range(3)]
def hms_to_decimal(values):
    """
    Collapse a three-part value into one number: first + second / 60 + third / 3600
    """
    whole, sixtieths, thirty_six_hundredths = values[0], values[1], values[2]
    return whole + sixtieths / 60 + thirty_six_hundredths / 3600

98
photoapp/ingest.py Normal file
View File

@ -0,0 +1,98 @@
import magic
import argparse
from photoapp.library import PhotoLibrary
from photoapp.image import get_jpg_info, get_hash, get_mtime
from itertools import chain
from photoapp.types import Photo, PhotoSet
import os
"""
Photo sorting rules:
    jpeg
        exif date
        file modification date
    raw
        group with exif date of jpeg with same name
        file modification date
    mov, video, or other
        modification date
"""
# File extensions (lowercase, without the dot) grouped by how the ingest treats them
regular_images = ["jpg", "png"]
files_raw = ["cr2", "xmp"]
files_video = ["mp4", "mov"]
# Every extension the ingest accepts: images, then raws, then videos
known_extensions = regular_images + files_raw + files_video
def _sort_by_extension(files):
    """
    Group the given paths by normalized extension, printing a message for every path that is skipped
    """
    byext = {k: [] for k in known_extensions}
    print("Pre-sorting files")
    for item in files:
        if not os.path.isfile(item):
            print("Skipping due to not a file: {}".format(item))
            continue
        # Only look at the file name itself so a dot in a directory name is never mistaken for an extension
        _, dot, extension = os.path.basename(item).rpartition(".")
        if not dot:
            print("Skipping due to no extension: {}".format(item))
            continue
        extension = extension.lower()
        if extension == "jpeg":
            extension = "jpg"
        if extension not in known_extensions:
            print("Skipping due to unknown extension: {}".format(item))
            continue
        byext[extension].append(item)
    return byext


def batch_ingest(library, files):
    """
    Scan the given file paths, group them into photosets and add every photoset to the library.

    Regular images each become their own photoset. A raw file joins the photoset of the regular image named
    "<same name>.jpg" (compared case-insensitively) when there is one, and otherwise becomes its own photoset dated
    by file modification time. Video files always become their own photoset dated by file modification time.

    :param library: object providing add_photoset(photoset), called once per resulting photoset
    :param files: iterable of file paths; non-files and unknown or missing extensions are skipped with a message
    """
    byext = _sort_by_extension(files)

    print("Scanning images")
    photos = []
    # process regular images first.
    for item in chain.from_iterable(byext[ext] for ext in regular_images):
        photos.append(get_jpg_info(item))

    print("Scanning RAWs")
    # Index the regular images by lowercased file name so each raw finds its sibling in one lookup. setdefault
    # keeps the first photoset seen for a name, matching a front-to-back scan of the list.
    by_name = {}
    for photoset in photos:
        for entry in photoset.files:
            by_name.setdefault(os.path.basename(entry.path).lower(), photoset)

    # process raws
    for item in chain.from_iterable(byext[ext] for ext in files_raw):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        # File name with its last extension removed
        fprefix = os.path.basename(item).rsplit(".", 1)[0]
        sibling = by_name.get("{}.jpg".format(fprefix.lower()))
        if sibling is not None:
            sibling.files.append(itemmeta)
        else:
            photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    # TODO prune any xmp without an associated regular image or cr2

    print("Scanning other files")
    # process all other formats
    for item in chain.from_iterable(byext[ext] for ext in files_video):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    print("Updating database")
    for photoset in photos:
        library.add_photoset(photoset)
def main():
    """
    Command line entry point: ingest the files named on the command line into the library
    """
    cli = argparse.ArgumentParser(description="Library ingestion tool")
    cli.add_argument("files", nargs="+")
    # Parse before opening the library so a usage error exits without touching the database
    paths = cli.parse_args().files

    batch_ingest(PhotoLibrary("photos.db", "./library/"), paths)


if __name__ == "__main__":
    main()

56
photoapp/library.py Normal file
View File

@ -0,0 +1,56 @@
import os
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from photoapp.types import Base, Photo, PhotoSet
from sqlalchemy.exc import IntegrityError
class PhotoLibrary(object):
    """
    A photo library: files are kept in a year/month/day directory tree under lib_path and their metadata in a
    sqlite database at db_path
    """

    def __init__(self, db_path, lib_path):
        """
        Open the sqlite database at db_path, creating any missing tables, and use lib_path as the library root
        """
        self.path = lib_path
        self.engine = create_engine('sqlite:///{}'.format(db_path), echo=False)
        Base.metadata.create_all(self.engine)
        self.session = sessionmaker()
        self.session.configure(bind=self.engine)

    def add_photoset(self, photoset):
        """
        Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
        as the file is moved to the library path.

        The stored paths are relative to the library root. If moving a file or committing fails, every file already
        moved is moved back, the entries get their original paths again and the error is re-raised.
        """
        # Create target directory
        path = os.path.join(self.path, self.get_datedir_path(photoset.date))
        os.makedirs(path, exist_ok=True)

        moves = []  # Track files moved as (entry, original path, destination). If anything fails, we'll undo these
        session = None
        try:
            for file in photoset.files:
                origin = file.path
                dest = self._unique_destination(path, os.path.basename(origin))
                os.rename(origin, dest)
                moves.append((file, origin, dest))
                # relpath removes the library prefix; str.lstrip would strip a set of characters instead
                file.path = os.path.relpath(dest, self.path)

            session = self.session()
            session.add(photoset)
            session.commit()
        except Exception:
            if session is not None:
                session.rollback()
            # Undo the moves, newest first, and restore the original paths on the entries
            for file, origin, dest in reversed(moves):
                os.rename(dest, origin)
                file.path = origin
            raise
        finally:
            if session is not None:
                session.close()

    @staticmethod
    def _unique_destination(directory, filename):
        """
        Return a path in directory for filename, adding _1, _2, ... before the extension while the name is taken
        """
        dest = os.path.join(directory, filename)
        stem, extension = os.path.splitext(filename)
        dupe_rename = 1
        while os.path.exists(dest):
            dest = os.path.join(directory, "{}_{}{}".format(stem, dupe_rename, extension))
            dupe_rename += 1
        return dest

    def get_datedir_path(self, date):
        """
        Return a path like 2018/3/31 given a datetime object representing the same date
        """
        return os.path.join(str(date.year), str(date.month), str(date.day))

34
photoapp/types.py Normal file
View File

@ -0,0 +1,34 @@
from sqlalchemy import Column, Integer, String, DateTime, Unicode, DECIMAL, ForeignKey
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
import uuid
# Declarative base class that the ORM models defined in this module subclass
Base = declarative_base()
class PhotoSet(Base):
    """
    One photo, stored in the 'photos' table: a date, a coordinate pair and the list of files that make it up
    """
    __tablename__ = 'photos'

    id = Column(Integer, primary_key=True)
    # Filled with a random uuid4 string when no value is given
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))

    date = Column(DateTime)
    lat = Column(DECIMAL(11))
    lon = Column(DECIMAL(11))

    # Photo rows belonging to this set; the other side of Photo.set
    files = relationship("Photo", back_populates="set")
class Photo(Base):
    """
    One file on disk, stored in the 'files' table and linked to the PhotoSet it belongs to
    """
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    set_id = Column(Integer, ForeignKey("photos.id"))
    # Filled with a random uuid4 string when no value is given
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))

    # Owning PhotoSet; the other side of PhotoSet.files
    set = relationship("PhotoSet", back_populates="files", foreign_keys=[set_id])

    # 64 characters wide and unique across all files
    hash = Column(String(64), unique=True)
    path = Column(Unicode)
    format = Column(String(64))  # TODO how long can a mime string be

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
Pillow==5.2.0
python-magic==0.4.15
SQLAlchemy==1.2.11

22
setup.py Normal file
View File

@ -0,0 +1,22 @@
#!/usr/bin/env python3
# Packaging script for the photoapp distribution, built with setuptools.
from setuptools import setup
# Version string passed to setup() below
__version__ = "0.0.0"
setup(name='photoapp',
      version=__version__,
      description='Photo library application',
      url='',
      author='dpedu',
      author_email='dave@davepedu.com',
      packages=['photoapp'],
      # NOTE(review): left empty although the package imports PIL, magic and sqlalchemy; presumably the
      # dependencies are installed from requirements.txt instead - confirm
      install_requires=[],
      entry_points={
          "console_scripts": [
              # NOTE(review): photoapp.daemon is not among the files visible here - confirm it exists
              "photoappd = photoapp.daemon:main",
              # Command line ingest tool, implemented by main() in photoapp/ingest.py
              "photoimport = photoapp.ingest:main"
          ]
      })