initial commit, basic library ingest
This commit is contained in:
commit
58a99cd74a
|
@ -0,0 +1,103 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from PIL import Image, ExifTags
|
||||||
|
from decimal import Decimal
|
||||||
|
from hashlib import sha256
|
||||||
|
import os
|
||||||
|
import magic
|
||||||
|
from photoapp.types import Photo, PhotoSet
|
||||||
|
|
||||||
|
|
||||||
|
def get_jpg_info(fpath):
    """
    Given the path to a jpg, return a PhotoSet describing it.

    The date comes from the image's exif data, falling back to the file's
    modification time when no exif date is present. GPS coordinates default
    to (0, 0) when unavailable.
    """
    date, gps = get_exif_data(fpath)

    if not date:
        # No exif date, fall back to file modification date
        date = get_mtime(fpath)

    # gps is set to 0,0 if unavailable
    lat, lon = gps or [0, 0]

    mime = magic.from_file(fpath, mime=True)

    photo = Photo(hash=get_hash(fpath), path=fpath, format=mime)

    return PhotoSet(date=date, lat=lat, lon=lon, files=[photo])
|
def get_mtime(fpath):
    """Return the file's last-modified time as a naive local datetime."""
    modified_at = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified_at)
|
def get_hash(path):
    """Return the hex sha256 digest of the file at *path*, read in 256KiB chunks."""
    digest = sha256()
    with open(path, 'rb') as src:
        for chunk in iter(lambda: src.read(1024 * 256), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
def get_exif_data(path):
    """
    Return a (datetime, (Decimal, Decimal)) tuple describing the photo's exif date and gps coordinates.

    Returns (None, None) for non-JPEG images and for JPEGs carrying no exif
    data at all. Raises if exif data exists but contains no date tag.
    """
    # Use a context manager so the file handle opened by Image.open is closed
    # promptly instead of leaking until garbage collection.
    with Image.open(path) as img:
        if img.format != "JPEG":
            return None, None
        # NOTE: _getexif() is a private PIL API, kept because the pinned
        # Pillow version offers no public equivalent for raw exif dicts.
        exif_data = img._getexif()
    if not exif_data:
        return None, None

    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    datestr = None
    gpsinfo = None

    # When several date tags are present, the last one in this list wins
    # (DateTimeDigitized over DateTimeOriginal over DateTime).
    for key in ["DateTime", "DateTimeOriginal", "DateTimeDigitized"]:
        if key in exif:
            datestr = exif[key]

    if datestr is None:
        print(exif.keys())
        raise Exception("{} has no DateTime".format(path))  # TODO how often do we hit this
    dateinfo = datetime.strptime(datestr, "%Y:%m:%d %H:%M:%S")

    gps = exif.get("GPSInfo")
    if gps:
        # see https://gis.stackexchange.com/a/273402
        gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
        gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
        # Southern / western hemispheres are negative coordinates
        if gps[1] == 'S':
            gps_y *= -1
        if gps[3] == 'W':
            gps_x *= -1
        gpsinfo = (gps_y, gps_x)

    return dateinfo, gpsinfo
|
def rational64u_to_hms(values):
    """Convert three exif rational pairs ((numerator, denominator), ...) to [hours, minutes, seconds] Decimals."""
    return [Decimal(numerator) / Decimal(denominator)
            for numerator, denominator in values[:3]]
||||||
|
def hms_to_decimal(values):
    """Collapse an [hours, minutes, seconds] triple into a single decimal value."""
    hours, minutes, seconds = values[0], values[1], values[2]
    return hours + minutes / 60 + seconds / 3600
|
@ -0,0 +1,98 @@
|
||||||
|
import magic
|
||||||
|
import argparse
|
||||||
|
from photoapp.library import PhotoLibrary
|
||||||
|
from photoapp.image import get_jpg_info, get_hash, get_mtime
|
||||||
|
from itertools import chain
|
||||||
|
from photoapp.types import Photo, PhotoSet
|
||||||
|
import os
|
||||||
|
|
||||||
|
"""
|
||||||
|
Photo sorting rules:
|
||||||
|
|
||||||
|
jpeg
|
||||||
|
exif date
|
||||||
|
file modification date
|
||||||
|
raw
|
||||||
|
group with exif date of jpeg with same name
|
||||||
|
file modification date
|
||||||
|
mov, video, or other
|
||||||
|
modification date
|
||||||
|
"""
|
||||||
|
|
||||||
|
known_extensions = ["jpg", "png", "cr2", "xmp", "mp4", "mov"]
|
||||||
|
regular_images = ["jpg", "png"]
|
||||||
|
files_raw = ["cr2", "xmp"]
|
||||||
|
files_video = ["mp4", "mov"]
|
||||||
|
|
||||||
|
|
||||||
|
def batch_ingest(library, files):
    """
    Ingest a batch of file paths into the given library.

    Regular images (jpg/png) are scanned for exif metadata first. Raw files
    are then attached to the photo set of a jpeg sharing their base name, or
    get their own set dated by mtime. Videos and other files are dated by
    mtime only.
    """
    byext = _presort_by_extension(files)

    print("Scanning images")
    photos = []
    # process regular images first so raws can be grouped with them
    for item in chain(*[byext[ext] for ext in regular_images]):
        photos.append(get_jpg_info(item))

    print("Scanning RAWs")
    for item in chain(*[byext[ext] for ext in files_raw]):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        match = _find_jpg_sibling(photos, item)
        if match is not None:
            match.files.append(itemmeta)
        else:
            # No matching jpeg: the raw gets its own set, dated by mtime
            photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    # TODO prune any xmp without an associated regular image or cr2

    print("Scanning other files")
    # process all other formats
    for item in chain(*[byext[ext] for ext in files_video]):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    print("Updating database")
    for photoset in photos:
        library.add_photoset(photoset)


def _presort_by_extension(files):
    """Group paths by normalized extension, skipping non-files and unknown types."""
    byext = {k: [] for k in known_extensions}

    print("Pre-sorting files")
    for item in files:
        if not os.path.isfile(item):
            print("Skipping due to not a file: {}".format(item))
            continue
        parts = item.split(".")
        if len(parts) < 2:
            print("Skipping due to no extension: {}".format(item))
            continue
        extension = parts[-1].lower()
        if extension == "jpeg":
            # normalize the alternate jpeg spelling
            extension = "jpg"
        if extension not in known_extensions:
            print("Skipping due to unknown extension: {}".format(item))
            continue
        byext[extension].append(item)  # already lowercased above
    return byext


def _find_jpg_sibling(photos, item):
    """Return the PhotoSet containing a jpg whose base name matches item's, or None."""
    fprefix = os.path.splitext(os.path.basename(item))[0]
    fmatch = "{}.jpg".format(fprefix.lower())
    for photo in photos:
        for fmt in photo.files:
            if os.path.basename(fmt.path).lower() == fmatch:
                return photo
    return None
|
def main():
    """CLI entry point: ingest the files listed on the command line into the library."""
    parser = argparse.ArgumentParser(description="Library ingestion tool")
    parser.add_argument("files", nargs="+")
    files_to_ingest = parser.parse_args().files

    batch_ingest(PhotoLibrary("photos.db", "./library/"), files_to_ingest)


if __name__ == '__main__':
    main()
|
@ -0,0 +1,56 @@
|
||||||
|
import os
|
||||||
|
from sqlalchemy import create_engine
|
||||||
|
from sqlalchemy.orm import sessionmaker
|
||||||
|
from photoapp.types import Base, Photo, PhotoSet
|
||||||
|
from sqlalchemy.exc import IntegrityError
|
||||||
|
|
||||||
|
|
||||||
|
class PhotoLibrary(object):
    """
    A photo library: a sqlite database of photo metadata plus a directory
    tree of the files themselves, organized by date.
    """

    def __init__(self, db_path, lib_path):
        """
        :param db_path: path to the sqlite database file (created if missing)
        :param lib_path: root directory that ingested files are moved into
        """
        self.path = lib_path
        self.engine = create_engine('sqlite:///{}'.format(db_path), echo=False)
        Base.metadata.create_all(self.engine)
        self.session = sessionmaker()
        self.session.configure(bind=self.engine)

    def add_photoset(self, photoset):
        """
        Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
        as the file is moved to the library path.
        """

        # Create target directory
        path = os.path.join(self.path, self.get_datedir_path(photoset.date))
        os.makedirs(path, exist_ok=True)

        moves = []  # Track files moved. If the sql transaction fails, we'll undo these

        for file in photoset.files:
            dest = os.path.join(path, os.path.basename(file.path))

            # Check if the name is already in use, rename new file if needed
            dupe_rename = 1
            while os.path.exists(dest):
                fname = os.path.basename(file.path).split(".")
                fname[-2] += "_{}".format(dupe_rename)
                dest = os.path.join(path, '.'.join(fname))
                dupe_rename += 1
            os.rename(file.path, dest)
            moves.append((file.path, dest))
            # Store the path relative to the library root. The previous
            # dest.lstrip(self.path) was a bug: str.lstrip strips a *character
            # set*, not a prefix, and could eat leading characters of the
            # destination path.
            file.path = os.path.relpath(dest, self.path)

        s = self.session()
        s.add(photoset)
        try:
            s.commit()
        except IntegrityError:
            # Commit failed, undo the moves
            for move in moves:
                os.rename(move[1], move[0])
            raise

    def get_datedir_path(self, date):
        """
        Return a path like 2018/3/31 given a datetime object representing the same date
        """
        return os.path.join(str(date.year), str(date.month), str(date.day))
|
@ -0,0 +1,34 @@
|
||||||
|
from sqlalchemy import Column, Integer, String, DateTime, Unicode, DECIMAL, ForeignKey
|
||||||
|
from sqlalchemy.orm import relationship
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
Base = declarative_base()


class PhotoSet(Base):
    """
    A group of related files representing one photo — e.g. a jpeg plus the
    raw it was developed from — along with the metadata shared by the group.
    """
    __tablename__ = 'photos'

    id = Column(Integer, primary_key=True)
    # Stable public identifier, assigned on insert
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))
    # When the photo was taken (exif date or file mtime)
    date = Column(DateTime)
    # GPS coordinates; stored as 0, 0 when unknown
    lat = Column(DECIMAL(precision=11))
    lon = Column(DECIMAL(precision=11))

    files = relationship("Photo", back_populates="set")
|
|
||||||
|
class Photo(Base):
    """
    A single file on disk belonging to a PhotoSet.
    """
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    set_id = Column(Integer, ForeignKey("photos.id"))
    # Stable public identifier, assigned on insert
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))

    set = relationship("PhotoSet", back_populates="files", foreign_keys=[set_id])

    # sha256 hex digest of the file contents; unique so the same file
    # cannot be ingested twice
    hash = Column(String(length=64), unique=True)
    # File path — presumably relative to the library root; verify against PhotoLibrary.add_photoset
    path = Column(Unicode)
    format = Column(String(length=64))  # TODO how long can a mime string be
|
@ -0,0 +1,3 @@
|
||||||
|
Pillow==5.2.0
|
||||||
|
python-magic==0.4.15
|
||||||
|
SQLAlchemy==1.2.11
|
|
@ -0,0 +1,22 @@
|
||||||
|
#!/usr/bin/env python3

from setuptools import setup


__version__ = "0.0.0"


setup(name='photoapp',
      version=__version__,
      description='Photo library application',
      url='',
      author='dpedu',
      author_email='dave@davepedu.com',
      packages=['photoapp'],
      # Runtime dependencies, kept in sync with requirements.txt. These were
      # previously empty, leaving the console scripts broken at import time
      # unless the requirements file was installed manually.
      install_requires=[
          "Pillow==5.2.0",
          "python-magic==0.4.15",
          "SQLAlchemy==1.2.11",
      ],
      entry_points={
          "console_scripts": [
              "photoappd = photoapp.daemon:main",
              "photoimport = photoapp.ingest:main"
          ]
      })
|
Loading…
Reference in New Issue