initial commit, basic library ingest
This commit is contained in:
commit
58a99cd74a
0
photoapp/__init__.py
Normal file
0
photoapp/__init__.py
Normal file
103
photoapp/image.py
Normal file
103
photoapp/image.py
Normal file
@ -0,0 +1,103 @@
|
||||
from datetime import datetime
|
||||
from PIL import Image, ExifTags
|
||||
from decimal import Decimal
|
||||
from hashlib import sha256
|
||||
import os
|
||||
import magic
|
||||
from photoapp.types import Photo, PhotoSet
|
||||
|
||||
|
||||
def get_jpg_info(fpath):
    """
    Given the path to an image, return a PhotoSet describing it

    The date comes from the exif data when available, otherwise from the file's modification time.
    Coordinates are 0,0 when the exif data yields no gps position. The returned set holds a single
    Photo entry carrying the file's hash, path and detected mime type.
    """
    exif_date, exif_gps = get_exif_data(fpath)

    # Prefer the exif date; the modification time is only looked up when it is missing
    taken = exif_date if exif_date else get_mtime(fpath)

    # gps is set to 0,0 if unavailable
    if exif_gps:
        lat, lon = exif_gps
    else:
        lat, lon = 0, 0

    mime = magic.from_file(fpath, mime=True)
    entry = Photo(hash=get_hash(fpath), path=fpath, format=mime)
    return PhotoSet(date=taken, lat=lat, lon=lon, files=[entry])
|
||||
|
||||
|
||||
def get_mtime(fpath):
    """
    Return the file's modification time as a naive local datetime
    """
    modified = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified)
|
||||
|
||||
|
||||
def get_hash(path):
    """
    Return the hex sha256 digest of the file at the given path
    """
    chunk_size = 1024 * 256
    digest = sha256()
    with open(path, 'rb') as fh:
        # Read fixed-size chunks until read() returns empty bytes so large files are never fully loaded
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def get_exif_data(path):
    """
    Return a (datetime, (decimal, decimal)) tuple describing the photo's exif date and gps coordinates

    Returns (None, None) when the file is not a JPEG or carries no exif data. The gps element is None
    when the exif data holds no complete latitude/longitude; otherwise it is (latitude, longitude) in
    decimal degrees rounded to 8 places, negated for the 'S' and 'W' hemispheres.

    Raises Exception if exif data is present but none of the supported date tags are.
    """
    # Open as a context manager so the underlying file handle is released as soon as exif is read
    with Image.open(path) as img:
        if img.format != "JPEG":
            return None, None
        exif_data = img._getexif()
    if not exif_data:
        return None, None

    # Re-key the numeric exif tags by their names, dropping tags Pillow has no name for
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    # Date tags in order of preference; the first one present wins
    datestr = None
    for key in ("DateTimeDigitized", "DateTimeOriginal", "DateTime"):
        if key in exif:
            datestr = exif[key]
            break

    if datestr is None:
        print(exif.keys())
        raise Exception("{} has no DateTime".format(path))  # TODO how often do we hit this
    dateinfo = datetime.strptime(datestr, "%Y:%m:%d %H:%M:%S")

    gpsinfo = None
    gps = exif.get("GPSInfo")
    # A GPSInfo block can exist without a full position, so require the latitude/longitude refs and
    # values (tags 1-4) instead of failing with KeyError on a partial block
    if gps and all(tag in gps for tag in (1, 2, 3, 4)):
        # see https://gis.stackexchange.com/a/273402
        gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
        gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
        if gps[1] == 'S':
            gps_y *= -1
        if gps[3] == 'W':
            gps_x *= -1
        gpsinfo = (gps_y, gps_x)

    return dateinfo, gpsinfo
|
||||
|
||||
|
||||
def rational64u_to_hms(values):
    """
    Convert the first three (numerator, denominator) pairs in values to a list of three Decimals
    """
    def as_decimal(pair):
        # Divide as Decimals so no float rounding is introduced
        return Decimal(pair[0]) / Decimal(pair[1])

    return [as_decimal(values[index]) for index in range(3)]
|
||||
|
||||
|
||||
def hms_to_decimal(values):
    """
    Collapse a [whole, minutes, seconds] triple into a single value: whole + minutes/60 + seconds/3600
    """
    whole, minutes, seconds = values[0], values[1], values[2]
    total = whole + minutes / 60
    total = total + seconds / 3600
    return total
|
98
photoapp/ingest.py
Normal file
98
photoapp/ingest.py
Normal file
@ -0,0 +1,98 @@
|
||||
import magic
|
||||
import argparse
|
||||
from photoapp.library import PhotoLibrary
|
||||
from photoapp.image import get_jpg_info, get_hash, get_mtime
|
||||
from itertools import chain
|
||||
from photoapp.types import Photo, PhotoSet
|
||||
import os
|
||||
|
||||
"""
Photo sorting rules:

jpeg
    exif date
    file modification date
raw
    group with exif date of jpeg with same name
    file modification date
mov, video, or other
    modification date
"""
|
||||
|
||||
# File extensions (lowercase, without the dot) grouped by kind
regular_images = ["jpg", "png"]
files_raw = ["cr2", "xmp"]
files_video = ["mp4", "mov"]
# Every extension that is accepted at all: the three groups above, in that order
known_extensions = regular_images + files_raw + files_video
|
||||
|
||||
|
||||
def batch_ingest(library, files):
    """
    Sort the given file paths into PhotoSets and add each set to the library

    Paths that are not regular files, have no extension, or have an unknown extension are skipped with
    a message. Regular images each become their own set. A raw file is attached to the first image set
    containing a jpeg with the same (case-insensitive) base name, otherwise it becomes its own set dated
    by modification time. Video files always become their own set dated by modification time.

    :param library: object providing add_photoset(photoset)
    :param files: iterable of file paths to ingest
    """
    # group by extension
    byext = {k: [] for k in known_extensions}

    print("Pre-sorting files")
    for item in files:
        if not os.path.isfile(item):
            print("Skipping due to not a file: {}".format(item))
            continue
        # splitext only considers the final path component, so a dot in a directory name is never
        # mistaken for the start of an extension
        extension = os.path.splitext(item)[1].lstrip(".").lower()
        if not extension:
            print("Skipping due to no extension: {}".format(item))
            continue
        if extension == "jpeg":
            extension = "jpg"
        if extension not in known_extensions:
            print("Skipping due to unknown extension: {}".format(item))
            continue
        byext[extension].append(item)

    print("Scanning images")
    photos = []
    # Lowercased jpeg base name (without extension) -> first set containing that jpeg. Lets each raw be
    # paired with a single lookup instead of rescanning every set.
    jpeg_sets = {}
    # process regular images first.
    for item in chain.from_iterable(byext[ext] for ext in regular_images):
        photoset = get_jpg_info(item)
        photos.append(photoset)
        stem, ext = os.path.splitext(os.path.basename(item).lower())
        # Index both spellings; .jpeg files are bucketed with .jpg above and must pair with raws too
        if ext in (".jpg", ".jpeg"):
            jpeg_sets.setdefault(stem, photoset)

    print("Scanning RAWs")
    # process raws
    for item in chain.from_iterable(byext[ext] for ext in files_raw):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        stem = os.path.splitext(os.path.basename(item).lower())[0]
        match = jpeg_sets.get(stem)
        if match is not None:
            match.files.append(itemmeta)
        else:
            photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    # TODO prune any xmp without an associated regular image or cr2

    print("Scanning other files")
    # process all other formats
    for item in chain.from_iterable(byext[ext] for ext in files_video):
        itemmeta = Photo(hash=get_hash(item), path=item, format=magic.from_file(item, mime=True))
        photos.append(PhotoSet(date=get_mtime(item), lat=0, lon=0, files=[itemmeta]))

    print("Updating database")
    for photoset in photos:
        library.add_photoset(photoset)
|
||||
|
||||
|
||||
def main():
    """
    Command line entry point: ingest the files named on the command line into the library
    """
    cli = argparse.ArgumentParser(description="Library ingestion tool")
    cli.add_argument("files", nargs="+")
    paths = cli.parse_args().files

    # Database file and library directory are both relative to the current working directory
    batch_ingest(PhotoLibrary("photos.db", "./library/"), paths)


if __name__ == '__main__':
    main()
|
56
photoapp/library.py
Normal file
56
photoapp/library.py
Normal file
@ -0,0 +1,56 @@
|
||||
import os
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from photoapp.types import Base, Photo, PhotoSet
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
|
||||
class PhotoLibrary(object):
    """
    A photo library: a directory tree of files organised by date plus a sqlite database describing them
    """

    def __init__(self, db_path, lib_path):
        """
        :param db_path: path of the sqlite database file; tables are created if missing
        :param lib_path: root directory that ingested files are moved into
        """
        self.path = lib_path
        self.engine = create_engine('sqlite:///{}'.format(db_path), echo=False)
        Base.metadata.create_all(self.engine)
        self.session = sessionmaker()
        self.session.configure(bind=self.engine)

    def add_photoset(self, photoset):
        """
        Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
        as the file is moved to the library path.

        If moving a file or committing fails, the session is rolled back, every file already moved is
        returned to where it came from, the entries' paths are restored, and the error is re-raised.
        """

        # Create target directory
        path = os.path.join(self.path, self.get_datedir_path(photoset.date))
        os.makedirs(path, exist_ok=True)

        moves = []  # Track files moved. If the sql transaction fails, we'll undo these

        s = self.session()
        try:
            for file in photoset.files:
                origin = file.path
                dest = self._unused_destination(path, os.path.basename(origin))
                os.rename(origin, dest)
                moves.append((file, origin, dest))
                # Store the path relative to the library root. relpath removes the root as a prefix;
                # str.lstrip would instead strip any leading characters that occur in the root path.
                file.path = os.path.relpath(dest, self.path)

            s.add(photoset)
            s.commit()
        except Exception:
            # Any failure leaves the library unchanged: undo the database work and the moves
            s.rollback()
            for file, origin, dest in reversed(moves):
                os.rename(dest, origin)
                file.path = origin
            raise

    @staticmethod
    def _unused_destination(directory, filename):
        """
        Return a path in directory for filename, adding _1, _2, ... before the extension while taken
        """
        stem, ext = os.path.splitext(filename)
        dest = os.path.join(directory, filename)
        dupe_rename = 1
        while os.path.exists(dest):
            dest = os.path.join(directory, "{}_{}{}".format(stem, dupe_rename, ext))
            dupe_rename += 1
        return dest

    def get_datedir_path(self, date):
        """
        Return a path like 2018/3/31 given a datetime object representing the same date
        """
        return os.path.join(str(date.year), str(date.month), str(date.day))
|
34
photoapp/types.py
Normal file
34
photoapp/types.py
Normal file
@ -0,0 +1,34 @@
|
||||
from sqlalchemy import Column, Integer, String, DateTime, Unicode, DECIMAL, ForeignKey
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
|
||||
import uuid
|
||||
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
class PhotoSet(Base):
    """
    A group of one or more files sharing a date and position, stored in the 'photos' table
    """
    __tablename__ = 'photos'

    id = Column(Integer, primary_key=True)
    # Random uuid4 string generated when no value is supplied
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))
    date = Column(DateTime)
    lat = Column(DECIMAL(precision=11))
    lon = Column(DECIMAL(precision=11))

    # The Photo rows belonging to this set; the reverse side is Photo.set
    files = relationship("Photo", back_populates="set")
|
||||
|
||||
|
||||
class Photo(Base):
    """
    A single file belonging to a PhotoSet, stored in the 'files' table
    """
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    # Id of the owning PhotoSet row
    set_id = Column(Integer, ForeignKey("photos.id"))
    # Random uuid4 string generated when no value is supplied
    uuid = Column(Unicode, default=lambda: str(uuid.uuid4()))

    # The owning PhotoSet; the reverse side is PhotoSet.files
    set = relationship("PhotoSet", back_populates="files", foreign_keys=[set_id])

    # Unique per file, so a file whose hash is already stored cannot be inserted a second time
    hash = Column(String(length=64), unique=True)
    path = Column(Unicode)
    format = Column(String(length=64))  # TODO how long can a mime string be
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@ -0,0 +1,3 @@
|
||||
Pillow==5.2.0
|
||||
python-magic==0.4.15
|
||||
SQLAlchemy==1.2.11
|
22
setup.py
Normal file
22
setup.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
# Package version, passed to setup() below
__version__ = "0.0.0"


setup(name='photoapp',
      version=__version__,
      description='Photo library application',
      url='',
      author='dpedu',
      author_email='dave@davepedu.com',
      packages=['photoapp'],
      # No dependencies are declared here.
      # NOTE(review): presumably they are expected to be installed from requirements.txt - confirm
      install_requires=[],
      entry_points={
          "console_scripts": [
              # Each entry is "command = module:function"
              # NOTE(review): no photoapp/daemon.py is visible alongside this file - confirm it exists
              "photoappd = photoapp.daemon:main",
              "photoimport = photoapp.ingest:main"
          ]
      })
|
Loading…
Reference in New Issue
Block a user