photolib/photoapp/image.py

178 lines
5.8 KiB
Python

from datetime import datetime
from PIL import Image, ExifTags
from decimal import Decimal
from hashlib import sha256
import os
import magic
from photoapp.types import Photo, PhotoSet
def get_jpg_info(fpath):
    """
    Given the path to a jpg, return a PhotoSet describing it.

    The set contains a single Photo built from the file's hash, mime type,
    size, pixel dimensions, orientation and base name. The set's date,
    date_real, lat and lon come from get_exif_data().
    """
    taken, coords, dims, rotation = get_exif_data(fpath)
    if taken is None:
        raise Exception("No date found, panicing for unknown reasons!")

    # Coordinates are discarded when absent or when either component is 0.
    if coords and 0 not in coords:
        lat, lon = coords
    else:
        lat, lon = None, None

    if not dims:
        dims = (None, None)

    mime = magic.from_file(fpath, mime=True)
    size = os.path.getsize(fpath)
    digest = get_hash(fpath)

    photo = Photo(
        hash=digest,
        path=fpath,
        format=mime,
        size=size,
        width=dims[0],
        height=dims[1],
        orientation=rotation,
        fname=os.path.basename(fpath),
    )
    return PhotoSet(date=taken, date_real=taken, lat=lat, lon=lon, files=[photo])
def get_mtime(fpath):
    """
    Return the last-modification time of the file at fpath as a datetime.
    """
    modified = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified)
def get_hash(path):
    """
    Return the hex-encoded sha256 digest of the file at path.

    The file is read in 256 KiB pieces so it never has to fit in memory.
    """
    piece_size = 1024 * 256
    digest = sha256()
    with open(path, 'rb') as fobj:
        # read() returns b'' at end of file, which stops the iterator
        for piece in iter(lambda: fobj.read(piece_size), b''):
            digest.update(piece)
    return digest.hexdigest()
def get_exif_data(path):
    """
    Read the date, gps, size and orientation info of the image at path.

    Returns the same 4-tuple as get_exif_data_fobj(), except that a missing
    date is replaced by the file's modification time.
    """
    with open(path, 'rb') as handle:
        taken, coords, dims, rotation = get_exif_data_fobj(handle)

    # No usable date in the image: fall back to the filesystem timestamp.
    if taken is None:
        taken = get_mtime(path)

    return taken, coords, dims, rotation
def get_exif_data_fobj(fobj):
    """
    Return a (datetime, (decimal, decimal), (width, height), rotation) tuple
    describing the photo's exif date, gps coordinates, size and orientation.

    The date and gps members are None when the image carries no usable value.
    A date string that is blank, starts with "0000", or matches neither
    supported layout counts as unusable instead of raising ValueError. The
    rotation member is a code from 0 to 3 derived from the EXIF Orientation
    value, and is 0 when that value is absent or unrecognised.

    :param fobj: binary file object positioned at the start of the image
    """
    img = Image.open(fobj)  # TODO do i need to close this?

    gpsinfo = None
    dateinfo = None
    orientationinfo = 0
    sizeinfo = (img.width, img.height)

    # Only these formats are inspected, and only if this PIL image class
    # actually offers the exif accessor.
    if img.format not in ["JPEG", "PNG", "GIF"] or not hasattr(img, "_getexif"):
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    exif_data = img._getexif()
    if not exif_data:
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    # Re-key the raw numeric tags by their human-readable names.
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    # Every key is checked, so the LAST one present wins
    # (DateTimeDigitized over DateTimeOriginal over DateTime).
    datestr = None
    for key in ("DateTime", "DateTimeOriginal", "DateTimeDigitized"):
        if key in exif:
            datestr = exif[key]

    if datestr and not datestr.startswith("0000"):  # Weed out some known bad cases
        for layout in ("%Y:%m:%d %H:%M:%S", "%Y:%m:%d:%H:%M:%S"):
            try:
                dateinfo = datetime.strptime(datestr, layout)
            except ValueError:
                # Try the next layout; if none match, dateinfo stays None.
                continue
            break

    orien = exif.get("Orientation")
    if orien:
        orientationinfo = {0: 0, 8: 1, 3: 2, 6: 3}.get(int(orien), 0)

    gpsinfo = parse_exif_gps(exif.get("GPSInfo"))

    return dateinfo, gpsinfo, sizeinfo, orientationinfo
def parse_exif_gps(gps):
    """
    Convert an EXIF GPSInfo mapping into a (latitude, longitude) tuple.

    Tags 1 and 3 must hold the hemisphere letters ("N"/"S" and "E"/"W"), and
    tags 2 and 4 the matching degree/minute/second rationals. The result is in
    decimal degrees rounded to 8 places, negative for "S" and "W".

    Returns None when gps is empty, a tag is missing, a hemisphere letter is
    unexpected, or the rationals cannot be converted (wrong type, too few
    components, or a zero denominator).
    """
    if not gps or any(tag not in gps for tag in (1, 2, 3, 4)):
        return None
    if gps[1] not in {"N", "S"} or gps[3] not in {"E", "W"}:
        return None

    try:
        y_decimal = rational64u_to_hms(gps[2])
        x_decimal = rational64u_to_hms(gps[4])
    except (TypeError, AttributeError, IndexError, ArithmeticError):
        # Malformed coordinate data is treated like missing data. Zero
        # denominators surface as decimal errors, which are ArithmeticErrors.
        return None

    # see https://gis.stackexchange.com/a/273402
    gps_y = round(hms_to_decimal(y_decimal), 8)
    gps_x = round(hms_to_decimal(x_decimal), 8)

    if gps[1] == 'S':
        gps_y *= -1
    if gps[3] == 'W':
        gps_x *= -1

    return (gps_y, gps_x)
def rational64u_to_hms(values):
    """
    Turn the first three rationals in values into a list of three Decimals.

    Each item must expose numerator and denominator attributes; the quotient
    is computed with Decimal arithmetic.
    """
    return [
        Decimal(values[idx].numerator) / Decimal(values[idx].denominator)
        for idx in range(3)
    ]
def hms_to_decimal(values):
    """
    Collapse a (whole, sixtieths, thirty-six-hundredths) triple into a single
    number: values[0] + values[1] / 60 + values[2] / 3600.
    """
    whole = values[0]
    from_minutes = values[1] / 60
    from_seconds = values[2] / 3600
    return whole + from_minutes + from_seconds
def is_cr3(header):
    # Detect Canon's newer cr3 raw format from the leading bytes of a file.
    # The caller is expected to pass at least the first 72 bytes (100 is typical).
    # Hexdump of a sample cr3 file (IMG_0956.CR3) showing the two markers:
    # 00000000 00 00 00 18 66 74 79 70 63 72 78 20 00 00 00 01 |....ftypcrx ....| <--- the "ftypcrx "
    # 00000010 63 72 78 20 69 73 6f 6d 00 00 71 a0 6d 6f 6f 76 |crx isom..q.moov|
    # 00000020 00 00 68 c0 75 75 69 64 85 c0 b6 87 82 0f 11 e0 |..h.uuid........|
    # 00000030 81 11 f4 ce 46 2b 6a 48 00 00 00 26 43 4e 43 56 |....F+jH...&CNCV|
    # 00000040 43 61 6e 6f 6e 43 52 33 5f 30 30 31 2f 30 30 2e |CanonCR3_001/00.| <--- the "CanonCR3"
    # 00000050 31 31 2e 30 30 2f 30 30 2e 30 30 2e 30 30 00 00 |11.00/00.00.00..|
    # 00000060 00 5c 43 43 |.\CC|
    # 00000064
    ftyp_marker = b'\x00\x00\x00\x18ftypcrx \x00'  # bytes 0-12
    canon_marker = b'CanonCR3'                     # bytes 64-71

    if header[0:13] != ftyp_marker:
        return False
    return header[64:72] == canon_marker
def is_file_cr3(fpath):
    """
    Return whether the first 100 bytes of the file at fpath pass is_cr3().
    """
    with open(fpath, "rb") as fobj:
        leading = fobj.read(100)
    return is_cr3(leading)
def is_file_xmp(fname):
    """
    Return whether the text after the last "." in fname is "xmp", ignoring case.

    A name with no "." at all is compared as a whole, so "xmp" also matches.
    """
    _, _, suffix = fname.rpartition(".")
    return suffix.lower() == "xmp"
def special_magic(fpath):
    """
    magic, plus support for some newer formats magic doesn't know about
    """
    # The xmp check only looks at the file name, so answering it here saves
    # opening the file (special_magic_fobj performs the same check again).
    if is_file_xmp(fpath):
        return "application/octet-stream-xmp"

    # Everything else is identified from the file's contents.
    with open(fpath, "rb") as fobj:
        return special_magic_fobj(fobj, fpath)
def special_magic_fobj(fobj, fname):
    """
    Return a mime type string for an open binary file object.

    xmp is recognised from fname alone; otherwise the first 1024 bytes of fobj
    are read and checked for a cr3 header before being handed to magic.
    """
    if is_file_xmp(fname):  # name-based, nothing is read from fobj
        return "application/octet-stream-xmp"

    leading = fobj.read(1024)
    if is_cr3(leading):
        return "image/x-canon-cr3"

    return magic.from_buffer(leading, mime=True)