photolib/photoapp/image.py

178 lines
5.8 KiB
Python
Raw Normal View History

2018-09-08 15:49:16 -07:00
from datetime import datetime
from PIL import Image, ExifTags
from decimal import Decimal
from hashlib import sha256
import os
import magic
from photoapp.types import Photo, PhotoSet
def get_jpg_info(fpath):
    """
    Given the path to a jpg, return a PhotoSet describing it

    The PhotoSet carries the date and position reported by get_exif_data() and
    a single Photo entry holding the file's hash, path, mime type, byte size,
    pixel dimensions, orientation and base name.

    Raises Exception if get_exif_data() reports no date at all.
    """
    date, gps, dimensions, orientation = get_exif_data(fpath)

    if date is None:
        raise Exception("No date found, panicing for unknown reasons!")

    # a missing position, or one containing a zero coordinate, becomes None, None
    if gps and 0 not in gps:
        lat, lon = gps
    else:
        lat, lon = [None, None]

    dims = dimensions or (None, None)
    mime_type = magic.from_file(fpath, mime=True)
    byte_count = os.path.getsize(fpath)

    photo = Photo(
        hash=get_hash(fpath),
        path=fpath,
        format=mime_type,
        size=byte_count,
        width=dims[0],
        height=dims[1],
        orientation=orientation,
        fname=os.path.basename(fpath),
    )

    return PhotoSet(date=date, date_real=date, lat=lat, lon=lon, files=[photo])
2018-09-08 15:49:16 -07:00
def get_mtime(fpath):
    """
    Return the last-modified time of the file at fpath as a datetime
    (built with datetime.fromtimestamp, so it is naive local time)
    """
    modified_ts = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified_ts)
def get_hash(path):
    """
    Return the sha256 hex digest of the file at path

    The file is read in 256 KiB pieces so it never has to fit in memory at once.
    """
    piece_size = 1024 * 256
    digest = sha256()
    with open(path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. end of file
        for piece in iter(lambda: stream.read(piece_size), b''):
            digest.update(piece)
    return digest.hexdigest()
def get_exif_data(path):
    """
    Return the (date, gps, size, orientation) tuple for the image file at path

    The values come from get_exif_data_fobj(); when it reports no date, the
    file's modification time from get_mtime() is used instead.
    """
    with open(path, 'rb') as fobj:
        date, gps, size, orientation = get_exif_data_fobj(fobj)
        if date is None:
            date = get_mtime(path)
    return date, gps, size, orientation
def get_exif_data_fobj(fobj):
    """
    Return a (datetime, (decimal, decimal), (width, height), rotation) tuple describing
    the photo's exif date, gps coordinates, pixel dimensions and orientation

    fobj is passed straight to Image.open(). Exif is only consulted when the
    image format is JPEG, PNG or GIF and the image object offers _getexif().

    The date is None when no date tag is present, the value starts with "0000",
    or it matches neither supported layout. The gps entry is whatever
    parse_exif_gps() returns for the GPSInfo tag. Rotation is 0 unless the
    Orientation tag is 8, 3 or 6, which map to 1, 2 and 3 respectively.
    """
    img = Image.open(fobj)  # TODO do i need to close this?

    gpsinfo = None
    dateinfo = None
    orientationinfo = 0
    sizeinfo = (img.width, img.height)

    exif_data = None
    if img.format in ("JPEG", "PNG", "GIF") and hasattr(img, "_getexif"):
        exif_data = img._getexif()
    if not exif_data:
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    # translate numeric tag ids to names, dropping ids ExifTags.TAGS does not list
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    # Every tag present is visited, so a later entry in this list overrides an
    # earlier one. That is the precedence this code has always had (the old
    # loop ended in a no-op `continue`), and it is kept on purpose.
    datestr = None
    for key in ("DateTime", "DateTimeOriginal", "DateTimeDigitized"):
        if key in exif:
            datestr = exif[key]
    if datestr:
        dateinfo = _parse_exif_date(datestr)

    orien = exif.get("Orientation")
    if orien:
        orientationinfo = {0: 0, 8: 1, 3: 2, 6: 3}.get(int(orien), 0)

    gpsinfo = parse_exif_gps(exif.get("GPSInfo"))

    return dateinfo, gpsinfo, sizeinfo, orientationinfo


def _parse_exif_date(datestr):
    """
    Parse an exif date string, returning None for known-bad or unparseable values
    """
    if datestr.startswith("0000"):  # Weed out some known bad cases
        return None
    # the usual layout first, then the all-colons variant
    for layout in ("%Y:%m:%d %H:%M:%S", "%Y:%m:%d:%H:%M:%S"):
        try:
            return datetime.strptime(datestr, layout)
        except ValueError:
            continue
    # neither layout matched: report "no date" rather than aborting the whole read
    return None
2018-09-08 15:49:16 -07:00
2021-07-07 19:50:15 -07:00
def parse_exif_gps(gps):
    """
    Convert an exif GPSInfo mapping into a (latitude, longitude) tuple of Decimals

    gps is keyed by numeric tag id: 1 and 3 hold the "N"/"S" and "E"/"W"
    references, 2 and 4 hold the latitude and longitude, each as three
    rationals (objects exposing numerator and denominator). Southern and
    western positions are returned negative; both values are rounded to 8
    decimal places.

    Returns None when gps is empty, lacks any of the four tags, carries an
    unexpected reference, or holds coordinates that cannot be converted
    (wrong type, too few components, or a zero denominator).
    """
    if not gps or any(tag not in gps for tag in (1, 2, 3, 4)):
        return None
    if gps[1] not in ("N", "S") or gps[3] not in ("E", "W"):
        return None

    try:
        # see https://gis.stackexchange.com/a/273402
        gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
        gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
    except (TypeError, AttributeError, LookupError, ArithmeticError):
        # TypeError: value is not a sequence of rationals
        # AttributeError: components lack numerator/denominator (e.g. plain tuples)
        # LookupError: fewer than three components
        # ArithmeticError: zero denominator (decimal.DivisionByZero / InvalidOperation)
        return None

    if gps[1] == 'S':
        gps_y *= -1
    if gps[3] == 'W':
        gps_x *= -1

    return (gps_y, gps_x)


def rational64u_to_hms(values):
    """
    Turn the first three rationals in values into a list of three Decimals

    Each item must expose numerator and denominator attributes. Errors from
    malformed input (missing attributes, short sequences, zero denominators)
    propagate to the caller.
    """
    return [
        Decimal(values[idx].numerator) / Decimal(values[idx].denominator)
        for idx in range(3)
    ]


def hms_to_decimal(values):
    """
    Combine three values into one number: first + second / 60 + third / 3600
    """
    return values[0] + values[1] / 60 + values[2] / 3600
2018-09-09 12:05:13 -07:00
2022-12-21 23:32:58 -08:00
def is_cr3(header):
    """
    Return True when header carries the signature of Canon's cr3 format

    header is expected to hold at least the first 72 bytes of the file; both
    markers checked below must match.
    """
    # here's what we look for, expecting the first 100 bytes from the file:
    # example cr3 file: IMG_0956.CR3
    # 00000000 00 00 00 18 66 74 79 70 63 72 78 20 00 00 00 01 |....ftypcrx ....| <--- the "ftypcrx "
    # 00000010 63 72 78 20 69 73 6f 6d 00 00 71 a0 6d 6f 6f 76 |crx isom..q.moov|
    # 00000020 00 00 68 c0 75 75 69 64 85 c0 b6 87 82 0f 11 e0 |..h.uuid........|
    # 00000030 81 11 f4 ce 46 2b 6a 48 00 00 00 26 43 4e 43 56 |....F+jH...&CNCV|
    # 00000040 43 61 6e 6f 6e 43 52 33 5f 30 30 31 2f 30 30 2e |CanonCR3_001/00.| <--- the "CanonCR3"
    # 00000050 31 31 2e 30 30 2f 30 30 2e 30 30 2e 30 30 00 00 |11.00/00.00.00..|
    # 00000060 00 5c 43 43 |.\CC|
    # 00000064
    if header[0:13] != b'\x00\x00\x00\x18ftypcrx \x00':
        return False
    return header[64:72] == b'CanonCR3'
def is_file_cr3(fpath):
    """
    Read the first 100 bytes of the file at fpath and run is_cr3() on them
    """
    with open(fpath, "rb") as stream:
        return is_cr3(stream.read(100))
def is_file_xmp(fname):
    """
    Return True when the text after the last "." in fname is "xmp" in any case

    A name without any "." is compared as a whole, so a bare "xmp" also matches.
    """
    _, _, suffix = fname.rpartition(".")
    return suffix.lower() == "xmp"
2019-07-04 13:10:52 -07:00
def special_magic(fpath):
    """
    magic, plus support for some newer formats magic doesn't know about

    Returns the mime type string for the file at fpath.
    """
    # The xmp check is cheap and only looks at the file name. special_magic_fobj
    # repeats it, but answering here avoids opening the file at all.
    if not is_file_xmp(fpath):
        # anything else needs the file contents
        with open(fpath, "rb") as fobj:
            return special_magic_fobj(fobj, fpath)
    return "application/octet-stream-xmp"
2019-07-04 13:10:52 -07:00
def special_magic_fobj(fobj, fname):
    """
    Return the mime type for an open file object, given the name it goes by

    Names accepted by is_file_xmp() are answered without reading fobj. Otherwise
    the first 1024 bytes are read and checked for the cr3 signature before
    being handed to magic.from_buffer().
    """
    if is_file_xmp(fname):  # supported here too ...
        return "application/octet-stream-xmp"

    head = fobj.read(1024)
    if is_cr3(head):
        return "image/x-canon-cr3"
    return magic.from_buffer(head, mime=True)