178 lines
5.8 KiB
Python
178 lines
5.8 KiB
Python
from datetime import datetime
|
|
from PIL import Image, ExifTags
|
|
from decimal import Decimal
|
|
from hashlib import sha256
|
|
import os
|
|
import magic
|
|
from photoapp.types import Photo, PhotoSet
|
|
|
|
|
|
def get_jpg_info(fpath):
    """
    Build the PhotoSet record describing the image file at fpath.

    Date, GPS, dimensions and orientation come from get_exif_data(); the mime
    type, byte size, content hash and base name are taken from the file itself.
    Returns a PhotoSet holding a single Photo. Raises Exception when
    get_exif_data() yields no date.
    """
    taken, coords, img_size, rotation = get_exif_data(fpath)

    if taken is None:
        raise Exception("No date found, panicing for unknown reasons!")

    # Coordinates are kept only when present and neither component equals zero
    if coords and 0 not in coords:
        lat, lon = coords
    else:
        lat, lon = None, None

    img_size = img_size or (None, None)
    mime_type = magic.from_file(fpath, mime=True)
    byte_count = os.path.getsize(fpath)
    digest = get_hash(fpath)

    photo = Photo(
        hash=digest,
        path=fpath,
        format=mime_type,
        size=byte_count,
        width=img_size[0],
        height=img_size[1],
        orientation=rotation,
        fname=os.path.basename(fpath),
    )
    return PhotoSet(date=taken, date_real=taken, lat=lat, lon=lon, files=[photo])
|
|
|
|
|
|
def get_mtime(fpath):
    """
    Return the modification time of the file at fpath as a datetime.

    The st_mtime timestamp reported by os.stat() is converted with
    datetime.fromtimestamp().
    """
    file_stat = os.stat(fpath)
    modified_ts = file_stat.st_mtime
    return datetime.fromtimestamp(modified_ts)
|
|
|
|
|
|
def get_hash(path):
    """
    Return the hex-encoded SHA-256 digest of the file at path.

    The file is read in 256 KiB pieces rather than loaded in one go.
    """
    chunk_size = 1024 * 256
    digest = sha256()
    with open(path, 'rb') as stream:
        # Two-argument iter() keeps calling read() until it returns b'' (EOF)
        for block in iter(lambda: stream.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()
|
|
|
|
|
|
def get_exif_data(path):
    """
    Return the (date, gps, size, orientation) tuple for the image at path.

    The values come from get_exif_data_fobj(); when it reports no date, the
    file's modification time from get_mtime() is used in its place.
    """
    with open(path, 'rb') as handle:
        taken, coords, size, rotation = get_exif_data_fobj(handle)

    if taken is None:
        # No usable date from the image itself: fall back to the file's mtime
        taken = get_mtime(path)

    return taken, coords, size, rotation
|
|
|
|
|
|
def _parse_exif_date(datestr):
    """
    Parse an EXIF date string, returning None when it is missing or unusable.
    """
    if not datestr or datestr.startswith("0000"):  # Weed out some known bad cases
        return None
    # Try the space-separated layout first, then the all-colon variant
    for layout in ("%Y:%m:%d %H:%M:%S", "%Y:%m:%d:%H:%M:%S"):
        try:
            return datetime.strptime(datestr, layout)
        except ValueError:
            continue
    # Neither layout matched: report "no date" instead of raising, so callers
    # can apply their own fallback
    return None


def get_exif_data_fobj(fobj):
    """
    Return a (datetime, (decimal, decimal), (width, height), rotation) tuple
    describing the photo's exif date and gps coordinates

    fobj is a binary file object that PIL's Image.open() can read. Exif data
    is only consulted for JPEG, PNG and GIF images that expose _getexif();
    otherwise the date and gps entries are None and rotation is 0. The date is
    also None when the exif date string is absent or cannot be parsed. The
    gps entry is whatever parse_exif_gps() returns for the GPSInfo tag.
    """
    img = Image.open(fobj)  # TODO do i need to close this?

    dateinfo = None
    gpsinfo = None
    orientationinfo = 0
    sizeinfo = (img.width, img.height)

    if img.format not in ["JPEG", "PNG", "GIF"] or not hasattr(img, "_getexif"):
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    exif_data = img._getexif()
    if not exif_data:
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    # Re-key the numeric exif tags by name, dropping tags PIL has no name for
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    # Every key is checked and a later match overwrites an earlier one, so
    # DateTimeDigitized takes precedence when several are present
    datestr = None
    for key in ("DateTime", "DateTimeOriginal", "DateTimeDigitized"):
        if key in exif:
            datestr = exif[key]
    dateinfo = _parse_exif_date(datestr)

    # Exif Orientation values 8, 3 and 6 map to 1, 2 and 3; anything else is 0
    orien = exif.get("Orientation")
    if orien:
        orientationinfo = {0: 0, 8: 1, 3: 2, 6: 3}.get(int(orien), 0)

    gpsinfo = parse_exif_gps(exif.get("GPSInfo"))

    return dateinfo, gpsinfo, sizeinfo, orientationinfo
|
|
|
|
|
|
def parse_exif_gps(gps):
    """
    Convert an exif GPSInfo mapping into a signed (latitude, longitude) pair.

    gps is looked up by numeric key: 1 and 3 must hold the hemisphere
    references ("N"/"S" and "E"/"W"), 2 and 4 the matching coordinates as
    three rational components each. Southern and western values are negated
    and both results are rounded to 8 decimal places.

    Returns None when gps is empty, lacks any of the four keys, carries an
    unexpected reference, or holds coordinates that cannot be converted.
    """
    if not gps or any(tag not in gps for tag in (1, 2, 3, 4)):
        return None

    lat_ref, lon_ref = gps[1], gps[3]
    if lat_ref not in {"N", "S"} or lon_ref not in {"E", "W"}:
        return None

    try:
        y_decimal = rational64u_to_hms(gps[2])
        x_decimal = rational64u_to_hms(gps[4])
    except (TypeError, AttributeError, IndexError, ArithmeticError):
        # Malformed coordinates are treated as "no gps" rather than crashing:
        # non-rational parts (TypeError/AttributeError), fewer than three
        # components (IndexError), or a zero denominator, which decimal
        # reports through ArithmeticError subclasses
        return None

    # see https://gis.stackexchange.com/a/273402
    gps_y = round(hms_to_decimal(y_decimal), 8)
    gps_x = round(hms_to_decimal(x_decimal), 8)
    if lat_ref == 'S':
        gps_y *= -1
    if lon_ref == 'W':
        gps_x *= -1
    return (gps_y, gps_x)
|
|
|
|
|
|
def rational64u_to_hms(values):
    """
    Turn the first three rationals in values into a list of three Decimals.

    Each element must expose numerator and denominator attributes; the result
    for each is numerator / denominator computed with Decimal arithmetic.
    """
    def as_decimal(position):
        component = values[position]
        return Decimal(component.numerator) / Decimal(component.denominator)

    return [as_decimal(position) for position in range(3)]
|
|
|
|
|
|
def hms_to_decimal(values):
    """
    Collapse a three-part value into one number.

    The result is values[0] plus values[1] / 60 plus values[2] / 3600.
    """
    whole = values[0]
    sixtieths = values[1] / 60
    total = whole + sixtieths
    return total + values[2] / 3600
|
|
|
|
|
|
def is_cr3(header):
    """
    Report whether header looks like the start of a Canon CR3 file.

    header holds the leading bytes of the file; bytes 0-12 and 64-71 are
    compared against the two markers shown in the dump below.
    """
    # detect Canon's new cr3 format
    # here's what we look for, expecting the first 100 bytes from the file:
    # example cr3 file: IMG_0956.CR3
    # 00000000  00 00 00 18 66 74 79 70  63 72 78 20 00 00 00 01  |....ftypcrx ....|  <--- the "ftypcrx "
    # 00000010  63 72 78 20 69 73 6f 6d  00 00 71 a0 6d 6f 6f 76  |crx isom..q.moov|
    # 00000020  00 00 68 c0 75 75 69 64  85 c0 b6 87 82 0f 11 e0  |..h.uuid........|
    # 00000030  81 11 f4 ce 46 2b 6a 48  00 00 00 26 43 4e 43 56  |....F+jH...&CNCV|
    # 00000040  43 61 6e 6f 6e 43 52 33  5f 30 30 31 2f 30 30 2e  |CanonCR3_001/00.|  <--- the "CanonCR3"
    # 00000050  31 31 2e 30 30 2f 30 30  2e 30 30 2e 30 30 00 00  |11.00/00.00.00..|
    # 00000060  00 5c 43 43                                       |.\CC|
    # 00000064
    ftyp_marker = b'\x00\x00\x00\x18ftypcrx \x00'
    if header[0:13] != ftyp_marker:
        return False
    return header[64:72] == b'CanonCR3'
|
|
|
|
|
|
def is_file_cr3(fpath):
    """
    Report whether the file at fpath passes the is_cr3() header check.

    Only the first 100 bytes of the file are read.
    """
    with open(fpath, "rb") as stream:
        leading_bytes = stream.read(100)
    return is_cr3(leading_bytes)
|
|
|
|
|
|
def is_file_xmp(fname):
    """
    Report whether fname carries an .xmp extension (case-insensitive).

    Only the name is inspected; the file is never opened. A name that has no
    dot at all, such as a file literally called "xmp", is not treated as xmp.
    """
    # Requiring the dot avoids matching an extension-less name equal to "xmp"
    return fname.lower().endswith(".xmp")
|
|
|
|
|
|
def special_magic(fpath):
    """
    Return a mime type for the file at fpath, covering formats magic lacks.

    Names accepted by is_file_xmp() are answered from the file name alone;
    everything else is opened and passed to special_magic_fobj().
    """
    # Cheap name-based check first; repeating it here (special_magic_fobj
    # checks it too) avoids opening the file at all
    if is_file_xmp(fpath):
        return "application/octet-stream-xmp"

    # Otherwise fall back to inspecting the file contents
    with open(fpath, "rb") as stream:
        return special_magic_fobj(stream, fpath)
|
|
|
|
|
|
def special_magic_fobj(fobj, fname):
    """
    Return a mime type for an open binary file object.

    fname is used only for the name-based xmp check. Otherwise the first
    1024 bytes are read from fobj: a CR3 header yields "image/x-canon-cr3",
    and anything else is identified by magic.from_buffer().
    """
    if is_file_xmp(fname):  # supported here too ...
        return "application/octet-stream-xmp"

    head = fobj.read(1024)
    if is_cr3(head):
        return "image/x-canon-cr3"

    return magic.from_buffer(head, mime=True)
|