# photolib/photoapp/image.py

from datetime import datetime
from PIL import Image, ExifTags
from decimal import Decimal
from hashlib import sha256
import os
import magic
from photoapp.types import Photo, PhotoSet


def get_jpg_info(fpath):
    """
    Given the path to a jpg, build a PhotoSet describing it

    The returned PhotoSet holds a single Photo for the file. Its date and
    date_real are both set to the date reported by get_exif_data(), and
    lat/lon are None when no usable gps data is available.

    :param fpath: path to the image file on disk
    :raises Exception: if no date could be determined for the file
    """
    date, gps, dimensions, orientation = get_exif_data(fpath)

    if date is None:
        raise Exception("No date found, panicing for unknown reasons!")

    # gps is discarded when it is missing or when either coordinate equals 0
    if gps and 0 not in gps:
        lat, lon = gps
    else:
        lat, lon = [None, None]

    if not dimensions:
        dimensions = (None, None)

    mime = magic.from_file(fpath, mime=True)
    size = os.path.getsize(fpath)
    digest = get_hash(fpath)
    basename = os.path.basename(fpath)

    photo = Photo(
        hash=digest,
        path=fpath,
        format=mime,
        size=size,
        width=dimensions[0],
        height=dimensions[1],
        orientation=orientation,
        fname=basename,
    )
    return PhotoSet(date=date, date_real=date, lat=lat, lon=lon, files=[photo])


def get_mtime(fpath):
    """
    Return the file's last-modified time as a naive, local-time datetime
    """
    modified_ts = os.path.getmtime(fpath)
    return datetime.fromtimestamp(modified_ts)


def get_hash(path):
    """
    Return the hex-encoded sha256 digest of the file at path

    The file is read in 256 KiB chunks so it is never held in memory whole.
    """
    digest = sha256()
    with open(path, 'rb') as stream:
        # iter() keeps calling read() until it returns the b'' end-of-file marker
        for chunk in iter(lambda: stream.read(1024 * 256), b''):
            digest.update(chunk)
    return digest.hexdigest()


def get_exif_data(path):
    """
    Return the (date, gps, size, orientation) tuple for the image file at path

    The values come from get_exif_data_fobj(); when that yields no date, the
    file's modification time is used instead.
    """
    with open(path, 'rb') as stream:
        exif_date, coords, dims, rotation = get_exif_data_fobj(stream)

    when = get_mtime(path) if exif_date is None else exif_date
    return when, coords, dims, rotation


def get_exif_data_fobj(fobj):
    """
    Read date, gps, size and orientation information from an open image file

    :param fobj: binary file object containing an image Pillow can open
    :return: a (dateinfo, gpsinfo, sizeinfo, orientationinfo) tuple:
        dateinfo - datetime parsed from the exif date tags, or None when there
                   is no exif date or it cannot be parsed
        gpsinfo - result of parse_exif_gps() on the GPSInfo tag, or None when
                  the image has no exif data
        sizeinfo - (width, height) as reported by Pillow
        orientationinfo - 0-3, mapped from the exif Orientation tag (0 when
                          the tag is absent or has an unrecognized value)
    """
    img = Image.open(fobj)  # TODO do i need to close this?

    gpsinfo = None
    dateinfo = None
    orientationinfo = 0
    sizeinfo = (img.width, img.height)

    # exif is only looked for in these formats, and only when the image object
    # exposes _getexif()
    exif_data = None
    if img.format in ["JPEG", "PNG", "GIF"] and hasattr(img, "_getexif"):
        exif_data = img._getexif()
    if not exif_data:
        return dateinfo, gpsinfo, sizeinfo, orientationinfo

    # Re-key the numeric tag ids by name, dropping ids Pillow has no name for
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in exif_data.items()
        if k in ExifTags.TAGS
    }

    # Every tag that is present is considered; later entries in this list
    # override earlier ones
    datestr = None
    for key in ("DateTime", "DateTimeOriginal", "DateTimeDigitized"):
        if key in exif:
            datestr = exif[key]

    if datestr and not datestr.startswith("0000"):  # Weed out some known bad cases
        # Try each known layout in turn. A value matching none of them leaves
        # dateinfo as None instead of raising, same as the "0000" case above.
        for fmt in ("%Y:%m:%d %H:%M:%S", "%Y:%m:%d:%H:%M:%S"):
            try:
                dateinfo = datetime.strptime(datestr, fmt)
                break
            except ValueError:
                continue

    orien = exif.get("Orientation")
    if orien:
        # exif orientation value -> this module's 0-3 orientation code
        orientationinfo = {0: 0, 8: 1, 3: 2, 6: 3}.get(int(orien), 0)

    gpsinfo = parse_exif_gps(exif.get("GPSInfo"))

    return dateinfo, gpsinfo, sizeinfo, orientationinfo


def parse_exif_gps(gps):
    """
    Convert an exif GPSInfo mapping into signed decimal coordinates

    :param gps: mapping keyed by numeric gps tag id, or None. Keys 1 and 3 must
        hold the "N"/"S" and "E"/"W" reference letters; keys 2 and 4 hold the
        matching coordinate as three rational values.
    :return: (lat, lon) rounded to 8 decimal places, negative for S and W, or
        None if the data is missing, incomplete or malformed
    """
    if not gps or 1 not in gps or 2 not in gps or 3 not in gps or 4 not in gps:
        return None

    if gps[1] not in {"N", "S"} or gps[3] not in {"E", "W"}:
        return None

    try:
        y_decimal = rational64u_to_hms(gps[2])
        x_decimal = rational64u_to_hms(gps[4])
    except (TypeError, ArithmeticError):
        # TypeError: the coordinate is not an indexable sequence.
        # ArithmeticError: a rational with a zero denominator makes the Decimal
        # division raise DivisionByZero / InvalidOperation. Either way the gps
        # data is unusable, so report it as unavailable.
        return None

    # see https://gis.stackexchange.com/a/273402
    gps_y = round(hms_to_decimal(y_decimal), 8)
    gps_x = round(hms_to_decimal(x_decimal), 8)
    if gps[1] == 'S':
        gps_y *= -1
    if gps[3] == 'W':
        gps_x *= -1
    return (gps_y, gps_x)


def rational64u_to_hms(values):
    """
    Turn the first three rationals in values into a list of three Decimals

    Each item must expose .numerator and .denominator; the result for each is
    numerator / denominator.
    """
    # presumably degrees, minutes, seconds as stored in exif - verify against caller
    return [
        Decimal(values[idx].numerator) / Decimal(values[idx].denominator)
        for idx in range(3)
    ]


def hms_to_decimal(values):
    """
    Collapse a three-part sexagesimal value into one number

    Computes values[0] + values[1] / 60 + values[2] / 3600.
    """
    total = values[0]
    total = total + values[1] / 60
    total = total + values[2] / 3600
    return total


def is_cr3(header):
    """
    Return True if header looks like the start of a Canon CR3 file

    :param header: leading bytes of the file; the first 72 are inspected
    """
    # detect Canon's new cr3 format
    # here's what we look for, expecting the first 100 bytes from the file:
    # example cr3 file: IMG_0956.CR3
    # 00000000  00 00 00 18 66 74 79 70  63 72 78 20 00 00 00 01  |....ftypcrx ....|    <--- the "ftypcrx "
    # 00000010  63 72 78 20 69 73 6f 6d  00 00 71 a0 6d 6f 6f 76  |crx isom..q.moov|
    # 00000020  00 00 68 c0 75 75 69 64  85 c0 b6 87 82 0f 11 e0  |..h.uuid........|
    # 00000030  81 11 f4 ce 46 2b 6a 48  00 00 00 26 43 4e 43 56  |....F+jH...&CNCV|
    # 00000040  43 61 6e 6f 6e 43 52 33  5f 30 30 31 2f 30 30 2e  |CanonCR3_001/00.|    <--- the "CanonCR3"
    # 00000050  31 31 2e 30 30 2f 30 30  2e 30 30 2e 30 30 00 00  |11.00/00.00.00..|
    # 00000060  00 5c 43 43                                       |.\CC|
    # 00000064
    if header[0:13] != b'\x00\x00\x00\x18ftypcrx \x00':
        return False
    return header[64:72] == b'CanonCR3'


def is_file_cr3(fpath):
    """
    Return True if the file at fpath starts with a Canon CR3 header

    Only the first 100 bytes of the file are read.
    """
    with open(fpath, "rb") as stream:
        leading_bytes = stream.read(100)
        return is_cr3(leading_bytes)


def is_file_xmp(fname):
    """
    Return True if the text after the last "." in fname is "xmp" (any case)

    A name with no "." at all is compared whole, so a bare "xmp" also matches.
    """
    _, _, suffix = fname.rpartition(".")
    return suffix.lower() == "xmp"


def special_magic(fpath):
    """
    Return a mime type for the file at fpath, covering some formats that
    magic doesn't know about
    """
    if not is_file_xmp(fpath):
        # not recognizable by name alone, so the file has to be opened and read
        with open(fpath, "rb") as stream:
            return special_magic_fobj(stream, fpath)

    # the xmp check only looks at the file name; special_magic_fobj() does the
    # same check, but answering here avoids an open()
    return "application/octet-stream-xmp"


def special_magic_fobj(fobj, fname):
    """
    Return a mime type for an already-open file

    :param fobj: binary file object; up to 1024 bytes are read from it unless
        the name alone identifies the file
    :param fname: file name, used only for the xmp extension check
    """
    # xmp files are recognized by name, without touching fobj
    if is_file_xmp(fname):
        return "application/octet-stream-xmp"

    header = fobj.read(1024)
    if is_cr3(header):
        return "image/x-canon-cr3"

    # anything else is left to magic
    return magic.from_buffer(header, mime=True)