from datetime import datetime
from decimal import Decimal
from hashlib import sha256
import os

import magic
from PIL import Image, ExifTags

from photoapp.types import Photo, PhotoSet


# strptime formats tried, in order, against EXIF date strings
_EXIF_DATE_FORMATS = ("%Y:%m:%d %H:%M:%S", "%Y:%m:%d:%H:%M:%S")


def get_jpg_info(fpath):
    """
    Given the path to a jpg, return a PhotoSet describing it

    The PhotoSet holds a single Photo (hash, path, mime type, size, dimensions, orientation, file name) along with the
    date and, when usable, the gps latitude/longitude of the image.
    """
    date, gps, dimensions, orientation = get_exif_data(fpath)

    if date is None:
        raise Exception("No date found, panicing for unknown reasons!")

    # gps is None if unavailable; a coordinate pair containing 0 is treated as missing too
    lat, lon = gps if gps and 0 not in gps else [None, None]
    dimensions = dimensions or (None, None)
    mime = magic.from_file(fpath, mime=True)
    size = os.path.getsize(fpath)

    photo = Photo(hash=get_hash(fpath), path=fpath, format=mime, size=size,
                  width=dimensions[0], height=dimensions[1], orientation=orientation,
                  fname=os.path.basename(fpath))
    return PhotoSet(date=date, date_real=date, lat=lat, lon=lon, files=[photo])


def get_mtime(fpath):
    """
    Return the file's modification time as a naive local datetime
    """
    return datetime.fromtimestamp(os.stat(fpath).st_mtime)


def get_hash(path):
    """
    Return the hex sha256 digest of the file at path, reading it in 256 KiB pieces
    """
    hasher = sha256()
    with open(path, 'rb') as f:
        while True:
            piece = f.read(1024 * 256)
            if not piece:
                break
            hasher.update(piece)
    return hasher.hexdigest()


def get_exif_data(path):
    """
    Return the same tuple as get_exif_data_fobj for the file at path, substituting the file's mtime when no usable
    exif date was found
    """
    with open(path, 'rb') as f:
        dateinfo, gpsinfo, sizeinfo, orientationinfo = get_exif_data_fobj(f)
        if dateinfo is None:
            dateinfo = get_mtime(path)
    return dateinfo, gpsinfo, sizeinfo, orientationinfo


def _parse_exif_date(datestr):
    """
    Parse an exif date string into a datetime; return None if it is empty, starts with "0000" or matches none of the
    known formats
    """
    if not datestr or datestr.startswith("0000"):  # Weed out some known bad cases
        return None
    for fmt in _EXIF_DATE_FORMATS:
        try:
            return datetime.strptime(datestr, fmt)
        except ValueError:
            continue
    # Unparseable dates are reported as missing so callers can fall back (e.g. to the file mtime) instead of crashing
    return None


def get_exif_data_fobj(fobj):
    """
    Return a (datetime, (decimal, decimal), (width, height), rotation) tuple describing the photo's exif date and gps
    coordinates

    The datetime and the gps pair are None when unavailable. Rotation is the exif Orientation value mapped through
    {8: 1, 3: 2, 6: 3}, with every other value (or no value) giving 0.
    """
    # TODO do i need to close this? (fobj itself is left open here; get_exif_data closes it via its `with` block)
    img = Image.open(fobj)

    gpsinfo = None
    dateinfo = None
    orientationinfo = 0
    sizeinfo = (img.width, img.height)

    exif_data = None
    if img.format in ["JPEG", "PNG", "GIF"] and hasattr(img, "_getexif"):
        exif_data = img._getexif()

    if exif_data:
        # Translate numeric tag ids to names, dropping tags ExifTags doesn't know
        exif = {
            ExifTags.TAGS[k]: v
            for k, v in exif_data.items()
            if k in ExifTags.TAGS
        }

        # No early exit: when several date tags are present, the last one listed here wins
        datestr = None
        for key in ("DateTime", "DateTimeOriginal", "DateTimeDigitized"):
            if key in exif:
                datestr = exif[key]
        dateinfo = _parse_exif_date(datestr)

        orien = exif.get("Orientation")
        if orien:
            orientationinfo = {0: 0, 8: 1, 3: 2, 6: 3}.get(int(orien), 0)

        gpsinfo = parse_exif_gps(exif.get("GPSInfo"))

    return dateinfo, gpsinfo, sizeinfo, orientationinfo


def parse_exif_gps(gps):
    """
    Convert an exif GPSInfo mapping into a (latitude, longitude) pair of Decimals rounded to 8 places

    Keys 1 and 3 hold the N/S and E/W references; keys 2 and 4 hold the matching coordinates as three rationals.
    South and west are returned as negative values. Returns None if the mapping is missing, incomplete or malformed.
    """
    if not gps or any(tag not in gps for tag in (1, 2, 3, 4)):
        return None

    if gps[1] not in {"N", "S"} or gps[3] not in {"E", "W"}:
        return None

    try:
        # see https://gis.stackexchange.com/a/273402
        gps_y = round(hms_to_decimal(rational64u_to_hms(gps[2])), 8)
        gps_x = round(hms_to_decimal(rational64u_to_hms(gps[4])), 8)
    except (TypeError, AttributeError, IndexError, ArithmeticError):
        # Malformed coordinates: values that are not rationals, fewer than three components, or zero denominators
        # (decimal.DivisionByZero / decimal.InvalidOperation are ArithmeticError subclasses)
        return None

    if gps[1] == 'S':
        gps_y *= -1

    if gps[3] == 'W':
        gps_x *= -1

    return (gps_y, gps_x)


def rational64u_to_hms(values):
    """
    Convert the first three rationals (objects with .numerator and .denominator) in values to a list of Decimals
    """
    return [Decimal(part.numerator) / Decimal(part.denominator)
            for part in (values[0], values[1], values[2])]


def hms_to_decimal(values):
    """
    Collapse a three-component value into one number: values[0] + values[1] / 60 + values[2] / 3600
    """
    return values[0] + values[1] / 60 + values[2] / 3600


def is_cr3(header):
    """
    Return True if header (the leading bytes of a file) carries the Canon CR3 signature
    """
    # detect Canon's new cr3 format
    # here's what we look for, expecting the first 100 bytes from the file:
    # example cr3 file: IMG_0956.CR3
    # 00000000  00 00 00 18 66 74 79 70  63 72 78 20 00 00 00 01  |....ftypcrx ....|  <--- the "ftypcrx "
    # 00000010  63 72 78 20 69 73 6f 6d  00 00 71 a0 6d 6f 6f 76  |crx isom..q.moov|
    # 00000020  00 00 68 c0 75 75 69 64  85 c0 b6 87 82 0f 11 e0  |..h.uuid........|
    # 00000030  81 11 f4 ce 46 2b 6a 48  00 00 00 26 43 4e 43 56  |....F+jH...&CNCV|
    # 00000040  43 61 6e 6f 6e 43 52 33  5f 30 30 31 2f 30 30 2e  |CanonCR3_001/00.|  <--- the "CanonCR3"
    # 00000050  31 31 2e 30 30 2f 30 30  2e 30 30 2e 30 30 00 00  |11.00/00.00.00..|
    # 00000060  00 5c 43 43                                       |.\CC|
    # 00000064
    tag1 = header[0:13]
    tag2 = header[64:72]
    return tag1 == b'\x00\x00\x00\x18ftypcrx \x00' and tag2 == b'CanonCR3'


def is_file_cr3(fpath):
    """
    Return True if the file at fpath starts with the Canon CR3 signature (reads the first 100 bytes)
    """
    with open(fpath, "rb") as f:
        header = f.read(100)
    return is_cr3(header)


def is_file_xmp(fname):
    """
    Return True if fname has an .xmp extension (case-insensitive); only the name is inspected, not the contents
    """
    # Require the dot, so a file that is literally named "xmp" is not mistaken for an xmp file
    return fname.lower().endswith(".xmp")


def special_magic(fpath):
    """
    magic but support for some new formats magic doesn't know about
    """
    if is_file_xmp(fpath):  # this one is cheap and uses the file name
        return "application/octet-stream-xmp"  # duplicate check here avoids a file open() which i guess is nice

    # otherwise, we fall back to reading the file
    with open(fpath, "rb") as f:
        return special_magic_fobj(f, fpath)


def special_magic_fobj(fobj, fname):
    """
    Same as special_magic, but for an already-open binary file object; fname is used only for the xmp name check

    Reads up to 1024 bytes from fobj unless the name check already settled the type.
    """
    if is_file_xmp(fname):  # supported here too ...
        return "application/octet-stream-xmp"

    header = fobj.read(1024)
    if is_cr3(header):
        return "image/x-canon-cr3"
    return magic.from_buffer(header, mime=True)