diff --git a/photoapp/image.py b/photoapp/image.py index bf3dad5..acbca4a 100644 --- a/photoapp/image.py +++ b/photoapp/image.py @@ -127,15 +127,51 @@ def hms_to_decimal(values): return values[0] + values[1] / 60 + values[2] / 3600 +def is_cr3(header): + # detect Canon's new CR3 format from the leading bytes of a file + # here's what we look for (only bytes 0-12 and 64-71 are compared; the dump shows the first 100 bytes of the file): + # example cr3 file: IMG_0956.CR3 + # 00000000 00 00 00 18 66 74 79 70 63 72 78 20 00 00 00 01 |....ftypcrx ....| <--- the "ftypcrx " + # 00000010 63 72 78 20 69 73 6f 6d 00 00 71 a0 6d 6f 6f 76 |crx isom..q.moov| + # 00000020 00 00 68 c0 75 75 69 64 85 c0 b6 87 82 0f 11 e0 |..h.uuid........| + # 00000030 81 11 f4 ce 46 2b 6a 48 00 00 00 26 43 4e 43 56 |....F+jH...&CNCV| + # 00000040 43 61 6e 6f 6e 43 52 33 5f 30 30 31 2f 30 30 2e |CanonCR3_001/00.| <--- the "CanonCR3" + # 00000050 31 31 2e 30 30 2f 30 30 2e 30 30 2e 30 30 00 00 |11.00/00.00.00..| + # 00000060 00 5c 43 43 |.\CC| + # 00000064 + tag1 = header[0:13] + tag2 = header[64:72] + return tag1 == b'\x00\x00\x00\x18ftypcrx \x00' and tag2 == b'CanonCR3' + + +def is_file_cr3(fpath): + with open(fpath, "rb") as f: + header = f.read(100) + return is_cr3(header) + + +def is_file_xmp(fname): + return fname.split(".")[-1].lower() == "xmp" + + def special_magic(fpath): - if fpath.split(".")[-1].lower() == "xmp": - return "application/octet-stream-xmp" - else: - return magic.from_file(fpath, mime=True) + """ + mime detection via magic, plus support for some formats (xmp by file name, cr3 by header) that magic doesn't know about + """ + if is_file_xmp(fpath): # this one is cheap and uses the file name + return "application/octet-stream-xmp" # duplicating the check here (special_magic_fobj does it too) avoids a file open() + else: # otherwise, we fall back to reading the file + with open(fpath, "rb") as f: + return special_magic_fobj(f, fpath) def special_magic_fobj(fobj, fname): - if fname.split(".")[-1].lower() == "xmp": + if is_file_xmp(fname): # supported here too ...
return "application/octet-stream-xmp" else: - return magic.from_buffer(fobj.read(1024), mime=True) + header = fobj.read(1024) + if is_cr3(header): + return "image/x-canon-cr3" + else: + ret = magic.from_buffer(header, mime=True) + return ret diff --git a/photoapp/migrate.py b/photoapp/migrate.py index 31c706a..3e7fdcd 100644 --- a/photoapp/migrate.py +++ b/photoapp/migrate.py @@ -172,7 +172,7 @@ def migrate_gpsfix(): continue # just bail if there's a CR2, the paired jpg is known not to have gps data in my dataset :) - if any(["image/x-canon-cr2" == i.format for i in p.files]): + if any([i.format in ("image/x-canon-cr2", "image/x-canon-cr3", ) for i in p.files]): continue # pick the jpg out of the set diff --git a/photoapp/types.py b/photoapp/types.py index b2c8aa7..dea32e7 100644 --- a/photoapp/types.py +++ b/photoapp/types.py @@ -27,6 +27,8 @@ ftypes = dict(jpg=dict(category=fcategory.image, mimes={"image/gif"}), cr2=dict(category=fcategory.raw, mimes={"image/x-canon-cr2"}), + cr3=dict(category=fcategory.raw, + mimes={"image/x-canon-cr3"}), xmp=dict(category=fcategory.raw, mimes={"application/octet-stream-xmp"}), psd=dict(category=fcategory.raw, @@ -66,7 +68,7 @@ known_mimes = set.union(*[i["mimes"] for i in ftypes.values()]) regular_images = set([extension for extension, ftype in ftypes.items() if ftype["category"] == fcategory.image]) regular_mimes = set().union(*[ftype["mimes"] for ftype in ftypes.values() if ftype["category"] == fcategory.image]) # "derived" files, treated as black boxes, we can't open them because proprietary -# cr2, xmp, etc +# cr2, cr3, xmp, etc files_raw = set([extension for extension, ftype in ftypes.items() if ftype["category"] == fcategory.raw]) # video types # mp4, mov, etc