support canon cr3 raws
All checks were successful
Gitea/photolib/pipeline/head This commit looks good

This commit is contained in:
dave 2022-12-21 23:32:58 -08:00
parent 7fa03091ce
commit 174b64a771
3 changed files with 46 additions and 8 deletions

View File

@ -127,15 +127,51 @@ def hms_to_decimal(values):
return values[0] + values[1] / 60 + values[2] / 3600
def is_cr3(header):
    """Return True when ``header`` looks like the start of a Canon CR3 file.

    ``header`` is expected to hold the leading bytes of the file (callers in
    this module pass at least the first 100). Two fixed-position markers must
    both match: the ``ftypcrx `` box at the very start and the ``CanonCR3``
    string at byte offset 64.
    """
    # detect Canon's new cr3 format
    # here's what we look for, expecting the first 100 bytes from the file:
    # example cr3 file: IMG_0956.CR3
    # 00000000 00 00 00 18 66 74 79 70 63 72 78 20 00 00 00 01 |....ftypcrx ....| <--- the "ftypcrx "
    # 00000010 63 72 78 20 69 73 6f 6d 00 00 71 a0 6d 6f 6f 76 |crx isom..q.moov|
    # 00000020 00 00 68 c0 75 75 69 64 85 c0 b6 87 82 0f 11 e0 |..h.uuid........|
    # 00000030 81 11 f4 ce 46 2b 6a 48 00 00 00 26 43 4e 43 56 |....F+jH...&CNCV|
    # 00000040 43 61 6e 6f 6e 43 52 33 5f 30 30 31 2f 30 30 2e |CanonCR3_001/00.| <--- the "CanonCR3"
    # 00000050 31 31 2e 30 30 2f 30 30 2e 30 30 2e 30 30 00 00 |11.00/00.00.00..|
    # 00000060 00 5c 43 43 |.\CC|
    # 00000064
    ftyp_marker = b'\x00\x00\x00\x18ftypcrx \x00'  # bytes 0..12 of the file
    brand_marker = b'CanonCR3'  # bytes 64..71 of the file

    if header[0:13] != ftyp_marker:
        return False
    return header[64:72] == brand_marker
def is_file_cr3(fpath):
    """Report whether the file at ``fpath`` begins with a Canon CR3 header.

    Opens the file in binary mode, reads its first 100 bytes and passes them
    to ``is_cr3``. Errors from ``open`` propagate to the caller.
    """
    with open(fpath, "rb") as raw:
        leading_bytes = raw.read(100)
    return is_cr3(leading_bytes)
def is_file_xmp(fname):
    """Return True when ``fname`` carries an ``.xmp`` extension (any case).

    The check looks at the name only; the file is never opened. A literal
    ``.xmp`` suffix is required, so a dot-less name that merely equals
    ``"xmp"`` is not treated as an XMP file.
    """
    return fname.lower().endswith(".xmp")
def special_magic(fpath):
    """
    magic but support for some new formats magic doesn't know about

    Returns a mime type string for the file at ``fpath``. XMP files are
    recognised from the file name alone; everything else is opened in binary
    mode and classified by ``special_magic_fobj``.
    """
    if is_file_xmp(fpath):  # this one is cheap and uses the file name
        # duplicate check here avoids a file open() which i guess is nice
        return "application/octet-stream-xmp"
    # otherwise, we fall back to reading the file
    with open(fpath, "rb") as f:
        return special_magic_fobj(f, fpath)
def special_magic_fobj(fobj, fname):
    """Return a mime type string for an already-open binary file object.

    ``fname`` is consulted only for the XMP extension check. For any other
    name, up to 1024 bytes are read from ``fobj`` (advancing its position);
    a Canon CR3 header yields ``"image/x-canon-cr3"``, and anything else is
    passed to ``magic.from_buffer(..., mime=True)``.
    """
    if is_file_xmp(fname):  # supported here too ...
        return "application/octet-stream-xmp"

    # Read once and reuse the same buffer for both the CR3 check and magic.
    header = fobj.read(1024)
    if is_cr3(header):
        return "image/x-canon-cr3"
    return magic.from_buffer(header, mime=True)

View File

@ -172,7 +172,7 @@ def migrate_gpsfix():
continue
# just bail if there's a CR2, the paired jpg is known not to have gps data in my dataset :)
if any(["image/x-canon-cr2" == i.format for i in p.files]):
if any([i.format in ("image/x-canon-cr2", "image/x-canon-cr3", ) for i in p.files]):
continue
# pick the jpg out of the set

View File

@ -27,6 +27,8 @@ ftypes = dict(jpg=dict(category=fcategory.image,
mimes={"image/gif"}),
cr2=dict(category=fcategory.raw,
mimes={"image/x-canon-cr2"}),
cr3=dict(category=fcategory.raw,
mimes={"image/x-canon-cr3"}),
xmp=dict(category=fcategory.raw,
mimes={"application/octet-stream-xmp"}),
psd=dict(category=fcategory.raw,
@ -66,7 +68,7 @@ known_mimes = set.union(*[i["mimes"] for i in ftypes.values()])
regular_images = set([extension for extension, ftype in ftypes.items() if ftype["category"] == fcategory.image])
regular_mimes = set().union(*[ftype["mimes"] for ftype in ftypes.values() if ftype["category"] == fcategory.image])
# "derived" files, treated as black boxes, we can't open them because proprietary
# cr2, xmp, etc
# cr2, cr3, xmp, etc
files_raw = set([extension for extension, ftype in ftypes.items() if ftype["category"] == fcategory.raw])
# video types
# mp4, mov, etc