From edb80828e8e2b2c877b89f441d3953e5391f3c51 Mon Sep 17 00:00:00 2001 From: dave Date: Thu, 4 Jul 2019 13:10:52 -0700 Subject: [PATCH] api upload refined --- photoapp/api.py | 63 +++++++++++++++++++++++++++++----------------- photoapp/cli.py | 6 ++++- photoapp/image.py | 15 +++++++++++ photoapp/ingest.py | 9 +------ photoapp/types.py | 26 ++++++++++++++++--- 5 files changed, 84 insertions(+), 35 deletions(-) diff --git a/photoapp/api.py b/photoapp/api.py index 911238a..0372e36 100644 --- a/photoapp/api.py +++ b/photoapp/api.py @@ -4,7 +4,7 @@ import logging import json from datetime import datetime, timedelta from photoapp.library import PhotoLibrary -from photoapp.types import Photo, PhotoSet, Tag, TagItem, PhotoStatus, User, known_extensions +from photoapp.types import Photo, PhotoSet, Tag, TagItem, PhotoStatus, User, known_extensions, known_mimes from jinja2 import Environment, FileSystemLoader, select_autoescape from sqlalchemy import desc from sqlalchemy import func, and_, or_ @@ -13,6 +13,7 @@ from photoapp.common import pwhash import math from urllib.parse import urlparse from photoapp.utils import mime2ext, auth, require_auth, photo_auth_filter, slugify, copysha, get_extension +from photoapp.image import special_magic_fobj from photoapp.dbutils import db import tempfile from contextlib import closing @@ -37,6 +38,9 @@ class StorageAdapter(object): # TODO erase the path raise NotImplementedError() + def getsize(self, path): + raise NotImplementedError() + class FilesystemAdapter(StorageAdapter): def __init__(self, root): @@ -45,19 +49,22 @@ class FilesystemAdapter(StorageAdapter): def exists(self, path): # TODO return true/false if the file path exists - return os.path.exists(self.abspath(path)) + return os.path.exists(self._abspath(path)) def open(self, path, mode): # TODO return a handle to the path. this should work as a context manager - os.makedirs(os.path.dirname(self.abspath(path)), exist_ok=True) - return open(self.abspath(path), mode) + os.makedirs(os.path.dirname(self._abspath(path)), exist_ok=True) + return open(self._abspath(path), mode) def delete(self, path): # TODO delete the file # TODO prune empty directories that were components of $path - os.unlink(self.abspath(path)) + os.unlink(self._abspath(path)) - def abspath(self, path): + def getsize(self, path): + return os.path.getsize(self._abspath(path)) + + def _abspath(self, path): return os.path.join(self.root, path) @@ -74,6 +81,9 @@ class S3Adapter(StorageAdapter): # TODO erase the path raise NotImplementedError() + def getsize(self, path): + raise NotImplementedError() + class GfapiAdapter(StorageAdapter): pass # TODO gluster storage backend @@ -156,13 +166,12 @@ class PhotosApiV1(object): stored_files = [] photo_objs = [] - def abort_upload(): + def abort_upload(reason): for file in stored_files: self.library.storage.delete(photo_path) db.rollback() - print(traceback.format_exc()) - # raise cherrypy.HTTPError(400, traceback.format_exc()) - raise + cherrypy.response.status = 400 + return {"error": reason} for file in files: # build path using the sha and extension. note that we trust the sha the client provided now & verify later @@ -172,10 +181,8 @@ class PhotosApiV1(object): assert ext in known_extensions photo_path = f"{basepath}_{photo_meta['hash'][0:8]}.{ext}" - try: - assert not self.library.storage.exists(photo_path), f"file already in library: {photo_path}" - except AssertionError: - abort_upload() + if self.library.storage.exists(photo_path): + return abort_upload("file already in library: {photo_path}") # write file to the path (and copy sha while in flight) with closing(self.library.storage.open(photo_path, 'wb')) as f: @@ -183,20 +190,30 @@ class PhotosApiV1(object): stored_files.append(photo_path) - # is sha doesn't match uploaded metadata, abort + # misc input validation + # also if sha doesn't match uploaded metadata, abort + # todo don't use asserts try: assert shasum == photo_meta["hash"], "uploaded file didn't match provided sha" - except AssertionError: - abort_upload() + + with closing(self.library.storage.open(photo_path, 'rb')) as f: + mime = special_magic_fobj(f, file.filename) + assert mime == photo_meta.get("format") and mime in known_mimes, "unknown or invalid mime" + + assert self.library.storage.getsize(photo_path) == photo_meta.get("size"), \ + "invalid size, file truncated?" + + except AssertionError as ae: + return abort_upload(str(ae)) # create photo object for this entry p = Photo(hash=shasum, path=photo_path, - format=photo_meta.get("format"), # TODO verify - size=photo_meta.get("size"), # TODO verify - width=photo_meta.get("width"), - height=photo_meta.get("height"), - orientation=photo_meta.get("orientation")) + format=photo_meta.get("format"), + size=photo_meta.get("size"), + width=photo_meta.get("width"), # not verified + height=photo_meta.get("height"), # not verified + orientation=photo_meta.get("orientation")) # not verified photo_objs.append(p) @@ -209,7 +226,7 @@ class PhotosApiV1(object): try: db.commit() except IntegrityError: - abort_upload() + return abort_upload() return ps.to_json() diff --git a/photoapp/cli.py b/photoapp/cli.py index 16e8e5c..9c94ba0 100644 --- a/photoapp/cli.py +++ b/photoapp/cli.py @@ -138,7 +138,11 @@ def main(): files.append(("files", (os.path.basename(file.path), open(file.path, 'rb'), file.format), )) print("Uploading: ", [os.path.basename(file.path) for file in set_.files]) - result = client.upload(files, payload) + try: + result = client.upload(files, payload) + except HTTPError as he: + print(he.response.json()) + return print("Uploaded: ", result.json()["uuid"]) print(f"{num} / {len(sets)}") # TODO be nice and close the files diff --git a/photoapp/image.py b/photoapp/image.py index 81d1f9c..a90a7e3 100644 --- a/photoapp/image.py +++ b/photoapp/image.py @@ -109,5 +109,20 @@ def hms_to_decimal(values): return values[0] + values[1] / 60 + values[2] / 3600 +def special_magic(fpath): + if fpath.split(".")[-1].lower() == "xmp": + return "application/octet-stream-xmp" + else: + return magic.from_file(fpath, mime=True) + + +def special_magic_fobj(fobj, fname): + if fname.split(".")[-1].lower() == "xmp": + return "application/octet-stream-xmp" + else: + fobj.seek(0) + return magic.from_buffer(fobj.read(1024), mime=True) + + def main(): print(get_exif_data("library/2018/9/8/MMwo4hr.jpg")) diff --git a/photoapp/ingest.py b/photoapp/ingest.py index e773657..c021906 100644 --- a/photoapp/ingest.py +++ b/photoapp/ingest.py @@ -2,7 +2,7 @@ import magic import argparse import traceback from photoapp.library import PhotoLibrary -from photoapp.image import get_jpg_info, get_hash, get_mtime +from photoapp.image import get_jpg_info, get_hash, get_mtime, special_magic from itertools import chain from photoapp.types import Photo, PhotoSet, known_extensions, regular_images, files_raw, files_video, map_extension import os @@ -104,13 +104,6 @@ def batch_ingest(library, files): print("\nUpdate complete") -def special_magic(fpath): - if fpath.split(".")[-1].lower() == "xmp": - return "application/octet-stream-xmp" - else: - return magic.from_file(fpath, mime=True) - - def main(): parser = argparse.ArgumentParser(description="Library ingestion tool") parser.add_argument("files", nargs="+") diff --git a/photoapp/types.py b/photoapp/types.py index 234a37d..142b7b5 100644 --- a/photoapp/types.py +++ b/photoapp/types.py @@ -7,12 +7,32 @@ import uuid import enum +# file extensions we allow known_extensions = ["jpg", "png", "cr2", "xmp", "mp4", "mov"] -regular_images = ["jpg", "png"] -files_raw = ["cr2", "xmp"] + +# categorizaiton of media type based on extension +regular_images = ["jpg", "png"] # we can pull metadata out of these +files_raw = ["cr2", "xmp"] # treated as black boxes files_video = ["mp4", "mov"] + +# extensions with well-known aliases mapped_extensions = {"jpg": {"jpeg", }} # target: aliases -known_mimes = {"image/jpeg"} # TODO enforce this + +# allowed file types (based on magic identification) +# TODO enforce this +known_mimes = {"image/png", + "image/jpeg", + "image/gif", + "application/octet-stream-xmp", + "image/x-canon-cr2", + "video/mp4", + "video/quicktime"} + + +def mime2ext(mime): + """ + Given a mime type return the canonical file extension + """ def map_extension(ext):