From ae948af4183824a32a904bbba7007e75888283de Mon Sep 17 00:00:00 2001 From: dave Date: Tue, 2 Jul 2019 12:34:41 -0700 Subject: [PATCH] api upload related changes --- photoapp/api.py | 106 ++++++++++++++++++++++++++++++++-------------- photoapp/cli.py | 7 ++- photoapp/types.py | 2 +- photoapp/utils.py | 22 ++++++++++ 4 files changed, 102 insertions(+), 35 deletions(-) diff --git a/photoapp/api.py b/photoapp/api.py index 47c778c..9995e90 100644 --- a/photoapp/api.py +++ b/photoapp/api.py @@ -4,7 +4,7 @@ import logging import json from datetime import datetime, timedelta from photoapp.library import PhotoLibrary -from photoapp.types import Photo, PhotoSet, Tag, TagItem, PhotoStatus, User +from photoapp.types import Photo, PhotoSet, Tag, TagItem, PhotoStatus, User, known_extensions from jinja2 import Environment, FileSystemLoader, select_autoescape from sqlalchemy import desc from sqlalchemy import func, and_, or_ @@ -12,13 +12,14 @@ from sqlalchemy.exc import IntegrityError from photoapp.common import pwhash import math from urllib.parse import urlparse -from photoapp.utils import mime2ext, auth, require_auth, photo_auth_filter, slugify +from photoapp.utils import mime2ext, auth, require_auth, photo_auth_filter, slugify, copysha, get_extension from photoapp.dbutils import db +import tempfile class StorageAdapter(object): """ - Abstract interface for working with photo file storage. All paths are relative to the storage adapter's root parameter. + Abstract interface for working with photo file storage. All paths are relative to the storage adapter's root param. 
""" def file_exists(self, path): @@ -36,6 +37,7 @@ class StorageAdapter(object): def dedupe_name(self, path): # TODO modify and return the passed path such that writing to it does not overwrite an existing file + # TODO it would probably be smart to hold some kind of lock on this file raise NotImplementedError() @@ -83,7 +85,7 @@ class GfapiAdapter(StorageAdapter): pass # TODO gluster storage backend -#This is largely duplicated from library.py, but written with intent for later refactoring to support abstract storage. +# This is largely duplicated from library.py, but written with intent for later refactoring to support abstract storage. class LibraryManager(object): def __init__(self, storage): assert isinstance(storage, StorageAdapter) @@ -143,45 +145,85 @@ class PhotosApiV1(object): def upload(self, files, meta): """ upload accepts one photoset (multiple images) - metadata format """ + # load and verify metadata meta = json.loads(meta) - if type(files) != list: files = [files] + if set([file.filename for file in files]) != set(meta["files"].keys()): + raise cherrypy.HTTPError(400, f"file metadata missing") + + # use the photo's date to build a base path + # each file's sha and file extension will be appended to this + photo_date = datetime.fromisoformat(meta["date"]) + basepath = photo_date.strftime("%Y/%m/%d/%Y-%m-%d_%H.%M.%S") for file in files: - print("File name:", file.filename) + # build path using the sha and extension. 
note that we trust the sha the client provided now & verify later + photo_meta = meta["files"][file.filename] + ext = get_extension(file.filename) + assert ext in known_extensions + photo_path = f"{basepath}_{photo_meta['hash'][0:8]}.{ext}" - # import hashlib - # sha = hashlib.sha256() - # total = 0 - # while True: - # b = file.file.read(1024) - # if not b: - # break - # sha.update(b) - # total += len(b) - # print("Read length:", total) - # print("Read sha256:", sha.hexdigest()) + print(photo_path) - if str(file.filename) not in meta["files"].keys(): - raise cherrypy.HTTPError(400, f"no mdatadata provided for filename '{file.filename}'") - print("we have metadata for this file:", meta["files"][file.filename]) + # generate a path in the storage + # yyyy/mm/dd/yyyy-mm_hh.MM.ss_x.jpg + # dest = self.library.storage.dedupe_name() - # create database objects based on the request - # self.lib.add_photoset(set_, photos) + # write file to the path (and copy sha while in flight) - # build file path (yyyy/mm/dd/yyyy-mm_hh.MM.ss_x.jpg) (incrmenting X if the key already exists etc) - # copy to storage - # check if sha256 exists already - # delete if dupe, raise error - # (see file rewind code in ingest.py) - # create records - # commit - # respond with list of uuids of the sets + # if sha doesn't match uploaded metadata, abort - print("____") + # create photo object for this entry + pass + + # create photoset with the above photos + + # commit it + + # if commit fails, delete the files + + # with tempfile.TemporaryDirectory() as tmpdir: + # finfo = [] + # for file in files: + # # copy to local storage + # # TODO validate for funny paths like ../../ etc + # tmpphoto = os.path.join(tmpdir, file.filename) + # with open(tmpphoto, 'wb') as fout: + # shasum = copysha(file.file, fout) + + # finfo.append((tmpphoto, shasum, os.path.getsize(tmpphoto), )) + + # # print("File name:", file.filename) + # # import hashlib + # # sha = hashlib.sha256() + # # total = 0 + # # while True: + # # b 
= file.file.read(1024) + # # if not b: + # # break + # # sha.update(b) + # # total += len(b) + # # print("Read length:", total) + # # print("Read sha256:", sha.hexdigest()) + + # if str(file.filename) not in meta["files"].keys(): + # raise cherrypy.HTTPError(400, f"no mdatadata provided for filename '{file.filename}'") + # print("we have metadata for this file:", meta["files"][file.filename]) + + # # create database objects based on the request + # # self.lib.add_photoset(set_, photos) + + # # build file path (yyyy/mm/dd/yyyy-mm_hh.MM.ss_x.jpg) (incrementing X if the key already exists etc) + # # copy to storage + # # check if sha256 exists already + # # delete if dupe, raise error + # # (see file rewind code in ingest.py) + # # create records + # # commit + # # respond with list of uuids of the sets + # print("____") @cherrypy.expose @cherrypy.tools.json_out() diff --git a/photoapp/cli.py b/photoapp/cli.py index 18ca836..3e8eb25 100644 --- a/photoapp/cli.py +++ b/photoapp/cli.py @@ -3,7 +3,7 @@ import json import argparse import requests from requests.exceptions import HTTPError -from photoapp.utils import get_extension +from photoapp.utils import get_extension, shasum from photoapp.types import known_extensions from photoapp.common import pwhash from photoapp.ingest import get_photosets @@ -129,7 +129,8 @@ def main(): print("skipping:", skipped) print("sets:", [[f.path for f in s.files] for s in sets]) - for set_ in sets: + print(f"0 / {len(sets)}", end="") + for num, set_ in enumerate(sets): payload = set_.to_json() payload["files"] = {os.path.basename(photo.path): photo.to_json() for photo in set_.files} @@ -138,6 +139,8 @@ def main(): files.append(("files", (os.path.basename(file.path), open(file.path, 'rb'), file.format), )) client.upload(files, payload) + print(f"\r{num} / {len(sets)}", end="") + # TODO be nice and close the files elif args.action == "user": if args.action_user == "create": diff --git a/photoapp/types.py b/photoapp/types.py index 
c3f0fb6..0ac092f 100644 --- a/photoapp/types.py +++ b/photoapp/types.py @@ -76,7 +76,7 @@ class Photo(Base): def to_json(self): j = {attr: getattr(self, attr) for attr in - {"uuid", "size", "width", "height", "orientation", "format"}} + {"uuid", "size", "width", "height", "orientation", "format", "hash"}} j["set"] = self.set.uuid return j diff --git a/photoapp/utils.py b/photoapp/utils.py index 74297cd..abae77f 100644 --- a/photoapp/utils.py +++ b/photoapp/utils.py @@ -1,6 +1,28 @@ import os import cherrypy from photoapp.types import PhotoSet, PhotoStatus +import hashlib + + +def copysha(fpin, fpout): + sha = hashlib.sha256() + while True: + b = fpin.read(4096) + if not b: + break + fpout.write(b) + sha.update(b) + return sha.hexdigest() + + +def shasum(fpin): + sha = hashlib.sha256() + while True: + b = fpin.read(4096) + if not b: + break + sha.update(b) + return sha.hexdigest() def mime2ext(mime):