uploading from cli mvp

This commit is contained in:
dave 2019-07-02 23:03:28 -07:00
parent ae948af418
commit 6e7c47ddad
3 changed files with 91 additions and 101 deletions

View File

@ -15,6 +15,8 @@ from urllib.parse import urlparse
from photoapp.utils import mime2ext, auth, require_auth, photo_auth_filter, slugify, copysha, get_extension
from photoapp.dbutils import db
import tempfile
from contextlib import closing
import traceback
class StorageAdapter(object):
@ -22,7 +24,7 @@ class StorageAdapter(object):
Abstract interface for working with photo file storage. All paths are relative to the storage adapter's root param.
"""
def file_exists(self, path):
def exists(self, path):
# TODO return true/false if the file path exists
raise NotImplementedError()
@ -35,36 +37,32 @@ class StorageAdapter(object):
# TODO erase the path
raise NotImplementedError()
def dedupe_name(self, path):
# TODO modify and return the passed path such that writing to it does not overwrite an existing file
# TODO it would probably be smart to hold some kind of lock on this file
raise NotImplementedError()
class FilesystemAdapter(StorageAdapter):
def __init__(self, root):
super().__init__()
self.root = root # root path
def file_exists(self, path):
def exists(self, path):
# TODO return true/false if the file path exists
raise NotImplementedError()
return os.path.exists(self.abspath(path))
def open(self, path, mode):
# TODO return a handle to the path. this should work as a context manager
raise NotImplementedError()
os.makedirs(os.path.dirname(self.abspath(path)), exist_ok=True)
return open(self.abspath(path), mode)
def delete(self, path):
# TODO erase the path
raise NotImplementedError()
# TODO delete the file
# TODO prune empty directories that were components of $path
os.unlink(self.abspath(path))
def dedupe_name(self, path):
# TODO modify and return the passed path such that writing to it does not overwrite an existing file
raise NotImplementedError()
def abspath(self, path):
return os.path.join(self.root, path)
class S3Adapter(StorageAdapter):
def file_exists(self, path):
def exists(self, path):
# TODO return true/false if the file path exists
raise NotImplementedError()
@ -76,10 +74,6 @@ class S3Adapter(StorageAdapter):
# TODO erase the path
raise NotImplementedError()
def dedupe_name(self, path):
# TODO modify and return the passed path such that writing to it does not overwrite an existing file
raise NotImplementedError()
class GfapiAdapter(StorageAdapter):
pass # TODO gluster storage backend
@ -91,40 +85,40 @@ class LibraryManager(object):
assert isinstance(storage, StorageAdapter)
self.storage = storage
def add_photoset(self, photoset):
"""
Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
as the file is moved to the library path.
"""
# Create target directory
path = os.path.join(self.path, self.get_datedir_path(photoset.date))
os.makedirs(path, exist_ok=True)
# def add_photoset(self, photoset):
# """
# Commit a populated photoset object to the library. The paths in the photoset's file list entries will be updated
# as the file is moved to the library path.
# """
# # Create target directory
# path = os.path.join(self.path, self.get_datedir_path(photoset.date))
# os.makedirs(path, exist_ok=True)
moves = [] # Track files moved. If the sql transaction fails, we'll undo these
# moves = [] # Track files moved. If the sql transaction fails, we'll undo these
for file in photoset.files:
dest = os.path.join(path, os.path.basename(file.path))
# for file in photoset.files:
# dest = os.path.join(path, os.path.basename(file.path))
# Check if the name is already in use, rename new file if needed
dupe_rename = 1
while os.path.exists(dest):
fname = os.path.basename(file.path).split(".")
fname[-2] += "_{}".format(dupe_rename)
dest = os.path.join(path, '.'.join(fname))
dupe_rename += 1
os.rename(file.path, dest)
moves.append((file.path, dest))
file.path = dest.lstrip(self.path)
# # Check if the name is already in use, rename new file if needed
# dupe_rename = 1
# while os.path.exists(dest):
# fname = os.path.basename(file.path).split(".")
# fname[-2] += "_{}".format(dupe_rename)
# dest = os.path.join(path, '.'.join(fname))
# dupe_rename += 1
# os.rename(file.path, dest)
# moves.append((file.path, dest))
# file.path = dest.lstrip(self.path)
s = self.session()
s.add(photoset)
try:
s.commit()
except IntegrityError:
# Commit failed, undo the moves
for move in moves:
os.rename(move[1], move[0])
raise
# s = self.session()
# s.add(photoset)
# try:
# s.commit()
# except IntegrityError:
# # Commit failed, undo the moves
# for move in moves:
# os.rename(move[1], move[0])
# raise
class PhotosApi(object):
@ -142,6 +136,7 @@ class PhotosApiV1(object):
yield f"<plaintext>hello, this is the api. my database is: {db}\n"
@cherrypy.expose
@cherrypy.tools.json_out()
def upload(self, files, meta):
"""
upload accepts one photoset (multiple images)
@ -158,72 +153,65 @@ class PhotosApiV1(object):
photo_date = datetime.fromisoformat(meta["date"])
basepath = photo_date.strftime("%Y/%m/%d/%Y-%m-%d_%H.%M.%S")
stored_files = []
photo_objs = []
def abort_upload():
for file in stored_files:
self.library.storage.delete(photo_path)
db.rollback()
print(traceback.format_exc())
# raise cherrypy.HTTPError(400, traceback.format_exc())
raise
for file in files:
# build path using the sha and extension. note that we trust the sha the client provided now & verify later
# something like 2019/06/25/2019-06-25_19.28.05_cea1a138.png
photo_meta = meta["files"][file.filename]
ext = get_extension(file.filename)
assert ext in known_extensions
photo_path = f"{basepath}_{photo_meta['hash'][0:8]}.{ext}"
print(photo_path)
# generate a path in the storage
# yyyy/mm/dd/yyyy-mm_hh.MM.ss_x.jpg
# dest = self.library.storage.dedupe_name()
try:
assert not self.library.storage.exists(photo_path), f"file already in library: {photo_path}"
except AssertionError:
abort_upload()
# write file to the path (and copy sha while in flight)
with closing(self.library.storage.open(photo_path, 'wb')) as f:
shasum = copysha(file.file, f)
stored_files.append(photo_path)
# if sha doesn't match uploaded metadata, abort
try:
assert shasum == photo_meta["hash"], "uploaded file didn't match provided sha"
except AssertionError:
abort_upload()
# create photo object for this entry
pass
p = Photo(hash=shasum,
path=photo_path,
format=photo_meta.get("format"), # TODO verify
size=photo_meta.get("size"), # TODO verify
width=photo_meta.get("width"),
height=photo_meta.get("height"),
orientation=photo_meta.get("orientation"))
# create photoset with the above photos
photo_objs.append(p)
# commit it
ps = PhotoSet(date=photo_date,
date_real=photo_date, # TODO support time offsets
files=photo_objs) # TODO support title field etc
# if commit fails, delete the files
db.add(ps)
# with tempfile.TemporaryDirectory() as tmpdir:
# finfo = []
# for file in files:
# # copy to local storage
# # TODO validate for funny paths like ../../ etc
# tmpphoto = os.path.join(tmpdir, file.filename)
# with open(tmpphoto, 'wb') as fout:
# shasum = copysha(file.file, fout)
try:
db.commit()
except IntegrityError:
abort_upload()
# finfo.append((tmpphoto, shasum, os.path.getsize(tmpphoto), ))
# # print("File name:", file.filename)
# # import hashlib
# # sha = hashlib.sha256()
# # total = 0
# # while True:
# # b = file.file.read(1024)
# # if not b:
# # break
# # sha.update(b)
# # total += len(b)
# # print("Read length:", total)
# # print("Read sha256:", sha.hexdigest())
# if str(file.filename) not in meta["files"].keys():
# raise cherrypy.HTTPError(400, f"no mdatadata provided for filename '{file.filename}'")
# print("we have metadata for this file:", meta["files"][file.filename])
# # create database objects based on the request
# # self.lib.add_photoset(set_, photos)
# # build file path (yyyy/mm/dd/yyyy-mm_hh.MM.ss_x.jpg) (incrementing X if the key already exists etc)
# # copy to storage
# # check if sha256 exists already
# # delete if dupe, raise error
# # (see file rewind code in ingest.py)
# # create records
# # commit
# # respond with list of uuids of the sets
# print("____")
return ps.to_json()
@cherrypy.expose
@cherrypy.tools.json_out()

View File

@ -129,7 +129,6 @@ def main():
print("skipping:", skipped)
print("sets:", [[f.path for f in s.files] for s in sets])
print(f"0 / {len(sets)}", end="")
for num, set_ in enumerate(sets):
payload = set_.to_json()
payload["files"] = {os.path.basename(photo.path): photo.to_json() for photo in set_.files}
@ -138,8 +137,10 @@ def main():
for file in set_.files:
files.append(("files", (os.path.basename(file.path), open(file.path, 'rb'), file.format), ))
client.upload(files, payload)
print(f"\r{num} / {len(sets)}", end="")
print("Uploading: ", [os.path.basename(file.path) for file in set_.files])
result = client.upload(files, payload)
print("Uploaded: ", result.json()["uuid"])
print(f"{num} / {len(sets)}")
# TODO be nice and close the files
elif args.action == "user":

View File

@ -12,6 +12,7 @@ regular_images = ["jpg", "png"]
files_raw = ["cr2", "xmp"]
files_video = ["mp4", "mov"]
mapped_extensions = {"jpg": {"jpeg", }} # target: aliases
known_mimes = {"image/jpeg"} # TODO enforce this
def map_extension(ext):
@ -77,7 +78,7 @@ class Photo(Base):
def to_json(self):
j = {attr: getattr(self, attr) for attr in
{"uuid", "size", "width", "height", "orientation", "format", "hash"}}
j["set"] = self.set.uuid
j["set"] = self.set.uuid if self.set else None
return j