diff --git a/README.md b/README.md index 54305de..2fee9e8 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Arguments are as follows: * `--database sqlite:///photos.db` - [Sqlalchemy](https://docs.sqlalchemy.org/en/13/core/engines.html) connection uri * `--cache file://./cache` - storage uri to use as a cache for things like thumbnails. It can be the same as the library * `--port 8080` - listen on http on port 8080 +* `--thumb-service` - optional address of thumbnail service - see below. Supported library uri schemes are: @@ -70,6 +71,11 @@ Besides browsing, most interaction with Photolib will be done using its CLI tool username, and password details need to be provided to the cli via a url flag not shown here. See `photocli --help` for more information. +Optional: Photolib uses a secondary service to generate thumbnails for video files. After the above installation +instructions are complete, run `photothumbd` on a different port with the same arguments. On first run, it will create +a user for internal communications and log the username/password. This username/password must be used in the +`--thumb-service` for `photoappd`. 
+ Commands -------- diff --git a/photoapp/daemon.py b/photoapp/daemon.py index 250da7e..fb163d4 100644 --- a/photoapp/daemon.py +++ b/photoapp/daemon.py @@ -14,6 +14,7 @@ from photoapp.api import PhotosApi, LibraryManager from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, date_format from photoapp.utils import auth, require_auth, photoset_auth_filter, slugify, cherryparam, number_format from photoapp.storage import uri_to_storage +from photoapp.webutils import validate_password from jinja2 import Environment, FileSystemLoader, select_autoescape from sqlalchemy import desc, func, and_, or_ @@ -21,12 +22,6 @@ from sqlalchemy import desc, func, and_, or_ APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) -def validate_password(realm, username, password): - if db.query(User).filter(User.name == username, User.password == pwhash(password)).first(): - return True - return False - - class PhotosWeb(object): """ Http root of the UI webserver diff --git a/photoapp/thumbserver.py b/photoapp/thumbserver.py index d24ca82..6c57274 100644 --- a/photoapp/thumbserver.py +++ b/photoapp/thumbserver.py @@ -1,84 +1,175 @@ import os +import re import logging import cherrypy import shutil import tempfile import traceback import requests -from threading import Thread +import subprocess +from concurrent.futures import ThreadPoolExecutor +from threading import Thread, Semaphore from contextlib import closing -from queue import Queue, Empty +from queue import LifoQueue, Empty from shutil import copyfileobj -from subprocess import check_call -from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, create_db_sessionmaker +from urllib.parse import urlparse +from photoapp.dbutils import SAEnginePlugin, SATool, get_db_engine, create_db_sessionmaker from photoapp.storage import uri_to_storage -from photoapp.api import PhotosApi, LibraryManager from photoapp.types import User, Photo -from photoapp.utils import require_auth from 
photoapp.common import pwhash from photoapp.dbsession import DatabaseSession from photoapp.thumb import thumb_path, image_file_style +from photoapp.webutils import validate_password +from photoapp.utils import genpw -def get_video_thumb(srcpath, outpath): - #TODO limit execution time +THUMBSERVICE_USER_INTERNAL = "_thumbservice" +RE_DURATION = re.compile(r' Duration: (?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d).(?P<decseconds>\d\d),') + + +def get_video_duration(srcpath, timeout=30): + """ + Get the duration of a video, in seconds, by parsing ffmpeg stderr output line: + Duration: 00:00:00.94, start: 0.000000, bitrate: 15046 kb/s + """ + cmd = [ + "ffmpeg", + "-hide_banner", + "-i", srcpath, + ] + + try: + p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.TimeoutExpired: + logging.error("ffmpeg length: timed out") + return 0.0 + + if p.returncode != 1: + logging.error("ffmpeg length: unexpected return code %s", p.returncode) + logging.error("ffmpeg stdout: %s", p.stdout) + logging.error("ffmpeg stderr: %s", p.stderr) + return 0.0 + + if not p.stderr: + logging.error("ffmpeg length: no stderr") + return 0.0 + + stderr = p.stderr.decode() + match = RE_DURATION.search(stderr) + if not match: + logging.error("ffmpeg length: could not find duration") + logging.error("ffmpeg stdout: %s", p.stdout) + logging.error("ffmpeg stderr: %s", p.stderr) + return 0.0 + + times = match.groupdict() + + return int(times["hours"]) * 60 * 60 + \ + int(times["minutes"]) * 60 + \ + int(times["seconds"]) + \ + int(times["decseconds"]) / 100 + + +def get_video_thumb(srcpath, outpath, timeout=30): + + duration = get_video_duration(srcpath, timeout) + + if duration == 0.0: + return False cmd = [ "ffmpeg", + "-hide_banner", + "-loglevel", "error", "-i", srcpath, "-vframes", "1", # Output one frame "-an", # Disable audio # "-s", "400x222" # Output size - "-ss", "1", # grab the frame from 1 second into the video + "-ss", "1" if duration > 5 else "0", outpath ] - 
#TODO capture output and only log on error - check_call(cmd) + try: + p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + except subprocess.TimeoutExpired: + logging.error("ffmpeg: timed out") + return False + + if p.returncode != 0 or not os.path.exists(outpath): + logging.error("ffmpeg: no image produced.") + logging.error("ffmpeg return code: %s", p.returncode) + logging.error("ffmpeg stdout: %s", p.stdout) + logging.error("ffmpeg stderr: %s", p.stderr) + return False + + return True def setup_thumb_user(engine): - #TODO create the internal User used to talk to this service - # if user doesnt exist - # create - # log the password - pass + # create the internal User used to talk to this service + with closing(create_db_sessionmaker(engine)()) as s: + u = s.query(User).filter(User.name == THUMBSERVICE_USER_INTERNAL).first() + if u: + return + + password = genpw() + + logging.warning("created thumbserver user: %s:%s", THUMBSERVICE_USER_INTERNAL, password) + s.add(User(name=THUMBSERVICE_USER_INTERNAL, password=pwhash(password))) + s.commit() + + +def validate_thumbservice_password(realm, username, password): + if username != THUMBSERVICE_USER_INTERNAL: + return False + return validate_password(realm, username, password) class ThumbWorker(Thread): - def __init__(self, engine, library, cache): + def __init__(self, engine, library, cache, max_workers=4): super().__init__() self.daemon = True - self.queue = Queue() + self.queue = LifoQueue() self.engine = engine self.library = library self.cache = cache + self.max_workers = max_workers + self.throttle = Semaphore(max_workers) def run(self): - logged_empty = False - while True: - try: - image_uuid, style_name = self.queue.get(block=True, timeout=5.0) - logged_empty = False - except Empty: - if not logged_empty: - logging.info("queue empty") - logged_empty = True - continue + with ThreadPoolExecutor(max_workers=self.max_workers) as pool: + while True: + try: + item = 
self.queue.get(block=True, timeout=5.0) + except Empty: + continue - try: - with ( - closing(create_db_sessionmaker(self.engine)()) as s, - tempfile.TemporaryDirectory() as d, - ): - self.do_thumb(image_uuid, style_name, s, d) - except: - traceback.print_exc() #TODO something like _failed_thumbs_cache - #TODO handle errors differently, like - # db error -> kill program - # filesystem error -> kill program - # PIL error -> ignore + # semaphore is used so that the queue is not immediately consumed into waiting Futures in the pool. + # this is to preserve the LIFO behavior of the queue + self.throttle.acquire() + pool.submit(self.handle_task, item) + + qlen = self.queue.qsize() + if qlen: + logging.info("images to process: %s", qlen) + + def handle_task(self, item): + image_uuid, style_name = item + try: + with ( + closing(create_db_sessionmaker(self.engine)()) as s, + tempfile.TemporaryDirectory() as d, + ): + self.do_thumb(image_uuid, style_name, s, d) + except: + traceback.print_exc() #TODO something like _failed_thumbs_cache + #TODO handle errors differently, like + # db error -> kill program + # filesystem error -> kill program + # PIL error -> ignore + self.queue.task_done() + self.throttle.release() def do_thumb(self, image_uuid, style_name, session, tmpdir): """ @@ -97,7 +188,7 @@ class ThumbWorker(Thread): return # download the image - local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized? + local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized? use temp. 
thumb_tmp_path = os.path.join(tmpdir, "thumb.jpg") with ( self.library.open(image.path, "rb") as src, @@ -106,9 +197,9 @@ class ThumbWorker(Thread): shutil.copyfileobj(src, dest) # generate a still from the image - get_video_thumb(local_src_path, thumb_tmp_path) - - logging.info("generated %s: %sb", thumb_tmp_path, str(os.path.getsize(thumb_tmp_path))) + if not get_video_thumb(local_src_path, thumb_tmp_path): + logging.error("video extraction failed: %s", image_uuid) + return # TODO something like _failed_thumbs_cache # Do normal cropping of the thumb thumb_cropped_path = os.path.join(tmpdir, "thumb_cropped.jpg") @@ -121,6 +212,8 @@ class ThumbWorker(Thread): ): copyfileobj(fsrc, fdest) + logging.info("processed %s: %sb", image_uuid, str(os.path.getsize(thumb_tmp_path))) + class ThumbServiceWeb(object): def __init__(self, queue_thumbnail): @@ -131,7 +224,6 @@ class ThumbServiceWeb(object): yield "photoapp thumbnail service OK" @cherrypy.expose - # @require_auth def thumb(self, uuid, style): """ Generate a thumbnail for the file identified. Calling this endpoint adds the image to the queue. 
Duplicate @@ -145,21 +237,24 @@ class ThumbClient(object): """ Client for interacting with the thumbserver api """ - def __init__(self, server_url): - self.server_url = server_url + def __init__(self, server_uri): self.session = requests.Session() + + uri = urlparse(server_uri) + port = uri.port or dict(http=80, https=443)[uri.scheme] + host = f"{uri.scheme}://{uri.hostname}:{port}" + if uri.path: + host = host + "/" + uri.path + + if uri.username: + self.session.auth = (uri.username, uri.password, ) + + self.server_url = host a = requests.adapters.HTTPAdapter(max_retries=0) self.session.mount('http://', a) def request_thumb(self, photo_uuid, style_name): - self.session.get(self.server_url, params=dict(uuid=photo_uuid, style=style_name)) - - -# TODO dedupe me -def validate_password(realm, username, password): - if db.query(User).filter(User.name == username, User.password == pwhash(password)).first(): - return True - return False + self.session.get(self.server_url + "/thumb", params=dict(uuid=photo_uuid, style=style_name)) def main(): @@ -177,6 +272,7 @@ def main(): parser.add_argument('-s', '--database', help="sqlalchemy database connection uri", default=os.environ.get("DATABASE_URL")), parser.add_argument('--debug', action="store_true", help="enable development options") + parser.add_argument('--max-workers', type=int, default=4, help="number of image download/process threads") args = parser.parse_args() @@ -203,24 +299,17 @@ def main(): # Create various internal tools library_storage = uri_to_storage(args.library) - library_manager = LibraryManager(library_storage) cache_storage = uri_to_storage(args.cache) - thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage) + thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage, args.max_workers) thumbnail_worker.start() # Setup and mount web ui web = ThumbServiceWeb(thumbnail_worker.queue.put) cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False, - 'tools.db.on': True, }}) 
- - # Setup and mount API - api = PhotosApi(library_manager) - cherrypy.tree.mount(api, '/api', {'/': {'tools.sessions.on': False, - 'tools.trailing_slash.on': False, - 'tools.auth_basic.on': True, - 'tools.auth_basic.realm': 'photolib', - 'tools.auth_basic.checkpassword': validate_password, - 'tools.db.on': True}}) + 'tools.db.on': True, }, + '/thumb': {'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'thumbservice', + 'tools.auth_basic.checkpassword': validate_thumbservice_password}}) # General config options cherrypy.config.update({ diff --git a/photoapp/utils.py b/photoapp/utils.py index f99fe35..2e8b065 100644 --- a/photoapp/utils.py +++ b/photoapp/utils.py @@ -2,6 +2,8 @@ import os import cherrypy from photoapp.types import PhotoSet, PhotoStatus import hashlib +from random import choice +import string def copysha(fpin, fpout): @@ -45,7 +47,7 @@ def require_auth(func): """ def wrapped(*args, **kwargs): if not auth(): - raise cherrypy.HTTPError(403) + raise cherrypy.HTTPError(403, "Authentication required") return func(*args, **kwargs) return wrapped @@ -78,3 +80,7 @@ def cherryparam(v, type_=str): def number_format(value): return format(int(value), ',d') + + +def genpw(length=16): + return ''.join([choice(string.ascii_lowercase + string.ascii_uppercase + string.digits) for _ in range(0, length)]) diff --git a/photoapp/webutils.py b/photoapp/webutils.py new file mode 100644 index 0000000..87426eb --- /dev/null +++ b/photoapp/webutils.py @@ -0,0 +1,9 @@ +from photoapp.types import User +from photoapp.dbutils import db +from photoapp.common import pwhash + + +def validate_password(realm, username, password): + if db.query(User).filter(User.name == username, User.password == pwhash(password)).first(): + return True + return False