photolib/photoapp/thumbserver.py

351 lines
12 KiB
Python

import os
import re
import logging
import cherrypy
import shutil
import tempfile
import traceback
import requests
import subprocess
from concurrent.futures import ThreadPoolExecutor
from threading import Thread, Semaphore
from contextlib import closing
from queue import LifoQueue, Empty
from shutil import copyfileobj
from urllib.parse import urlparse
from photoapp.dbutils import SAEnginePlugin, SATool, get_db_engine, create_db_sessionmaker
from photoapp.storage import uri_to_storage
from photoapp.types import User, Photo
from photoapp.common import pwhash
from photoapp.dbsession import DatabaseSession
from photoapp.thumb import thumb_path, image_file_style
from photoapp.webutils import validate_password
from photoapp.utils import genpw, get_extension
# Name of the internal account that setup_thumb_user() creates and the only
# username validate_thumbservice_password() will accept.
THUMBSERVICE_USER_INTERNAL = "_thumbservice"
# Matches the per-input summary line ffmpeg writes to stderr, e.g.
#   "  Duration: 00:00:00.94, start: 0.000000, bitrate: 15046 kb/s"
# The decimal point is escaped so that only a literal "." separates the seconds
# from the hundredths, and the hours field accepts two or more digits.
RE_DURATION = re.compile(
    r' Duration: (?P<hours>\d{2,}):(?P<minutes>\d\d):(?P<seconds>\d\d)\.(?P<decseconds>\d\d),'
)


def get_video_duration(srcpath, timeout=30):
    """
    Get the duration of a video, in seconds, by parsing ffmpeg stderr output line:
    Duration: 00:00:00.94, start: 0.000000, bitrate: 15046 kb/s

    :param srcpath: path of the video file handed to ffmpeg via "-i"
    :param timeout: seconds to wait for ffmpeg before giving up
    :return: duration in seconds as a float, or 0.0 on any failure (ffmpeg could not be started, timed out,
             exited with an unexpected status, produced no stderr, or printed no parseable Duration line)
    """
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-i", srcpath,
    ]

    try:
        p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.TimeoutExpired:
        logging.error("ffmpeg length: timed out")
        return 0.0
    except OSError as e:
        # ffmpeg missing or not executable: report it like every other failure instead of raising
        logging.error("ffmpeg length: could not execute ffmpeg: %s", e)
        return 0.0

    # No output file is passed on the command line, so exit status 1 is the outcome this code treats as normal.
    if p.returncode != 1:
        logging.error("ffmpeg length: unexpected return code %s", p.returncode)
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    if not p.stderr:
        logging.error("ffmpeg length: no stderr")
        return 0.0

    # stderr may echo file metadata in arbitrary encodings; never let a stray byte abort the parse
    stderr = p.stderr.decode("utf-8", errors="replace")

    match = RE_DURATION.search(stderr)
    if not match:
        logging.error("ffmpeg length: could not find duration")
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    times = match.groupdict()
    return int(times["hours"]) * 60 * 60 + \
        int(times["minutes"]) * 60 + \
        int(times["seconds"]) + \
        int(times["decseconds"]) / 100
def get_video_thumb(srcpath, outpath, timeout=30):
    """
    Write a single still frame of the video at srcpath to outpath using ffmpeg.

    :param srcpath: path of the source video
    :param outpath: path ffmpeg should write the image to
    :param timeout: seconds allowed for the duration probe and again for the extraction
    :return: True if ffmpeg exited with status 0 and outpath exists afterwards; False if the duration probe
             returned 0.0, ffmpeg timed out, or no image was produced
    """
    length = get_video_duration(srcpath, timeout)
    if length == 0.0:
        return False

    # take the frame one second in for clips longer than five seconds, otherwise the very first frame
    seek_to = "0"
    if length > 5:
        seek_to = "1"

    args = ["ffmpeg", "-hide_banner", "-loglevel", "error"]
    args += ["-i", srcpath]
    args += ["-vframes", "1"]  # emit exactly one frame
    args += ["-an"]  # no audio stream in the output
    # an explicit output size ("-s", "400x222") is intentionally not passed
    args += ["-ss", seek_to]
    args.append(outpath)

    try:
        result = subprocess.run(args, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.TimeoutExpired:
        logging.error("ffmpeg: timed out")
        return False

    if result.returncode == 0 and os.path.exists(outpath):
        return True

    logging.error("ffmpeg: no image produced.")
    logging.error("ffmpeg return code: %s", result.returncode)
    logging.error("ffmpeg stdout: %s", result.stdout)
    logging.error("ffmpeg stderr: %s", result.stderr)
    return False
def setup_thumb_user(engine):
    """
    Make sure the internal User used to talk to this service exists, creating it when it does not.

    The stored password hash is read from the THUMBSERVICE_INITIAL_PASSWORD_HASH environment variable. When
    that variable is unset or empty, a password is generated, logged at warning level and hashed.

    :param engine: database engine passed to create_db_sessionmaker
    """
    make_session = create_db_sessionmaker(engine)
    with closing(make_session()) as session:
        existing = session.query(User).filter(User.name == THUMBSERVICE_USER_INTERNAL).first()
        if not existing:
            hashed = os.environ.get("THUMBSERVICE_INITIAL_PASSWORD_HASH")
            if not hashed:
                generated = genpw()
                logging.warning("created thumbserver user: %s:%s", THUMBSERVICE_USER_INTERNAL, generated)
                hashed = pwhash(generated)
            session.add(User(name=THUMBSERVICE_USER_INTERNAL, password=hashed))
            session.commit()
def validate_thumbservice_password(realm, username, password):
    """
    Password check that only lets the internal thumbservice user through.

    Any other username is rejected with False without consulting validate_password; for the internal user
    the result of validate_password(realm, username, password) is returned.
    """
    if username == THUMBSERVICE_USER_INTERNAL:
        return validate_password(realm, username, password)
    return False
class ThumbWorker(Thread):
    """
    Daemon thread that consumes (image_uuid, style_name) tuples from a LIFO queue and generates the
    corresponding thumbnails on a thread pool.

    :param engine: database engine used to open one session per task
    :param library: storage object the source files are read from
    :param cache: storage object the finished thumbnails are written to
    :param max_workers: size of the thread pool and of the throttle semaphore
    """

    def __init__(self, engine, library, cache, max_workers=4):
        super().__init__()
        self.daemon = True
        self.queue = LifoQueue()
        self.engine = engine
        self.library = library
        self.cache = cache
        self.max_workers = max_workers
        # one slot per pool worker; see run() for why the pool is throttled
        self.throttle = Semaphore(max_workers)

    def run(self):
        """
        Loop forever, moving items from the queue into the thread pool.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            while True:
                try:
                    item = self.queue.get(block=True, timeout=5.0)
                except Empty:
                    continue
                # semaphore is used so that the queue is not immediately consumed into waiting Futures in the pool.
                # this is to preserve the LIFO behavior of the queue
                self.throttle.acquire()
                pool.submit(self.handle_task, item)
                qlen = self.queue.qsize()
                if qlen:
                    logging.info("images to process: %s", qlen)

    def handle_task(self, item):
        """
        Process one queued item inside a fresh database session and temporary directory.

        Failures are logged and swallowed so one bad item cannot stop the worker. The queue bookkeeping and the
        throttle slot are always released, whatever happens while processing.

        :param item: (image_uuid, style_name) tuple as placed on the queue
        """
        try:
            # unpacking happens inside the try so a malformed item cannot leak a throttle slot
            image_uuid, style_name = item
            with (
                closing(create_db_sessionmaker(self.engine)()) as s,
                tempfile.TemporaryDirectory() as d,
            ):
                self.do_thumb(image_uuid, style_name, s, d)
        except Exception:
            logging.exception("thumbnail task failed: %s", item)  # TODO something like _failed_thumbs_cache
            # TODO handle errors differently, like
            # db error -> kill program
            # filesystem error -> kill program
            # PIL error -> ignore
        finally:
            self.queue.task_done()
            self.throttle.release()

    def do_thumb(self, image_uuid, style_name, session, tmpdir):
        """
        Generate a thumbnail for the given image identified by uuid

        :param image_uuid: uuid of the Photo row to process
        :param style_name: thumbnail style, used for the cache path and passed to image_file_style
        :param session: open database session
        :param tmpdir: scratch directory for the downloaded source and intermediate images
        """
        # find the image
        image = session.query(Photo).filter(Photo.uuid == image_uuid).first()
        if not image:
            logging.info("attempted invalid uuid: %s", image_uuid)
            return

        # Bail if it exists in storage already
        cache_path = thumb_path(style_name, image_uuid)
        if self.cache.exists(cache_path):
            return

        # download the image
        local_src_path = os.path.join(tmpdir, "input.{}".format(get_extension(image.fname)))
        thumb_tmp_path = os.path.join(tmpdir, "thumb.jpg")

        # TODO simplify low level operations like this
        with (
            self.library.open(image.path, "rb") as src,
            open(local_src_path, "wb") as dest,
        ):
            shutil.copyfileobj(src, dest)

        # generate a still from the image
        if not get_video_thumb(local_src_path, thumb_tmp_path):
            logging.error("video extraction failed: %s", image_uuid)
            return  # TODO something like _failed_thumbs_cache

        # Do normal cropping of the thumb
        thumb_cropped_path = os.path.join(tmpdir, "thumb_cropped.jpg")
        image_file_style(thumb_tmp_path, thumb_cropped_path, style_name, image.orientation)

        # copy thumbnail to cache storage
        with (
            open(thumb_cropped_path, 'rb') as fsrc,
            closing(self.cache.open(cache_path, 'wb')) as fdest
        ):
            shutil.copyfileobj(fsrc, fdest)

        # report the size of the file that was actually stored, not of the intermediate raw frame
        logging.info("processed %s: %sb", image_uuid, str(os.path.getsize(thumb_cropped_path)))
class ThumbServiceWeb(object):
    """
    CherryPy handler object exposing the thumbnail service endpoints.

    :param queue_thumbnail: callable invoked with one (uuid, style) tuple per thumbnail request
    """

    def __init__(self, queue_thumbnail):
        self.queue_thumbnail = queue_thumbnail

    @cherrypy.expose
    def index(self):
        """
        Respond with a fixed status string.
        """
        yield "photoapp thumbnail service OK"

    @cherrypy.expose
    def thumb(self, uuid, style):
        """
        Request a thumbnail for the identified file by handing (uuid, style) to the queue callable. Repeated
        requests for the same file are fine; they are ignored later on.
        """
        request = (uuid, style)
        self.queue_thumbnail(request)
        yield "ok"
class ThumbClient(object):
    """
    Client for interacting with the thumbserver api

    :param server_uri: base uri of the thumbserver, e.g. "http://user:pass@host:8081/prefix". Credentials in
                       the uri, if any, are used for HTTP auth on every request. The port defaults to 80 for
                       http and 443 for https; any other scheme without an explicit port raises KeyError.
    """

    def __init__(self, server_uri):
        self.session = requests.Session()

        uri = urlparse(server_uri)
        self.server_url = self._build_server_url(server_uri)
        if uri.username:
            self.session.auth = (uri.username, uri.password, )

        # never retry failed requests, regardless of the scheme of the server uri
        a = requests.adapters.HTTPAdapter(max_retries=0)
        self.session.mount('http://', a)
        self.session.mount('https://', a)

    @staticmethod
    def _build_server_url(server_uri):
        """
        Return "scheme://host:port[/path]" for server_uri, without credentials and without a trailing slash.
        """
        uri = urlparse(server_uri)
        port = uri.port or dict(http=80, https=443)[uri.scheme]
        url = f"{uri.scheme}://{uri.hostname}:{port}"
        # uri.path already starts with "/"; strip the slashes so joining never produces "//"
        path = uri.path.strip("/")
        if path:
            url = url + "/" + path
        return url

    def request_thumb(self, photo_uuid, style_name):
        """
        Ask the server to generate the style_name thumbnail for photo_uuid. The response is not inspected.
        """
        self.session.get(self.server_url + "/thumb", params=dict(uuid=photo_uuid, style=style_name))
def main():
    """
    Command line entry point of the thumbnail service.

    Parses arguments (with environment variable fallbacks), ensures the internal service user exists, starts
    the ThumbWorker thread, mounts ThumbServiceWeb in cherrypy and blocks until the engine exits. Missing or
    invalid arguments terminate the process through argparse's error handling.
    """
    # this is a slimmed down version of daemon.py TODO dedupe me
    import argparse
    import signal

    parser = argparse.ArgumentParser(description="Photod photo server")

    parser.add_argument('-p', '--port', help="tcp port to listen on",
                        default=int(os.environ.get("THUMBSERVICE_PORT", 8081)), type=int)
    parser.add_argument('-l', '--library', default=os.environ.get("STORAGE_URL"), help="library path")
    parser.add_argument('-c', '--cache', default=os.environ.get("CACHE_URL"), help="cache url")
    # https://docs.sqlalchemy.org/en/13/core/engines.html
    parser.add_argument('-s', '--database', help="sqlalchemy database connection uri",
                        default=os.environ.get("DATABASE_URL"))
    parser.add_argument('--debug', action="store_true", help="enable development options")
    parser.add_argument('--max-workers', type=int, default=4, help="number of image download/process threads")

    args = parser.parse_args()

    if not args.database:
        parser.error("--database or DATABASE_URL is required")

    if not args.library:
        parser.error("--library or STORAGE_URL is required")

    if not args.cache:
        parser.error("--cache or CACHE_URL is required")

    # fail at startup instead of later inside the worker thread, where a bad pool size would go unnoticed
    if args.max_workers < 1:
        parser.error("--max-workers must be at least 1")

    logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
                        format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")

    # Get database connection
    engine = get_db_engine(args.database)

    setup_thumb_user(engine)

    # Setup database in web framework
    cherrypy.tools.db = SATool()
    SAEnginePlugin(cherrypy.engine, engine).subscribe()

    # Create various internal tools
    library_storage = uri_to_storage(args.library)
    cache_storage = uri_to_storage(args.cache)
    thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage, args.max_workers)
    thumbnail_worker.start()

    # Setup and mount web ui
    web = ThumbServiceWeb(thumbnail_worker.queue.put)

    cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
                                         'tools.db.on': True, },
                                   '/thumb': {'tools.auth_basic.on': True,
                                              'tools.auth_basic.realm': 'thumbservice',
                                              'tools.auth_basic.checkpassword': validate_thumbservice_password}})

    # General config options
    cherrypy.config.update({
        'tools.sessions.storage_class': DatabaseSession,
        'tools.sessions.on': True,
        'tools.sessions.locking': 'explicit',
        'tools.sessions.timeout': 525600,
        'request.show_tracebacks': True,
        'server.socket_port': args.port,
        'server.thread_pool': 5,
        'server.socket_host': '0.0.0.0',
        'server.show_tracebacks': True,
        'log.screen': False,
        'engine.autoreload.on': args.debug,
    })

    # Setup signal handling and run it.
    def signal_handler(signum, stack):
        logging.critical('Got sig %s, exiting...', signum)
        cherrypy.engine.exit()

    signal.signal(signal.SIGINT, signal_handler)
    signal.signal(signal.SIGTERM, signal_handler)

    try:
        cherrypy.engine.start()
        cherrypy.engine.block()
    finally:
        logging.info("API has shut down")
        cherrypy.engine.exit()
# Start the service only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()