complete thumbservice auth, docs, parallelism, and client

This commit is contained in:
dave 2021-08-22 15:47:15 -07:00
parent 53ede96df3
commit 15c2133f35
5 changed files with 179 additions and 74 deletions

View File

@ -50,6 +50,7 @@ Arguments are as follows:
* `--database sqlite:///photos.db` - [Sqlalchemy](https://docs.sqlalchemy.org/en/13/core/engines.html) connection uri
* `--cache file://./cache` - storage uri to use as a cache for things like thumbnails. It can be the same as the library
* `--port 8080` - listen on http on port 8080
* `--thumb-service` - optional address of thumbnail service - see below.
Supported library uri schemes are:
@ -70,6 +71,11 @@ Besides browsing, most interaction with Photolib will be done using its CLI tool
username, and password details need to be provided to the cli via a url flag not shown here. See `photocli --help` for
more information.
Optional: Photolib uses a secondary service to generate thumbnails for video files. After the above installation
instructions are complete, run `photothumbd` on a different port with the same arguments. On first run, it will create
a user for internal communications and log the username/password. This username/password must be included in the
`--thumb-service` address given to `photoappd`.
Commands
--------

View File

@ -14,6 +14,7 @@ from photoapp.api import PhotosApi, LibraryManager
from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, date_format
from photoapp.utils import auth, require_auth, photoset_auth_filter, slugify, cherryparam, number_format
from photoapp.storage import uri_to_storage
from photoapp.webutils import validate_password
from jinja2 import Environment, FileSystemLoader, select_autoescape
from sqlalchemy import desc, func, and_, or_
@ -21,12 +22,6 @@ from sqlalchemy import desc, func, and_, or_
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
def validate_password(realm, username, password):
    """
    Basic-auth password check: True when a User row matches both the username and the hash of the password.
    The realm argument is accepted but not consulted.
    """
    matching_user = db.query(User).filter(User.name == username, User.password == pwhash(password)).first()
    return bool(matching_user)
class PhotosWeb(object):
"""
Http root of the UI webserver

View File

@ -1,84 +1,175 @@
import os
import re
import logging
import cherrypy
import shutil
import tempfile
import traceback
import requests
from threading import Thread
import subprocess
from concurrent.futures import ThreadPoolExecutor
from threading import Thread, Semaphore
from contextlib import closing
from queue import Queue, Empty
from queue import LifoQueue, Empty
from shutil import copyfileobj
from subprocess import check_call
from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, create_db_sessionmaker
from urllib.parse import urlparse
from photoapp.dbutils import SAEnginePlugin, SATool, get_db_engine, create_db_sessionmaker
from photoapp.storage import uri_to_storage
from photoapp.api import PhotosApi, LibraryManager
from photoapp.types import User, Photo
from photoapp.utils import require_auth
from photoapp.common import pwhash
from photoapp.dbsession import DatabaseSession
from photoapp.thumb import thumb_path, image_file_style
from photoapp.webutils import validate_password
from photoapp.utils import genpw
def get_video_thumb(srcpath, outpath):
#TODO limit execution time
THUMBSERVICE_USER_INTERNAL = "_thumbservice"
# Matches the "Duration: HH:MM:SS.cc," fragment of ffmpeg's input description. The dot before the
# centiseconds is escaped so that only a literal "." is accepted there.
RE_DURATION = re.compile(r' Duration: (?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d)\.(?P<decseconds>\d\d),')


def get_video_duration(srcpath, timeout=30):
    """
    Get the duration of a video, in seconds, by parsing ffmpeg stderr output line:
      Duration: 00:00:00.94, start: 0.000000, bitrate: 15046 kb/s

    :param srcpath: path of the video file handed to ffmpeg via -i
    :param timeout: maximum number of seconds ffmpeg is allowed to run
    :return: the duration in seconds as a float, or 0.0 if it could not be determined (timeout, unexpected exit
             status, empty stderr, or no Duration line found)
    """
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-i", srcpath,
    ]

    try:
        p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.TimeoutExpired:
        logging.error("ffmpeg length: timed out")
        return 0.0

    # ffmpeg is invoked without an output file; exit status 1 is treated as the normal outcome here and
    # anything else as a failure.
    if p.returncode != 1:
        logging.error("ffmpeg length: unexpected return code %s", p.returncode)
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    if not p.stderr:
        logging.error("ffmpeg length: no stderr")
        return 0.0

    # stderr can echo file metadata in arbitrary encodings; never let a bad byte abort the parse
    stderr = p.stderr.decode(errors="replace")

    match = RE_DURATION.search(stderr)
    if not match:
        logging.error("ffmpeg length: could not find duration")
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    times = match.groupdict()

    return int(times["hours"]) * 60 * 60 + \
        int(times["minutes"]) * 60 + \
        int(times["seconds"]) + \
        int(times["decseconds"]) / 100
def get_video_thumb(srcpath, outpath, timeout=30):
    """
    Extract a single still frame from a video file using ffmpeg.

    :param srcpath: path of the source video
    :param outpath: path the extracted frame is written to
    :param timeout: maximum number of seconds allowed for each ffmpeg invocation (duration probe and extraction)
    :return: True if ffmpeg exited 0 and outpath exists afterwards, False otherwise
    """
    duration = get_video_duration(srcpath, timeout)
    if duration == 0.0:
        # duration probe failed (it has already logged why); nothing sensible to extract
        return False

    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel", "error",
        "-i", srcpath,
        "-vframes", "1",  # Output one frame
        "-an",  # Disable audio
        # "-s", "400x222" # Output size
        # grab the frame from 1 second into the video, or from the very start for clips of 5 seconds or less
        "-ss", "1" if duration > 5 else "0",
        outpath
    ]

    # ffmpeg is run exactly once, with a timeout and with its output captured so it is only logged on error
    try:
        p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.TimeoutExpired:
        logging.error("ffmpeg: timed out")
        return False

    if p.returncode != 0 or not os.path.exists(outpath):
        logging.error("ffmpeg: no image produced.")
        logging.error("ffmpeg return code: %s", p.returncode)
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return False

    return True
def setup_thumb_user(engine):
    """
    Create the internal User used to talk to this service, if it does not exist yet.

    A password is generated with genpw(), stored hashed via pwhash(), and logged once at warning level so the
    operator can copy it. Nothing is changed or logged when the user already exists.

    :param engine: database engine passed to create_db_sessionmaker
    """
    with closing(create_db_sessionmaker(engine)()) as s:
        if s.query(User).filter(User.name == THUMBSERVICE_USER_INTERNAL).first():
            return

        password = genpw()
        s.add(User(name=THUMBSERVICE_USER_INTERNAL, password=pwhash(password)))
        s.commit()
        # log only after the commit went through, so credentials are never advertised for a user that
        # failed to be created
        logging.warning("created thumbserver user: %s:%s", THUMBSERVICE_USER_INTERNAL, password)
def validate_thumbservice_password(realm, username, password):
    """
    Basic-auth check that only ever accepts the internal thumbservice user.

    Any other username is rejected without consulting validate_password; for the internal user the result
    of validate_password is returned as-is.
    """
    is_internal_user = username == THUMBSERVICE_USER_INTERNAL
    return validate_password(realm, username, password) if is_internal_user else False
class ThumbWorker(Thread):
def __init__(self, engine, library, cache):
def __init__(self, engine, library, cache, max_workers=4):
super().__init__()
self.daemon = True
self.queue = Queue()
self.queue = LifoQueue()
self.engine = engine
self.library = library
self.cache = cache
self.max_workers = max_workers
self.throttle = Semaphore(max_workers)
def run(self):
    """
    Thread body: loop forever, handing queued (image_uuid, style_name) items to a thread pool where
    handle_task processes them.
    """
    with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
        while True:
            # semaphore is used so that the queue is not immediately consumed into waiting Futures in the pool.
            # this is to preserve the LIFO behavior of the queue. The permit is taken *before* dequeuing so
            # that no item is pulled off the queue (and frozen in place) while every worker is still busy.
            self.throttle.acquire()
            try:
                item = self.queue.get(block=True, timeout=5.0)
            except Empty:
                # nothing to do; hand the permit back and poll again
                self.throttle.release()
                continue

            # handle_task releases the permit when the task finishes
            pool.submit(self.handle_task, item)

            qlen = self.queue.qsize()
            if qlen:
                logging.info("images to process: %s", qlen)
def handle_task(self, item):
    """
    Process one queued item inside the thread pool.

    Opens a database session and a temporary directory for the task and calls do_thumb. Failures are logged
    and swallowed; the queue's task_done() and the throttle permit release always happen, so a failing or
    malformed item cannot stall the worker.

    :param item: (image_uuid, style_name) tuple taken from the queue
    """
    try:
        image_uuid, style_name = item
        with (
            closing(create_db_sessionmaker(self.engine)()) as s,
            tempfile.TemporaryDirectory() as d,
        ):
            self.do_thumb(image_uuid, style_name, s, d)
    except Exception:
        logging.exception("thumbnail task failed: %r", item)  # TODO something like _failed_thumbs_cache
        # TODO handle errors differently, like
        # db error -> kill program
        # filesystem error -> kill program
        # PIL error -> ignore
    finally:
        self.queue.task_done()
        self.throttle.release()
def do_thumb(self, image_uuid, style_name, session, tmpdir):
"""
@ -97,7 +188,7 @@ class ThumbWorker(Thread):
return
# download the image
local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized?
local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized? use temp.<extension>
thumb_tmp_path = os.path.join(tmpdir, "thumb.jpg")
with (
self.library.open(image.path, "rb") as src,
@ -106,9 +197,9 @@ class ThumbWorker(Thread):
shutil.copyfileobj(src, dest)
# generate a still from the image
get_video_thumb(local_src_path, thumb_tmp_path)
logging.info("generated %s: %sb", thumb_tmp_path, str(os.path.getsize(thumb_tmp_path)))
if not get_video_thumb(local_src_path, thumb_tmp_path):
logging.error("video extraction failed: %s", image_uuid)
return # TODO something like _failed_thumbs_cache
# Do normal cropping of the thumb
thumb_cropped_path = os.path.join(tmpdir, "thumb_cropped.jpg")
@ -121,6 +212,8 @@ class ThumbWorker(Thread):
):
copyfileobj(fsrc, fdest)
logging.info("processed %s: %sb", image_uuid, str(os.path.getsize(thumb_tmp_path)))
class ThumbServiceWeb(object):
def __init__(self, queue_thumbnail):
@ -131,7 +224,6 @@ class ThumbServiceWeb(object):
yield "photoapp thumbnail service OK"
@cherrypy.expose
# @require_auth
def thumb(self, uuid, style):
"""
Generate a thumbnail for the file identified. Calling this endpoint adds the image to the queue. Duplicate
@ -145,21 +237,24 @@ class ThumbClient(object):
"""
Client for interacting with the thumbserver api
"""
def __init__(self, server_uri):
    """
    :param server_uri: base uri of the thumbnail service. Credentials embedded in the uri
                       (scheme://user:password@host:port/path) are used as the session's auth.
    :raises KeyError: if no port is given and the scheme is neither http nor https
    """
    self.session = requests.Session()

    uri = urlparse(server_uri)
    port = uri.port or dict(http=80, https=443)[uri.scheme]
    host = f"{uri.scheme}://{uri.hostname}:{port}"

    # uri.path already carries its leading "/", so it is appended as-is; the trailing "/" is dropped
    # because request_thumb appends "/thumb" to server_url
    path = uri.path.rstrip("/")
    if path:
        host = host + path

    if uri.username:
        self.session.auth = (uri.username, uri.password, )

    self.server_url = host

    a = requests.adapters.HTTPAdapter(max_retries=0)
    self.session.mount('http://', a)
def request_thumb(self, photo_uuid, style_name):
    """
    Call the service's /thumb endpoint for the given photo and style. The response is not inspected.

    :param photo_uuid: sent as the "uuid" query parameter
    :param style_name: sent as the "style" query parameter
    """
    self.session.get(self.server_url + "/thumb", params=dict(uuid=photo_uuid, style=style_name))
def main():
@ -177,6 +272,7 @@ def main():
parser.add_argument('-s', '--database', help="sqlalchemy database connection uri",
default=os.environ.get("DATABASE_URL")),
parser.add_argument('--debug', action="store_true", help="enable development options")
parser.add_argument('--max-workers', type=int, default=4, help="number of image download/process threads")
args = parser.parse_args()
@ -203,24 +299,17 @@ def main():
# Create various internal tools
library_storage = uri_to_storage(args.library)
library_manager = LibraryManager(library_storage)
cache_storage = uri_to_storage(args.cache)
thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage)
thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage, args.max_workers)
thumbnail_worker.start()
# Setup and mount web ui
web = ThumbServiceWeb(thumbnail_worker.queue.put)
cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
'tools.db.on': True, }})
# Setup and mount API
api = PhotosApi(library_manager)
cherrypy.tree.mount(api, '/api', {'/': {'tools.sessions.on': False,
'tools.trailing_slash.on': False,
'tools.auth_basic.on': True,
'tools.auth_basic.realm': 'photolib',
'tools.auth_basic.checkpassword': validate_password,
'tools.db.on': True}})
'tools.db.on': True, },
'/thumb': {'tools.auth_basic.on': True,
'tools.auth_basic.realm': 'thumbservice',
'tools.auth_basic.checkpassword': validate_thumbservice_password}})
# General config options
cherrypy.config.update({

View File

@ -2,6 +2,8 @@ import os
import cherrypy
from photoapp.types import PhotoSet, PhotoStatus
import hashlib
from random import choice
import string
def copysha(fpin, fpout):
@ -45,7 +47,7 @@ def require_auth(func):
"""
def wrapped(*args, **kwargs):
if not auth():
raise cherrypy.HTTPError(403)
raise cherrypy.HTTPError(403, "Authentication required")
return func(*args, **kwargs)
return wrapped
@ -78,3 +80,7 @@ def cherryparam(v, type_=str):
def number_format(value):
    """
    Render a value as an integer with comma thousands separators, e.g. 1234567 -> "1,234,567".
    The value is passed through int() first.
    """
    whole = int(value)
    return f"{whole:,d}"
def genpw(length=16):
    """
    Generate a random password made of ASCII letters and digits.

    :param length: number of characters to generate; 0 or less yields an empty string
    :return: the generated password
    """
    # local import keeps this block self-contained; secrets draws from the OS CSPRNG, which is what a
    # password needs (the random module is not suitable for security purposes)
    import secrets

    alphabet = string.ascii_lowercase + string.ascii_uppercase + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))

9
photoapp/webutils.py Normal file
View File

@ -0,0 +1,9 @@
from photoapp.types import User
from photoapp.dbutils import db
from photoapp.common import pwhash
def validate_password(realm, username, password):
    """
    Basic-auth password check: True when a User row matches both the username and the hash of the password.
    The realm argument is accepted but not consulted.
    """
    matching_user = db.query(User).filter(User.name == username, User.password == pwhash(password)).first()
    return bool(matching_user)