complete thumbservice auth, docs, parallelism, and client

This commit is contained in:
dave 2021-08-22 15:47:15 -07:00
parent 53ede96df3
commit 15c2133f35
5 changed files with 179 additions and 74 deletions

View File

@ -50,6 +50,7 @@ Arguments are as follows:
* `--database sqlite:///photos.db` - [Sqlalchemy](https://docs.sqlalchemy.org/en/13/core/engines.html) connection uri * `--database sqlite:///photos.db` - [Sqlalchemy](https://docs.sqlalchemy.org/en/13/core/engines.html) connection uri
* `--cache file://./cache` - storage uri to use as a cache for things like thumbnails. It can be the same as the library * `--cache file://./cache` - storage uri to use as a cache for things like thumbnails. It can be the same as the library
* `--port 8080` - listen on http on port 8080 * `--port 8080` - listen on http on port 8080
* `--thumb-service` - optional address of thumbnail service - see below.
Supported library uri schemes are: Supported library uri schemes are:
@ -70,6 +71,11 @@ Besides browsing, most interaction with Photolib will be done using its CLI tool
username, and password details need to be provided to the cli via a url flag not shown here. See `photocli --help` for username, and password details need to be provided to the cli via a url flag not shown here. See `photocli --help` for
more information. more information.
Optional: Photolib uses a secondary service to generate thumbnails for video files. After the above installation
instructions are complete, run `photothumbd` on a different port with the same arguments. On first run, it will create
a user for internal communications and log the username/password. These credentials must be included in the
`--thumb-service` address passed to `photoappd`, e.g. `http://<username>:<password>@<host>:<port>`.
Commands Commands
-------- --------

View File

@ -14,6 +14,7 @@ from photoapp.api import PhotosApi, LibraryManager
from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, date_format from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, date_format
from photoapp.utils import auth, require_auth, photoset_auth_filter, slugify, cherryparam, number_format from photoapp.utils import auth, require_auth, photoset_auth_filter, slugify, cherryparam, number_format
from photoapp.storage import uri_to_storage from photoapp.storage import uri_to_storage
from photoapp.webutils import validate_password
from jinja2 import Environment, FileSystemLoader, select_autoescape from jinja2 import Environment, FileSystemLoader, select_autoescape
from sqlalchemy import desc, func, and_, or_ from sqlalchemy import desc, func, and_, or_
@ -21,12 +22,6 @@ from sqlalchemy import desc, func, and_, or_
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
def validate_password(realm, username, password):
if db.query(User).filter(User.name == username, User.password == pwhash(password)).first():
return True
return False
class PhotosWeb(object): class PhotosWeb(object):
""" """
Http root of the UI webserver Http root of the UI webserver

View File

@ -1,84 +1,175 @@
import os import os
import re
import logging import logging
import cherrypy import cherrypy
import shutil import shutil
import tempfile import tempfile
import traceback import traceback
import requests import requests
from threading import Thread import subprocess
from concurrent.futures import ThreadPoolExecutor
from threading import Thread, Semaphore
from contextlib import closing from contextlib import closing
from queue import Queue, Empty from queue import LifoQueue, Empty
from shutil import copyfileobj from shutil import copyfileobj
from subprocess import check_call from urllib.parse import urlparse
from photoapp.dbutils import SAEnginePlugin, SATool, db, get_db_engine, create_db_sessionmaker from photoapp.dbutils import SAEnginePlugin, SATool, get_db_engine, create_db_sessionmaker
from photoapp.storage import uri_to_storage from photoapp.storage import uri_to_storage
from photoapp.api import PhotosApi, LibraryManager
from photoapp.types import User, Photo from photoapp.types import User, Photo
from photoapp.utils import require_auth
from photoapp.common import pwhash from photoapp.common import pwhash
from photoapp.dbsession import DatabaseSession from photoapp.dbsession import DatabaseSession
from photoapp.thumb import thumb_path, image_file_style from photoapp.thumb import thumb_path, image_file_style
from photoapp.webutils import validate_password
from photoapp.utils import genpw
def get_video_thumb(srcpath, outpath): THUMBSERVICE_USER_INTERNAL = "_thumbservice"
#TODO limit execution time RE_DURATION = re.compile(r' Duration: (?P<hours>\d\d):(?P<minutes>\d\d):(?P<seconds>\d\d).(?P<decseconds>\d\d),')
def get_video_duration(srcpath, timeout=30):
    """
    Get the duration of a video, in seconds, by parsing the line ffmpeg prints to stderr:
      Duration: 00:00:00.94, start: 0.000000, bitrate: 15046 kb/s

    :param srcpath: path of the video file handed to ffmpeg via -i
    :param timeout: seconds to wait for the ffmpeg process before giving up
    :return: duration in seconds as a float, or 0.0 when it could not be determined (timeout, unexpected
             exit status, empty stderr, or no Duration line found). Every failure is logged.
    """
    cmd = [
        "ffmpeg",
        "-hide_banner",
        "-i", srcpath,
    ]

    try:
        p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except subprocess.TimeoutExpired:
        logging.error("ffmpeg length: timed out")
        return 0.0

    # The command names no output file; exit status 1 is treated as the normal result of such a run and
    # anything else as a failure.
    if p.returncode != 1:
        logging.error("ffmpeg length: unexpected return code %s", p.returncode)
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    if not p.stderr:
        logging.error("ffmpeg length: no stderr")
        return 0.0

    # Subprocess output is raw bytes with no guarantee of being valid UTF-8. Substitute undecodable bytes
    # rather than raising UnicodeDecodeError, so a stray byte cannot abort the search for the Duration line.
    stderr = p.stderr.decode(errors="replace")

    match = RE_DURATION.search(stderr)
    if not match:
        logging.error("ffmpeg length: could not find duration")
        logging.error("ffmpeg stdout: %s", p.stdout)
        logging.error("ffmpeg stderr: %s", p.stderr)
        return 0.0

    times = match.groupdict()

    # "decseconds" is the two-digit fractional part, i.e. hundredths of a second
    return int(times["hours"]) * 60 * 60 + \
        int(times["minutes"]) * 60 + \
        int(times["seconds"]) + \
        int(times["decseconds"]) / 100
def get_video_thumb(srcpath, outpath, timeout=30):
duration = get_video_duration(srcpath, timeout)
if duration == 0.0:
return False
cmd = [ cmd = [
"ffmpeg", "ffmpeg",
"-hide_banner",
"-loglevel", "error",
"-i", srcpath, "-i", srcpath,
"-vframes", "1", # Output one frame "-vframes", "1", # Output one frame
"-an", # Disable audio "-an", # Disable audio
# "-s", "400x222" # Output size # "-s", "400x222" # Output size
"-ss", "1", # grab the frame from 1 second into the video "-ss", "1" if duration > 5 else "0",
outpath outpath
] ]
#TODO capture output and only log on error try:
check_call(cmd) p = subprocess.run(cmd, timeout=timeout, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
except subprocess.TimeoutExpired:
logging.error("ffmpeg: timed out")
return False
if p.returncode != 0 or not os.path.exists(outpath):
logging.error("ffmpeg: no image produced.")
logging.error("ffmpeg return code: %s", p.returncode)
logging.error("ffmpeg stdout: %s", p.stdout)
logging.error("ffmpeg stderr: %s", p.stderr)
return False
return True
def setup_thumb_user(engine): def setup_thumb_user(engine):
#TODO create the internal User used to talk to this service # create the internal User used to talk to this service
# if user doesnt exist with closing(create_db_sessionmaker(engine)()) as s:
# create u = s.query(User).filter(User.name == THUMBSERVICE_USER_INTERNAL).first()
# log the password if u:
pass return
password = genpw()
logging.warning("created thumbserver user: %s:%s", THUMBSERVICE_USER_INTERNAL, password)
s.add(User(name=THUMBSERVICE_USER_INTERNAL, password=pwhash(password)))
s.commit()
def validate_thumbservice_password(realm, username, password):
    """
    Password check for the thumbnail service: only the internal service account may authenticate.

    Any username other than THUMBSERVICE_USER_INTERNAL is rejected outright; for the service account the
    decision is delegated to validate_password.
    """
    is_service_account = username == THUMBSERVICE_USER_INTERNAL
    return is_service_account and validate_password(realm, username, password)
class ThumbWorker(Thread): class ThumbWorker(Thread):
def __init__(self, engine, library, cache): def __init__(self, engine, library, cache, max_workers=4):
super().__init__() super().__init__()
self.daemon = True self.daemon = True
self.queue = Queue() self.queue = LifoQueue()
self.engine = engine self.engine = engine
self.library = library self.library = library
self.cache = cache self.cache = cache
self.max_workers = max_workers
self.throttle = Semaphore(max_workers)
def run(self): def run(self):
logged_empty = False with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
while True: while True:
try: try:
image_uuid, style_name = self.queue.get(block=True, timeout=5.0) item = self.queue.get(block=True, timeout=5.0)
logged_empty = False except Empty:
except Empty: continue
if not logged_empty:
logging.info("queue empty")
logged_empty = True
continue
try: # semaphore is used so that the queue is not immediately consumed into waiting Futures in the pool.
with ( # this is to preserve the LIFO behavior of the queue
closing(create_db_sessionmaker(self.engine)()) as s, self.throttle.acquire()
tempfile.TemporaryDirectory() as d, pool.submit(self.handle_task, item)
):
self.do_thumb(image_uuid, style_name, s, d) qlen = self.queue.qsize()
except: if qlen:
traceback.print_exc() #TODO something like _failed_thumbs_cache logging.info("images to process: %s", qlen)
#TODO handle errors differently, like
# db error -> kill program def handle_task(self, item):
# filesystem error -> kill program image_uuid, style_name = item
# PIL error -> ignore try:
with (
closing(create_db_sessionmaker(self.engine)()) as s,
tempfile.TemporaryDirectory() as d,
):
self.do_thumb(image_uuid, style_name, s, d)
except:
traceback.print_exc() #TODO something like _failed_thumbs_cache
#TODO handle errors differently, like
# db error -> kill program
# filesystem error -> kill program
# PIL error -> ignore
self.queue.task_done()
self.throttle.release()
def do_thumb(self, image_uuid, style_name, session, tmpdir): def do_thumb(self, image_uuid, style_name, session, tmpdir):
""" """
@ -97,7 +188,7 @@ class ThumbWorker(Thread):
return return
# download the image # download the image
local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized? local_src_path = os.path.join(tmpdir, image.fname) # TODO fname isn't sanitized? use temp.<extension>
thumb_tmp_path = os.path.join(tmpdir, "thumb.jpg") thumb_tmp_path = os.path.join(tmpdir, "thumb.jpg")
with ( with (
self.library.open(image.path, "rb") as src, self.library.open(image.path, "rb") as src,
@ -106,9 +197,9 @@ class ThumbWorker(Thread):
shutil.copyfileobj(src, dest) shutil.copyfileobj(src, dest)
# generate a still from the image # generate a still from the image
get_video_thumb(local_src_path, thumb_tmp_path) if not get_video_thumb(local_src_path, thumb_tmp_path):
logging.error("video extraction failed: %s", image_uuid)
logging.info("generated %s: %sb", thumb_tmp_path, str(os.path.getsize(thumb_tmp_path))) return # TODO something like _failed_thumbs_cache
# Do normal cropping of the thumb # Do normal cropping of the thumb
thumb_cropped_path = os.path.join(tmpdir, "thumb_cropped.jpg") thumb_cropped_path = os.path.join(tmpdir, "thumb_cropped.jpg")
@ -121,6 +212,8 @@ class ThumbWorker(Thread):
): ):
copyfileobj(fsrc, fdest) copyfileobj(fsrc, fdest)
logging.info("processed %s: %sb", image_uuid, str(os.path.getsize(thumb_tmp_path)))
class ThumbServiceWeb(object): class ThumbServiceWeb(object):
def __init__(self, queue_thumbnail): def __init__(self, queue_thumbnail):
@ -131,7 +224,6 @@ class ThumbServiceWeb(object):
yield "photoapp thumbnail service OK" yield "photoapp thumbnail service OK"
@cherrypy.expose @cherrypy.expose
# @require_auth
def thumb(self, uuid, style): def thumb(self, uuid, style):
""" """
Generate a thumbnail for the file identified. Calling this endpoint adds the image to the queue. Duplicate Generate a thumbnail for the file identified. Calling this endpoint adds the image to the queue. Duplicate
@ -145,21 +237,24 @@ class ThumbClient(object):
""" """
Client for interacting with the thumbserver api Client for interacting with the thumbserver api
""" """
def __init__(self, server_url): def __init__(self, server_uri):
self.server_url = server_url
self.session = requests.Session() self.session = requests.Session()
uri = urlparse(server_uri)
port = uri.port or dict(http=80, https=443)[uri.scheme]
host = f"{uri.scheme}://{uri.hostname}:{port}"
if uri.path:
host = host + "/" + uri.path
if uri.username:
self.session.auth = (uri.username, uri.password, )
self.server_url = host
a = requests.adapters.HTTPAdapter(max_retries=0) a = requests.adapters.HTTPAdapter(max_retries=0)
self.session.mount('http://', a) self.session.mount('http://', a)
def request_thumb(self, photo_uuid, style_name): def request_thumb(self, photo_uuid, style_name):
self.session.get(self.server_url, params=dict(uuid=photo_uuid, style=style_name)) self.session.get(self.server_url + "/thumb", params=dict(uuid=photo_uuid, style=style_name))
# TODO dedupe me
def validate_password(realm, username, password):
if db.query(User).filter(User.name == username, User.password == pwhash(password)).first():
return True
return False
def main(): def main():
@ -177,6 +272,7 @@ def main():
parser.add_argument('-s', '--database', help="sqlalchemy database connection uri", parser.add_argument('-s', '--database', help="sqlalchemy database connection uri",
default=os.environ.get("DATABASE_URL")), default=os.environ.get("DATABASE_URL")),
parser.add_argument('--debug', action="store_true", help="enable development options") parser.add_argument('--debug', action="store_true", help="enable development options")
parser.add_argument('--max-workers', type=int, default=4, help="number of image download/process threads")
args = parser.parse_args() args = parser.parse_args()
@ -203,24 +299,17 @@ def main():
# Create various internal tools # Create various internal tools
library_storage = uri_to_storage(args.library) library_storage = uri_to_storage(args.library)
library_manager = LibraryManager(library_storage)
cache_storage = uri_to_storage(args.cache) cache_storage = uri_to_storage(args.cache)
thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage) thumbnail_worker = ThumbWorker(engine, library_storage, cache_storage, args.max_workers)
thumbnail_worker.start() thumbnail_worker.start()
# Setup and mount web ui # Setup and mount web ui
web = ThumbServiceWeb(thumbnail_worker.queue.put) web = ThumbServiceWeb(thumbnail_worker.queue.put)
cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False, cherrypy.tree.mount(web, '/', {'/': {'tools.trailing_slash.on': False,
'tools.db.on': True, }}) 'tools.db.on': True, },
'/thumb': {'tools.auth_basic.on': True,
# Setup and mount API 'tools.auth_basic.realm': 'thumbservice',
api = PhotosApi(library_manager) 'tools.auth_basic.checkpassword': validate_thumbservice_password}})
cherrypy.tree.mount(api, '/api', {'/': {'tools.sessions.on': False,
'tools.trailing_slash.on': False,
'tools.auth_basic.on': True,
'tools.auth_basic.realm': 'photolib',
'tools.auth_basic.checkpassword': validate_password,
'tools.db.on': True}})
# General config options # General config options
cherrypy.config.update({ cherrypy.config.update({

View File

@ -2,6 +2,8 @@ import os
import cherrypy import cherrypy
from photoapp.types import PhotoSet, PhotoStatus from photoapp.types import PhotoSet, PhotoStatus
import hashlib import hashlib
from random import choice
import string
def copysha(fpin, fpout): def copysha(fpin, fpout):
@ -45,7 +47,7 @@ def require_auth(func):
""" """
def wrapped(*args, **kwargs): def wrapped(*args, **kwargs):
if not auth(): if not auth():
raise cherrypy.HTTPError(403) raise cherrypy.HTTPError(403, "Authentication required")
return func(*args, **kwargs) return func(*args, **kwargs)
return wrapped return wrapped
@ -78,3 +80,7 @@ def cherryparam(v, type_=str):
def number_format(value): def number_format(value):
return format(int(value), ',d') return format(int(value), ',d')
def genpw(length=16):
    """
    Generate a random password made of ASCII letters and digits.

    The result is used as a login credential, so characters are drawn with the `secrets` module
    rather than the general-purpose `random` generator.

    :param length: number of characters to generate
    :return: a string of `length` characters; empty when length is 0
    """
    # imported locally so this function does not depend on a change to the module-level imports
    import secrets

    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))

9
photoapp/webutils.py Normal file
View File

@ -0,0 +1,9 @@
from photoapp.types import User
from photoapp.dbutils import db
from photoapp.common import pwhash
def validate_password(realm, username, password):
    """
    Check a username/password pair against the User table.

    Looks for a User whose name equals `username` and whose stored password equals pwhash(password).
    `realm` is accepted but not consulted.

    :return: True if such a user exists, False otherwise
    """
    matching_user = db.query(User).filter(
        User.name == username,
        User.password == pwhash(password),
    ).first()
    return bool(matching_user)