diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..324a0ab --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +/virtualenv/ +/testdata/ +/mediaweb.egg-info/ \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..46fe008 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM ubuntu:disco AS main + +RUN apt-get update && \ + apt-get install -y python3-pip && \ + pip3 install -U pip + +ADD . /tmp/code/ + +RUN cd /tmp/code && \ + pip3 install -r requirements.txt && \ + python3 setup.py install && \ + useradd --uid 1000 app && \ + rm -rf /tmp/code + +USER app + +ENTRYPOINT ["mswebd"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..12d3b06 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +mediasort +========= + +Webapp for quick or automatic media sorting and integration with the Deluge torrent client. + + +configuration +------------- + +Basic flags + +|flag|meaning|example| +|---|---|---| +|--server|deluge rpc uri|deluge://username:password@host:port| +|--port|http port to listen on|8081| + +Mediasort has several options to tune how it sorts your media. + +First, `--library` should be set to your media library's path. The media library must contain top level directories for +each show, appropriately named. The name of the directory will be used to determine what show to put in it. In each show +directory, there should be season dirs (such as "Season 6" or "2019") within which the actual media files are placed. + +If needed, `--pathmap` can be set to translate paths when sorting files. This would be needed if your torrent client has +a different view of the filesystem than `mediasort` does, e.g. if they're running in docker containers. Consider these +two paths: + +* `/media/storage/mylibrary/myshow/Season 5/episode.mkv` +* `/data/torrents/Complete/myshow.mkv` + +The first is the destination path in your media library as seen by mediasort. The second is the path Deluge sees. 
+Setting `--pathmap` to `/data/torrents/:/media/storage/torrents/` gives mediasort the info it needs to resolve these +paths - it translates the Deluge path by simply replacing the prefix obtained from the left half of the `--pathmap` with +the right. + +Finally, `--ui-movedests` provides a list of pre-filled destinations that Deluge may move files to after they're sorted. +These paths are passed directly to deluge and should be pathed from that perspective. + + +todo +---- +* support re-labeling sorted torrents +* support multiple deluge instances - need a pathmap per instance +* make UI pretty diff --git a/mediaweb/__init__.py b/mediaweb/__init__.py index 2473d10..01cf2a4 100644 --- a/mediaweb/__init__.py +++ b/mediaweb/__init__.py @@ -1,14 +1,15 @@ import os -import cherrypy import logging -from jinja2 import Environment, FileSystemLoader, select_autoescape -from deluge_client import DelugeRPCClient -from urllib.parse import urlparse -from pprint import pprint -from threading import Thread +import cherrypy from time import sleep from queue import Queue +from pprint import pprint +from threading import Thread +from urllib.parse import urlparse from dataclasses import dataclass, field +from deluge_client import DelugeRPCClient +from jinja2 import Environment, FileSystemLoader, select_autoescape +from mediaweb import shows APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) @@ -17,13 +18,16 @@ APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../")) @dataclass class Cache: torrents: dict = field(default_factory=dict) + shows: dict = field(default_factory=dict) class ClientCache(object): - def __init__(self, client): + def __init__(self, client, libpath): self.client = client self.data = Cache() self.q = Queue() + self.inflight = False + self.libpath = libpath self.background_t = Thread(target=self.background, daemon=True) self.background_t.start() @@ -32,18 +36,20 @@ class ClientCache(object): self.timer_t.start() def 
refresh(self): - self.q.put(None) + if not self.inflight and self.q.qsize() == 0: # best effort duplicate work reduction + self.q.put(None) def background(self): while True: self.q.get() # block until we need to do something + self.inflight = True logging.info("performing background tasks...") - self.data.torrents = self.client.core.get_torrents_status({"label": "sickrage"}, - ['name', 'label', 'save_path', 'is_seed', - 'is_finished', 'progress']) + self.build_showindex() + self.build_torrentindex() self.q.task_done() + self.inflight = False logging.info("background tasks complete") def timer(self): @@ -52,6 +58,17 @@ class ClientCache(object): logging.info("sleeping...") sleep(300) # TODO configurable task interval + def build_torrentindex(self): + logging.info("refreshing torrents") + self.data.torrents = self.client.core.get_torrents_status({"label": "sickrage"}, + ['name', 'label', 'save_path', 'is_seed', + 'is_finished', 'progress']) + + def build_showindex(self): + logging.info("updating show index") + data = shows.create_index([self.libpath]) + self.data.shows = sorted(data, key=lambda x: x.name) + class MediaWeb(object): def __init__(self, rpc, templater, uioptions): @@ -63,7 +80,11 @@ class MediaWeb(object): """ Render a template """ - return self.tpl.get_template(template).render(**kwargs, options=self.uioptions, **self.get_default_vars()) + return self.tpl.get_template(template).render(**kwargs, + options=self.uioptions, + torrents=self.rpc.data.torrents, + shows=self.rpc.data.shows, + **self.get_default_vars()) def get_default_vars(self): return {} @@ -74,7 +95,7 @@ class MediaWeb(object): if action == "update": self.rpc.refresh() raise cherrypy.HTTPRedirect("/") - return self.render("index.html", torrents=self.rpc.data.torrents) + return self.render("index.html") @cherrypy.expose def move(self, thash, dest=None, otherdest=None): @@ -88,6 +109,32 @@ class MediaWeb(object): return self.render("moveform.html", torrent=torrent) + @cherrypy.expose + def 
sort(self, thash, dest=None): + torrent = self.rpc.client.core.get_torrent_status(thash, []) # TODO reduce to needed fields + + # find the actual file among the torrent's files + # really we just pick the biggest one + finfo = None + fsize = 0 + for tfile in torrent["files"]: + if tfile["size"] > fsize: + finfo = tfile + + fname = finfo["path"] + + matches = shows.match_episode(fname, self.rpc.data.shows) + + if cherrypy.request.method == "POST" and dest: + thematch = None + for m in matches: + if m.dest.dir == dest: + thematch = m + break + return f"sort {fname} into {thematch}" + + return self.render("sortform.html", torrent=torrent, matches=matches) + def main(): import argparse @@ -97,8 +144,6 @@ def main(): parser.add_argument('-p', '--port', help="tcp port to listen on", default=int(os.environ.get("MEDIAWEB_PORT", 8080)), type=int) - parser.add_argument('-i', '--download-dirs', help="download directories", nargs="+", - default=os.environ.get("MEDIAWEB_DLDIR")) parser.add_argument('-o', '--library', default=os.environ.get("STORAGE_URL"), help="media library path") parser.add_argument('--debug', action="store_true", help="enable development options") parser.add_argument('-s', '--server', help="deluge uris", action="append", required=True) @@ -112,12 +157,12 @@ def main(): # TODO smarter argparser that understands env vars if not args.library: - parser.error("--download-dirs or MEDIAWEB_DLDIR is required") + parser.error("--library or MEDIAWEB_DLDIR is required") logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING, format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s") - tpl_dir = os.path.join(APPROOT, "templates") if not args.debug else "templates" + tpl_dir = os.path.join(APPROOT, "templates") tpl = Environment(loader=FileSystemLoader(tpl_dir), autoescape=select_autoescape(['html', 'xml'])) # self.tpl.filters.update(basename=os.path.basename, @@ -131,7 +176,7 @@ def main(): assert uri.scheme == "deluge" rpc = 
DelugeRPCClient(uri.hostname, uri.port if uri.port else 58846, uri.username, uri.password, decode_utf8=True) - rpc_cache = ClientCache(rpc) + rpc_cache = ClientCache(rpc, args.library) web = MediaWeb(rpc_cache, tpl, uioptions) cherrypy.tree.mount(web, '/', {'/': {'tools.auth_basic.on': True, diff --git a/mediaweb/shows.py b/mediaweb/shows.py new file mode 100644 index 0000000..848ef64 --- /dev/null +++ b/mediaweb/shows.py @@ -0,0 +1,222 @@ +import os +import re +import string +import logging +from enum import Enum +from fuzzywuzzy import fuzz +from collections import namedtuple + + +# lifted from https://git.davepedu.com/dave/tvsort/src/branch/master/tvsort/ + + +NORMAL_SEASON_EP_RE = re.compile(r'(([sS]([0-9]{2}))x?([eE]([0-9]{2}))?)') # match something like s01e02 +NORMAL_SEASON_EP_RE2 = re.compile(r'(([0-9]+)[xX]([0-9]{2}))') # match something like 21x04 +DATE_SEASON_EP_RE = re.compile(r'((201[0-9]).([0-9]{1,2})?.([0-9]{1,2})?)') # match something like 2017-08-3 +COMMON_CRAP = [re.compile(i, flags=re.I) for i in + [r'(720|1080)p', + r'hdtv', + r'(h.?)?x?264(.[a-z0-9]+)?', + r'(ddp\d\.\d)?', + r'web(\-?(dl|rip))?', + r'[\.\-\s](amzn|amazon)[\.\-\s]', + r'dd.5.\d', + r'AAC2.\d']] + + +class EpisodeParseException(Exception): + pass + + +class Seasoning(Enum): + """ + All episodes are categorized into seasons (or season-like entities). A season may number it's episodes by date or by + season and episode number. Thirdly, an episode may be associated with a season but not obey the regular naming + scheme - such as a special episode. This enum is for describing what chronological scheme an episode appears to use. + """ + NONE = 0 + BY_SEASON = 1 + BY_DATE = 2 + SPECIAL = 3 + + +Show = namedtuple("Show", "root dir name mode seasons") +""" +Struct describing an in-library tv show + root : abs path to the folder containing dir + dir : absolute(?) 
file path to the show + name : name of the show + mode : Season strategy (cannot be 'special') + seasons : list of season ints +""" + + +EpInfo = namedtuple("EpInfo", "file mode major minor extra") +""" +Struct for describing an episode file. + file : file name of the episode file + mode : chronological scheme of file naming (see Seasoning) + major : least granular chronological unit. Typically season or year + minor : medium granular unit. Episode number, or month for date-based episodes + extra : most granular unit. Always day (only used for date-based episodes) +""" + + +MatchedEpisode = namedtuple("MatchedEpisode", "root ep dest subdest score") +""" +Struct describing the intent to sort an episode file into a location + root : abs path to the folder containing ep.file + ep : associated EpInfo object + dest : associated Show object + score : fuzzy match score (fuzz.token_set_ratio) of the show name against the parsed file name +""" + + +def create_show(root_path, dirname): + dir_lower = dirname.lower() + + # Inspect contents of show directory and guess naming scheme + yearish = 0 + seasonish = 0 + wtfish = 0 + buckets_season = [] + buckets_year = [] + for item in os.listdir(os.path.join(root_path, dirname)): + if item.lower().startswith("season "): + seasonish += 1 + buckets_season.append(int(''.join([i if i in string.digits else " " for i in item]).strip())) # todo flexible season dir detection + continue + try: + year = int(item) + buckets_year.append(year) + if year > 1900 and year < 2050: + yearish += 1 + continue + except ValueError: + pass + wtfish += 1 + + mode = None + episodes = None + + if yearish > seasonish and yearish > wtfish: + mode = Seasoning.BY_DATE + episodes = buckets_year + elif seasonish > yearish and seasonish > wtfish: + mode = Seasoning.BY_SEASON + episodes = buckets_season + else: + mode = Seasoning.NONE + episodes = [] + + return Show(root_path, dirname, dir_lower, mode, episodes) + + +def create_index(fs_paths): + shows = [] + for d in fs_paths: + for i in os.listdir(d): + if os.path.isdir(os.path.join(d, i)): + 
try: + shows.append(create_show(d, i)) + except PermissionError as pe: + logging.warning(f"skipping {d} due to {pe}") + + return shows + + +def parse_episode(fname): + """ + Given a file name, parse out any information we can from the name + :return: + """ + + # Remove file extensions + # item = fname.rstrip(".mkv").lower() #TODO make this better + item = '.'.join(fname.split(".")[0:-1]) + + # Extract season information + # And remove seasons info chars from the working name + epinfo = None + match = NORMAL_SEASON_EP_RE.search(item) or NORMAL_SEASON_EP_RE2.search(item) + if match: + fields = match.groups() + if len(fields) == 5: + whole, _, season, _, episode = fields + else: + whole, season, episode = fields + + if season and not episode: + epinfo = EpInfo(fname, Seasoning.SPECIAL, int(season), None, None) + else: + assert season and episode + epinfo = EpInfo(fname, Seasoning.BY_SEASON, int(season), int(episode), None) + + # delete everything after the episode number + pos = item.find(whole) + if pos >= 10: + item = item[0:pos] + else: + # unless it makes it too short + item = item.replace(whole, "") + else: + match = DATE_SEASON_EP_RE.search(item) + if match: + whole, year, month, day = match.groups() + assert year is not None + if month: + month = int(month) + if day: + day = int(day) + epinfo = EpInfo(fname, Seasoning.BY_DATE, int(year), month, day) + # delete everything after the episode number + pos = item.find(whole) + if pos >= 10: + item = item[0:pos] + else: + # unless it makes it too short + item = item.replace(whole, "") + else: + raise EpisodeParseException("Could not parse episode {}".format(repr(fname))) + + # Remove common torrenty names + for crap in COMMON_CRAP: + item = crap.sub("", item) + + # Remaining chars should be a show name and possibly and episode title. 
And random bs + allowed_chars = string.ascii_lowercase + string.digits + item = ''.join([i if i in allowed_chars else " " for i in item]).strip() + + return epinfo, item + + +def sub_bucket_name(show, major, minor, extra): + if show.mode == Seasoning.BY_DATE: + return str(major) + elif show.mode == Seasoning.BY_SEASON: + return "Season {}".format(major) + else: + return '' + + +def match_episode(fname, shows, thresh=65): + """ + Given a filename and a show library, determine which show and season is the best place to sort it to + """ + matches = [] + + # Parse information from the episode file name + try: + epinfo, item = parse_episode(fname) + except EpisodeParseException: + return matches + + # Find a show from the library best matching this episode + for show in shows: + value = fuzz.token_set_ratio(show.name.lower(), item.lower()) #TODO add algorithm swap arg for snakeoil + if value > thresh: + matches.append( + MatchedEpisode(fname, epinfo, show, + sub_bucket_name(show, epinfo.major, epinfo.minor, epinfo.extra), + value)) + return sorted(matches, key=lambda x: x.score, reverse=True) diff --git a/requirements.txt b/requirements.txt index 4eaefbd..dfb98a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ backports.functools-lru-cache==1.5 cheroot==6.5.5 CherryPy==18.1.2 deluge-client==1.7.1 +fuzzywuzzy==0.17.0 jaraco.functools==2.0 Jinja2==2.10.1 MarkupSafe==1.1.1 diff --git a/templates/index.html b/templates/index.html index bff82ea..0900008 100644 --- a/templates/index.html +++ b/templates/index.html @@ -1,13 +1,13 @@ {% extends "page.html" %} +{% block toolbar %} +
+ + +
+{% endblock %} {% block body %}
-
-
- - -
-

Completed

@@ -23,7 +23,7 @@ - + {% endif %}{% endfor %}
{{ torid[0:6] }} {{ tor.name }} {{ tor.save_path }}xx {{ "complete" if tor.is_finished else "pending" }} @@ -53,6 +53,21 @@
+

Shows

+ + + + + + + {% for show in shows %} + + + + + + {% endfor %} +
namepathsorting
{{ show.name }}{{ show.dir }}{{ show.mode }}
{% endblock %} \ No newline at end of file diff --git a/templates/moveform.html b/templates/moveform.html index e7efabf..4cea18f 100644 --- a/templates/moveform.html +++ b/templates/moveform.html @@ -1,4 +1,9 @@ {% extends "page.html" %} + +{% block toolbar %} + +{% endblock %} + {% block body %}

Move {{ torrent.name }}

@@ -26,4 +31,4 @@
-{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/templates/page.html b/templates/page.html index cdc18c3..bc987db 100644 --- a/templates/page.html +++ b/templates/page.html @@ -10,20 +10,22 @@ } #page { margin: 0px auto; - width: 1000px; + padding: 0px 15px; + max-width: 1000px; } td { padding: 3px 5px; } td { border-bottom: 1px solid #666; - /*display: block;*/ - }
+
+{% block toolbar %}{% endblock %} +
{% block body %}{% endblock %}
diff --git a/templates/sortform.html b/templates/sortform.html new file mode 100644 index 0000000..baaef2d --- /dev/null +++ b/templates/sortform.html @@ -0,0 +1,50 @@ +{% extends "page.html" %} +{% block toolbar %} + +{% endblock %} +{% block body %} +
+

Sort {{ torrent.name }}

+

{{ torrent.hash }}

+ +
+ files + +
+ +
+ +
+ +
+ destination + + + + + + + + + {% for row in matches %} + + + + + + + + {% endfor %} +
showseasonlibraryscore
+ + {{ row.dest.dir }}{{ row.subdest }}{{ row.dest.root }}{{ row.score }}
+
+ +
+
+
+{% endblock %} \ No newline at end of file