sorting basics

This commit is contained in:
dave 2019-08-17 12:50:46 -07:00
parent 741147015f
commit 49dd11a8c2
10 changed files with 432 additions and 29 deletions

3
.dockerignore Normal file
View File

@ -0,0 +1,3 @@
/virtualenv/
/testdata/
/mediaweb.egg-info/

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Image that installs the application from the build context and runs `mswebd`.
FROM ubuntu:disco AS main
# Install pip for python3 from apt, then upgrade pip itself.
RUN apt-get update && \
apt-get install -y python3-pip && \
pip3 install -U pip
# Copy the build context into a scratch directory inside the image.
ADD . /tmp/code/
# Install the requirements and the package, create the `app` user (uid 1000),
# then delete the copied sources in the same layer.
RUN cd /tmp/code && \
pip3 install -r requirements.txt && \
python3 setup.py install && \
useradd --uid 1000 app && \
rm -rf /tmp/code
# Run as the `app` user created above instead of root.
USER app
# NOTE(review): `mswebd` is presumably a console script installed by setup.py - confirm.
ENTRYPOINT ["mswebd"]

43
README.md Normal file
View File

@ -0,0 +1,43 @@
mediasort
=========
Webapp for quick or automatic media sorting and integration with the Deluge torrent client.
configuration
-------------
Basic flags
|flag|meaning|example|
|---|---|---|
|--server|deluge rpc uri|deluge://username:password@host:port|
|--port|http port to listen on|8081|
Mediasort has several options to tune how it sorts your media.
First, `--library` should be set to your media library's path. The media library must contain top level directories for
each show, appropriately named. The name of the directory will be used to determine what show to put in it. In each show
directory, there should be season dirs (such as "Season 6" or "2019") within which the actual media files are placed.
If needed, `--pathmap` can be set to translate paths when sorting files. This would be needed if your torrent client has
a different view of the filesystem than `mediasort` does, e.g. if they're running in docker containers. Consider these
two paths:
* `/media/storage/mylibrary/myshow/Season 5/episode.mkv`
* `/data/torrents/Complete/myshow.mkv`
The first is the destination path in your media library as seen by mediasort. The second is the path Deluge sees.
Setting `--pathmap` to `/data/torrents/:/media/storage/torrents/` gives mediasort the info it needs to resolve these
paths - it translates the Deluge path by simply replacing the prefix obtained from the left half of the `--pathmap` with
the right.
Finally, `--ui-movedests` provides a list of pre-filled destinations that Deluge may move files to after they're sorted.
These paths are passed directly to deluge and should be pathed from that perspective.
todo
----
* support re-labeling sorted torrents
* support multiple deluge instances - need a pathmap per instance
* make UI pretty

View File

@ -1,14 +1,15 @@
import os
import cherrypy
import logging
from jinja2 import Environment, FileSystemLoader, select_autoescape
from deluge_client import DelugeRPCClient
from urllib.parse import urlparse
from pprint import pprint
from threading import Thread
import cherrypy
from time import sleep
from queue import Queue
from pprint import pprint
from threading import Thread
from urllib.parse import urlparse
from dataclasses import dataclass, field
from deluge_client import DelugeRPCClient
from jinja2 import Environment, FileSystemLoader, select_autoescape
from mediaweb import shows
APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
@ -17,13 +18,16 @@ APPROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
@dataclass
class Cache:
    """
    In-memory snapshot of the data the web UI renders.
    """
    # result of the Deluge `get_torrents_status` call made by ClientCache.build_torrentindex
    torrents: dict = field(default_factory=dict)
    # show index; ClientCache.build_showindex stores a name-sorted list here, so the
    # empty default is a list as well
    shows: list = field(default_factory=list)
class ClientCache(object):
def __init__(self, client):
def __init__(self, client, libpath):
self.client = client
self.data = Cache()
self.q = Queue()
self.inflight = False
self.libpath = libpath
self.background_t = Thread(target=self.background, daemon=True)
self.background_t.start()
@ -32,18 +36,20 @@ class ClientCache(object):
self.timer_t.start()
def refresh(self):
self.q.put(None)
if not self.inflight and self.q.qsize() == 0: # best effort duplicate work reduction
self.q.put(None)
def background(self):
while True:
self.q.get() # block until we need to do something
self.inflight = True
logging.info("performing background tasks...")
self.data.torrents = self.client.core.get_torrents_status({"label": "sickrage"},
['name', 'label', 'save_path', 'is_seed',
'is_finished', 'progress'])
self.build_showindex()
self.build_torrentindex()
self.q.task_done()
self.inflight = False
logging.info("background tasks complete")
def timer(self):
@ -52,6 +58,17 @@ class ClientCache(object):
logging.info("sleeping...")
sleep(300) # TODO configurable task interval
def build_torrentindex(self):
    """
    Fetch the status of every torrent labeled "sickrage" from the client and store the result in the cache.
    """
    logging.info("refreshing torrents")
    label_filter = {"label": "sickrage"}
    wanted_fields = ['name', 'label', 'save_path', 'is_seed', 'is_finished', 'progress']
    status = self.client.core.get_torrents_status(label_filter, wanted_fields)
    self.data.torrents = status
def build_showindex(self):
    """
    Rescan the library path and cache the shows found there, ordered by name.
    """
    logging.info("updating show index")
    index = list(shows.create_index([self.libpath]))
    index.sort(key=lambda show: show.name)
    self.data.shows = index
class MediaWeb(object):
def __init__(self, rpc, templater, uioptions):
@ -63,7 +80,11 @@ class MediaWeb(object):
"""
Render a template
"""
return self.tpl.get_template(template).render(**kwargs, options=self.uioptions, **self.get_default_vars())
return self.tpl.get_template(template).render(**kwargs,
options=self.uioptions,
torrents=self.rpc.data.torrents,
shows=self.rpc.data.shows,
**self.get_default_vars())
def get_default_vars(self):
return {}
@ -74,7 +95,7 @@ class MediaWeb(object):
if action == "update":
self.rpc.refresh()
raise cherrypy.HTTPRedirect("/")
return self.render("index.html", torrents=self.rpc.data.torrents)
return self.render("index.html")
@cherrypy.expose
def move(self, thash, dest=None, otherdest=None):
@ -88,6 +109,32 @@ class MediaWeb(object):
return self.render("moveform.html", torrent=torrent)
@cherrypy.expose
def sort(self, thash, dest=None):
    """
    Show the sort form for a torrent or, on POST with a destination, report where it would be sorted.

    :param thash: hash of the torrent to sort
    :param dest: `dir` of the show chosen from the candidate matches
    :raises cherrypy.HTTPError: 404 if the torrent lists no files, 400 if `dest` is not one of the candidates
    """
    torrent = self.rpc.client.core.get_torrent_status(thash, [])  # TODO reduce to needed fields

    files = torrent["files"]
    if not files:
        raise cherrypy.HTTPError(404, "torrent has no files")

    # find the actual file among the torrent's files
    # really we just pick the biggest one
    finfo = max(files, key=lambda tfile: tfile["size"])
    fname = finfo["path"]

    matches = shows.match_episode(fname, self.rpc.data.shows)

    if cherrypy.request.method == "POST" and dest:
        # the form submits the show directory name; map it back to the candidate it came from
        thematch = next((m for m in matches if m.dest.dir == dest), None)
        if thematch is None:
            raise cherrypy.HTTPError(400, "destination does not match any candidate show")
        return f"sort {fname} into {thematch}"

    return self.render("sortform.html", torrent=torrent, matches=matches)
def main():
import argparse
@ -97,8 +144,6 @@ def main():
parser.add_argument('-p', '--port', help="tcp port to listen on",
default=int(os.environ.get("MEDIAWEB_PORT", 8080)), type=int)
parser.add_argument('-i', '--download-dirs', help="download directories", nargs="+",
default=os.environ.get("MEDIAWEB_DLDIR"))
parser.add_argument('-o', '--library', default=os.environ.get("STORAGE_URL"), help="media library path")
parser.add_argument('--debug', action="store_true", help="enable development options")
parser.add_argument('-s', '--server', help="deluge uris", action="append", required=True)
@ -112,12 +157,12 @@ def main():
# TODO smarter argparser that understands env vars
if not args.library:
parser.error("--download-dirs or MEDIAWEB_DLDIR is required")
parser.error("--library or MEDIAWEB_DLDIR is required")
logging.basicConfig(level=logging.INFO if args.debug else logging.WARNING,
format="%(asctime)-15s %(levelname)-8s %(filename)s:%(lineno)d %(message)s")
tpl_dir = os.path.join(APPROOT, "templates") if not args.debug else "templates"
tpl_dir = os.path.join(APPROOT, "templates")
tpl = Environment(loader=FileSystemLoader(tpl_dir),
autoescape=select_autoescape(['html', 'xml']))
# self.tpl.filters.update(basename=os.path.basename,
@ -131,7 +176,7 @@ def main():
assert uri.scheme == "deluge"
rpc = DelugeRPCClient(uri.hostname, uri.port if uri.port else 58846, uri.username, uri.password, decode_utf8=True)
rpc_cache = ClientCache(rpc)
rpc_cache = ClientCache(rpc, args.library)
web = MediaWeb(rpc_cache, tpl, uioptions)
cherrypy.tree.mount(web, '/', {'/': {'tools.auth_basic.on': True,

222
mediaweb/shows.py Normal file
View File

@ -0,0 +1,222 @@
import os
import re
import string
import logging
from enum import Enum
from fuzzywuzzy import fuzz
from collections import namedtuple
# lifted from https://git.davepedu.com/dave/tvsort/src/branch/master/tvsort/
# match something like s01e02; the episode part is optional, so a bare s01 matches too
NORMAL_SEASON_EP_RE = re.compile(r'(([sS]([0-9]{2}))x?([eE]([0-9]{2}))?)')
# match something like 21x04
NORMAL_SEASON_EP_RE2 = re.compile(r'(([0-9]+)[xX]([0-9]{2}))')
# match something like 2017-08-3; accepts any year from 2000 to 2099 rather than only
# the 2010s, so dated episodes from 2020 onward still parse
DATE_SEASON_EP_RE = re.compile(r'((20[0-9]{2}).([0-9]{1,2})?.([0-9]{1,2})?)')

# Case-insensitive patterns that parse_episode deletes from a file name before matching
COMMON_CRAP = [re.compile(i, flags=re.I) for i in
               [r'(720|1080)p',
                r'hdtv',
                r'(h.?)?x?264(.[a-z0-9]+)?',
                r'(ddp\d\.\d)?',
                r'web(\-?(dl|rip))?',
                r'[\.\-\s](amzn|amazon)[\.\-\s]',
                r'dd.5.\d',
                r'AAC2.\d']]
class EpisodeParseException(Exception):
    """Raised when no season/episode or date information can be found in a file name."""
class Seasoning(Enum):
    """
    Chronological scheme an episode (or a show's directory layout) appears to use.

    Episodes are grouped into seasons or season-like entities. A season may number its episodes by date or by
    season and episode number. An episode may also belong to a season without obeying the regular naming
    scheme, such as a special episode.
    """
    NONE = 0       # no scheme detected
    BY_SEASON = 1  # season and episode number
    BY_DATE = 2    # date based
    SPECIAL = 3    # associated with a season but outside the regular numbering
# Struct describing an in-library tv show
#   root    : path to the library folder containing `dir`
#   dir     : name of the show's directory inside `root`
#   name    : name of the show (create_show uses the lowercased directory name)
#   mode    : Seasoning strategy (cannot be 'special')
#   seasons : list of season ints
Show = namedtuple("Show", ["root", "dir", "name", "mode", "seasons"])
# Struct for describing an episode file.
#   file  : file name of the episode file
#   mode  : chronological scheme of file naming (see Seasoning)
#   major : least granular chronological unit. Typically season or year
#   minor : medium granular unit. Episode number, or the month for date-based episodes
#   extra : most granular unit. Always day (only used for date-based episodes)
EpInfo = namedtuple("EpInfo", ["file", "mode", "major", "minor", "extra"])
# Struct describing the intent to sort an episode file into a location
#   root    : abs path to the folder containing ep.file
#             NOTE(review): match_episode currently passes the file name itself here - confirm intent
#   ep      : associated EpInfo object
#   dest    : associated Show object
#   subdest : name of the season/year directory inside the show (see sub_bucket_name)
#   score   : fuzzy match score between the show name and the file name
MatchedEpisode = namedtuple("MatchedEpisode", ["root", "ep", "dest", "subdest", "score"])
def create_show(root_path, dirname):
    """
    Inspect a show directory and guess how its episodes are organized.

    Entries named "Season <n>" vote for season numbering, entries that are a year between 1901 and 2049 vote
    for date numbering, and anything else votes against both. The scheme with a strict majority wins.

    :param root_path: path of the library folder containing the show directory
    :param dirname: name of the show directory inside root_path
    :return: Show with mode BY_DATE, BY_SEASON or NONE and the list of season numbers or years found
    """
    # Inspect contents of show directory and guess naming scheme
    yearish = 0
    seasonish = 0
    wtfish = 0

    buckets_season = []
    buckets_year = []

    for item in os.listdir(os.path.join(root_path, dirname)):
        if item.lower().startswith("season "):
            # Use the first run of digits so names like "Season 2 (Part 1)" parse instead of raising;
            # a "Season ..." entry without any digits falls through and is counted as unknown.
            number = re.search(r'[0-9]+', item)  # todo flexible season dir detection
            if number:
                seasonish += 1
                buckets_season.append(int(number.group()))
                continue
        try:
            year = int(item)
        except ValueError:
            pass
        else:
            # only plausible years count, and only those are recorded as buckets
            if 1900 < year < 2050:
                yearish += 1
                buckets_year.append(year)
                continue
        wtfish += 1

    if yearish > seasonish and yearish > wtfish:
        mode = Seasoning.BY_DATE
        seasons = buckets_year
    elif seasonish > yearish and seasonish > wtfish:
        mode = Seasoning.BY_SEASON
        seasons = buckets_season
    else:
        mode = Seasoning.NONE
        seasons = []

    return Show(root_path, dirname, dirname.lower(), mode, seasons)
def create_index(fs_paths):
    """
    Build a list of Show objects, one for every directory directly inside each of the given library paths.

    :param fs_paths: iterable of library root paths to scan
    :return: list of Show objects in the order they were discovered
    """
    shows = []
    for library in fs_paths:
        for entry in os.listdir(library):
            show_path = os.path.join(library, entry)
            if not os.path.isdir(show_path):
                continue
            try:
                shows.append(create_show(library, entry))
            except PermissionError as pe:
                # an unreadable show must not abort indexing of the rest of the library;
                # report the show directory that was skipped, not the library root
                logging.warning("skipping %s due to %s", show_path, pe)
    return shows
def _strip_episode_marker(item, whole):
    """
    Remove the matched season/episode (or date) text from the working name.
    """
    pos = item.find(whole)
    if pos >= 10:
        # delete everything after the episode number
        return item[0:pos]
    # unless it makes it too short
    return item.replace(whole, "")


def parse_episode(fname):
    """
    Given a file name, parse out any information we can from the name

    :param fname: file name of the episode file
    :return: tuple of (EpInfo, str): the parsed episode information and the remaining lowercase text of the
             name, which should be the show name and possibly an episode title
    :raises EpisodeParseException: if neither a season/episode marker nor a date is found in the name
    """
    # Remove the file extension; splitext leaves a name without an extension intact
    item = os.path.splitext(fname)[0]

    # Extract season information
    # And remove seasons info chars from the working name
    match = NORMAL_SEASON_EP_RE.search(item) or NORMAL_SEASON_EP_RE2.search(item)
    if match:
        fields = match.groups()
        if len(fields) == 5:
            whole, _, season, _, episode = fields
        else:
            whole, season, episode = fields

        if episode:
            epinfo = EpInfo(fname, Seasoning.BY_SEASON, int(season), int(episode), None)
        else:
            # a season without an episode number, e.g. a bare "s01"
            epinfo = EpInfo(fname, Seasoning.SPECIAL, int(season), None, None)
    else:
        match = DATE_SEASON_EP_RE.search(item)
        if not match:
            raise EpisodeParseException("Could not parse episode {}".format(repr(fname)))
        whole, year, month, day = match.groups()
        epinfo = EpInfo(fname, Seasoning.BY_DATE, int(year),
                        int(month) if month else None,
                        int(day) if day else None)

    item = _strip_episode_marker(item, whole)

    # Remove common torrenty names
    for crap in COMMON_CRAP:
        item = crap.sub("", item)

    # Remaining chars should be a show name and possibly an episode title. And random bs
    # Lowercase first: the allowed set holds lowercase letters only, so uppercase letters
    # would otherwise be blanked out along with the punctuation.
    allowed_chars = string.ascii_lowercase + string.digits
    item = ''.join(c if c in allowed_chars else " " for c in item.lower()).strip()

    return epinfo, item
def sub_bucket_name(show, major, minor, extra):
    """
    Name of the directory inside a show's folder that an episode belongs in.

    :param show: Show the episode is being sorted into; its mode selects the naming
    :param major: season number or year of the episode
    :param minor: unused
    :param extra: unused
    :return: the year for date-based shows, "Season <major>" for season-based shows, otherwise an empty string
    """
    scheme = show.mode
    if scheme == Seasoning.BY_DATE:
        return str(major)
    if scheme == Seasoning.BY_SEASON:
        return f"Season {major}"
    return ''
def match_episode(fname, shows, thresh=65):
    """
    Given a filename and a show library, determine which show and season is the best place to sort it to

    :param fname: file name of the episode
    :param shows: iterable of Show objects to match against
    :param thresh: a show is only a candidate when its fuzzy score is strictly above this value
    :return: list of MatchedEpisode, best score first; empty if the file name cannot be parsed
    """
    # Parse information from the episode file name
    try:
        epinfo, item = parse_episode(fname)
    except EpisodeParseException:
        return []

    # Find a show from the library best matching this episode
    needle = item.lower()
    scored = ((show, fuzz.token_set_ratio(show.name.lower(), needle))  # TODO add algorithm swap arg for snakeoil
              for show in shows)
    candidates = [
        MatchedEpisode(fname, epinfo, show,
                       sub_bucket_name(show, epinfo.major, epinfo.minor, epinfo.extra),
                       score)
        for show, score in scored
        if score > thresh
    ]
    candidates.sort(key=lambda candidate: candidate.score, reverse=True)
    return candidates

View File

@ -2,6 +2,7 @@ backports.functools-lru-cache==1.5
cheroot==6.5.5
CherryPy==18.1.2
deluge-client==1.7.1
fuzzywuzzy==0.17.0
jaraco.functools==2.0
Jinja2==2.10.1
MarkupSafe==1.1.1

View File

@ -1,13 +1,13 @@
{% extends "page.html" %}
{% block toolbar %}
<form action="/" method="post">
<input name="action" type="submit" value="refresh">
<input name="action" type="submit" value="update">
</form>
{% endblock %}
{% block body %}
<div class="torrents">
<div class="toolbar">
<form action="/" method="post">
<input name="action" type="submit" value="refresh">
<input name="action" type="submit" value="update">
</form>
</div>
<h2>Completed</h2>
<table>
<tr>
@ -23,7 +23,7 @@
<td>{{ torid[0:6] }}</td>
<td>{{ tor.name }}</td>
<td>{{ tor.save_path }}</td>
<td>x</td>
<td>x</td><!-- TODO pre-computed sort destination for 1 click sorting -->
<td>{{ "complete" if tor.is_finished else "pending" }}</td>
<td>
<a href="/move?thash={{ torid }}"><button>Move</button></a>
@ -53,6 +53,21 @@
</tr>
{% endif %}{% endfor %}
</table>
<h2>Shows</h2>
<table>
<tr>
<th>name</th>
<th>path</th>
<th>sorting</th>
</tr>
{% for show in shows %}
<tr>
<td>{{ show.name }}</td>
<td>{{ show.dir }}</td>
<td>{{ show.mode }}</td>
</tr>
{% endfor %}
</table>
</div>
{% endblock %}

View File

@ -1,4 +1,9 @@
{% extends "page.html" %}
{% block toolbar %}
<a href="/"><button>Back</button></a>
{% endblock %}
{% block body %}
<div class="moveform">
<h2>Move {{ torrent.name }} </h2>
@ -26,4 +31,4 @@
</fieldset>
</form>
</div>
{% endblock %}
{% endblock %}

View File

@ -10,20 +10,22 @@
}
#page {
margin: 0px auto;
width: 1000px;
padding: 0px 15px;
max-width: 1000px;
}
td {
padding: 3px 5px;
}
td {
border-bottom: 1px solid #666;
/*display: block;*/
}
</style>
</head>
<body>
<div id="page">
<div class="toolbar">
{% block toolbar %}{% endblock %}
</div>
{% block body %}{% endblock %}
</div>
</body>

50
templates/sortform.html Normal file
View File

@ -0,0 +1,50 @@
{% extends "page.html" %}
{% block toolbar %}
<a href="/"><button>Back</button></a>
{% endblock %}
{% block body %}
<div class="sortform">
<h2>Sort {{ torrent.name }} </h2>
<p>{{ torrent.hash }}</p>
<fieldset>
<legend>files</legend>
<ul>
{% for f in torrent.files %}
<li>{{ f.path }} - {{ f.size }} B</li> <!-- TODO nice size formatting-->
{% endfor %}
</ul>
</fieldset>
<br/>
<form action="/sort" method="post">
<input type="hidden" name="thash" value="{{ torrent.hash }}">
<fieldset>
<legend>destination</legend>
<table>
<tr>
<th></th>
<th>show</th>
<th>season</th>
<th>library</th>
<th>score</th>
</tr>
{% for row in matches %}
<tr>
<td>
<input type="radio" name="dest" value="{{row.dest.dir}}" {% if loop.index == 1 %}checked="checked"{%endif %}/>
</td>
<td>{{ row.dest.dir }}</td>
<td>{{ row.subdest }}</td>
<td>{{ row.dest.root }}</td>
<td>{{ row.score }}</td>
</tr>
{% endfor %}
</table>
<br />
<input type="submit" value="Sort">
</fieldset>
</form>
</div>
{% endblock %}