223 lines
7.0 KiB
Python
223 lines
7.0 KiB
Python
import os
|
|
import re
|
|
import string
|
|
import logging
|
|
from enum import Enum
|
|
from fuzzywuzzy import fuzz
|
|
from collections import namedtuple
|
|
|
|
|
|
# lifted from https://git.davepedu.com/dave/tvsort/src/branch/master/tvsort/
|
|
|
|
|
|
# Season/episode markers searched for in episode file names. parse_episode
# lower-cases the name before searching, so the upper-case alternatives only
# matter to other users of these patterns.

# Matches something like s01e02 (also s01xe02). A bare "s01" matches too, with
# the episode groups unset.
# Groups: 1 = whole marker, 3 = season digits, 5 = episode digits (or None).
NORMAL_SEASON_EP_RE = re.compile(r'(([sS]([0-9]{2}))x?([eE]([0-9]{2}))?)')

# Matches something like 21x04.
# Groups: 1 = whole marker, 2 = season digits, 3 = episode digits.
NORMAL_SEASON_EP_RE2 = re.compile(r'(([0-9]+)[xX]([0-9]{2}))')

# Matches something like 2017-08-3: a 201x/202x year followed by an optional
# month and an optional day, each preceded by one arbitrary separator character.
# Groups: 1 = whole marker, 2 = year, 3 = month (or None), 4 = day (or None).
# The decade class is written [12]; a comma inside the brackets would be matched
# literally and let non-numeric text such as "20,5" pass as a year.
DATE_SEASON_EP_RE = re.compile(r'((20[12][0-9]).([0-9]{1,2})?.([0-9]{1,2})?)')

# Release / encoding tags that are deleted (case-insensitively) from a file name
# before it is compared against show names.
COMMON_CRAP = [re.compile(i, flags=re.I) for i in
               [r'(720|1080)p',
                r'hdtv',
                r'(h.?)?x?264(.[a-z0-9]+)?',
                r'(ddp\d\.\d)?',
                r'web(\-?(dl|rip))?',
                r'[\.\-\s](amzn|amazon)[\.\-\s]',
                r'dd.5.\d',
                r'aac2.\d']]
|
|
|
|
|
|
class EpisodeParseException(Exception):
    """Raised by parse_episode when a file name has no season, episode or date marker it can recognise."""
|
|
|
|
|
|
class Seasoning(Enum):
    """
    Chronological scheme an episode file or a show directory appears to use.

    Episodes are grouped into seasons or season-like buckets. A season may number its episodes by season and episode
    number or by air date. An episode can also belong to a season without following the regular numbering, such as a
    special episode.
    """
    # No scheme could be determined
    NONE = 0
    # Numbered by season and episode
    BY_SEASON = 1
    # Numbered by date
    BY_DATE = 2
    # Tied to a season but without a regular episode number
    SPECIAL = 3
|
|
|
|
|
|
# Struct describing an in-library tv show
#   root    : path to the folder containing dir
#   dir     : the show's directory entry inside root (create_show stores the bare directory name here)
#   name    : name of the show (create_show stores the lower-cased directory name)
#   mode    : Seasoning strategy of the show (cannot be 'special')
#   seasons : list of ints - season numbers, or years for a date-based show
Show = namedtuple("Show", ["root", "dir", "name", "mode", "seasons"])
|
|
|
|
|
|
# Struct for describing an episode file.
#   file  : file name of the episode file (parse_episode stores the lower-cased name)
#   mode  : chronological scheme of the file naming (see Seasoning)
#   major : least granular chronological unit - the season number, or the year for date-based episodes
#   minor : medium granular unit - the episode number, or the month for date-based episodes; None when absent
#   extra : most granular unit - the day, only set for date-based episodes
EpInfo = namedtuple("EpInfo", ["file", "mode", "major", "minor", "extra"])
|
|
|
|
|
|
# Struct describing the intent to sort an episode file into a location
#   root    : abs path to the folder containing ep.file
#             NOTE(review): match_episode in this file passes the file name it was given in this slot - confirm intent
#   ep      : associated EpInfo object
#   dest    : associated Show object
#   subdest : name of the sub-folder inside the show (as produced by sub_bucket_name), '' when there is none
#   score   : fuzzy-match score between the show name and the episode file name
MatchedEpisode = namedtuple("MatchedEpisode", ["root", "ep", "dest", "subdest", "score"])
|
|
|
|
|
|
def create_show(root_path, dirname):
    """
    Build a Show record for one show directory, guessing its naming scheme from the directory's contents.

    Each entry directly inside the show directory is put in one of three groups: season-like (name starts with
    "season " and contains a number), year-like (the whole name is an integer between 1901 and 2049) or unrecognised.
    The show is BY_DATE or BY_SEASON only when the matching group strictly outnumbers both other groups; otherwise its
    mode is Seasoning.NONE.

    :param root_path: folder containing the show directory
    :param dirname: name of the show directory inside root_path
    :return: Show(root_path, dirname, lower-cased dirname, mode, seasons). seasons holds the season numbers for
             BY_SEASON, the years for BY_DATE and is empty for NONE
    :raises OSError: propagated from os.listdir when the show directory cannot be listed
    """
    season_numbers = []
    years = []
    unrecognised = 0

    # Inspect contents of show directory and guess naming scheme
    for item in os.listdir(os.path.join(root_path, dirname)):
        if item.lower().startswith("season "):
            # todo flexible season dir detection
            # Take the first run of digits so names like "Season 01 (2019)" or "Season 1 Part 2" do not blow up
            # int(); a "season" folder without any number is treated as unrecognised instead of raising.
            number = re.search(r'[0-9]+', item)
            if number:
                season_numbers.append(int(number.group()))
                continue
        else:
            try:
                year = int(item)
            except ValueError:
                year = None
            # Only plausible years are recorded; other bare integers count as unrecognised entries
            if year is not None and 1900 < year < 2050:
                years.append(year)
                continue
        unrecognised += 1

    if len(years) > len(season_numbers) and len(years) > unrecognised:
        mode, seasons = Seasoning.BY_DATE, years
    elif len(season_numbers) > len(years) and len(season_numbers) > unrecognised:
        mode, seasons = Seasoning.BY_SEASON, season_numbers
    else:
        mode, seasons = Seasoning.NONE, []

    return Show(root_path, dirname, dirname.lower(), mode, seasons)
|
|
|
|
|
|
def create_index(fs_paths):
    """
    Build the show library by scanning every library root for show directories.

    :param fs_paths: iterable of library root folders; each sub-directory found directly inside one is treated as a show
    :return: list of Show records in scan order. Shows whose directory raises PermissionError are logged and skipped
    :raises OSError: propagated from os.listdir when a library root itself cannot be listed
    """
    shows = []
    for root in fs_paths:
        for entry in os.listdir(root):
            show_path = os.path.join(root, entry)
            if not os.path.isdir(show_path):
                continue
            try:
                shows.append(create_show(root, entry))
            except PermissionError as pe:
                # Name the show directory that was actually skipped; the rest of the library root is still scanned
                logging.warning("skipping %s due to %s", show_path, pe)

    return shows
|
|
|
|
|
|
def _strip_episode_marker(item, whole, min_prefix=10):
    """Cut the working name at the episode marker, or only delete the marker when the cut would leave too little."""
    pos = item.find(whole)
    if pos >= min_prefix:
        # delete everything after the episode number
        return item[0:pos]
    # unless it makes it too short
    return item.replace(whole, "")


def parse_episode(fname):
    """
    Given a file name, parse out any information we can from the name

    The name is lower-cased and its last dot-separated component (the extension) is dropped. A season/episode marker
    (s01e02, then 21x04) is searched for first and a date marker (2017-08-3) second. The marker and, when the name
    before it is at least 10 characters long, everything after it are removed. Common release tags are deleted and
    every character other than a-z / 0-9 is turned into a space.

    :param fname: episode file name
    :return: tuple (EpInfo, cleaned name). The EpInfo mode is BY_SEASON (season + episode), SPECIAL (season only) or
             BY_DATE (year with optional month and day); EpInfo.file is the lower-cased file name
    :raises EpisodeParseException: if no season/episode or date marker is found
    """
    fname = fname.lower()

    # Remove file extensions. A name without any dot has no extension and is kept whole (dropping the last component
    # unconditionally would leave nothing to parse).
    item = fname.rpartition(".")[0] if "." in fname else fname

    # Extract season information
    match = NORMAL_SEASON_EP_RE.search(item)
    if match:
        whole, season, episode = match.group(1, 3, 5)
    else:
        match = NORMAL_SEASON_EP_RE2.search(item)
        if match:
            whole, season, episode = match.groups()

    if match:
        # The season digits are always present on a match; only the episode part is optional
        if episode:
            epinfo = EpInfo(fname, Seasoning.BY_SEASON, int(season), int(episode), None)
        else:
            epinfo = EpInfo(fname, Seasoning.SPECIAL, int(season), None, None)
    else:
        match = DATE_SEASON_EP_RE.search(item)
        if not match:
            raise EpisodeParseException("Could not parse episode {}".format(repr(fname)))
        whole, year, month, day = match.groups()
        epinfo = EpInfo(fname, Seasoning.BY_DATE, int(year),
                        int(month) if month else None,
                        int(day) if day else None)

    # And remove seasons info chars from the working name
    item = _strip_episode_marker(item, whole)

    # Remove common torrenty names
    for crap in COMMON_CRAP:
        item = crap.sub("", item)

    # Remaining chars should be a show name and possibly an episode title. And random bs
    allowed_chars = string.ascii_lowercase + string.digits
    item = ''.join(ch if ch in allowed_chars else " " for ch in item).strip()

    return epinfo, item
|
|
|
|
|
|
def sub_bucket_name(show, major, minor, extra):
    """
    Name of the sub-folder inside a show that an episode belongs in.

    :param show: object whose ``mode`` attribute selects the naming scheme
    :param major: season number or year of the episode
    :param minor: unused
    :param extra: unused
    :return: "Season <major>" for season-based shows, str(major) for date-based shows, '' for any other mode
    """
    scheme = show.mode
    if scheme == Seasoning.BY_SEASON:
        return "Season {}".format(major)
    if scheme == Seasoning.BY_DATE:
        return str(major)
    return ''
|
|
|
|
|
|
def match_episode(fname, shows, minscore=65):
    """
    Given a filename and a show library, determine which show and season is the best place to sort it to

    :param fname: episode file name
    :param shows: iterable of Show objects to compare against
    :param minscore: lowest fuzzy-match score (inclusive) for a show to be kept as a candidate
    :return: list of MatchedEpisode, best score first; empty when the file name cannot be parsed or no show qualifies
    """
    # Parse information from the episode file name
    try:
        epinfo, item = parse_episode(fname)
    except EpisodeParseException:
        return []

    needle = item.lower()

    # Find a show from the library best matching this episode
    candidates = []
    for show in shows:
        score = fuzz.token_set_ratio(show.name.lower(), needle)  #TODO add algorithm swap arg for snakeoil
        if score < minscore:
            continue
        subdest = sub_bucket_name(show, epinfo.major, epinfo.minor, epinfo.extra)
        candidates.append(MatchedEpisode(fname, epinfo, show, subdest, score))

    candidates.sort(key=lambda m: m.score, reverse=True)
    return candidates
|