initial prototype commit

This commit is contained in:
dave 2017-10-07 18:08:07 -07:00
commit e2edfabe28
5 changed files with 185 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
build
dist
*.egg-info
testenv

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
fuzzywuzzy==0.15.1
python-Levenshtein==0.12.0
tabulate==0.8.1

16
setup.py Normal file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env python3
from setuptools import setup
setup(name='tvsort',
version='0.0.1',
description='CLI tool to sort tv shows into directories',
url='http://gitlab.xmopx.net/dave/tvsort',
author='dpedu',
author_email='dave@davepedu.com',
packages=['tvsort'],
entry_points={
'console_scripts': [
'tvsort = tvsort.cli:main'
]
},
zip_safe=True)

0
tvsort/__init__.py Normal file
View File

162
tvsort/cli.py Normal file
View File

@ -0,0 +1,162 @@
#!/usr/bin/env python3
import argparse
import os
import re
import string
from pprint import pprint
from fuzzywuzzy import fuzz
from collections import namedtuple
from tabulate import tabulate
class Season:
by_season = 0
by_date = 1
special = 3
EpInfo = namedtuple("EpInfo", "file mode major minor extra")
Show = namedtuple("Show", "dir name match")
MatchedEpisode = namedtuple("MatchedEpisode", "ep dest score")
def create_show(dirname):
dir_lower = dirname.lower()
return Show(dirname, dir_lower, lambda other: fuzz.token_set_ratio(dir_lower, other.lower()))
def main():
" parse command line args "
parser = argparse.ArgumentParser(description="sort tv shows")
parser.add_argument("-s", "--src", nargs="+", help="", required=True)
parser.add_argument("-d", "--dest", nargs="+", help="", required=True)
parser.add_argument("--soft", action="store_true", help="Soft link instead of hard link")#TODO
parser.add_argument("--match-thresh", type=int, default=65) #0-100
parser.add_argument("--mappings", nargs="+", default=[]) # many foo=bar transformations to help witch mapping
args = parser.parse_args()
mappings = {}
for item in args.mappings:
key, value = item.split("=")
mappings[key] = value
# Create index of shows
shows = []
for destdir in args.dest:
for i in os.listdir(destdir):
shows.append(create_show(i))
NORMAL_SEASON_EP_RE = re.compile(r'(([sS]([0-9]{2}))x?([eE]([0-9]{2}))?)') # match something like s01e02
NORMAL_SEASON_EP_RE2 = re.compile(r'(([0-9]+)[xX]([0-9]{2}))') # match something like 21x04
DATE_SEASON_EP_RE = re.compile(r'((201[0-9]).([0-9]{1,2})?.([0-9]{1,2})?)') # match something like 2017-08-3
COMMON_CRAP = [re.compile(i, flags=re.I) for i in
[r'(720|1080)p',
r'hdtv',
r'(h.?)?x?264(.[a-z0-9]+)?',
r'(ddp\d\.\d)?',
r'web(\-?(dl|rip))?',
r'[\.\-\s](amzn|amazon)[\.\-\s]',
r'dd.5.\d',
r'AAC2.\d']]
failures = []
results = []
for srcdir in args.src:
for item in os.listdir(srcdir):
if not os.path.isfile(os.path.join(srcdir, item)):
# TODO go into subdirs too (we assume dir means season pack)
continue
fname = item
# Remove file extension
item = item.rstrip(".mkv").lower()#TODO make this better
# Apply manual transformations
for old, new in mappings.items():
item = item.replace(old, new)
# Extract season information
# And remove seasons info chars from the working name
epinfo = None
match = NORMAL_SEASON_EP_RE.search(item) or NORMAL_SEASON_EP_RE2.search(item)
if match:
fields = match.groups()
if len(fields) == 5:
whole, _, season, _, episode = fields
else:
whole, season, episode = fields
if season and not episode:
epinfo = EpInfo(fname, Season.special, int(season), None, None)
else:
assert season and episode
epinfo = EpInfo(fname, Season.by_season, int(season), int(episode), None)
# delete everything after the episode number
pos = item.find(whole)
if pos >= 10:
item = item[0:pos]
else:
# unless it makes it too short
item = item.replace(whole, "")
else:
match = DATE_SEASON_EP_RE.search(item)
if match:
whole, year, month, day = match.groups()
assert year is not None
if month:
month = int(month)
if day:
day = int(day)
epinfo = EpInfo(fname, Season.by_date, int(year), month, day)
# delete everything after the episode number
pos = item.find(whole)
if pos >= 10:
item = item[0:pos]
else:
# unless it makes it too short
item = item.replace(whole, "")
else:
# raise Exception("Could not parse episode: {}".format(repr(item)))
failures.append(fname)
continue
# Remove common torrenty names
for crap in COMMON_CRAP:
item = crap.sub("", item)
# print(epinfo, "->", item)
# Remaining chars should be a show name and possibly and episode title. And random bs
allowed_chars = string.ascii_lowercase + string.digits
item = ''.join([i if i in allowed_chars else " " for i in item]).strip()
match_score = 0
best_match_show = None
for show in shows:
value = show.match(item)
if value > match_score:
match_score = value
best_match_show = show
if match_score > args.match_thresh:
results.append(MatchedEpisode(epinfo, best_match_show, match_score))
else:
failures.append(fname)
tab_rows = []
for item in sorted(results, key=lambda x: x.dest.dir):
row = [item.ep.file, item.ep.major, item.ep.minor, item.dest.dir, item.score]
tab_rows.append(row)
print(tabulate(tab_rows, headers=["file", "season", "episode", "dest", "score"]))
if failures:
print("\n\n")
print("Could not match:")
pprint(failures)
if __name__ == '__main__':
main()