Web app for quickly sorting deluge torrents into a library
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 

223 lines
7.1 KiB

  1. import os
  2. import re
  3. import string
  4. import logging
  5. from enum import Enum
  6. from fuzzywuzzy import fuzz
  7. from collections import namedtuple
  8. # lifted from https://git.davepedu.com/dave/tvsort/src/branch/master/tvsort/
  9. NORMAL_SEASON_EP_RE = re.compile(r'(([sS]([0-9]{2}))x?([eE]([0-9]{2}))?)') # match something like s01e02
  10. NORMAL_SEASON_EP_RE2 = re.compile(r'(([0-9]+)[xX]([0-9]{2}))') # match something like 21x04
  11. DATE_SEASON_EP_RE = re.compile(r'((201[0-9]).([0-9]{1,2})?.([0-9]{1,2})?)') # match something like 2017-08-3
  12. COMMON_CRAP = [re.compile(i, flags=re.I) for i in
  13. [r'(720|1080)p',
  14. r'hdtv',
  15. r'(h.?)?x?264(.[a-z0-9]+)?',
  16. r'(ddp\d\.\d)?',
  17. r'web(\-?(dl|rip))?',
  18. r'[\.\-\s](amzn|amazon)[\.\-\s]',
  19. r'dd.5.\d',
  20. r'AAC2.\d']]
  21. class EpisodeParseException(Exception):
  22. pass
  23. class Seasoning(Enum):
  24. """
  25. All episodes are categorized into seasons (or season-like entities). A season may number it's episodes by date or by
  26. season and episode number. Thirdly, an episode may be associated with a season but not obey the regular naming
  27. scheme - such as a special episode. This enum is for describing what chronological scheme an episode appears to use.
  28. """
  29. NONE = 0
  30. BY_SEASON = 1
  31. BY_DATE = 2
  32. SPECIAL = 3
  33. Show = namedtuple("Show", "root dir name mode seasons")
  34. """
  35. Struct describing an in-library tv show
  36. root : abs path to the folder containing dir
  37. dir : absolute(?) file path to the show
  38. name : name of the show
  39. mode : Season strategy (cannot be 'special')
  40. seasons : list of season ints
  41. """
  42. EpInfo = namedtuple("EpInfo", "file mode major minor extra")
  43. """
  44. Struct for describing an episode file.
  45. file : file name of the episode file
  46. mode : chronological scheme of file naming (see Season)
  47. major : least granular chronological unit. Typically season or year
  48. minor : medium granular unit. Always episode number
  49. extra : most granular unit. Always day (only used for date-based episodes)
  50. """
  51. MatchedEpisode = namedtuple("MatchedEpisode", "root ep dest subdest score")
  52. """
  53. Struct describing the intent to sort and episode file into a location
  54. root : abs path to the folder containing ep.file
  55. ep : associated EpInfo object
  56. dest : associated Show object
  57. score : scoring value Show::match returned
  58. """
  59. def create_show(root_path, dirname):
  60. dir_lower = dirname.lower()
  61. # Inspect contents of show directory and guess naming scheme
  62. yearish = 0
  63. seasonish = 0
  64. wtfish = 0
  65. buckets_season = []
  66. buckets_year = []
  67. for item in os.listdir(os.path.join(root_path, dirname)):
  68. if item.lower().startswith("season "):
  69. seasonish += 1
  70. buckets_season.append(int(''.join([i if i in string.digits else " " for i in item]).strip())) # todo flexible season dir detection
  71. continue
  72. try:
  73. year = int(item)
  74. buckets_year.append(year)
  75. if year > 1900 and year < 2050:
  76. yearish += 1
  77. continue
  78. except ValueError:
  79. pass
  80. wtfish += 1
  81. mode = None
  82. episodes = None
  83. if yearish > seasonish and yearish > wtfish:
  84. mode = Seasoning.BY_DATE
  85. episodes = buckets_year
  86. elif seasonish > yearish and seasonish > wtfish:
  87. mode = Seasoning.BY_SEASON
  88. episodes = buckets_season
  89. else:
  90. mode = Seasoning.NONE
  91. episodes = []
  92. return Show(root_path, dirname, dir_lower, mode, episodes)
  93. def create_index(fs_paths):
  94. shows = []
  95. for d in fs_paths:
  96. for i in os.listdir(d):
  97. if os.path.isdir(os.path.join(d, i)):
  98. try:
  99. shows.append(create_show(d, i))
  100. except PermissionError as pe:
  101. logging.warning(f"skipping {d} due to {pe}")
  102. return shows
  103. def parse_episode(fname):
  104. """
  105. Given a file name, parse out any information we can from the name
  106. :return:
  107. """
  108. # Remove file extensions
  109. # item = fname.rstrip(".mkv").lower() #TODO make this better
  110. item = '.'.join(fname.split(".")[0:-1])
  111. # Extract season information
  112. # And remove seasons info chars from the working name
  113. epinfo = None
  114. match = NORMAL_SEASON_EP_RE.search(item) or NORMAL_SEASON_EP_RE2.search(item)
  115. if match:
  116. fields = match.groups()
  117. if len(fields) == 5:
  118. whole, _, season, _, episode = fields
  119. else:
  120. whole, season, episode = fields
  121. if season and not episode:
  122. epinfo = EpInfo(fname, Seasoning.SPECIAL, int(season), None, None)
  123. else:
  124. assert season and episode
  125. epinfo = EpInfo(fname, Seasoning.BY_SEASON, int(season), int(episode), None)
  126. # delete everything after the episode number
  127. pos = item.find(whole)
  128. if pos >= 10:
  129. item = item[0:pos]
  130. else:
  131. # unless it makes it too short
  132. item = item.replace(whole, "")
  133. else:
  134. match = DATE_SEASON_EP_RE.search(item)
  135. if match:
  136. whole, year, month, day = match.groups()
  137. assert year is not None
  138. if month:
  139. month = int(month)
  140. if day:
  141. day = int(day)
  142. epinfo = EpInfo(fname, Seasoning.BY_DATE, int(year), month, day)
  143. # delete everything after the episode number
  144. pos = item.find(whole)
  145. if pos >= 10:
  146. item = item[0:pos]
  147. else:
  148. # unless it makes it too short
  149. item = item.replace(whole, "")
  150. else:
  151. raise EpisodeParseException("Could not parse episode {}".format(repr(fname)))
  152. # Remove common torrenty names
  153. for crap in COMMON_CRAP:
  154. item = crap.sub("", item)
  155. # Remaining chars should be a show name and possibly and episode title. And random bs
  156. allowed_chars = string.ascii_lowercase + string.digits
  157. item = ''.join([i if i in allowed_chars else " " for i in item]).strip()
  158. return epinfo, item
  159. def sub_bucket_name(show, major, minor, extra):
  160. if show.mode == Seasoning.BY_DATE:
  161. return str(major)
  162. elif show.mode == Seasoning.BY_SEASON:
  163. return "Season {}".format(major)
  164. else:
  165. return ''
  166. def match_episode(fname, shows, thresh=65):
  167. """
  168. Given a filename and a show library, determine which show and season is the best place to sort it to
  169. """
  170. matches = []
  171. # Parse information from the episode file name
  172. try:
  173. epinfo, item = parse_episode(fname)
  174. except EpisodeParseException:
  175. return matches
  176. # Find a show from the library best matching this episode
  177. for show in shows:
  178. value = fuzz.token_set_ratio(show.name.lower(), item.lower()) #TODO add algorithm swap arg for snakeoil
  179. if value > thresh:
  180. matches.append(
  181. MatchedEpisode(fname, epinfo, show,
  182. sub_bucket_name(show, epinfo.major, epinfo.minor, epinfo.extra),
  183. value))
  184. return sorted(matches, key=lambda x: x.score, reverse=True)