def __init__(self):
    """Build the container holding the weak episode-number patterns.

    NOTE(review): 15 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 15)

    of_separators = ['of', 'sur', '/', '\\']
    of_separators_re = re.compile(
        build_or_pattern(of_separators, escape=True), re.IGNORECASE)

    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    episode_words = ['episodes?']

    def _formater(episode_number):
        """Turn a raw digit string into an episode number or season/episode dict.

        Returns None when the parsed value is a valid year, or when the
        derived season exceeds 50.
        """
        epnum = parse_numeral(episode_number)
        if valid_year(epnum):
            return None
        if epnum <= 100:
            return epnum
        season, episode = divmod(epnum, 100)
        # episodes which have a season > 50 are most likely errors
        # (Simpson is at 25!)
        if season > 50:
            return None
        return {'season': season, 'episodeNumber': episode}

    def _prefers_number(options):
        """Disabler: true when the 'episode_prefer_number' option is set."""
        return options.get('episode_prefer_number') if options else False

    def _does_not_prefer_number(options):
        """Disabler: true when options are empty or the option is unset."""
        if not options:
            return True
        return not options.get('episode_prefer_number')

    register = self.container.register_property

    register(['episodeNumber', 'season'], '[0-9]{2,4}',
             confidence=0.6, formatter=_formater, disabler=_prefers_number)
    register(['episodeNumber', 'season'], '[0-9]{4}',
             confidence=0.6, formatter=_formater)
    register('episodeNumber', r'[^0-9](\d{1,3})',
             confidence=0.6, formatter=parse_numeral,
             disabler=_does_not_prefer_number)

    # "episode <n>" spelled out with one of the episode words.
    worded_episode = ('(' + build_or_pattern(episode_words) + sep
                      + '?(?P<episodeNumber>' + numeral + '))[^0-9]')
    register(None, worded_episode, confidence=0.4, formatter=parse_numeral)

    # "<n> of <total>" style numbering.
    numbered_of_total = (r'(?P<episodeNumber>' + numeral + ')' + sep + '?'
                         + of_separators_re.pattern + sep
                         + '?(?P<episodeCount>' + numeral + ')')
    register(None, numbered_of_total, confidence=0.6, formatter=parse_numeral)

    # Bare number at the very start or very end of the string.
    register('episodeNumber', r'^' + sep + r'?(\d{1,3})' + sep,
             confidence=0.4, formatter=parse_numeral,
             disabler=_does_not_prefer_number)
    register('episodeNumber', sep + r'(\d{1,3})' + sep + '?$',
             confidence=0.4, formatter=parse_numeral,
             disabler=_does_not_prefer_number)
def __init__(self):
    """Configure the release-group container and its safety lists.

    NOTE(review): -190 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, -190)

    self.container = PropertiesContainer(canonical_from_pattern=False)

    # Character class accepted inside a release group name.
    self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'

    def _is_reserved_word(elt):
        return elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus']

    def _is_numeric(elt):
        return self._is_number(elt)

    # Predicates that reject a candidate group name.
    self._forbidden_groupname_lambda = [_is_reserved_word, _is_numeric]

    # If the previous property in this list, the match will be considered as safe
    # and group name can contain a separator.
    self.previous_safe_properties = [
        'videoCodec',
        'format',
        'videoApi',
        'audioCodec',
        'audioProfile',
        'videoProfile',
        'audioChannels',
        'other',
    ]
    self.previous_safe_values = {'other': ['Complete']}

    self.next_safe_properties = ['extension', 'website']
    self.next_safe_values = {'format': ['Telesync']}

    container = self.container
    container.sep_replace_char = '-'
    container.canonical_from_pattern = False
    container.enhance = True

    name_chars = self._allowed_groupname_pattern
    container.register_property('releaseGroup', name_chars + '+')
    container.register_property('releaseGroup',
                                name_chars + '+-' + name_chars + '+')

    # Capturing split on separators.
    self.re_sep = re.compile('(' + sep + ')')
def __init__(self):
    """Register the three 'website' patterns built from the TLDS list.

    NOTE(review): 45 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 45)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    # All registered domain extension
    any_tld = build_or_pattern(TLDS)
    # For sure a website extension
    sure_tld = build_or_pattern(['com', 'org', 'net'])
    # For sure a website subdomain
    sure_subdomain = build_or_pattern(['www'])
    # Those words before a tlds are sure
    sure_tld_prefix = build_or_pattern(['co', 'com', 'org', 'net'])

    subdomain_group = '(?:' + sure_subdomain + r'\.)'
    any_tld_group = '(?:' + any_tld + ')+'
    label = r'[a-z-]+\.'

    website_patterns = (
        # safe subdomain(s), any labels, any known TLD
        subdomain_group + '+' + '(?:' + label + ')+' + any_tld_group,
        # optional safe subdomain, one label, a safe TLD
        subdomain_group + '*' + label + '(?:' + sure_tld + ')+',
        # optional safe subdomain, one label, safe prefix(es), any known TLD
        subdomain_group + '*' + label
        + '(?:' + sure_tld_prefix + r'\.)+' + any_tld_group,
    )
    for website_pattern in website_patterns:
        self.container.register_property('website', website_pattern)
def __init__(self):
    """Register the keywords recognised as the 'episodeDetails' property.

    NOTE(review): -205 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, -205)
    self.container = PropertiesContainer()

    detail_words = ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot',
                    'Unaired')
    self.container.register_property('episodeDetails', *detail_words)

    # 'Extra' and 'Extras' are both reported as 'Extras'.
    self.container.register_property('episodeDetails', 'Extras?',
                                     canonical_form='Extras')
def __init__(self):
    """Register cd/bonus/film numbering and edition patterns.

    Fix: the Deluxe edition used the misspelt pattern 'cdeluxe-edition';
    it is now 'deluxe-edition', matching the '<name>-edition' form used by
    every other edition registered here.

    NOTE(review): 25 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 25)
    self.container = PropertiesContainer(canonical_from_pattern=False)
    register = self.container.register_property

    # "cd 1" optionally followed by "of 2".
    register(None,
             'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep
             + '(?P<cdNumberTotal>[0-9]))?',
             confidence=1.0, enhance=False, global_span=True,
             formatter=parse_numeral)
    # "2 cds"
    register('cdNumberTotal', '([1-9])' + _psep + 'cds?',
             confidence=0.9, enhance=False, formatter=parse_numeral)

    register('bonusNumber', 'x([0-9]{1,2})',
             enhance=False, global_span=True, formatter=parse_numeral)
    register('filmNumber', 'f([0-9]{1,2})',
             enhance=False, global_span=True, formatter=parse_numeral)

    register('edition', 'collector', 'collector-edition',
             'edition-collector', canonical_form='Collector Edition')
    register('edition', 'special-edition', 'edition-special',
             canonical_form='Special Edition')
    register('edition', 'criterion', 'criterion-edition',
             'edition-criterion', canonical_form='Criterion Edition')
    register('edition', 'deluxe', 'deluxe-edition', 'edition-deluxe',
             canonical_form='Deluxe Edition')
    register('edition', "director'?s?-cut", "director'?s?-cut-edition",
             "edition-director'?s?-cut", canonical_form="Director's cut")
class GuessEpisodeSpecial(Transformer):
    """Transformer that looks for 'special' episode keywords."""

    def __init__(self):
        """Register the keywords recognised as the 'special' property.

        NOTE(review): -205 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()

        special_words = ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot',
                         'Unaired')
        self.container.register_property('special', *special_words)
        # 'Extra' and 'Extras' are both reported as 'Extras'.
        self.container.register_property('special', 'Extras?',
                                         canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Return every 'special' guess the container finds in ``string``."""
        found = self.container.find_properties(string, node, 'special',
                                               multiple=True)
        return self.container.as_guess(found, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{'type': 'episode'}`` when a non-episode tree has a special leaf.

        Returns None when the tree is already typed as an episode or when no
        unidentified leaf yields a guess.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None
        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(found):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach 'special' guesses to title and unidentified leaves.

        Only runs for episode-typed trees that have no episode number or
        whose season equals 0. Always returns None.
        """
        if not mtree.guess.get('type', '').startswith('episode'):
            return None
        if mtree.info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        def _attach_guesses(leaves):
            for leaf in leaves:
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)

        # Unidentified leaves are fetched only after title leaves are done,
        # as in the sequential loops this replaces.
        _attach_guesses(mtree.leaves_containing('title'))
        _attach_guesses(mtree.unidentified_leaves())
        return None
def guess_regexps_id(self, string, node=None, options=None):
    """Guess 'regexpId' using the regexps listed under options['id_regexps'].

    A fresh container is built on every call; each regexp is registered with
    confidence 1.0 and its own ``NoValidator`` instance.
    """
    id_container = PropertiesContainer(enhance=False,
                                       canonical_from_pattern=False)

    for id_regexp in options.get("id_regexps"):
        id_container.register_property('regexpId', id_regexp,
                                       confidence=1.0,
                                       validator=NoValidator())

    matches = id_container.find_properties(string, node, options)
    return id_container.as_guess(matches, string)
def __init__(self):
    """Load the TLD list from package data and register 'website' patterns.

    Fix: the resource stream is now closed in a ``finally`` clause, so it
    is released even when reading or decoding a line raises.

    NOTE(review): 45 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 45)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    tlds = []
    stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
    try:
        # The first two lines of the resource are skipped, as before.
        stream.readline()
        next(stream)
        for raw_tld in stream:
            raw_tld = raw_tld.strip()
            # Entries containing '--' are ignored.
            if b'--' in raw_tld:
                continue
            tlds.append(raw_tld.decode("utf-8"))
    finally:
        stream.close()

    # All registered domain extension
    tlds_pattern = build_or_pattern(tlds)
    # For sure a website extension
    safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
    # For sure a website subdomain
    safe_subdomains_pattern = build_or_pattern(['www'])
    # Those words before a tlds are sure
    safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])

    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)+'
        + r'(?:[a-z-]+\.)+'
        + r'(?:' + tlds_pattern + r')+')
    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)*'
        + r'[a-z-]+\.'
        + r'(?:' + safe_tlds_pattern + r')+')
    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)*'
        + r'[a-z-]+\.'
        + r'(?:' + safe_prefix_tlds_pattern + r'\.)+'
        + r'(?:' + tlds_pattern + r')+')
def __init__(self):
    """Register the keywords recognised as the 'episodeDetails' property.

    NOTE(review): -205 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, -205)
    self.container = PropertiesContainer()

    detail_words = ("Special", "Bonus", "Omake", "Ova", "Oav", "Pilot",
                    "Unaired")
    self.container.register_property("episodeDetails", *detail_words)

    # "Extra" and "Extras" are both reported as "Extras".
    self.container.register_property("episodeDetails", "Extras?",
                                     canonical_form="Extras")
class GuessWebsite(Transformer):
    """Transformer that detects website names in unidentified leaves."""

    def __init__(self):
        """Load the TLD list from package data and register 'website' patterns.

        Fix: the resource stream is now closed in a ``finally`` clause, so
        it is released even when reading or decoding a line raises.

        NOTE(review): 45 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, 45)
        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        tlds = []
        stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        try:
            # The first two lines of the resource are skipped, as before.
            stream.readline()
            next(stream)
            for raw_tld in stream:
                raw_tld = raw_tld.strip()
                # Entries containing '--' are ignored.
                if b'--' in raw_tld:
                    continue
                tlds.append(raw_tld.decode("utf-8"))
        finally:
            stream.close()

        # All registered domain extension
        tlds_pattern = build_or_pattern(tlds)
        # For sure a website extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        safe_subdomains_pattern = build_or_pattern(['www'])
        # Those words before a tlds are sure
        safe_prefix_tlds_pattern = build_or_pattern(
            ['co', 'com', 'org', 'net'])

        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)+'
            + r'(?:[a-z-]+\.)+'
            + r'(?:' + tlds_pattern + r')+')
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*'
            + r'[a-z-]+\.'
            + r'(?:' + safe_tlds_pattern + r')+')
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*'
            + r'[a-z-]+\.'
            + r'(?:' + safe_prefix_tlds_pattern + r'\.)+'
            + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the 'website' guess found in ``string``, as built by the container."""
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(
            mtree.unidentified_leaves())
class GuessEpisodeSpecial(Transformer):
    """Transformer that looks for 'special' episode keywords."""

    def __init__(self):
        """Register the keywords recognised as the 'special' property.

        NOTE(review): -205 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()

        special_words = ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot',
                         'Unaired')
        self.container.register_property('special', *special_words)
        # 'Extra' and 'Extras' are both reported as 'Extras'.
        self.container.register_property('special', 'Extras?',
                                         canonical_form='Extras')

    def guess_special(self, string, node=None, options=None):
        """Return every 'special' guess the container finds in ``string``."""
        found = self.container.find_properties(string, node, 'special',
                                               multiple=True)
        return self.container.as_guess(found, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{'type': 'episode'}`` when a non-episode tree has a special leaf.

        Returns None when the tree is already typed as an episode or when no
        unidentified leaf yields a guess.
        """
        if mtree.guess.get('type', '').startswith('episode'):
            return None
        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, 'special')
            if self.container.as_guess(found):
                return {'type': 'episode'}
        return None

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach 'special' guesses to title and unidentified leaves.

        Only runs for episode-typed trees that have no episode number or
        whose season equals 0. Always returns None.
        """
        is_episode = mtree.guess.get('type', '').startswith('episode')
        if not is_episode:
            return None
        if mtree.info.get('episodeNumber') and mtree.info.get('season') != 0:
            return None

        def _attach_guesses(leaves):
            for leaf in leaves:
                for guess in self.guess_special(leaf.value, leaf, options):
                    found_guess(leaf, guess, update_guess=False)

        # Unidentified leaves are fetched only after title leaves are done,
        # as in the sequential loops this replaces.
        _attach_guesses(mtree.leaves_containing('title'))
        _attach_guesses(mtree.unidentified_leaves())
        return None
def __init__(self):
    """Build the container holding the weak episode-number patterns.

    NOTE(review): 15 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 15)
    self.properties = PropertiesContainer(enhance=False,
                                          canonical_from_pattern=False)

    def _formater(episodeNumber):
        """Turn a raw digit string into an episode number or season/episode dict.

        Returns None when the parsed value is a valid year, or when the
        derived season exceeds 50.
        """
        epnum = parse_numeral(episodeNumber)
        if valid_year(epnum):
            return None
        if epnum <= 100:
            return epnum
        season, episode = divmod(epnum, 100)
        # episodes which have a season > 50 are most likely errors
        # (Simpson is at 25!)
        if season > 50:
            return None
        return {'season': season, 'episodeNumber': episode}

    register = self.properties.register_property

    # 2 to 4 bare digits, split into season/episode by the formatter.
    register(['episodeNumber', 'season'], '[0-9]{2,4}',
             confidence=0.6, formatter=_formater)

    # The literal word "episode" followed by a numeral.
    worded_episode = '(?:episode)' + sep + '(' + numeral + ')[^0-9]'
    register('episodeNumber', worded_episode, confidence=0.3)
def expected_series(string, node=None, options=None):
    """Guess 'series' from the titles listed under options['expected_series'].

    Entries starting with 're:' are used as regular expressions (prefix
    removed, spaces replaced by '-') and registered with enhance=True; all
    other entries are escaped and registered with enhance=False.
    """
    series_container = PropertiesContainer(enhance=True,
                                           canonical_from_pattern=False)

    for entry in options.get('expected_series'):
        is_regexp = entry.startswith('re:')
        if is_regexp:
            pattern = entry[3:].replace(' ', '-')
        else:
            pattern = re.escape(entry)
        series_container.register_property('series', pattern,
                                           enhance=is_regexp)

    matches = series_container.find_properties(string, node, options)
    return series_container.as_guess(matches, string)
def __init__(self):
    """Register the three 'website' patterns built from the TLDS list.

    NOTE(review): 45 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 45)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    # All registered domain extension
    any_tld = build_or_pattern(TLDS)
    # For sure a website extension
    sure_tld = build_or_pattern(['com', 'org', 'net'])
    # For sure a website subdomain
    sure_subdomain = build_or_pattern(['www'])
    # Those words before a tlds are sure
    sure_tld_prefix = build_or_pattern(['co', 'com', 'org', 'net'])

    subdomain_group = '(?:' + sure_subdomain + r'\.)'
    any_tld_group = '(?:' + any_tld + ')+'
    label = r'[a-z-]+\.'

    website_patterns = (
        # safe subdomain(s), any labels, any known TLD
        subdomain_group + '+' + '(?:' + label + ')+' + any_tld_group,
        # optional safe subdomain, one label, a safe TLD
        subdomain_group + '*' + label + '(?:' + sure_tld + ')+',
        # optional safe subdomain, one label, safe prefix(es), any known TLD
        subdomain_group + '*' + label
        + '(?:' + sure_tld_prefix + r'\.)+' + any_tld_group,
    )
    for website_pattern in website_patterns:
        self.container.register_property('website', website_pattern)
def __init__(self):
    """Load the TLD list from package data and register 'website' patterns.

    Fix: the resource stream is now closed in a ``finally`` clause, so it
    is released even when reading or decoding a line raises.

    NOTE(review): 45 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 45)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    tlds = []
    stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
    try:
        # The first two lines of the resource are skipped, as before.
        stream.readline()
        next(stream)
        for raw_tld in stream:
            raw_tld = raw_tld.strip()
            # Entries containing '--' are ignored.
            if b'--' in raw_tld:
                continue
            tlds.append(raw_tld.decode("utf-8"))
    finally:
        stream.close()

    # All registered domain extension
    tlds_pattern = build_or_pattern(tlds)
    # For sure a website extension
    safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
    # For sure a website subdomain
    safe_subdomains_pattern = build_or_pattern(['www'])
    # Those words before a tlds are sure
    safe_prefix_tlds_pattern = build_or_pattern(['co', 'com', 'org', 'net'])

    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)+'
        + r'(?:[a-z-]+\.)+'
        + r'(?:' + tlds_pattern + r')+')
    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)*'
        + r'[a-z-]+\.'
        + r'(?:' + safe_tlds_pattern + r')+')
    self.container.register_property(
        'website',
        '(?:' + safe_subdomains_pattern + r'\.)*'
        + r'[a-z-]+\.'
        + r'(?:' + safe_prefix_tlds_pattern + r'\.)+'
        + r'(?:' + tlds_pattern + r')+')
def __init__(self):
    """Register the season/episode regular expressions.

    NOTE(review): 20 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 20)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    def episode_parser(value):
        """Parse a matched episode string into one number or an episode list.

        Returns None when nothing is parsed, the single number when exactly
        one is parsed, otherwise a dict holding the first number under the
        None key and the full list under 'episodeList'.
        """
        episodes = []
        for chunk in filter(None, re_split('[a-zA-Z]', value)):
            bounds = [part for part in chunk.split('-') if part]
            if len(bounds) <= 1:
                episodes.append(parse_numeral(chunk))
                continue
            # NOTE(review): every pass reads bounds[0] and bounds[1], so a
            # chunk with three or more dashed values repeats the first
            # range; kept as-is -- confirm the intended behaviour.
            for _ in bounds[1:]:
                first_ep = parse_numeral(bounds[0])
                last_ep = parse_numeral(bounds[1])
                episodes.extend(range(first_ep, last_ep + 1))

        if not episodes:
            return None
        if len(episodes) == 1:
            return episodes[0]
        # TODO: Should support seasonList also
        return {None: episodes[0], 'episodeList': episodes}

    register = self.container.register_property

    # "season 1" / "saison 1"
    season_word = r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))'
    register(None, season_word, confidence=1.0, formatter=parse_numeral)

    # "s01e02", optionally chained ("s01e02e03", "s01e02-03")
    s_e_form = (r'(s(?P<season>' + digital_numeral + ')[^0-9]?'
                + sep + '?(?P<episodeNumber>(?:e' + digital_numeral
                + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]')
    register(None, s_e_form, confidence=1.0,
             formatter={None: parse_numeral, 'episodeNumber': episode_parser},
             validator=NoValidator())

    # "1x02", optionally chained
    x_form = (r'[^0-9]((?P<season>' + digital_numeral
              + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral
              + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]')
    register(None, x_form, confidence=1.0,
             formatter={None: parse_numeral, 'episodeNumber': episode_parser})

    # "s01" alone
    register(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]',
             confidence=0.6, formatter=parse_numeral, validator=NoValidator())

    # "02v2" / "02v3"
    register(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])',
             confidence=0.6, formatter=parse_numeral)

    # "ep 02"
    register(None,
             r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]',
             confidence=0.7, formatter=parse_numeral)

    # "e02"
    register(None, r'(e(?P<episodeNumber>' + digital_numeral + '))',
             confidence=0.6, formatter=parse_numeral)

    self.container.register_canonical_properties(
        'other', 'FiNAL', 'Complete', validator=WeakValidator())
def guess_regexps_id(self, string, node=None, options=None):
    """Guess 'regexpId' using the regexps listed under options['id_regexps'].

    A fresh container is built on every call; each regexp is registered with
    confidence 1.0 and its own ``NoValidator`` instance.
    """
    id_container = PropertiesContainer(enhance=False,
                                       canonical_from_pattern=False)

    for id_regexp in options.get("id_regexps"):
        id_container.register_property('regexpId', id_regexp,
                                       confidence=1.0,
                                       validator=NoValidator())

    matches = id_container.find_properties(string, node, options)
    return id_container.as_guess(matches, string)
def __init__(self):
    """Configure the release-group container and its safety list.

    NOTE(review): -190 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, -190)

    self.container = PropertiesContainer(canonical_from_pattern=False)

    # Character class accepted inside a release group name.
    self._allowed_groupname_pattern = r'[\w@#€£$&]'

    def _is_reserved_word(elt):
        return elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus']

    def _is_numeric(elt):
        return self._is_number(elt)

    # Predicates that reject a candidate group name.
    self._forbidden_groupname_lambda = [_is_reserved_word, _is_numeric]

    # If the previous property in this list, the match will be considered as safe
    # and group name can contain a separator.
    self.previous_safe_properties = [
        'videoCodec',
        'format',
        'videoApi',
        'audioCodec',
        'audioProfile',
        'videoProfile',
        'audioChannels',
    ]

    container = self.container
    container.sep_replace_char = '-'
    container.canonical_from_pattern = False
    container.enhance = True

    name_chars = self._allowed_groupname_pattern
    container.register_property('releaseGroup', name_chars + '+')
    container.register_property('releaseGroup',
                                name_chars + '+-' + name_chars + '+')
def expected_series(self, string, node=None, options=None):
    """Guess 'series' from the titles listed under options['expected_series'].

    Entries starting with 're:' are used as regular expressions (prefix
    removed, spaces replaced by '-') and registered with enhance=True; all
    other entries are escaped and registered with enhance=False.
    """
    series_container = PropertiesContainer(enhance=True,
                                           canonical_from_pattern=False)

    for entry in options.get('expected_series'):
        is_regexp = entry.startswith('re:')
        if is_regexp:
            pattern = entry[3:].replace(' ', '-')
        else:
            pattern = re.escape(entry)
        series_container.register_property('series', pattern,
                                           enhance=is_regexp)

    matches = series_container.find_properties(string, node, options)
    return series_container.as_guess(matches, string)
class GuessEpisodeDetails(Transformer):
    """Transformer that looks for 'episodeDetails' keywords."""

    def __init__(self):
        """Register the keywords recognised as the 'episodeDetails' property.

        NOTE(review): -205 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, -205)
        self.container = PropertiesContainer()

        detail_words = ("Special", "Bonus", "Omake", "Ova", "Oav", "Pilot",
                        "Unaired")
        self.container.register_property("episodeDetails", *detail_words)
        # "Extra" and "Extras" are both reported as "Extras".
        self.container.register_property("episodeDetails", "Extras?",
                                         canonical_form="Extras")

    def guess_details(self, string, node=None, options=None):
        """Return every 'episodeDetails' guess the container finds in ``string``."""
        found = self.container.find_properties(string, node, options,
                                               "episodeDetails",
                                               multiple=True)
        return self.container.as_guess(found, multiple=True)

    def second_pass_options(self, mtree, options=None):
        """Return ``{"type": "episode"}`` when a non-episode tree has a details leaf.

        Returns None when the tree is already typed as an episode or when no
        unidentified leaf yields a guess.
        """
        if mtree.guess.get("type", "").startswith("episode"):
            return None
        for leaf in mtree.unidentified_leaves():
            found = self.container.find_properties(leaf.value, leaf, options,
                                                   "episodeDetails")
            if self.container.as_guess(found):
                return {"type": "episode"}
        return None

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def process(self, mtree, options=None):
        """Attach 'episodeDetails' guesses to title and unidentified leaves.

        Only runs for episode-typed trees that have no episode number or
        whose season equals 0. Always returns None.
        """
        if not mtree.guess.get("type", "").startswith("episode"):
            return None
        if mtree.info.get("episodeNumber") and mtree.info.get("season") != 0:
            return None

        candidate_leaves = itertools.chain(mtree.leaves_containing("title"),
                                           mtree.unidentified_leaves())
        for leaf in candidate_leaves:
            for guess in self.guess_details(leaf.value, leaf, options):
                found_guess(leaf, guess, update_guess=False)
        return None
def __init__(self):
    """Register cd/bonus/film numbering and edition patterns.

    Fix: the Deluxe edition used the misspelt pattern 'cdeluxe-edition';
    it is now 'deluxe-edition', matching the '<name>-edition' form used by
    every other edition registered here.

    NOTE(review): 25 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 25)
    self.container = PropertiesContainer(canonical_from_pattern=False)
    register = self.container.register_property

    # "cd 1" optionally followed by "of 2".
    register(None,
             'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep
             + '(?P<cdNumberTotal>[0-9]))?',
             confidence=1.0, enhance=False, global_span=True,
             formatter=parse_numeral)
    # "2 cds"
    register('cdNumberTotal', '([1-9])' + _psep + 'cds?',
             confidence=0.9, enhance=False, formatter=parse_numeral)

    register('bonusNumber', 'x([0-9]{1,2})',
             enhance=False, global_span=True, formatter=parse_numeral)
    register('filmNumber', 'f([0-9]{1,2})',
             enhance=False, global_span=True, formatter=parse_numeral)

    register('edition', 'collector', 'collector-edition',
             'edition-collector', canonical_form='Collector Edition')
    register('edition', 'special-edition', 'edition-special',
             canonical_form='Special Edition')
    register('edition', 'criterion', 'criterion-edition',
             'edition-criterion', canonical_form='Criterion Edition')
    register('edition', 'deluxe', 'deluxe-edition', 'edition-deluxe',
             canonical_form='Deluxe Edition')
    register('edition', "director'?s?-cut", "director'?s?-cut-edition",
             "edition-director'?s?-cut", canonical_form="Director's cut")
class GuessWebsite(Transformer):
    """Transformer that detects website names in unidentified leaves."""

    def __init__(self):
        """Register the three 'website' patterns built from the TLDS list.

        NOTE(review): 45 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, 45)
        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        # All registered domain extension
        any_tld = build_or_pattern(TLDS)
        # For sure a website extension
        sure_tld = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        sure_subdomain = build_or_pattern(['www'])
        # Those words before a tlds are sure
        sure_tld_prefix = build_or_pattern(['co', 'com', 'org', 'net'])

        subdomain_group = '(?:' + sure_subdomain + r'\.)'
        any_tld_group = '(?:' + any_tld + ')+'
        label = r'[a-z-]+\.'

        website_patterns = (
            # safe subdomain(s), any labels, any known TLD
            subdomain_group + '+' + '(?:' + label + ')+' + any_tld_group,
            # optional safe subdomain, one label, a safe TLD
            subdomain_group + '*' + label + '(?:' + sure_tld + ')+',
            # optional safe subdomain, one label, safe prefix(es), any TLD
            subdomain_group + '*' + label
            + '(?:' + sure_tld_prefix + r'\.)+' + any_tld_group,
        )
        for website_pattern in website_patterns:
            self.container.register_property('website', website_pattern)

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the 'website' guess found in ``string``, as built by the container."""
        matches = self.container.find_properties(string, node, options,
                                                 'website')
        return self.container.as_guess(matches, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_website, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register the season/episode regular expressions.

    NOTE(review): 20 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, 20)
    self.container = PropertiesContainer(enhance=False,
                                         canonical_from_pattern=False)

    def episode_parser(value):
        """Parse a matched episode string into one number or an episode list.

        Returns None when nothing is parsed, the single number when exactly
        one is parsed, otherwise a dict holding the first number under the
        None key and the full list under 'episodeList'.
        """
        episodes = []
        for chunk in filter(None, re_split('[a-zA-Z]', value)):
            bounds = [part for part in chunk.split('-') if part]
            if len(bounds) <= 1:
                episodes.append(parse_numeral(chunk))
                continue
            # NOTE(review): every pass reads bounds[0] and bounds[1], so a
            # chunk with three or more dashed values repeats the first
            # range; kept as-is -- confirm the intended behaviour.
            for _ in bounds[1:]:
                first_ep = parse_numeral(bounds[0])
                last_ep = parse_numeral(bounds[1])
                episodes.extend(range(first_ep, last_ep + 1))

        if not episodes:
            return None
        if len(episodes) == 1:
            return episodes[0]
        # TODO: Should support seasonList also
        return {None: episodes[0], 'episodeList': episodes}

    class ResolutionCollisionValidator(object):
        """Accept a match only when its second group is shorter than 3 chars.

        NOTE(review): the name suggests this avoids clashes with screen
        resolutions such as "1280x720" -- confirm.
        """

        def validate(self, prop, string, node, match, entry_start, entry_end):
            return len(match.group(2)) < 3

    register = self.container.register_property

    # "season 1" / "saison 1"
    season_word = r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))'
    register(None, season_word, confidence=1.0, formatter=parse_numeral)

    # "s01e02", optionally chained ("s01e02e03", "s01e02-03")
    s_e_form = (r'(s(?P<season>' + digital_numeral + ')[^0-9]?'
                + sep + '?(?P<episodeNumber>(?:e' + digital_numeral
                + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]')
    register(None, s_e_form, confidence=1.0,
             formatter={None: parse_numeral, 'episodeNumber': episode_parser},
             validator=NoValidator())

    # "1x02", optionally chained
    x_form = (r'[^0-9]((?P<season>' + digital_numeral
              + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral
              + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]')
    register(None, x_form, confidence=1.0,
             formatter={None: parse_numeral, 'episodeNumber': episode_parser},
             validator=ChainedValidator(DefaultValidator(),
                                        ResolutionCollisionValidator()))

    # "s01" alone
    register(None, r'(s(?P<season>' + digital_numeral + '))[^0-9]',
             confidence=0.6, formatter=parse_numeral, validator=NoValidator())

    # "02v2" / "02v3"
    register(None, r'((?P<episodeNumber>' + digital_numeral + ')v[23])',
             confidence=0.6, formatter=parse_numeral)

    # "ep 02"
    register(None,
             r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]',
             confidence=0.7, formatter=parse_numeral)

    # "e02"
    register(None, r'(e(?P<episodeNumber>' + digital_numeral + '))',
             confidence=0.6, formatter=parse_numeral)

    # Two digits at the very start of the string.
    register(None, r'\A ?((?P<episodeNumber>' + r'\d{2}' + '))' + sep,
             confidence=0.4, formatter=parse_numeral)

    # Zero-prefixed digits at the very start of the string.
    register(None, r'\A ?(0(?P<episodeNumber>' + r'\d+' + '))' + sep,
             confidence=0.4, formatter=parse_numeral)

    self.container.register_canonical_properties(
        'other', 'FiNAL', 'Complete', validator=WeakValidator())
def __init__(self):
    """Configure the release-group container and its safe/unsafe lists.

    NOTE(review): -190 is forwarded to ``Transformer.__init__``; presumably
    the transformer priority -- confirm against the base class.
    """
    Transformer.__init__(self, -190)

    self.container = PropertiesContainer(canonical_from_pattern=False)

    # Character class accepted inside a release group name.
    self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'

    def _is_reserved_word(elt):
        return elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus']

    def _is_numeric(elt):
        return self._is_number(elt)

    # Predicates that reject a candidate group name.
    self._forbidden_groupname_lambda = [_is_reserved_word, _is_numeric]

    # If the previous property in this list, the match will be considered as safe
    # and group name can contain a separator.
    self.previous_safe_properties = [
        'videoCodec',
        'format',
        'videoApi',
        'audioCodec',
        'audioProfile',
        'videoProfile',
        'audioChannels',
        'screenSize',
        'other',
    ]
    self.previous_safe_values = {'other': ['Complete']}

    self.next_safe_properties = ['extension', 'website']
    self.next_safe_values = {'format': ['Telesync']}

    # A separate list: the previous-safe names plus episode/season.
    self.next_unsafe_properties = (
        self.previous_safe_properties + ['episodeNumber', 'season'])

    container = self.container
    container.sep_replace_char = '-'
    container.canonical_from_pattern = False
    container.enhance = True

    name_chars = self._allowed_groupname_pattern
    container.register_property('releaseGroup', name_chars + '+')
    container.register_property('releaseGroup',
                                name_chars + '+-' + name_chars + '+')

    # Capturing split on separators.
    self.re_sep = re.compile('(' + sep + ')')
class GuessWeakEpisodesRexps(Transformer):
    """Transformer that guesses episode numbers from weak patterns."""

    def __init__(self):
        """Build the container holding the weak episode-number patterns.

        NOTE(review): 15 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, 15)
        self.properties = PropertiesContainer(enhance=False,
                                              canonical_from_pattern=False)

        def _formater(episodeNumber):
            """Turn a raw digit string into an episode number or season/episode dict.

            Returns None when the parsed value is a valid year, or when the
            derived season exceeds 50.
            """
            epnum = parse_numeral(episodeNumber)
            if valid_year(epnum):
                return None
            if epnum <= 100:
                return epnum
            season, episode = divmod(epnum, 100)
            # episodes which have a season > 50 are most likely errors
            # (Simpson is at 25!)
            if season > 50:
                return None
            return {'season': season, 'episodeNumber': episode}

        register = self.properties.register_property

        # 2 to 4 bare digits, split into season/episode by the formatter.
        register(['episodeNumber', 'season'], '[0-9]{2,4}',
                 confidence=0.6, formatter=_formater)

        # The literal word "episode" followed by a numeral.
        worded_episode = '(?:episode)' + sep + '(' + numeral + ')[^0-9]'
        register('episodeNumber', worded_episode, confidence=0.3)

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.properties.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return a weak episode guess for ``string``.

        Returns None when ``node`` is given and its root already holds an
        'episodeNumber'.
        """
        if node and 'episodeNumber' in node.root.info:
            return None

        matches = self.properties.find_properties(string, node)
        return self.properties.as_guess(matches, string)

    def should_process(self, mtree, options=None):
        """Return whether the tree's guessed type starts with 'episode'."""
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Run the weak episode guesser over the unidentified leaves."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log,
                             options)
        finder.process_nodes(mtree.unidentified_leaves())
class GuessWebsite(Transformer):
    """Transformer that detects website names in unidentified leaves."""

    def __init__(self):
        """Load the TLD list from package data and register 'website' patterns.

        Fix: the resource stream is now closed in a ``finally`` clause, so
        it is released even when reading or decoding a line raises.

        NOTE(review): 45 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, 45)
        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        tlds = []
        stream = resource_stream('guessit', 'tlds-alpha-by-domain.txt')
        try:
            # The first two lines of the resource are skipped, as before.
            stream.readline()
            next(stream)
            for raw_tld in stream:
                raw_tld = raw_tld.strip()
                # Entries containing '--' are ignored.
                if b'--' in raw_tld:
                    continue
                tlds.append(raw_tld.decode("utf-8"))
        finally:
            stream.close()

        # All registered domain extension
        tlds_pattern = build_or_pattern(tlds)
        # For sure a website extension
        safe_tlds_pattern = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        safe_subdomains_pattern = build_or_pattern(['www'])
        # Those words before a tlds are sure
        safe_prefix_tlds_pattern = build_or_pattern(
            ['co', 'com', 'org', 'net'])

        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)+'
            + r'(?:[a-z-]+\.)+'
            + r'(?:' + tlds_pattern + r')+')
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*'
            + r'[a-z-]+\.'
            + r'(?:' + safe_tlds_pattern + r')+')
        self.container.register_property(
            'website',
            '(?:' + safe_subdomains_pattern + r'\.)*'
            + r'[a-z-]+\.'
            + r'(?:' + safe_prefix_tlds_pattern + r'\.)+'
            + r'(?:' + tlds_pattern + r')+')

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the 'website' guess found in ``string``, as built by the container."""
        found = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_website, 1.0, self.log, options).process_nodes(
            mtree.unidentified_leaves())
class GuessWebsite(Transformer):
    """Transformer that detects website names in unidentified leaves."""

    def __init__(self):
        """Register the three 'website' patterns built from the TLDS list.

        NOTE(review): 45 is forwarded to ``Transformer.__init__``;
        presumably the transformer priority -- confirm.
        """
        Transformer.__init__(self, 45)
        self.container = PropertiesContainer(enhance=False,
                                             canonical_from_pattern=False)

        # All registered domain extension
        any_tld = build_or_pattern(TLDS)
        # For sure a website extension
        sure_tld = build_or_pattern(['com', 'org', 'net'])
        # For sure a website subdomain
        sure_subdomain = build_or_pattern(['www'])
        # Those words before a tlds are sure
        sure_tld_prefix = build_or_pattern(['co', 'com', 'org', 'net'])

        subdomain_group = '(?:' + sure_subdomain + r'\.)'
        any_tld_group = '(?:' + any_tld + ')+'
        label = r'[a-z-]+\.'

        website_patterns = (
            # safe subdomain(s), any labels, any known TLD
            subdomain_group + '+' + '(?:' + label + ')+' + any_tld_group,
            # optional safe subdomain, one label, a safe TLD
            subdomain_group + '*' + label + '(?:' + sure_tld + ')+',
            # optional safe subdomain, one label, safe prefix(es), any TLD
            subdomain_group + '*' + label
            + '(?:' + sure_tld_prefix + r'\.)+' + any_tld_group,
        )
        for website_pattern in website_patterns:
            self.container.register_property('website', website_pattern)

    def supported_properties(self):
        """Return the properties supported by the container."""
        return self.container.get_supported_properties()

    def guess_website(self, string, node=None, options=None):
        """Return the 'website' guess found in ``string``, as built by the container."""
        matches = self.container.find_properties(string, node, 'website')
        return self.container.as_guess(matches, string)

    def process(self, mtree, options=None):
        """Run ``guess_website`` over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_website, 1.0, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register every season/episode regex pattern on ``self.container``.

    The method builds the separator and keyword alternations, defines the
    local value parsers (``list_parser`` and its thin wrappers) and a local
    validator, then registers the patterns one after another.
    """
    # NOTE(review): 20 is presumably the transformer priority -- confirm.
    Transformer.__init__(self, 20)

    range_separators = ['-', 'to', 'a']
    discrete_separators = ['&', 'and', 'et']
    of_separators = ['of', 'sur', '/', '\\']

    season_words = ['seasons?', 'saisons?', 'series?']
    episode_words = ['episodes?']

    season_markers = ['s']
    episode_markers = ['e', 'ep']

    # Start from `sep` (defined outside this method) and strip every range
    # separator out of it; what remains is added as one more discrete separator.
    discrete_sep = sep
    for range_separator in range_separators:
        discrete_sep = discrete_sep.replace(range_separator, '')
    discrete_separators.append(discrete_sep)

    all_separators = list(range_separators)
    all_separators.extend(discrete_separators)

    self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

    range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
    discrete_separators_re = re.compile(
        build_or_pattern(discrete_separators), re.IGNORECASE)
    all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
    of_separators_re = re.compile(
        build_or_pattern(of_separators, escape=True), re.IGNORECASE)

    season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
    episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

    season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
    episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

    def list_parser(value, propertyListName, discrete_separators_re=discrete_separators_re, range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
        """Parse a separator-joined run of numerals.

        Returns ``None`` when nothing was parsed, the single parsed value when
        there is exactly one, and otherwise a dict mapping ``None`` to the
        first value and ``propertyListName`` to the whole list.

        When ``allow_discrete`` is false, values smaller than the last kept
        value are dropped. When ``fill_gaps`` is true, the list is replaced by
        the full range from its minimum to its maximum.
        """
        discrete_elements = filter(lambda x: x != '', discrete_separators_re.split(value))
        discrete_elements = [x.strip() for x in discrete_elements]

        # The discrete split may have cut around a range separator; rebuild
        # the pieces that belong together.
        proper_discrete_elements = []
        i = 0
        while i < len(discrete_elements):
            if i < len(discrete_elements) - 2 and range_separators_re.match(
                    discrete_elements[i + 1]):
                # "<a>", "<range-sep...>", "<b>": glue the three pieces back together.
                proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1] + discrete_elements[i + 2])
                i += 3
            else:
                match = range_separators_re.search(discrete_elements[i])
                if match and match.start() == 0:
                    # Element starts with a range separator: attach it to an earlier entry.
                    # NOTE(review): index i - 1 addresses the rebuilt list, which can be
                    # shorter than i (or empty when i == 0) -- confirm this cannot misindex.
                    proper_discrete_elements[i - 1] = proper_discrete_elements[
                        i - 1] + discrete_elements[i]
                elif match and match.end() == len(discrete_elements[i]):
                    # Element ends with a range separator: attach the following element.
                    # NOTE(review): i + 1 is not bounds-checked and the following element
                    # is still visited on the next iteration -- confirm intended.
                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
                else:
                    proper_discrete_elements.append(discrete_elements[i])
                i += 1

        discrete_elements = proper_discrete_elements

        ret = []

        for discrete_element in discrete_elements:
            range_values = filter(
                lambda x: x != '', range_separators_re.split(discrete_element))
            range_values = [x.strip() for x in range_values]
            if len(range_values) > 1:
                # Expand each consecutive pair of bounds inclusively, skipping duplicates.
                for x in range(0, len(range_values) - 1):
                    start_range_ep = parse_numeral(range_values[x])
                    end_range_ep = parse_numeral(range_values[x + 1])
                    for range_ep in range(start_range_ep, end_range_ep + 1):
                        if range_ep not in ret:
                            ret.append(range_ep)
            else:
                discrete_value = parse_numeral(discrete_element)
                if discrete_value not in ret:
                    ret.append(discrete_value)

        if len(ret) > 1:
            if not allow_discrete:
                valid_ret = []
                # Keep the first value, then only values that are not smaller
                # than the last kept one.
                valid_ret.append(ret[0])
                for i in range(0, len(ret) - 1):
                    previous = valid_ret[len(valid_ret) - 1]
                    if ret[i + 1] < previous:
                        pass
                    else:
                        valid_ret.append(ret[i + 1])
                ret = valid_ret
            if fill_gaps:
                ret = list(range(min(ret), max(ret) + 1))
            if len(ret) > 1:
                return {None: ret[0], propertyListName: ret}
        if len(ret) > 0:
            return ret[0]
        return None

    def episode_parser_x(value):
        """Parse an episode run whose discrete separator is 'x'."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile(
            'x', re.IGNORECASE))

    def episode_parser_e(value):
        """Parse an episode run whose discrete separator is 'e', filling gaps."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile(
            'e', re.IGNORECASE), fill_gaps=True)

    def episode_parser(value):
        """Parse an episode run with the default separators."""
        return list_parser(value, 'episodeList')

    def season_parser(value):
        """Parse a season run with the default separators."""
        return list_parser(value, 'seasonList')

    class ResolutionCollisionValidator(object):
        """Accept a match only when its second group is shorter than 3 characters."""

        def validate(self, prop, string, node, match, entry_start, entry_end):
            # NOTE(review): group 2 is presumably the season capture, so this
            # rejects things like "1920x1080" -- confirm group numbering.
            return len(match.group(2)) < 3  # limit

    # "<season word> <numeral> [<season word>]"
    self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)

    # "<season word> <n> <sep> <n> ..." -- only valid when the parsed season has len() > 1.
    self.container.register_property(
        None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep,
        confidence=1.0,
        formatter={
            None: parse_numeral,
            'season': season_parser
        },
        validator=ChainedValidator(
            DefaultValidator(),
            FormatterValidator(
                'season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

    # "s<NN>e<NN>[e<NN>|-<NN>]..."
    self.container.register_property(
        None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser_e,
            'season': season_parser
        },
        validator=NoValidator())

    # Disabled registration, kept for reference:
    #self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "<sep><NN><sep>x<sep><NN>..." (separators between the parts)
    self.container.register_property(
        None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser_x,
            'season': season_parser
        },
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "<NN>x<NN>[x<NN>|-<NN>]..." (no separators)
    self.container.register_property(
        None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser_x,
            'season': season_parser
        },
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "s<NN>[<sep><NN>]..." season-only marker
    self.container.register_property(
        None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
        confidence=0.6,
        formatter={
            None: parse_numeral,
            'season': season_parser
        },
        validator=NoValidator())

    # "<NN>v<N>" episode number followed by a version
    self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)

    # "ep <numeral>" with and without a trailing version
    self.container.register_property(
        None, r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(
        None, r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

    # Episode marker / episode word followed by one or more numbers
    self.container.register_property(
        None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
        confidence=0.6,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser
        })
    self.container.register_property(
        None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)',
        confidence=0.8,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser
        })

    # Episode marker / episode word followed by a number and a version
    self.container.register_property(
        None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))',
        confidence=0.6,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser
        })
    self.container.register_property(
        None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))',
        confidence=0.8,
        formatter={
            None: parse_numeral,
            'episodeNumber': episode_parser
        })

    # Low-confidence bare numbers at the start or end of the string.
    # NOTE(review): the next two registrations are identical -- confirm whether
    # the duplicate is intentional.
    self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

    # "<n> of <count>" forms for episodes and seasons
    self.container.register_property(
        None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(
        None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(
        None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

    self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    # "<NN>xAll": a season whose 'other' property is reported as 'Complete'
    self.container.register_property(
        None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
        confidence=1.0,
        formatter={
            None: parse_numeral,
            'other': lambda x: 'Complete',
            'season': season_parser
        },
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
class GuessReleaseGroup(Transformer):
    """Transformer that guesses the ``releaseGroup`` property of a leaf.

    A candidate is accepted when it directly follows (separated only by
    characters of ``sep``) a leaf holding one of ``previous_safe_properties``,
    either inside the same group node or in the previous group node.
    """

    def __init__(self):
        """Configure the container and register the release group patterns."""
        # NOTE(review): -190 is presumably the transformer priority -- confirm.
        Transformer.__init__(self, -190)
        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string: '\w' in a plain literal is an invalid escape sequence.
        self._allowed_groupname_pattern = r'[\w@#€£$&]'
        # Predicates applied to a lower-cased name; any truthy result forbids it.
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt),
        ]
        # If the previous property in this list, the match will be considered as safe
        # and group name can contain a separator.
        self.previous_safe_properties = [
            'videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile',
            'videoProfile', 'audioChannels'
        ]

        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
        self.container.register_property(
            'releaseGroup',
            self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False on ``ValueError``."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def _is_forbidden(self, name):
        """Return True when any forbidden predicate matches ``name`` lower-cased."""
        lowered = name.lower()
        # any() stops at the first truthy predicate, like the original loops did.
        return any(check(lowered) for check in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Validate (and possibly truncate) ``guess['releaseGroup']``.

        Names shorter than two characters are rejected. For dash-separated
        names, only the leading parts before the first forbidden part are
        kept and written back into ``guess``. Returns True when the resulting
        name is non-empty and not forbidden as a whole.
        """
        val = guess['releaseGroup']
        if len(val) < 2:
            return False

        if '-' in val:
            checked_val = ""
            for elt in val.split('-'):
                if self._is_forbidden(elt):
                    # Stop at the first forbidden part; drop it and the rest.
                    break
                if checked_val:
                    checked_val += '-'
                checked_val += elt

            val = checked_val
            if not val:
                return False
            guess['releaseGroup'] = val

        return not self._is_forbidden(val)

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends before ``node`` with only ``sep`` characters between."""
        if leaf.span[1] <= node.span[0]:
            for idx in range(leaf.span[1], node.span[0]):
                if leaf.root.value[idx] not in sep:
                    return False
            return True
        return False

    def guess_release_group(self, string, node=None, options=None):
        """Return a validated ``releaseGroup`` guess for ``node``, or None.

        NOTE(review): ``node`` defaults to None but is dereferenced
        unconditionally below -- callers presumably always pass it; confirm.
        """
        found = self.container.find_properties(string, node, 'releaseGroup')
        guess = self.container.as_guess(found, string, self.validate_group_name, sep_replacement='-')
        validated_guess = None
        if guess:
            explicit_group_node = node.group_node()
            if explicit_group_node:
                for leaf in explicit_group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        # A dash right after the safe leaf gives full confidence.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

        if not validated_guess:
            # If previous group last leaf is identified as a safe property,
            # consider the raw value as a releaseGroup
            previous_group_node = node.previous_group_node()
            if previous_group_node:
                for leaf in previous_group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.is_leaf_previous(leaf, node):
                        guess = Guess({'releaseGroup': node.value},
                                      confidence=1,
                                      input=node.value,
                                      span=(0, len(node.value)))
                        if self.validate_group_name(guess):
                            node.guess = guess
                            validated_guess = guess

        if validated_guess:
            # If following group nodes have only one unidentified leaf, it belongs to the release group
            next_group_node = node

            while True:
                next_group_node = next_group_node.next_group_node()
                if next_group_node:
                    leaves = list(next_group_node.leaves())
                    if len(leaves) == 1 and not leaves[0].guess:
                        validated_guess['releaseGroup'] = validated_guess[
                            'releaseGroup'] + leaves[0].value
                        leaves[0].guess = validated_guess
                    else:
                        break
                else:
                    break

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(
                validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run ``guess_release_group`` over every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register every season/episode regex pattern on ``self.container``.

    ``list_parser`` and ``all_separators_re`` are not defined in this method;
    they are taken from an enclosing scope.
    """
    # NOTE(review): 20 is presumably the transformer priority -- confirm.
    Transformer.__init__(self, 20)

    of_separators = ['of', 'sur', '/', '\\']
    of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

    season_words = ['seasons?', 'saisons?', 'series?']
    episode_words = ['episodes?']

    season_markers = ['s']
    episode_markers = ['e', 'ep']

    self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

    season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
    episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

    season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
    episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

    def episode_parser_x(value):
        """Parse an episode run whose discrete separator is 'x'."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

    def episode_parser_e(value):
        """Parse an episode run whose discrete separator is 'e', filling gaps."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE), fill_gaps=True)

    def episode_parser(value):
        """Parse an episode run with the default separators."""
        return list_parser(value, 'episodeList')

    def season_parser(value):
        """Parse a season run with the default separators."""
        return list_parser(value, 'seasonList')

    class ResolutionCollisionValidator(object):
        """Reject matches that look like a resolution (e.g. ``1920x1080``)."""

        @staticmethod
        def validate(prop, string, node, match, entry_start, entry_end):
            """Return False only when both parsed values are 100 or more.

            Groups 2 and 3 are parsed as season and episode respectively
            (presumably the named captures of the registered patterns).
            """
            try:
                season_value = season_parser(match.group(2))
                episode_value = episode_parser_x(match.group(3))
                # Valid as soon as either value is below 100.
                return season_value < 100 or episode_value < 100
            except Exception:
                # This may occur for 1xAll or patterns like this.
                # Only ordinary errors are treated as "valid"; KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                return True

    # Raw strings are used below for every fragment that contains '\d', so no
    # invalid escape sequence reaches the Python parser (values are unchanged).

    # "<season word> <numeral> [<season word>]"
    self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)

    # "<season word> <n> <sep> <n> ..." -- only valid when the parsed season has len() > 1.
    self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep, confidence=1.0, formatter={None: parse_numeral, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

    # "s<NN>e<NN>[e<NN>|-<NN>]..."
    self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser}, validator=NoValidator())

    # "s<NN>e<NN><any separator><NN>..."
    self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser}, validator=NoValidator())

    # "<sep><NN><sep>x<sep><NN>..." (separators between the parts)
    self.container.register_property(None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "<NN>x<NN>[x<NN>|-<NN>]..." (no separators)
    self.container.register_property(None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "s<NN>[<sep><NN>]..." season-only marker
    self.container.register_property(None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'season': season_parser}, validator=NoValidator())

    # "<NN>v<N>" episode number followed by a version
    self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)

    # Standalone "V<N>" version surrounded by separators
    self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())

    # "ep <NN>" with and without a trailing version
    self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

    # Episode marker / episode word followed by one or more numbers
    self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
    self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

    # Episode marker / episode word followed by a number and a version
    self.container.register_property(None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.6, formatter={None: parse_numeral, 'episodeNumber': episode_parser})
    self.container.register_property(None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))', confidence=0.8, formatter={None: parse_numeral, 'episodeNumber': episode_parser})

    # Low-confidence bare numbers (optionally a separated run) at the start of the string
    self.container.register_property('episodeNumber', r'^' + sep + r'+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
    self.container.register_property('episodeNumber', r'^' + sep + r'+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)

    # Low-confidence bare numbers (optionally a separated run) at the end of the string
    self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
    self.container.register_property('episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)

    # "<n> of <count>" forms for episodes and seasons
    self.container.register_property(None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
    self.container.register_property(None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

    self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    # "<NN>xAll": a season whose 'other' property is reported as 'Complete'
    self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))', confidence=1.0, formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
class GuessEpisodesRexps(Transformer):
    """Transformer that guesses season/episode information with regex patterns.

    Supported properties are listed in ``supported_properties``. The patterns
    are registered in ``__init__``; ``guess_episodes_rexps`` additionally
    reconciles a new guess with guesses already present in the same group.
    """

    def __init__(self):
        """Register every season/episode regex pattern on ``self.container``.

        ``list_parser`` and ``all_separators_re`` are not defined in this
        method; they are taken from an enclosing scope.
        """
        # NOTE(review): 20 is presumably the transformer priority -- confirm.
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(
            build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode run whose discrete separator is 'x'."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile(
                'x', re.IGNORECASE))

        def episode_parser_e(value):
            """Parse an episode run whose discrete separator is 'e', filling gaps."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile(
                'e', re.IGNORECASE), fill_gaps=True)

        def episode_parser(value):
            """Parse an episode run with the default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season run with the default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is shorter than 3 characters."""

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # NOTE(review): group 2 is presumably the season capture, so
                # this rejects things like "1920x1080" -- confirm group numbering.
                return len(match.group(2)) < 3  # limit

        # "<season word> <numeral> [<season word>]"
        self.container.register_property(None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?' + season_words_re.pattern + '?)', confidence=1.0, formatter=parse_numeral)

        # "<season word> <n> <sep> <n> ..." -- only valid when the parsed season has len() > 1.
        self.container.register_property(
            None, r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator(
                    'season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # "s<NN>e<NN>[e<NN>|-<NN>]..."
        self.container.register_property(
            None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_e,
                'season': season_parser
            },
            validator=NoValidator())

        # "s<NN>e<NN><any separator><NN>..."
        self.container.register_property(
            None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser,
                'season': season_parser
            },
            validator=NoValidator())

        # "<sep><NN><sep>x<sep><NN>..." (separators between the parts)
        self.container.register_property(
            None, sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # "<NN>x<NN>[x<NN>|-<NN>]..." (no separators)
        self.container.register_property(
            None, r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser_x,
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # "s<NN>[<sep><NN>]..." season-only marker
        self.container.register_property(
            None, r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'season': season_parser
            },
            validator=NoValidator())

        # "<NN>v<N>" episode number followed by a version
        self.container.register_property(None, r'((?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.6, formatter=parse_numeral)

        # Standalone "V<N>" version surrounded by separators
        self.container.register_property('version', sep + r'(V\d+)' + sep, confidence=0.6, formatter=parse_numeral, validator=NoValidator())

        # "ep <NN>" with and without a trailing version
        self.container.register_property(
            None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None, r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))', confidence=0.7, formatter=parse_numeral)

        # Episode marker / episode word followed by one or more numbers
        self.container.register_property(
            None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?' + episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        # Episode marker / episode word followed by a number and a version
        self.container.register_property(
            None, r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.6,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })
        self.container.register_property(
            None, r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?v(?P<version>\d+))',
            confidence=0.8,
            formatter={
                None: parse_numeral,
                'episodeNumber': episode_parser
            })

        # Low-confidence bare numbers (optionally a separated run) at the start of the string
        self.container.register_property('episodeNumber', r'^' + sep + '+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '\d{2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)
        self.container.register_property(
            'episodeNumber', r'^' + sep + '+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + '0\d{1,2}' + ')*)' + sep, confidence=0.4, formatter=episode_parser)

        # Low-confidence bare numbers (optionally a separated run) at the end of the string
        self.container.register_property('episodeNumber', sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)
        self.container.register_property(
            'episodeNumber', sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)' + sep + '+$', confidence=0.4, formatter=episode_parser)

        # "<n> of <count>" forms for episodes and seasons
        self.container.register_property(
            None, r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None, r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(None, r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))', confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None, r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))', confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # "<NN>xAll": a season whose 'other' property is reported as 'Complete'
        self.container.register_property(
            None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={
                None: parse_numeral,
                'other': lambda x: 'Complete',
                'season': season_parser
            },
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-E/--episode-prefer-number`` flag to ``naming_opts``."""
        naming_opts.add_argument(
            '-E', '--episode-prefer-number',
            action='store_true',
            dest='episode_prefer_number',
            default=False,
            help=
            'Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
            'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the list of property names this transformer can produce."""
        return [
            'episodeNumber', 'season', 'episodeList', 'seasonList',
            'episodeCount', 'seasonCount', 'version', 'other'
        ]

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Guess episode properties in ``string`` and reconcile with sibling guesses.

        When ``node`` is given, the new guess is compared with the guesses
        already attached to ``node.group_node()``; see the inline comments for
        the two reconciliation cases. Returns the (possibly modified) guess.
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)
        if guess and node:
            if 'season' in guess and 'episodeNumber' in guess:
                # If two guesses contain both season and episodeNumber in the same group, create an episodeList
                for existing_guess in node.group_node().guesses:
                    if 'season' in existing_guess and 'episodeNumber' in existing_guess:
                        if 'episodeList' not in existing_guess:
                            existing_guess['episodeList'] = [
                                existing_guess['episodeNumber']
                            ]
                        existing_guess['episodeList'].append(
                            guess['episodeNumber'])
                        existing_guess['episodeList'].sort()
                        # The guess holding the larger episodeNumber gets its
                        # episodeNumber confidence set to 0.
                        if existing_guess['episodeNumber'] > guess[
                                'episodeNumber']:
                            existing_guess.set_confidence('episodeNumber', 0)
                        else:
                            guess.set_confidence('episodeNumber', 0)
                        guess['episodeList'] = list(
                            existing_guess['episodeList'])
            elif 'episodeNumber' in guess:
                # If two guesses contain only episodeNumber in the same group,
                # the keys shared with the existing guess are removed from the new guess.
                for existing_guess in node.group_node().guesses:
                    if 'episodeNumber' in existing_guess:
                        for k, v in existing_guess.items():
                            if k in guess:
                                del guess[k]
        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed ``type`` starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run ``guess_episodes_rexps`` over every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessEpisodesRexps(Transformer):
    """Transformer that registers season/episode regular expressions.

    The patterns are stored in a ``PropertiesContainer`` and applied to the
    unidentified leaves of match trees whose guessed ``type`` starts with
    ``'episode'``.
    """

    def __init__(self):
        # NOTE(review): 20 is forwarded to Transformer.__init__; presumably
        # the transformer priority -- confirm against Transformer.
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Parse a matched episode string such as ``e01e02`` or ``e01-03``.

            The value is split on ASCII letters; each remaining chunk is
            either a single numeral or a dash-separated chain of numerals
            that is expanded into inclusive ranges.

            Returns ``None`` when nothing was parsed, the single episode
            number when only one was found, otherwise a dict holding the
            first number under ``None`` and all numbers under
            ``'episodeList'``.
            """
            chunks = [chunk for chunk in re_split('[a-zA-Z]', value) if chunk]
            episodes = []
            for chunk in chunks:
                bounds = [bound for bound in chunk.split('-') if bound]
                if len(bounds) > 1:
                    # Expand every consecutive pair of the chain.  The
                    # previous code always read bounds[0] and bounds[1], so
                    # "1-2-3" repeated the first range instead of reaching 3.
                    for index in range(len(bounds) - 1):
                        start = parse_numeral(bounds[index])
                        end = parse_numeral(bounds[index + 1])
                        if index > 0:
                            # The lower bound was already emitted as the
                            # upper bound of the previous pair.
                            start += 1
                        episodes.extend(range(start, end + 1))
                else:
                    episodes.append(parse_numeral(chunk))
            if len(episodes) > 1:
                return {None: episodes[0], 'episodeList': episodes}  # TODO: Should support seasonList also
            if episodes:
                return episodes[0]
            return None

        class ResolutionCollisionValidator(object):
            """Reject matches whose second regex group has 3+ characters.

            NOTE(review): by its name this is meant to avoid treating video
            resolutions (e.g. 1920x1080) as season x episode -- confirm.
            """

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3

        season_episode_formatter = {None: parse_numeral, 'episodeNumber': episode_parser}

        # "season 2" / "saison 2"
        self.container.register_property(
            None,
            r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))',
            confidence=1.0, formatter=parse_numeral)
        # "s02e01", "s02e01e02", "s02e01-03"
        self.container.register_property(
            None,
            r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0, formatter=season_episode_formatter, validator=NoValidator())
        # "2x01", "2x01x02", "2x01-03"
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral
            + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0, formatter=season_episode_formatter,
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        # "s02" alone
        self.container.register_property(
            None,
            r'(s(?P<season>' + digital_numeral + '))[^0-9]',
            confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        # "12v2" / "12v3"
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')v[23])',
            confidence=0.6, formatter=parse_numeral)
        # "ep 12"
        self.container.register_property(
            None,
            r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]',
            confidence=0.7, formatter=parse_numeral)
        # "e12"
        self.container.register_property(
            None,
            r'(e(?P<episodeNumber>' + digital_numeral + '))',
            confidence=0.6, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    def supported_properties(self):
        """Return the property names this transformer can produce."""
        return ['episodeNumber', 'season']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Search ``string`` with the registered patterns and return a guess.

        ``options`` is accepted for interface compatibility but is not
        forwarded to the container in this revision.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run the episode patterns on every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessEpisodesRexps(Transformer):
    """Transformer that registers season/episode regular expressions.

    The patterns are stored in a ``PropertiesContainer`` and applied to the
    unidentified leaves of match trees whose guessed ``type`` starts with
    ``'episode'``.
    """

    def __init__(self):
        # NOTE(review): 20 is forwarded to Transformer.__init__; presumably
        # the transformer priority -- confirm against Transformer.
        Transformer.__init__(self, 20)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        def episode_parser(value):
            """Parse a matched episode string such as ``e01e02`` or ``e01-03``.

            The value is split on ASCII letters; each remaining chunk is
            either a single numeral or a dash-separated chain of numerals
            that is expanded into inclusive ranges.

            Returns ``None`` when nothing was parsed, the single episode
            number when only one was found, otherwise a dict holding the
            first number under ``None`` and all numbers under
            ``'episodeList'``.
            """
            chunks = [chunk for chunk in re_split('[a-zA-Z]', value) if chunk]
            episodes = []
            for chunk in chunks:
                bounds = [bound for bound in chunk.split('-') if bound]
                if len(bounds) > 1:
                    # Expand every consecutive pair of the chain.  The
                    # previous code always read bounds[0] and bounds[1], so
                    # "1-2-3" repeated the first range instead of reaching 3.
                    for index in range(len(bounds) - 1):
                        start = parse_numeral(bounds[index])
                        end = parse_numeral(bounds[index + 1])
                        if index > 0:
                            # The lower bound was already emitted as the
                            # upper bound of the previous pair.
                            start += 1
                        episodes.extend(range(start, end + 1))
                else:
                    episodes.append(parse_numeral(chunk))
            if len(episodes) > 1:
                return {None: episodes[0], 'episodeList': episodes}  # TODO: Should support seasonList also
            if episodes:
                return episodes[0]
            return None

        class ResolutionCollisionValidator(object):
            """Reject matches whose second regex group has 3+ characters.

            NOTE(review): by its name this is meant to avoid treating video
            resolutions (e.g. 1920x1080) as season x episode -- confirm.
            """

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3

        season_episode_formatter = {None: parse_numeral, 'episodeNumber': episode_parser}

        # "season 2" / "saison 2"
        self.container.register_property(
            None,
            r'((?:season|saison)' + sep + '?(?P<season>' + numeral + '))',
            confidence=1.0, formatter=parse_numeral)
        # "s02e01", "s02e01e02", "s02e01-03"
        self.container.register_property(
            None,
            r'(s(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0, formatter=season_episode_formatter, validator=NoValidator())
        # "2x01", "2x01x02", "2x01-03"
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral
            + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))[^0-9]',
            confidence=1.0, formatter=season_episode_formatter,
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        # "s02" alone
        self.container.register_property(
            None,
            r'(s(?P<season>' + digital_numeral + '))[^0-9]',
            confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        # "12v2" / "12v3"
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')v[23])',
            confidence=0.6, formatter=parse_numeral)
        # "ep 12"
        self.container.register_property(
            None,
            r'((?:ep)' + sep + r'(?P<episodeNumber>' + numeral + '))[^0-9]',
            confidence=0.7, formatter=parse_numeral)
        # "e12"
        self.container.register_property(
            None,
            r'(e(?P<episodeNumber>' + digital_numeral + '))',
            confidence=0.6, formatter=parse_numeral)
        # Two digits at the very start of the string, followed by a separator.
        # The digit fragments are raw strings now: "\d" in a plain literal is
        # an invalid escape sequence.
        self.container.register_property(
            None,
            r'\A ?((?P<episodeNumber>' + r'\d{2}' + '))' + sep,
            confidence=0.4, formatter=parse_numeral)
        # Zero-prefixed number at the very start of the string.
        self.container.register_property(
            None,
            r'\A ?(0(?P<episodeNumber>' + r'\d+' + '))' + sep,
            confidence=0.4, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    def supported_properties(self):
        """Return the property names this transformer can produce."""
        return ['episodeNumber', 'season']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Search ``string`` with the registered patterns and return a guess.

        ``options`` is accepted for interface compatibility but is not
        forwarded to the container in this revision.
        """
        found = self.container.find_properties(string, node)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run the episode patterns on every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessEpisodesRexps(Transformer):
    """Transformer that registers season/episode regular expressions.

    Patterns are stored in a ``PropertiesContainer``.  Besides single
    season/episode numbers, this revision recognises lists and ranges
    (delegated to the module-level ``list_parser``), versions, counts
    ("1 of 4") and the "xAll" marker.
    """

    def __init__(self):
        # NOTE(review): 20 is forwarded to Transformer.__init__; presumably
        # the transformer priority -- confirm against Transformer.
        Transformer.__init__(self, 20)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']
        season_markers = ['s']
        episode_markers = ['e', 'ep']

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        def episode_parser_x(value):
            """Parse an episode list whose items are separated by 'x'."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

        def episode_parser_e(value):
            """Parse an episode list whose items are separated by 'e', filling gaps."""
            return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE),
                               fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using list_parser's default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using list_parser's default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Reject season x episode matches where both values are >= 100.

            NOTE(review): by its name this is meant to avoid treating video
            resolutions (e.g. 1920x1080) as season x episode -- confirm.
            """

            @staticmethod
            def validate(prop, string, node, match, entry_start, entry_end):
                # The match is kept as soon as one of the two values is
                # below 100.
                try:
                    season_value = season_parser(match.group(2))
                    episode_value = episode_parser_x(match.group(3))
                    return season_value < 100 or episode_value < 100
                except Exception:
                    # This may occur for 1xAll or patterns like this.
                    # Narrowed from a bare ``except:`` so KeyboardInterrupt
                    # and SystemExit are no longer swallowed.
                    return True

        # Shared sub-pattern: a numeral optionally followed by more numerals,
        # each introduced by one of the module-level list/range separators.
        numeral_list = (digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?'
                        + digital_numeral + ')*')

        season_list_formatter = {None: parse_numeral, 'season': season_parser}
        episode_list_formatter = {None: parse_numeral, 'episodeNumber': episode_parser}
        season_episode_x_formatter = {None: parse_numeral, 'episodeNumber': episode_parser_x,
                                      'season': season_parser}

        # "season 2", "2 saisons", ...
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?'
            + season_words_re.pattern + '?)',
            confidence=1.0, formatter=parse_numeral)
        # Season lists ("seasons 1-3"); only kept when the parser returns
        # something with more than one element.
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral_list + ')' + sep + '?'
            + season_words_re.pattern + '?)' + sep,
            confidence=1.0, formatter=season_list_formatter,
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))
        # "s02e01", "s02e01e02", "s02e01-03"
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser},
            validator=NoValidator())
        # Same, with any list/range separator between episode numbers.
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + numeral_list + ')))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser, 'season': season_parser},
            validator=NoValidator())
        # "2 x 01" (separators around the 'x')
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep
            + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
            confidence=1.0, formatter=season_episode_x_formatter,
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        # "2x01", "2x01x02", "2x01-03"
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral
            + '(?:[x-]' + digital_numeral + ')*)))',
            confidence=1.0, formatter=season_episode_x_formatter,
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
        # "s01", "s01-03"
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + numeral_list + '))',
            confidence=0.6, formatter=season_list_formatter, validator=NoValidator())
        # "12v2"
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6, formatter=parse_numeral)
        # Stand-alone version marker between separators.
        self.container.register_property(
            'version', sep + r'(V\d+)' + sep,
            confidence=0.6, formatter=parse_numeral, validator=NoValidator())
        # "ep 12"
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)',
            confidence=0.7, formatter=parse_numeral)
        # "ep 12 v2"
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.7, formatter=parse_numeral)
        # "e01", "ep01-03"
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + numeral_list + '))',
            confidence=0.6, formatter=episode_list_formatter)
        # "episode 1", "episodes 1-3"
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + numeral_list + ')' + sep + '?'
            + episode_words_re.pattern + '?)',
            confidence=0.8, formatter=episode_list_formatter)
        # "e01v2"
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep
            + r'?v(?P<version>\d+))',
            confidence=0.6, formatter=episode_list_formatter)
        # "episode 1 v2"
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep
            + r'?v(?P<version>\d+))',
            confidence=0.8, formatter=episode_list_formatter)
        # Two-digit numbers (or lists of them) at the start of the string.
        self.container.register_property(
            'episodeNumber',
            r'^' + sep + r'+(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}'
            + ')*)' + sep,
            confidence=0.4, formatter=episode_parser)
        # Zero-prefixed numbers at the start of the string.
        self.container.register_property(
            'episodeNumber',
            r'^' + sep + r'+0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}'
            + ')*)' + sep,
            confidence=0.4, formatter=episode_parser)
        # Two-digit numbers at the end of the string.
        self.container.register_property(
            'episodeNumber',
            sep + r'(\d{2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'\d{2}' + ')*)'
            + sep + '+$',
            confidence=0.4, formatter=episode_parser)
        # Zero-prefixed numbers at the end of the string.
        self.container.register_property(
            'episodeNumber',
            sep + r'0(\d{1,2}' + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?' + r'0\d{1,2}' + ')*)'
            + sep + '+$',
            confidence=0.4, formatter=episode_parser)
        # "1 of 4 [episodes]"
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7, formatter=parse_numeral)
        # "episode 1 of 4"
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?'
            + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7, formatter=parse_numeral)
        # "season 1 of 4"
        self.container.register_property(
            None,
            r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?'
            + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))',
            confidence=0.7, formatter=parse_numeral)
        # "1 of 4 seasons"
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

        # "2xAll": a season followed by "xAll" is reported as other=Complete.
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts, information_opts, webservice_opts, other_options):
        """Add the ``-E/--episode-prefer-number`` flag to the naming options group."""
        naming_opts.add_argument('-E', '--episode-prefer-number', action='store_true',
                                 dest='episode_prefer_number', default=False,
                                 help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
                                      'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the property names this transformer can produce."""
        return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount', 'seasonCount',
                'version', 'other']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Search ``string`` and reconcile the result with guesses in the same group.

        When the new guess holds both ``season`` and ``episodeNumber``, it is
        merged into an ``episodeList`` with every existing guess of the group
        that also holds both.  When it only holds ``episodeNumber`` and an
        existing guess of the group already has one, the keys shared with
        that existing guess are removed from the new guess.
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)

        if guess and node:
            if 'season' in guess and 'episodeNumber' in guess:
                # If two guesses contains both season and episodeNumber in same group, create an episodeList
                for existing_guess in node.group_node().guesses:
                    if 'season' in existing_guess and 'episodeNumber' in existing_guess:
                        if 'episodeList' not in existing_guess:
                            existing_guess['episodeList'] = [existing_guess['episodeNumber']]
                        existing_guess['episodeList'].append(guess['episodeNumber'])
                        existing_guess['episodeList'].sort()
                        # Only the guess with the lowest episode number keeps
                        # a non-zero confidence on episodeNumber.
                        if existing_guess['episodeNumber'] > guess['episodeNumber']:
                            existing_guess.set_confidence('episodeNumber', 0)
                        else:
                            guess.set_confidence('episodeNumber', 0)
                        guess['episodeList'] = list(existing_guess['episodeList'])
            elif 'episodeNumber' in guess:
                # Another guess of the group already carries an episodeNumber:
                # strip every key it shares with that guess from the new one.
                for existing_guess in node.group_node().guesses:
                    if 'episodeNumber' in existing_guess:
                        for key, _ in existing_guess.items():
                            if key in guess:
                                del guess[key]
        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run the episode patterns on every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessVideoRexps(Transformer):
    """Transformer that registers video-related patterns.

    Covers CD numbering ("cd1 of 2", "2 cds"), bonus and film numbers, and
    edition names, which are mapped to a canonical form.
    """

    def __init__(self):
        # NOTE(review): 25 is forwarded to Transformer.__init__; presumably
        # the transformer priority -- confirm against Transformer.
        Transformer.__init__(self, 25)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        register = self.container.register_property

        # "cd1", optionally followed by "of 2"
        register(None,
                 'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep + '(?P<cdNumberTotal>[0-9]))?',
                 confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
        # "2 cds"
        register('cdNumberTotal', '([1-9])' + _psep + 'cds?',
                 confidence=0.9, enhance=False, formatter=parse_numeral)

        register('bonusNumber', 'x([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)
        register('filmNumber', 'f([0-9]{1,2})', enhance=False, global_span=True, formatter=parse_numeral)

        register('edition', 'collector', 'collector-edition', 'edition-collector',
                 canonical_form='Collector Edition')
        register('edition', 'special-edition', 'edition-special',
                 canonical_form='Special Edition')
        register('edition', 'criterion', 'criterion-edition', 'edition-criterion',
                 canonical_form='Criterion Edition')
        # The second pattern used to read 'cdeluxe-edition', a typo that
        # could never match "deluxe edition"; it now mirrors the other
        # "<name>-edition" patterns.
        register('edition', 'deluxe', 'deluxe-edition', 'edition-deluxe',
                 canonical_form='Deluxe Edition')
        register('edition', 'director\'?s?-cut', 'director\'?s?-cut-edition', 'edition-director\'?s?-cut',
                 canonical_form='Director\'s cut')

    def supported_properties(self):
        """Return the property names registered on the container."""
        return self.container.get_supported_properties()

    def guess_video_rexps(self, string, node=None, options=None):
        """Search ``string`` with the registered patterns and return a guess."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run the video patterns on every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_video_rexps, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessWeakEpisodesRexps(Transformer):
    """Transformer that registers low-confidence episode patterns.

    Covers bare numbers such as "213", read either as season 2 / episode 13
    or, when the ``episode_prefer_number`` option is set, as episode 213.
    """

    def __init__(self):
        # NOTE(review): 15 is forwarded to Transformer.__init__; presumably
        # the transformer priority -- confirm against Transformer.
        Transformer.__init__(self, 15)

        of_separators = ["of", "sur", "/", "\\"]
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False, remove_duplicates=True)

        episode_words = ["episodes?"]

        def episode_list_parser(value):
            """Parse an episode list using list_parser's default separators."""
            return list_parser(value, "episodeList")

        def season_episode_parser(episode_number):
            """Split a number like 213 into season 2 / episode 13.

            Returns ``None`` for values accepted by ``valid_year`` and for
            seasons above 50, the plain number when it is not greater than
            100, otherwise a ``{"season", "episodeNumber"}`` dict.
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum > 100:
                season, epnum = epnum // 100, epnum % 100
                # episodes which have a season > 50 are most likely errors
                # (Simpson is at 25!)
                if season > 50:
                    return None
                return {"season": season, "episodeNumber": epnum}
            return epnum

        def season_split_disabled(options):
            """Disable season/episode splitting when plain numbers are preferred."""
            return options.get("episode_prefer_number") if options else False

        def plain_number_disabled(options):
            """Disable plain-number patterns unless they are explicitly preferred."""
            return not options.get("episode_prefer_number") if options else True

        # A 2-3 digit number optionally followed by more of them, each
        # introduced by one of the module-level list/range separators.
        # Raw strings are used for the digit classes: "\d" in a plain
        # literal is an invalid escape sequence.
        number_list = r"\d{2,3}(?:" + sep + "?" + all_separators_re.pattern + sep + "?" + r"\d{2,3})*"

        self.container.register_property(
            ["episodeNumber", "season"],
            "[0-9]{2,4}",
            confidence=0.6,
            formatter=season_episode_parser,
            disabler=season_split_disabled,
        )
        self.container.register_property(
            ["episodeNumber", "season"], "[0-9]{4}", confidence=0.6, formatter=season_episode_parser
        )
        # "episode 12"
        self.container.register_property(
            None,
            "(" + build_or_pattern(episode_words) + sep + "?(?P<episodeNumber>" + numeral + "))[^0-9]",
            confidence=0.4,
            formatter=parse_numeral,
        )
        # "1 of 4"
        self.container.register_property(
            None,
            r"(?P<episodeNumber>" + numeral + ")" + sep + "?" + of_separators_re.pattern + sep
            + "?(?P<episodeCount>" + numeral + ")",
            confidence=0.6,
            formatter=parse_numeral,
        )
        # Plain numbers anywhere after a non-digit.
        self.container.register_property(
            "episodeNumber",
            "[^0-9](" + number_list + ")",
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=plain_number_disabled,
        )
        # Plain numbers at the start of the string.
        self.container.register_property(
            "episodeNumber",
            r"^" + sep + "?(" + number_list + ")" + sep,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=plain_number_disabled,
        )
        # Plain numbers at the end of the string.
        self.container.register_property(
            "episodeNumber",
            sep + "(" + number_list + ")" + sep + "?$",
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=plain_number_disabled,
        )

    def supported_properties(self):
        """Return the property names registered on the container."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Search ``string`` and reconcile the result with guesses in the same group.

        Returns ``None`` when the new guess carries an ``episodeNumber`` and
        another guess of the group already holds both ``season`` and
        ``episodeNumber``.  When the new guess has only an ``episodeNumber``,
        it is merged into an ``episodeList`` with the existing season-less
        episode guesses of the group.
        """
        properties = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(properties, string)

        if not (node and guess) or "episodeNumber" not in guess:
            return guess

        group_guesses = node.group_node().guesses

        # A guess with both season and episodeNumber already exists in the
        # group: it takes precedence over this weak one.
        if [g for g in group_guesses if "season" in g and "episodeNumber" in g]:
            return None

        if "season" not in guess:
            # If we have other nodes containing episodeNumber, create an episodeList.
            for existing_guess in [g for g in group_guesses if "season" not in g and "episodeNumber" in g]:
                if "episodeList" not in existing_guess:
                    existing_guess["episodeList"] = [existing_guess["episodeNumber"]]
                existing_guess["episodeList"].append(guess["episodeNumber"])
                existing_guess["episodeList"].sort()
                # Only the guess with the lowest episode number keeps a
                # non-zero confidence on episodeNumber.
                if existing_guess["episodeNumber"] > guess["episodeNumber"]:
                    existing_guess.set_confidence("episodeNumber", 0)
                else:
                    guess.set_confidence("episodeNumber", 0)
                guess["episodeList"] = list(existing_guess["episodeList"])

        return guess

    def should_process(self, mtree, options=None):
        """Return True when the tree's guessed type starts with 'episode'."""
        return mtree.guess.get("type", "").startswith("episode")

    def process(self, mtree, options=None):
        """Run the weak episode patterns on every unidentified leaf of ``mtree``."""
        GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options).process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register the season/episode patterns, including list and range parsing.

    Builds the separator regexes, defines the local ``list_parser`` used by
    the formatters, then registers every pattern on ``self.container``.
    """
    # NOTE(review): 20 is forwarded to Transformer.__init__; presumably the
    # transformer priority -- confirm against Transformer.
    Transformer.__init__(self, 20)

    range_separators = ['-', 'to', 'a']
    discrete_separators = ['&', 'and', 'et']
    of_separators = ['of', 'sur', '/', '\\']

    season_words = ['seasons?', 'saisons?', 'series?']
    episode_words = ['episodes?']
    season_markers = ['s']
    episode_markers = ['e', 'ep']

    # The generic separator pattern, minus the range separators, also acts
    # as a discrete separator.
    discrete_sep = sep
    for range_separator in range_separators:
        discrete_sep = discrete_sep.replace(range_separator, '')
    discrete_separators.append(discrete_sep)
    all_separators = list(range_separators)
    all_separators.extend(discrete_separators)

    self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

    range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
    discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
    all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
    of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

    season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
    episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
    season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
    episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

    def list_parser(value, propertyListName, discrete_separators_re=discrete_separators_re,
                    range_separators_re=range_separators_re, allow_discrete=False, fill_gaps=False):
        """Parse a string holding discrete numbers and/or ranges.

        :param value: matched text, e.g. "01-03" or "1 & 3"
        :param propertyListName: key under which the full list is returned
        :param discrete_separators_re: regex splitting discrete items
        :param range_separators_re: regex splitting range bounds
        :param allow_discrete: when False, values lower than the last kept
            value are dropped so the list never decreases
        :param fill_gaps: when True, the result becomes every integer from
            the lowest to the highest value
        :return: ``None`` when nothing was parsed, the single value when
            only one was found, otherwise a dict with the first value under
            ``None`` and the list under ``propertyListName``
        """
        discrete_elements = [x.strip() for x in discrete_separators_re.split(value) if x != '']

        # Re-attach range separators that the discrete split isolated from
        # their bounds.
        proper_discrete_elements = []
        i = 0
        while i < len(discrete_elements):
            if i < len(discrete_elements) - 2 and range_separators_re.match(discrete_elements[i + 1]):
                # "a", "-", "b" -> "a-b"
                proper_discrete_elements.append(
                    discrete_elements[i] + discrete_elements[i + 1] + discrete_elements[i + 2])
                i += 3
            else:
                match = range_separators_re.search(discrete_elements[i])
                if match and match.start() == 0:
                    # The element starts with a range separator: it
                    # continues the previous element.  That is always the
                    # last one appended; indexing with i-1 raised IndexError
                    # as soon as an earlier merge had made the output list
                    # shorter than i.
                    proper_discrete_elements[-1] = proper_discrete_elements[-1] + discrete_elements[i]
                elif match and match.end() == len(discrete_elements[i]):
                    # The element ends with a range separator: glue the next
                    # element to it.
                    proper_discrete_elements.append(discrete_elements[i] + discrete_elements[i + 1])
                else:
                    proper_discrete_elements.append(discrete_elements[i])
                i += 1

        discrete_elements = proper_discrete_elements

        ret = []
        for discrete_element in discrete_elements:
            range_values = [x.strip() for x in range_separators_re.split(discrete_element) if x != '']
            if len(range_values) > 1:
                # Expand every consecutive pair of bounds, skipping values
                # already collected.
                for x in range(0, len(range_values) - 1):
                    start_range_ep = parse_numeral(range_values[x])
                    end_range_ep = parse_numeral(range_values[x + 1])
                    for range_ep in range(start_range_ep, end_range_ep + 1):
                        if range_ep not in ret:
                            ret.append(range_ep)
            else:
                discrete_value = parse_numeral(discrete_element)
                if discrete_value not in ret:
                    ret.append(discrete_value)

        if len(ret) > 1:
            if not allow_discrete:
                # replace discrete elements by ranges
                valid_ret = [ret[0]]
                for candidate in ret[1:]:
                    if candidate >= valid_ret[-1]:
                        valid_ret.append(candidate)
                ret = valid_ret
            if fill_gaps:
                ret = list(range(min(ret), max(ret) + 1))
            if len(ret) > 1:
                return {None: ret[0], propertyListName: ret}
        if len(ret) > 0:
            return ret[0]
        return None

    def episode_parser_x(value):
        """Parse an episode list whose items are separated by 'x'."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile('x', re.IGNORECASE))

    def episode_parser_e(value):
        """Parse an episode list whose items are separated by 'e', filling gaps."""
        return list_parser(value, 'episodeList', discrete_separators_re=re.compile('e', re.IGNORECASE),
                           fill_gaps=True)

    def episode_parser(value):
        """Parse an episode list using the default separators."""
        return list_parser(value, 'episodeList')

    def season_parser(value):
        """Parse a season list using the default separators."""
        return list_parser(value, 'seasonList')

    class ResolutionCollisionValidator(object):
        """Reject matches whose second regex group has 3+ characters.

        NOTE(review): by its name this is meant to avoid treating video
        resolutions (e.g. 1920x1080) as season x episode -- confirm.
        """

        def validate(self, prop, string, node, match, entry_start, entry_end):
            return len(match.group(2)) < 3  # limit

    # Shared sub-pattern: a numeral optionally followed by more numerals,
    # each introduced by any list/range separator.
    numeral_list = (digital_numeral + '(?:' + sep + '?' + all_separators_re.pattern + sep + '?'
                    + digital_numeral + ')*')

    season_list_formatter = {None: parse_numeral, 'season': season_parser}
    episode_list_formatter = {None: parse_numeral, 'episodeNumber': episode_parser}
    season_episode_x_formatter = {None: parse_numeral, 'episodeNumber': episode_parser_x,
                                  'season': season_parser}

    # "season 2", "2 saisons", ...
    self.container.register_property(
        None,
        r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?'
        + season_words_re.pattern + '?)',
        confidence=1.0, formatter=parse_numeral)
    # Season lists ("seasons 1-3"); only kept when the parser returns
    # something with more than one element.
    self.container.register_property(
        None,
        r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral_list + ')' + sep + '?'
        + season_words_re.pattern + '?)' + sep,
        confidence=1.0, formatter=season_list_formatter,
        validator=ChainedValidator(
            DefaultValidator(),
            FormatterValidator('season', lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))
    # "s02e01", "s02e01e02", "s02e01-03"
    self.container.register_property(
        None,
        r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep
        + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={None: parse_numeral, 'episodeNumber': episode_parser_e, 'season': season_parser},
        validator=NoValidator())
    # "2 x 01" (separators around the 'x')
    self.container.register_property(
        None,
        sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x' + sep
        + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
        confidence=1.0, formatter=season_episode_x_formatter,
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
    # "2x01", "2x01x02", "2x01-03"
    self.container.register_property(
        None,
        r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral
        + '(?:[x-]' + digital_numeral + ')*)))',
        confidence=1.0, formatter=season_episode_x_formatter,
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
    # "s01", "s01-03"
    self.container.register_property(
        None,
        r'(' + season_markers_re.pattern + '(?P<season>' + numeral_list + '))',
        confidence=0.6, formatter=season_list_formatter, validator=NoValidator())
    # "12v2"
    self.container.register_property(
        None,
        r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
        confidence=0.6, formatter=parse_numeral)
    # "ep 12"
    self.container.register_property(
        None,
        r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + '?)',
        confidence=0.7, formatter=parse_numeral)
    # "ep 12 v2"
    self.container.register_property(
        None,
        r'(ep' + sep + r'?(?P<episodeNumber>' + numeral + ')' + sep + r'?v(?P<version>\d+))',
        confidence=0.7, formatter=parse_numeral)
    # "e01", "ep01-03"
    self.container.register_property(
        None,
        r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + numeral_list + '))',
        confidence=0.6, formatter=episode_list_formatter)
    # "episode 1", "episodes 1-3"
    self.container.register_property(
        None,
        r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + numeral_list + ')' + sep + '?'
        + episode_words_re.pattern + '?)',
        confidence=0.8, formatter=episode_list_formatter)
    # "e01v2"
    self.container.register_property(
        None,
        r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep
        + r'?v(?P<version>\d+))',
        confidence=0.6, formatter=episode_list_formatter)
    # "episode 1 v2"
    self.container.register_property(
        None,
        r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')' + sep
        + r'?v(?P<version>\d+))',
        confidence=0.8, formatter=episode_list_formatter)

    # Two-digit number at the start of the string.
    # NOTE(review): this pattern is registered twice in a row, as in the
    # original; the second call looks like a copy/paste leftover -- confirm
    # before removing it.
    self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
    # Zero-prefixed number at the start of the string.
    self.container.register_property('episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
    # Two-digit / zero-prefixed number at the end of the string.
    self.container.register_property('episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
    self.container.register_property('episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

    # "1 of 4 [episodes]"
    self.container.register_property(
        None,
        r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
        + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)',
        confidence=0.7, formatter=parse_numeral)
    # "episode 1 of 4"
    self.container.register_property(
        None,
        r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?'
        + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))',
        confidence=0.7, formatter=parse_numeral)
    # "season 1 of 4"
    self.container.register_property(
        None,
        r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?'
        + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))',
        confidence=0.7, formatter=parse_numeral)
    # "1 of 4 seasons"
    self.container.register_property(
        None,
        r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
        + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))',
        confidence=0.7, formatter=parse_numeral)

    self.container.register_canonical_properties('other', 'FiNAL', 'Complete', validator=WeakValidator())

    # "2xAll": a season followed by "xAll" is reported as other=Complete.
    self.container.register_property(
        None,
        r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
        confidence=1.0,
        formatter={None: parse_numeral, 'other': lambda x: 'Complete', 'season': season_parser},
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
def __init__(self):
    """Set up the container holding the low-confidence episode/season patterns.

    The transformer is initialised with the value 15 and every pattern is
    registered on ``self.container`` in a fixed order.
    """
    Transformer.__init__(self, 15)

    of_separators_re = re.compile(
        build_or_pattern(['of', 'sur', '/', '\\'], escape=True), re.IGNORECASE)

    self.container = PropertiesContainer(
        enhance=False, canonical_from_pattern=False, remove_duplicates=True)

    episode_words = ['episodes?']

    def episode_list_parser(value):
        # Delegates to list_parser, asking for an 'episodeList' result.
        return list_parser(value, 'episodeList')

    def season_episode_parser(episode_number):
        # Numbers that look like a year produce no result at all.
        number = parse_numeral(episode_number)
        if valid_year(number):
            return None
        if number <= 100:
            return number
        # e.g. 412 -> season 4, episode 12
        season, episode = divmod(number, 100)
        # A season above 50 is most likely an error (Simpson is at 25!).
        if season > 50:
            return None
        return {'season': season, 'episodeNumber': episode}

    def number_preferred(options):
        return options.get('episode_prefer_number') if options else False

    def number_not_preferred(options):
        return not options.get('episode_prefer_number') if options else True

    self.container.register_property(
        ['episodeNumber', 'season'], '[0-9]{2,4}',
        confidence=0.6,
        formatter=season_episode_parser,
        disabler=number_preferred)

    self.container.register_property(
        ['episodeNumber', 'season'], '[0-9]{4}',
        confidence=0.6,
        formatter=season_episode_parser)

    self.container.register_property(
        None,
        '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral + '))[^0-9]',
        confidence=0.4,
        formatter=parse_numeral)

    self.container.register_property(
        None,
        r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern
        + sep + '?(?P<episodeCount>' + numeral + ')',
        confidence=0.6,
        formatter=parse_numeral)

    # One 2-3 digit number, optionally followed by more of them joined by a
    # separator from all_separators_re; shared by the three patterns below.
    number_list = (r'(\d{2,3}(?:' + sep + '?' + all_separators_re.pattern
                   + sep + r'?\d{2,3})*)')

    list_patterns = (
        '[^0-9]' + number_list,          # preceded by a non-digit
        r'^' + sep + '?' + number_list + sep,  # at the start of the string
        sep + number_list + sep + '?$',  # at the end of the string
    )
    for list_pattern in list_patterns:
        self.container.register_property(
            'episodeNumber', list_pattern,
            confidence=0.4,
            formatter=episode_list_parser,
            disabler=number_not_preferred)
class GuessReleaseGroup(Transformer):
    """Transformer that looks for a ``releaseGroup`` in unidentified leaves."""

    def __init__(self):
        Transformer.__init__(self, -190)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        self._allowed_groupname_pattern = r'[\w@#€£$&]'
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt),
        ]
        # When the leaf just before the candidate holds one of these
        # properties, the match is considered safe and the group name may
        # contain a separator.
        self.previous_safe_properties = [
            'videoCodec', 'format', 'videoApi', 'audioCodec',
            'audioProfile', 'videoProfile', 'audioChannels',
        ]

        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True

        single_word = self._allowed_groupname_pattern + '+'
        self.container.register_property('releaseGroup', single_word)
        self.container.register_property('releaseGroup', single_word + '-' + single_word)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Tell whether ``int(s)`` succeeds."""
        try:
            int(s)
        except ValueError:
            return False
        return True

    def _is_forbidden(self, text):
        """Tell whether the lower-cased text triggers any forbidden-name check."""
        lowered = text.lower()
        return any(check(lowered) for check in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Validate ``guess['releaseGroup']``, truncating it at a forbidden part.

        A dashed value is cut just before its first forbidden part and the
        shortened value is written back into the guess. Returns True when the
        resulting value is at least acceptable as a whole, False otherwise.
        """
        name = guess['releaseGroup']
        if len(name) < 2:
            return False

        if '-' in name:
            kept = ""
            for part in name.split('-'):
                if self._is_forbidden(part):
                    break
                if kept:
                    kept += '-'
                kept += part
            name = kept
            if not name:
                return False
            guess['releaseGroup'] = name

        return not self._is_forbidden(name)

    def is_leaf_previous(self, leaf, node):
        """Tell whether ``leaf`` ends before ``node`` with only separators between."""
        gap_start, gap_end = leaf.span[1], node.span[0]
        if gap_start > gap_end:
            return False
        return all(leaf.root.value[idx] in sep for idx in range(gap_start, gap_end))

    def guess_release_group(self, string, node=None, options=None):
        """Return a validated release-group guess for ``node``, or None."""
        matches = self.container.find_properties(string, node, 'releaseGroup')
        candidate = self.container.as_guess(matches, string, self.validate_group_name,
                                            sep_replacement='-')
        validated_guess = None

        if candidate:
            own_group = node.group_node()
            if own_group:
                for leaf in own_group.leaves_containing(self.previous_safe_properties):
                    if not self.is_leaf_previous(leaf, node):
                        continue
                    # A dash right after the safe leaf gives full confidence.
                    if leaf.root.value[leaf.span[1]] == '-':
                        candidate.metadata().confidence = 1
                    else:
                        candidate.metadata().confidence = 0.7
                    validated_guess = candidate

            if not validated_guess:
                # If a leaf of the previous group is identified as a safe
                # property, consider the raw node value as the release group.
                earlier_group = node.previous_group_node()
                if earlier_group:
                    for leaf in earlier_group.leaves_containing(self.previous_safe_properties):
                        if not self.is_leaf_previous(leaf, node):
                            continue
                        candidate = Guess({'releaseGroup': node.value}, confidence=1,
                                          input=node.value, span=(0, len(node.value)))
                        if self.validate_group_name(candidate):
                            node.guess = candidate
                            validated_guess = candidate

            if validated_guess:
                # Following group nodes made of a single unidentified leaf
                # belong to the release group as well.
                following = node.next_group_node()
                while following:
                    leaves = list(following.leaves())
                    if len(leaves) != 1 or leaves[0].guess:
                        break
                    validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                    leaves[0].guess = validated_guess
                    following = following.next_group_node()

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run the release-group finder over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_release_group, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def guess_release_group(self, string, node=None, options=None):
    """Guess the release group for ``node`` from ``string``.

    Steps, in order:

    1. If ``options`` carries ``expected_group`` entries, they are tried first
       through a dedicated container; a match is returned immediately.
    2. Otherwise the regular container is searched and the candidate is kept
       only if ``validate_node`` accepts a leaf holding one of
       ``self.previous_safe_properties`` (same group first, then the previous
       group, in which case the raw node value is used).
    3. A validated guess absorbs following group nodes that consist of a
       single leaf without a guess.
    4. With nothing validated, an explicit node whose ``node_last_idx`` is 0
       yields a guess built from its value minus first and last character.

    Returns the guess, or None when nothing was validated. ``node.guess`` and
    the ``guess`` of absorbed leaves are assigned as a side effect.
    """
    if options and options.get('expected_group'):
        # Throw-away container holding only the groups the caller expects.
        expected_container = PropertiesContainer(
            enhance=True, canonical_from_pattern=False)
        for expected_group in options.get('expected_group'):
            if expected_group.startswith('re:'):
                # 're:' prefix: the remainder is used as a pattern as-is,
                # with spaces replaced by dashes.
                expected_group = expected_group[3:]
                expected_group = expected_group.replace(' ', '-')
                expected_container.register_property('releaseGroup',
                                                     expected_group,
                                                     enhance=True)
            else:
                # Plain text: escaped so it is matched literally.
                expected_group = re.escape(expected_group)
                expected_container.register_property('releaseGroup',
                                                     expected_group,
                                                     enhance=False)

        found = expected_container.find_properties(string, node, options,
                                                   'releaseGroup')
        guess = expected_container.as_guess(found, string,
                                            self.validate_group_name)
        if guess:
            return guess

    found = self.container.find_properties(string, node, options,
                                           'releaseGroup')
    guess = self.container.as_guess(found, string, self.validate_group_name)
    validated_guess = None
    if guess:
        group_node = node.group_node()
        if group_node:
            for leaf in group_node.leaves_containing(
                    self.previous_safe_properties):
                if self.validate_node(leaf, node, True):
                    # The character right after the safe leaf decides the
                    # confidence: a dash gives 1, anything else 0.7.
                    if leaf.root.value[leaf.span[1]] == '-':
                        guess.metadata().confidence = 1
                    else:
                        guess.metadata().confidence = 0.7
                    validated_guess = guess

        if not validated_guess:
            # If previous group last leaf is identified as a safe property,
            # consider the raw value as a releaseGroup
            previous_group_node = node.previous_group_node()
            if previous_group_node:
                for leaf in previous_group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.validate_node(leaf, node, False):
                        guess = Guess({'releaseGroup': node.value},
                                      confidence=1,
                                      input=node.value,
                                      span=(0, len(node.value)))
                        # validate_group_name is called on the fresh guess
                        # before it is attached to the node.
                        if self.validate_group_name(guess):
                            node.guess = guess
                            validated_guess = guess

        if validated_guess:
            # If following group nodes have only one unidentified leaf, it belongs to the release group
            next_group_node = node

            while True:
                next_group_node = next_group_node.next_group_node()
                if next_group_node:
                    leaves = list(next_group_node.leaves())
                    if len(leaves) == 1 and not leaves[0].guess:
                        # Append the leaf text and mark the leaf with the
                        # same guess object.
                        validated_guess['releaseGroup'] = validated_guess[
                            'releaseGroup'] + leaves[0].value
                        leaves[0].guess = validated_guess
                    else:
                        break
                else:
                    break

    if not validated_guess and node.is_explicit(
    ) and node.node_last_idx == 0:  # first node from group
        # Fallback with low confidence (0.4): the node value without its
        # first and last character.
        # NOTE(review): presumably those two characters are the enclosing
        # brackets of an explicit group - confirm against is_explicit().
        validated_guess = build_guess(
            node, 'releaseGroup', value=node.value[1:len(node.value) - 1])
        validated_guess.metadata().confidence = 0.4
        validated_guess.metadata().span = 1, len(node.value)
        node.guess = validated_guess

    if validated_guess:
        # Strip brackets
        validated_guess['releaseGroup'] = strip_brackets(
            validated_guess['releaseGroup'])

    return validated_guess
class GuessReleaseGroup(Transformer):
    """Transformer that looks for a ``releaseGroup`` in unidentified leaves.

    Groups given through the ``expected_group`` option are tried first; the
    generic patterns registered in ``__init__`` are the fallback.
    """

    def __init__(self):
        Transformer.__init__(self, -190)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw string: same runtime value, no invalid-escape warning.
        self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt),
        ]
        # If the previous property in this list, the match will be considered as safe
        # and group name can contain a separator.
        self.previous_safe_properties = ['videoCodec', 'format', 'videoApi', 'audioCodec',
                                         'audioProfile', 'videoProfile', 'audioChannels', 'other']
        self.previous_safe_values = {'other': ['Complete']}
        self.next_safe_properties = ['extension', 'website']
        self.next_safe_values = {'format': ['Telesync']}
        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        self.container.register_property('releaseGroup', self._allowed_groupname_pattern + '+')
        self.container.register_property(
            'releaseGroup',
            self._allowed_groupname_pattern + '+-' + self._allowed_groupname_pattern + '+')
        # Capturing group so that re.split() keeps the separators in its result.
        self.re_sep = re.compile('(' + sep + ')')

    def register_arguments(self, opts, naming_opts, output_opts, information_opts,
                           webservice_opts, other_options):
        """Add the ``-G/--expected-group`` option to the naming options."""
        naming_opts.add_argument('-G', '--expected-group', action='append', dest='expected_group',
                                 help='Expected release group (can be used multiple times)')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False on ValueError."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def _is_forbidden(self, text):
        """Return True when the lower-cased text triggers a forbidden-name check."""
        lowered = text.lower()
        return any(check(lowered) for check in self._forbidden_groupname_lambda)

    def validate_group_name(self, guess):
        """Clean up and validate ``guess['releaseGroup']``.

        Forbidden parts are dropped together with one adjacent separator, then
        one leading and one trailing separator are trimmed. The cleaned value
        is written back into ``guess``.

        Returns True when a non-empty, non-forbidden value remains, False
        otherwise (including values of length 0 or 1).
        """
        val = guess['releaseGroup']
        if len(val) <= 1:
            return False

        checked_val = ""
        forbidden = False
        for elt in self.re_sep.split(val):  # separators are in the list because of capturing group
            if forbidden:
                # Previous part was forbidden: skip the separator that follows it.
                forbidden = False
                continue
            forbidden = self._is_forbidden(elt)
            if forbidden:
                # Remove the separator that preceded the forbidden part.
                checked_val = checked_val[:-1]
            else:
                checked_val += elt

        val = checked_val
        # Guard each trim: the value may shrink to nothing (e.g. "1--" leaves
        # a lone separator), and indexing an empty string raises IndexError.
        if val and self.re_sep.match(val[-1]):
            val = val[:-1]
        if val and self.re_sep.match(val[0]):
            val = val[1:]
        if not val:
            # Nothing but forbidden parts / separators: never a release group.
            return False

        guess['releaseGroup'] = val
        return not self._is_forbidden(val)

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends at or before the start of ``node`` and
        every character between them belongs to ``sep``."""
        if leaf.span[1] <= node.span[0]:
            for idx in range(leaf.span[1], node.span[0]):
                if leaf.root.value[idx] not in sep:
                    return False
            return True
        return False

    def validate_next_leaves(self, node):
        """Return True when taking ``node`` still leaves room for a title.

        Always True when 'series' or 'title' is already in the root info;
        otherwise True only if more than one unidentified leaf remains.
        """
        if 'series' in node.root.info or 'title' in node.root.info:
            # --expected-series or --expected-title is used.
            return True

        # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
        leaves = node.root.unidentified_leaves()
        return len(list(leaves)) > 1

    def validate_node(self, leaf, node, safe=False):
        """Check that ``leaf`` makes ``node`` an acceptable release-group spot.

        With ``safe`` set, every property of ``leaf.guess`` listed in
        ``self.previous_safe_values`` must carry one of the allowed values.
        """
        if not self.is_leaf_previous(leaf, node):
            return False
        if not self.validate_next_leaves(node):
            return False
        if safe:
            for k, v in leaf.guess.items():
                if k in self.previous_safe_values and v not in self.previous_safe_values[k]:
                    return False
        return True

    def guess_release_group(self, string, node=None, options=None):
        """Return a release-group guess for ``node``, or None.

        Expected groups from ``options`` win immediately; otherwise the
        candidate must be validated against a neighbouring "safe" leaf, and an
        explicit first node of a group is used as a low-confidence fallback.
        ``node.guess`` and absorbed leaves are assigned as a side effect.
        """
        if options and options.get('expected_group'):
            expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
            for expected_group in options.get('expected_group'):
                if expected_group.startswith('re:'):
                    # 're:' prefix: remainder is a pattern, spaces become dashes.
                    expected_group = expected_group[3:]
                    expected_group = expected_group.replace(' ', '-')
                    expected_container.register_property('releaseGroup', expected_group, enhance=True)
                else:
                    expected_group = re.escape(expected_group)
                    expected_container.register_property('releaseGroup', expected_group, enhance=False)

            found = expected_container.find_properties(string, node, options, 'releaseGroup')
            guess = expected_container.as_guess(found, string, self.validate_group_name)
            if guess:
                return guess

        found = self.container.find_properties(string, node, options, 'releaseGroup')
        guess = self.container.as_guess(found, string, self.validate_group_name)
        validated_guess = None
        if guess:
            group_node = node.group_node()
            if group_node:
                for leaf in group_node.leaves_containing(self.previous_safe_properties):
                    if self.validate_node(leaf, node, True):
                        # A dash right after the safe leaf gives full confidence.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                        if self.validate_node(leaf, node, False):
                            guess = Guess({'releaseGroup': node.value}, confidence=1,
                                          input=node.value, span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node

                while True:
                    next_group_node = next_group_node.next_group_node()
                    if next_group_node:
                        leaves = list(next_group_node.leaves())
                        if len(leaves) == 1 and not leaves[0].guess:
                            validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                            leaves[0].guess = validated_guess
                        else:
                            break
                    else:
                        break

        if not validated_guess and node.is_explicit() and node.node_last_idx == 0:  # first node from group
            validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1])
            validated_guess.metadata().confidence = 0.4
            validated_guess.metadata().span = 1, len(node.value)
            node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run the release-group finder over the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log, options).process_nodes(mtree.unidentified_leaves())
class GuessReleaseGroup(Transformer):
    """Guess the ``releaseGroup`` property of unidentified leaves.

    Release groups passed through the ``expected_group`` option take
    precedence over the generic patterns registered in ``__init__``.
    """

    def __init__(self):
        Transformer.__init__(self, -190)

        self.container = PropertiesContainer(canonical_from_pattern=False)
        # Raw literal; the runtime value is the same as the non-raw form.
        self._allowed_groupname_pattern = r'[\w@#€£$&!\?]'
        self._forbidden_groupname_lambda = [
            lambda elt: elt in ['rip', 'by', 'for', 'par', 'pour', 'bonus'],
            lambda elt: self._is_number(elt)
        ]
        # If the previous property in this list, the match will be considered as safe
        # and group name can contain a separator.
        self.previous_safe_properties = [
            'videoCodec', 'format', 'videoApi', 'audioCodec', 'audioProfile',
            'videoProfile', 'audioChannels', 'other'
        ]
        self.previous_safe_values = {'other': ['Complete']}
        self.next_safe_properties = ['extension', 'website']
        self.next_safe_values = {'format': ['Telesync']}
        self.container.sep_replace_char = '-'
        self.container.canonical_from_pattern = False
        self.container.enhance = True
        self.container.register_property('releaseGroup',
                                         self._allowed_groupname_pattern + '+')
        self.container.register_property(
            'releaseGroup', self._allowed_groupname_pattern + '+-' +
            self._allowed_groupname_pattern + '+')
        # The capturing group makes re.split() return the separators too.
        self.re_sep = re.compile('(' + sep + ')')

    def register_arguments(self, opts, naming_opts, output_opts,
                           information_opts, webservice_opts, other_options):
        """Declare the ``-G/--expected-group`` command-line option."""
        naming_opts.add_argument(
            '-G',
            '--expected-group',
            action='append',
            dest='expected_group',
            help='Expected release group (can be used multiple times)')

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def _is_number(self, s):
        """Return True when ``int(s)`` succeeds, False on ValueError."""
        try:
            int(s)
            return True
        except ValueError:
            return False

    def validate_group_name(self, guess):
        """Clean up ``guess['releaseGroup']`` and tell whether it is acceptable.

        Parts rejected by ``self._forbidden_groupname_lambda`` are removed
        along with one neighbouring separator; one separator is then trimmed
        from each end and the result is stored back in ``guess``.

        Returns False for values shorter than two characters, for values that
        end up empty, and for values that are forbidden as a whole.
        """
        checks = self._forbidden_groupname_lambda
        val = guess['releaseGroup']
        if len(val) > 1:
            checked_val = ""
            forbidden = False
            # separators are in the list because of capturing group
            for elt in self.re_sep.split(val):
                if forbidden:
                    # Previous part was forbidden: drop the separator after it.
                    forbidden = False
                    continue
                lowered = elt.lower()
                forbidden = any(check(lowered) for check in checks)
                if forbidden:
                    # Drop the separator that came before the forbidden part.
                    checked_val = checked_val[:-1]
                else:
                    checked_val += elt

            val = checked_val
            # Each trim is guarded: trimming the trailing separator can empty
            # the value (e.g. "1--"), and val[0] would then raise IndexError.
            if val and self.re_sep.match(val[-1]):
                val = val[:-1]
            if val and self.re_sep.match(val[0]):
                val = val[1:]
            if not val:
                # Only separators / forbidden parts were present.
                return False

            guess['releaseGroup'] = val
            lowered = val.lower()
            if not any(check(lowered) for check in checks):
                return True
        return False

    def is_leaf_previous(self, leaf, node):
        """Return True when ``leaf`` ends at or before the start of ``node``
        with nothing but characters of ``sep`` in between."""
        if leaf.span[1] <= node.span[0]:
            for idx in range(leaf.span[1], node.span[0]):
                if leaf.root.value[idx] not in sep:
                    return False
            return True
        return False

    def validate_next_leaves(self, node):
        """Return True when 'series'/'title' is already known in the root
        info, or when more than one unidentified leaf remains."""
        if 'series' in node.root.info or 'title' in node.root.info:
            # --expected-series or --expected-title is used.
            return True

        # Make sure to avoid collision with 'series' or 'title' guessed later. Should be more precise.
        leaves = node.root.unidentified_leaves()
        return len(list(leaves)) > 1

    def validate_node(self, leaf, node, safe=False):
        """Check ``leaf`` as the anchor that makes ``node`` a release group.

        With ``safe`` set, properties of ``leaf.guess`` that appear in
        ``self.previous_safe_values`` must hold one of the listed values.
        """
        if not self.is_leaf_previous(leaf, node):
            return False
        if not self.validate_next_leaves(node):
            return False
        if safe:
            for k, v in leaf.guess.items():
                if (k in self.previous_safe_values
                        and v not in self.previous_safe_values[k]):
                    return False
        return True

    def guess_release_group(self, string, node=None, options=None):
        """Return a release-group guess for ``node``, or None.

        Expected groups from ``options`` are returned as soon as they match.
        Otherwise the candidate needs a validating "safe" leaf in the same or
        the previous group; an explicit first node of a group serves as a
        low-confidence fallback. Guesses are attached to nodes as a side effect.
        """
        if options and options.get('expected_group'):
            expected_container = PropertiesContainer(
                enhance=True, canonical_from_pattern=False)
            for expected_group in options.get('expected_group'):
                if expected_group.startswith('re:'):
                    # 're:' prefix: remainder is a pattern, spaces become dashes.
                    expected_group = expected_group[3:]
                    expected_group = expected_group.replace(' ', '-')
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=True)
                else:
                    expected_group = re.escape(expected_group)
                    expected_container.register_property('releaseGroup',
                                                         expected_group,
                                                         enhance=False)

            found = expected_container.find_properties(string, node, options,
                                                       'releaseGroup')
            guess = expected_container.as_guess(found, string,
                                                self.validate_group_name)
            if guess:
                return guess

        found = self.container.find_properties(string, node, options,
                                               'releaseGroup')
        guess = self.container.as_guess(found, string,
                                        self.validate_group_name)
        validated_guess = None
        if guess:
            group_node = node.group_node()
            if group_node:
                for leaf in group_node.leaves_containing(
                        self.previous_safe_properties):
                    if self.validate_node(leaf, node, True):
                        # A dash right after the safe leaf gives confidence 1.
                        if leaf.root.value[leaf.span[1]] == '-':
                            guess.metadata().confidence = 1
                        else:
                            guess.metadata().confidence = 0.7
                        validated_guess = guess

            if not validated_guess:
                # If previous group last leaf is identified as a safe property,
                # consider the raw value as a releaseGroup
                previous_group_node = node.previous_group_node()
                if previous_group_node:
                    for leaf in previous_group_node.leaves_containing(
                            self.previous_safe_properties):
                        if self.validate_node(leaf, node, False):
                            guess = Guess({'releaseGroup': node.value},
                                          confidence=1,
                                          input=node.value,
                                          span=(0, len(node.value)))
                            if self.validate_group_name(guess):
                                node.guess = guess
                                validated_guess = guess

            if validated_guess:
                # If following group nodes have only one unidentified leaf, it belongs to the release group
                next_group_node = node

                while True:
                    next_group_node = next_group_node.next_group_node()
                    if next_group_node:
                        leaves = list(next_group_node.leaves())
                        if len(leaves) == 1 and not leaves[0].guess:
                            validated_guess['releaseGroup'] = validated_guess[
                                'releaseGroup'] + leaves[0].value
                            leaves[0].guess = validated_guess
                        else:
                            break
                    else:
                        break

        if (not validated_guess and node.is_explicit()
                and node.node_last_idx == 0):  # first node from group
            validated_guess = build_guess(
                node, 'releaseGroup', value=node.value[1:len(node.value) - 1])
            validated_guess.metadata().confidence = 0.4
            validated_guess.metadata().span = 1, len(node.value)
            node.guess = validated_guess

        if validated_guess:
            # Strip brackets
            validated_guess['releaseGroup'] = strip_brackets(
                validated_guess['releaseGroup'])

        return validated_guess

    def process(self, mtree, options=None):
        """Run the release-group finder over the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_release_group, None, self.log,
                    options).process_nodes(mtree.unidentified_leaves())
class GuessWeakEpisodesRexps(Transformer):
    """Transformer applying low-confidence episode/season number patterns."""

    def __init__(self):
        Transformer.__init__(self, 15)

        of_separators_re = re.compile(
            build_or_pattern(['of', 'sur', '/', '\\'], escape=True), re.IGNORECASE)

        self.container = PropertiesContainer(
            enhance=False, canonical_from_pattern=False, remove_duplicates=True)

        episode_words = ['episodes?']

        def episode_list_parser(value):
            return list_parser(value, 'episodeList')

        def season_episode_parser(episode_number):
            # Year-like numbers yield nothing; 3-4 digit numbers are split
            # into season (hundreds) and episode (remainder).
            number = parse_numeral(episode_number)
            if valid_year(number):
                return None
            if number <= 100:
                return number
            season, episode = divmod(number, 100)
            # A season above 50 is most likely an error (Simpson is at 25!).
            if season > 50:
                return None
            return {'season': season, 'episodeNumber': episode}

        def number_preferred(options):
            return options.get('episode_prefer_number') if options else False

        def number_not_preferred(options):
            return not options.get('episode_prefer_number') if options else True

        register = self.container.register_property

        register(['episodeNumber', 'season'], '[0-9]{2,4}',
                 confidence=0.6,
                 formatter=season_episode_parser,
                 disabler=number_preferred)

        register(['episodeNumber', 'season'], '[0-9]{4}',
                 confidence=0.6,
                 formatter=season_episode_parser)

        register(None,
                 '(' + build_or_pattern(episode_words) + sep
                 + '?(?P<episodeNumber>' + numeral + '))[^0-9]',
                 confidence=0.4,
                 formatter=parse_numeral)

        register(None,
                 r'(?P<episodeNumber>' + numeral + ')' + sep + '?'
                 + of_separators_re.pattern + sep
                 + '?(?P<episodeCount>' + numeral + ')',
                 confidence=0.6,
                 formatter=parse_numeral)

        # A 2-3 digit number optionally followed by further ones, each joined
        # by a separator from all_separators_re.
        number_list = (r'(\d{2,3}(?:' + sep + '?' + all_separators_re.pattern
                       + sep + r'?\d{2,3})*)')

        for list_pattern in ('[^0-9]' + number_list,
                             r'^' + sep + '?' + number_list + sep,
                             sep + number_list + sep + '?$'):
            register('episodeNumber', list_pattern,
                     confidence=0.4,
                     formatter=episode_list_parser,
                     disabler=number_not_preferred)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return the guess found in ``string``, or None when it is superseded.

        A guess carrying an episode number is dropped when the node's group
        already has a guess with both season and episode number. An
        episode-only guess is merged with other episode-only guesses of the
        group into an ``episodeList``.
        """
        found = self.container.find_properties(string, node, options)
        guess = self.container.as_guess(found, string)

        if not (node and guess) or 'episodeNumber' not in guess:
            return guess

        def has_season_and_episode(other):
            return 'season' in other and 'episodeNumber' in other

        # Another node already holds season + episode: keep only that one.
        if any(has_season_and_episode(other) for other in node.group_node().guesses):
            return None

        if 'season' in guess:
            return guess

        # Other nodes holding only an episode number: build an episodeList.
        episode_only = [other for other in node.group_node().guesses
                        if 'season' not in other and 'episodeNumber' in other]
        for other in episode_only:
            if 'episodeList' not in other:
                other['episodeList'] = [other['episodeNumber']]
            other['episodeList'].append(guess['episodeNumber'])
            other['episodeList'].sort()
            # The guess with the higher episode number loses its confidence.
            if other['episodeNumber'] > guess['episodeNumber']:
                other.set_confidence('episodeNumber', 0)
            else:
                guess.set_confidence('episodeNumber', 0)
            guess['episodeList'] = list(other['episodeList'])

        return guess

    def should_process(self, mtree, options=None):
        """Process only trees whose guessed type starts with 'episode'."""
        guessed_type = mtree.guess.get('type', '')
        return guessed_type.startswith('episode')

    def process(self, mtree, options=None):
        """Run the weak-episode finder over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register every known property pattern and its quality rating.

    Patterns go into ``self.container`` and ratings into ``self.qualities``.

    Two dict literals used to contain a duplicated key ('Telesync' and
    '480p'); in a dict display the last value silently wins, so the first
    pattern list of each was never registered. The low-confidence variants
    are now registered through separate calls so both survive.
    """
    Transformer.__init__(self, 35)

    self.container = PropertiesContainer()
    self.qualities = QualitiesContainer()

    def register_property(propname, props):
        """props a dict of {value: [patterns]}

        A value may also be a ``(patterns, kwargs)`` tuple, in which case the
        kwargs are forwarded to the container.
        """
        for canonical_form, patterns in props.items():
            if isinstance(patterns, tuple):
                patterns2, kwargs = patterns
                # Copy so the caller's dict is not modified.
                kwargs = dict(kwargs)
                kwargs['canonical_form'] = canonical_form
                self.container.register_property(propname, *patterns2, **kwargs)
            else:
                self.container.register_property(propname, *patterns, canonical_form=canonical_form)

    def register_quality(propname, quality_dict):
        """props a dict of {canonical_form: quality}"""
        for canonical_form, quality in quality_dict.items():
            self.qualities.register_quality(propname, canonical_form, quality)

    def codec_leaf_validator():
        # Fresh validator per registration, as before.
        return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

    def audio_codec_validator(codec):
        return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

    register_property('container', {'mp4': ['MP4']})

    # http://en.wikipedia.org/wiki/Pirated_movie_release_types
    register_property('format', {'VHS': ['VHS'],
                                 'Cam': ['CAM', 'CAMRip'],
                                 'Telesync': ['TELESYNC', 'PDVD'],
                                 'Workprint': ['WORKPRINT', 'WP'],
                                 'Telecine': ['TELECINE', 'TC'],
                                 'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
                                 'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
                                 'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
                                 'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS'],
                                 'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP'],
                                 'VOD': ['VOD', 'VOD-Rip'],
                                 'WEBRip': ['WEB-Rip'],
                                 'WEB-DL': ['WEB-DL'],
                                 'HD-DVD': ['HD-(?:DVD)?-Rip', 'HD-DVD'],
                                 'BluRay': ['Blu-ray', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50']
                                 })
    # Second 'Telesync' entry, kept out of the dict above so it no longer
    # overwrites the TELESYNC/PDVD patterns.
    register_property('format', {'Telesync': (['TS'], {'confidence': 0.2})})

    register_quality('format', {'VHS': -100,
                                'Cam': -90,
                                'Telesync': -80,
                                'Workprint': -70,
                                'Telecine': -60,
                                'PPV': -50,
                                'TV': -30,
                                'DVB': -20,
                                'DVD': 0,
                                'HDTV': 20,
                                'VOD': 40,
                                'WEBRip': 50,
                                'WEB-DL': 60,
                                'HD-DVD': 80,
                                'BluRay': 100
                                })

    # Optional "<width><delimiter>" prefix shared by all screen sizes.
    # NOTE(review): the runtime pattern is unchanged from the original
    # non-raw literal, in which '\\|' collapsed to an escaped (literal) pipe,
    # so the alternatives are "|/", "x" and "*". Presumably a backslash
    # alternative was intended - confirm before changing the pattern.
    width_prefix = r'(?:\d{3,}(?:\|\/|x|\*))?'

    register_property('screenSize', {'360p': [width_prefix + '360(?:i|p?x?)'],
                                     '368p': [width_prefix + '368(?:i|p?x?)'],
                                     '480p': [width_prefix + '480(?:i|p?x?)'],
                                     '576p': [width_prefix + '576(?:i|p?x?)'],
                                     '720p': [width_prefix + '720(?:i|p?x?)'],
                                     '900p': [width_prefix + '900(?:i|p?x?)'],
                                     '1080i': [width_prefix + '1080i'],
                                     '1080p': [width_prefix + '1080(?:p?x?)'],
                                     '4K': [width_prefix + '2160(?:i|p?x?)']
                                     })
    # Second '480p' entry, kept out of the dict above so it no longer
    # overwrites the 480 resolution pattern.
    register_property('screenSize', {'480p': (['hr'], {'confidence': 0.2})})

    register_quality('screenSize', {'360p': -300,
                                    '368p': -200,
                                    '480p': -100,
                                    '576p': 0,
                                    '720p': 100,
                                    '900p': 130,
                                    '1080i': 180,
                                    '1080p': 200,
                                    '4K': 400
                                    })

    _videoCodecProperty = {'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
                           'Mpeg2': ['Mpeg2'],
                           'DivX': ['DVDivX', 'DivX'],
                           'XviD': ['XviD'],
                           'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
                           'h265': ['[hx]-265(?:-HEVC)?', 'HEVC']
                           }

    register_property('videoCodec', _videoCodecProperty)

    register_quality('videoCodec', {'Real': -50,
                                    'Mpeg2': -30,
                                    'DivX': -10,
                                    'XviD': 0,
                                    'h264': 100,
                                    'h265': 150
                                    })

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    # A video profile is validated against leaves holding a videoCodec guess.
    self.container.register_property('videoProfile', 'BP', validator=codec_leaf_validator())
    self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP',
                                     validator=codec_leaf_validator())
    self.container.register_property('videoProfile', 'MP', validator=codec_leaf_validator())
    self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP',
                                     validator=codec_leaf_validator())
    self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit',
                                     validator=codec_leaf_validator())
    self.container.register_property('videoProfile', 'Hi422P', validator=codec_leaf_validator())
    self.container.register_property('videoProfile', 'Hi444PP', validator=codec_leaf_validator())

    register_quality('videoProfile', {'BP': -20,
                                      'XP': -10,
                                      'MP': 0,
                                      'HP': 10,
                                      '10bit': 15,
                                      'Hi422P': 25,
                                      'Hi444PP': 35
                                      })

    # has nothing to do here (or on filenames for that matter), but some
    # releases use it and it helps to identify release groups, so we adapt
    register_property('videoApi', {'DXVA': ['DXVA']})

    register_property('audioCodec', {'MP3': ['MP3'],
                                     'DolbyDigital': ['DD'],
                                     'AAC': ['AAC'],
                                     'AC3': ['AC3'],
                                     'Flac': ['FLAC'],
                                     'DTS': ['DTS'],
                                     'TrueHD': ['True-HD']
                                     })

    register_quality('audioCodec', {'MP3': 10,
                                    'DolbyDigital': 30,
                                    'AAC': 35,
                                    'AC3': 40,
                                    'Flac': 45,
                                    'DTS': 60,
                                    'TrueHD': 70
                                    })

    # Each audio profile is validated against leaves with a given audioCodec.
    self.container.register_property('audioProfile', 'HD', validator=audio_codec_validator('DTS'))
    self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA',
                                     validator=audio_codec_validator('DTS'))
    self.container.register_property('audioProfile', 'HE', validator=audio_codec_validator('AAC'))
    self.container.register_property('audioProfile', 'LC', validator=audio_codec_validator('AAC'))
    self.container.register_property('audioProfile', 'HQ', validator=audio_codec_validator('AC3'))

    register_quality('audioProfile', {'HD': 20,
                                      'HDMA': 50,
                                      'LC': 0,
                                      'HQ': 0,
                                      'HE': 20
                                      })

    register_property('audioChannels', {'7.1': [r'7[\W_]1', '7ch'],
                                        '5.1': [r'5[\W_]1', '5ch'],
                                        '2.0': [r'2[\W_]0', '2ch', 'stereo'],
                                        '1.0': [r'1[\W_]0', '1ch', 'mono']
                                        })

    register_quality('audioChannels', {'7.1': 200,
                                       '5.1': 100,
                                       '2.0': 0,
                                       '1.0': -100
                                       })

    self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

    register_property('other', {'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
                                'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
                                'DualAudio': ['Dual-Audio'],
                                'WideScreen': ['ws', 'wide-screen'],
                                })

    self.container.register_property('other', 'Real', 'Fix', canonical_form="Proper", validator=WeakValidator())
    self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form="Proper")

    self.container.register_canonical_properties('other', 'R5', 'Screener', '3D', 'HD', 'HQ', 'DDC')
    self.container.register_canonical_properties('other', 'Limited', 'Complete', 'Classic', 'Unrated',
                                                 'LiNE', 'Bonus', 'Trailer', validator=WeakValidator())

    # Every registered format pattern followed by "Scr"/"Screener" is a Screener.
    for prop in self.container.get_properties('format'):
        self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)', canonical_form='Screener')

    # All known file extensions are low-confidence containers.
    for exts in (subtitle_exts, info_exts, video_exts):
        for container in exts:
            self.container.register_property('container', container, confidence=0.3)
def __init__(self):
    """Register every property pattern and quality score of this transformer.

    Patterns are stored on ``self.container`` and quality scores on
    ``self.qualities``.  The registration order of the historical
    implementation is kept unchanged.
    """
    Transformer.__init__(self, 35)

    self.container = PropertiesContainer()
    self.qualities = QualitiesContainer()

    def register_property(propname, props, **kwargs):
        """Register ``props``, a dict of ``{canonical_form: patterns}``.

        ``patterns`` is either a list of patterns or a
        ``(patterns, extra_kwargs)`` tuple; ``extra_kwargs`` are merged over
        the shared ``kwargs`` for that canonical form only.
        """
        for canonical_form, patterns in props.items():
            entry_kwargs = dict(kwargs)
            if isinstance(patterns, tuple):
                patterns, pattern_kwargs = patterns
                entry_kwargs.update(pattern_kwargs)
            entry_kwargs['canonical_form'] = canonical_form
            self.container.register_property(propname, *patterns, **entry_kwargs)

    def register_quality(propname, quality_dict):
        """Register ``quality_dict``, a dict of ``{canonical_form: quality}``."""
        for canonical_form, quality in quality_dict.items():
            self.qualities.register_quality(propname, canonical_form, quality)

    def requires_video_codec():
        """Build a validator whose predicate needs a videoCodec in the node guess."""
        return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

    def requires_audio_codec(codec):
        """Build a validator whose predicate needs audioCodec == ``codec``."""
        return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

    register_property('container', {'mp4': ['MP4']})

    # http://en.wikipedia.org/wiki/Pirated_movie_release_types
    register_property('format', {
        'VHS': ['VHS', 'VHS-Rip'],
        'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
        # 'TELESYNC' / 'PDVD' used to be registered here as well.
        'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
        'Workprint': ['WORKPRINT', 'WP'],
        'Telecine': ['TELECINE', 'TC'],
        'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
        'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
        'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
        'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
        'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
        'VOD': ['VOD', 'VOD-Rip'],
        'WEBRip': ['WEB-Rip'],
        'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
        'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
        'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50'],
    })

    register_quality('format', {
        'VHS': -100,
        'Cam': -90,
        'Telesync': -80,
        'Workprint': -70,
        'Telecine': -60,
        'PPV': -50,
        'TV': -30,
        'DVB': -20,
        'DVD': 0,
        'HDTV': 20,
        'VOD': 40,
        'WEBRip': 50,
        'WEB-DL': 60,
        'HD-DVD': 80,
        'BluRay': 100,
    })

    # Optional "<width><separator>" prefix (the "1280x" of "1280x720").
    # This must be a raw string: written as a normal literal, '\\|' collapsed
    # to the regex '\|' (an escaped, literal pipe), so the alternation matched
    # the two characters "|/" instead of a backslash or a slash.
    width_prefix = r'(?:\d{3,}(?:\\|\/|x|\*))?'
    scan_suffix = r'(?:i|p?x?)'
    register_property('screenSize', {
        '360p': [width_prefix + '360' + scan_suffix],
        '368p': [width_prefix + '368' + scan_suffix],
        '480p': [width_prefix + '480' + scan_suffix],
        # '480p': (['hr'], {'confidence': 0.2}) cannot be added here: it
        # would be a duplicate dict key.
        '576p': [width_prefix + '576' + scan_suffix],
        '720p': [width_prefix + '720' + scan_suffix],
        '900p': [width_prefix + '900' + scan_suffix],
        '1080i': [width_prefix + '1080i'],
        '1080p': [width_prefix + '1080p?x?'],
        '4K': [width_prefix + '2160' + scan_suffix],
    }, validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

    class ResolutionValidator(object):
        """Validator for explicit WIDTHxHEIGHT matches; currently accepts all."""

        @staticmethod
        def validate(prop, string, node, match, entry_start, entry_end):
            """Always return True.

            A stricter check (match bounded by separators or by another
            entry) used to live here but is disabled.
            """
            return True

    _digits_re = re.compile(r'\d+')

    def resolution_formatter(value):
        """Join every run of digits found in ``value`` with ``x``."""
        return 'x'.join(_digits_re.findall(value))

    self.container.register_property(
        'screenSize', r'\d{3,4}-?[x\*]-?\d{3,4}',
        canonical_from_pattern=False,
        formatter=resolution_formatter,
        validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

    register_quality('screenSize', {
        '360p': -300,
        '368p': -200,
        '480p': -100,
        '576p': 0,
        '720p': 100,
        '900p': 130,
        '1080i': 180,
        '1080p': 200,
        '4K': 400,
    })

    _videoCodecProperty = {
        'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
        'Mpeg2': ['Mpeg2'],
        'DivX': ['DVDivX', 'DivX'],
        'XviD': ['XviD'],
        'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
        'h265': ['[hx]-265(?:-HEVC)?', 'HEVC'],
    }

    register_property('videoCodec', _videoCodecProperty)

    register_quality('videoCodec', {
        'Real': -50,
        'Mpeg2': -30,
        'DivX': -10,
        'XviD': 0,
        'h264': 100,
        'h265': 150,
    })

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    self.container.register_property('videoProfile', 'BP', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP',
                                     validator=requires_video_codec())
    self.container.register_property('videoProfile', 'MP', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP',
                                     validator=requires_video_codec())
    self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
    self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
    self.container.register_property('videoProfile', 'Hi422P', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'Hi444PP', validator=requires_video_codec())

    register_quality('videoProfile', {
        'BP': -20,
        'XP': -10,
        'MP': 0,
        'HP': 10,
        '10bit': 15,
        'Hi422P': 25,
        'Hi444PP': 35,
    })

    # has nothing to do here (or on filenames for that matter), but some
    # releases use it and it helps to identify release groups, so we adapt
    register_property('videoApi', {'DXVA': ['DXVA']})

    register_property('audioCodec', {
        'MP3': ['MP3', 'LAME', r'LAME(?:\d)+-(?:\d)+'],
        'DolbyDigital': ['DD'],
        'AAC': ['AAC'],
        'AC3': ['AC3'],
        'Flac': ['FLAC'],
        'DTS': (['DTS'], {'validator': LeftValidator()}),
        'TrueHD': ['True-HD'],
    })

    register_quality('audioCodec', {
        'MP3': 10,
        'DolbyDigital': 30,
        'AAC': 35,
        'AC3': 40,
        'Flac': 45,
        'DTS': 60,
        'TrueHD': 70,
    })

    self.container.register_property('audioProfile', 'HD', validator=requires_audio_codec('DTS'))
    self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA',
                                     validator=requires_audio_codec('DTS'))
    self.container.register_property('audioProfile', 'HE', validator=requires_audio_codec('AAC'))
    self.container.register_property('audioProfile', 'LC', validator=requires_audio_codec('AAC'))
    self.container.register_property('audioProfile', 'HQ', validator=requires_audio_codec('AC3'))

    register_quality('audioProfile', {
        'HD': 20,
        'HDMA': 50,
        'LC': 0,
        'HQ': 0,
        'HE': 20,
    })

    register_property('audioChannels', {
        '7.1': [r'7[\W_]1', '7ch', '8ch'],
        '5.1': [r'5[\W_]1', '5ch', '6ch'],
        '2.0': [r'2[\W_]0', '2ch', 'stereo'],
        '1.0': [r'1[\W_]0', '1ch', 'mono'],
    })

    register_quality('audioChannels', {
        '7.1': 200,
        '5.1': 100,
        '2.0': 0,
        '1.0': -100,
    })

    self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

    self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}',
                                     enhance=False, canonical_from_pattern=False)

    weak_episode_words = ['pt', 'part']
    self.container.register_property(
        None,
        '(' + build_or_pattern(weak_episode_words) + sep + '?(?P<part>' + numeral + '))[^0-9]',
        enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

    register_property('other', {
        'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
        'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
        'DualAudio': ['Dual-Audio'],
        'WideScreen': ['ws', 'wide-screen'],
        'Netflix': ['Netflix', 'NF'],
    })

    self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper',
                                     validator=NeighborValidator())
    self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
    self.container.register_property('other', 'Fansub', canonical_form='Fansub')
    self.container.register_property('other', 'Fastsub', canonical_form='Fastsub')
    self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete',
                                     canonical_form='Complete')
    self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
    self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')

    self.container.register_canonical_properties(
        'other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR',
        'PAL', 'SECAM', 'NTSC')
    self.container.register_canonical_properties(
        'other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer',
        validator=WeakValidator())

    # "<format>-Scr" / "<format>Screener" marks a screener for every format
    # pattern registered above.
    for prop in self.container.get_properties('format'):
        self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)',
                                         canonical_form='Screener')

    # Known file extensions double as low-confidence container hints.
    for exts in (subtitle_exts, info_exts, video_exts):
        for container in exts:
            self.container.register_property('container', container, confidence=0.3)
def __init__(self):
    """Register the season / episode regular expressions on ``self.container``.

    ``sep``, ``numeral``, ``digital_numeral``, ``all_separators_re`` and
    ``list_parser`` are taken from the enclosing scope (not defined in this
    method).  Registration order is kept from the historical implementation.
    """
    Transformer.__init__(self, 20)

    of_separators = ['of', 'sur', '/', '\\']
    of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

    season_words = ['seasons?', 'saisons?', 'series?']
    episode_words = ['episodes?']
    season_markers = ['s']
    episode_markers = ['e', 'ep']

    self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

    season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
    episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)
    season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
    episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

    # Compiled once here instead of on every parser call.
    x_separator_re = re.compile('x', re.IGNORECASE)
    e_separator_re = re.compile('e', re.IGNORECASE)

    def episode_parser_x(value):
        """Parse an episode list whose discrete separator is ``x``."""
        return list_parser(value, 'episodeList', discrete_separators_re=x_separator_re)

    def episode_parser_e(value):
        """Parse an episode list whose discrete separator is ``e``, filling gaps."""
        return list_parser(value, 'episodeList', discrete_separators_re=e_separator_re,
                           fill_gaps=True)

    def episode_parser(value):
        """Parse an episode list with the default separators."""
        return list_parser(value, 'episodeList')

    def season_parser(value):
        """Parse a season list with the default separators."""
        return list_parser(value, 'seasonList')

    def has_several_items(value):
        """Return True only for sized values holding more than one item."""
        return len(value) > 1 if hasattr(value, '__len__') else False

    class ResolutionCollisionValidator(object):
        """Reject matches whose second capture group has 3 or more characters.

        NOTE(review): presumably this avoids reading a resolution such as
        1280x720 as season x episode -- confirm.
        """

        @staticmethod
        def validate(prop, string, node, match, entry_start, entry_end):
            return len(match.group(2)) < 3  # limit

    # Reusable pattern fragments.  Each one expands to exactly the
    # concatenation that was previously spelled out inline.
    season_words_p = season_words_re.pattern
    episode_words_p = episode_words_re.pattern
    season_markers_p = season_markers_re.pattern
    episode_markers_p = episode_markers_re.pattern
    opt_sep = sep + '?'
    list_joiner = opt_sep + all_separators_re.pattern + opt_sep
    number_list = digital_numeral + '(?:' + list_joiner + digital_numeral + ')*'
    of_joiner = opt_sep + of_separators_re.pattern + opt_sep
    version_suffix = opt_sep + r'v(?P<version>\d+)'

    season_list_formatter = {None: parse_numeral, 'season': season_parser}
    episode_list_formatter = {None: parse_numeral, 'episodeNumber': episode_parser}

    # "season 3", "season 3 season"
    self.container.register_property(
        None,
        '(' + season_words_p + opt_sep + '(?P<season>' + numeral + ')' + opt_sep
        + season_words_p + '?)',
        confidence=1.0, formatter=parse_numeral)

    # "seasons 1-3": only accepted when the parsed season value has several items
    self.container.register_property(
        None,
        '(' + season_words_p + opt_sep + '(?P<season>' + number_list + ')' + opt_sep
        + season_words_p + '?)' + sep,
        confidence=1.0,
        formatter=dict(season_list_formatter),
        validator=ChainedValidator(DefaultValidator(),
                                   FormatterValidator('season', has_several_items)))

    # "s01e02", "s01e02e03", "s01e02-e04"
    self.container.register_property(
        None,
        '(' + season_markers_p + '(?P<season>' + digital_numeral + ')[^0-9]?' + opt_sep
        + '(?P<episodeNumber>(?:e' + digital_numeral
        + '(?:' + opt_sep + '[e-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={None: parse_numeral, 'episodeNumber': episode_parser_e,
                   'season': season_parser},
        validator=NoValidator())

    # "s01e02" followed by a separator-delimited list of numbers
    self.container.register_property(
        None,
        '(' + season_markers_p + '(?P<season>' + digital_numeral + ')[^0-9]?' + opt_sep
        + '(?P<episodeNumber>(?:e' + number_list + ')))',
        confidence=1.0,
        formatter={None: parse_numeral, 'episodeNumber': episode_parser,
                   'season': season_parser},
        validator=NoValidator())

    # " 1 x 02" (separators around the x)
    self.container.register_property(
        None,
        sep + '((?P<season>' + digital_numeral + ')' + sep
        + '(?P<episodeNumber>(?:x' + sep + digital_numeral
        + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={None: parse_numeral, 'episodeNumber': episode_parser_x,
                   'season': season_parser},
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "1x02", "1x02x03", "1x02-03"
    self.container.register_property(
        None,
        '((?P<season>' + digital_numeral + ')'
        + '(?P<episodeNumber>(?:x' + digital_numeral + '(?:[x-]' + digital_numeral + ')*)))',
        confidence=1.0,
        formatter={None: parse_numeral, 'episodeNumber': episode_parser_x,
                   'season': season_parser},
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    # "s01", "s01-s03"
    self.container.register_property(
        None,
        '(' + season_markers_p + '(?P<season>' + number_list + '))',
        confidence=0.6,
        formatter=dict(season_list_formatter),
        validator=NoValidator())

    # "02v2"
    self.container.register_property(
        None,
        '((?P<episodeNumber>' + digital_numeral + ')' + version_suffix + ')',
        confidence=0.6, formatter=parse_numeral)

    # standalone "V2"
    self.container.register_property(
        'version', sep + r'(V\d+)' + sep,
        confidence=0.6, formatter=parse_numeral, validator=NoValidator())

    # "ep 02"
    self.container.register_property(
        None,
        '(ep' + opt_sep + '(?P<episodeNumber>' + digital_numeral + ')' + opt_sep + ')',
        confidence=0.7, formatter=parse_numeral)

    # "ep 02v2"
    self.container.register_property(
        None,
        '(ep' + opt_sep + '(?P<episodeNumber>' + digital_numeral + ')' + version_suffix + ')',
        confidence=0.7, formatter=parse_numeral)

    # "e02", "ep02-03"
    self.container.register_property(
        None,
        '(' + episode_markers_p + '(?P<episodeNumber>' + number_list + '))',
        confidence=0.6, formatter=dict(episode_list_formatter))

    # "episode 02", "episodes 02-03"
    self.container.register_property(
        None,
        '(' + episode_words_p + opt_sep + '(?P<episodeNumber>' + number_list + ')' + opt_sep
        + episode_words_p + '?)',
        confidence=0.8, formatter=dict(episode_list_formatter))

    # "e02v2"
    self.container.register_property(
        None,
        '(' + episode_markers_p + '(?P<episodeNumber>' + digital_numeral + ')'
        + version_suffix + ')',
        confidence=0.6, formatter=dict(episode_list_formatter))

    # "episode 02v2"
    self.container.register_property(
        None,
        '(' + episode_words_p + opt_sep + '(?P<episodeNumber>' + digital_numeral + ')'
        + version_suffix + ')',
        confidence=0.8, formatter=dict(episode_list_formatter))

    # Bare two-digit numbers (optionally a list) at the very start ...
    self.container.register_property(
        'episodeNumber',
        '^' + sep + r'+(\d{2}(?:' + list_joiner + r'\d{2})*)' + sep,
        confidence=0.4, formatter=episode_parser)
    self.container.register_property(
        'episodeNumber',
        '^' + sep + r'+0(\d{1,2}(?:' + list_joiner + r'0\d{1,2})*)' + sep,
        confidence=0.4, formatter=episode_parser)

    # ... or at the very end of the string.
    self.container.register_property(
        'episodeNumber',
        sep + r'(\d{2}(?:' + list_joiner + r'\d{2})*)' + sep + '+$',
        confidence=0.4, formatter=episode_parser)
    self.container.register_property(
        'episodeNumber',
        sep + r'0(\d{1,2}(?:' + list_joiner + r'0\d{1,2})*)' + sep + '+$',
        confidence=0.4, formatter=episode_parser)

    # "3 of 10 [episodes]"
    self.container.register_property(
        None,
        '((?P<episodeNumber>' + numeral + ')' + of_joiner + '(?P<episodeCount>' + numeral
        + ')(?:' + opt_sep + '(?:episodes?|eps?))?)',
        confidence=0.7, formatter=parse_numeral)

    # "episode 3 of 10"
    self.container.register_property(
        None,
        '((?:episodes?|eps?)' + opt_sep + '(?P<episodeNumber>' + numeral + ')' + of_joiner
        + '(?P<episodeCount>' + numeral + '))',
        confidence=0.7, formatter=parse_numeral)

    # "season 2 of 5"
    self.container.register_property(
        None,
        '((?:seasons?|saisons?|s)' + opt_sep + '(?P<season>' + numeral + ')' + of_joiner
        + '(?P<seasonCount>' + numeral + '))',
        confidence=0.7, formatter=parse_numeral)

    # "2 of 5 seasons"
    self.container.register_property(
        None,
        '((?P<season>' + numeral + ')' + of_joiner + '(?P<seasonCount>' + numeral + ')'
        + opt_sep + '(?:seasons?|saisons?|s))',
        confidence=0.7, formatter=parse_numeral)

    self.container.register_canonical_properties('other', 'FiNAL', 'Complete',
                                                 validator=WeakValidator())

    # "1xAll": a season number followed by "xAll" is reported as 'Complete'.
    self.container.register_property(
        None,
        r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
        confidence=1.0,
        formatter={None: parse_numeral, 'other': lambda x: 'Complete',
                   'season': season_parser},
        validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))
class GuessWeakEpisodesRexps(Transformer):
    """Transformer registering low-confidence episode number patterns.

    It only processes trees whose guessed ``type`` starts with ``episode``
    and skips nodes whose root already carries an ``episodeNumber``.
    """

    def __init__(self):
        """Register the weak episode patterns on ``self.container``."""
        Transformer.__init__(self, 15)

        of_separators = ['of', 'sur', '/', '\\']
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True),
                                      re.IGNORECASE)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        episode_words = ['episodes?']

        def _formater(episode_number):
            """Turn a bare number into an episode number or a season/episode dict.

            Values accepted by ``valid_year`` yield None.  Numbers above 100
            are split as ``season * 100 + episode``; a resulting season
            above 50 also yields None.
            """
            epnum = parse_numeral(episode_number)
            if valid_year(epnum):
                return None
            if epnum <= 100:
                return epnum
            season, epnum = divmod(epnum, 100)
            # episodes which have a season > 50 are most likely errors
            # (Simpson is at 25!)
            if season > 50:
                return None
            return {'season': season, 'episodeNumber': epnum}

        def when_number_preferred(options):
            """Disable the property when ``episode_prefer_number`` is set."""
            return options.get('episode_prefer_number') if options else False

        def unless_number_preferred(options):
            """Disable the property unless ``episode_prefer_number`` is set."""
            return not options.get('episode_prefer_number') if options else True

        self.container.register_property(['episodeNumber', 'season'], '[0-9]{2,4}',
                                         confidence=0.6, formatter=_formater,
                                         disabler=when_number_preferred)
        self.container.register_property(['episodeNumber', 'season'], '[0-9]{4}',
                                         confidence=0.6, formatter=_formater)
        self.container.register_property('episodeNumber', r'[^0-9](\d{1,3})',
                                         confidence=0.6, formatter=parse_numeral,
                                         disabler=unless_number_preferred)
        self.container.register_property(
            None,
            '(' + build_or_pattern(episode_words) + sep + '?(?P<episodeNumber>' + numeral
            + '))[^0-9]',
            confidence=0.4, formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern
            + sep + '?(?P<episodeCount>' + numeral + ')',
            confidence=0.6, formatter=parse_numeral)
        self.container.register_property('episodeNumber', r'^' + sep + r'?(\d{1,3})' + sep,
                                         confidence=0.4, formatter=parse_numeral,
                                         disabler=unless_number_preferred)
        self.container.register_property('episodeNumber', sep + r'(\d{1,3})' + sep + '?$',
                                         confidence=0.4, formatter=parse_numeral,
                                         disabler=unless_number_preferred)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_weak_episodes_rexps(self, string, node=None, options=None):
        """Return a guess for ``string``, or None if an episode is already known.

        Nothing is guessed when ``node`` is given and its root info already
        contains an ``episodeNumber``.
        """
        if node and 'episodeNumber' in node.root.info:
            return None

        properties = self.container.find_properties(string, node, options)
        return self.container.as_guess(properties, string)

    def should_process(self, mtree, options=None):
        """Tell whether the guessed ``type`` of ``mtree`` starts with ``episode``."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run the weak episode guesser over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_weak_episodes_rexps, 0.6, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def guess_release_group(self, string, node=None, options=None):
    """Guess the release group contained in ``string``.

    Steps, in order:

    1. If ``options['expected_group']`` is set, only those groups are
       searched first; a hit is returned immediately.
    2. Otherwise the patterns of ``self.container`` are searched and the
       guess is kept only if a leaf of the same group node holding one of
       ``self.previous_safe_properties`` passes ``self.validate_node``.
    3. Failing that, the whole node value is used when a leaf of the
       previous group node passes ``self.validate_node``.
    4. A validated guess absorbs following group nodes that consist of a
       single unidentified leaf.
    5. As a last resort, an explicit first node of a group is used with its
       first and last characters removed (confidence 0.4).

    Returns the validated guess, or None when nothing was validated.

    NOTE(review): ``node`` defaults to None but is dereferenced without a
    check on the main path -- confirm callers always pass a node.
    """
    if options and options.get('expected_group'):
        # Dedicated container holding only the groups the caller expects.
        expected_container = PropertiesContainer(enhance=True, canonical_from_pattern=False)
        for expected_group in options.get('expected_group'):
            if expected_group.startswith('re:'):
                # 're:' prefix: the remainder is registered as a pattern,
                # with spaces replaced by '-'.
                expected_group = expected_group[3:]
                expected_group = expected_group.replace(' ', '-')
                expected_container.register_property('releaseGroup', expected_group, enhance=True)
            else:
                # Plain name: escaped so it is matched literally.
                expected_group = re.escape(expected_group)
                expected_container.register_property('releaseGroup', expected_group, enhance=False)

        found = expected_container.find_properties(string, node, options, 'releaseGroup')
        guess = expected_container.as_guess(found, string, self.validate_group_name)
        if guess:
            return guess

    found = self.container.find_properties(string, node, options, 'releaseGroup')
    guess = self.container.as_guess(found, string, self.validate_group_name)
    validated_guess = None
    if guess:
        group_node = node.group_node()
        if group_node:
            for leaf in group_node.leaves_containing(self.previous_safe_properties):
                if self.validate_node(leaf, node, True):
                    # Full confidence when the character right after the
                    # safe leaf (in the root value) is a dash.
                    if leaf.root.value[leaf.span[1]] == '-':
                        guess.metadata().confidence = 1
                    else:
                        guess.metadata().confidence = 0.7
                    validated_guess = guess

    if not validated_guess:
        # If previous group last leaf is identified as a safe property,
        # consider the raw value as a releaseGroup
        previous_group_node = node.previous_group_node()
        if previous_group_node:
            for leaf in previous_group_node.leaves_containing(self.previous_safe_properties):
                if self.validate_node(leaf, node, False):
                    guess = Guess({'releaseGroup': node.value}, confidence=1, input=node.value, span=(0, len(node.value)))
                    if self.validate_group_name(guess):
                        node.guess = guess
                        validated_guess = guess

    if validated_guess:
        # If following group nodes have only one unidentified leaf, it belongs to the release group
        next_group_node = node

        while True:
            next_group_node = next_group_node.next_group_node()
            if next_group_node:
                leaves = list(next_group_node.leaves())
                if len(leaves) == 1 and not leaves[0].guess:
                    # Append the leaf text to the group name and mark the
                    # leaf with the same guess object.
                    validated_guess['releaseGroup'] = validated_guess['releaseGroup'] + leaves[0].value
                    leaves[0].guess = validated_guess
                else:
                    break
            else:
                break

    if not validated_guess and node.is_explicit() and node.node_last_idx == 0:  # first node from group
        # Drop the first and last characters of the node value.
        validated_guess = build_guess(node, 'releaseGroup', value=node.value[1:len(node.value)-1])
        validated_guess.metadata().confidence = 0.4
        validated_guess.metadata().span = 1, len(node.value)
        node.guess = validated_guess

    if validated_guess:
        # Strip brackets
        validated_guess['releaseGroup'] = strip_brackets(validated_guess['releaseGroup'])

    return validated_guess
class GuessVideoRexps(Transformer):
    """Transformer registering CD / bonus / film numbers and edition names."""

    def __init__(self):
        """Register the video-related patterns on ``self.container``."""
        Transformer.__init__(self, 25)

        self.container = PropertiesContainer(canonical_from_pattern=False)

        # "cd1", "cd 1 of 2"
        self.container.register_property(
            None,
            'cd' + _psep + '(?P<cdNumber>[0-9])(?:' + _psep + 'of' + _psep
            + '(?P<cdNumberTotal>[0-9]))?',
            confidence=1.0, enhance=False, global_span=True, formatter=parse_numeral)
        # "2cds"
        self.container.register_property('cdNumberTotal', '([1-9])' + _psep + 'cds?',
                                         confidence=0.9, enhance=False,
                                         formatter=parse_numeral)

        self.container.register_property('bonusNumber', 'x([0-9]{1,2})',
                                         enhance=False, global_span=True,
                                         formatter=parse_numeral)

        self.container.register_property('filmNumber', 'f([0-9]{1,2})',
                                         enhance=False, global_span=True,
                                         formatter=parse_numeral)

        # (canonical form, patterns).  The deluxe entry used to contain the
        # typo 'cdeluxe-edition', so "deluxe-edition" was never matched as a
        # whole, unlike its collector / criterion siblings.
        editions = (
            ('Collector Edition', ('collector', 'collector-edition', 'edition-collector')),
            ('Special Edition', ('special-edition', 'edition-special')),
            ('Criterion Edition', ('criterion', 'criterion-edition', 'edition-criterion')),
            ('Deluxe Edition', ('deluxe', 'deluxe-edition', 'edition-deluxe')),
            ("Director's cut", ("director'?s?-cut", "director'?s?-cut-edition",
                                "edition-director'?s?-cut")),
        )
        for canonical_form, patterns in editions:
            self.container.register_property('edition', *patterns,
                                             canonical_form=canonical_form)

    def supported_properties(self):
        """Return the properties supported by the underlying container."""
        return self.container.get_supported_properties()

    def guess_video_rexps(self, string, node=None, options=None):
        """Return the guess built from the properties found in ``string``."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def process(self, mtree, options=None):
        """Run the video guesser over the unidentified leaves of ``mtree``."""
        finder = GuessFinder(self.guess_video_rexps, None, self.log, options)
        finder.process_nodes(mtree.unidentified_leaves())
def __init__(self):
    """Register every property pattern and quality score of this transformer.

    Patterns are stored on ``self.container`` and quality scores on
    ``self.qualities``.  The registration order of the historical
    implementation is kept unchanged.
    """
    Transformer.__init__(self, 35)

    self.container = PropertiesContainer()
    self.qualities = QualitiesContainer()

    def register_property(propname, props, **kwargs):
        """Register ``props``, a dict of ``{canonical_form: patterns}``.

        ``patterns`` is either a list of patterns or a
        ``(patterns, extra_kwargs)`` tuple; ``extra_kwargs`` are merged over
        the shared ``kwargs`` for that canonical form only.
        """
        for canonical_form, patterns in props.items():
            entry_kwargs = dict(kwargs)
            if isinstance(patterns, tuple):
                patterns, pattern_kwargs = patterns
                entry_kwargs.update(pattern_kwargs)
            entry_kwargs['canonical_form'] = canonical_form
            self.container.register_property(propname, *patterns, **entry_kwargs)

    def register_quality(propname, quality_dict):
        """Register ``quality_dict``, a dict of ``{canonical_form: quality}``."""
        for canonical_form, quality in quality_dict.items():
            self.qualities.register_quality(propname, canonical_form, quality)

    def requires_video_codec():
        """Build a validator whose predicate needs a videoCodec in the node guess."""
        return LeavesValidator(lambdas=[lambda node: 'videoCodec' in node.guess])

    def requires_audio_codec(codec):
        """Build a validator whose predicate needs audioCodec == ``codec``."""
        return LeavesValidator(lambdas=[lambda node: node.guess.get('audioCodec') == codec])

    def full_match_with_neighbor():
        """Build the FullMatch + Neighbor validator chain used for weak words."""
        return ChainedValidator(FullMatchValidator(), NeighborValidator())

    register_property('container', {'mp4': ['MP4']})

    # http://en.wikipedia.org/wiki/Pirated_movie_release_types
    register_property('format', {
        'VHS': ['VHS', 'VHS-Rip'],
        'Cam': ['CAM', 'CAMRip', 'HD-CAM'],
        # 'TELESYNC' / 'PDVD' used to be registered here as well.
        'Telesync': (['TS', 'HD-TS'], {'confidence': 0.4}),
        'Workprint': ['WORKPRINT', 'WP'],
        'Telecine': ['TELECINE', 'TC'],
        'PPV': ['PPV', 'PPV-Rip'],  # Pay Per View
        'TV': ['SD-TV', 'SD-TV-Rip', 'Rip-SD-TV', 'TV-Rip', 'Rip-TV'],
        'DVB': ['DVB-Rip', 'DVB', 'PD-TV'],
        'DVD': ['DVD', 'DVD-Rip', 'VIDEO-TS', 'DVD-R', 'DVD-9', 'DVD-5'],
        'HDTV': ['HD-TV', 'TV-RIP-HD', 'HD-TV-RIP', 'HD-RIP'],
        'VOD': ['VOD', 'VOD-Rip'],
        'WEBRip': ['WEB-Rip'],
        'WEB-DL': ['WEB-DL', 'WEB-HD', 'WEB'],
        'HD-DVD': ['HD-DVD-Rip', 'HD-DVD'],
        'BluRay': ['Blu-ray(?:-Rip)?', 'B[DR]', 'B[DR]-Rip', 'BD[59]', 'BD25', 'BD50'],
    })

    register_quality('format', {
        'VHS': -100,
        'Cam': -90,
        'Telesync': -80,
        'Workprint': -70,
        'Telecine': -60,
        'PPV': -50,
        'TV': -30,
        'DVB': -20,
        'DVD': 0,
        'HDTV': 20,
        'VOD': 40,
        'WEBRip': 50,
        'WEB-DL': 60,
        'HD-DVD': 80,
        'BluRay': 100,
    })

    # Optional "<width><separator>" prefix (the "1280x" of "1280x720").
    # This must be a raw string: written as a normal literal, '\\|' collapsed
    # to the regex '\|' (an escaped, literal pipe), so the alternation matched
    # the two characters "|/" instead of a backslash or a slash.
    width_prefix = r'(?:\d{3,}(?:\\|\/|x|\*))?'
    scan_suffix = r'(?:i|p?x?)'
    register_property('screenSize', {
        '360p': [width_prefix + '360' + scan_suffix],
        '368p': [width_prefix + '368' + scan_suffix],
        '480p': [width_prefix + '480' + scan_suffix],
        # '480p': (['hr'], {'confidence': 0.2}) cannot be added here: it
        # would be a duplicate dict key.
        '576p': [width_prefix + '576' + scan_suffix],
        '720p': [width_prefix + '720' + scan_suffix],
        '900p': [width_prefix + '900' + scan_suffix],
        '1080i': [width_prefix + '1080i'],
        '1080p': [width_prefix + '1080p?x?'],
        '4K': [width_prefix + '2160' + scan_suffix],
    }, validator=ChainedValidator(DefaultValidator(), OnlyOneValidator()))

    class ResolutionValidator(object):
        """Validator for explicit WIDTHxHEIGHT matches; currently accepts all."""

        @staticmethod
        def validate(prop, string, node, match, entry_start, entry_end):
            """Always return True.

            A stricter check (match bounded by separators or by another
            entry) used to live here but is disabled.
            """
            return True

    _digits_re = re.compile(r'\d+')

    def resolution_formatter(value):
        """Join every run of digits found in ``value`` with ``x``."""
        return 'x'.join(_digits_re.findall(value))

    self.container.register_property(
        'screenSize', r'\d{3,4}-?[x\*]-?\d{3,4}',
        canonical_from_pattern=False,
        formatter=resolution_formatter,
        validator=ChainedValidator(DefaultValidator(), ResolutionValidator()))

    register_quality('screenSize', {
        '360p': -300,
        '368p': -200,
        '480p': -100,
        '576p': 0,
        '720p': 100,
        '900p': 130,
        '1080i': 180,
        '1080p': 200,
        '4K': 400,
    })

    _videoCodecProperty = {
        'Real': [r'Rv\d{2}'],  # http://en.wikipedia.org/wiki/RealVideo
        'Mpeg2': ['Mpeg2'],
        'DivX': ['DVDivX', 'DivX'],
        'XviD': ['XviD'],
        'h264': ['[hx]-264(?:-AVC)?', 'MPEG-4(?:-AVC)'],
        'h265': ['[hx]-265(?:-HEVC)?', 'HEVC'],
    }

    register_property('videoCodec', _videoCodecProperty)

    register_quality('videoCodec', {
        'Real': -50,
        'Mpeg2': -30,
        'DivX': -10,
        'XviD': 0,
        'h264': 100,
        'h265': 150,
    })

    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    self.container.register_property('videoProfile', 'BP', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'XP', 'EP', canonical_form='XP',
                                     validator=requires_video_codec())
    self.container.register_property('videoProfile', 'MP', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'HP', 'HiP', canonical_form='HP',
                                     validator=requires_video_codec())
    self.container.register_property('videoProfile', '10.?bit', 'Hi10P', canonical_form='10bit')
    self.container.register_property('videoProfile', '8.?bit', canonical_form='8bit')
    self.container.register_property('videoProfile', 'Hi422P', validator=requires_video_codec())
    self.container.register_property('videoProfile', 'Hi444PP', validator=requires_video_codec())

    register_quality('videoProfile', {
        'BP': -20,
        'XP': -10,
        'MP': 0,
        'HP': 10,
        '10bit': 15,
        'Hi422P': 25,
        'Hi444PP': 35,
    })

    # has nothing to do here (or on filenames for that matter), but some
    # releases use it and it helps to identify release groups, so we adapt
    register_property('videoApi', {'DXVA': ['DXVA']})

    register_property('audioCodec', {
        'MP3': ['MP3', 'LAME', r'LAME(?:\d)+-(?:\d)+'],
        'DolbyDigital': ['DD'],
        'AAC': ['AAC'],
        'AC3': ['AC3'],
        'Flac': ['FLAC'],
        'DTS': (['DTS'], {'validator': LeftValidator()}),
        'TrueHD': ['True-HD'],
    })

    register_quality('audioCodec', {
        'MP3': 10,
        'DolbyDigital': 30,
        'AAC': 35,
        'AC3': 40,
        'Flac': 45,
        'DTS': 60,
        'TrueHD': 70,
    })

    self.container.register_property('audioProfile', 'HD', validator=requires_audio_codec('DTS'))
    self.container.register_property('audioProfile', 'HD-MA', canonical_form='HDMA',
                                     validator=requires_audio_codec('DTS'))
    self.container.register_property('audioProfile', 'HE', validator=requires_audio_codec('AAC'))
    self.container.register_property('audioProfile', 'LC', validator=requires_audio_codec('AAC'))
    self.container.register_property('audioProfile', 'HQ', validator=requires_audio_codec('AC3'))

    register_quality('audioProfile', {
        'HD': 20,
        'HDMA': 50,
        'LC': 0,
        'HQ': 0,
        'HE': 20,
    })

    register_property('audioChannels', {
        '7.1': [r'7[\W_]1', '7ch', '8ch'],
        '5.1': [r'5[\W_]1', '5ch', '6ch'],
        '2.0': [r'2[\W_]0', '2ch', 'stereo'],
        '1.0': [r'1[\W_]0', '1ch', 'mono'],
    })

    register_quality('audioChannels', {
        '7.1': 200,
        '5.1': 100,
        '2.0': 0,
        '1.0': -100,
    })

    self.container.register_property('episodeFormat', r'Minisodes?', canonical_form='Minisode')

    self.container.register_property('crc32', '(?:[a-fA-F]|[0-9]){8}',
                                     enhance=False, canonical_from_pattern=False)

    part_words = ['pt', 'part']
    self.container.register_property(
        None,
        '(' + build_or_pattern(part_words) + sep + '?(?P<part>' + numeral + '))[^0-9]',
        enhance=False, canonical_from_pattern=False, confidence=0.4, formatter=parse_numeral)

    register_property('other', {
        'AudioFix': ['Audio-Fix', 'Audio-Fixed'],
        'SyncFix': ['Sync-Fix', 'Sync-Fixed'],
        'DualAudio': ['Dual-Audio'],
        'WideScreen': ['ws', 'wide-screen'],
        'Netflix': ['Netflix', 'NF'],
    })

    self.container.register_property('other', 'Real', 'Fix', canonical_form='Proper',
                                     validator=full_match_with_neighbor())
    self.container.register_property('other', 'Proper', 'Repack', 'Rerip', canonical_form='Proper')
    self.container.register_property('other', 'Fansub', canonical_form='Fansub',
                                     validator=full_match_with_neighbor())
    self.container.register_property('other', 'Fastsub', canonical_form='Fastsub',
                                     validator=full_match_with_neighbor())
    self.container.register_property('other', '(?:Seasons?' + sep + '?)?Complete',
                                     canonical_form='Complete')
    self.container.register_property('other', 'R5', 'RC', canonical_form='R5')
    self.container.register_property('other', 'Pre-Air', 'Preair', canonical_form='Preair')
    self.container.register_property('other', 'CC')  # Close Caption
    self.container.register_property('other', 'LD', 'MD')  # Line/Mic Dubbed

    self.container.register_canonical_properties(
        'other', 'Screener', 'Remux', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR',
        'PAL', 'SECAM', 'NTSC')
    self.container.register_canonical_properties(
        'other', 'Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer',
        validator=WeakValidator())

    # "<format>-Scr" / "<format>Screener" marks a screener for every format
    # pattern registered above.
    for prop in self.container.get_properties('format'):
        self.container.register_property('other', prop.pattern + '(-?Scr(?:eener)?)',
                                         canonical_form='Screener')

    # Known file extensions double as low-confidence container hints.
    for exts in (subtitle_exts, info_exts, video_exts):
        for container in exts:
            self.container.register_property('container', container, confidence=0.3)
class GuessEpisodesRexps(Transformer):
    """Transformer that registers regular expressions for season/episode data.

    The patterns registered on ``self.container`` produce the properties
    listed by :meth:`supported_properties` (season, episodeNumber, their list
    and count variants, version and other).
    """

    def __init__(self):
        """Build the property container and register every pattern.

        Registration order is kept exactly as written; whether the container
        depends on it cannot be seen from this block.
        """
        # NOTE(review): 20 is presumably this transformer's priority -- confirm
        # against Transformer.__init__.
        Transformer.__init__(self, 20)

        range_separators = ['-', 'to', 'a']
        discrete_separators = ['&', 'and', 'et']
        of_separators = ['of', 'sur', '/', '\\']

        season_words = ['seasons?', 'saisons?', 'series?']
        episode_words = ['episodes?']

        season_markers = ['s']
        episode_markers = ['e', 'ep']

        # The generic separator pattern, with every range separator removed
        # from its text, is used as one more discrete separator.
        discrete_sep = sep
        for range_separator in range_separators:
            discrete_sep = discrete_sep.replace(range_separator, '')
        discrete_separators.append(discrete_sep)

        all_separators = list(range_separators)
        all_separators.extend(discrete_separators)

        self.container = PropertiesContainer(enhance=False, canonical_from_pattern=False)

        range_separators_re = re.compile(build_or_pattern(range_separators), re.IGNORECASE)
        discrete_separators_re = re.compile(build_or_pattern(discrete_separators), re.IGNORECASE)
        all_separators_re = re.compile(build_or_pattern(all_separators), re.IGNORECASE)
        of_separators_re = re.compile(build_or_pattern(of_separators, escape=True), re.IGNORECASE)

        season_words_re = re.compile(build_or_pattern(season_words), re.IGNORECASE)
        episode_words_re = re.compile(build_or_pattern(episode_words), re.IGNORECASE)

        season_markers_re = re.compile(build_or_pattern(season_markers), re.IGNORECASE)
        episode_markers_re = re.compile(build_or_pattern(episode_markers), re.IGNORECASE)

        # Compiled once here instead of on every parser call.
        x_separator_re = re.compile('x', re.IGNORECASE)
        e_separator_re = re.compile('e', re.IGNORECASE)

        def list_parser(value, property_list_name,
                        discrete_separators_re=discrete_separators_re,
                        range_separators_re=range_separators_re,
                        allow_discrete=False, fill_gaps=False):
            """Parse a string describing one or several numbers.

            ``value`` is split on ``discrete_separators_re``; fragments that
            belong to the same range are glued back together, then every
            range is expanded with ``range()`` and every single value is
            converted with ``parse_numeral``.

            :param value: matched text to parse.
            :param property_list_name: key under which the full list is
                returned when more than one number is found.
            :param discrete_separators_re: compiled pattern splitting
                independent values.
            :param range_separators_re: compiled pattern separating the two
                bounds of a range.
            :param allow_discrete: when false, any number lower than the
                last kept number is dropped.
            :param fill_gaps: when true, the result is replaced by every
                integer between its minimum and maximum.
            :return: ``{None: first, property_list_name: numbers}`` when
                several numbers remain, the single number when only one
                remains, ``None`` when nothing was found.
            """
            discrete_elements = [
                element.strip()
                for element in discrete_separators_re.split(value)
                if element != ''
            ]
            element_count = len(discrete_elements)

            # Re-assemble ranges that the discrete split cut into pieces.
            merged_elements = []
            i = 0
            while i < element_count:
                if i < element_count - 2 and range_separators_re.match(discrete_elements[i + 1]):
                    # "<start>", "<range separator>", "<end>" -> one element.
                    merged_elements.append(
                        discrete_elements[i] + discrete_elements[i + 1] + discrete_elements[i + 2])
                    i += 3
                    continue

                element = discrete_elements[i]
                match = range_separators_re.search(element)
                if match and match.start() == 0:
                    if merged_elements:
                        # Continue the previous element. The last merged
                        # element is addressed with -1: ``i`` indexes
                        # ``discrete_elements`` and is out of range for the
                        # merged list once a three-way merge has happened.
                        merged_elements[-1] = merged_elements[-1] + element
                    else:
                        # Nothing to attach to: keep the fragment as is
                        # instead of raising IndexError.
                        merged_elements.append(element)
                elif match and match.end() == len(element) and i + 1 < element_count:
                    # Element ends with a range separator: attach the next
                    # element as the end bound. The next element is still
                    # visited on its own afterwards (``i`` advances by one).
                    merged_elements.append(element + discrete_elements[i + 1])
                else:
                    merged_elements.append(element)
                i += 1

            ret = []
            for discrete_element in merged_elements:
                range_values = [
                    bound.strip()
                    for bound in range_separators_re.split(discrete_element)
                    if bound != ''
                ]
                if len(range_values) > 1:
                    # Expand every consecutive pair of bounds, both included.
                    for start_text, end_text in zip(range_values, range_values[1:]):
                        start_range_ep = parse_numeral(start_text)
                        end_range_ep = parse_numeral(end_text)
                        for range_ep in range(start_range_ep, end_range_ep + 1):
                            if range_ep not in ret:
                                ret.append(range_ep)
                else:
                    discrete_value = parse_numeral(discrete_element)
                    if discrete_value not in ret:
                        ret.append(discrete_value)

            if len(ret) > 1:
                if not allow_discrete:
                    # Keep only the numbers that do not go below the last
                    # number kept so far.
                    valid_ret = [ret[0]]
                    for candidate in ret[1:]:
                        if candidate >= valid_ret[-1]:
                            valid_ret.append(candidate)
                    ret = valid_ret
                if fill_gaps:
                    ret = list(range(min(ret), max(ret) + 1))
                if len(ret) > 1:
                    return {None: ret[0], property_list_name: ret}
            if ret:
                return ret[0]
            return None

        def episode_parser_x(value):
            """Parse an episode list whose values are separated by ``x``."""
            return list_parser(value, 'episodeList', discrete_separators_re=x_separator_re)

        def episode_parser_e(value):
            """Parse an episode list separated by ``e``, filling the gaps."""
            return list_parser(value, 'episodeList', discrete_separators_re=e_separator_re,
                               fill_gaps=True)

        def episode_parser(value):
            """Parse an episode list using the default separators."""
            return list_parser(value, 'episodeList')

        def season_parser(value):
            """Parse a season list using the default separators."""
            return list_parser(value, 'seasonList')

        class ResolutionCollisionValidator(object):
            """Accept a match only when its second group is under 3 characters.

            NOTE(review): judging by the name, this presumably rejects
            resolution-like text such as "1920x1080" -- confirm.
            """

            def validate(self, prop, string, node, match, entry_start, entry_end):
                return len(match.group(2)) < 3  # limit

        # Season word followed by a numeral, optionally followed by a season
        # word again.
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + numeral + ')' + sep + '?'
            + season_words_re.pattern + '?)',
            confidence=1.0, formatter=parse_numeral)

        # Season word followed by several numbers; only kept when the parsed
        # season value has a length greater than one.
        self.container.register_property(
            None,
            r'(' + season_words_re.pattern + sep + '?(?P<season>' + digital_numeral + '(?:' + sep
            + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep + '?'
            + season_words_re.pattern + '?)' + sep,
            confidence=1.0,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=ChainedValidator(
                DefaultValidator(),
                FormatterValidator('season',
                                   lambda x: len(x) > 1 if hasattr(x, '__len__') else False)))

        # Season marker + number followed by one or more "e<number>" parts.
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + ')[^0-9]?' + sep
            + '?(?P<episodeNumber>(?:e' + digital_numeral + '(?:' + sep + '?[e-]' + digital_numeral
            + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_e,
                       'season': season_parser},
            validator=NoValidator())

        # Disabled registration, kept for reference:
        # self.container.register_property(None, r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<episodeNumber>(?:x' + digital_numeral + '(?:' + sep + '?[x-]' + digital_numeral + ')*)))', confidence=1.0, formatter={None: parse_numeral, 'episodeNumber': episode_parser_x, 'season': season_parser}, validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # "<season> x <episode>" written with separators around the "x".
        self.container.register_property(
            None,
            sep + r'((?P<season>' + digital_numeral + ')' + sep + '' + '(?P<episodeNumber>(?:x'
            + sep + digital_numeral + '(?:' + sep + '[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x,
                       'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # "<season>x<episode>" written without separators.
        self.container.register_property(
            None,
            r'((?P<season>' + digital_numeral + ')' + '(?P<episodeNumber>(?:x' + digital_numeral
            + '(?:[x-]' + digital_numeral + ')*)))',
            confidence=1.0,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser_x,
                       'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

        # Season marker followed by one or several numbers.
        self.container.register_property(
            None,
            r'(' + season_markers_re.pattern + '(?P<season>' + digital_numeral + '(?:' + sep + '?'
            + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={None: parse_numeral, 'season': season_parser},
            validator=NoValidator())

        # Episode number followed by a "v<version>" suffix.
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + digital_numeral + ')' + sep + r'?v(?P<version>\d+))',
            confidence=0.6, formatter=parse_numeral)

        # "ep" prefix, without and with a version suffix.
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep + '?)',
            confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None,
            r'(ep' + sep + r'?(?P<episodeNumber>' + digital_numeral + ')' + sep
            + r'?v(?P<version>\d+))',
            confidence=0.7, formatter=parse_numeral)

        # Episode marker followed by one or several numbers.
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + '(?:'
            + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*))',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Episode word followed by one or several numbers, optionally
        # followed by an episode word again.
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + '(?:'
            + sep + '?' + all_separators_re.pattern + sep + '?' + digital_numeral + ')*)' + sep
            + '?' + episode_words_re.pattern + '?)',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Episode marker / episode word + number + version suffix.
        self.container.register_property(
            None,
            r'(' + episode_markers_re.pattern + '(?P<episodeNumber>' + digital_numeral + ')' + sep
            + r'?v(?P<version>\d+))',
            confidence=0.6,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})
        self.container.register_property(
            None,
            r'(' + episode_words_re.pattern + sep + '?(?P<episodeNumber>' + digital_numeral + ')'
            + sep + r'?v(?P<version>\d+))',
            confidence=0.8,
            formatter={None: parse_numeral, 'episodeNumber': episode_parser})

        # Bare numbers at the very start or the very end of the string.
        self.container.register_property(
            'episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        # NOTE(review): identical to the registration just above; it looks
        # redundant but is kept because the container's handling of
        # duplicates is not visible here -- confirm before removing.
        self.container.register_property(
            'episodeNumber', r'^ ?(\d{2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property(
            'episodeNumber', r'^ ?0(\d{1,2})' + sep, confidence=0.4, formatter=parse_numeral)
        self.container.register_property(
            'episodeNumber', sep + r'(\d{2}) ?$', confidence=0.4, formatter=parse_numeral)
        self.container.register_property(
            'episodeNumber', sep + r'0(\d{1,2}) ?$', confidence=0.4, formatter=parse_numeral)

        # "<episode> of <count>", with the episode word after or before.
        self.container.register_property(
            None,
            r'((?P<episodeNumber>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<episodeCount>' + numeral + ')(?:' + sep + '?(?:episodes?|eps?))?)',
            confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?:episodes?|eps?)' + sep + '?(?P<episodeNumber>' + numeral + ')' + sep + '?'
            + of_separators_re.pattern + sep + '?(?P<episodeCount>' + numeral + '))',
            confidence=0.7, formatter=parse_numeral)

        # "<season> of <count>", with the season word before or after.
        self.container.register_property(
            None,
            r'((?:seasons?|saisons?|s)' + sep + '?(?P<season>' + numeral + ')' + sep + '?'
            + of_separators_re.pattern + sep + '?(?P<seasonCount>' + numeral + '))',
            confidence=0.7, formatter=parse_numeral)
        self.container.register_property(
            None,
            r'((?P<season>' + numeral + ')' + sep + '?' + of_separators_re.pattern + sep
            + '?(?P<seasonCount>' + numeral + ')' + sep + '?(?:seasons?|saisons?|s))',
            confidence=0.7, formatter=parse_numeral)

        self.container.register_canonical_properties('other', 'FiNAL', 'Complete',
                                                     validator=WeakValidator())

        # "<season>xAll" is reported as season + other='Complete'.
        self.container.register_property(
            None,
            r'[^0-9]((?P<season>' + digital_numeral + ')[^0-9 .-]?-?(?P<other>xAll))',
            confidence=1.0,
            formatter={None: parse_numeral, 'other': lambda x: 'Complete',
                       'season': season_parser},
            validator=ChainedValidator(DefaultValidator(), ResolutionCollisionValidator()))

    def register_arguments(self, opts, naming_opts, output_opts, information_opts,
                           webservice_opts, other_options):
        """Add the ``-E`` / ``--episode-prefer-number`` flag to ``naming_opts``."""
        naming_opts.add_argument(
            '-E', '--episode-prefer-number', action='store_true',
            dest='episode_prefer_number', default=False,
            help='Guess "serie.213.avi" as the episodeNumber 213. Without this option, '
                 'it will be guessed as season 2, episodeNumber 13')

    def supported_properties(self):
        """Return the names of the properties this transformer can produce."""
        return ['episodeNumber', 'season', 'episodeList', 'seasonList', 'episodeCount',
                'seasonCount', 'version', 'other']

    def guess_episodes_rexps(self, string, node=None, options=None):
        """Look up the registered properties in ``string`` and return them as a guess."""
        found = self.container.find_properties(string, node, options)
        return self.container.as_guess(found, string)

    def should_process(self, mtree, options=None):
        """Return whether the guessed ``type`` of ``mtree`` starts with 'episode'."""
        return mtree.guess.get('type', '').startswith('episode')

    def process(self, mtree, options=None):
        """Run :meth:`guess_episodes_rexps` over the unidentified leaves of ``mtree``."""
        GuessFinder(self.guess_episodes_rexps, None, self.log, options).process_nodes(
            mtree.unidentified_leaves())