def country(config, common_words):
    """
    Builder for the rebulk object handling the ``country`` property.

    Besides building the Rebulk object, this registers a
    ``GuessitCountryConverter`` (built from ``config['synonyms']``) under the
    ``'guessit'`` key of ``babelfish.country_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def country_is_disabled(context):
        """Tell whether ``country`` is disabled in the given context."""
        return is_disabled(context, 'country')

    def no_allowed_countries(context):
        """Tell whether the context lacks a truthy ``allowed_countries``."""
        return not context.get('allowed_countries')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        allowed_countries = None
        if context:
            allowed_countries = context.get('allowed_countries')
        finder = CountryFinder(allowed_countries, common_words)
        return finder.find(string)

    def solve_conflict(match, other):
        """Prefer language and any other property over country if not US or GB."""
        if other.name != 'language':
            return match
        if match.value not in (babelfish.Country('US'), babelfish.Country('GB')):
            return match
        return other

    rebulk = Rebulk(disabled=country_is_disabled)
    rebulk = rebulk.defaults(name='country')

    rebulk.functional(find_countries,
                      conflict_solver=solve_conflict,
                      properties={'country': [None]},
                      disabled=no_allowed_countries)

    synonyms = config['synonyms']
    babelfish.country_converters['guessit'] = GuessitCountryConverter(synonyms)

    return rebulk
def path():
    """
    Builder for the rebulk object that marks path elements.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def mark_path(input_string, context):
        """
        Functional pattern to mark path elements.

        When the ``name_only`` context option is truthy the whole string is a
        single element; otherwise the string is cut on ``/`` and ``\\``.

        :param input_string:
        :param context:
        :return: list of ``(start, end)`` spans
        """
        if context.get('name_only', False):
            return [(0, len(input_string))]

        # Separator positions plus two sentinels: -1 before the string and
        # len(input_string) after it, so every element has two boundaries.
        boundaries = list(find_all(input_string, '/'))
        boundaries = boundaries + list(find_all(input_string, '\\'))
        boundaries = boundaries + [-1, len(input_string)]
        boundaries = sorted(boundaries)

        return [(left + 1, right) for left, right in zip(boundaries, boundaries[1:])]

    rebulk = Rebulk()
    rebulk.defaults(name="path", marker=True)
    rebulk.functional(mark_path)
    return rebulk
def language():
    """
    Builder for the rebulk object handling languages and subtitle languages.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Options shared by every private affix string pattern.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    rebulk = Rebulk()

    rebulk.string(*subtitle_prefixes,
                  name="subtitle_language.prefix",
                  tags=['release-group-prefix'],
                  **affix_options)
    rebulk.string(*subtitle_suffixes,
                  name="subtitle_language.suffix",
                  **affix_options)
    rebulk.string(*lang_suffixes,
                  name="language.suffix",
                  tags=['format-suffix'],
                  **affix_options)

    rebulk.functional(find_languages, properties={'language': [None]})

    rebulk.rules(SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 SubtitleExtensionRule)

    return rebulk
def date():
    """
    Builder for the rebulk object handling ``year`` and ``date``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def validate_year(match):
        """Accept a match surrounded by separators whose value is a valid year."""
        return seps_surround(match) and valid_year(match.value)

    def date_functional(string, context):
        """
        Search for date in the string and retrieves match

        :param string:
        :param context:
        :return: ``(start, end, {"value": ...})`` or None when nothing is found
        """
        found = search_date(string, context.get("date_year_first"), context.get("date_day_first"))
        if not found:
            return None
        return found[0], found[1], {"value": found[2]}

    def solve_date_conflict(match, other):  # pylint:disable=unused-argument
        """Return ``other`` for episode/season conflicts, else the default marker."""
        if other.name in ["episode", "season"]:
            return other
        return "__default__"

    rebulk = Rebulk()
    rebulk = rebulk.defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}", name="year", formatter=int, validator=validate_year)

    rebulk.functional(date_functional,
                      name="date",
                      properties={"date": [None]},
                      conflict_solver=solve_date_conflict)

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
def country(config, common_words):
    """
    Builder for the rebulk object handling the ``country`` property.

    As a side effect, a ``GuessitCountryConverter`` built from
    ``config['synonyms']`` is stored in ``babelfish.country_converters``
    under the ``'guessit'`` key.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def builder_disabled(context):
        """Tell whether ``country`` is disabled in the given context."""
        return is_disabled(context, 'country')

    def pattern_disabled(context):
        """Tell whether the context has no truthy ``allowed_countries``."""
        return not context.get('allowed_countries')

    def find_countries(string, context=None):
        """
        Find countries in given string.
        """
        if context:
            allowed_countries = context.get('allowed_countries')
        else:
            allowed_countries = None
        return CountryFinder(allowed_countries, common_words).find(string)

    def country_conflict_solver(match, other):
        """Prefer language and any other property over country if not US or GB."""
        if other.name == 'language' and match.value in (babelfish.Country('US'), babelfish.Country('GB')):
            return other
        return match

    rebulk = Rebulk(disabled=builder_disabled)
    rebulk = rebulk.defaults(name='country')

    rebulk.functional(find_countries,
                      conflict_solver=country_conflict_solver,
                      properties={'country': [None]},
                      disabled=pattern_disabled)

    converter = GuessitCountryConverter(config['synonyms'])
    babelfish.country_converters['guessit'] = converter

    return rebulk
def release_group(config):
    """
    Builder for the rebulk object handling the ``release_group`` property.

    :param config: rule configuration; ``'forbidden_names'`` and
        ``'ignored_seps'`` are read from it
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    # Separators that may always be stripped: all of them except the ignored ones.
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        Forbidden names found at the start or at the end of the string, and
        delimited by a separator, are removed as well.

        :param string: raw release group text
        :type string: str
        :return: cleaned release group text
        :rtype: str
        """
        string = string.strip(groupname_seps)
        ignored = tuple(groupname_ignore_seps)
        wrapped = string.endswith(ignored) and string.startswith(ignored)
        # Ignored separators are only stripped when they do not wrap the whole
        # name and do not also occur inside it.
        if not wrapped \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # Drop the forbidden suffix. The previous slice,
                # string[:len(forbidden)], kept the first len(forbidden)
                # characters instead of removing the last ones.
                string = string[:len(string) - len(forbidden)]
                string = string.strip(groupname_seps)
        return string

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group,
                      name='release_group',
                      tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(DashSeparatedReleaseGroup(clean_groupname),
                        SceneReleaseGroup(clean_groupname),
                        AnimeReleaseGroup)
def language(config, common_words):
    """
    Builder for the rebulk object handling ``language`` and ``subtitle_language``.

    As a side effect, a ``GuessitConverter`` built from ``config['synonyms']``
    is stored in ``babelfish.language_converters`` under the ``'guessit'`` key.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def sorted_affixes(shared, specific_key):
        """Concatenate ``shared`` with ``config[specific_key]``, sorted with ``length_comparator``."""
        return sorted(shared + config[specific_key], key=length_comparator)

    def everything_disabled(context):
        """True only when both ``language`` and ``subtitle_language`` are disabled."""
        return is_disabled(context, 'language') and is_disabled(context, 'subtitle_language')

    def subtitle_language_disabled(context):
        """Tell whether ``subtitle_language`` is disabled in the given context."""
        return is_disabled(context, 'subtitle_language')

    def language_disabled(context):
        """Tell whether ``language`` is disabled in the given context."""
        return is_disabled(context, 'language')

    def no_allowed_languages(context):
        """Tell whether the context has no truthy ``allowed_languages``."""
        return not context.get('allowed_languages')

    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = sorted_affixes(subtitle_both, 'subtitle_prefixes')
    subtitle_suffixes = sorted_affixes(subtitle_both, 'subtitle_suffixes')
    lang_both = config['language_affixes']
    lang_prefixes = sorted_affixes(lang_both, 'language_prefixes')
    lang_suffixes = sorted_affixes(lang_both, 'language_suffixes')
    weak_affixes = frozenset(config['weak_affixes'])

    def find_languages(string, context=None):
        """Find languages in the string

        :return: list of tuple (property, Language, lang_word, word)
        """
        finder = LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes, weak_affixes)
        return finder.find(string)

    # Options shared by every private affix string pattern.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    rebulk = Rebulk(disabled=everything_disabled)

    rebulk.string(*subtitle_prefixes,
                  name="subtitle_language.prefix",
                  tags=['release-group-prefix'],
                  disabled=subtitle_language_disabled,
                  **affix_options)
    rebulk.string(*subtitle_suffixes,
                  name="subtitle_language.suffix",
                  disabled=subtitle_language_disabled,
                  **affix_options)
    rebulk.string(*lang_suffixes,
                  name="language.suffix",
                  tags=['source-suffix'],
                  disabled=language_disabled,
                  **affix_options)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=no_allowed_languages)

    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    synonyms = config['synonyms']
    babelfish.language_converters['guessit'] = GuessitConverter(synonyms)

    return rebulk
def release_group(config):
    """
    Builder for the rebulk object handling the ``release_group`` property.

    :param config: rule configuration; ``'forbidden_names'`` and
        ``'ignored_seps'`` are read from it
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    forbidden_groupnames = config['forbidden_names']

    groupname_ignore_seps = config['ignored_seps']
    # Separators that may always be stripped: all of them except the ignored ones.
    groupname_seps = ''.join([c for c in seps if c not in groupname_ignore_seps])

    def clean_groupname(string):
        """
        Removes and strip separators from input_string

        Forbidden names found at the start or at the end of the string, and
        delimited by a separator, are removed as well.

        :param string: raw release group text
        :type string: str
        :return: cleaned release group text
        :rtype: str
        """
        string = string.strip(groupname_seps)
        ignored = tuple(groupname_ignore_seps)
        wrapped = string.endswith(ignored) and string.startswith(ignored)
        # Ignored separators are only stripped when they do not wrap the whole
        # name and do not also occur inside it.
        if not wrapped \
                and not any(i in string.strip(groupname_ignore_seps) for i in groupname_ignore_seps):
            string = string.strip(groupname_ignore_seps)
        for forbidden in forbidden_groupnames:
            if string.lower().startswith(forbidden) and string[len(forbidden):len(forbidden) + 1] in seps:
                string = string[len(forbidden):]
                string = string.strip(groupname_seps)
            if string.lower().endswith(forbidden) and string[-len(forbidden) - 1:-len(forbidden)] in seps:
                # Drop the forbidden suffix. The previous slice,
                # string[:len(forbidden)], kept the first len(forbidden)
                # characters instead of removing the last ones.
                string = string[:len(string) - len(forbidden)]
                string = string.strip(groupname_seps)
        return string

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'release_group'))

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group,
                      name='release_group',
                      tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_group'))

    return rebulk.rules(
        DashSeparatedReleaseGroup(clean_groupname),
        SceneReleaseGroup(clean_groupname),
        AnimeReleaseGroup
    )
def release_group():
    """
    Builder for the rebulk object handling the ``release_group`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_conflict(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def no_expected_group(context):
        """Tell whether the context has no truthy ``expected_group``."""
        return not context.get('expected_group')

    rebulk = Rebulk()

    expected_group = build_expected_function('expected_group')

    rebulk.functional(expected_group,
                      name='release_group',
                      tags=['expected'],
                      validator=seps_surround,
                      conflict_solver=solve_conflict,
                      disabled=no_expected_group)

    with_rules = rebulk.rules(SceneReleaseGroup, AnimeReleaseGroup)
    return with_rules
def language():
    """
    Builder for the rebulk object handling languages and subtitle languages.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Options shared by both private subtitle affix patterns.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    rebulk = Rebulk()

    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", **affix_options)
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", **affix_options)

    rebulk.functional(find_languages, properties={'language': [None]})

    rebulk.rules(SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 SubtitleExtensionRule)

    return rebulk
def groups(config):
    """
    Builder for the rebulk object that marks bracketed groups.

    :param config: rule configuration; ``'starting'`` and ``'ending'`` hold the
        opening and closing characters, paired by position
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    :raises ConfigurationException: when ``starting`` and ``ending`` differ in length
    """
    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)

    starting = config['starting']
    ending = config['ending']

    if len(starting) != len(ending):
        raise ConfigurationException("Starting and ending groups must have the same length")

    def mark_groups(input_string):
        """
        Functional pattern to mark groups delimited by the configured characters.

        :param input_string:
        :return: list of ``(start, end)`` spans, delimiters included
        """
        # One independent stack per group type. ``([], ) * n`` would repeat the
        # same list object n times, letting a closing character of one type
        # consume the opening of another type.
        openings = tuple([] for _ in starting)

        ret = []
        for i, char in enumerate(input_string):
            start_type = starting.find(char)
            if start_type > -1:
                openings[start_type].append(i)

            end_type = ending.find(char)
            # A closing character without a pending opening is ignored.
            if end_type > -1 and openings[end_type]:
                ret.append((openings[end_type].pop(), i + 1))

        return ret

    rebulk.functional(mark_groups)
    return rebulk
def country():
    """
    Builder for the rebulk object handling the ``country`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_conflict(match, other):
        """Prefer language and any other property over country if not US or GB."""
        if other.name != 'language':
            return match
        if match.value not in [babelfish.Country('US'), babelfish.Country('GB')]:
            return match
        return other

    rebulk = Rebulk()
    rebulk = rebulk.defaults(name='country')

    rebulk.functional(find_countries,
                      conflict_solver=solve_conflict,
                      properties={'country': [None]})

    return rebulk
def title():
    """
    Builder for the rebulk object handling the ``title`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_conflict(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def no_expected_title(context):
        """Tell whether the context has no truthy ``expected_title``."""
        return not context.get('expected_title')

    rebulk = Rebulk()
    rebulk = rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')
    title_formatter = formatters(cleanup, reorder_title)

    rebulk.functional(expected_title,
                      name='title',
                      tags=['expected', 'title'],
                      validator=seps_surround,
                      formatter=title_formatter,
                      conflict_solver=solve_conflict,
                      disabled=no_expected_title)

    return rebulk
def date(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object handling ``year`` and ``date``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def year_disabled(context):
        """Tell whether ``year`` is disabled in the given context."""
        return is_disabled(context, 'year')

    def date_disabled(context):
        """Tell whether ``date`` is disabled in the given context."""
        return is_disabled(context, 'date')

    def validate_year(match):
        """Accept a match surrounded by separators whose value is a valid year."""
        return seps_surround(match) and valid_year(match.value)

    def solve_year_conflict(match, other):
        """Return ``other`` when it is a shorter episode/season match, else the default marker."""
        if other.name in ('episode', 'season') and len(other.raw) < len(match.raw):
            return other
        return '__default__'

    def solve_date_conflict(match, other):  # pylint:disable=unused-argument
        """Return ``other`` for episode/season/crc32 conflicts, else the default marker."""
        if other.name in ('episode', 'season', 'crc32'):
            return other
        return '__default__'

    def date_functional(string, context):
        """
        Search for date in the string and retrieves match

        :param string:
        :param context:
        :return: ``(start, end, {'value': ...})`` or None when nothing is found
        """
        found = search_date(string, context.get('date_year_first'), context.get('date_day_first'))
        if not found:
            return None
        return found[0], found[1], {'value': found[2]}

    rebulk = Rebulk()
    rebulk = rebulk.defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}",
                 name="year",
                 formatter=int,
                 disabled=year_disabled,
                 conflict_solver=solve_year_conflict,
                 validator=validate_year)

    rebulk.functional(date_functional,
                      name="date",
                      properties={'date': [None]},
                      disabled=date_disabled,
                      conflict_solver=solve_date_conflict)

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
def crc():
    """
    Builder for the rebulk object handling ``crc32`` and ``uuid``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def solve_conflict(match, other):
        """Return ``match`` for episode/season conflicts, else the default marker."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight characters, each a hexadecimal letter or a digit.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=solve_conflict)

    rebulk.functional(guess_idnumber, name='uuid', conflict_solver=solve_conflict)

    return rebulk
def title():
    """
    Builder for the rebulk object handling the ``title`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def expected_title(input_string, context):
        """
        Expected title functional pattern.

        Each ``expected_title`` entry of the context is searched in the input.
        Entries starting with ``re:`` are treated as regular expressions, the
        others as case-insensitive literals.

        :param input_string:
        :type input_string:
        :param context:
        :type context:
        :return: list of spans
        :rtype: list
        """
        spans = []
        for expected in context.get('expected_title'):
            if not expected.startswith('re:'):
                spans.extend((start, start + len(expected))
                             for start in find_all(input_string, expected, ignore_case=True))
                continue

            # Drop the 're:' prefix; spaces become dashes before the pattern is built.
            expression = expected[3:].replace(' ', '-')
            pattern = RePattern(expression, abbreviations=[dash], flags=re.IGNORECASE)
            spans.extend(found.span for found in pattern.matches(input_string, context))
        return spans

    def solve_conflict(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def no_expected_title(context):
        """Tell whether the context has no truthy ``expected_title``."""
        return not context.get('expected_title')

    rebulk = Rebulk()
    rebulk = rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    rebulk.functional(expected_title,
                      name='title',
                      tags=['expected'],
                      conflict_solver=solve_conflict,
                      disabled=no_expected_title)

    return rebulk
def crc():
    """
    Builder for the rebulk object handling ``crc32`` and ``uuid``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def keep_default_unless_numbering(match, other):
        """Return ``match`` for episode/season conflicts, else the default marker."""
        if other.name not in ['episode', 'season']:
            return '__default__'
        return match

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight characters, each a hexadecimal letter or a digit.
    crc32_pattern = '(?:[a-fA-F]|[0-9]){8}'
    rebulk.regex(crc32_pattern,
                 name='crc32',
                 conflict_solver=keep_default_unless_numbering)

    rebulk.functional(guess_idnumber,
                      name='uuid',
                      conflict_solver=keep_default_unless_numbering)

    return rebulk
def title():
    """
    Builder for the rebulk object handling the ``title`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def prefer_other(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def expected_title_missing(context):
        """Tell whether the context has no truthy ``expected_title``."""
        return not context.get('expected_title')

    rebulk = Rebulk()
    rebulk = rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

    functional_options = dict(name='title',
                              tags=['expected', 'title'],
                              validator=seps_surround,
                              formatter=formatters(cleanup, reorder_title),
                              conflict_solver=prefer_other,
                              disabled=expected_title_missing)
    rebulk.functional(expected_title, **functional_options)

    return rebulk
def groups():
    """
    Builder for the rebulk object that marks bracketed groups.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    starting = '([{'
    ending = ')]}'

    def mark_groups(input_string):
        """
        Functional pattern to mark groups (...), [...] and {...}.

        :param input_string:
        :return: list of ``(start, end)`` spans, delimiters included
        """
        # One stack of opening positions per bracket type.
        pending = ([], [], [])
        spans = []

        for position, char in enumerate(input_string):
            opened = starting.find(char)
            if opened > -1:
                pending[opened].append(position)

            closed = ending.find(char)
            # A closing bracket without a pending opening is ignored.
            if closed > -1 and pending[closed]:
                spans.append((pending[closed].pop(), position + 1))

        return spans

    rebulk = Rebulk()
    rebulk.defaults(name="group", marker=True)
    rebulk.functional(mark_groups)
    return rebulk
def title(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object handling the ``title`` property.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def title_disabled(context):
        """Tell whether ``title`` is disabled in the given context."""
        return is_disabled(context, 'title')

    def solve_conflict(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def no_expected_title(context):
        """Tell whether the context has no truthy ``expected_title``."""
        return not context.get('expected_title')

    rebulk = Rebulk(disabled=title_disabled)
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')
    title_formatter = formatters(cleanup, reorder_title)

    rebulk.functional(expected_title,
                      name='title',
                      tags=['expected', 'title'],
                      validator=seps_surround,
                      formatter=title_formatter,
                      conflict_solver=solve_conflict,
                      disabled=no_expected_title)

    return rebulk
def date(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object handling ``year`` and ``date``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def is_year_disabled(context):
        """Tell whether ``year`` is disabled in the given context."""
        return is_disabled(context, 'year')

    def is_date_disabled(context):
        """Tell whether ``date`` is disabled in the given context."""
        return is_disabled(context, 'date')

    def is_valid_year_match(match):
        """Accept a match surrounded by separators whose value is a valid year."""
        if not seps_surround(match):
            return seps_surround(match)
        return valid_year(match.value)

    def year_conflict_solver(match, other):
        """Return ``other`` when it is a shorter episode/season match, else the default marker."""
        numbering = other.name in ('episode', 'season')
        if numbering and len(other.raw) < len(match.raw):
            return other
        return '__default__'

    def date_conflict_solver(match, other):  # pylint:disable=unused-argument
        """Return ``other`` for episode/season/crc32 conflicts, else the default marker."""
        return other if other.name in ('episode', 'season', 'crc32') else '__default__'

    def date_functional(string, context):
        """
        Search for date in the string and retrieves match

        :param string:
        :param context:
        :return: ``(start, end, {'value': ...})`` or None when nothing is found
        """
        year_first = context.get('date_year_first')
        day_first = context.get('date_day_first')
        result = search_date(string, year_first, day_first)
        if result:
            return result[0], result[1], {'value': result[2]}
        return None

    rebulk = Rebulk()
    rebulk = rebulk.defaults(validator=seps_surround)

    rebulk.regex(r"\d{4}",
                 name="year",
                 formatter=int,
                 disabled=is_year_disabled,
                 conflict_solver=year_conflict_solver,
                 validator=lambda match: seps_surround(match) and valid_year(match.value))

    rebulk.functional(date_functional,
                      name="date",
                      properties={'date': [None]},
                      disabled=is_date_disabled,
                      conflict_solver=date_conflict_solver)

    rebulk.rules(KeepMarkedYearInFilepart)

    return rebulk
def title():
    """
    Builder for the rebulk object handling the ``title`` property.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def regex_spans(expression, input_string, context):
        """Return the spans matched by ``expression`` (spaces turned into dashes)."""
        pattern = RePattern(expression.replace(' ', '-'), abbreviations=[dash], flags=re.IGNORECASE)
        # Instance of 'list' has no 'span' member (no-member). Seems to be a pylint bug.
        # pylint: disable=no-member
        return [found.span for found in pattern.matches(input_string, context)]

    def literal_spans(text, input_string):
        """Return the spans of every case-insensitive occurrence of ``text``."""
        size = len(text)
        return [(start, start + size) for start in find_all(input_string, text, ignore_case=True)]

    def expected_title(input_string, context):
        """
        Expected title functional pattern.

        Entries of ``expected_title`` starting with ``re:`` are handled as
        regular expressions, the others as literals.

        :param input_string:
        :type input_string:
        :param context:
        :type context:
        :return: list of spans
        :rtype: list
        """
        ret = []
        for search in context.get('expected_title'):
            if search.startswith('re:'):
                ret += regex_spans(search[3:], input_string, context)
            else:
                ret += literal_spans(search, input_string)
        return ret

    def solve_conflict(match, other):  # pylint:disable=unused-argument
        """Always return the other match."""
        return other

    def no_expected_title(context):
        """Tell whether the context has no truthy ``expected_title``."""
        return not context.get('expected_title')

    rebulk = Rebulk()
    rebulk = rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    rebulk.functional(expected_title,
                      name='title',
                      tags=['expected'],
                      conflict_solver=solve_conflict,
                      disabled=no_expected_title)

    return rebulk
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object handling ``crc32`` and ``uuid``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc32_disabled(context):
        """Tell whether ``crc32`` is disabled in the given context."""
        return is_disabled(context, 'crc32')

    def solve_conflict(match, other):
        """Return ``match`` for episode/season conflicts, else the default marker."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc32_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight characters, each a hexadecimal letter or a digit.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=solve_conflict)

    rebulk.functional(guess_idnumber, name='uuid', conflict_solver=solve_conflict)

    return rebulk
def crc(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object handling ``crc32`` and ``uuid``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def crc32_disabled(context):
        """Tell whether ``crc32`` is disabled in the given context."""
        return is_disabled(context, 'crc32')

    def solve_crc32_conflict(match, other):  # pylint:disable=unused-argument
        """Return ``other`` for episode/season conflicts, else the default marker."""
        if other.name in ['episode', 'season']:
            return other
        return '__default__'

    def solve_uuid_conflict(match, other):
        """Return ``match`` for episode/season conflicts, else the default marker."""
        if other.name in ['episode', 'season']:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=crc32_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(validator=seps_surround)

    # Eight characters, each a hexadecimal letter or a digit.
    rebulk.regex('(?:[a-fA-F]|[0-9]){8}', name='crc32', conflict_solver=solve_crc32_conflict)

    rebulk.functional(guess_idnumber, name='uuid', conflict_solver=solve_uuid_conflict)

    return rebulk
def language(config, common_words):
    """
    Builder for the rebulk object handling ``language`` and ``subtitle_language``.

    As a side effect, a ``GuessitConverter`` built from ``config['synonyms']``
    is stored in ``babelfish.language_converters`` under the ``'guessit'`` key.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def with_shared(shared, specific_key):
        """Concatenate ``shared`` with ``config[specific_key]``, sorted with ``length_comparator``."""
        return sorted(shared + config[specific_key], key=length_comparator)

    def all_disabled(context):
        """True only when both ``language`` and ``subtitle_language`` are disabled."""
        return is_disabled(context, 'language') and is_disabled(context, 'subtitle_language')

    def subtitles_disabled(context):
        """Tell whether ``subtitle_language`` is disabled in the given context."""
        return is_disabled(context, 'subtitle_language')

    def languages_disabled(context):
        """Tell whether ``language`` is disabled in the given context."""
        return is_disabled(context, 'language')

    def allowed_languages_missing(context):
        """Tell whether the context has no truthy ``allowed_languages``."""
        return not context.get('allowed_languages')

    subtitle_both = config['subtitle_affixes']
    subtitle_prefixes = with_shared(subtitle_both, 'subtitle_prefixes')
    subtitle_suffixes = with_shared(subtitle_both, 'subtitle_suffixes')
    lang_both = config['language_affixes']
    lang_prefixes = with_shared(lang_both, 'language_prefixes')
    lang_suffixes = with_shared(lang_both, 'language_suffixes')
    weak_affixes = frozenset(config['weak_affixes'])

    def find_languages(string, context=None):
        """Find languages in the string

        :return: list of tuple (property, Language, lang_word, word)
        """
        finder = LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes, weak_affixes)
        return finder.find(string)

    # Options shared by every private affix string pattern.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    rebulk = Rebulk(disabled=all_disabled)

    rebulk.string(*subtitle_prefixes,
                  name="subtitle_language.prefix",
                  tags=['release-group-prefix'],
                  disabled=subtitles_disabled,
                  **affix_options)
    rebulk.string(*subtitle_suffixes,
                  name="subtitle_language.suffix",
                  disabled=subtitles_disabled,
                  **affix_options)
    rebulk.string(*lang_suffixes,
                  name="language.suffix",
                  tags=['source-suffix'],
                  disabled=languages_disabled,
                  **affix_options)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=allowed_languages_missing)

    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words),
                 RemoveUndeterminedLanguages)

    synonyms = config['synonyms']
    babelfish.language_converters['guessit'] = GuessitConverter(synonyms)

    return rebulk