def episodes(config):
    """
    Builder for rebulk object.

    Registers the season/episode patterns and the post-processing rules on a new Rebulk instance.
    The following keys are read from ``config``: ``episode_max_range``, ``season_max_range``,
    ``season_words``, ``episode_words``, ``of_words``, ``all_words``, ``season_markers``,
    ``season_ep_markers``, ``disc_markers``, ``episode_markers``, ``range_separators``,
    ``discrete_separators`` and ``max_range_gap``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """
        Whether season and episode rules should be disabled.

        :param context: context forwarded to ``is_disabled``
        :return: truthy when ``is_disabled`` reports 'episode' or 'season' as disabled
        """
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains when two neighbor values are too far apart.

        :param matches: matches collected so far for the chain
        :return: True if the last two 'episode' values differ by more than ``episode_max_range``
                 or the last two 'season' values differ by more than ``season_max_range``, else False
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: the match being evaluated
        :param other: the conflicting match
        :return: ``match`` or ``other`` when one of the cases below applies, the string '__default__' otherwise
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                # audio_channels is returned as conflicting only when it is not tagged weak and the
                # season/episode initiator has no '<name>Marker' child; screen_size only when its raw
                # text is not int-coercable.
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = [sep for sep in seps_no_fs if sep not in range_separators]
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: False when season or episode values are not sorted, otherwise the result of the
                 consecutiveness checks on 'episode' and 'season'
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'episode' or 'season'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator are not within
                     ``max_range_gap + 1`` of each other (unless a strong discrete separator is met), else True
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # previous(..., 0) may yield None when no '<property>Separator' child precedes this
                    # value (the first pattern of the SxxExx chain is repeated without any separator
                    # group): there is nothing to check for that pair.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator validates the whole list, stop checking.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        :param match: match to validate
        :return: True when the raw text is int-coercable, otherwise the result of ``seps_surround(match)``
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak,
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
def episodes(config):
    """
    Builder for rebulk object.

    Registers the season/episode patterns and the post-processing rules on a new Rebulk instance.
    The following keys are read from ``config``: ``episode_max_range``, ``season_max_range``,
    ``season_words``, ``episode_words``, ``of_words``, ``all_words``, ``season_markers``,
    ``season_ep_markers``, ``disc_markers``, ``episode_markers``, ``range_separators``,
    ``discrete_separators`` and ``max_range_gap``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def is_season_episode_disabled(context):
        """
        Whether season and episode rules should be disabled.

        :param context: context forwarded to ``is_disabled``
        :return: truthy when ``is_disabled`` reports 'episode' or 'season' as disabled
        """
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains when two neighbor values are too far apart.

        :param matches: matches collected so far for the chain
        :return: True if the last two 'episode' values differ by more than ``episode_max_range``
                 or the last two 'season' values differ by more than ``season_max_range``, else False
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: the match being evaluated
        :param other: the conflicting match
        :return: ``match`` or ``other`` when one of the cases below applies, the string '__default__' otherwise
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                # audio_channels is returned as conflicting only when it is not tagged weak and the
                # season/episode initiator has no '<name>Marker' child; screen_size only when its raw
                # text is not int-coercable.
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = [sep for sep in seps_no_fs if sep not in range_separators]
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: False when season or episode values are not sorted, otherwise the result of the
                 consecutiveness checks on 'episode' and 'season'
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'episode' or 'season'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator are not within
                     ``max_range_gap + 1`` of each other (unless a strong discrete separator is met), else True
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # previous(..., 0) may yield None when no '<property>Separator' child precedes this
                    # value (the first pattern of the SxxExx chain is repeated without any separator
                    # group): there is nothing to check for that pair.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator validates the whole list, stop checking.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        :param match: match to validate
        :return: True when the raw text is int-coercable, otherwise the result of ``seps_surround(match)``
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, RemoveWeak,
                 RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Registers the season/episode patterns (using the word, marker and separator lists defined below)
    and the post-processing rules on a new Rebulk instance.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.

        :param matches: matches collected so far for the chain
        :return: True if the last two 'episode' values or the last two 'season' values differ
                 by more than 100, else False
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: the match being evaluated
        :param other: the conflicting match
        :return: ``match`` or ``other`` when one of the cases below applies, the string '__default__' otherwise
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec',
                                                                  'audio_codec', 'audio_channels',
                                                                  'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            # Checks are ordered: the 'weak-episode' tag is tested on both matches before
            # looking for an 'x' in the initiator raw text.
            if 'weak-episode' in match.tags:
                return match
            if 'weak-episode' in other.tags:
                return other
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'eps', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']
    season_markers = ["S"]
    season_ep_markers = ["x"]
    episode_markers = ["xE", "Ex", "EP", "E", "x"]
    range_separators = ['-', '~', 'to', 'a']
    weak_discrete_separators = [sep for sep in seps if sep not in range_separators]
    strong_discrete_separators = ['+', '&', 'and', 'et']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: False when season or episode values are not sorted, otherwise the result of the
                 consecutiveness checks on 'episode' and 'season'
        """
        values = match.children.to_dict(implicit=True)
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'episode' or 'season'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator do not differ by exactly 1
                     (unless a strong discrete separator is met), else True
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # previous(..., 0) may yield None when no '<property>Separator' child precedes this
                    # value (the first pattern of the SxxExx chain is repeated without any separator
                    # group): there is nothing to check for that pair.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not current_match.value - previous_match.value == 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator validates the whole list, stop checking.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        :param match: match to validate
        :return: True when the raw text is int-coercable, otherwise the result of ``seps_surround(match)``
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>' +
               build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
               r')@?(?P<season>\d+)').repeater('*')

    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash],
                 formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/eps
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx, RemoveWeakDuplicate,
                 EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator, CountValidator,
                 EpisodeSingleDigitValidator)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Patterns are registered in this order: SxxExx chains, ``episode_details``
    keywords, worded season/episode patterns, weak numeric episode chains,
    the standalone version pattern, the detached "X of Y" count pattern, the
    ``episode_format`` and hardcoded-movie patterns, and finally the rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    season_or_episode = ('season', 'episode')
    technical_names = ('screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date')

    def context_is_episode(context):
        """True when the context ``type`` option equals 'episode'."""
        return context.get('type') == 'episode'

    def context_is_not_episode(context):
        """True when the context ``type`` option differs from 'episode'."""
        return context.get('type') != 'episode'

    def number_preferred(context):
        """Value of the ``episode_prefer_number`` context option (False when unset)."""
        return context.get('episode_prefer_number', False)

    def number_not_preferred(context):
        """Negation of the ``episode_prefer_number`` context option (True when unset)."""
        return not context.get('episode_prefer_number', False)

    def year_conflict_solver(match, other):
        """Return ``match`` when ``other`` is a 'year' match, else the '__default__' marker."""
        if other.name == 'year':
            return match
        return '__default__'

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match: match being examined
        :param other: match conflicting with it
        :return: ``match``, ``other`` or the '__default__' marker
        """
        if match.name in season_or_episode:
            if other.name in technical_names:
                return match
            if other.name in season_or_episode and match.initiator != other.initiator:
                # The first of (match, other) whose initiator text contains an 'x' is returned.
                for candidate in (match, other):
                    if 'x' in candidate.initiator.raw.lower():
                        return candidate
        return '__default__'

    # Regular separators plus the letters used inside SxxExx-like markers.
    season_episode_seps = list(seps) + ['x', 'X', 'e', 'E']

    def season_episode_validator(match):
        """
        Validator for season/episode matches

        Matches that are not season/episode, or whose initiator has a falsy
        start offset, are accepted. Otherwise the character at the initiator
        start, or the one just before it, must be in ``season_episode_seps``.
        """
        if match.name not in season_or_episode:
            return True
        initiator = match.initiator
        if not initiator.start:
            return True
        text = initiator.input_string
        return text[initiator.start] in season_episode_seps \
            or text[initiator.start - 1] in season_episode_seps

    # 01x02, 01x02x03x04
    (rebulk.chain(formatter={'season': int, 'episode': int},
                  tags=['SxxExx'],
                  abbreviations=[alt_dash],
                  children=True,
                  private_parent=True,
                  conflict_solver=season_episode_conflict_solver)
     .defaults(validator=season_episode_validator)
     .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)')
     .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*')
     .chain()
     .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)')
     .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*')
     .chain()
     .regex(r'S(?P<season>\d+)')
     .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*'))

    # episode_details property
    for detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(detail, value=detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True,
                    validator={'__parent__': seps_surround},
                    children=True,
                    private_parent=True)

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # Season 1, Season one, with optional "of <count>" and further seasons
    (rebulk.chain(abbreviations=[alt_dash],
                  formatter={'season': parse_numeral, 'count': parse_numeral})
     .defaults(validator=None)
     .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')')
     .regex(build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?')
     .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*')
     .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*'))

    # Episode 4 (digits only)
    worded_episode_digits = (build_or_pattern(episode_words) + r'-?(?P<episode>\d+)'
                             + r'(?:v(?P<version>\d+))?'
                             + r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?')
    rebulk.regex(worded_episode_digits,
                 abbreviations=[dash],
                 formatter=int,
                 disabled=context_is_episode)

    # Episode 4 (any numeral)
    worded_episode_numeral = (build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')'
                              + r'(?:v(?P<version>\d+))?'
                              + r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?')
    rebulk.regex(worded_episode_numeral,
                 abbreviations=[dash],
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=context_is_not_episode)

    season_all_pattern = r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')'
    rebulk.regex(season_all_pattern,
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True,
                    validator={'__parent__': seps_surround},
                    children=True,
                    private_parent=True)

    # 12, 13
    (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                  formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'(?P<episode>\d{2})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*'))

    # 012, 013
    (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                  formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'0(?P<episode>\d{1,2})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*'))

    # 112, 113
    (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                  formatter={'episode': int, 'version': int},
                  disabled=number_not_preferred)
     .defaults(validator=None)
     .regex(r'(?P<episode>\d{3,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*'))

    # 1, 2, 3
    (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                  formatter={'episode': int, 'version': int},
                  disabled=context_is_not_episode)
     .defaults(validator=None)
     .regex(r'(?P<episode>\d)')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*'))

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    (rebulk.chain(formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'e(?P<episode>\d{1,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*'))

    # ep 112, ep113, ep112, ep113
    (rebulk.chain(abbreviations=[dash],
                  formatter={'episode': int, 'version': int})
     .defaults(validator=None)
     .regex(r'ep-?(?P<episode>\d{1,4})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*'))

    # 102, 0102
    (rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                  formatter={'season': int, 'episode': int, 'version': int},
                  conflict_solver=year_conflict_solver,
                  disabled=number_preferred)
     .defaults(validator=None)
     .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})')
     .regex(r'v(?P<version>\d+)').repeater('?')
     .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*'))

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    detached_count_pattern = (r'(?P<episode>\d+)?-?' + build_or_pattern(of_words)
                              + r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?')
    rebulk.regex(detached_count_pattern,
                 abbreviations=[dash],
                 children=True,
                 private_parent=True,
                 formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash],
                 name="hardcoded-movies",
                 marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange,
                 SeasonSeparatorRange,
                 RemoveWeakIfMovie,
                 RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate,
                 EpisodeDetailValidator,
                 RemoveDetachedEpisodeNumber,
                 VersionValidator,
                 CountValidator,
                 EpisodeSingleDigitValidator)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Each multi-part pattern is registered in one of two forms depending on
    ``REGEX_AVAILABLE``: when it is truthy, a single regular expression with
    repeated named groups (word lists are passed as keyword arguments);
    otherwise a rebulk chain whose word alternations come from
    ``build_or_pattern``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    def season_episode_conflict_solver(match, other):
        """Return ``match`` for a season/episode match facing a technical property, else '__default__'."""
        if match.name in ('season', 'episode') \
                and other.name in ('screen_size', 'video_codec', 'audio_codec',
                                   'audio_channels', 'container', 'date'):
            return match
        return '__default__'

    def year_conflict_solver(match, other):
        """Return ``match`` when ``other`` is a 'year' match, else the '__default__' marker."""
        if other.name == 'year':
            return match
        return '__default__'

    def context_is_episode(context):
        """True when the context ``type`` option equals 'episode'."""
        return context.get('type') == 'episode'

    def context_is_not_episode(context):
        """True when the context ``type`` option differs from 'episode'."""
        return context.get('type') != 'episode'

    def number_preferred(context):
        """Value of the ``episode_prefer_number`` context option (False when unset)."""
        return context.get('episode_prefer_number', False)

    def number_not_preferred(context):
        """Negation of the ``episode_prefer_number`` context option (True when unset)."""
        return not context.get('episode_prefer_number', False)

    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    if REGEX_AVAILABLE:
        # 01x02, 01x02x03x04
        season_x_episode = (r'(?P<season>\d+)@?x@?(?P<episode>\d+)'
                            r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))*')
        # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, S01Ex02Ex03
        s_season_e_episode = (r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)'
                              r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))*')
        # S01
        s_season_only = (r'S(?P<season>\d+)'
                         r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))*')
        rebulk.regex(season_x_episode,
                     s_season_e_episode,
                     s_season_only,
                     formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     conflict_solver=season_episode_conflict_solver)
    else:
        (rebulk.chain(formatter={'season': int, 'episode': int},
                      tags=['SxxExx'],
                      abbreviations=[alt_dash],
                      children=True,
                      private_parent=True,
                      conflict_solver=season_episode_conflict_solver)
         .defaults(validator=None)
         .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)')
         .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*')
         .chain()
         .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)')
         .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*')
         .chain()
         .regex(r'S(?P<season>\d+)')
         .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*'))

    # episode_details property
    for detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(detail, value=detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True,
                    validator={'__parent__': seps_surround},
                    children=True,
                    private_parent=True)

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # Season 1, Season one
    if REGEX_AVAILABLE:
        worded_season = (r'\L<season_words>@?(?P<season>' + numeral + ')'
                         + r'(?:@?\L<of_words>@?(?P<count>' + numeral + '))?'
                         + r'(?:@?(?P<seasonSeparator>-)@?(?P<season>\d+))*'
                         + r'(?:@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+))*')
        rebulk.regex(worded_season,
                     of_words=of_words,
                     season_words=season_words,
                     abbreviations=[alt_dash],
                     formatter={'season': parse_numeral, 'count': parse_numeral})
    else:
        (rebulk.chain(abbreviations=[alt_dash],
                      formatter={'season': parse_numeral, 'count': parse_numeral})
         .defaults(validator=None)
         .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')')
         .regex(build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?')
         .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*')
         .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*'))

    # Episode 4 (digits only)
    if REGEX_AVAILABLE:
        worded_episode_digits = (r'\L<episode_words>-?(?P<episode>\d+)'
                                 r'(?:v(?P<version>\d+))?'
                                 r'(?:-?\L<of_words>?-?(?P<count>\d+))?')
        rebulk.regex(worded_episode_digits,
                     of_words=of_words,
                     episode_words=episode_words,
                     abbreviations=[dash],
                     formatter=int,
                     disabled=context_is_episode)
    else:
        worded_episode_digits = (build_or_pattern(episode_words) + r'-?(?P<episode>\d+)'
                                 + r'(?:v(?P<version>\d+))?'
                                 + r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?')
        rebulk.regex(worded_episode_digits,
                     abbreviations=[dash],
                     formatter=int,
                     disabled=context_is_episode)

    # Episode 4 (any numeral)
    if REGEX_AVAILABLE:
        worded_episode_numeral = (r'\L<episode_words>-?(?P<episode>' + numeral + ')'
                                  + r'(?:v(?P<version>\d+))?'
                                  + r'(?:-?\L<of_words>?-?(?P<count>\d+))?')
        rebulk.regex(worded_episode_numeral,
                     of_words=of_words,
                     episode_words=episode_words,
                     abbreviations=[dash],
                     formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=context_is_not_episode)
    else:
        worded_episode_numeral = (build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')'
                                  + r'(?:v(?P<version>\d+))?'
                                  + r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?')
        rebulk.regex(worded_episode_numeral,
                     abbreviations=[dash],
                     formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=context_is_not_episode)

    if REGEX_AVAILABLE:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>\L<all_words>)',
                     tags=['SxxExx'],
                     all_words=all_words,
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})
    else:
        season_all_pattern = r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')'
        rebulk.regex(season_all_pattern,
                     tags=['SxxExx'],
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True,
                    validator={'__parent__': seps_surround},
                    children=True,
                    private_parent=True)

    # 12, 13
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d{2})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie'],
                     formatter={'episode': int, 'version': int})
    else:
        (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                      formatter={'episode': int, 'version': int})
         .defaults(validator=None)
         .regex(r'(?P<episode>\d{2})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*'))

    # 012, 013
    if REGEX_AVAILABLE:
        rebulk.regex(r'0(?P<episode>\d{1,2})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'],
                     formatter={'episode': int, 'version': int})
    else:
        (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                      formatter={'episode': int, 'version': int})
         .defaults(validator=None)
         .regex(r'0(?P<episode>\d{1,2})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*'))

    # 112, 113
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d{3,4})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{3,4}))*',
                     tags=['bonus-conflict', 'weak-movie'],
                     formatter={'episode': int, 'version': int},
                     disabled=number_not_preferred)
    else:
        (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                      formatter={'episode': int, 'version': int},
                      disabled=number_not_preferred)
         .defaults(validator=None)
         .regex(r'(?P<episode>\d{3,4})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*'))

    # 1, 2, 3
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d)'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'],
                     formatter={'episode': int, 'version': int},
                     disabled=context_is_not_episode)
    else:
        (rebulk.chain(tags=['bonus-conflict', 'weak-movie'],
                      formatter={'episode': int, 'version': int},
                      disabled=context_is_not_episode)
         .defaults(validator=None)
         .regex(r'(?P<episode>\d)')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*'))

    # e112, e113
    if REGEX_AVAILABLE:
        rebulk.regex(r'e(?P<episode>\d{1,4})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4}))*',
                     formatter={'episode': int, 'version': int})
    else:
        (rebulk.chain(formatter={'episode': int, 'version': int})
         .defaults(validator=None)
         .regex(r'e(?P<episode>\d{1,4})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*'))

    # ep 112, ep113, ep112, ep113
    if REGEX_AVAILABLE:
        rebulk.regex(r'ep-?(?P<episode>\d{1,4})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4}))*',
                     abbreviations=[dash],
                     formatter={'episode': int, 'version': int})
    else:
        (rebulk.chain(abbreviations=[dash],
                      formatter={'episode': int, 'version': int})
         .defaults(validator=None)
         .regex(r'ep-?(?P<episode>\d{1,4})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*'))

    # 102, 0102
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})'
                     r'(?:v(?P<version>\d+))?'
                     r'(?:(?P<episodeSeparator>x|-)(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=year_conflict_solver,
                     disabled=number_preferred)
    else:
        (rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                      formatter={'season': int, 'episode': int, 'version': int},
                      conflict_solver=year_conflict_solver,
                      disabled=number_preferred)
         .defaults(validator=None)
         .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})')
         .regex(r'v(?P<version>\d+)').repeater('?')
         .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*'))

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d+)?-?\L<of_words>-?(?P<count>\d+)-?\L<episode_words>?',
                     of_words=of_words,
                     episode_words=episode_words,
                     abbreviations=[dash],
                     children=True,
                     private_parent=True,
                     formatter=int)
    else:
        detached_count_pattern = (r'(?P<episode>\d+)?-?' + build_or_pattern(of_words)
                                  + r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?')
        rebulk.regex(detached_count_pattern,
                     abbreviations=[dash],
                     children=True,
                     private_parent=True,
                     formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash],
                 name="hardcoded-movies",
                 marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange,
                 SeasonSeparatorRange,
                 RemoveWeakIfMovie,
                 RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate,
                 EpisodeDetailValidator,
                 RemoveDetachedEpisodeNumber,
                 VersionValidator,
                 CountValidator,
                 EpisodeSingleDigitValidator)

    return rebulk