def video_codec():
    """
    Build the Rebulk object that registers video codec related patterns.

    Patterns are registered under three property names in turn:
    ``video_codec``, ``video_profile`` and ``video_api``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    # --- video_codec: (regex alternatives, reported value), in registration order.
    codec_table = (
        ((r"Rv\d{2}",), "Real"),
        (("Mpeg2",), "Mpeg2"),
        (("DVDivX", "DivX"), "DivX"),
        (("XviD",), "XviD"),
        (("[hx]-?264(?:-?AVC(HD)?)?", "MPEG-?4(?:-?AVC(HD)?)", "AVCHD"), "h264"),
        (("[hx]-?265(?:-?HEVC)?", "HEVC"), "h265"),
    )
    rebulk.defaults(name="video_codec")
    for alternatives, codec in codec_table:
        rebulk.regex(*alternatives, value=codec)

    # --- video_profile
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # http://fr.wikipedia.org/wiki/H.264
    rebulk.defaults(name="video_profile", validator=seps_surround)
    rebulk.regex('10.?bits?', 'Hi10P', value='10bit')
    rebulk.regex('8.?bits?', value='8bit')

    # Plain-string profiles, all tagged for the profile rule.
    profile_rule_tag = 'video_profile.rule'
    for words, profile in (
            (('BP',), 'BP'),
            (('XP', 'EP'), 'XP'),
            (('MP',), 'MP'),
            (('HP', 'HiP'), 'HP'),
    ):
        rebulk.string(*words, value=profile, tags=profile_rule_tag)
    for profile in ('Hi422P', 'Hi444PP'):
        rebulk.regex(profile, value=profile, tags=profile_rule_tag)

    # --- video_api (name overridden on the single pattern)
    rebulk.string('DXVA', value='DXVA', name='video_api')

    rebulk.rules(ValidateVideoCodec, VideoProfileRule)

    return rebulk
def when(self, matches, context):
    """
    Look for the release groups listed under ``expected_group`` in the context.

    Each entry is registered on a throw-away Rebulk whose matches are named
    ``release_group``. Entries starting with ``re:`` are treated as regular
    expressions (marker removed, spaces replaced by dashes); every other
    entry is matched as a case-insensitive string.

    :param matches: current matches; only ``input_string`` is read here
    :param context: context providing the ``expected_group`` iterable
    :return: matches produced by the temporary Rebulk on the input string
    """
    group_finder = Rebulk().defaults(name='release_group')

    for entry in context.get('expected_group'):
        if not entry.startswith('re:'):
            group_finder.string(entry, ignore_case=True)
            continue
        # Drop the 're:' marker, then let dashes stand in for spaces.
        pattern = entry[3:].replace(' ', '-')
        group_finder.regex(pattern, abbreviations=[dash], flags=re.IGNORECASE)

    return group_finder.matches(matches.input_string, context)
def screen_size():
    """
    Build the Rebulk object that registers ``screen_size`` patterns.

    Fixed sizes (``360p`` ... ``4K``) are registered first with a dedicated
    conflict solver, then a generic ``<width>x<height>`` pattern tagged
    ``resolution`` is added.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    digits = re.compile(r'\d+')

    def conflict_solver(match, other):
        """
        Conflict solver for most screen_size.
        """
        if other.name != 'screen_size':
            return '__default__'
        if 'resolution' in other.tags:
            # The chtouile to solve conflict in "720 x 432" string matching both 720p pattern
            last_number = digits.findall(match.raw)[-1]
            if other.value.startswith(last_number):
                return match
        return other

    def format_resolution(value):
        """Join every run of digits found in the raw value with 'x'."""
        return 'x'.join(digits.findall(value))

    def resolution_conflict_solver(match, other):
        """Use the default behaviour against another screen_size, else return the other match."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    rebulk = Rebulk().string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name="screen_size", validator=seps_surround, conflict_solver=conflict_solver)

    # Every fixed-size pattern accepts an optional "<width>x" / "<width>*" prefix.
    width_prefix = r"(?:\d{3,}(?:x|\*))?"
    fixed_sizes = (
        (r"360(?:i|p?x?)", "360p"),
        (r"368(?:i|p?x?)", "368p"),
        (r"480(?:i|p?x?)", "480p"),
        (r"576(?:i|p?x?)", "576p"),
        (r"720(?:i|p?(?:50|60)?x?)", "720p"),
        (r"720(?:p(?:50|60)?x?)", "720p"),
        (r"720p?hd", "720p"),
        (r"900(?:i|p?x?)", "900p"),
        (r"1080i", "1080i"),
        (r"1080p?x?", "1080p"),
        (r"1080(?:p(?:50|60)?x?)", "1080p"),
        (r"1080p?hd", "1080p"),
        (r"2160(?:i|p?x?)", "4K"),
    )
    for height_part, size in fixed_sizes:
        rebulk.regex(width_prefix + height_part, value=size)
    rebulk.string('4k', value='4K')

    # Defaults are declared again here, this time without the conflict solver above.
    rebulk.defaults(name="screen_size", validator=seps_surround)
    rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
                 formatter=format_resolution,
                 abbreviations=[dash],
                 tags=['resolution'],
                 conflict_solver=resolution_conflict_solver)

    rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)

    return rebulk
def language():
    """
    Build the Rebulk object that registers language related patterns.

    Subtitle prefixes and suffixes are registered as private, case-insensitive
    strings; languages themselves are found by the ``find_languages``
    functional pattern.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Options shared by both subtitle affix patterns.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    rebulk = Rebulk()
    rebulk.string(*subtitle_prefixes, name="subtitle_language.prefix", **affix_options)
    rebulk.string(*subtitle_suffixes, name="subtitle_language.suffix", **affix_options)
    rebulk.functional(find_languages, properties={'language': [None]})
    rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)

    return rebulk
def screen_size(config):
    """
    Build the Rebulk object that registers ``screen_size`` and ``frame_rate`` patterns.

    The keys read from ``config`` are ``interlaced``, ``progressive``,
    ``frame_rates``, ``min_ar`` and ``max_ar``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def screen_size_disabled(context):
        """Tell whether the screen_size property is disabled for this context."""
        return is_disabled(context, 'screen_size')

    def frame_rate_disabled(context):
        """Tell whether the frame_rate property is disabled for this context."""
        return is_disabled(context, 'frame_rate')

    def resolution_conflict_solver(match, other):
        """Use the default behaviour against another screen_size, else return the other match."""
        if other.name == 'screen_size':
            return '__default__'
        return other

    interlaced = frozenset(config['interlaced'])
    progressive = frozenset(config['progressive'])
    frame_rates = [re.escape(rate) for rate in config['frame_rates']]
    min_ar = config['min_ar']
    max_ar = config['max_ar']

    rebulk = Rebulk()
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE)
    rebulk.defaults(name='screen_size',
                    validator=seps_surround,
                    abbreviations=[dash],
                    disabled=screen_size_disabled)

    frame_rate_pattern = build_or_pattern(frame_rates, name='frame_rate')
    interlaced_pattern = build_or_pattern(interlaced, name='height')
    progressive_pattern = build_or_pattern(progressive, name='height')

    # Optional "<width>x" / "<width>*" prefix shared by all height patterns.
    res_pattern = r'(?:(?P<width>\d{3,4})(?:x|\*))?'
    optional_frame_rate = frame_rate_pattern + '?'

    rebulk.regex(res_pattern + interlaced_pattern + r'(?P<scan_type>i)' + optional_frame_rate)
    progressive_prefix = res_pattern + progressive_pattern
    for progressive_suffix in (
            r'(?P<scan_type>p)' + optional_frame_rate,
            r'(?P<scan_type>p)?(?:hd)',
            r'(?P<scan_type>p)?x?',
    ):
        rebulk.regex(progressive_prefix + progressive_suffix)

    rebulk.string('4k', value='2160p')
    rebulk.regex(r'(?P<width>\d{3,4})-?(?:x|\*)-?(?P<height>\d{3,4})',
                 conflict_solver=resolution_conflict_solver)

    rebulk.regex(frame_rate_pattern + '(p|fps)',
                 name='frame_rate',
                 formatter=FrameRate.fromstring,
                 disabled=frame_rate_disabled)

    rebulk.rules(PostProcessScreenSize(progressive, min_ar, max_ar),
                 ScreenSizeOnlyOne,
                 ResolveScreenSizeConflicts)

    return rebulk
def edition():
    """
    Build the Rebulk object that registers ``edition`` patterns.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def special_conflict_solver(match, other):
        """Return the other match when it is episode_details 'Special', else the default."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    # Tag pair reused by several patterns; a fresh list is built for each call.
    neighbor_prefix = ('has-neighbor', 'release-group-prefix')

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', 'collector-edition', 'edition-collector',
                 value='Collector Edition')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special Edition',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special Edition', tags='has-neighbor')
    rebulk.regex('criterion-edition', 'edition-criterion', value='Criterion Edition')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe Edition')
    rebulk.regex('limited', 'limited-edition',
                 value='Limited Edition',
                 tags=list(neighbor_prefix))
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical',
                 value='Theatrical Edition')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended',
                 tags=list(neighbor_prefix))
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut',
                 tags=list(neighbor_prefix))

    # Words reported verbatim as the edition value.
    for word in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(word, value=word, tags=list(neighbor_prefix))

    rebulk.string('Festival', value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])

    return rebulk
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """
    Build the Rebulk object that registers ``streaming_service`` patterns.

    ``config`` maps each reported value to one pattern or a list of patterns.
    A pattern starting with ``re:`` is registered as a regular expression,
    with the ``re:`` marker removed before compilation; any other pattern is
    registered as a case-insensitive string.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    regex_marker = 're:'

    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'streaming_service'))
    rebulk = rebulk.string_defaults(ignore_case=True).regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='streaming_service', tags=['source-prefix'])

    for value, items in config.items():
        patterns = items if isinstance(items, list) else [items]
        for pattern in patterns:
            if pattern.startswith(regex_marker):
                # The marker only selects the pattern kind; leaving it in the
                # expression would make the regex require a literal "re:".
                rebulk.regex(pattern[len(regex_marker):], value=value)
            else:
                rebulk.string(pattern, value=value)

    rebulk.rules(ValidateStreamingService)

    return rebulk
def container(config):
    """
    Build the Rebulk object that registers ``container`` patterns.

    Extension families are read from ``config`` under the keys ``subtitles``,
    ``info``, ``videos``, ``torrent`` and ``nzb``. Each family is registered
    first as a trailing ``.ext`` regex tagged ``extension``, then (except
    ``info``) as plain strings surrounded by separators.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def container_disabled(context):
        """Tell whether the container property is disabled for this context."""
        return is_disabled(context, 'container')

    def extension_conflict_solver(match, other):
        """Solver for extension matches: return ``other`` for source/video_codec or a non-extension container."""
        if other.name in ('source', 'video_codec'):
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def name_conflict_solver(match, other):
        """Solver for plain-name matches: return ``match`` for source/video_codec or an extension container."""
        if other.name in ('source', 'video_codec'):
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk(disabled=container_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)

    subtitles = config['subtitles']
    info = config['info']
    videos = config['videos']
    torrent = config['torrent']
    nzb = config['nzb']

    # Trailing ".ext" patterns, one per family, in their original order.
    for exts, family in (
            (subtitles, 'subtitle'),
            (info, 'info'),
            (videos, 'video'),
            (torrent, 'torrent'),
            (nzb, 'nzb'),
    ):
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', family])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=name_conflict_solver)

    # 'sub' and 'ass' are left out of the plain-string subtitle names.
    subtitle_names = [ext for ext in subtitles if ext not in ('sub', 'ass')]
    for names, family in (
            (subtitle_names, 'subtitle'),
            (videos, 'video'),
            (torrent, 'torrent'),
            (nzb, 'nzb'),
    ):
        rebulk.string(*names, tags=[family])

    return rebulk
def container():
    """
    Build the Rebulk object that registers ``container`` patterns.

    Extension families are hard-coded in this builder. Each family is
    registered first as a trailing ``.ext`` regex tagged ``extension``, then
    (except ``info``) as plain strings surrounded by separators.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def extension_conflict_solver(match, other):
        """Solver for extension matches: return ``other`` for format/video_codec or a non-extension container."""
        if other.name in ['format', 'video_codec']:
            return other
        if other.name == 'container' and 'extension' not in other.tags:
            return other
        return '__default__'

    def name_conflict_solver(match, other):
        """Solver for plain-name matches: return ``match`` for format/video_codec or an extension container."""
        if other.name in ['format', 'video_codec']:
            return match
        if other.name == 'container' and 'extension' in other.tags:
            return match
        return '__default__'

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name='container',
                    formatter=lambda value: value.strip(seps),
                    tags=['extension'],
                    conflict_solver=extension_conflict_solver)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = ['3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2',
              'mka', 'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm',
              'ogv', 'qt', 'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv',
              'iso', 'vob']
    torrent = ['torrent']
    nzb = ['nzb']

    # Trailing ".ext" patterns, one per family, in their original order.
    for exts, family in (
            (subtitles, 'subtitle'),
            (info, 'info'),
            (videos, 'video'),
            (torrent, 'torrent'),
            (nzb, 'nzb'),
    ):
        rebulk.regex(r'\.' + build_or_pattern(exts) + '$', exts=exts, tags=['extension', family])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=lambda s: s.lower(),
                    conflict_solver=name_conflict_solver)

    # 'sub' is left out of the plain-string subtitle names.
    subtitle_names = [ext for ext in subtitles if ext not in ['sub']]
    for names, family in (
            (subtitle_names, 'subtitle'),
            (videos, 'video'),
            (torrent, 'torrent'),
            (nzb, 'nzb'),
    ):
        rebulk.string(*names, tags=[family])

    return rebulk
def language(config, common_words):
    """
    Build the Rebulk object that registers language related patterns.

    Affix lists are read from ``config`` and sorted with
    ``length_comparator``. As a side effect, a ``GuessitConverter`` built
    from ``config['synonyms']`` is stored under the ``'guessit'`` key of
    ``babelfish.language_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def merged_affixes(shared_key, specific_key):
        """Concatenate two config lists and sort the result with length_comparator."""
        return sorted(config[shared_key] + config[specific_key], key=length_comparator)

    def language_disabled(context):
        """Tell whether the language property is disabled for this context."""
        return is_disabled(context, 'language')

    def subtitle_language_disabled(context):
        """Tell whether the subtitle_language property is disabled for this context."""
        return is_disabled(context, 'subtitle_language')

    subtitle_prefixes = merged_affixes('subtitle_affixes', 'subtitle_prefixes')
    subtitle_suffixes = merged_affixes('subtitle_affixes', 'subtitle_suffixes')
    lang_prefixes = merged_affixes('language_affixes', 'language_prefixes')
    lang_suffixes = merged_affixes('language_affixes', 'language_suffixes')
    weak_affixes = frozenset(config['weak_affixes'])

    # The whole builder is disabled only when both properties are disabled.
    rebulk = Rebulk(disabled=lambda context: (is_disabled(context, 'language') and
                                              is_disabled(context, 'subtitle_language')))

    rebulk.string(*subtitle_prefixes,
                  name="subtitle_language.prefix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  tags=['release-group-prefix'],
                  disabled=subtitle_language_disabled)
    rebulk.string(*subtitle_suffixes,
                  name="subtitle_language.suffix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  disabled=subtitle_language_disabled)
    rebulk.string(*lang_suffixes,
                  name="language.suffix",
                  ignore_case=True,
                  private=True,
                  validator=seps_surround,
                  tags=['source-suffix'],
                  disabled=language_disabled)

    def find_languages(string, context=None):
        """Find languages in the string

        :return: list of tuple (property, Language, lang_word, word)
        """
        finder = LanguageFinder(context, subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes, weak_affixes)
        return finder.find(string)

    # The functional pattern is skipped when the context has no 'allowed_languages'.
    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words))

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk
def edition(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that registers ``edition`` patterns.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def edition_disabled(context):
        """Tell whether the edition property is disabled for this context."""
        return is_disabled(context, 'edition')

    def special_conflict_solver(match, other):
        """Return the other match when it is episode_details 'Special', else the default."""
        if other.name == 'episode_details' and other.value == 'Special':
            return other
        return '__default__'

    # Tag pair reused by several patterns; a fresh list is built for each call.
    neighbor_prefix = ('has-neighbor', 'release-group-prefix')

    rebulk = Rebulk(disabled=edition_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name='edition', validator=seps_surround)

    rebulk.regex('collector', "collector'?s?-edition", 'edition-collector', value='Collector')
    rebulk.regex('special-edition', 'edition-special',
                 value='Special',
                 conflict_solver=special_conflict_solver)
    rebulk.string('se', value='Special', tags='has-neighbor')
    rebulk.string('ddc', value="Director's Definitive Cut")
    rebulk.regex('criterion-edition', 'edition-criterion', 'CC', value='Criterion')
    rebulk.regex('deluxe', 'deluxe-edition', 'edition-deluxe', value='Deluxe')
    rebulk.regex('limited', 'limited-edition', value='Limited', tags=list(neighbor_prefix))
    rebulk.regex(r'theatrical-cut', r'theatrical-edition', r'theatrical', value='Theatrical')
    rebulk.regex(r"director'?s?-cut", r"director'?s?-cut-edition", r"edition-director'?s?-cut", 'DC',
                 value="Director's Cut")
    rebulk.regex('extended', 'extended-?cut', 'extended-?version',
                 value='Extended',
                 tags=list(neighbor_prefix))
    rebulk.regex('alternat(e|ive)(?:-?Cut)?',
                 value='Alternative Cut',
                 tags=list(neighbor_prefix))

    # Words reported verbatim as the edition value.
    for word in ('Remastered', 'Uncensored', 'Uncut', 'Unrated'):
        rebulk.string(word, value=word, tags=list(neighbor_prefix))

    rebulk.string('Festival', value='Festival',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.regex('imax', 'imax-edition', value='IMAX')
    rebulk.regex('fan-edit(?:ion)?', 'fan-collection', value='Fan')
    rebulk.regex('ultimate-edition', value='Ultimate')

    # Combined editions carry a list of values.
    rebulk.regex("ultimate-collector'?s?-edition", value=['Ultimate', 'Collector'])
    rebulk.regex('ultimate-fan-edit(?:ion)?', 'ultimate-fan-collection', value=['Ultimate', 'Fan'])

    return rebulk
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Build the Rebulk object that registers ``other`` patterns.

    Patterns are registered in a fixed order; most are simple
    word-to-value mappings, some carry tags consumed by the rules registered
    at the end of the builder.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def other_disabled(context):
        """Tell whether the other property is disabled for this context."""
        return is_disabled(context, 'other')

    def streaming_tags(leading=(), trailing=()):
        """Return a fresh tag list holding both streaming_service tags plus optional extras."""
        return (list(leading)
                + ['streaming_service.prefix', 'streaming_service.suffix']
                + list(trailing))

    rebulk = Rebulk(disabled=other_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    for alternatives, label in (
            (('Audio-?Fix', 'Audio-?Fixed'), 'Audio Fixed'),
            (('Sync-?Fix', 'Sync-?Fixed'), 'Sync Fixed'),
            (('Dual', 'Dual-?Audio'), 'Dual Audio'),
            (('ws', 'wide-?screen'), 'Widescreen'),
            (('Re-?Enc(?:oded)?',), 'Reencoded'),
    ):
        rebulk.regex(*alternatives, value=label)

    # Proper / Fix family.
    rebulk.string('Repack', 'Rerip', value='Proper', tags=streaming_tags())
    rebulk.string('Proper', value='Proper', tags=streaming_tags(leading=['has-neighbor']))
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip',
                 value='Proper',
                 tags=streaming_tags(trailing=['real']))
    rebulk.regex('Real', value='Proper',
                 tags=streaming_tags(leading=['has-neighbor'], trailing=['real']))
    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=streaming_tags(leading=['has-neighbor-before', 'has-neighbor-after']))
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', tags=streaming_tags())
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', tags=streaming_tags())

    for word, label in (('Fansub', 'Fan Subtitled'), ('Fastsub', 'Fast Subtitled')):
        rebulk.string(word, value=label, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined before or after 'Complete'.

        :param match: parent match whose children are inspected
        :type match:
        :return: True when at least one season word child is present
        :rtype: bool
        """
        children = match.children
        return bool(children.named('completeWordsBefore') or children.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', complete_articles, '-)?',
        '(?P<completeWordsBefore>', season_words, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_words, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': and_(seps_surround, validate_complete)})

    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)Vita', value='PS Vita')
    rebulk.regex('Vita', value='PS Vita', tags='has-neighbor')
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    # Words reported verbatim.
    for word in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(word, value=word)

    rebulk.string('3D', value='3D', tags='has-neighbor')
    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    for words, label in (
            (('HR',), 'High Resolution'),
            (('LD',), 'Line Dubbed'),
            (('MD',), 'Mic Dubbed'),
            (('mHD', 'HDLight'), 'Micro HD'),
            (('LDTV',), 'Low Definition'),
            (('HFR',), 'High Frame Rate'),
            (('VFR',), 'Variable Frame Rate'),
    ):
        rebulk.string(*words, value=label)

    # HD family: registered without the default validator.
    rebulk.string('HD', value='HD', validator=None, tags=streaming_tags())
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, tags=streaming_tags())
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None, tags=streaming_tags())
    rebulk.regex('Upscaled?', value='Upscaled')

    for word in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(word, value=word, tags=['has-neighbor', 'release-group-prefix'])
    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')

    for words, label in (
            (('CONVERT',), 'Converted'),
            (('DOCU', 'DOKU'), 'Documentary'),
            (('OM',), 'Open Matte'),
            (('STV',), 'Straight to Video'),
            (('OAR',), 'Original Aspect Ratio'),
    ):
        rebulk.string(*words, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed',
                     value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    for expression, label in (
            ('HDR(?:10)?', 'HDR10'),
            ('Dolby-?Vision', 'Dolby Vision'),
            ('BT-?2020', 'BT.2020'),
    ):
        rebulk.regex(expression, value=label, tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated',
                  tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther,
                 ValidateHasNeighbor,
                 ValidateHasNeighborAfter,
                 ValidateHasNeighborBefore,
                 ValidateScreenerRule,
                 ValidateMuxRule,
                 ValidateHardcodedSubs,
                 ValidateStreamingServiceNeighbor,
                 ValidateAtEnd,
                 ValidateReal,
                 ProperCountRule)

    return rebulk
def audio_codec(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object that registers audio related patterns.

    Patterns are registered under three property names in turn:
    ``audio_codec``, ``audio_profile`` and ``audio_channels``.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the audio_profile/audio_channels match of the pair, or '__default__'
        :rtype:
        """
        secondary = ['audio_profile', 'audio_channels']
        if match1.name == 'audio_codec' and match2.name in secondary:
            return match2
        if match2.name == 'audio_codec' and match1.name in secondary:
            return match1
        return '__default__'

    def dts_hd_conflict_solver(match, other):
        """Return the other match when it is an audio_codec, else use the default."""
        if other.name == 'audio_codec':
            return other
        return '__default__'

    # --- audio_codec
    rebulk.defaults(name='audio_codec',
                    conflict_solver=audio_codec_priority,
                    disabled=lambda context: is_disabled(context, 'audio_codec'))

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.string("MP2", value="MP2")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='Dolby Digital')
    rebulk.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    rebulk.string("AAC", value="AAC")
    rebulk.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    rebulk.string("Flac", value="FLAC")
    rebulk.string("DTS", value="DTS")
    rebulk.regex('DTS-?HD', 'DTS(?=-?MA)',
                 value='DTS-HD',
                 conflict_solver=dts_hd_conflict_solver)
    rebulk.regex('True-?HD', value='Dolby TrueHD')
    for codec in ('Opus', 'Vorbis', 'PCM', 'LPCM'):
        rebulk.string(codec, value=codec)

    # --- audio_profile: (words, value, codec tag)
    rebulk.defaults(name='audio_profile',
                    disabled=lambda context: is_disabled(context, 'audio_profile'))
    for words, profile, codec_tag in (
            (('MA',), 'Master Audio', 'DTS-HD'),
            (('HR', 'HRA'), 'High Resolution Audio', 'DTS-HD'),
            (('ES',), 'Extended Surround', 'DTS'),
            (('HE',), 'High Efficiency', 'AAC'),
            (('LC',), 'Low Complexity', 'AAC'),
            (('HQ',), 'High Quality', 'Dolby Digital'),
            (('EX',), 'EX', 'Dolby Digital'),
    ):
        rebulk.string(*words, value=profile, tags=['audio_profile.rule', codec_tag])

    # --- audio_channels
    rebulk.defaults(name="audio_channels",
                    disabled=lambda context: is_disabled(context, 'audio_channels'))
    for expression, channels in (
            (r'(7[\W_][01](?:ch)?)(?=[^\d]|$)', '7.1'),
            (r'(5[\W_][01](?:ch)?)(?=[^\d]|$)', '5.1'),
            (r'(2[\W_]0(?:ch)?)(?=[^\d]|$)', '2.0'),
    ):
        rebulk.regex(expression, value=channels, children=True)

    # Separator-less forms are tagged as weak.
    weak_tag = 'weak-audio_channels'
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags=weak_tag)
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags=weak_tag)
    rebulk.string('20', value='2.0', validator=seps_after, tags=weak_tag)

    for words, channels in (
            (('7ch', '8ch'), '7.1'),
            (('5ch', '6ch'), '5.1'),
            (('2ch', 'stereo'), '2.0'),
            (('1ch', 'mono'), '1.0'),
    ):
        rebulk.string(*words, value=channels)

    rebulk.rules(DtsHDRule,
                 DtsRule,
                 AacRule,
                 DolbyDigitalRule,
                 AudioValidatorRule,
                 HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
def audio_codec():
    """
    Build the Rebulk object that registers audio related patterns.

    Patterns are registered under three property names in turn:
    ``audio_codec``, ``audio_profile`` and ``audio_channels``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the audio_profile/audio_channels match of the pair, or '__default__'
        :rtype:
        """
        secondary = ['audio_profile', 'audio_channels']
        if match1.name == 'audio_codec' and match2.name in secondary:
            return match2
        if match2.name == 'audio_codec' and match1.name in secondary:
            return match1
        return '__default__'

    # --- audio_codec
    rebulk.defaults(name="audio_codec", conflict_solver=audio_codec_priority)

    rebulk.regex("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+", value="MP3")
    rebulk.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?', value='AC3')
    rebulk.regex("DolbyAtmos", "Dolby-Atmos", "Atmos", value="DolbyAtmos")
    for words, codec in (
            (("AAC",), "AAC"),
            (('EAC3', 'DDP', 'DD+'), "EAC3"),
            (("Flac",), "FLAC"),
            (("DTS",), "DTS"),
    ):
        rebulk.string(*words, value=codec)
    rebulk.regex("True-?HD", value="TrueHD")

    # --- audio_profile: each profile is tagged with the codec name it goes with.
    rebulk.defaults(name="audio_profile")
    rebulk.string("HD", value="HD", tags="DTS")
    rebulk.regex("HD-?MA", value="HDMA", tags="DTS")
    for profile, codec_tag in (("HE", "AAC"), ("LC", "AAC"), ("HQ", "AC3")):
        rebulk.string(profile, value=profile, tags=codec_tag)

    # --- audio_channels
    rebulk.defaults(name="audio_channels")
    for expression, channels in (
            (r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', '7.1'),
            (r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', '5.1'),
            (r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', '2.0'),
    ):
        rebulk.regex(expression, value=channels, children=True)

    # Separator-less forms are tagged as weak.
    weak_tag = 'weak-audio_channels'
    rebulk.regex('7[01]', value='7.1', validator=seps_after, tags=weak_tag)
    rebulk.regex('5[01]', value='5.1', validator=seps_after, tags=weak_tag)
    rebulk.string('20', value='2.0', validator=seps_after, tags=weak_tag)

    for words, channels in (
            (('7ch', '8ch'), '7.1'),
            (('5ch', '6ch'), '5.1'),
            (('2ch', 'stereo'), '2.0'),
            (('1ch', 'mono'), '1.0'),
    ):
        rebulk.string(*words, value=channels)

    rebulk.rules(DtsRule,
                 AacRule,
                 Ac3Rule,
                 AudioValidatorRule,
                 HqConflictRule,
                 AudioChannelsValidatorRule)

    return rebulk
def other():
    """
    Build the Rebulk object that registers ``other`` patterns.

    Patterns are registered in a fixed order; most are simple
    word-to-value mappings, some carry tags consumed by the rules registered
    at the end of the builder.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # Tag groups reused below; a fresh list is built from them for each call.
    streaming = ('streaming_service.prefix', 'streaming_service.suffix')
    neighbor_prefix = ('has-neighbor', 'release-group-prefix')

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    for alternatives, label in (
            (('Audio-?Fix', 'Audio-?Fixed'), 'AudioFix'),
            (('Sync-?Fix', 'Sync-?Fixed'), 'SyncFix'),
            (('Dual', 'Dual-?Audio'), 'DualAudio'),
            (('ws', 'wide-?screen'), 'WideScreen'),
            (('Re-?Enc(?:oded)?',), 'ReEncoded'),
    ):
        rebulk.regex(*alternatives, value=label)

    # Proper family.
    rebulk.string('Real', 'Fix', 'Fixed', value='Proper',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.string('Proper', 'Repack', 'Rerip', 'Dirfix', 'Nfofix', 'Prooffix',
                  value='Proper',
                  tags=list(streaming))
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Proper', tags=list(streaming))

    for word in ('Fansub', 'Fastsub'):
        rebulk.string(word, value=word, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Make sure a season word is defined before or after 'Complete'.

        :param match: parent match whose children are inspected
        :type match:
        :return: True when at least one season word child is present
        :rtype: bool
        """
        children = match.children
        return bool(children.named('completeWordsBefore') or children.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', complete_articles, '-)?',
        '(?P<completeWordsBefore>', season_words, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_words, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})

    rebulk.string('R5', 'RC', value='R5')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')

    # Words reported verbatim.
    for word in ('Screener', 'Remux', '3D', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR',
                 'PAL', 'SECAM', 'NTSC', 'CC', 'LD', 'MD', 'XXX'):
        rebulk.string(word, value=word)
    rebulk.string('LDTV', value='LD')

    # HD family: registered without the default validator.
    rebulk.string('HD', value='HD', validator=None, tags=list(streaming))
    rebulk.regex('Full-?HD', 'FHD', value='FullHD', validator=None, tags=list(streaming))
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='UltraHD', validator=None, tags=list(streaming))

    for word in ('Complete', 'Classic', 'LiNE', 'Bonus', 'Trailer', 'FINAL',
                 'Retail', 'Colorized', 'Internal'):
        rebulk.string(word, value=word, tags=list(neighbor_prefix))
    rebulk.regex('Read-?NFO', value='Read NFO')

    for word, label in (
            ('CONVERT', 'Converted'),
            ('DOCU', 'Documentary'),
            ('OM', 'Open Matte'),
            ('STV', 'Straight to Video'),
            ('OAR', 'Original Aspect Ratio'),
    ):
        rebulk.string(word, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=list(neighbor_prefix))

    for coast in ('East', 'West'):
        rebulk.regex(r'(?:Live-)?(?:Episode-)?' + coast + '-?(?:Coast-)?Feed',
                     value=coast + ' Coast Feed')

    rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'format-prefix', 'format-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'format-suffix'])
    rebulk.string('HC', value='Hardcoded Subtitles')

    rebulk.rules(ValidateHasNeighbor,
                 ValidateHasNeighborAfter,
                 ValidateHasNeighborBefore,
                 ValidateScreenerRule,
                 ValidateMuxRule,
                 ValidateHardcodedSubs,
                 ValidateStreamingServiceNeighbor,
                 ProperCountRule)

    return rebulk
def episodes():
    """
    Builder for the rebulk object handling season/episode related patterns.

    Registers, in order: the strong ``SxxExx`` style chains, ``episode_details``
    keywords, worded season/episode patterns ("season 1", "episode 4"), several
    weak digit-only episode chains, a standalone version pattern, the detached
    "X of Y" count pattern, the ``episode_format`` pattern and a marker for a
    hardcoded movie title. The post-processing rules are attached at the end.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns.

        Returns ``match`` when a season/episode match conflicts with a
        screen_size, video_codec, audio_codec, audio_channels, container or
        date match. When both matches are season/episode matches coming from
        different initiators, returns the one whose initiator raw text contains
        an 'x' (``match`` is checked first). Any other case returns the
        ``'__default__'`` sentinel.

        :param match: match being evaluated
        :param other: the conflicting match
        :return: ``match``, ``other`` or ``'__default__'``
        """
        if match.name in ['season', 'episode'] and other.name in ['screen_size', 'video_codec', 'audio_codec',
                                                                  'audio_channels', 'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    # Usual separators plus the characters used as season/episode markers.
    season_episode_seps = []
    season_episode_seps.extend(seps)
    season_episode_seps.extend(['x', 'X', 'e', 'E'])

    def season_episode_validator(match):
        """
        Validator for season/episode matches.

        For a season/episode match whose initiator does not start at index 0,
        the character at the initiator start, or the one right before it, must
        be one of ``season_episode_seps``. Every other match is accepted.

        :param match: match to validate
        :return: True when the match is accepted
        """
        if match.name in ['season', 'episode'] and match.initiator.start:
            return match.initiator.input_string[match.initiator.start] in season_episode_seps \
                or match.initiator.input_string[match.initiator.start - 1] in season_episode_seps
        return True

    # 01x02, 01x02x03x04
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 conflict_solver=season_episode_conflict_solver) \
        .defaults(validator=season_episode_validator) \
        .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
        .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
        .chain() \
        .regex(r'S(?P<season>\d+)') \
        .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # NOTE(review): presumably these defaults apply to every pattern registered below - confirm against rebulk.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Word lists used to build the worded season/episode patterns below.
    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # <season word> <numeral> [<of word> <count>] followed by optional extra seasons
    rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
        .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')

    # Digits-only variant, disabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Numeral variant (parsed with parse_numeral), only enabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    # Season followed by an "all" word: the 'other' group is always formatted to 'Complete'.
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113 (disabled unless the 'episode_prefer_number' context option is set)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3 (only enabled when the context type is 'episode')
    rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102 (disabled when the 'episode_prefer_number' context option is set)
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...)
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words

    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object of the video codec related properties.

    Patterns are registered for four property names, in this order:
    ``video_codec``, ``video_profile``, ``video_api`` and ``color_depth``.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_for(property_name):
        """Build the ``disabled`` callback that defers to ``is_disabled`` for ``property_name``."""
        return lambda context: is_disabled(context, property_name)

    builder = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    # --- video_codec -------------------------------------------------------
    builder.defaults(name="video_codec",
                     tags=['source-suffix', 'streaming_service.suffix'],
                     disabled=disabled_for('video_codec'))

    # (registration method, patterns, value) - registration order is kept as-is.
    codec_table = (
        ('regex', (r'Rv\d{2}',), 'RealVideo'),
        ('regex', ('Mpe?g-?2', '[hx]-?262'), 'MPEG-2'),
        ('string', ("DVDivX", "DivX"), "DivX"),
        ('string', ('XviD',), 'Xvid'),
        ('regex', ('VC-?1',), 'VC-1'),
        ('string', ('VP7',), 'VP7'),
        ('string', ('VP8', 'VP80'), 'VP8'),
        ('string', ('VP9',), 'VP9'),
        ('regex', ('[hx]-?263',), 'H.263'),
        ('regex', ('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?'), 'H.264'),
        ('regex', ('[hx]-?265', 'HEVC'), 'H.265'),
    )
    for kind, patterns, codec in codec_table:
        getattr(builder, kind)(*patterns, value=codec)

    # "hevc10" carries both the codec and the color depth in a single token.
    builder.regex('(?P<video_codec>hevc)(?P<color_depth>10)',
                  value={'video_codec': 'H.265', 'color_depth': '10-bit'},
                  tags=['video-codec-suffix'],
                  children=True)

    # --- video_profile -----------------------------------------------------
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    # https://en.wikipedia.org/wiki/Scalable_Video_Coding
    # https://en.wikipedia.org/wiki/AVCHD
    # https://en.wikipedia.org/wiki/H.265/HEVC
    builder.defaults(name="video_profile",
                     validator=seps_surround,
                     disabled=disabled_for('video_profile'))

    tagged_profile_table = (
        ('string', ('BP',), 'Baseline'),
        ('string', ('XP', 'EP'), 'Extended'),
        ('string', ('MP',), 'Main'),
        ('string', ('HP', 'HiP'), 'High'),
        ('string', ('SC', 'SVC'), 'Scalable Video Coding'),
        ('regex', ('AVC(?:HD)?',), 'Advanced Video Codec High Definition'),
        ('string', ('HEVC',), 'High Efficiency Video Coding'),
    )
    for kind, patterns, profile in tagged_profile_table:
        getattr(builder, kind)(*patterns, value=profile, tags='video_profile.rule')

    # no profile validation is required for the following profiles (no rule tag)
    untagged_profile_table = (
        ('Hi422P', 'High 4:2:2'),
        ('Hi444PP', 'High 4:4:4 Predictive'),
        ('Hi10P?', 'High 10'),
    )
    for pattern, profile in untagged_profile_table:
        builder.regex(pattern, value=profile)

    # --- video_api ---------------------------------------------------------
    builder.string('DXVA', value='DXVA', name='video_api', disabled=disabled_for('video_api'))

    # --- color_depth -------------------------------------------------------
    builder.defaults(name='color_depth',
                     validator=seps_surround,
                     disabled=disabled_for('color_depth'))
    builder.regex('12.?bits?', value='12-bit')
    builder.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    builder.regex('8.?bits?', value='8-bit')

    builder.rules(ValidateVideoCodec, VideoProfileRule)

    return builder
def audio_codec():
    """
    Builder for the rebulk object of the audio related properties.

    Patterns are registered for ``audio_codec``, ``audio_profile`` and
    ``audio_channels``, in that order.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec.

        When one match is an ``audio_codec`` and the other one is an
        ``audio_profile`` or ``audio_channels``, the non-codec match is
        returned; otherwise the ``'__default__'`` sentinel is returned.

        :param match1: first conflicting match
        :type match1:
        :param match2: second conflicting match
        :type match2:
        :return: ``match1``, ``match2`` or ``'__default__'``
        :rtype:
        """
        secondary_names = ['audio_profile', 'audio_channels']
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    # --- audio_codec (all regex patterns) ------------------------------------
    builder.defaults(name="audio_codec", conflict_solver=audio_codec_priority)
    codec_table = (
        (("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+"), "MP3"),
        (("Dolby", "DolbyDigital", "Dolby-Digital", "DDP?"), "DolbyDigital"),
        (("DolbyAtmos", "Dolby-Atmos", "Atmos"), "DolbyAtmos"),
        (("AAC",), "AAC"),
        (("AC3D?",), "AC3"),
        (("Flac",), "FLAC"),
        (("DTS",), "DTS"),
        (("True-?HD",), "TrueHD"),
    )
    for patterns, codec in codec_table:
        builder.regex(*patterns, value=codec)

    # --- audio_profile: (registration method, pattern, value, codec tag) -----
    builder.defaults(name="audio_profile")
    profile_table = (
        ('string', "HD", "HD", "DTS"),
        ('regex', "HD-?MA", "HDMA", "DTS"),
        ('string', "HE", "HE", "AAC"),
        ('string', "LC", "LC", "AAC"),
        ('string', "HQ", "HQ", "AC3"),
    )
    for kind, pattern, profile, codec_tag in profile_table:
        getattr(builder, kind)(pattern, value=profile, tags=codec_tag)

    # --- audio_channels ------------------------------------------------------
    builder.defaults(name="audio_channels")
    channel_regex_table = (
        (r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', '7.1'),
        (r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', '5.1'),
        (r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', '2.0'),
    )
    for pattern, layout in channel_regex_table:
        builder.regex(pattern, value=layout, children=True)

    channel_string_table = (
        (('7ch', '8ch'), '7.1'),
        (('5ch', '6ch'), '5.1'),
        (('2ch', 'stereo'), '2.0'),
        (('1ch', 'mono'), '1.0'),
    )
    for patterns, layout in channel_string_table:
        builder.string(*patterns, value=layout)

    builder.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)

    return builder
def website():
    """
    Builder for the rebulk object of the ``website`` property.

    Three regex patterns are registered (safe subdomain + any TLD, optional
    safe subdomain + safe TLD, optional safe subdomain + safe prefix + any
    TLD), plus a private string pattern tagged ``website.prefix``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    # Read the TLD list inside a ``with`` block so the resource stream is
    # closed deterministically instead of being left to the garbage collector.
    # Lines containing b'--' are skipped and the first remaining entry is
    # dropped (presumably a header line - verify against the data file).
    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_stream:
        tlds = [line.strip().decode('utf-8')
                for line in tld_stream.readlines()
                if b'--' not in line][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tld are for sure part of a website
    website_prefixes = ['from']

    # <safe subdomain>.<anything>.<tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # [<safe subdomain>.]<name>.<safe tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # [<safe subdomain>.]<name>.<safe prefix>.<tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:'+build_or_pattern(safe_prefix) +
                 r'\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    # Hoisted once: str.startswith accepts a tuple of candidate prefixes.
    safe_starts = tuple(safe_subdomains + safe_prefix)

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches: accepts a match exposing a
            season, episode or year name.
            """
            return any(name in ['season', 'episode', 'year'] for name in match.names)

        def when(self, matches, context):
            """
            Collect website matches that do not start with a safe subdomain or
            safe prefix and are followed by a season/episode/year match.

            :param matches: current matches
            :param context: parsing context (unused)
            :return: list of website matches to remove
            """
            to_remove = []
            for website_match in matches.named('website'):
                if website_match.value.lower().startswith(safe_starts):
                    continue
                suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                if suffix:
                    to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
def audio_codec(config):  # pylint:disable=unused-argument
    """
    Builder for the rebulk object of the audio related properties.

    Patterns are registered for ``audio_codec``, ``audio_profile`` and
    ``audio_channels``, in that order. Additional ``audio_channels`` patterns
    are read from ``config['audio_channels']``, a mapping of value to pattern
    items where an item starting with ``re:`` is registered as a regex.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_for(property_name):
        """Build the ``disabled`` callback that defers to ``is_disabled`` for ``property_name``."""
        return lambda context: is_disabled(context, property_name)

    def audio_codec_priority(match1, match2):
        """
        Gives priority to audio_codec.

        When one match is an ``audio_codec`` and the other one is an
        ``audio_profile`` or ``audio_channels``, the non-codec match is
        returned; otherwise the ``'__default__'`` sentinel is returned.

        :param match1: first conflicting match
        :type match1:
        :param match2: second conflicting match
        :type match2:
        :return: ``match1``, ``match2`` or ``'__default__'``
        :rtype:
        """
        secondary_names = ['audio_profile', 'audio_channels']
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match1.name in secondary_names and match2.name == 'audio_codec':
            return match1
        return '__default__'

    def prefer_other_codec(match, other):
        """Conflict solver of the DTS-HD pattern: yields ``other`` when it is an audio_codec too."""
        if other.name == 'audio_codec':
            return other
        return '__default__'

    builder = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)

    # --- audio_codec: (registration method, patterns, value) -----------------
    builder.defaults(name='audio_codec',
                     conflict_solver=audio_codec_priority,
                     disabled=disabled_for('audio_codec'))
    leading_codecs = (
        ('regex', ("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+"), "MP3"),
        ('string', ("MP2",), "MP2"),
        ('regex', ('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?'), 'Dolby Digital'),
        ('regex', ('Dolby-?Atmos', 'Atmos'), 'Dolby Atmos'),
        ('string', ("AAC",), "AAC"),
        ('string', ('EAC3', 'DDP', 'DD+'), 'Dolby Digital Plus'),
        ('string', ("Flac",), "FLAC"),
        ('string', ("DTS",), "DTS"),
    )
    for kind, patterns, codec in leading_codecs:
        getattr(builder, kind)(*patterns, value=codec)

    builder.regex('DTS-?HD', 'DTS(?=-?MA)', value='DTS-HD', conflict_solver=prefer_other_codec)

    trailing_codecs = (
        ('regex', 'True-?HD', 'Dolby TrueHD'),
        ('string', 'Opus', 'Opus'),
        ('string', 'Vorbis', 'Vorbis'),
        ('string', 'PCM', 'PCM'),
        ('string', 'LPCM', 'LPCM'),
    )
    for kind, pattern, codec in trailing_codecs:
        getattr(builder, kind)(pattern, value=codec)

    # --- audio_profile: (patterns, value, codec tag) --------------------------
    builder.defaults(name='audio_profile', disabled=disabled_for('audio_profile'))
    profile_table = (
        (('MA',), 'Master Audio', 'DTS-HD'),
        (('HR', 'HRA'), 'High Resolution Audio', 'DTS-HD'),
        (('ES',), 'Extended Surround', 'DTS'),
        (('HE',), 'High Efficiency', 'AAC'),
        (('LC',), 'Low Complexity', 'AAC'),
        (('HQ',), 'High Quality', 'Dolby Digital'),
        (('EX',), 'EX', 'Dolby Digital'),
    )
    for patterns, profile, codec_tag in profile_table:
        builder.string(*patterns, value=profile, tags=['audio_profile.rule', codec_tag])

    # --- audio_channels -------------------------------------------------------
    builder.defaults(name="audio_channels", disabled=disabled_for('audio_channels'))
    # Bare digit pairs are tagged as weak channel matches.
    builder.regex('7[01]', value='7.1', validator=seps_after, tags='weak-audio_channels')
    builder.regex('5[01]', value='5.1', validator=seps_after, tags='weak-audio_channels')
    builder.string('20', value='2.0', validator=seps_after, tags='weak-audio_channels')

    # Configured patterns: "re:"-prefixed items are regexes, the others plain strings.
    for layout, configured_items in config.get('audio_channels').items():
        for configured in configured_items:
            if not configured.startswith('re:'):
                builder.string(configured, value=layout)
                continue
            builder.regex(configured[3:], value=layout, children=True)

    builder.rules(DtsHDRule, DtsRule, AacRule, DolbyDigitalRule, AudioValidatorRule, HqConflictRule,
                  AudioChannelsValidatorRule)

    return builder
def other():
    """
    Builder for the rebulk object of the ``other`` property.

    Every pattern registered here uses the name ``other`` and, unless
    overridden on the pattern itself, the ``seps_surround`` validator.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash]).string_defaults(ignore_case=True)
    builder.defaults(name="other", validator=seps_surround)

    def validate_complete(match):
        """
        Accept a "Complete" match surrounded by separators unless its raw
        text, once lowercased and stripped of separators, is exactly
        "complete".
        """
        surrounded = seps_surround(match)
        if not surrounded:
            return surrounded
        return match.raw.lower().strip(seps) != "complete"

    # (registration method, patterns, value)
    fix_and_layout_table = (
        ('regex', ('Audio-?Fix', 'Audio-?Fixed'), 'AudioFix'),
        ('regex', ('Sync-?Fix', 'Sync-?Fixed'), 'SyncFix'),
        ('regex', ('Dual-?Audio',), 'DualAudio'),
        ('regex', ('ws', 'wide-?screen'), 'WideScreen'),
        ('string', ('Netflix', 'NF'), 'Netflix'),
    )
    for kind, patterns, label in fix_and_layout_table:
        getattr(builder, kind)(*patterns, value=label)

    builder.string('Real', 'Fix', 'Fixed',
                   value='Proper',
                   tags=['has-neighbor-before', 'has-neighbor-after'])
    builder.string('Proper', 'Repack', 'Rerip', value='Proper')
    for label in ('Fansub', 'Fastsub'):
        builder.string(label, value=label, tags='has-neighbor')

    builder.regex('(?:Seasons?-)?Complete',
                  value='Complete',
                  tags=['release-group-prefix'],
                  validator=validate_complete)
    builder.string('R5', 'RC', value='R5')
    builder.regex('Pre-?Air', value='Preair')

    # Keywords whose value is the keyword itself.
    plain_keywords = ('Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ', 'DDC', 'HR',
                      'PAL', 'SECAM', 'NTSC', 'CC', 'LD', 'MD', 'XXX')
    for label in plain_keywords:
        builder.string(label, value=label)

    # Same, but tagged as needing a neighbor and as a release group prefix.
    neighbor_keywords = ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer', 'FINAL',
                         'Retail', 'Uncut', 'Extended', 'Extended Cut')
    for label in neighbor_keywords:
        builder.string(label, value=label, tags=['has-neighbor', 'release-group-prefix'])

    builder.string('VO', 'OV', value='OV', tags='has-neighbor')

    # The default validator is dropped here; the pattern carries the 'other.validate.screener' tag instead.
    builder.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')

    builder.rules(ValidateHasNeighbor, ValidateHasNeighborAfter, ValidateHasNeighborBefore,
                  ValidateScreenerRule, ProperCountRule)

    return builder
def episodes(config):
    """
    Builder for the rebulk object handling season/episode related patterns.

    Word lists, markers, separators and range limits are read from ``config``
    (keys: ``episode_max_range``, ``season_max_range``, ``season_words``,
    ``episode_words``, ``of_words``, ``all_words``, ``season_markers``,
    ``season_ep_markers``, ``disc_markers``, ``episode_markers``,
    ``range_separators``, ``discrete_separators`` and ``max_range_gap``).

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season/episode rules should be disabled (either property is disabled)."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains when the last two episode values differ by more than
        ``episode_max_range``, or the last two season values by more than
        ``season_max_range``.

        :param matches: matches collected so far in the chain
        :type matches:
        :return: True when the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns.

        Only conflicts between matches of different names are handled here;
        every unhandled case returns the ``'__default__'`` sentinel.

        :param match: match being evaluated
        :param other: the conflicting match
        :return: ``match``, ``other`` or ``'__default__'``
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    # Separators that are neither range separators nor configured discrete separators.
    weak_discrete_separators = [sep for sep in seps_no_fs if sep not in range_separators]
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: True when ordering and consecutiveness checks pass
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: False when two values split by a weak discrete separator are too far apart
            :rtype: bool
            """
            separator_name = property_name + 'Separator'

            def is_separator(candidate):
                """Select the separator children belonging to ``property_name``."""
                return candidate.name == separator_name

            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match, is_separator, 0)
                    # No separator may sit right before this value; without this
                    # guard the ``.raw`` accesses below would raise AttributeError.
                    if separator:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong discrete separator validates the whole chain.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))
    rebulk.regex(r'Extras?', 'Omake', name='episode_details', value='Extras',
                 disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators.

        A match whose raw text is int-coercable is always accepted.

        :param match: match to validate
        :type match:
        :return: True when the match is accepted
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    # <season word> <numeral> [<of word> <count>] followed by optional extra seasons
    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    # Digits-only variant, disabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    # Numeral variant (parsed with parse_numeral), only enabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    # Season followed by an "all" word: the 'other' group is always formatted to 'Complete'.
    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...)
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words

    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
def streaming_service():
    """Streaming service property.

    String patterns ignore case; regex patterns use ``re.IGNORECASE`` and the
    ``dash`` abbreviation.  Pattern defaults are ``name='streaming_service'``
    and ``tags=['source-prefix']``.  Patterns are registered in the order of
    the table below and ``ValidateStreamingService`` is attached at the end.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    services = Rebulk().string_defaults(ignore_case=True)
    services = services.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    services.defaults(name='streaming_service', tags=['source-prefix'])

    text, pattern = services.string, services.regex

    # (registration method, patterns, reported value) -- order is significant
    # only in that it reproduces the original registration sequence.
    catalogue = (
        (text, ('AE', 'A&E'), 'A&E'),
        (text, ('AMBC',), 'ABC'),
        (text, ('AMC',), 'AMC'),
        (text, ('AMZN', 'Amazon', 'AmazonPrime'), 'Amazon Prime'),
        (pattern, ('Amazon-Prime',), 'Amazon Prime'),
        (text, ('AS', 'AdultSwim'), 'Adult Swim'),
        (pattern, ('Adult-Swim',), 'Adult Swim'),
        (text, ('iP', 'BBCiPlayer'), 'BBC iPlayer'),
        (pattern, ('BBC-iPlayer',), 'BBC iPlayer'),
        (text, ('CBS',), 'CBS'),
        (text, ('CC', 'ComedyCentral'), 'Comedy Central'),
        (pattern, ('Comedy-Central',), 'Comedy Central'),
        (text, ('CR', 'CrunchyRoll'), 'Crunchy Roll'),
        (pattern, ('Crunchy-Roll',), 'Crunchy Roll'),
        (text, ('CW', 'TheCW'), 'The CW'),
        (pattern, ('The-CW',), 'The CW'),
        (text, ('DISC', 'Discovery'), 'Discovery'),
        (text, ('DIY',), 'DIY Network'),
        (text, ('DSNY', 'Disney'), 'Disney'),
        (text, ('EPIX', 'ePix'), 'ePix'),
        (text, ('HBO', 'HBOGo'), 'HBO Go'),
        (pattern, ('HBO-Go',), 'HBO Go'),
        (text, ('HIST', 'History'), 'History'),
        (text, ('ID',), 'Investigation Discovery'),
        (text, ('IFC', 'IFC'), 'IFC'),
        (text, ('PBS', 'PBS'), 'PBS'),
        (text, ('NATG', 'NationalGeographic'), 'National Geographic'),
        (pattern, ('National-Geographic',), 'National Geographic'),
        (text, ('NBA', 'NBATV'), 'NBA TV'),
        (pattern, ('NBA-TV',), 'NBA TV'),
        (text, ('NBC',), 'NBC'),
        (text, ('NFL',), 'NFL'),
        (text, ('NICK', 'Nickelodeon'), 'Nickelodeon'),
        (text, ('NF', 'Netflix'), 'Netflix'),
        (text, ('iTunes', 'iT'), 'iTunes'),
        (text, ('RTE',), 'RTÉ One'),
        (text, ('SESO', 'SeeSo'), 'SeeSo'),
        (text, ('SPKE', 'SpikeTV', 'Spike TV'), 'Spike TV'),
        (text, ('SYFY', 'Syfy'), 'Syfy'),
        (text, ('TFOU', 'TFou'), 'TFou'),
        (text, ('TLC',), 'TLC'),
        (text, ('TV3',), 'TV3 Ireland'),
        (text, ('TV4',), 'TV4 Sweeden'),
        (text, ('TVL', 'TVLand', 'TV Land'), 'TV Land'),
        (text, ('UFC',), 'UFC'),
        (text, ('USAN',), 'USA Network'),
    )
    for register, names, service in catalogue:
        register(*names, value=service)

    services.rules(ValidateStreamingService)

    return services
def streaming_service(config):  # pylint: disable=too-many-statements,unused-argument
    """Streaming service property.

    The whole Rebulk is disabled when ``is_disabled(context,
    'streaming_service')`` is true.  String patterns ignore case; regex
    patterns use ``re.IGNORECASE`` and the ``dash`` abbreviation.  Pattern
    defaults are ``name='streaming_service'`` and ``tags=['source-prefix']``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def service_disabled(context):
        """Tell whether the streaming_service property is disabled for *context*."""
        return is_disabled(context, 'streaming_service')

    services = Rebulk(disabled=service_disabled)
    services = services.string_defaults(ignore_case=True)
    services = services.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    services.defaults(name='streaming_service', tags=['source-prefix'])

    text, pattern = services.string, services.regex

    # (registration method, patterns, reported value), listed in the exact
    # order in which the patterns are registered.
    catalogue = (
        (text, ('AE', 'A&E'), 'A&E'),
        (text, ('AMBC',), 'ABC'),
        (text, ('AUBC',), 'ABC Australia'),
        (text, ('AJAZ',), 'Al Jazeera English'),
        (text, ('AMC',), 'AMC'),
        (text, ('AMZN', 'Amazon'), 'Amazon Prime'),
        (pattern, ('Amazon-?Prime',), 'Amazon Prime'),
        (text, ('AS',), 'Adult Swim'),
        (pattern, ('Adult-?Swim',), 'Adult Swim'),
        (text, ('ATK',), "America's Test Kitchen"),
        (text, ('ANPL',), 'Animal Planet'),
        (text, ('ANLB',), 'AnimeLab'),
        (text, ('AOL',), 'AOL'),
        (text, ('ARD',), 'ARD'),
        (text, ('iP',), 'BBC iPlayer'),
        (pattern, ('BBC-?iPlayer',), 'BBC iPlayer'),
        (text, ('BRAV',), 'BravoTV'),
        (text, ('CNLP',), 'Canal+'),
        (text, ('CN',), 'Cartoon Network'),
        (text, ('CBC',), 'CBC'),
        (text, ('CBS',), 'CBS'),
        (text, ('CNBC',), 'CNBC'),
        (text, ('CC',), 'Comedy Central'),
        (text, ('4OD',), 'Channel 4'),
        (text, ('CHGD',), 'CHRGD'),
        (text, ('CMAX',), 'Cinemax'),
        (text, ('CMT',), 'Country Music Television'),
        (pattern, ('Comedy-?Central',), 'Comedy Central'),
        (text, ('CCGC',), 'Comedians in Cars Getting Coffee'),
        (text, ('CR',), 'Crunchy Roll'),
        (text, ('CRKL',), 'Crackle'),
        (pattern, ('Crunchy-?Roll',), 'Crunchy Roll'),
        (text, ('CSPN',), 'CSpan'),
        (text, ('CTV',), 'CTV'),
        (text, ('CUR',), 'CuriosityStream'),
        (text, ('CWS',), 'CWSeed'),
        (text, ('DSKI',), 'Daisuki'),
        (text, ('DHF',), 'Deadhouse Films'),
        (text, ('DDY',), 'Digiturk Diledigin Yerde'),
        (text, ('DISC', 'Discovery'), 'Discovery'),
        (text, ('DSNY', 'Disney'), 'Disney'),
        (text, ('DIY',), 'DIY Network'),
        (text, ('DOCC',), 'Doc Club'),
        (text, ('DPLY',), 'DPlay'),
        (text, ('ETV',), 'E!'),
        (text, ('EPIX',), 'ePix'),
        (text, ('ETTV',), 'El Trece'),
        (text, ('ESPN',), 'ESPN'),
        (text, ('ESQ',), 'Esquire'),
        (text, ('FAM',), 'Family'),
        (text, ('FJR',), 'Family Jr'),
        (text, ('FOOD',), 'Food Network'),
        (text, ('FOX',), 'Fox'),
        (text, ('FREE',), 'Freeform'),
        (text, ('FYI',), 'FYI Network'),
        (text, ('GLBL',), 'Global'),
        (text, ('GLOB',), 'GloboSat Play'),
        (text, ('HLMK',), 'Hallmark'),
        (text, ('HBO',), 'HBO Go'),
        (pattern, ('HBO-?Go',), 'HBO Go'),
        (text, ('HGTV',), 'HGTV'),
        (text, ('HIST', 'History'), 'History'),
        (text, ('HULU',), 'Hulu'),
        (text, ('ID',), 'Investigation Discovery'),
        (text, ('IFC',), 'IFC'),
        (text, ('iTunes', 'iT'), 'iTunes'),
        (text, ('ITV',), 'ITV'),
        (text, ('KNOW',), 'Knowledge Network'),
        (text, ('LIFE',), 'Lifetime'),
        (text, ('MTOD',), 'Motor Trend OnDemand'),
        (text, ('MNBC',), 'MSNBC'),
        (text, ('MTV',), 'MTV'),
        (text, ('NATG',), 'National Geographic'),
        (pattern, ('National-?Geographic',), 'National Geographic'),
        (text, ('NBA',), 'NBA TV'),
        (pattern, ('NBA-?TV',), 'NBA TV'),
        (text, ('NBC',), 'NBC'),
        (text, ('NF', 'Netflix'), 'Netflix'),
        (text, ('NFL',), 'NFL'),
        (text, ('NFLN',), 'NFL Now'),
        (text, ('GC',), 'NHL GameCenter'),
        (text, ('NICK', 'Nickelodeon'), 'Nickelodeon'),
        (text, ('NRK',), 'Norsk Rikskringkasting'),
        (text, ('PBS',), 'PBS'),
        (text, ('PBSK',), 'PBS Kids'),
        (text, ('PSN',), 'Playstation Network'),
        (text, ('PLUZ',), 'Pluzz'),
        (text, ('RTE',), 'RTE One'),
        (text, ('SBS',), 'SBS (AU)'),
        (text, ('SESO', 'SeeSo'), 'SeeSo'),
        (text, ('SHMI',), 'Shomi'),
        (text, ('SPIK',), 'Spike'),
        (text, ('SPKE',), 'Spike TV'),
        (pattern, ('Spike-?TV',), 'Spike TV'),
        (text, ('SNET',), 'Sportsnet'),
        (text, ('SPRT',), 'Sprout'),
        (text, ('STAN',), 'Stan'),
        (text, ('STZ',), 'Starz'),
        (text, ('SVT',), 'Sveriges Television'),
        (text, ('SWER',), 'SwearNet'),
        (text, ('SYFY',), 'Syfy'),
        (text, ('TBS',), 'TBS'),
        (text, ('TFOU',), 'TFou'),
        (text, ('CW',), 'The CW'),
        (pattern, ('The-?CW',), 'The CW'),
        (text, ('TLC',), 'TLC'),
        (text, ('TUBI',), 'TubiTV'),
        (text, ('TV3',), 'TV3 Ireland'),
        (text, ('TV4',), 'TV4 Sweeden'),
        (text, ('TVL',), 'TV Land'),
        (pattern, ('TV-?Land',), 'TV Land'),
        (text, ('UFC',), 'UFC'),
        (text, ('UKTV',), 'UKTV'),
        (text, ('UNIV',), 'Univision'),
        (text, ('USAN',), 'USA Network'),
        (text, ('VLCT',), 'Velocity'),
        (text, ('VH1',), 'VH1'),
        (text, ('VICE',), 'Viceland'),
        (text, ('VMEO',), 'Vimeo'),
        (text, ('VRV',), 'VRV'),
        (text, ('WNET',), 'W Network'),
        (text, ('WME',), 'WatchMe'),
        (text, ('WWEN',), 'WWE Network'),
        (text, ('XBOX',), 'Xbox Video'),
        (text, ('YHOO',), 'Yahoo'),
        (text, ('RED',), 'YouTube Red'),
        (text, ('ZDF',), 'ZDF'),
    )
    for register, names, service in catalogue:
        register(*names, value=service)

    services.rules(ValidateStreamingService)

    return services
def video_codec(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers, in this order, the ``video_codec`` patterns, the
    ``video_profile`` patterns, the single ``video_api`` pattern and the
    ``color_depth`` patterns.  Each of those properties gets a ``disabled``
    callback that asks ``is_disabled`` about the property's own name.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def disabled_for(prop):
        """Build the ``disabled`` callback that asks ``is_disabled`` about *prop*."""
        return lambda context: is_disabled(context, prop)

    codecs = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    codecs = codecs.string_defaults(ignore_case=True)

    # --- video_codec -------------------------------------------------------
    codecs.defaults(
        name="video_codec",
        tags=['source-suffix', 'streaming_service.suffix'],
        disabled=disabled_for('video_codec'))

    codecs.regex(r'Rv\d{2}', value='RealVideo')
    codecs.regex('Mpe?g-?2', '[hx]-?262', value='MPEG-2')
    codecs.string("DVDivX", "DivX", value="DivX")
    codecs.string('XviD', value='Xvid')
    codecs.regex('VC-?1', value='VC-1')
    codecs.string('VP7', value='VP7')
    codecs.string('VP8', 'VP80', value='VP8')
    codecs.string('VP9', value='VP9')
    codecs.regex('[hx]-?263', value='H.263')
    codecs.regex('[hx]-?264', '(MPEG-?4)?AVC(?:HD)?', value='H.264')
    codecs.regex('[hx]-?265', 'HEVC', value='H.265')
    # "hevc10" yields two children: the codec and its colour depth.
    hevc10_values = {'video_codec': 'H.265', 'color_depth': '10-bit'}
    codecs.regex('(?P<video_codec>hevc)(?P<color_depth>10)',
                 value=hevc10_values,
                 tags=['video-codec-suffix'],
                 children=True)

    # --- video_profile -----------------------------------------------------
    # http://blog.mediacoderhq.com/h264-profiles-and-levels/
    # https://en.wikipedia.org/wiki/H.264/MPEG-4_AVC
    codecs.defaults(
        clear=True,
        name="video_profile",
        validator=seps_surround,
        disabled=disabled_for('video_profile'))

    profile_tag = 'video_profile.rule'
    tagged_profiles = (
        (('BP',), 'Baseline'),
        (('XP', 'EP'), 'Extended'),
        (('MP',), 'Main'),
        (('HP', 'HiP'), 'High'),
        # https://en.wikipedia.org/wiki/Scalable_Video_Coding
        (('SC', 'SVC'), 'Scalable Video Coding'),
    )
    for names, profile in tagged_profiles:
        codecs.string(*names, value=profile, tags=profile_tag)
    # https://en.wikipedia.org/wiki/AVCHD
    codecs.regex('AVC(?:HD)?', value='Advanced Video Codec High Definition', tags=profile_tag)
    # https://en.wikipedia.org/wiki/H.265/HEVC
    codecs.string('HEVC', value='High Efficiency Video Coding', tags=profile_tag)

    # These three carry no 'video_profile.rule' tag.
    codecs.regex('Hi422P', value='High 4:2:2')
    codecs.regex('Hi444PP', value='High 4:4:4 Predictive')
    codecs.regex('Hi10P?', value='High 10')  # no profile validation is required

    # --- video_api ---------------------------------------------------------
    codecs.string('DXVA', value='DXVA', name='video_api', disabled=disabled_for('video_api'))

    # --- color_depth -------------------------------------------------------
    codecs.defaults(
        clear=True,
        name='color_depth',
        validator=seps_surround,
        disabled=disabled_for('color_depth'))
    codecs.regex('12.?bits?', value='12-bit')
    codecs.regex('10.?bits?', 'YUV420P10', 'Hi10P?', value='10-bit')
    codecs.regex('8.?bits?', value='8-bit')

    codecs.rules(ValidateVideoCodec, VideoProfileRule)

    return codecs
def streaming_service():
    """Streaming service property.

    String patterns ignore case; regex patterns use ``re.IGNORECASE`` and the
    ``dash`` abbreviation.  Pattern defaults are ``name='streaming_service'``
    and ``validator=seps_surround``.  ``ValidateStreamingService`` is attached
    once every pattern is registered.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    services = Rebulk().string_defaults(ignore_case=True)
    services = services.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    services.defaults(name='streaming_service', validator=seps_surround)

    text, pattern = services.string, services.regex

    # (registration method, patterns, reported value), in registration order.
    catalogue = (
        (text, ('AE', 'A&E'), 'A&E'),
        (text, ('AMBC',), 'ABC'),
        (text, ('AMZN', 'AmazonPrime'), 'Amazon Prime'),
        (pattern, ('Amazon-Prime',), 'Amazon Prime'),
        (text, ('AS', 'AdultSwim'), 'Adult Swim'),
        (pattern, ('Adult-Swim',), 'Adult Swim'),
        (text, ('iP', 'BBCiPlayer'), 'BBC iPlayer'),
        (pattern, ('BBC-iPlayer',), 'BBC iPlayer'),
        (text, ('CBS',), 'CBS'),
        (text, ('CC', 'ComedyCentral'), 'Comedy Central'),
        (pattern, ('Comedy-Central',), 'Comedy Central'),
        (text, ('CR', 'CrunchyRoll'), 'Crunchy Roll'),
        (pattern, ('Crunchy-Roll',), 'Crunchy Roll'),
        (text, ('CW', 'TheCW'), 'The CW'),
        (pattern, ('The-CW',), 'The CW'),
        (text, ('DISC', 'Discovery'), 'Discovery'),
        (text, ('DSNY', 'Disney'), 'Disney'),
        (text, ('EPIX', 'ePix'), 'ePix'),
        (text, ('HBO', 'HBOGo'), 'HBO Go'),
        (pattern, ('HBO-Go',), 'HBO Go'),
        (text, ('HIST', 'History'), 'History'),
        (text, ('IFC', 'IFC'), 'IFC'),
        (text, ('PBS', 'PBS'), 'PBS'),
        (text, ('NATG', 'NationalGeographic'), 'National Geographic'),
        (pattern, ('National-Geographic',), 'National Geographic'),
        (text, ('NBA', 'NBATV'), 'NBA TV'),
        (pattern, ('NBA-TV',), 'NBA TV'),
        (text, ('NBC',), 'NBC'),
        (text, ('NFL',), 'NFL'),
        (text, ('NICK', 'Nickelodeon'), 'Nickelodeon'),
        (text, ('NF', 'Netflix'), 'Netflix'),
        (text, ('SESO', 'SeeSo'), 'SeeSo'),
        (text, ('SPKE', 'SpikeTV', 'Spike TV'), 'Spike TV'),
        (text, ('SYFY', 'Syfy'), 'Syfy'),
        (text, ('TFOU', 'TFou'), 'TFou'),
        (text, ('TVL', 'TVLand', 'TV Land'), 'TV Land'),
        (text, ('UFC',), 'UFC'),
    )
    for register, names, service in catalogue:
        register(*names, value=service)

    services.rules(ValidateStreamingService)

    return services
def episodes():
    """
    Builder for rebulk object.

    Registers the season/episode patterns (``SxxExx`` chains, season words,
    episode words, the weak numeric chains, ``version``, detached
    ``X of Y`` counts, ``episode_details`` and ``episode_format``), a
    hardcoded-movie marker, and the episode related rules.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    def episodes_season_chain_breaker(matches):
        """
        Break chains if there's more than 100 offset between two neighbor values.

        Only the last two ``episode`` matches and the last two ``season``
        matches are compared.

        :param matches:
        :type matches:
        :return: True when the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > 100:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > 100:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match:
        :param other:
        :return: the match selected by the checks below, or ``'__default__'``
        """
        if match.name in ['season', 'episode'] and other.name in \
                ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']:
            return match
        elif match.name in ['season', 'episode'] and other.name in ['season', 'episode'] \
                and match.initiator != other.initiator:
            # Two season/episode matches coming from different patterns:
            # check the 'weak-episode' tag first, then an 'x' in the raw text.
            if 'weak-episode' in match.tags:
                return match
            if 'weak-episode' in other.tags:
                return other
            if 'x' in match.initiator.raw.lower():
                return match
            if 'x' in other.initiator.raw.lower():
                return other
        return '__default__'

    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'eps', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']
    season_markers = ["S"]
    season_ep_markers = ["x"]
    episode_markers = ["xE", "Ex", "EP", "E", "x"]
    range_separators = ['-', '~', 'to', 'a']
    weak_discrete_separators = [sep for sep in seps if sep not in range_separators]
    strong_discrete_separators = ['+', '&', 'and', 'et']
    discrete_separators = strong_discrete_separators + weak_discrete_separators

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)

        :param match: parent match whose children are checked
        :return: True when the children are accepted
        :rtype: bool
        """
        values = match.children.to_dict(implicit=True)
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: True when the values are accepted
            :rtype: bool
            """
            separator_name = property_name + 'Separator'
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == separator_name, 0)
                    # No separator child may precede this value (nothing matched the
                    # predicate); there is then nothing to check for this pair.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not current_match.value - previous_match.value == 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver) \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers) + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers) +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers) + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        A match whose raw text is int-coercable is always accepted.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman}) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?(?P<seasonSeparator>' +
               build_or_pattern(range_separators + discrete_separators + ['@'], escape=True) +
               r')@?(?P<season>\d+)').repeater('*')

    # Digits-only variant, disabled when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash], formatter=int,
                 disabled=lambda context: context.get('type') == 'episode')

    # Numeral variant (parsed with parse_numeral), enabled only when the context type is 'episode'.
    rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',  # Episode 4
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode')

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # 12, 13
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: not context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode'],
                 formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode') \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'e(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                 disabled=lambda context: context.get('episode_prefer_number', False)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/eps
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def language(config, common_words):
    """
    Builder for rebulk object.

    Reads the affix lists from *config*, registers the private
    ``subtitle_language.prefix``, ``subtitle_language.suffix`` and
    ``language.suffix`` string patterns, a functional pattern backed by
    ``LanguageFinder``, and the language rules.  Also installs a
    ``GuessitConverter`` built from ``config['synonyms']`` as the ``'guessit'``
    entry of ``babelfish.language_converters``.

    :param config: rule configuration
    :type config: dict
    :param common_words: common words
    :type common_words: set
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def by_length(affixes):
        """Sort *affixes* with ``length_comparator`` as the sort key."""
        return sorted(affixes, key=length_comparator)

    shared_subtitle_affixes = config['subtitle_affixes']
    subtitle_prefixes = by_length(shared_subtitle_affixes + config['subtitle_prefixes'])
    subtitle_suffixes = by_length(shared_subtitle_affixes + config['subtitle_suffixes'])

    shared_language_affixes = config['language_affixes']
    lang_prefixes = by_length(shared_language_affixes + config['language_prefixes'])
    lang_suffixes = by_length(shared_language_affixes + config['language_suffixes'])

    weak_affixes = frozenset(config['weak_affixes'])

    def language_disabled(context):
        """Tell whether the language property is disabled."""
        return is_disabled(context, 'language')

    def subtitle_language_disabled(context):
        """Tell whether the subtitle_language property is disabled."""
        return is_disabled(context, 'subtitle_language')

    def everything_disabled(context):
        """The whole builder is off only when both properties are disabled."""
        return language_disabled(context) and subtitle_language_disabled(context)

    def no_allowed_languages(context):
        """Tell whether the context has no truthy 'allowed_languages' entry."""
        return not context.get('allowed_languages')

    builder = Rebulk(disabled=everything_disabled)

    # Options shared by the three private affix patterns.
    affix_options = dict(ignore_case=True, private=True, validator=seps_surround)

    builder.string(*subtitle_prefixes,
                   name="subtitle_language.prefix",
                   tags=['release-group-prefix'],
                   disabled=subtitle_language_disabled,
                   **affix_options)
    builder.string(*subtitle_suffixes,
                   name="subtitle_language.suffix",
                   disabled=subtitle_language_disabled,
                   **affix_options)
    builder.string(*lang_suffixes,
                   name="language.suffix",
                   tags=['source-suffix'],
                   disabled=language_disabled,
                   **affix_options)

    def find_languages(string, context=None):
        """Find languages in the string

        :return: list of tuple (property, Language, lang_word, word)
        """
        finder = LanguageFinder(context,
                                subtitle_prefixes, subtitle_suffixes,
                                lang_prefixes, lang_suffixes,
                                weak_affixes)
        return finder.find(string)

    builder.functional(find_languages,
                       properties={'language': [None]},
                       disabled=no_allowed_languages)

    builder.rules(SubtitleExtensionRule,
                  SubtitlePrefixLanguageRule,
                  SubtitleSuffixLanguageRule,
                  RemoveLanguage,
                  RemoveInvalidLanguages(common_words))

    # Module-level side effect: (re)register the guessit converter in babelfish.
    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return builder
def audio_codec():
    """
    Builder for rebulk object.

    Registers three groups of patterns, each under its own default name:
    ``audio_codec``, ``audio_profile`` and ``audio_channels``.  Regex patterns
    use ``re.IGNORECASE`` and the ``dash`` abbreviation; string patterns
    ignore case.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    audio = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    audio = audio.string_defaults(ignore_case=True)

    # --- audio_codec: every entry is a regex ---------------------------------
    audio.defaults(name="audio_codec")
    codec_expressions = (
        (("MP3", "LAME", r"LAME(?:\d)+-?(?:\d)+"), "MP3"),
        (("DolbyDigital", "Dolby-Digital", "DD"), "DolbyDigital"),
        (("DolbyAtmos", "Dolby-Atmos", "Atmos"), "DolbyAtmos"),
        (("AAC",), "AAC"),
        (("AC3D?",), "AC3"),
        (("Flac",), "FLAC"),
        (("DTS",), "DTS"),
        (("True-?HD",), "TrueHD"),
    )
    for expressions, codec in codec_expressions:
        audio.regex(*expressions, value=codec)

    # --- audio_profile: the tag names the codec the profile is tied to -------
    audio.defaults(name="audio_profile")
    audio.string("HD", value="HD", tags="DTS")
    audio.regex("HD-?MA", value="HDMA", tags="DTS")
    for profile, codec_tag in (("HE", "AAC"), ("LC", "AAC"), ("HQ", "AC3")):
        audio.string(profile, value=profile, tags=codec_tag)

    # --- audio_channels -------------------------------------------------------
    audio.defaults(name="audio_channels")
    channel_expressions = (
        (r"(7[\W_]1)(?:[^\d]|$)", "7.1"),
        (r"(5[\W_]1)(?:[^\d]|$)", "5.1"),
        (r"(2[\W_]0)(?:[^\d]|$)", "2.0"),
    )
    for expression, layout in channel_expressions:
        # children=True: the captured group, not the whole match, is reported.
        audio.regex(expression, value=layout, children=True)

    channel_words = (
        (("7ch", "8ch"), "7.1"),
        (("5ch", "6ch"), "5.1"),
        (("2ch", "stereo"), "2.0"),
        (("1ch", "mono"), "1.0"),
    )
    for words, layout in channel_words:
        audio.string(*words, value=layout)

    audio.rules(DtsRule, AacRule, Ac3Rule, AudioValidatorRule, HqConflictRule)

    return audio
def website(config):
    """
    Builder for rebulk object.

    Registers three ``website`` regex patterns built from the packaged TLD
    list and the safe TLDs / subdomains / prefixes found in *config*, a
    private ``website.prefix`` string pattern, and the
    ``PreferTitleOverWebsite`` and ``ValidateWebsitePrefix`` rules.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'website'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    # Read the TLD list inside a ``with`` block so the resource stream is
    # closed as soon as the list is built instead of being left open.
    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        # Lines containing b'--' are skipped, then the first remaining line is
        # dropped (presumably the file's header line -- confirm against the data file).
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension

    safe_tlds = config['safe_tlds']  # For sure a website extension
    safe_subdomains = config['safe_subdomains']  # For sure a website subdomain
    safe_prefix = config['safe_prefixes']  # Those words before a tlds are sure
    website_prefixes = config['prefixes']

    # <safe subdomain>.<anything>.<any registered tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:' + build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
    # [<safe subdomain>.]<name>.<safe tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_tlds=safe_tlds, children=True)
    # [<safe subdomain>.]<name>.<safe prefix>.<any registered tld>
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:' + build_or_pattern(safe_subdomains) +
                 r'\.)*[a-z-]+\.(?:' + build_or_pattern(safe_prefix) +
                 r'\.)+(?:' + build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 safe_subdomains=safe_subdomains, safe_prefix=safe_prefix, tlds=tlds, children=True)

    rebulk.string(*website_prefixes,
                  validator=seps_surround, private=True, tags=['website.prefix'])

    class PreferTitleOverWebsite(Rule):
        """
        If found match is more likely a title, remove website.
        """
        consequence = RemoveMatch

        @staticmethod
        def valid_followers(match):
            """
            Validator for next website matches

            :param match: candidate match following a website match
            :return: True when one of its names is season, episode or year
            :rtype: bool
            """
            return any(name in ['season', 'episode', 'year'] for name in match.names)

        def when(self, matches, context):
            """
            Collect the website matches to remove.

            A website match whose lowercased value starts with one of the safe
            subdomains or safe prefixes is always kept.  Any other website
            match is removed when ``matches.next`` finds a match accepted by
            ``valid_followers``.

            :param matches: current matches
            :param context: guess context (unused here)
            :return: website matches to remove
            :rtype: list
            """
            safe_starts = safe_subdomains + safe_prefix
            to_remove = []
            for website_match in matches.named('website'):
                value = website_match.value.lower()
                if any(value.startswith(safe_start) for safe_start in safe_starts):
                    continue
                suffix = matches.next(website_match, PreferTitleOverWebsite.valid_followers, 0)
                if suffix:
                    to_remove.append(website_match)
            return to_remove

    rebulk.rules(PreferTitleOverWebsite, ValidateWebsitePrefix)

    return rebulk
def container():
    """
    Builder for rebulk object.

    Two families of ``container`` patterns are registered:

    * regex patterns anchored at the end of the input (``\\.<ext>$``), tagged
      ``extension``; their formatter drops the first character of the raw
      text (the leading dot);
    * string patterns validated with ``seps_surround``; their formatter
      upper-cases the raw text.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)

    def extension_conflict_solver(match, other):
        """Yield to format/video_codec and to containers that are not extensions."""
        if other.name in ["format", "video_codec"]:
            return other
        if other.name == "container" and "extension" not in other.tags:
            return other
        return "__default__"

    def word_conflict_solver(match, other):
        """Win over format/video_codec and over containers that are extensions."""
        if other.name in ["format", "video_codec"]:
            return match
        if other.name == "container" and "extension" in other.tags:
            return match
        return "__default__"

    builder.defaults(
        name="container",
        formatter=lambda value: value[1:],
        tags=["extension"],
        conflict_solver=extension_conflict_solver,
    )

    subtitles = ["srt", "idx", "sub", "ssa", "ass"]
    info = ["nfo"]
    videos = [
        "3g2", "3gp", "3gp2", "asf", "avi", "divx", "flv", "m4v", "mk2",
        "mka", "mkv", "mov", "mp4", "mp4a", "mpeg", "mpg", "ogg", "ogm",
        "ogv", "qt", "ra", "ram", "rm", "ts", "wav", "webm", "wma", "wmv",
        "iso", "vob",
    ]
    torrent = ["torrent"]

    # File-extension patterns, one per family, registered in this order.
    families = (
        (subtitles, "subtitle"),
        (info, "info"),
        (videos, "video"),
        (torrent, "torrent"),
    )
    for extensions, family in families:
        if REGEX_AVAILABLE:
            # Named-list syntax, used when REGEX_AVAILABLE is true.
            expression = r"\.\L<exts>$"
        else:
            expression = r"\." + build_or_pattern(extensions) + "$"
        builder.regex(expression, exts=extensions, tags=["extension", family])

    builder.defaults(
        name="container",
        validator=seps_surround,
        formatter=lambda s: s.upper(),
        conflict_solver=word_conflict_solver,
    )

    # Bare words: "sub" is left out of the subtitle words and the info family
    # is not registered at all.
    subtitle_words = [ext for ext in subtitles if ext not in ["sub"]]
    builder.string(*subtitle_words, tags=["subtitle"])
    builder.string(*videos, tags=["video"])
    builder.string(*torrent, tags=["torrent"])

    return builder
def other(config):  # pylint:disable=unused-argument,too-many-statements
    """
    Builder for rebulk object.

    Registers every ``other`` pattern (fix/proper markers, quality flags, regions, feeds, ...)
    and the rules that validate them.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def other_disabled(context):
        """Tell whether the ``other`` property is disabled for this context."""
        return is_disabled(context, 'other')

    def streaming_tags(before=(), after=()):
        """Build a fresh tag list around the streaming service prefix/suffix tags."""
        return list(before) + ['streaming_service.prefix', 'streaming_service.suffix'] + list(after)

    rebulk = Rebulk(disabled=other_disabled)
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    for patterns, label in ((('Audio-?Fix', 'Audio-?Fixed'), 'Audio Fixed'),
                            (('Sync-?Fix', 'Sync-?Fixed'), 'Sync Fixed'),
                            (('Dual', 'Dual-?Audio'), 'Dual Audio'),
                            (('ws', 'wide-?screen'), 'Widescreen'),
                            (('Re-?Enc(?:oded)?',), 'Reencoded')):
        rebulk.regex(*patterns, value=label)

    rebulk.string('Repack', 'Rerip', value='Proper', tags=streaming_tags())
    rebulk.string('Proper', value='Proper', tags=streaming_tags(before=['has-neighbor']))
    rebulk.regex('Real-Proper', 'Real-Repack', 'Real-Rerip', value='Proper',
                 tags=streaming_tags(after=['real']))
    rebulk.regex('Real', value='Proper',
                 tags=streaming_tags(before=['has-neighbor'], after=['real']))
    rebulk.string('Fix', 'Fixed', value='Fix',
                  tags=streaming_tags(before=['has-neighbor-before', 'has-neighbor-after']))
    rebulk.string('Dirfix', 'Nfofix', 'Prooffix', value='Fix', tags=streaming_tags())
    rebulk.regex('(?:Proof-?)?Sample-?Fix', value='Fix', tags=streaming_tags())

    rebulk.string('Fansub', value='Fan Subtitled', tags='has-neighbor')
    rebulk.string('Fastsub', value='Fast Subtitled', tags='has-neighbor')

    season_alternatives = build_or_pattern(["seasons?", "series?"])
    article_alternatives = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Accept a "Complete" match only when a season word was captured before or after it.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        captured = match.children
        return bool(captured.named('completeWordsBefore') or captured.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', article_alternatives, '-)?',
        '(?P<completeWordsBefore>', season_alternatives, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_alternatives, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})

    rebulk.string('R5', value='Region 5')
    rebulk.string('RC', value='Region C')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')
    rebulk.regex('(HD)(?P<another>Rip)',
                 value={'other': 'HD', 'another': 'Rip'},
                 private_parent=True,
                 children=True,
                 validator={'__parent__': seps_surround},
                 validate_all=True)

    for keyword in ('Screener', 'Remux', 'PAL', 'SECAM', 'NTSC', 'XXX'):
        rebulk.string(keyword, value=keyword)

    rebulk.string('3D', value='3D', tags='has-neighbor')
    rebulk.string('HQ', value='High Quality', tags='uhdbluray-neighbor')
    for patterns, label in ((('HR',), 'High Resolution'),
                            (('LD',), 'Line Dubbed'),
                            (('MD',), 'Mic Dubbed'),
                            (('mHD', 'HDLight'), 'Micro HD'),
                            (('LDTV',), 'Low Definition'),
                            (('HFR',), 'High Frame Rate')):
        rebulk.string(*patterns, value=label)

    # These definitions are matched without the surrounding-separator validator.
    rebulk.string('HD', value='HD', validator=None, tags=streaming_tags())
    rebulk.regex('Full-?HD', 'FHD', value='Full HD', validator=None, tags=streaming_tags())
    rebulk.regex('Ultra-?(?:HD)?', 'UHD', value='Ultra HD', validator=None, tags=streaming_tags())
    rebulk.regex('Upscaled?', value='Upscaled')

    for keyword in ('Complete', 'Classic', 'Bonus', 'Trailer', 'Retail', 'Colorized', 'Internal'):
        rebulk.string(keyword, value=keyword, tags=['has-neighbor', 'release-group-prefix'])

    rebulk.regex('LiNE', value='Line Audio',
                 tags=['has-neighbor-before', 'has-neighbor-after', 'release-group-prefix'])
    rebulk.regex('Read-?NFO', value='Read NFO')

    for patterns, label in ((('CONVERT',), 'Converted'),
                            (('DOCU', 'DOKU'), 'Documentary'),
                            (('OM',), 'Open Matte'),
                            (('STV',), 'Straight to Video'),
                            (('OAR',), 'Original Aspect Ratio')):
        rebulk.string(*patterns, value=label, tags='has-neighbor')
    rebulk.string('Complet', value='Complete', tags=['has-neighbor', 'release-group-prefix'])

    for coast in ('East', 'West'):
        feed_pattern = r'(?:Live-)?(?:Episode-)?{0}-?(?:Coast-)?Feed'.format(coast)
        rebulk.regex(feed_pattern, value='{0} Coast Feed'.format(coast))

    rebulk.string('VO', 'OV', value='Original Video', tags='has-neighbor')
    rebulk.string('Ova', 'Oav', value='Original Animated Video')

    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None,
                 tags=['other.validate.screener', 'source-prefix', 'source-suffix'])
    rebulk.string('Mux', value='Mux', validator=seps_after,
                  tags=['other.validate.mux', 'video-codec-prefix', 'source-suffix'])
    rebulk.string('HC', 'vost', value='Hardcoded Subtitles')

    rebulk.string('SDR', value='Standard Dynamic Range', tags='uhdbluray-neighbor')
    for pattern, label in (('HDR(?:10)?', 'HDR10'),
                           ('Dolby-?Vision', 'Dolby Vision'),
                           ('BT-?2020', 'BT.2020')):
        rebulk.regex(pattern, value=label, tags='uhdbluray-neighbor')

    rebulk.string('Sample', value='Sample', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Extras', value='Extras', tags='has-neighbor')
    rebulk.regex('Digital-?Extras?', value='Extras')
    rebulk.string('Proof', value='Proof', tags=['at-end', 'not-a-release-group'])
    rebulk.string('Obfuscated', 'Scrambled', value='Obfuscated', tags=['at-end', 'not-a-release-group'])
    rebulk.string('xpost', 'postbot', 'asrequested', value='Repost', tags='not-a-release-group')

    rebulk.rules(RenameAnotherToOther,
                 ValidateHasNeighbor,
                 ValidateHasNeighborAfter,
                 ValidateHasNeighborBefore,
                 ValidateScreenerRule,
                 ValidateMuxRule,
                 ValidateHardcodedSubs,
                 ValidateStreamingServiceNeighbor,
                 ValidateAtEnd,
                 ValidateReal,
                 ProperCountRule)

    return rebulk
def other():
    """
    Builder for rebulk object.

    Registers the ``other`` patterns (fix/proper markers, quality and edition keywords, ...)
    and the rules that validate them.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk = rebulk.string_defaults(ignore_case=True)
    rebulk.defaults(name="other", validator=seps_surround)

    for patterns, label in ((('Audio-?Fix', 'Audio-?Fixed'), 'AudioFix'),
                            (('Sync-?Fix', 'Sync-?Fixed'), 'SyncFix'),
                            (('Dual-?Audio',), 'DualAudio'),
                            (('ws', 'wide-?screen'), 'WideScreen')):
        rebulk.regex(*patterns, value=label)

    rebulk.string('Real', 'Fix', 'Fixed', value='Proper',
                  tags=['has-neighbor-before', 'has-neighbor-after'])
    rebulk.string('Proper', 'Repack', 'Rerip', value='Proper')

    for keyword in ('Fansub', 'Fastsub'):
        rebulk.string(keyword, value=keyword, tags='has-neighbor')

    season_alternatives = build_or_pattern(["seasons?", "series?"])
    article_alternatives = build_or_pattern(["The"])

    def validate_complete(match):
        """
        Accept a "Complete" match only when a season word was captured before or after it.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        captured = match.children
        return bool(captured.named('completeWordsBefore') or captured.named('completeWordsAfter'))

    complete_pattern = ''.join((
        '(?P<completeArticle>', article_alternatives, '-)?',
        '(?P<completeWordsBefore>', season_alternatives, '-)?',
        'Complete',
        '(?P<completeWordsAfter>-', season_alternatives, ')?',
    ))
    rebulk.regex(complete_pattern,
                 private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
                 value={'other': 'Complete'},
                 tags=['release-group-prefix'],
                 validator={'__parent__': compose(seps_surround, validate_complete)})

    rebulk.string('R5', 'RC', value='R5')
    rebulk.regex('Pre-?Air', value='Preair')
    rebulk.regex('(?:PS-?)?Vita', value='PS Vita')

    # Keywords whose value is the keyword itself.
    plain_keywords = ('Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD', 'HDLight', 'HQ',
                      'DDC', 'HR', 'PAL', 'SECAM', 'NTSC', 'CC', 'LD', 'MD', 'XXX')
    for keyword in plain_keywords:
        rebulk.string(keyword, value=keyword)
    rebulk.string('LDTV', value='LD')

    # Keywords tagged as needing a neighbor and as release group prefixes.
    neighbor_keywords = ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE', 'Bonus', 'Trailer',
                         'FINAL', 'Retail', 'Uncut', 'Extended', 'Extended Cut')
    for keyword in neighbor_keywords:
        rebulk.string(keyword, value=keyword, tags=['has-neighbor', 'release-group-prefix'])

    rebulk.string('VO', 'OV', value='OV', tags='has-neighbor')

    # Registered without the surrounding-separator validator.
    rebulk.regex('Scr(?:eener)?', value='Screener', validator=None, tags='other.validate.screener')

    rebulk.rules(ValidateHasNeighbor,
                 ValidateHasNeighborAfter,
                 ValidateHasNeighborBefore,
                 ValidateScreenerRule,
                 ProperCountRule)

    return rebulk
def container():
    """
    Builder for rebulk object.

    File extensions anchored at the end of the input are registered first (tagged
    ``extension``), then bare container keywords validated with ``seps_surround``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def strip_dot(value):
        """Formatter for extension matches: drop the first character (the matched dot)."""
        return value[1:]

    def solve_extension_conflict(match, other):  # pylint:disable=unused-argument
        """Give way to format/video_codec and to container matches not tagged as extension."""
        yields_to_other = (other.name in ['format', 'video_codec']
                           or (other.name == 'container' and 'extension' not in other.tags))
        if yields_to_other:
            return other
        return '__default__'

    def to_upper(s):
        """Formatter for keyword matches: upper-case the matched text."""
        return s.upper()

    def solve_keyword_conflict(match, other):
        """Keep this match against format/video_codec and against extension container matches."""
        keeps_match = (other.name in ['format', 'video_codec']
                       or (other.name == 'container' and 'extension' in other.tags))
        if keeps_match:
            return match
        return '__default__'

    rebulk = Rebulk()
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)
    rebulk = rebulk.string_defaults(ignore_case=True)

    rebulk.defaults(name='container',
                    formatter=strip_dot,
                    tags=['extension'],
                    conflict_solver=solve_extension_conflict)

    subtitles = ['srt', 'idx', 'sub', 'ssa', 'ass']
    info = ['nfo']
    videos = [
        '3g2', '3gp', '3gp2', 'asf', 'avi', 'divx', 'flv', 'm4v', 'mk2', 'mka',
        'mkv', 'mov', 'mp4', 'mp4a', 'mpeg', 'mpg', 'ogg', 'ogm', 'ogv', 'qt',
        'ra', 'ram', 'rm', 'ts', 'wav', 'webm', 'wma', 'wmv', 'iso', 'vob'
    ]
    torrent = ['torrent']

    # One registration per extension family, in the original order.
    for extensions, family in ((subtitles, 'subtitle'),
                               (info, 'info'),
                               (videos, 'video'),
                               (torrent, 'torrent')):
        if REGEX_AVAILABLE:
            # Named-list syntax; the list itself is supplied through the ``exts`` keyword.
            extension_pattern = r'\.\L<exts>$'
        else:
            extension_pattern = r'\.' + build_or_pattern(extensions) + '$'
        rebulk.regex(extension_pattern, exts=extensions, tags=['extension', family])

    rebulk.defaults(name='container',
                    validator=seps_surround,
                    formatter=to_upper,
                    conflict_solver=solve_keyword_conflict)

    # 'sub' is left out of the bare keyword list.
    rebulk.string(*[ext for ext in subtitles if ext != 'sub'], tags=['subtitle'])
    rebulk.string(*videos, tags=['video'])
    rebulk.string(*torrent, tags=['torrent'])

    return rebulk
def episodes(config):
    """
    Builder for rebulk object.

    Registers all season/episode patterns (strong ``SxxExx`` chains, worded seasons/episodes,
    weak numeric episodes, versions, detached counts, ...) and the rules post-processing them.

    :param config: rule configuration; the keys read here are ``episode_max_range``,
        ``season_max_range``, ``season_words``, ``episode_words``, ``of_words``, ``all_words``,
        ``season_markers``, ``season_ep_markers``, ``disc_markers``, ``episode_markers``,
        ``range_separators``, ``discrete_separators`` and ``max_range_gap``
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    # pylint: disable=too-many-branches,too-many-statements,too-many-locals
    def is_season_episode_disabled(context):
        """Whether season and episode rules should be disabled."""
        return is_disabled(context, 'episode') or is_disabled(context, 'season')

    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'])

    episode_max_range = config['episode_max_range']
    season_max_range = config['season_max_range']

    def episodes_season_chain_breaker(matches):
        """
        Break chains when the last two episode (or season) values are further apart than the
        configured ``episode_max_range`` (or ``season_max_range``).

        :param matches:
        :type matches:
        :return: True when the chain must be broken
        :rtype: bool
        """
        eps = matches.named('episode')
        if len(eps) > 1 and abs(eps[-1].value - eps[-2].value) > episode_max_range:
            return True

        seasons = matches.named('season')
        if len(seasons) > 1 and abs(seasons[-1].value - seasons[-2].value) > season_max_range:
            return True
        return False

    rebulk.chain_defaults(chain_breaker=episodes_season_chain_breaker)

    def season_episode_conflict_solver(match, other):
        """
        Conflict solver for episode/season patterns

        :param match:
        :param other:
        :return: the match to drop, or '__default__' to defer to the default solver
        """
        if match.name != other.name:
            if match.name == 'episode' and other.name == 'year':
                return match
            if match.name in ('season', 'episode'):
                if other.name in ('video_codec', 'audio_codec', 'container', 'date'):
                    return match
                if (other.name == 'audio_channels' and 'weak-audio_channels' not in other.tags
                        and not match.initiator.children.named(match.name + 'Marker')) or (
                            other.name == 'screen_size' and not int_coercable(other.raw)):
                    return match
                if other.name in ('season', 'episode') and match.initiator != other.initiator:
                    if (match.initiator.name in ('weak_episode', 'weak_duplicate')
                            and other.initiator.name in ('weak_episode', 'weak_duplicate')):
                        return '__default__'
                    for current in (match, other):
                        if 'weak-episode' in current.tags or 'x' in current.initiator.raw.lower():
                            return current
        return '__default__'

    season_words = config['season_words']
    episode_words = config['episode_words']
    of_words = config['of_words']
    all_words = config['all_words']
    season_markers = config['season_markers']
    season_ep_markers = config['season_ep_markers']
    disc_markers = config['disc_markers']
    episode_markers = config['episode_markers']
    range_separators = config['range_separators']
    weak_discrete_separators = [sep for sep in seps_no_fs if sep not in range_separators]
    strong_discrete_separators = config['discrete_separators']
    discrete_separators = strong_discrete_separators + weak_discrete_separators
    max_range_gap = config['max_range_gap']

    def ordering_validator(match):
        """
        Validator for season list. They should be in natural order to be validated.

        episode/season separated by a weak discrete separator should be consecutive, unless a strong discrete separator
        or a range separator is present in the chain (1.3&5 is valid, but 1.3-5 is not valid and 1.3.5 is not valid)
        """
        values = match.children.to_dict()
        if 'season' in values and is_iterable(values['season']):
            # Season numbers must be in natural order to be validated.
            if sorted(values['season']) != values['season']:
                return False
        if 'episode' in values and is_iterable(values['episode']):
            # Episode numbers must be in natural order to be validated.
            if sorted(values['episode']) != values['episode']:
                return False

        def is_consecutive(property_name):
            """
            Check if the property season or episode has valid consecutive values.

            :param property_name: 'season' or 'episode'
            :type property_name: str
            :return: False when two values joined by a weak discrete separator are not within
                ``max_range_gap + 1`` of each other, True otherwise
            :rtype: bool
            """
            previous_match = None
            valid = True
            for current_match in match.children.named(property_name):
                if previous_match:
                    separator = match.children.previous(current_match,
                                                        lambda m: m.name == property_name + 'Separator', 0)
                    # Repeated full patterns (e.g. S01E01S01E02) carry no separator child between
                    # values: skip the separator checks instead of failing on ``None.raw``.
                    if separator is not None:
                        if separator.raw not in range_separators and separator.raw in weak_discrete_separators:
                            if not 0 < current_match.value - previous_match.value <= max_range_gap + 1:
                                valid = False
                        if separator.raw in strong_discrete_separators:
                            # A strong separator makes the whole list acceptable.
                            valid = True
                            break
                previous_match = current_match
            return valid

        return is_consecutive('episode') and is_consecutive('season')

    # S01E02, 01x02, S01S02S03
    rebulk.chain(formatter={'season': int, 'episode': int},
                 tags=['SxxExx'],
                 abbreviations=[alt_dash],
                 children=True,
                 private_parent=True,
                 validate_all=True,
                 validator={'__parent__': ordering_validator},
                 conflict_solver=season_episode_conflict_solver,
                 disabled=is_season_episode_disabled) \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)@?' +
               build_or_pattern(episode_markers + disc_markers, name='episodeMarker') + r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}).repeater('+') \
        .regex(build_or_pattern(episode_markers + disc_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .chain() \
        .regex(r'(?P<season>\d+)@?' +
               build_or_pattern(season_ep_markers, name='episodeMarker') +
               r'@?(?P<episode>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_ep_markers + discrete_separators + range_separators,
                                name='episodeSeparator',
                                escape=True) +
               r'(?P<episode>\d+)').repeater('*') \
        .chain() \
        .regex(build_or_pattern(season_markers, name='seasonMarker') + r'(?P<season>\d+)',
               validate_all=True,
               validator={'__parent__': seps_before}) \
        .regex(build_or_pattern(season_markers + discrete_separators + range_separators,
                                name='seasonSeparator',
                                escape=True) +
               r'(?P<season>\d+)').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Pilot', 'Unaired', 'Final'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details',
                      disabled=lambda context: is_disabled(context, 'episode_details'))

    def validate_roman(match):
        """
        Validate a roman match if surrounded by separators

        Matches whose raw text is int-coercable are always accepted.

        :param match:
        :type match:
        :return:
        :rtype:
        """
        if int_coercable(match.raw):
            return True
        return seps_surround(match)

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator', 'episodeMarker', 'seasonMarker'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True,
                    conflict_solver=season_episode_conflict_solver)

    # Worded seasons, optionally followed by a count and more seasons
    rebulk.chain(abbreviations=[alt_dash],
                 formatter={'season': parse_numeral, 'count': parse_numeral},
                 validator={'__parent__': compose(seps_surround, ordering_validator),
                            'season': validate_roman,
                            'count': validate_roman},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'season')) \
        .defaults(validator=None) \
        .regex(build_or_pattern(season_words, name='seasonMarker') + '@?(?P<season>' + numeral + ')') \
        .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
        .regex(r'@?' + build_or_pattern(range_separators + discrete_separators + ['@'],
                                        name='seasonSeparator', escape=True) +
               r'@?(?P<season>\d+)').repeater('*')

    # Episode 4 (digits only; disabled when the type is 'episode')
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>\d+)' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',
                 abbreviations=[dash], formatter={'episode': int, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') == 'episode' or is_disabled(context, 'episode'))

    # Episode 4 (any numeral; only enabled when the type is 'episode')
    rebulk.regex(build_or_pattern(episode_words, name='episodeMarker') + r'-?(?P<episode>' + numeral + ')' +
                 r'(?:v(?P<version>\d+))?' +
                 r'(?:-?' + build_or_pattern(of_words) + r'-?(?P<count>\d+))?',
                 abbreviations=[dash],
                 validator={'episode': validate_roman},
                 formatter={'episode': parse_numeral, 'version': int, 'count': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode'))

    rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>' + build_or_pattern(all_words) + ')',
                 tags=['SxxExx'],
                 abbreviations=[dash],
                 validator=None,
                 formatter={'season': int, 'other': lambda match: 'Complete'},
                 disabled=lambda context: is_disabled(context, 'season'))

    # 12, 13
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    # 012, 013
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'0(?P<episode>\d{1,2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # 112, 113
    rebulk.chain(tags=['weak-episode'],
                 formatter={'episode': int, 'version': int},
                 name='weak_episode',
                 disabled=lambda context: context.get('type') == 'movie' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d{3,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # 1, 2, 3
    rebulk.chain(tags=['weak-episode'], formatter={'episode': int, 'version': int},
                 disabled=lambda context: context.get('type') != 'episode' or is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<episode>\d)') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113, 1e18, 3e19
    # TODO: Enhance rebulk for validator to be used globally (season_episode_validator)
    rebulk.chain(formatter={'season': int, 'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})?(?P<episodeMarker>e)(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int},
                 disabled=lambda context: is_disabled(context, 'episode')) \
        .defaults(validator=None) \
        .regex(r'ep-?(?P<episode>\d{1,4})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # cap 112, cap 112_114
    rebulk.chain(abbreviations=[dash],
                 tags=['see-pattern'],
                 formatter={'season': int, 'episode': int},
                 disabled=is_season_episode_disabled) \
        .defaults(validator=None) \
        .regex(r'(?P<seasonMarker>cap)-?(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'(?P<episodeSeparator>-)(?P<season>\d{1,2})(?P<episode>\d{2})').repeater('?')

    # 102, 0102
    rebulk.chain(tags=['weak-episode', 'weak-duplicate'],
                 formatter={'season': int, 'episode': int, 'version': int},
                 name='weak_duplicate',
                 conflict_solver=season_episode_conflict_solver,
                 disabled=lambda context: (context.get('episode_prefer_number', False) or
                                           context.get('type') == 'movie') or is_season_episode_disabled(context)) \
        .defaults(validator=None) \
        .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})') \
        .regex(r'v(?P<version>\d+)').repeater('?') \
        .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'version'))

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # TODO: List of words
    # detached of X count (season/episode)
    rebulk.regex(r'(?P<episode>\d+)-?' + build_or_pattern(of_words) +
                 r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                 abbreviations=[dash], children=True, private_parent=True, formatter=int,
                 disabled=lambda context: is_disabled(context, 'episode'))

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode",
                 disabled=lambda context: is_disabled(context, 'episode_format'))

    rebulk.rules(WeakConflictSolver, RemoveInvalidSeason, RemoveInvalidEpisode,
                 SeePatternRange(range_separators + ['_']),
                 EpisodeNumberSeparatorRange(range_separators),
                 SeasonSeparatorRange(range_separators), RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 RemoveWeak, RenameToAbsoluteEpisode, CountValidator, EpisodeSingleDigitValidator, RenameToDiscMatch)

    return rebulk
def episodes():
    """
    Builder for rebulk object.

    Registers season/episode/version/count patterns on a new Rebulk instance, then the rules
    that post-process them. Most patterns are declared twice: as a single regular expression
    relying on repeated named groups and ``\\L<name>`` named lists when ``REGEX_AVAILABLE`` is
    true, and as a rebulk chain (or ``build_or_pattern`` concatenation) otherwise.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    #pylint: disable=too-many-branches,too-many-statements
    rebulk = Rebulk()
    rebulk.regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    # Strong season/episode patterns, tagged 'SxxExx'.
    # 01x02, 01x02x03x04
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))*',
                     # S01E02, S01x02, S01E02E03, S01Ex02, S01xE02, SO1Ex02Ex03
                     r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)' +
                     r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))*',
                     # S01
                     r'S(?P<season>\d+)' +
                     r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))*',
                     formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     # season/episode matches return themselves to the solver when conflicting
                     # with the listed properties; anything else uses the default solver.
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in
                     ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
                     else '__default__')
    else:
        # Same three patterns declared as chain parts (order differs: S01E02 form first).
        rebulk.chain(formatter={'season': int, 'episode': int},
                     tags=['SxxExx'],
                     abbreviations=[alt_dash],
                     children=True,
                     private_parent=True,
                     conflict_solver=lambda match, other: match
                     if match.name in ['season', 'episode'] and other.name in
                     ['screen_size', 'video_codec', 'audio_codec', 'audio_channels', 'container', 'date']
                     else '__default__') \
            .defaults(validator=None)\
            .regex(r'S(?P<season>\d+)@?(?:xE|Ex|E|x)@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>xE|Ex|E|x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'(?P<season>\d+)@?x@?(?P<episode>\d+)') \
            .regex(r'(?:(?P<episodeSeparator>x|-|\+|&)(?P<episode>\d+))').repeater('*') \
            .chain() \
            .regex(r'S(?P<season>\d+)') \
            .regex(r'(?:(?P<seasonSeparator>S|-|\+|&)(?P<season>\d+))').repeater('*')

    # episode_details property
    for episode_detail in ('Special', 'Bonus', 'Omake', 'Ova', 'Oav', 'Pilot', 'Unaired'):
        rebulk.string(episode_detail, value=episode_detail, name='episode_details')
    rebulk.regex(r'Extras?', name='episode_details', value='Extras')

    # From here on, parent matches must be surrounded by separators.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'],
                    validate_all=True, validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Word lists used by the worded season/episode patterns below.
    season_words = ['season', 'saison', 'serie', 'seasons', 'saisons', 'series']
    episode_words = ['episode', 'episodes', 'ep']
    of_words = ['of', 'sur']
    all_words = ['All']

    # Worded seasons, optionally followed by "<of word> <count>" and further season numbers.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<season_words>@?(?P<season>' + numeral + ')' +
                     r'(?:@?\L<of_words>@?(?P<count>' + numeral + '))?' +
                     r'(?:@?(?P<seasonSeparator>-)@?(?P<season>\d+))*' +
                     r'(?:@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+))*',
                     of_words=of_words,
                     season_words=season_words,  # Season 1, # Season one
                     abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})
    else:
        rebulk.chain(abbreviations=[alt_dash], formatter={'season': parse_numeral, 'count': parse_numeral})\
            .defaults(validator=None)\
            .regex(build_or_pattern(season_words) + '@?(?P<season>' + numeral + ')') \
            .regex(r'' + build_or_pattern(of_words) + '@?(?P<count>' + numeral + ')').repeater('?') \
            .regex(r'@?(?P<seasonSeparator>-)@?(?P<season>\d+)').repeater('*') \
            .regex(r'@?(?P<seasonSeparator>\+|&)@?(?P<season>\d+)').repeater('*')

    # Worded episode with a digits-only number; disabled when the context type is 'episode'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>\d+)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?' + build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter=int,
                     disabled=lambda context: context.get('type') == 'episode')

    # Worded episode accepting any ``numeral``; only enabled when the context type is 'episode'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'\L<episode_words>-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?\L<of_words>?-?(?P<count>\d+))?',
                     of_words=of_words,
                     episode_words=episode_words,  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.regex(build_or_pattern(episode_words) + r'-?(?P<episode>' + numeral + ')' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:-?'+ build_or_pattern(of_words) + r'?-?(?P<count>\d+))?',  # Episode 4
                     abbreviations=[dash], formatter={'episode': parse_numeral, 'version': int, 'count': int},
                     disabled=lambda context: context.get('type') != 'episode')

    # Season followed by an "all" word: the captured word is reported as other='Complete'.
    if REGEX_AVAILABLE:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>\L<all_words>)',
                     tags=['SxxExx'],
                     all_words=all_words,
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})
    else:
        rebulk.regex(r'S?(?P<season>\d+)-?(?:xE|Ex|E|x)-?(?P<other>'+build_or_pattern(all_words)+')',
                     tags=['SxxExx'],
                     abbreviations=[dash],
                     validator=None,
                     formatter={'season': int, 'other': lambda match: 'Complete'})

    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'], validate_all=True,
                    validator={'__parent__': seps_surround}, children=True, private_parent=True)

    # Weak numeric episode patterns, tagged 'bonus-conflict' and 'weak-movie'.
    if REGEX_AVAILABLE:
        # 12, 13
        rebulk.regex(r'(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{2})').repeater('*')

    if REGEX_AVAILABLE:
        # 012, 013
        rebulk.regex(r'0(?P<episode>\d{1,2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'0(?P<episode>\d{1,2})') \
            .regex(r'v(?P<version>\d+)').repeater('?') \
            .regex(r'(?P<episodeSeparator>[x-])0(?P<episode>\d{1,2})').repeater('*')

    # Only enabled when the 'episode_prefer_number' context option is truthy.
    if REGEX_AVAILABLE:
        # 112, 113
        rebulk.regex(r'(?P<episode>\d{3,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{3,4}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: not context.get('episode_prefer_number', False)) \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d{3,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{3,4})').repeater('*')

    # Single digit episodes are only looked for when the context type is 'episode'.
    if REGEX_AVAILABLE:
        # 1, 2, 3
        rebulk.regex(r'(?P<episode>\d)' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>[x-])(?P<episode>\d{1,2}))*',
                     tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode')
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie'], formatter={'episode': int, 'version': int},
                     disabled=lambda context: context.get('type') != 'episode') \
            .defaults(validator=None) \
            .regex(r'(?P<episode>\d)')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>[x-])(?P<episode>\d{1,2})').repeater('*')

    # e112, e113
    if REGEX_AVAILABLE:
        rebulk.regex(r'e(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4}))*',
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'e(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # ep 112, ep113, ep112, ep113
    if REGEX_AVAILABLE:
        rebulk.regex(r'ep-?(?P<episode>\d{1,4})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4}))*',
                     abbreviations=[dash],
                     formatter={'episode': int, 'version': int})
    else:
        rebulk.chain(abbreviations=[dash], formatter={'episode': int, 'version': int}) \
            .defaults(validator=None) \
            .regex(r'ep-?(?P<episode>\d{1,4})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>ep|e|x|-)(?P<episode>\d{1,4})').repeater('*')

    # 102, 0102
    # Disabled when 'episode_prefer_number' is set; returns itself to the solver on a 'year' conflict.
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})' +
                     r'(?:v(?P<version>\d+))?' +
                     r'(?:(?P<episodeSeparator>x|-)(?P<episode>\d{2}))*',
                     tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))
    else:
        rebulk.chain(tags=['bonus-conflict', 'weak-movie', 'weak-duplicate'],
                     formatter={'season': int, 'episode': int, 'version': int},
                     conflict_solver=lambda match, other: match if other.name == 'year' else '__default__',
                     disabled=lambda context: context.get('episode_prefer_number', False))\
            .defaults(validator=None)\
            .regex(r'(?P<season>\d{1,2})(?P<episode>\d{2})')\
            .regex(r'v(?P<version>\d+)').repeater('?')\
            .regex(r'(?P<episodeSeparator>x|-)(?P<episode>\d{2})').repeater('*')

    # Standalone version marker (v2, v3, ...)
    rebulk.regex(r'v(?P<version>\d+)', children=True, private_parent=True, formatter=int)

    # Reset defaults: no validator for the patterns below.
    rebulk.defaults(private_names=['episodeSeparator', 'seasonSeparator'])

    #TODO: List of words
    # detached of X count (season/episode)
    if REGEX_AVAILABLE:
        rebulk.regex(r'(?P<episode>\d+)?-?\L<of_words>-?(?P<count>\d+)-?\L<episode_words>?',
                     of_words=of_words, episode_words=episode_words,
                     abbreviations=[dash], children=True, private_parent=True, formatter=int)
    else:
        rebulk.regex(r'(?P<episode>\d+)?-?' + build_or_pattern(of_words) +
                     r'-?(?P<count>\d+)-?' + build_or_pattern(episode_words) + '?',
                     abbreviations=[dash], children=True, private_parent=True, formatter=int)

    rebulk.regex(r'Minisodes?', name='episode_format', value="Minisode")

    # Hardcoded movie to disable weak season/episodes
    rebulk.regex('OSS-?117',
                 abbreviations=[dash], name="hardcoded-movies", marker=True,
                 conflict_solver=lambda match, other: None)

    rebulk.rules(EpisodeNumberSeparatorRange, SeasonSeparatorRange, RemoveWeakIfMovie, RemoveWeakIfSxxExx,
                 RemoveWeakDuplicate, EpisodeDetailValidator, RemoveDetachedEpisodeNumber, VersionValidator,
                 CountValidator, EpisodeSingleDigitValidator)

    return rebulk
def other():
    """
    Build the Rebulk object holding the ``other`` patterns.

    Every pattern registered here is named ``other`` and is validated with
    ``seps_surround`` unless the individual registration overrides it.

    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def validate_complete(match):
        """
        Accept the match only when a season word accompanies "Complete".

        :param match: parent match whose children are inspected
        :type match:
        :return: True when a ``completeWordsBefore`` or ``completeWordsAfter``
            child is present, False otherwise
        :rtype: bool
        """
        found = match.children
        return bool(found.named('completeWordsBefore')
                    or found.named('completeWordsAfter'))

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)
    builder.defaults(name="other", validator=seps_surround)

    # Regex patterns mapping several spellings onto one canonical value.
    for spellings, canonical in (
            (('Audio-?Fix', 'Audio-?Fixed'), 'AudioFix'),
            (('Sync-?Fix', 'Sync-?Fixed'), 'SyncFix'),
            (('Dual-?Audio',), 'DualAudio'),
            (('ws', 'wide-?screen'), 'WideScreen'),
    ):
        builder.regex(*spellings, value=canonical)

    builder.string('Netflix', 'NF', value='Netflix')
    builder.string('Real', 'Fix', 'Fixed', value='Proper',
                   tags=['has-neighbor-before', 'has-neighbor-after'])
    builder.string('Proper', 'Repack', 'Rerip', value='Proper')

    for sub_word in ('Fansub', 'Fastsub'):
        builder.string(sub_word, value=sub_word, tags='has-neighbor')

    season_words = build_or_pattern(["seasons?", "series?"])
    complete_articles = build_or_pattern(["The"])

    # "[The-][<season word>-]Complete[-<season word>]"; validate_complete
    # additionally requires at least one of the two season-word groups.
    article_part = '(?P<completeArticle>' + complete_articles + '-)?'
    before_part = '(?P<completeWordsBefore>' + season_words + '-)?'
    after_part = '(?P<completeWordsAfter>-' + season_words + ')?'
    complete_pattern = article_part + before_part + 'Complete' + after_part

    builder.regex(
        complete_pattern,
        private_names=['completeArticle', 'completeWordsBefore', 'completeWordsAfter'],
        value={'other': 'Complete'},
        tags=['release-group-prefix'],
        validator={'__parent__': compose(seps_surround, validate_complete)})

    builder.string('R5', 'RC', value='R5')
    builder.regex('Pre-?Air', value='Preair')

    # Words whose reported value is the word itself.
    plain_words = ('Screener', 'Remux', 'Remastered', '3D', 'HD', 'mHD',
                   'HDLight', 'HQ', 'DDC', 'HR', 'PAL', 'SECAM', 'NTSC',
                   'CC', 'LD', 'MD', 'XXX')
    for word in plain_words:
        builder.string(word, value=word)

    # Same idea, but tagged 'has-neighbor' and 'release-group-prefix'.
    tagged_words = ('Limited', 'Complete', 'Classic', 'Unrated', 'LiNE',
                    'Bonus', 'Trailer', 'FINAL', 'Retail', 'Uncut',
                    'Extended', 'Extended Cut')
    for word in tagged_words:
        # A fresh tag list is passed on every call, as in the original loop.
        builder.string(word, value=word,
                       tags=['has-neighbor', 'release-group-prefix'])

    builder.string('VO', 'OV', value='OV', tags='has-neighbor')

    # The default validator is disabled here; the pattern carries the
    # 'other.validate.screener' tag instead.
    builder.regex('Scr(?:eener)?', value='Screener', validator=None,
                  tags='other.validate.screener')

    builder.rules(ValidateHasNeighbor, ValidateHasNeighborAfter,
                  ValidateHasNeighborBefore, ValidateScreenerRule,
                  ProperCountRule)

    return builder
def audio_codec():
    """
    Build the Rebulk object holding the audio patterns.

    Three families of patterns are registered, in this order:
    ``audio_codec``, ``audio_profile`` and ``audio_channels``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """

    def audio_codec_priority(match1, match2):
        """
        Conflict solver giving priority to audio_codec.

        When one match is an ``audio_codec`` and the other an
        ``audio_profile`` or ``audio_channels``, the non-codec match is
        returned (presumably the one Rebulk drops; confirm against the
        conflict_solver contract). Otherwise ``'__default__'`` is returned.

        :param match1:
        :type match1:
        :param match2:
        :type match2:
        :return: the non-codec match, or ``'__default__'``
        :rtype:
        """
        secondary_names = ('audio_profile', 'audio_channels')
        if match1.name == 'audio_codec' and match2.name in secondary_names:
            return match2
        if match2.name == 'audio_codec' and match1.name in secondary_names:
            return match1
        return '__default__'

    builder = Rebulk()
    builder = builder.regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    builder = builder.string_defaults(ignore_case=True)

    # --- audio_codec -------------------------------------------------------
    builder.defaults(name="audio_codec", conflict_solver=audio_codec_priority)

    builder.regex('MP3', 'LAME', r'LAME(?:\d)+-?(?:\d)+', value='MP3')
    builder.regex('Dolby', 'DolbyDigital', 'Dolby-Digital', 'DD', 'AC3D?',
                  value='Dolby Digital')
    builder.regex('Dolby-?Atmos', 'Atmos', value='Dolby Atmos')
    builder.string('AAC', value='AAC')
    builder.string('EAC3', 'DDP', 'DD+', value='Dolby Digital Plus')
    builder.string('Flac', value='FLAC')
    builder.string('DTS', value='DTS')
    builder.regex('DTS-?HD', value='DTS-HD')
    builder.regex('True-?HD', value='Dolby TrueHD')
    builder.string('Opus', value='Opus')

    # --- audio_profile -----------------------------------------------------
    builder.defaults(name='audio_profile')

    # (accepted spellings, reported value, tag)
    profile_table = (
        (('MA',), 'Master Audio', 'DTS-HD'),
        (('HR', 'HRA'), 'High Resolution Audio', 'DTS-HD'),
        (('ES',), 'Extended Surround', 'DTS'),
        (('HE',), 'High Efficiency', 'AAC'),
        (('LC',), 'Low Complexity', 'AAC'),
        (('HQ',), 'High Quality', 'Dolby Digital'),
        (('EX',), 'EX', 'Dolby Digital'),
    )
    for spellings, label, codec_tag in profile_table:
        builder.string(*spellings, value=label, tags=codec_tag)

    # --- audio_channels ----------------------------------------------------
    builder.defaults(name="audio_channels")

    # Digit, one non-word char or underscore, digit, optional "ch"; must be
    # followed by a non-digit or the end of the string.
    for separated_pattern, layout in (
            (r'(7[\W_][01](?:ch)?)(?:[^\d]|$)', '7.1'),
            (r'(5[\W_][01](?:ch)?)(?:[^\d]|$)', '5.1'),
            (r'(2[\W_]0(?:ch)?)(?:[^\d]|$)', '2.0'),
    ):
        builder.regex(separated_pattern, value=layout, children=True)

    # Bare two-digit forms: tagged weak and validated with seps_after.
    for bare_pattern, layout in (('7[01]', '7.1'), ('5[01]', '5.1')):
        builder.regex(bare_pattern, value=layout, validator=seps_after,
                      tags='weak-audio_channels')
    builder.string('20', value='2.0', validator=seps_after,
                   tags='weak-audio_channels')

    # Explicit "<n>ch" / word forms.
    for spellings, layout in (
            (('7ch', '8ch'), '7.1'),
            (('5ch', '6ch'), '5.1'),
            (('2ch', 'stereo'), '2.0'),
            (('1ch', 'mono'), '1.0'),
    ):
        builder.string(*spellings, value=layout)

    builder.rules(DtsHDRule, AacRule, DolbyDigitalRule, AudioValidatorRule,
                  HqConflictRule, AudioChannelsValidatorRule)

    return builder