def search_for_isolated_numbers(self):
    """Tag isolated, purely numeric unknown tokens as year or resolution.

    Walks every token returned by ``Tokens.get_list(TokenFlags.UNKNOWN)``.
    A token is considered only when its content is made of decimal digits
    and ``parser_helper.is_token_isolated`` accepts it. Such a token is
    recorded as ``ANIME_YEAR`` when its value lies within
    ``parser_number.ANIME_YEAR_MIN``..``ANIME_YEAR_MAX`` (inclusive), or as
    ``VIDEO_RESOLUTION`` when its value is 480, 720 or 1080 -- in each case
    only if no element of that category has been stored yet. A token that
    gets recorded is re-categorised as ``TokenCategory.IDENTIFIER``.

    Returns:
        None. Results are written to ``Elements`` and to the tokens.
    """
    for token in Tokens.get_list(TokenFlags.UNKNOWN):
        # str.isdecimal() instead of str.isdigit(): isdigit() also accepts
        # characters such as superscript digits ("²") that int() rejects
        # with ValueError, which would abort parsing of the whole filename.
        # isdecimal() matches exactly the strings int() can convert here.
        if not token.content.isdecimal() or \
                not parser_helper.is_token_isolated(token):
            continue

        number = int(token.content)

        # Anime year
        if parser_number.ANIME_YEAR_MIN <= number \
                <= parser_number.ANIME_YEAR_MAX:
            if not Elements.contains(ElementCategory.ANIME_YEAR):
                Elements.insert(ElementCategory.ANIME_YEAR, token.content)
                token.category = TokenCategory.IDENTIFIER
                continue

        # Video resolution
        if number in (480, 720, 1080):
            # If these numbers are isolated, it's more likely for them to
            # be the video resolution rather than the episode number. Some
            # fansub groups use these without the "p" suffix.
            if not Elements.contains(ElementCategory.VIDEO_RESOLUTION):
                Elements.insert(
                    ElementCategory.VIDEO_RESOLUTION, token.content)
                token.category = TokenCategory.IDENTIFIER
def search_for_keywords(self):
    """Match unknown tokens against known keywords, CRC32s and resolutions.

    For each token from ``Tokens.get_list(TokenFlags.UNKNOWN)`` the content
    is stripped of surrounding spaces and dashes and looked up through
    ``keyword_manager``. Depending on the keyword's category the token is
    either handed to a dedicated helper (season, episode and volume
    prefixes) or stored directly in ``Elements``. Words without a keyword
    are tested as a file checksum and then as a video resolution.

    Returns:
        None. Results are written to ``Elements`` and to the tokens.
    """
    for token in Tokens.get_list(TokenFlags.UNKNOWN):
        text = token.content.strip(' -')

        if not text:
            continue

        # Don't bother if the word is a number that cannot be CRC
        if text.isdigit() and len(text) != 8:
            continue

        element = ElementCategory.UNKNOWN
        match = keyword_manager.find(keyword_manager.normalize(text))

        if match:
            element = match.category

            # Release groups are ignored when that option is switched off
            if not self.options['parse_release_group'] and \
                    element == ElementCategory.RELEASE_GROUP:
                continue

            # Both the category and the keyword itself must be searchable
            if not (ElementCategory.is_searchable(element) and
                    match.options.searchable):
                continue

            # Singular categories keep only their first value
            if ElementCategory.is_singular(element) and \
                    Elements.contains(element):
                continue

            if element == ElementCategory.ANIME_SEASON_PREFIX:
                parser_helper.check_anime_season_keyword(token)
                continue

            if element == ElementCategory.EPISODE_PREFIX:
                if match.options.valid:
                    parser_number.check_extent_keyword(
                        ElementCategory.EPISODE_NUMBER, token)
                continue

            if element == ElementCategory.RELEASE_VERSION:
                text = text[1:]  # number without "v"
            elif element == ElementCategory.VOLUME_PREFIX:
                parser_number.check_extent_keyword(
                    ElementCategory.VOLUME_NUMBER, token)
                continue
        else:
            # No keyword: the word may still be a checksum or a resolution
            if not Elements.contains(ElementCategory.FILE_CHECKSUM) and \
                    parser_helper.is_crc32(text):
                element = ElementCategory.FILE_CHECKSUM
            elif not Elements.contains(ElementCategory.VIDEO_RESOLUTION) \
                    and parser_helper.is_resolution(text):
                element = ElementCategory.VIDEO_RESOLUTION

        if element != ElementCategory.UNKNOWN:
            Elements.insert(element, text)
            if match is None or match.options.identifiable:
                token.category = TokenCategory.IDENTIFIER
def validate_elements(self):
    """Resolve clashes between the anime type and the episode title.

    Runs only when ``Elements`` holds both an ``ANIME_TYPE`` and an
    ``EPISODE_TITLE``. Each stored anime type is compared with the first
    episode title: an exact match erases the episode title, while an anime
    type that merely occurs inside the title is removed from ``ANIME_TYPE``
    if ``keyword_manager`` finds its normalized form in that category.

    Returns:
        None. ``Elements`` is modified in place.
    """
    # Validate anime type and episode title
    if not (Elements.contains(ElementCategory.ANIME_TYPE) and
            Elements.contains(ElementCategory.EPISODE_TITLE)):
        return

    # Here we check whether the episode title contains an anime type
    title = Elements.get(ElementCategory.EPISODE_TITLE)[0]

    # Iterate over a snapshot because entries may be removed on the way
    for candidate in tuple(Elements.get(ElementCategory.ANIME_TYPE)):
        if candidate == title:
            # Invalid episode title
            Elements.erase(ElementCategory.EPISODE_TITLE)
            continue

        if candidate not in title:
            continue

        normalized = keyword_manager.normalize(candidate)
        if keyword_manager.find(normalized, ElementCategory.ANIME_TYPE):
            Elements.remove(ElementCategory.ANIME_TYPE, candidate)
def parse(self):
    """Run every search stage in order and report whether anything was found.

    Keywords and isolated numbers are always searched, as is the anime
    title. The episode number, release group and episode title stages are
    gated by the matching ``self.options`` flags; the release group stage
    additionally requires that no release group is stored yet, and the
    episode title stage that an episode number is stored.

    Returns:
        bool: ``True`` when ``Elements`` is not empty after all stages.
    """
    self.search_for_keywords()
    self.search_for_isolated_numbers()

    if self.options['parse_episode_number']:
        self.search_for_episode_number()

    self.search_for_anime_title()

    # Only look for a release group if none is stored yet
    if self.options['parse_release_group']:
        if not Elements.contains(ElementCategory.RELEASE_GROUP):
            self.search_for_release_group()

    # An episode title is only searched for once an episode number exists
    if self.options['parse_episode_title']:
        if Elements.contains(ElementCategory.EPISODE_NUMBER):
            self.search_for_episode_title()

    self.validate_elements()

    nothing_found = Elements.empty()
    return not nothing_found
def search_for_episode_number(self):
    """Try the ``parser_number`` episode searches in order until one hits.

    Candidates are the unknown tokens in which
    ``parser_helper.find_number_in_string`` finds a number. Known episode
    patterns are tried first; if that fails and no episode number is stored
    yet, the candidates are narrowed to all-digit tokens and the remaining
    searches are tried one after another, ending with the last number.

    Returns:
        None. Any result is recorded by the ``parser_number`` helpers.
    """
    # List all unknown tokens that contain a number
    candidates = [
        tok for tok in Tokens.get_list(TokenFlags.UNKNOWN)
        if parser_helper.find_number_in_string(tok.content) is not None
    ]
    if not candidates:
        return

    already_found = Elements.contains(ElementCategory.EPISODE_NUMBER)
    Elements.set_check_alt_number(already_found)

    # If a token matches a known episode pattern, it has to be the episode
    # number
    if parser_number.search_for_episode_patterns(candidates):
        return

    # We have previously found an episode number via keywords
    if Elements.contains(ElementCategory.EPISODE_NUMBER):
        return

    # From now on, we're only interested in numeric tokens
    numeric = [tok for tok in candidates if tok.content.isdigit()]
    if not numeric:
        return

    # Ordered from the most to the least specific search
    searches = (
        parser_number.search_for_equivalent_numbers,  # "01 (176)", "29 (04)"
        parser_number.search_for_separated_numbers,   # " - 08"
        parser_number.search_for_isolated_numbers,    # "[12]", "(2006)"
    )
    for search in searches:
        if search(numeric):
            return

    # Consider using the last number as a last resort
    parser_number.search_for_last_number(numeric)