def search_for_release_group(self):
    """Look for the release group and register it as an element.

    Candidates are enclosed unknown tokens. A candidate is accepted when the
    next bracket-or-identifier token after it is a bracket and the nearest
    non-delimiter token before it is a bracket (or there is none). The first
    accepted candidate is handed to ``parser_helper.build_element`` as
    ``ElementCategory.RELEASE_GROUP`` with delimiters kept, and the search
    stops. Nothing is built when the tokens run out first.
    """
    candidate_flags = TokenFlags.ENCLOSED | TokenFlags.UNKNOWN
    boundary_flags = TokenFlags.BRACKET | TokenFlags.IDENTIFIER

    group_end = None
    while True:
        # Resume after the previous boundary, or start a fresh search on the
        # first pass.
        group_begin = (
            Tokens.find_next(group_end, candidate_flags) if group_end
            else Tokens.find(candidate_flags)
        )
        if group_begin is None:
            return

        # The candidate span runs up to the next bracket or identifier.
        group_end = Tokens.find_next(group_begin, boundary_flags)
        if group_end is None:
            return

        if group_end.category == TokenCategory.BRACKET:
            # Only accept the candidate when it is the first non-delimiter
            # token of its group.
            leading = Tokens.find_previous(
                group_begin, TokenFlags.NOT_DELIMITER)
            if leading is None or leading.category == TokenCategory.BRACKET:
                # The boundary is a bracket, so the element ends on the valid
                # token just before it.
                last_token = Tokens.find_previous(
                    group_end, TokenFlags.VALID)
                parser_helper.build_element(
                    ElementCategory.RELEASE_GROUP, group_begin, last_token,
                    keep_delimiters=True)
                return
def search_for_episode_title(self):
    """Look for the episode title and register it as an element.

    Candidates are non-enclosed unknown tokens. Each candidate span runs up to
    the next bracket or identifier token (or has no end token when none is
    found). A span that is at most two tokens long and starts with a dash is
    skipped. The first remaining span is handed to
    ``parser_helper.build_element`` as ``ElementCategory.EPISODE_TITLE`` with
    delimiters dropped, and the search stops.
    """
    token_end = None
    while True:
        # Find the first non-enclosed unknown token
        if token_end:
            token_begin = Tokens.find_next(
                token_end, TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
        else:
            token_begin = Tokens.find(
                TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
        if token_begin is None:
            return

        # Continue until a bracket or identifier is found
        token_end = Tokens.find_next(
            token_begin, TokenFlags.BRACKET | TokenFlags.IDENTIFIER)

        # Ignore if it's only a dash
        if Tokens.distance(token_begin, token_end) <= 2 and \
                parser_helper.is_dash_character(token_begin.content):
            if not token_end:
                # There is no end token to resume from. Looping again would
                # restart the search from the top, find this same dash and
                # never terminate, so give up instead.
                return
            continue

        # If token end is a bracket, then we get the previous token to be
        # included in the element
        if token_end and token_end.category == TokenCategory.BRACKET:
            token_end = Tokens.find_previous(token_end, TokenFlags.VALID)

        # Build episode title
        parser_helper.build_element(
            ElementCategory.EPISODE_TITLE, token_begin, token_end,
            keep_delimiters=False)
        return
def search_for_last_number(tokens):
    """Try each given token in order as the episode number.

    A token is skipped when it is at index 0, when it is enclosed, when every
    token before it is enclosed or a delimiter, or when the nearest
    non-delimiter token before it is an unknown token reading "movie" or
    "part" (case-insensitive).

    Returns True as soon as ``set_episode_number`` accepts a token, otherwise
    False.
    """
    excluded_prefixes = ('movie', 'part')

    for candidate in tokens:
        position = Tokens.get_index(candidate)

        # The episode number is assumed to follow the title, so the very first
        # token is ruled out; enclosed tokens are unlikely candidates too.
        if position == 0 or candidate.enclosed:
            continue

        # Skip the first non-enclosed, non-delimiter token.
        preceding = Tokens.get_list()[:position]
        if all(
                earlier.enclosed
                or earlier.category == TokenCategory.DELIMITER
                for earlier in preceding):
            continue

        # Skip numbers that directly follow "Movie" or "Part".
        prior = Tokens.find_previous(candidate, TokenFlags.NOT_DELIMITER)
        if prior.category == TokenCategory.UNKNOWN and \
                prior.content.lower() in excluded_prefixes:
            continue

        # Nothing rules this number out; use it if it validates.
        if set_episode_number(candidate.content, candidate, validate=True):
            return True

    return False
def is_token_isolated(token):
    """Return True when the token's nearest non-delimiter neighbours on both
    sides are brackets.

    A token with no non-delimiter neighbour on either side (for example the
    first or last token) is reported as not isolated instead of raising on the
    missing neighbour.
    """
    previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    # The lookup yields None when nothing precedes the token.
    if previous_token is None or \
            previous_token.category != TokenCategory.BRACKET:
        return False

    next_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    # Likewise, None when nothing follows the token.
    if next_token is None or next_token.category != TokenCategory.BRACKET:
        return False

    return True
def search_for_separated_numbers(tokens):
    """Try each given token as an episode number preceded by a dash.

    A token qualifies when the nearest non-delimiter token before it is an
    unknown token whose content is a dash character. If
    ``set_episode_number`` then accepts the token, the dash token is
    re-categorised as an identifier and True is returned. Returns False when
    no token qualifies.
    """
    for token in tokens:
        previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)

        # A token with nothing before it cannot have a separator; skip it
        # rather than dereferencing a missing neighbour.
        if previous_token is None:
            continue

        # See if the number has a preceding "-" separator
        if previous_token.category == TokenCategory.UNKNOWN and \
                parser_helper.is_dash_character(previous_token.content):
            if set_episode_number(token.content, token, validate=True):
                previous_token.category = TokenCategory.IDENTIFIER
                return True

    return False
def check_anime_season_keyword(token):
    """Pair a season keyword token with a neighbouring season number.

    The nearest non-delimiter token before ``token`` is tried first: if
    ``get_number_from_ordinal`` yields a truthy number for its content, that
    number is used. Otherwise the nearest non-delimiter token after ``token``
    is used when its content is all digits.

    On a match the season value is inserted as
    ``ElementCategory.ANIME_SEASON``, both tokens are re-categorised as
    identifiers and True is returned. Returns False when neither side matches.
    """
    def _register_season(value, *season_tokens):
        # Record the season, then mark every participating token as consumed.
        Elements.insert(ElementCategory.ANIME_SEASON, value)
        for season_token in season_tokens:
            season_token.category = TokenCategory.IDENTIFIER

    # Ordinal before the keyword.
    before = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    ordinal_value = (
        get_number_from_ordinal(before.content) if before else None
    )
    if ordinal_value:
        _register_season(ordinal_value, before, token)
        return True

    # Plain number after the keyword.
    after = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if not after or not after.content.isdigit():
        return False

    _register_season(after.content, token, after)
    return True
def search_for_anime_title(self):
    """Look for the anime title and register it as an element.

    The title starts at the first non-enclosed unknown token. When there is
    none, the search falls back to enclosed groups: the first group is skipped
    (it is assumed to be the release group), and so is any later group whose
    first unknown token is not mostly Latin text.

    The title ends at the next identifier (or bracket, for an enclosed title).
    For a non-enclosed title the end is pulled back to an unmatched bracket
    inside the span, and past any trailing enclosed group that does not end
    in ")".

    The resulting span is handed to ``parser_helper.build_element`` as
    ``ElementCategory.ANIME_TITLE`` with delimiters dropped. Nothing is built
    when no starting token is found.
    """
    enclosed_title = False

    # Find the first non-enclosed unknown token
    token_begin = Tokens.find(TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)

    # If that doesn't work, find the first unknown token in the second
    # enclosed group, assuming that the first one is the release group
    if token_begin is None:
        enclosed_title = True
        token_begin = Tokens.get(0)
        skipped_previous_group = False
        while token_begin is not None:
            token_begin = Tokens.find_next(token_begin, TokenFlags.UNKNOWN)
            if token_begin is None:
                break
            # Ignore groups that are composed of non-Latin characters
            if parser_helper.is_mostly_latin_string(token_begin.content):
                if skipped_previous_group:
                    break  # Found it
            # Get the first unknown token of the next group
            token_begin = Tokens.find_next(token_begin, TokenFlags.BRACKET)
            skipped_previous_group = True

    if token_begin is None:
        return

    # Continue until an identifier (or a bracket, if the title is enclosed)
    # is found
    token_end = Tokens.find_next(
        token_begin, TokenFlags.IDENTIFIER | (
            TokenFlags.BRACKET if enclosed_title else TokenFlags.NONE
        ))

    # If within the interval there's an open bracket without its matching
    # pair, move the upper endpoint back to the bracket
    if not enclosed_title:
        last_bracket = token_end
        bracket_open = False
        for token in Tokens.get_list(TokenFlags.BRACKET, begin=token_begin,
                                     end=token_end):
            last_bracket = token
            bracket_open = not bracket_open
        if bracket_open:
            token_end = last_bracket

    # If the interval ends with an enclosed group (e.g. "Anime Title
    # [Fansub]"), move the upper endpoint back to the beginning of the
    # group. We ignore parentheses in order to keep certain groups (e.g.
    # "(TV)") intact.
    if not enclosed_title:
        token = Tokens.find_previous(token_end, TokenFlags.NOT_DELIMITER)
        # `token` becomes None when no earlier bracket exists; stop there
        # instead of reading an attribute of a missing token.
        while token is not None and \
                token.category == TokenCategory.BRACKET and \
                token.content != ')':
            token = Tokens.find_previous(token, TokenFlags.BRACKET)
            if token is not None:
                token_end = token
                token = Tokens.find_previous(
                    token_end, TokenFlags.NOT_DELIMITER)

    # Token end is a bracket, so we get the previous token to be included
    # in the element
    token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
    parser_helper.build_element(ElementCategory.ANIME_TITLE, token_begin,
                                token_end, keep_delimiters=False)
def find_previous_valid_token(token):
    """Return the result of searching backwards from ``token`` for a token
    matching ``TokenFlags.VALID``.

    This is a thin wrapper around ``Tokens.find_previous``; whatever that
    lookup returns is passed through unchanged.
    """
    valid_only = TokenFlags.VALID
    return Tokens.find_previous(token, valid_only)