示例#1
0
文件: parser.py 项目: kayabe/anitopy
    def search_for_release_group(self):
        """Find the release group among enclosed tokens and build its element.

        Candidates are enclosed unknown tokens. A candidate is accepted when
        the next bracket-or-identifier token after it is a bracket and the
        nearest non-delimiter token before it is either missing or a bracket.
        The first accepted candidate is turned into a ``RELEASE_GROUP``
        element (delimiters kept) and the search stops.
        """
        candidate_flags = TokenFlags.ENCLOSED | TokenFlags.UNKNOWN
        boundary_flags = TokenFlags.BRACKET | TokenFlags.IDENTIFIER

        token_end = None
        while True:
            # Resume right after the previous boundary, or start from the
            # beginning on the first pass
            token_begin = (
                Tokens.find_next(token_end, candidate_flags)
                if token_end
                else Tokens.find(candidate_flags)
            )
            if token_begin is None:
                return

            # The candidate runs up to the next bracket or identifier
            token_end = Tokens.find_next(token_begin, boundary_flags)
            if token_end is None:
                return

            # Only a run that is closed by a bracket qualifies
            if token_end.category != TokenCategory.BRACKET:
                continue

            # The candidate has to open its group: nothing but delimiters may
            # sit between it and the preceding bracket (if there is one)
            preceding = Tokens.find_previous(
                token_begin, TokenFlags.NOT_DELIMITER)
            opens_group = preceding is None or \
                preceding.category == TokenCategory.BRACKET
            if not opens_group:
                continue

            # token_end is the closing bracket; the element itself ends at the
            # valid token just before it
            last_member = Tokens.find_previous(token_end, TokenFlags.VALID)
            parser_helper.build_element(
                ElementCategory.RELEASE_GROUP, token_begin, last_member,
                keep_delimiters=True)
            return
示例#2
0
文件: parser.py 项目: kayabe/anitopy
    def search_for_episode_title(self):
        """Find the episode title among non-enclosed tokens and build it.

        Starting from the first non-enclosed unknown token, the candidate
        extends up to the next bracket or identifier token (or to the end of
        the token list when there is none). Candidates that consist of only a
        dash are skipped. The first remaining candidate becomes the
        ``EPISODE_TITLE`` element (delimiters dropped) and the search stops.
        """
        token_end = None
        while True:
            # Find the first non-enclosed unknown token
            if token_end:
                token_begin = Tokens.find_next(
                    token_end, TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
            else:
                token_begin = Tokens.find(
                    TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)
            if token_begin is None:
                return

            # Continue until a bracket or identifier is found
            token_end = Tokens.find_next(
                token_begin, TokenFlags.BRACKET | TokenFlags.IDENTIFIER)

            # Ignore if it's only a dash
            if Tokens.distance(token_begin, token_end) <= 2 and \
                    parser_helper.is_dash_character(token_begin.content):
                if token_end is None:
                    # No boundary follows the dash, so there is nothing left
                    # to scan. Looping again would restart from the first
                    # token, find this same dash and never terminate.
                    return
                continue

            # If token end is a bracket, then we get the previous token to be
            # included in the element
            if token_end and token_end.category == TokenCategory.BRACKET:
                token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
            # Build episode title
            parser_helper.build_element(
                ElementCategory.EPISODE_TITLE, token_begin, token_end,
                keep_delimiters=False)
            return
示例#3
0
def search_for_last_number(tokens):
    """Try each candidate token in turn as the episode number.

    A candidate is skipped when it is the very first token, when it is
    enclosed, when every token before it is enclosed or a delimiter, or when
    the preceding non-delimiter token is the unknown word "movie" or "part"
    (case-insensitive).

    Returns True as soon as ``set_episode_number`` accepts a candidate,
    False if none is accepted.
    """
    for token in tokens:
        token_index = Tokens.get_index(token)

        # The episode number is assumed to follow the title, so the first
        # token is out; enclosed tokens are unlikely candidates at this point
        if token_index == 0 or token.enclosed:
            continue

        # Require at least one earlier token that is neither enclosed nor a
        # delimiter, i.e. the candidate must not be the first "real" token
        preceding_tokens = Tokens.get_list()[:token_index]
        has_plain_predecessor = any(
            not t.enclosed and t.category != TokenCategory.DELIMITER
            for t in preceding_tokens
        )
        if not has_plain_predecessor:
            continue

        # A number right after "Movie" or "Part" is not an episode number
        previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
        if previous_token.category == TokenCategory.UNKNOWN and \
                previous_token.content.lower() in ('movie', 'part'):
            continue

        # We'll use this number after all
        if set_episode_number(token.content, token, validate=True):
            return True

    return False
示例#4
0
def is_token_isolated(token):
    """Return True if *token* is surrounded by brackets on both sides.

    The nearest non-delimiter token before and after *token* must both be of
    category ``BRACKET``. A token with no non-delimiter neighbour on either
    side (for example at the start or end of the token list) is not
    isolated, and False is returned instead of failing on a missing token.
    """
    previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    if previous_token is None or \
            previous_token.category != TokenCategory.BRACKET:
        return False

    next_token = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if next_token is None or \
            next_token.category != TokenCategory.BRACKET:
        return False

    return True
示例#5
0
def search_for_separated_numbers(tokens):
    """Look for a number that is preceded by a dash separator.

    For each candidate token, the nearest non-delimiter token before it is
    inspected. If that token is an unknown dash and ``set_episode_number``
    accepts the candidate, the dash is re-categorised as an identifier and
    True is returned. Returns False when no candidate qualifies.
    """
    for token in tokens:
        previous_token = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)

        # A candidate with nothing but delimiters before it cannot have a
        # preceding separator
        if previous_token is None:
            continue

        # See if the number has a preceding "-" separator
        if previous_token.category == TokenCategory.UNKNOWN and \
                parser_helper.is_dash_character(previous_token.content):
            if set_episode_number(token.content, token, validate=True):
                previous_token.category = TokenCategory.IDENTIFIER
                return True

    return False
示例#6
0
def check_anime_season_keyword(token):
    """Try to pair a season keyword token with its season number.

    Two layouts are recognised, checked in this order:

    * the nearest non-delimiter token before *token* yields a number through
      ``get_number_from_ordinal``;
    * the nearest non-delimiter token after *token* has all-digit content.

    On a match the season value is inserted as an ``ANIME_SEASON`` element,
    both tokens involved are marked as identifiers, and True is returned.
    Otherwise returns False.
    """
    def register_season(value, token_pair):
        Elements.insert(ElementCategory.ANIME_SEASON, value)
        for member in token_pair:
            member.category = TokenCategory.IDENTIFIER

    # Ordinal placed before the keyword
    before = Tokens.find_previous(token, TokenFlags.NOT_DELIMITER)
    ordinal = get_number_from_ordinal(before.content) if before else None
    if ordinal:
        register_season(ordinal, (before, token))
        return True

    # Plain number placed after the keyword
    after = Tokens.find_next(token, TokenFlags.NOT_DELIMITER)
    if not after or not after.content.isdigit():
        return False

    register_season(after.content, (token, after))
    return True
示例#7
0
文件: parser.py 项目: kayabe/anitopy
    def search_for_anime_title(self):
        """Find the anime title and build the ``ANIME_TITLE`` element.

        The title normally starts at the first non-enclosed unknown token.
        If there is none, the search falls back to enclosed groups: the
        first mostly-Latin unknown token found after at least one group has
        been skipped is used (the first group is assumed to be the release
        group). The title then extends to the next identifier (or bracket,
        for an enclosed title), after which the upper endpoint is trimmed so
        that unmatched brackets and trailing non-parenthesis groups are left
        out. Nothing is built when no starting token can be found.
        """
        enclosed_title = False

        # Find the first non-enclosed unknown token
        token_begin = Tokens.find(TokenFlags.NOT_ENCLOSED | TokenFlags.UNKNOWN)

        # If that doesn't work, find the first unknown token in the second
        # enclosed group, assuming that the first one is the release group
        if token_begin is None:
            enclosed_title = True
            token_begin = Tokens.get(0)
            skipped_previous_group = False
            while token_begin is not None:
                token_begin = Tokens.find_next(token_begin, TokenFlags.UNKNOWN)
                if token_begin is None:
                    break
                # Ignore groups that are composed of non-Latin characters
                if parser_helper.is_mostly_latin_string(token_begin.content):
                    if skipped_previous_group:
                        break  # Found it
                # Get the first unknown token of the next group
                token_begin = Tokens.find_next(token_begin, TokenFlags.BRACKET)
                skipped_previous_group = True

        if token_begin is None:
            return

        # Continue until an identifier (or a bracket, if the title is enclosed)
        # is found
        token_end = Tokens.find_next(
            token_begin, TokenFlags.IDENTIFIER | (
                TokenFlags.BRACKET if enclosed_title else TokenFlags.NONE
            ))

        # If within the interval there's an open bracket without its matching
        # pair, move the upper endpoint back to the bracket
        if not enclosed_title:
            last_bracket = token_end
            bracket_open = False
            for token in Tokens.get_list(TokenFlags.BRACKET, begin=token_begin,
                                         end=token_end):
                last_bracket = token
                bracket_open = not bracket_open
            if bracket_open:
                token_end = last_bracket

        # If the interval ends with an enclosed group (e.g. "Anime Title
        # [Fansub]"), move the upper endpoint back to the beginning of the
        # group. We ignore parentheses in order to keep certain groups (e.g.
        # "(TV)") intact.
        if not enclosed_title:
            token = Tokens.find_previous(token_end, TokenFlags.NOT_DELIMITER)
            while token is not None and \
                    token.category == TokenCategory.BRACKET and \
                    token.content != ')':
                token = Tokens.find_previous(token, TokenFlags.BRACKET)
                if token is None:
                    # No earlier bracket to move back to; stop trimming here
                    # rather than reading attributes of a missing token
                    break
                token_end = token
                token = Tokens.find_previous(
                    token_end, TokenFlags.NOT_DELIMITER)

        # Token end is a bracket, so we get the previous token to be included
        # in the element
        token_end = Tokens.find_previous(token_end, TokenFlags.VALID)
        parser_helper.build_element(ElementCategory.ANIME_TITLE, token_begin,
                                    token_end, keep_delimiters=False)
示例#8
0
 def find_previous_valid_token(token):
     """Return the result of ``Tokens.find_previous`` for *token* using the
     ``TokenFlags.VALID`` flag."""
     valid_predecessor = Tokens.find_previous(token, TokenFlags.VALID)
     return valid_predecessor