def _apply_strategy(self, **kwargs):
    """
    Tokenize a file inclusion statement.

    Expects the text at `self._next_index` to start with
    `FILE_INCLUSION_SYM`, immediately followed by a file path (running to
    the next comment or to the end of the text). Appends a
    `file_inclusion_marker` token and a `file_path` token on success.
    Returns `True` on success; on failure sets `self.error_msg` and
    returns `False`.
    """
    if not self._text.startswith(FILE_INCLUSION_SYM, self._next_index):
        self.error_msg = \
            "Invalid token. Expected a file to be included there " + \
            "(starting with '" + FILE_INCLUSION_SYM + "')."
        return False

    # Consume the inclusion marker.
    self._next_index += 1
    self._update_furthest_matched_index()
    self._tokens.append(
        LexicalToken(
            TerminalType.file_inclusion_marker, FILE_INCLUSION_SYM
        )
    )

    # The path must follow the marker directly, with no whitespace.
    if self._text[self._next_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a file path here, got a whitespace."
        return False

    # Slice up to the start of a trailing comment if there is one;
    # slicing with `None` as the stop index reads to the end of the text.
    comment_index = find_next_comment(self._text, self._next_index)
    file_path = self._text[self._next_index:comment_index].rstrip()

    self._next_index += len(file_path)
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.file_path, file_path))
    return True
def _apply_strategy(self, **kwargs):
    """
    Tokenize a slot value (`= value`) at the end of a slot-definition rule.

    `kwargs` may contain a boolean under the key `parsing_slot_def`,
    `True` iff the current text belongs to a slot definition (defaults to
    `False` when absent). Raises `ValueError` when called outside a slot
    definition. Returns `True` and appends a `slot_val_marker` token and a
    `slot_val` token when a value is found; returns `False` otherwise.
    """
    parsing_slot_def = kwargs.get("parsing_slot_def", False)
    if not parsing_slot_def:
        raise ValueError(
            "Tried to extract a slot value within a rule that is not " + \
            "part of a slot definition."
        )

    # Skip the whitespace that precedes the marker.
    while self._text[self._next_index].isspace():
        self._next_index += 1
        self._update_furthest_matched_index()

    if not self._text.startswith(SLOT_VAL_SYM, self._next_index):
        return False

    self._tokens.append(
        LexicalToken(TerminalType.slot_val_marker, SLOT_VAL_SYM)
    )
    self._next_index += 1
    self._update_furthest_matched_index()

    # Skip the whitespace that follows the marker.
    while self._text[self._next_index].isspace():
        self._next_index += 1
        self._update_furthest_matched_index()

    # The value runs up to a trailing comment, or to the end of the text
    # when there is none (`None` as a slice stop means "to the end").
    comment_index = find_next_comment(self._text, self._next_index)
    slot_value = self._text[self._next_index:comment_index].rstrip()

    self._tokens.append(LexicalToken(TerminalType.slot_val, slot_value))
    self._next_index += len(slot_value)
    self._update_furthest_matched_index()
    return True
def _apply_strategy(self, **kwargs):
    """
    Tokenize a single word starting at `self._start_index`.

    `kwargs` may contain a boolean under the key `inside_choice`, `True`
    when the current word sits inside a choice (defaults to `False`), and
    a boolean under the key `parsing_slot_def`, `True` iff the current
    rule belongs to a slot definition (defaults to `False`). Appends a
    `word` token and returns `True` on success; sets `self.error_msg` and
    returns `False` otherwise.
    """
    inside_choice = kwargs.get("inside_choice", False)
    parsing_slot_def = kwargs.get("parsing_slot_def", False)

    # TODO this might be better using regexes
    if self._text[self._start_index].isspace():
        self.error_msg = \
            "Invalid token. Expected a word instead of a whitespace there."
        return False

    # Scan forward to the first whitespace or the end of the text.
    end_index = self._start_index + 1  # NOTE exclusive
    while (
        end_index < len(self._text)
        and not self._text[end_index].isspace()
    ):
        end_index += 1

    # A comment may cut the word short.
    end_index = \
        min_if_exist(
            end_index, find_next_comment(self._text, self._start_index)
        )
    if end_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    # Unescaped special characters also terminate the word; stop early
    # once the word has shrunk to nothing.
    for special_char in RuleWord._should_be_escaped_chars:
        if end_index == self._start_index:
            break
        end_index = \
            min_if_exist(
                end_index,
                find_unescaped(self._text, special_char, self._start_index)
            )

    if inside_choice and end_index > self._start_index:
        for special_char in RuleWord._should_be_escaped_in_choices_chars:
            end_index = \
                min_if_exist(
                    end_index,
                    find_unescaped(
                        self._text, special_char, self._start_index
                    )
                )

    if parsing_slot_def and end_index > self._start_index:
        for special_char in RuleWord._should_be_escaped_in_slot_def_chars:
            end_index = \
                min_if_exist(
                    end_index,
                    find_unescaped(
                        self._text, special_char, self._start_index
                    )
                )

    if end_index == self._start_index:
        self.error_msg = "Invalid token. Expected a word to start here."
        return False

    word = self._text[self._start_index:end_index]
    self._next_index = end_index
    self._update_furthest_matched_index()
    self._tokens.append(LexicalToken(TerminalType.word, word))
    return True