示例#1
0
    def _add_token(self, token: _TokenT) -> None:
        """
        This is the only core function for parsing. Here happens basically
        everything. Everything is well prepared by the parser generator and we
        only apply the necessary steps here.

        Raises ParserSyntaxError when the token cannot be shifted given the
        current stack ("incomplete input"), when tokens arrive after the stack
        is exhausted ("too much input"), or when reducing a finished
        production fails internally.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`.
        transition = _token_to_transition(grammar, token.type, token.string)

        # Pop finished productions off the stack until the DFA on top has a
        # transition that accepts this token.
        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    try:
                        self._pop()
                    except Exception:
                        # convert_nonterminal may fail, try to recover enough to at
                        # least tell us where in the file it failed.
                        raise ParserSyntaxError(
                            message="internal error",
                            encountered="",  # TODO: encountered/expected are nonsense
                            expected=[],
                            pos=token.start_pos,
                            lines=self.lines,
                        )
                else:
                    # The top DFA is not final and has no arc for this token,
                    # so the grammar cannot accept the input here.
                    raise ParserSyntaxError(
                        message="incomplete input",
                        encountered=token.string,
                        expected=list(stack[-1].dfa.arcs.keys()),
                        pos=token.start_pos,
                        lines=self.lines,
                    )
            except IndexError:
                # The stack is empty: everything was already reduced, yet more
                # tokens keep arriving.
                raise ParserSyntaxError(
                    message="too much input",
                    encountered=token.string,
                    expected=None,  # EOF
                    pos=token.start_pos,
                    lines=self.lines,
                )

        # Logically, `plan` is always defined, but pyre can't reasonably determine that.
        # pyre-fixme[18]: Global name `plan` is undefined.
        stack[-1].dfa = plan.next_dfa

        # Enter any new productions this transition opens up.
        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        # Attach the token as a terminal node to the innermost production.
        leaf = self.convert_terminal(token)
        stack[-1].nodes.append(leaf)
示例#2
0
    def parse(self) -> _NodeT:
        """
        Feed every token to the parser, then reduce the stack down to a
        single root node and return its converted form.

        Raises if this parser instance was already used, or if the token
        stream ends while a production is still unfinished.
        """
        # A parser instance is single-use; reject any second call.
        if self.__was_parse_called:
            raise Exception("Each parser object may only be used to parse once.")
        self.__was_parse_called = True

        for token in self.tokens:
            self._add_token(token)

        # Unwind the stack: each iteration either pops one finished
        # production or, once only the root remains, converts and returns it.
        while True:
            top = self.stack[-1]
            if not top.dfa.is_final:
                # EOF arrived mid-production: report what could have followed.
                expected_str = get_expected_str(
                    EOFSentinel.EOF, top.dfa.transitions.keys()
                )
                raise ParserSyntaxError(
                    f"Incomplete input. {expected_str}",
                    lines=self.lines,
                    raw_line=len(self.lines),
                    raw_column=len(self.lines[-1]),
                )
            if len(self.stack) == 1:
                return self.convert_nonterminal(top.nonterminal, top.nodes)
            self._pop()
示例#3
0
    def parse(self) -> _NodeT:
        """
        Consume the token stream and return the converted root nonterminal.

        Raises on parser re-use and on premature end of input.
        """
        # Parsers are single-shot; a second parse() call is a usage error.
        if self.__was_parse_called:
            raise Exception("Each parser object may only be used to parse once.")
        self.__was_parse_called = True

        for tok in self.tokens:
            self._add_token(tok)

        # Pop every finished production until only the root nonterminal is
        # left on the stack, then convert it.
        while True:
            top = self.stack[-1]
            if not top.dfa.is_final:
                # We never broke out -- EOF is too soon -- Unfinished statement.
                raise ParserSyntaxError(
                    message="incomplete input",
                    encountered=None,
                    expected=list(top.dfa.arcs.keys()),
                    pos=(len(self.lines), len(self.lines[-1])),
                    lines=self.lines,
                )
            if len(self.stack) == 1:
                return self.convert_nonterminal(top.nonterminal, top.nodes)
            self._pop()
示例#4
0
class ExceptionsTest(UnitTest):
    # Each case pairs a pre-built ParserSyntaxError with the exact multi-line
    # text its str() is expected to render.
    @data_provider([
        (
            # Case 1: both encountered and expected are None, which renders
            # as end-of-file on both sides of the message.
            ParserSyntaxError(
                message="some message",
                encountered=None,  # EOF
                expected=None,  # EOF
                pos=(1, 0),
                lines=["abcd"],
            ),
            dedent("""
                    Syntax Error: some message @ 1:1.
                    Encountered end of file (EOF), but expected end of file (EOF).

                    abcd
                    ^
                    """).strip(),
        ),
        (
            # Case 2: concrete encountered/expected values on a line that has
            # a leading tab and a CRLF ending. NOTE(review): pos is (1, 2) but
            # the rendered header says column 10 -- presumably the formatter
            # expands the tab when computing the display column; confirm
            # against ParserSyntaxError.__str__.
            ParserSyntaxError(
                message="some message",
                encountered="encountered_value",
                expected=["expected_value"],
                pos=(1, 2),
                lines=["\tabcd\r\n"],
            ),
            dedent("""
                    Syntax Error: some message @ 1:10.
                    Encountered 'encountered_value', but expected one of ['expected_value'].

                            abcd
                             ^
                    """).strip(),
        ),
    ])
    def test_parser_syntax_error_str(self, err: ParserSyntaxError,
                                     expected: str) -> None:
        # The full rendered error text must match the fixture exactly.
        self.assertEqual(str(err), expected)
示例#5
0
    def parse(self):
        """
        Run every token through the parser, then reduce the stack and
        return the converted root node.
        """
        for tok in self.tokens:
            self._add_token(tok)

        # Pop finished productions until only the root is left on the stack.
        while True:
            top = self.stack[-1]
            if not top.dfa.is_final:
                # EOF arrived before the grammar was satisfied.
                expected_str = get_expected_str(EOFSentinel.EOF,
                                                top.dfa.transitions.keys())
                raise ParserSyntaxError(
                    f"{expected_str}",
                    lines=self.lines,
                    raw_line=len(self.lines),
                    raw_column=len(self.lines[-1]),
                )
            if len(self.stack) == 1:
                return self.convert_nonterminal(top.nonterminal, top.nodes)
            self._pop()
示例#6
0
def _convert_token(  # noqa: C901: too complex
    state: _TokenizeState, curr_token: OrigToken, next_token: Optional[OrigToken]
) -> Token:
    """
    Convert one tokenizer token into our own Token, threading whitespace and
    indentation bookkeeping through ``state``.

    ``next_token`` is peeked at only to keep ``state.indents`` updated one
    token ahead of INDENT/DEDENT. Raises ParserSyntaxError for error tokens,
    inconsistent dedents, and unmatched closing braces.
    """
    ct_type = curr_token.type
    ct_string = curr_token.string
    ct_start_pos = curr_token.start_pos
    # Tokenizer-level failures arrive as dedicated token types; surface them
    # as syntax errors carrying position information.
    if ct_type is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )
    if ct_type is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )

    # Compute relative indent changes for indent/dedent nodes
    relative_indent: Optional[str] = None
    if ct_type is _INDENT:
        # The suffix of the new indent beyond the previous level's indent.
        old_indent = "" if len(state.indents) < 2 else state.indents[-2]
        new_indent = state.indents[-1]
        relative_indent = new_indent[len(old_indent) :]

    # Maintain the indent stack one token ahead, so that by the time an
    # INDENT token is converted, state.indents already reflects it.
    if next_token is not None:
        nt_type = next_token.type
        if nt_type is _INDENT:
            nt_line, nt_column = next_token.start_pos
            state.indents.append(state.lines[nt_line - 1][:nt_column])
        elif nt_type is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER:
        # Don't update whitespace state for these dummy tokens. This makes it possible
        # to partially parse whitespace for IndentedBlock footers, and then parse the
        # rest of the whitespace in the following statement's leading_lines.
        # Unfortunately, that means that the indentation is either wrong for the footer
        # comments, or for the next line. We've chosen to allow it to be wrong for the
        # IndentedBlock footer and manually override the state when parsing whitespace
        # in that particular node.
        whitespace_after = whitespace_before
        ct_end_pos = ct_start_pos
    else:
        # Not a dummy token, so update the whitespace state.

        # Compute our own end_pos, since parso's end_pos is wrong for triple-strings.
        lines = split_lines(ct_string)
        if len(lines) > 1:
            # Multi-line token: end line advances, column restarts on the
            # last physical line of the token.
            ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1])
        else:
            ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string))

        # Figure out what mode the whitespace parser should use. If we're inside
        # parentheses, certain whitespace (e.g. newlines) are allowed where they would
        # otherwise not be. f-strings override and disable this behavior, however.
        #
        # Parso's tokenizer tracks this internally, but doesn't expose it, so we have to
        # duplicate that logic here.

        pof_stack = state.parenthesis_or_fstring_stack
        try:
            if ct_type is _FSTRING_START:
                pof_stack.append(_FSTRING_STACK_ENTRY)
            elif ct_type is _FSTRING_END:
                pof_stack.pop()
            elif ct_type is _OP:
                if ct_string in "([{":
                    pof_stack.append(_PARENTHESIS_STACK_ENTRY)
                elif ct_string in ")]}":
                    pof_stack.pop()
        except IndexError:
            # pof_stack may be empty by the time we need to read from it due to
            # mismatched braces.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_start_pos[0],
                raw_column=ct_start_pos[1],
            )
        # Parenthesized only when the innermost entry is a parenthesis, not
        # an f-string.
        is_parenthesized = (
            len(pof_stack) > 0 and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY
        )

        whitespace_after = WhitespaceState(
            ct_end_pos[0], ct_end_pos[1], state.indents[-1], is_parenthesized
        )

    # Hold onto whitespace_after, so we can use it as whitespace_before in the next
    # node.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_type,
        ct_string,
        ct_start_pos,
        ct_end_pos,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )
示例#7
0
    def _add_token(self, token: _TokenT) -> None:
        """
        This is the only core function for parsing. Here happens basically
        everything. Everything is well prepared by the parser generator and we
        only apply the necessary steps here.

        Raises ParserSyntaxError when the token cannot be shifted given the
        current stack ("Incomplete input"), when tokens arrive after the stack
        is exhausted ("Too much input"), or when reducing a finished
        production fails.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`.
        transition = _token_to_transition(grammar, token.type, token.string)

        # Pop finished productions off the stack until the DFA on top has a
        # transition that accepts this token.
        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    try:
                        self._pop()
                    except PartialParserSyntaxError as ex:
                        # Upconvert the PartialParserSyntaxError to a ParserSyntaxError
                        # by backfilling the line/column information.
                        raise ParserSyntaxError(
                            ex.message,
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                    except Exception as ex:
                        # convert_nonterminal may fail due to a bug in our code. Try to
                        # recover enough to at least tell us where in the file it
                        # failed.
                        raise ParserSyntaxError(
                            f"Internal error: {ex}",
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                else:
                    # We never broke out -- EOF is too soon -- Unfinished statement.
                    #
                    # BUG: The `expected_str` may not be complete because we already
                    # popped the other possibilities off the stack at this point, but
                    # it still seems useful to list some of the possibilities that we
                    # could've expected.
                    expected_str = get_expected_str(
                        token, stack[-1].dfa.transitions.keys()
                    )
                    raise ParserSyntaxError(
                        f"Incomplete input. {expected_str}",
                        lines=self.lines,
                        raw_line=token.start_pos[0],
                        raw_column=token.start_pos[1],
                    )
            except IndexError:
                # I don't think this will ever happen with Python's grammar, because if
                # there are any extra tokens at the end of the input, we'll instead
                # complain that we expected ENDMARKER.
                #
                # However, let's leave it just in case.
                expected_str = get_expected_str(token, EOFSentinel.EOF)
                raise ParserSyntaxError(
                    f"Too much input. {expected_str}",
                    lines=self.lines,
                    raw_line=token.start_pos[0],
                    raw_column=token.start_pos[1],
                )

        # Logically, `plan` is always defined, but pyre can't reasonably determine that.
        # pyre-fixme[18]: Global name `plan` is undefined.
        stack[-1].dfa = plan.next_dfa

        # Enter any new productions this transition opens up.
        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        # Attach the token as a terminal node to the innermost production.
        leaf = self.convert_terminal(token)
        stack[-1].nodes.append(leaf)