Example #1
def _parse_empty_line(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Optional[EmptyLine]:
    # begin speculative parsing
    speculative_state = State(state.line, state.column, state.absolute_indent,
                              state.is_parenthesized)
    try:
        indent = _parse_indent(
            config,
            speculative_state,
            override_absolute_indent=override_absolute_indent)
    except Exception:
        # We aren't on a new line, speculative parsing failed
        return None
    whitespace = parse_simple_whitespace(config, speculative_state)
    comment = _parse_comment(config, speculative_state)
    newline = _parse_newline(config, speculative_state)
    if newline is None:
        # speculative parsing failed
        return None
    # speculative parsing succeeded
    state.line = speculative_state.line
    state.column = speculative_state.column
    # don't need to copy absolute_indent/is_parenthesized because they don't change.
    return EmptyLine(indent, whitespace, comment, newline)
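The pattern in Example #1 (and Example #3 below) is speculative parsing: work on a copy of the parser state and commit the new position back only if every sub-parse succeeds. Below is a minimal, self-contained sketch of that pattern; the word-matching regex and the _SketchState class are illustrative stand-ins, not part of the real parser API.

import re
from dataclasses import dataclass
from typing import Optional

_SKETCH_WORD_RE = re.compile(r"[A-Za-z_]+")

@dataclass
class _SketchState:
    line: int
    column: int

def _speculatively_parse_word(state: _SketchState, line_str: str) -> Optional[str]:
    # Work on a copy so a failed attempt leaves the caller's state untouched.
    trial = _SketchState(state.line, state.column)
    match = _SKETCH_WORD_RE.match(line_str, trial.column)
    if match is None:
        # Speculative parsing failed; the caller's state was never mutated.
        return None
    trial.column = match.end()
    # Speculative parsing succeeded: commit the trial position to the real state.
    state.line, state.column = trial.line, trial.column
    return match.group(0)

state = _SketchState(line=1, column=0)
assert _speculatively_parse_word(state, "pass;") == "pass"
assert state.column == 4
assert _speculatively_parse_word(state, "pass;") is None  # ";" is not a word
assert state.column == 4  # the failed attempt left the state unchanged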
Example #2
def parse_empty_lines(
    config: BaseWhitespaceParserConfig,
    state: State,
    *,
    override_absolute_indent: Optional[str] = None,
) -> Sequence[EmptyLine]:
    # If override_absolute_indent is not None, then we need to parse all lines up
    # to and including the last line that is indented at our level. These all
    # belong to the footer and not to the next line's leading_lines. All lines
    # that have indent=False and come after the last line where indent=True
    # do not belong to this node.
    state_for_line = State(state.line, state.column, state.absolute_indent,
                           state.is_parenthesized)
    lines: List[Tuple[State, EmptyLine]] = []
    while True:
        el = _parse_empty_line(
            config,
            state_for_line,
            override_absolute_indent=override_absolute_indent)
        if el is None:
            break

        # Store the updated state with the element we parsed. Then make a new state
        # clone for the next element.
        lines.append((state_for_line, el))
        state_for_line = State(
            state_for_line.line,
            state_for_line.column,
            state.absolute_indent,
            state.is_parenthesized,
        )

    if override_absolute_indent is not None:
        # We need to find the last element that is indented, and then split the list
        # at that point.
        for i in range(len(lines) - 1, -1, -1):
            if lines[i][1].indent:
                lines = lines[:(i + 1)]
                break
        else:
            # We didn't find any lines, throw them all away
            lines = []

    if lines:
        # Update the state line and column to match the last line actually parsed.
        final_state: State = lines[-1][0]
        state.line = final_state.line
        state.column = final_state.column
    return [r[1] for r in lines]
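The trimming step at the end of Example #2 keeps every parsed line up to and including the last one flagged as indented, and discards everything when no such line exists (the for/else branch). An illustrative-only sketch of the same idiom, using booleans in place of EmptyLine nodes:

from typing import List

def _keep_through_last_indented(indent_flags: List[bool]) -> List[bool]:
    # Walk backwards to find the last indented line, then slice up to it.
    for i in range(len(indent_flags) - 1, -1, -1):
        if indent_flags[i]:
            return indent_flags[: i + 1]
    # No indented line at all: nothing belongs to this node's footer.
    return []

assert _keep_through_last_indented([True, False, True, False]) == [True, False, True]
assert _keep_through_last_indented([False, False]) == []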
Example #3
def _parse_trailing_whitespace(config: BaseWhitespaceParserConfig,
                               state: State) -> Optional[TrailingWhitespace]:
    # Begin speculative parsing
    speculative_state = State(state.line, state.column, state.absolute_indent,
                              state.is_parenthesized)
    whitespace = parse_simple_whitespace(config, speculative_state)
    comment = _parse_comment(config, speculative_state)
    newline = _parse_newline(config, speculative_state)
    if newline is None:
        # Speculative parsing failed
        return None
    # Speculative parsing succeeded
    state.line = speculative_state.line
    state.column = speculative_state.column
    # don't need to copy absolute_indent/is_parenthesized because they don't change.
    return TrailingWhitespace(whitespace, comment, newline)
Example #4
@dataclass(frozen=False)
class _TokenizeState:
    lines: Sequence[str]
    previous_whitespace_state: WhitespaceState = field(
        default_factory=lambda: WhitespaceState(
            line=1, column=0, absolute_indent="", is_parenthesized=False))
    indents: List[str] = field(default_factory=lambda: [""])
    parenthesis_or_fstring_stack: List[
        _ParenthesisOrFStringStackEntry] = field(default_factory=list)
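Example #4 relies on dataclass field(default_factory=...) so that each _TokenizeState instance gets its own mutable defaults. A minimal sketch of why that matters (a plain mutable default would be shared across instances):

from dataclasses import dataclass, field
from typing import List

@dataclass
class _SketchTokenizeState:
    # default_factory builds a fresh list per instance instead of sharing one.
    indents: List[str] = field(default_factory=lambda: [""])

a = _SketchTokenizeState()
b = _SketchTokenizeState()
a.indents.append("    ")
assert a.indents == ["", "    "]
assert b.indents == [""]  # b keeps its own, unmodified list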
Example #5
def parse_simple_whitespace(config: BaseWhitespaceParserConfig,
                            state: State) -> SimpleWhitespace:
    # The match never fails because the pattern can match an empty string
    lines = config.lines
    # pyre-fixme[16]: Optional type has no attribute `group`.
    ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1],
                                         state.column).group(0)
    ws_line_list = [ws_line]
    while "\\" in ws_line:
        # continuation character
        state.line += 1
        state.column = 0
        # pyre-fixme[16]: Optional type has no attribute `group`.
        ws_line = SIMPLE_WHITESPACE_RE.match(lines[state.line - 1],
                                             state.column).group(0)
        ws_line_list.append(ws_line)

    # TODO: we could special-case the common case where there's no continuation
    # character to avoid list construction and joining.

    # once we've finished collecting continuation characters
    state.column += len(ws_line)
    return SimpleWhitespace("".join(ws_line_list))
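The comment in Example #5 leans on the fact that a regex which can match the empty string never returns None from .match(), so calling .group(0) directly is safe. A hedged sketch with an illustrative stand-in pattern (spaces, tabs, form feeds, and backslash-newline continuations); the real SIMPLE_WHITESPACE_RE lives in the library and may differ:

import re

_SKETCH_WS_RE = re.compile(r"([ \t\f]|\\(\r\n?|\n))*")

assert _SKETCH_WS_RE.match("x = 1", 1).group(0) == " "
assert _SKETCH_WS_RE.match("x=1", 1).group(0) == ""           # empty match, never None
assert _SKETCH_WS_RE.match("  \\\n    y", 0).group(0) == "  \\\n    "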
Example #6
def _parse_newline(config: BaseWhitespaceParserConfig,
                   state: State) -> Optional[Newline]:
    # begin speculative parsing
    line_str = config.lines[state.line - 1]
    newline_match = NEWLINE_RE.match(line_str, state.column)
    if newline_match is not None:
        # speculative parsing succeeded
        newline_str = newline_match.group(0)
        state.column += len(newline_str)
        if state.column != len(line_str):
            raise Exception(
                "Internal Error: Found a newline, but it wasn't the EOL.")
        if state.line < len(config.lines):
            # this newline was the end of a line, and there's another line,
            # therefore we should move to the next line
            state.line += 1
            state.column = 0
        if newline_str == config.default_newline:
            # Just inherit it from the Module instead of explicitly setting it.
            return Newline()
        else:
            return Newline(newline_str)
    else:  # no newline was found, speculative parsing failed
        return None
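Two details in Example #6 are worth illustrating: the matched newline must consume the rest of the line (otherwise the internal-error branch fires), and a newline equal to the module's default is returned as a bare Newline() so it inherits the default. A hedged sketch with an illustrative stand-in for NEWLINE_RE:

import re

_SKETCH_NEWLINE_RE = re.compile(r"\r\n?|\n")

line_str = "pass;\r\n"
match = _SKETCH_NEWLINE_RE.match(line_str, 5)
assert match is not None
newline_str = match.group(0)
assert newline_str == "\r\n"
# The newline must be the end of the line, mirroring the internal-error check.
assert 5 + len(newline_str) == len(line_str)
# Inherit vs. override, mirroring Newline() vs. Newline(newline_str).
default_newline = "\n"
explicit = None if newline_str == default_newline else newline_str
assert explicit == "\r\n"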
Example #7
class WrappedTokenizeTest(UnitTest):
    maxDiff = 10000

    @data_provider({
        "simple": (
            "pass;\n",
            (
                Token(
                    type=PythonTokenTypes.NAME,
                    string="pass",
                    start_pos=(1, 0),
                    end_pos=(1, 4),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=0,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=1,
                                                     column=4,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.OP,
                    string=";",
                    start_pos=(1, 4),
                    end_pos=(1, 5),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=4,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=1,
                                                     column=5,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.NEWLINE,
                    string="\n",
                    start_pos=(1, 5),
                    end_pos=(2, 0),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=5,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=2,
                                                     column=0,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.ENDMARKER,
                    string="",
                    start_pos=(2, 0),
                    end_pos=(2, 0),
                    whitespace_before=WhitespaceState(line=2,
                                                      column=0,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=2,
                                                     column=0,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
            ),
        ),
        "with_indent": (
            "if foo:\n    bar\n",
            (
                Token(
                    type=PythonTokenTypes.NAME,
                    string="if",
                    start_pos=(1, 0),
                    end_pos=(1, 2),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=0,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=1,
                                                     column=2,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.NAME,
                    string="foo",
                    start_pos=(1, 3),
                    end_pos=(1, 6),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=2,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=1,
                                                     column=6,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.OP,
                    string=":",
                    start_pos=(1, 6),
                    end_pos=(1, 7),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=6,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=1,
                                                     column=7,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.NEWLINE,
                    string="\n",
                    start_pos=(1, 7),
                    end_pos=(2, 0),
                    whitespace_before=WhitespaceState(line=1,
                                                      column=7,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(
                        line=2,
                        column=0,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.INDENT,
                    string="",
                    start_pos=(2, 4),
                    end_pos=(2, 4),
                    whitespace_before=WhitespaceState(
                        line=2,
                        column=0,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    whitespace_after=WhitespaceState(
                        line=2,
                        column=0,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    relative_indent="    ",
                ),
                Token(
                    type=PythonTokenTypes.NAME,
                    string="bar",
                    start_pos=(2, 4),
                    end_pos=(2, 7),
                    whitespace_before=WhitespaceState(
                        line=2,
                        column=0,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    whitespace_after=WhitespaceState(
                        line=2,
                        column=7,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.NEWLINE,
                    string="\n",
                    start_pos=(2, 7),
                    end_pos=(3, 0),
                    whitespace_before=WhitespaceState(
                        line=2,
                        column=7,
                        absolute_indent="    ",
                        is_parenthesized=False,
                    ),
                    whitespace_after=WhitespaceState(line=3,
                                                     column=0,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.DEDENT,
                    string="",
                    start_pos=(3, 0),
                    end_pos=(3, 0),
                    whitespace_before=WhitespaceState(line=3,
                                                      column=0,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=3,
                                                     column=0,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
                Token(
                    type=PythonTokenTypes.ENDMARKER,
                    string="",
                    start_pos=(3, 0),
                    end_pos=(3, 0),
                    whitespace_before=WhitespaceState(line=3,
                                                      column=0,
                                                      absolute_indent="",
                                                      is_parenthesized=False),
                    whitespace_after=WhitespaceState(line=3,
                                                     column=0,
                                                     absolute_indent="",
                                                     is_parenthesized=False),
                    relative_indent=None,
                ),
            ),
        ),
    })
    def test_tokenize(self, code: str, expected: Sequence[Token]) -> None:
        tokens = tuple(tokenize(code, _PY38))
        self.assertSequenceEqual(tokens, expected)
        for a, b in zip(tokens, tokens[1:]):
            # These must be the same object, so if whitespace gets consumed (mutated) at
            # the end of token a, it shows up at the beginning of token b.
            self.assertIs(a.whitespace_after, b.whitespace_before)

    def test_errortoken(self) -> None:
        with self.assertRaisesRegex(ParserSyntaxError, "not a valid token"):
            # use tuple() to read everything
            # The copyright symbol isn't a valid token
            tuple(tokenize("\u00a9", _PY38))

    def test_error_dedent(self) -> None:
        with self.assertRaisesRegex(ParserSyntaxError,
                                    "Inconsistent indentation"):
            # create some inconsistent indents to generate an ERROR_DEDENT token
            tuple(tokenize("    a\n  b", _PY38))
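The assertIs loop in test_tokenize checks that adjacent tokens share a single WhitespaceState object, so whitespace consumed (mutated) at the end of one token shows up at the start of the next. A minimal sketch of that shared-object invariant, with an illustrative stand-in class:

class _SketchWhitespaceState:
    def __init__(self, line: int, column: int) -> None:
        self.line = line
        self.column = column

shared = _SketchWhitespaceState(line=1, column=4)
token_a_whitespace_after = shared
token_b_whitespace_before = shared
# Consuming whitespace after token a advances the shared position...
token_a_whitespace_after.column = 5
# ...and token b immediately sees the new position as its leading whitespace.
assert token_b_whitespace_before.column == 5
assert token_a_whitespace_after is token_b_whitespace_before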
Example #8
def _convert_token(  # noqa: C901: too complex
    state: _TokenizeState, curr_token: OrigToken, next_token: Optional[OrigToken]
) -> Token:
    ct_type = curr_token.type
    ct_string = curr_token.string
    ct_start_pos = curr_token.start_pos
    if ct_type is _ERRORTOKEN:
        raise ParserSyntaxError(
            f"{ct_string!r} is not a valid token.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )
    if ct_type is _ERROR_DEDENT:
        raise ParserSyntaxError(
            "Inconsistent indentation. Expected a dedent.",
            lines=state.lines,
            raw_line=ct_start_pos[0],
            raw_column=ct_start_pos[1],
        )

    # Compute relative indent changes for indent/dedent nodes
    relative_indent: Optional[str] = None
    if ct_type is _INDENT:
        old_indent = "" if len(state.indents) < 2 else state.indents[-2]
        new_indent = state.indents[-1]
        relative_indent = new_indent[len(old_indent) :]

    if next_token is not None:
        nt_type = next_token.type
        if nt_type is _INDENT:
            nt_line, nt_column = next_token.start_pos
            state.indents.append(state.lines[nt_line - 1][:nt_column])
        elif nt_type is _DEDENT:
            state.indents.pop()

    whitespace_before = state.previous_whitespace_state

    if ct_type is _INDENT or ct_type is _DEDENT or ct_type is _ENDMARKER:
        # Don't update whitespace state for these dummy tokens. This makes it possible
        # to partially parse whitespace for IndentedBlock footers, and then parse the
        # rest of the whitespace in the following statement's leading_lines.
        # Unfortunately, that means that the indentation is either wrong for the footer
        # comments, or for the next line. We've chosen to allow it to be wrong for the
        # IndentedBlock footer and manually override the state when parsing whitespace
        # in that particular node.
        whitespace_after = whitespace_before
        ct_end_pos = ct_start_pos
    else:
        # Not a dummy token, so update the whitespace state.

        # Compute our own end_pos, since parso's end_pos is wrong for triple-strings.
        lines = split_lines(ct_string)
        if len(lines) > 1:
            ct_end_pos = ct_start_pos[0] + len(lines) - 1, len(lines[-1])
        else:
            ct_end_pos = (ct_start_pos[0], ct_start_pos[1] + len(ct_string))

        # Figure out what mode the whitespace parser should use. If we're inside
        # parentheses, certain whitespace (e.g. newlines) are allowed where they would
        # otherwise not be. f-strings override and disable this behavior, however.
        #
        # Parso's tokenizer tracks this internally, but doesn't expose it, so we have to
        # duplicate that logic here.

        pof_stack = state.parenthesis_or_fstring_stack
        try:
            if ct_type is _FSTRING_START:
                pof_stack.append(_FSTRING_STACK_ENTRY)
            elif ct_type is _FSTRING_END:
                pof_stack.pop()
            elif ct_type is _OP:
                if ct_string in "([{":
                    pof_stack.append(_PARENTHESIS_STACK_ENTRY)
                elif ct_string in ")]}":
                    pof_stack.pop()
        except IndexError:
            # pof_stack may be empty by the time we need to read from it due to
            # mismatched braces.
            raise ParserSyntaxError(
                "Encountered a closing brace without a matching opening brace.",
                lines=state.lines,
                raw_line=ct_start_pos[0],
                raw_column=ct_start_pos[1],
            )
        is_parenthesized = (
            len(pof_stack) > 0 and pof_stack[-1] == _PARENTHESIS_STACK_ENTRY
        )

        whitespace_after = WhitespaceState(
            ct_end_pos[0], ct_end_pos[1], state.indents[-1], is_parenthesized
        )

    # Hold onto whitespace_after, so we can use it as whitespace_before in the next
    # node.
    state.previous_whitespace_state = whitespace_after

    return Token(
        ct_type,
        ct_string,
        ct_start_pos,
        ct_end_pos,
        whitespace_before,
        whitespace_after,
        relative_indent,
    )
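The relative_indent computed in Example #8 assumes the new indent string extends the previous one, so the relative part is simply the suffix left after stripping the old prefix. A tiny illustrative sketch of that slice:

indents = ["", "    ", "        "]      # stack of absolute indents
old_indent = indents[-2]                # previous level: 4 spaces
new_indent = indents[-1]                # current level: 8 spaces
relative_indent = new_indent[len(old_indent):]
assert relative_indent == "    "        # the INDENT token added one level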