Exemplo n.º 1
0
class TestDetectConfig(UnitTest):
    """
    Table-driven checks for ``detect_config``.

    Each named case supplies a source (bytes or str), a partial config, the two
    detection flags, and the full ``ParserConfig`` that ``detect_config`` is
    expected to produce for that combination.

    NOTE(review): the expected configs below do not pass ``future_imports``;
    confirm that ``ParserConfig``'s default for that field matches what
    ``detect_config`` computes for these sources.
    """

    @data_provider({
        # No input at all; with trailing-newline detection on, a newline is
        # appended before the source is split into lines.
        "empty_input": {
            "source": b"",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["\n", ""],
                encoding="utf-8",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=False,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "detect_trailing_newline_disabled": {
            "source": b"",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": False,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                # With detection off, no newline is appended to the source.
                lines=[""],
                encoding="utf-8",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=False,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "detect_default_newline_disabled": {
            "source": b"pass\r",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": False,
            "detect_default_newline": False,
            "expected_config": ParserConfig(
                # With detection off, no newline is appended to the source.
                lines=["pass\r", ""],
                encoding="utf-8",
                default_indent="    ",
                # The "\r" in the source is not picked up as the default.
                default_newline="\n",
                has_trailing_newline=False,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "newline_inferred": {
            "source": b"first_line\r\n\nsomething\n",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["first_line\r\n", "\n", "something\n", ""],
                encoding="utf-8",
                default_indent="    ",
                default_newline="\r\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "newline_partial_given": {
            "source": b"first_line\r\nsecond_line\r\n",
            "partial": PartialParserConfig(default_newline="\n", python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["first_line\r\n", "second_line\r\n", ""],
                encoding="utf-8",
                default_indent="    ",
                # An explicit value in the partial config wins over inference.
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "indent_inferred": {
            "source": b"if test:\n\t  something\n",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["if test:\n", "\t  something\n", ""],
                encoding="utf-8",
                default_indent="\t  ",
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "indent_partial_given": {
            "source": b"if test:\n\t  something\n",
            "partial": PartialParserConfig(default_indent="      ", python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["if test:\n", "\t  something\n", ""],
                encoding="utf-8",
                # Taken from the partial config, not from the source.
                default_indent="      ",
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "encoding_inferred": {
            "source": b"#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=[
                    "#!/usr/bin/python3\n",
                    "# -*- coding: latin-1 -*-\n",
                    "pass\n",
                    "",
                ],
                # "iso-8859-1" is an alias for the declared latin-1.
                encoding="iso-8859-1",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "encoding_partial_given": {
            "source": b"#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n",
            "partial": PartialParserConfig(encoding="us-ascii", python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=[
                    "#!/usr/bin/python3\n",
                    "# -*- coding: latin-1 -*-\n",
                    "pass\n",
                    "",
                ],
                encoding="us-ascii",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "encoding_str_not_bytes_disables_inference": {
            "source": "#!/usr/bin/python3\n# -*- coding: latin-1 -*-\npass\n",
            "partial": PartialParserConfig(python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=[
                    "#!/usr/bin/python3\n",
                    "# -*- coding: latin-1 -*-\n",
                    "pass\n",
                    "",
                ],
                # The source is already a str, so latin-1 is not inferred.
                encoding="utf-8",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=True,
                version=PythonVersionInfo(3, 7),
            ),
        },
        "encoding_non_ascii_compatible_utf_16_with_bom": {
            "source": b"\xff\xfet\x00e\x00s\x00t\x00",
            "partial": PartialParserConfig(encoding="utf-16", python_version="3.7"),
            "detect_trailing_newline": True,
            "detect_default_newline": True,
            "expected_config": ParserConfig(
                lines=["test\n", ""],
                encoding="utf-16",
                default_indent="    ",
                default_newline="\n",
                has_trailing_newline=False,
                version=PythonVersionInfo(3, 7),
            ),
        },
    })
    def test_detect_module_config(
        self,
        *,
        source: Union[str, bytes],
        partial: PartialParserConfig,
        detect_trailing_newline: bool,
        detect_default_newline: bool,
        expected_config: ParserConfig,
    ) -> None:
        """Run ``detect_config`` for one case and compare the resulting config."""
        detected = detect_config(
            source,
            partial=partial,
            detect_trailing_newline=detect_trailing_newline,
            detect_default_newline=detect_default_newline,
        )
        self.assertEqual(detected.config, expected_config)
Exemplo n.º 2
0
def detect_config(
    source: Union[str, bytes],
    *,
    partial: PartialParserConfig,
    detect_trailing_newline: bool,
    detect_default_newline: bool,
) -> ConfigDetectionResult:
    """
    Builds a complete ParserConfig for ``source``, filling in whatever the partial
    config leaves as ``AutoConfig``.

    Values set explicitly on ``partial`` (encoding, default newline, default indent,
    future imports) are used as-is; the rest are detected from the source.

    ``detect_trailing_newline`` controls whether the source is checked for a trailing
    newline (and given one if it is missing). ``detect_default_newline`` controls
    whether an unset default newline is inferred from the source or replaced by the
    fallback value.

    Returns a ConfigDetectionResult holding the finished config together with the
    token stream produced for the (possibly newline-extended) source.
    """

    version = partial.parsed_python_version

    # Encoding: an explicit value on the partial config wins over detection.
    requested_encoding = partial.encoding
    if isinstance(requested_encoding, AutoConfig):
        encoding = _detect_encoding(source)
    else:
        encoding = requested_encoding

    # Only bytes need decoding; a str source is used unchanged.
    if isinstance(source, str):
        text = source
    else:
        text = source.decode(encoding)

    # Default newline: explicit value, else detected, else the fallback.
    requested_newline = partial.default_newline
    if not isinstance(requested_newline, AutoConfig):
        default_newline = requested_newline
    elif detect_default_newline:
        default_newline = _detect_default_newline(text)
    else:
        default_newline = _FALLBACK_DEFAULT_NEWLINE

    # HACK: The grammar insists on a trailing newline even though Python itself does
    # not. When one is missing, append it here so the parser is satisfied; it gets
    # stripped out again during cst.Module's codegen.
    #
    # parso is thought to lean on error recovery for this case, which isn't available
    # here, and lib2to3 doesn't appear to handle it at all.
    has_trailing_newline = detect_trailing_newline and _detect_trailing_newline(text)
    if detect_trailing_newline and not has_trailing_newline:
        text += default_newline

    lines = split_lines(text, keepends=True)
    tokens = tokenize_lines(lines, version)

    requested_indent = partial.default_indent
    if not isinstance(requested_indent, AutoConfig):
        default_indent = requested_indent
    else:
        # `_detect_indent` consumes part of the iterator it is given, so hand it a
        # tee'd copy and keep the other branch as the stream we return.
        #
        # Per the original implementation note, CPython's `itertools.tee` does not
        # keep items alive on behalf of the probe branch once that branch has been
        # released at the end of this function (subject to GC/refcounting).
        tokens, indent_probe = itertools.tee(tokens)
        default_indent = _detect_indent(indent_probe)

    requested_future_imports = partial.future_imports
    if not isinstance(requested_future_imports, AutoConfig):
        future_imports = requested_future_imports
    else:
        # Same reasoning as for the indent probe: detection consumes tokens.
        tokens, imports_probe = itertools.tee(tokens)
        future_imports = _detect_future_imports(imports_probe)

    config = ParserConfig(
        lines=lines,
        encoding=encoding,
        default_indent=default_indent,
        default_newline=default_newline,
        has_trailing_newline=has_trailing_newline,
        version=version,
        future_imports=future_imports,
    )
    return ConfigDetectionResult(config=config, tokens=tokens)