Example #1
from parso.python.tokenize import tokenize
from parso.utils import parse_version_string, python_bytes_to_unicode


def _(source, source_file):
    # `source_file` is accepted but unused here; decode the raw bytes and
    # tokenize the result against the 3.10 grammar.
    source_string = python_bytes_to_unicode(source)
    for token in tokenize(
        source_string,
        version_info=parse_version_string('3.10')
    ):
        pass
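Based on parso's utils module, python_bytes_to_unicode returns str input unchanged and only performs encoding detection on bytes, which is why helpers like the one above can accept either type. A quick sketch:

from parso.utils import python_bytes_to_unicode

assert python_bytes_to_unicode('already unicode') == 'already unicode'
assert python_bytes_to_unicode(b'x = 1\n') == 'x = 1\n'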
Example #2
from parso.utils import python_bytes_to_unicode


def test_python_bytes_to_unicode_unicode_text():
    # The second source line is the hiragana "あいうえお" encoded as UTF-8.
    source = (
        b"# vim: fileencoding=utf-8\n"
        b"# \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a\n")
    actual = python_bytes_to_unicode(source)
    expected = source.decode('utf-8')
    assert actual == expected
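python_bytes_to_unicode looks for PEP 263-style encoding declarations in the first two lines (the vim modeline above matches that pattern), so a cookie naming another codec overrides the UTF-8 default. A minimal sketch with a made-up Latin-1 sample:

from parso.utils import python_bytes_to_unicode

# The Latin-1 cookie on the first line wins over the UTF-8 default:
latin = b"# -*- coding: latin-1 -*-\n# caf\xe9\n"
assert python_bytes_to_unicode(latin) == latin.decode('latin-1')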
Example #3
import difflib

import parso
from parso.utils import python_bytes_to_unicode


# `collect_errors` and the `normalizer_issue_case` fixture are defined
# elsewhere in the surrounding test suite.
def test_normalizer_issue(normalizer_issue_case):
    def sort(issues):
        issues = sorted(issues, key=lambda i: (i.start_pos, i.code))
        return ["(%s, %s): %s" % (i.start_pos[0], i.start_pos[1], i.code)
                for i in issues]

    with open(normalizer_issue_case.path, 'rb') as f:
        code = python_bytes_to_unicode(f.read())

    desired = sort(collect_errors(code))

    grammar = parso.load_grammar(version=normalizer_issue_case.python_version)
    module = grammar.parse(code)
    issues = grammar._get_normalizer_issues(module)
    actual = sort(issues)

    diff = '\n'.join(difflib.ndiff(desired, actual))
    # To make the pytest -v diff a bit prettier, stop pytest from rewriting
    # the assert statement by doing the comparison up front.
    _bool = desired == actual
    assert _bool, '\n' + diff
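For context, roughly the same pipeline outside the test harness. Note that _get_normalizer_issues is a private parso API, so this sketch is an assumption about this particular parso version:

import parso

grammar = parso.load_grammar(version='3.8')
module = grammar.parse("x=1\n")
for issue in grammar._get_normalizer_issues(module):
    # Each issue carries a numeric pep8-style code and a (line, column) start_pos.
    print(issue.code, issue.start_pos)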
Example #4
    def _parse(self,
               code=None,
               error_recovery=True,
               path=None,
               start_symbol=None,
               cache=False,
               diff_cache=False,
               cache_path=None,
               start_pos=(1, 0)):
        """
        Wanted python3.5 * operator and keyword only arguments. Therefore just
        wrap it all.
        start_pos here is just a parameter internally used. Might be public
        sometime in the future.
        """
        if code is None and path is None:
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_symbol

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if cache and code is None and path is not None:
            # With the current architecture we cannot load from cache when
            # the code is given directly, because the cache is only trusted
            # as long as it is not older than the file's last modification.
            module_node = load_module(self._hashed,
                                      path,
                                      cache_path=cache_path)
            if module_node is not None:
                return module_node

        if code is None:
            with open(path, 'rb') as f:
                code = f.read()

        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer,
                    module_node).update(old_lines=old_lines, new_lines=lines)
                save_module(
                    self._hashed,
                    path,
                    new_node,
                    lines,
                    # Never pickle in pypy, it's slow as hell.
                    pickling=cache and not is_pypy,
                    cache_path=cache_path)
                return new_node

        tokens = self._tokenizer(lines, start_pos)

        p = self._parser(self._pgen_grammar,
                         error_recovery=error_recovery,
                         start_symbol=start_symbol)
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            save_module(
                self._hashed,
                path,
                root_node,
                lines,
                # Never pickle in pypy, it's slow as hell.
                pickling=cache and not is_pypy,
                cache_path=cache_path)
        return root_node
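The method above is private; it is normally reached through the public parse API. A minimal sketch, with a hypothetical file name:

import parso

grammar = parso.load_grammar()
# Parse from disk with caching; on a later call, diff_cache lets parso
# reparse only the lines that changed since the cached tree was saved.
module = grammar.parse(path='example.py', cache=True, diff_cache=True)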
Example #5
    def _parse(self, code=None, error_recovery=True, path=None,
               start_symbol=None, cache=False, diff_cache=False,
               cache_path=None, file_io=None, start_pos=(1, 0)):
        """
        Wanted python3.5 * operator and keyword only arguments. Therefore just
        wrap it all.
        start_pos here is just a parameter internally used. Might be public
        sometime in the future.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed, file_io, cache_path=cache_path)
            if module_node is not None:
                return module_node

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer, module_node
                ).update(
                    old_lines=old_lines,
                    new_lines=lines
                )
                save_module(self._hashed, file_io, new_node, lines,
                            # Never pickle in pypy, it's slow as hell.
                            pickling=cache and not is_pypy,
                            cache_path=cache_path)
                return new_node

        tokens = self._tokenizer(lines, start_pos)

        p = self._parser(
            self._pgen_grammar,
            error_recovery=error_recovery,
            start_nonterminal=start_symbol
        )
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            save_module(self._hashed, file_io, root_node, lines,
                        # Never pickle in pypy, it's slow as hell.
                        pickling=cache and not is_pypy,
                        cache_path=cache_path)
        return root_node
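This revision routes all input through parso's file-io layer. A minimal sketch of the two classes it chooses between, assuming the parso.file_io module:

from parso.file_io import FileIO, KnownContentFileIO

# FileIO reads the file itself; KnownContentFileIO wraps content that is
# already in memory while keeping a path around for cache bookkeeping.
file_io = KnownContentFileIO('example.py', 'x = 1\n')
print(file_io.path, file_io.read())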
Example #6
                yield PythonToken(OP, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n') or contstr.endswith('\r'):
            new_line = True

    end_pos = lnum, max
    # For the final tokens, take the maximally possible position (`max` here
    # is a local column bound from the tokenizer loop, not the builtin).
    for indent in indents[1:]:
        yield PythonToken(DEDENT, '', end_pos, '')
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


if __name__ == "__main__":
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        with open(path) as f:
            code = f.read()
    else:
        code = sys.stdin.read()

    from parso.utils import python_bytes_to_unicode, parse_version_string

    if isinstance(code, bytes):
        code = python_bytes_to_unicode(code)

    for token in tokenize(code, parse_version_string()):
        print(token)
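Besides the command-line entry point above, the tokenizer can be driven directly; each PythonToken is a namedtuple of (type, string, start_pos, prefix). A minimal sketch:

from parso.python.tokenize import tokenize
from parso.utils import parse_version_string

# parse_version_string() with no argument targets the running Python version.
for token in tokenize('x = 1\n', version_info=parse_version_string()):
    print(token)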
Example #7
    def _parse(self,
               code=None,
               error_recovery=True,
               path=None,
               start_symbol=None,
               cache=False,
               diff_cache=False,
               cache_path=None,
               file_io=None,
               start_pos=(1, 0)):
        """
        Wanted python3.5 * operator and keyword only arguments. Therefore just
        wrap it all.
        start_pos here is just a parameter internally used. Might be static
        sometime in the future.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed,
                                      file_io,
                                      cache_path=cache_path)
            if module_node is not None:
                return module_node

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer,
                    module_node).update(old_lines=old_lines, new_lines=lines)
                try_to_save_module(
                    self._hashed,
                    file_io,
                    new_node,
                    lines,
                    # Never pickle in pypy, it's slow as hell.
                    pickling=cache and not is_pypy,
                    cache_path=cache_path)
                return new_node

        tokens = self._tokenizer(lines, start_pos=start_pos)

        p = self._parser(self._pgen_grammar,
                         error_recovery=error_recovery,
                         start_nonterminal=start_symbol)
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(
                self._hashed,
                file_io,
                root_node,
                lines,
                # Never pickle in pypy, it's slow as hell.
                pickling=cache and not is_pypy,
                cache_path=cache_path)
        return root_node
Example #8
                yield PythonToken(typ, token, spos, prefix)

    if contstr:
        yield PythonToken(ERRORTOKEN, contstr, contstr_start, prefix)
        if contstr.endswith('\n'):
            new_line = True

    end_pos = lnum, max
    # For the final tokens, take the maximally possible position (`max` here
    # is a local column bound from the tokenizer loop, not the builtin).
    for indent in indents[1:]:
        yield PythonToken(DEDENT, '', end_pos, '')
    yield PythonToken(ENDMARKER, '', end_pos, additional_prefix)


if __name__ == "__main__":
    if len(sys.argv) >= 2:
        path = sys.argv[1]
        with open(path) as f:
            code = f.read()
    else:
        code = sys.stdin.read()

    from parso.utils import python_bytes_to_unicode, parse_version_string

    if isinstance(code, bytes):
        code = python_bytes_to_unicode(code)

    for token in tokenize(code, parse_version_string()):
        print(token)
Example #9
    def parse(self,
              code: Union[str, bytes] = None,
              *,
              error_recovery=True,
              path: Union[os.PathLike, str] = None,
              start_symbol: str = None,
              cache=False,
              diff_cache=False,
              cache_path: Union[os.PathLike, str] = None,
              file_io: FileIO = None) -> _NodeT:
        """
        If you want to parse a Python file, this is most likely where you
        want to start.

        If you need finer grained control over the parsed instance, there will be
        other ways to access it.

        :param str code: A unicode or bytes string. When it's not possible to
            decode bytes to a string, a :py:class:`UnicodeDecodeError` is
            raised.
        :param bool error_recovery: If enabled, a tree is returned for any
            code, with invalid parts wrapped in error nodes. If disabled, you
            will get a ParseError when encountering syntax errors in your
            code.
        :param str start_symbol: The grammar rule (nonterminal) that you want
            to parse. Only allowed to be used when error_recovery is False.
        :param str path: The path to the file you want to open. Only needed for caching.
        :param bool cache: Keeps a copy of the parser tree in RAM and on disk
            if a path is given. Returns the cached trees if the corresponding
            files on disk have not changed. Note that this stores pickle files
            on your file system (e.g. for Linux in ``~/.cache/parso/``).
        :param bool diff_cache: Diffs the cached python module against the new
            code and tries to parse only the parts that have changed. Returns
            the same (changed) module that is found in cache. Using this option
            requires you to not do anything anymore with the cached modules
            under that path, because the contents of it might change. This
            option is still somewhat experimental. If you want stability,
            please don't use it.
        :param cache_path: If given, saves the parso cache in this directory.
            If not given, defaults to the platform's default cache directory.

        :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
            :py:class:`parso.python.tree.Module`.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if isinstance(path, str):
            path = Path(path)
        if isinstance(cache_path, str):
            cache_path = Path(cache_path)

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)  # type: ignore
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed,
                                      file_io,
                                      cache_path=cache_path)
            if module_node is not None:
                return module_node  # type: ignore

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache:
            if self._diff_parser is None:
                raise TypeError("You have to define a diff parser to be able "
                                "to use this option.")
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node  # type: ignore

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer,
                    module_node).update(old_lines=old_lines, new_lines=lines)
                try_to_save_module(
                    self._hashed,
                    file_io,
                    new_node,
                    lines,
                    # Never pickle in pypy, it's slow as hell.
                    pickling=cache and not is_pypy,
                    cache_path=cache_path)
                return new_node  # type: ignore

        tokens = self._tokenizer(lines)

        p = self._parser(self._pgen_grammar,
                         error_recovery=error_recovery,
                         start_nonterminal=start_symbol)
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            try_to_save_module(
                self._hashed,
                file_io,
                root_node,
                lines,
                # Never pickle in pypy, it's slow as hell.
                pickling=cache and not is_pypy,
                cache_path=cache_path)
        return root_node  # type: ignore
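For completeness, the usual entry point that funnels into this method; parso.parse uses the default grammar, and bytes input is decoded by python_bytes_to_unicode along the way:

import parso

module = parso.parse(b"# -*- coding: utf-8 -*-\nx = 1\n")
print(module.get_code())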
Example #10
import pytest

from parso.utils import python_bytes_to_unicode


# The @pytest.mark.parametrize decorator that supplies `code` and `errors`
# is not shown in this excerpt.
def test_bytes_to_unicode_failing_encoding(code, errors):
    if errors == 'strict':
        with pytest.raises(LookupError):
            python_bytes_to_unicode(code, errors=errors)
    else:
        python_bytes_to_unicode(code, errors=errors)
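The parametrized samples are omitted, but the behavior being pinned down is this: a coding cookie naming a codec Python doesn't know raises LookupError under errors='strict', while errors='replace' makes parso fall back to decoding as UTF-8 with replacement characters. A sketch with a made-up sample:

from parso.utils import python_bytes_to_unicode

bad = b"# coding: not-a-real-codec\nx = 1\n"
print(python_bytes_to_unicode(bad, errors='replace'))  # falls back to UTF-8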