Exemplo n.º 1
0
 def get_start_pos_of_prefix(self):
     previous_leaf = self.get_previous_leaf()
     if previous_leaf is None:
         lines = split_lines(self.prefix)
         # + 1 is needed because split_lines always returns at least [''].
         return self.line - len(lines) + 1, 0  # It's the first leaf.
     return previous_leaf.end_pos
Exemplo n.º 2
0
    def close(self):
        self._base_node.finish()

        # Add an endmarker.
        try:
            last_leaf = self._module.get_last_leaf()
        except IndexError:
            end_pos = [1, 0]
        else:
            last_leaf = _skip_dedent_error_leaves(last_leaf)
            end_pos = list(last_leaf.end_pos)
        lines = split_lines(self.prefix)
        assert len(lines) > 0
        if len(lines) == 1:
            if lines[0].startswith(BOM_UTF8_STRING) and end_pos == [1, 0]:
                end_pos[1] -= 1
            end_pos[1] += len(lines[0])
        else:
            end_pos[0] += len(lines) - 1
            end_pos[1] = len(lines[-1])

        endmarker = EndMarker('', tuple(end_pos),
                              self.prefix + self._prefix_remainder)
        endmarker.parent = self._module
        self._module.children.append(endmarker)
Exemplo n.º 3
0
 def end_pos(self):
     lines = split_lines(self.value)
     end_pos_line = self.line + len(lines) - 1
     # Check for multiline token
     if self.line == end_pos_line:
         end_pos_column = self.column + len(lines[-1])
     else:
         end_pos_column = len(lines[-1])
     return end_pos_line, end_pos_column
Exemplo n.º 4
0
def _get_debug_error_message(module, old_lines, new_lines):
    current_lines = split_lines(module.get_code(), keepends=True)
    current_diff = difflib.unified_diff(new_lines, current_lines)
    old_new_diff = difflib.unified_diff(old_lines, new_lines)
    import marso
    return (
        "There's an issue with the diff parser. Please "
        "report (marso v%s) - Old/New:\n%s\nActual Diff (May be empty):\n%s" %
        (marso.__version__, ''.join(old_new_diff), ''.join(current_diff)))
Exemplo n.º 5
0
def test_open_string_literal(each_version, code):
    """
    Testing mostly if removing the last newline works.
    """
    lines = split_lines(code, keepends=True)
    end_pos = (len(lines), len(lines[-1]))
    module = parse(code, version=each_version)
    assert module.get_code() == code
    assert module.end_pos == end_pos == module.children[1].end_pos
Exemplo n.º 6
0
    def initialize(self, code):
        logging.debug('differ: initialize')
        try:
            del cache.parser_cache[self.grammar._hashed][None]
        except KeyError:
            pass

        self.lines = split_lines(code, keepends=True)
        self.module = parse(code, diff_cache=True, cache=True)
        assert code == self.module.get_code()
        _assert_valid_graph(self.module)
        return self.module
Exemplo n.º 7
0
    def get_start_pos_of_prefix(self):
        """
        Basically calls :py:meth:`marso.tree.NodeOrLeaf.get_start_pos_of_prefix`.
        """
        # TODO it is really ugly that we have to override it. Maybe change
        #   indent error leafs somehow? No idea how, though.
        previous_leaf = self.get_previous_leaf()
        if previous_leaf is not None and previous_leaf.type == 'error_leaf' \
                and previous_leaf.token_type in ('INDENT', 'DEDENT', 'ERROR_DEDENT'):
            previous_leaf = previous_leaf.get_previous_leaf()

        if previous_leaf is None:  # It's the first leaf.
            lines = split_lines(self.prefix)
            # + 1 is needed because split_lines always returns at least [''].
            return self.line - len(lines) + 1, 0  # It's the first leaf.
        return previous_leaf.end_pos
Exemplo n.º 8
0
def _assert_valid_graph(node):
    """
    Checks if the parent/children relationship is correct.

    This is a check that only runs during debugging/testing.
    """
    try:
        children = node.children
    except AttributeError:
        # Ignore INDENT is necessary, because indent/dedent tokens don't
        # contain value/prefix and are just around, because of the tokenizer.
        if node.type == 'error_leaf' and node.token_type in _INDENTATION_TOKENS:
            assert not node.value
            assert not node.prefix
            return

        # Calculate the content between two start positions.
        previous_leaf = _get_previous_leaf_if_indentation(
            node.get_previous_leaf())
        if previous_leaf is None:
            content = node.prefix
            previous_start_pos = 1, 0
        else:
            assert previous_leaf.end_pos <= node.start_pos, \
                (previous_leaf, node)

            content = previous_leaf.value + node.prefix
            previous_start_pos = previous_leaf.start_pos

        if '\n' in content or '\r' in content:
            splitted = split_lines(content)
            line = previous_start_pos[0] + len(splitted) - 1
            actual = line, len(splitted[-1])
        else:
            actual = previous_start_pos[0], previous_start_pos[1] + len(
                content)
            if content.startswith(BOM_UTF8_STRING) \
                    and node.get_start_pos_of_prefix() == (1, 0):
                # Remove the byte order mark
                actual = actual[0], actual[1] - 1

        assert node.start_pos == actual, (node.start_pos, actual)
    else:
        for child in children:
            assert child.parent == node, (node, child)
            _assert_valid_graph(child)
Exemplo n.º 9
0
    def get_last_line(self, suffix):
        line = 0
        if self._children_groups:
            children_group = self._children_groups[-1]
            last_leaf = _get_previous_leaf_if_indentation(
                children_group.last_line_offset_leaf)

            line = last_leaf.end_pos[0] + children_group.line_offset

            # Newlines end on the next line, which means that they would cover
            # the next line. That line is not fully parsed at this point.
            if _ends_with_newline(last_leaf, suffix):
                line -= 1
        line += len(split_lines(suffix)) - 1

        if suffix and not suffix.endswith('\n') and not suffix.endswith('\r'):
            # This is the end of a file (that doesn't end with a newline).
            line += 1

        if self._node_children:
            return max(line, self._node_children[-1].get_last_line(suffix))
        return line
Exemplo n.º 10
0
    def parse(self, code, copies=0, parsers=0, expect_error_leaves=False):
        logging.debug('differ: parse copies=%s parsers=%s', copies, parsers)
        lines = split_lines(code, keepends=True)
        diff_parser = DiffParser(
            self.grammar._pgen_grammar,
            self.grammar._tokenizer,
            self.module,
        )
        new_module = diff_parser.update(self.lines, lines)
        self.lines = lines
        assert code == new_module.get_code()

        _assert_valid_graph(new_module)

        without_diff_parser_module = parse(code)
        _assert_nodes_are_equal(new_module, without_diff_parser_module)

        error_node = _check_error_leaves_nodes(new_module)
        assert expect_error_leaves == (error_node is not None), error_node
        if parsers is not ANY:
            assert diff_parser._parser_count == parsers
        if copies is not ANY:
            assert diff_parser._copy_count == copies
        return new_module
Exemplo n.º 11
0
def tokenize(code, version_info, start_pos=(1, 0)):
    """Generate tokens from a the source code (string)."""
    lines = split_lines(code, keepends=True)
    return tokenize_lines(lines, version_info, start_pos=start_pos)
Exemplo n.º 12
0
 def end_pos(self):
     lines = split_lines(self.string)
     if len(lines) > 1:
         return self.start_pos[0] + len(lines) - 1, 0
     else:
         return self.start_pos[0], self.start_pos[1] + len(self.string)
Exemplo n.º 13
0
def insert_line_into_code(code, index, line):
    lines = split_lines(code, keepends=True)
    lines.insert(index, line)
    return ''.join(lines)
Exemplo n.º 14
0
def test_split_lines(string, expected_result, keepends):
    assert split_lines(string, keepends=keepends) == expected_result
Exemplo n.º 15
0
    def _copy_nodes(self,
                    working_stack,
                    nodes,
                    until_line,
                    line_offset,
                    prefix='',
                    is_nested=False):
        new_nodes = []
        added_indents = []

        nodes = list(
            self._get_matching_indent_nodes(
                nodes,
                is_new_suite=is_nested,
            ))

        new_prefix = ''
        for node in nodes:
            if node.start_pos[0] > until_line:
                break

            if node.type == 'endmarker':
                break

            if node.type == 'error_leaf' and node.token_type in (
                    'DEDENT', 'ERROR_DEDENT'):
                break
            # TODO this check might take a bit of time for large files. We
            # might want to change this to do more intelligent guessing or
            # binary search.
            if _get_last_line(node) > until_line:
                # We can split up functions and classes later.
                if _func_or_class_has_suite(node):
                    new_nodes.append(node)
                break
            try:
                c = node.children
            except AttributeError:
                pass
            else:
                # This case basically appears with error recovery of one line
                # suites like `def foo(): bar.-`. In this case we might not
                # include a newline in the statement and we need to take care
                # of that.
                n = node
                if n.type == 'decorated':
                    n = n.children[-1]
                if n.type in ('async_funcdef', 'async_stmt'):
                    n = n.children[-1]
                if n.type in ('classdef', 'funcdef'):
                    suite_node = n.children[-1]
                else:
                    suite_node = c[-1]

                if suite_node.type in ('error_leaf', 'error_node'):
                    break

            new_nodes.append(node)

        # Pop error nodes at the end from the list
        if new_nodes:
            while new_nodes:
                last_node = new_nodes[-1]
                if (last_node.type in ('error_leaf', 'error_node')
                        or _is_flow_node(new_nodes[-1])):
                    # Error leafs/nodes don't have a defined start/end. Error
                    # nodes might not end with a newline (e.g. if there's an
                    # open `(`). Therefore ignore all of them unless they are
                    # succeeded with valid parser state.
                    # If we copy flows at the end, they might be continued
                    # after the copy limit (in the new parser).
                    # In this while loop we try to remove until we find a newline.
                    new_prefix = ''
                    new_nodes.pop()
                    while new_nodes:
                        last_node = new_nodes[-1]
                        if last_node.get_last_leaf().type == 'newline':
                            break
                        new_nodes.pop()
                    continue
                if len(new_nodes) > 1 and new_nodes[-2].type == 'error_node':
                    # The problem here is that Marso error recovery sometimes
                    # influences nodes before this node.
                    # Since the new last node is an error node this will get
                    # cleaned up in the next while iteration.
                    new_nodes.pop()
                    continue
                break

        if not new_nodes:
            return [], working_stack, prefix, added_indents

        tos = working_stack[-1]
        last_node = new_nodes[-1]
        had_valid_suite_last = False
        # Pop incomplete suites from the list
        if _func_or_class_has_suite(last_node):
            suite = last_node
            while suite.type != 'suite':
                suite = suite.children[-1]

            indent = _get_suite_indentation(suite)
            added_indents.append(indent)

            suite_tos = _NodesTreeNode(suite,
                                       indentation=_get_indentation(last_node))
            # Don't need to pass line_offset here, it's already done by the
            # parent.
            suite_nodes, new_working_stack, new_prefix, ai = self._copy_nodes(
                working_stack + [suite_tos],
                suite.children,
                until_line,
                line_offset,
                is_nested=True,
            )
            added_indents += ai
            if len(suite_nodes) < 2:
                # A suite only with newline is not valid.
                new_nodes.pop()
                new_prefix = ''
            else:
                assert new_nodes
                tos.add_child_node(suite_tos)
                working_stack = new_working_stack
                had_valid_suite_last = True

        if new_nodes:
            if not _ends_with_newline(new_nodes[-1].get_last_leaf()
                                      ) and not had_valid_suite_last:
                p = new_nodes[-1].get_next_leaf().prefix
                # We are not allowed to remove the newline at the end of the
                # line, otherwise it's going to be missing. This happens e.g.
                # if a bracket is around before that moves newlines to
                # prefixes.
                new_prefix = split_lines(p, keepends=True)[0]

            if had_valid_suite_last:
                last = new_nodes[-1]
                if last.type == 'decorated':
                    last = last.children[-1]
                if last.type in ('async_funcdef', 'async_stmt'):
                    last = last.children[-1]
                last_line_offset_leaf = last.children[-2].get_last_leaf()
                assert last_line_offset_leaf == ':'
            else:
                last_line_offset_leaf = new_nodes[-1].get_last_leaf()
            tos.add_tree_nodes(
                prefix,
                new_nodes,
                line_offset,
                last_line_offset_leaf,
            )
            prefix = new_prefix
            self._prefix_remainder = ''

        return new_nodes, working_stack, prefix, added_indents
Exemplo n.º 16
0
    def _parse(self,
               code=None,
               error_recovery=True,
               path=None,
               start_symbol=None,
               cache=False,
               diff_cache=False,
               cache_path=None,
               file_io=None,
               start_pos=(1, 0)):
        """
        Wanted python3.5 * operator and keyword only arguments. Therefore just
        wrap it all.
        start_pos here is just a parameter internally used. Might be public
        sometime in the future.
        """
        if code is None and path is None and file_io is None:
            raise TypeError("Please provide either code or a path.")

        if start_symbol is None:
            start_symbol = self._start_nonterminal

        if error_recovery and start_symbol != 'file_input':
            raise NotImplementedError("This is currently not implemented.")

        if file_io is None:
            if code is None:
                file_io = FileIO(path)
            else:
                file_io = KnownContentFileIO(path, code)

        if cache and file_io.path is not None:
            module_node = load_module(self._hashed,
                                      file_io,
                                      cache_path=cache_path)
            if module_node is not None:
                return module_node

        if code is None:
            code = file_io.read()
        code = python_bytes_to_unicode(code)

        lines = split_lines(code, keepends=True)
        if diff_cache and self._diff_parser is not None:
            try:
                module_cache_item = parser_cache[self._hashed][file_io.path]
            except KeyError:
                pass
            else:
                module_node = module_cache_item.node
                old_lines = module_cache_item.lines
                if old_lines == lines:
                    return module_node

                new_node = self._diff_parser(
                    self._pgen_grammar, self._tokenizer,
                    module_node).update(old_lines=old_lines, new_lines=lines)
                save_module(
                    self._hashed,
                    file_io,
                    new_node,
                    lines,
                    # Never pickle in pypy, it's slow as hell.
                    pickling=cache and not is_pypy,
                    cache_path=cache_path)
                return new_node

        tokens = self._tokenizer(lines, start_pos=start_pos)

        p = self._parser(self._pgen_grammar,
                         error_recovery=error_recovery,
                         start_nonterminal=start_symbol)
        # breakpoint()
        root_node = p.parse(tokens=tokens)

        if cache or diff_cache:
            save_module(
                self._hashed,
                file_io,
                root_node,
                lines,
                # Never pickle in pypy, it's slow as hell.
                pickling=cache and not is_pypy,
                cache_path=cache_path)
        return root_node