def get_stack_at_position(grammar, code_lines, module_node, pos):
    """
    Returns the possible node names (e.g. import_from, xor_test or yield_stmt).
    """
    class EndMarkerReached(Exception):
        pass

    def tokenize_without_endmarker(code):
        # TODO This is for now not an official parso API that exists purely
        #      for Jedi.
        tokens = grammar._tokenize(code)
        for token_ in tokens:
            if token_.string == safeword:
                raise EndMarkerReached()
            elif token_.prefix.endswith(safeword):
                # This happens with comments.
                raise EndMarkerReached()
            else:
                yield token_

    # The code might be indented, just remove it.
    code = dedent(_get_code_for_stack(code_lines, module_node, pos))
    # We use a word to tell Jedi when we have reached the start of the
    # completion.
    # Use Z as a prefix because it's not part of a number suffix.
    safeword = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE_WITH_JEDI'
    code = code + ' ' + safeword

    p = Parser(grammar._pgen_grammar, error_recovery=True)
    try:
        p.parse(tokens=tokenize_without_endmarker(code))
    except EndMarkerReached:
        return p.stack
    raise SystemError("This really shouldn't happen. There's a bug in Jedi.")
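
# The safeword above is a plain sentinel-token trick and is not tied to
# parso's internals. A minimal, runnable sketch of the same idea using the
# standard library's tokenize module (the safeword value and the sample
# snippet are invented for illustration):

import io
import tokenize

SAFEWORD = 'ZZZ_USER_WANTS_TO_COMPLETE_HERE'


def tokens_until_safeword(code):
    # Append the sentinel at the cursor position, then tokenize and stop as
    # soon as the sentinel shows up as a token.
    readline = io.StringIO(code + ' ' + SAFEWORD).readline
    collected = []
    for token in tokenize.generate_tokens(readline):
        if token.string == SAFEWORD:
            break
        collected.append(token)
    return collected


# Tokens seen before the (hypothetical) completion position:
for tok in tokens_until_safeword('import os\nos.pa'):
    print(tokenize.tok_name[tok.type], repr(tok.string))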
def _try_parse_part(self, until_line):
    """
    Sets up a normal parser that uses a specialized tokenizer to only parse
    until a certain position (or a bit longer if the statement hasn't
    ended).
    """
    self._parser_count += 1
    # TODO speed up, shouldn't copy the whole list all the time.
    # memoryview?
    parsed_until_line = self._nodes_stack.parsed_until_line
    lines_after = self._parser_lines_new[parsed_until_line:]
    tokens = self._diff_tokenize(lines_after, until_line,
                                 line_offset=parsed_until_line)
    self._active_parser = Parser(self._pgen_grammar, error_recovery=True)
    return self._active_parser.parse(tokens=tokens)
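
# The slicing and offset bookkeeping in _try_parse_part, as a standalone
# sketch; parsed_until_line and the line list are invented sample data:

# Lines of the new file, split parso-style (one entry per line, with a
# trailing empty string when the file ends in a newline).
new_lines = ['def f():\n', '    return 1\n', '\n',
             'def g():\n', '    return 2\n', '']

# Suppose everything up to and including line 3 is already parsed/copied.
parsed_until_line = 3

# The part parser only ever sees the tail of the file...
lines_after = new_lines[parsed_until_line:]
print(lines_after)  # ['def g():\n', '    return 2\n', '']

# ...and reports positions starting at (1, 0), so line_offset maps a
# relative line back to its absolute position in the file.
line_offset = parsed_until_line
relative_line = 1
print('absolute line:', relative_line + line_offset)  # 4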
class DiffParser(object):
    """
    An advanced form of parsing a file faster. Unfortunately comes with huge
    side effects. It changes the given module.
    """
    def __init__(self, pgen_grammar, tokenizer, module):
        self._pgen_grammar = pgen_grammar
        self._tokenizer = tokenizer
        self._module = module

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._nodes_stack = _NodesStack(self._module)

    def update(self, old_lines, new_lines):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we get
              one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.
        '''
        LOG.debug('diff parser start')
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

        self._parser_lines_new = new_lines

        self._reset()

        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        LOG.debug('diff parser calculated')
        LOG.debug('diff: line_lengths old: %s, new: %s'
                  % (len(old_lines), line_length))

        for operation, i1, i2, j1, j2 in opcodes:
            LOG.debug('diff code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length and new_lines[-1] == '':
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                assert operation == 'delete'

        # With this action all change will finally be applied and we have a
        # changed module.
        self._nodes_stack.close()

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            current_lines = split_lines(self._module.get_code(), keepends=True)
            diff = difflib.unified_diff(current_lines, new_lines)
            raise Exception(
                "There's an issue (%s != %s) with the diff parser. "
                "Please report:\n%s"
                % (last_pos, line_length, ''.join(diff))
            )

        LOG.debug('diff parser end')
        return self._module

    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
                        ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        copied_nodes = [None]

        last_until_line = -1
        while until_line_new > self._nodes_stack.parsed_until_line:
            parsed_until_line_old = self._nodes_stack.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_stack.parsed_until_line + 1)
            elif not copied_nodes:
                # We have copied as much as possible (but definitely not too
                # much). Therefore we just parse the rest.
                # We might not reach the end, because there's a statement
                # that is not finished.
                self._parse(until_line_new)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                copied_nodes = self._nodes_stack.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
                )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1

                    from_ = copied_nodes[0].get_start_pos_of_prefix()[0] + line_offset
                    to = self._nodes_stack.parsed_until_line

                    LOG.debug('diff actually copy %s to %s', from_, to)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
            assert last_until_line != self._nodes_stack.parsed_until_line \
                or not copied_nodes, last_until_line
            last_until_line = self._nodes_stack.parsed_until_line

    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0),
                                                  include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent
            return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _get_before_insertion_node(self):
        if self._nodes_stack.is_empty():
            return None

        line = self._nodes_stack.parsed_until_line + 1
        node = self._new_module.get_last_leaf()
        while True:
            parent = node.parent
            if parent.type in ('suite', 'file_input'):
                assert node.end_pos[0] <= line
                assert node.end_pos[1] == 0 or '\n' in self._prefix
                return node
            node = parent

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
        while until_line > self._nodes_stack.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = node.children

            self._nodes_stack.add_parsed_nodes(nodes)
            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_stack.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
            assert last_until_line != self._nodes_stack.parsed_until_line, last_until_line
            last_until_line = self._nodes_stack.parsed_until_line

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only parse
        until a certain position (or a bit longer if the statement hasn't
        ended).
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_stack.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = Parser(
            self._pgen_grammar,
            error_recovery=True
        )
        return self._active_parser.parse(tokens=tokens)

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        tokens = self._tokenizer(lines, (1, 0))
        stack = self._active_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == PythonTokenTypes.INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here because
                    # we only parse part of the file. These indents would only
                    # get parsed as error leafs, which doesn't make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
            if typ == PythonTokenTypes.DEDENT \
                    or typ == PythonTokenTypes.ERROR_DEDENT \
                    and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix:
                        prefix = re.sub(r'(?<=\n)[^\n]+$', '', prefix)
                    else:
                        prefix = ''
                    yield PythonToken(
                        PythonTokenTypes.ENDMARKER, '',
                        (start_pos[0] + line_offset, 0), prefix
                    )
                    break
            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0

                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield PythonToken(typ, string, start_pos, prefix)
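
# The opcode loop in update() is plain difflib. A runnable sketch of how the
# line-based opcodes drive the copy-vs-parse decision (the file contents are
# invented):

import difflib

old_lines = ['import os\n', '\n', 'def f():\n', '    return 1\n', '']
new_lines = ['import os\n', '\n', 'def f():\n', '    return 2\n', '']

sm = difflib.SequenceMatcher(None, old_lines, new_lines)
for operation, i1, i2, j1, j2 in sm.get_opcodes():
    # 'equal' ranges can be copied from the old tree; 'replace' and 'insert'
    # ranges must be re-parsed; 'delete' needs no action at all.
    print(operation, 'old[%s:%s] new[%s:%s]' % (i1 + 1, i2, j1 + 1, j2))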
class DiffParser(object):
    """
    An advanced form of parsing a file faster. Unfortunately comes with huge
    side effects. It changes the given module.
    """
    def __init__(self, pgen_grammar, tokenizer, module):
        self._pgen_grammar = pgen_grammar
        self._tokenizer = tokenizer
        self._module = module

    def _reset(self):
        self._copy_count = 0
        self._parser_count = 0

        self._nodes_tree = _NodesTree(self._module)

    def update(self, old_lines, new_lines):
        '''
        The algorithm works as follows:

        Equal:
            - Assure that the start is a newline, otherwise parse until we get
              one.
            - Copy from parsed_until_line + 1 to max(i2 + 1)
            - Make sure that the indentation is correct (e.g. add DEDENT)
            - Add old and change positions
        Insert:
            - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
              much more.

        Returns the new module node.
        '''
        LOG.debug('diff parser start')
        # Reset the used names cache so they get regenerated.
        self._module._used_names = None

        self._parser_lines_new = new_lines

        self._reset()

        line_length = len(new_lines)
        sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
        opcodes = sm.get_opcodes()
        LOG.debug('line_lengths old: %s; new: %s'
                  % (len(old_lines), line_length))

        for operation, i1, i2, j1, j2 in opcodes:
            LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                      operation, i1 + 1, i2, j1 + 1, j2)

            if j2 == line_length and new_lines[-1] == '':
                # The empty part after the last newline is not relevant.
                j2 -= 1

            if operation == 'equal':
                line_offset = j1 - i1
                self._copy_from_old_parser(line_offset, i2, j2)
            elif operation == 'replace':
                self._parse(until_line=j2)
            elif operation == 'insert':
                self._parse(until_line=j2)
            else:
                assert operation == 'delete'

        # With this action all change will finally be applied and we have a
        # changed module.
        self._nodes_tree.close()

        if DEBUG_DIFF_PARSER:
            # If there is reasonable suspicion that the diff parser is not
            # behaving well, this should be enabled.
            try:
                assert self._module.get_code() == ''.join(new_lines)
                _assert_valid_graph(self._module)
            except AssertionError:
                print(_get_debug_error_message(self._module, old_lines, new_lines))
                raise

        last_pos = self._module.end_pos[0]
        if last_pos != line_length:
            raise Exception(
                ('(%s != %s) ' % (last_pos, line_length))
                + _get_debug_error_message(self._module, old_lines, new_lines)
            )
        LOG.debug('diff parser end')
        return self._module

    def _enabled_debugging(self, old_lines, lines_new):
        if self._module.get_code() != ''.join(lines_new):
            LOG.warning('parser issue:\n%s\n%s', ''.join(old_lines),
                        ''.join(lines_new))

    def _copy_from_old_parser(self, line_offset, until_line_old, until_line_new):
        last_until_line = -1
        while until_line_new > self._nodes_tree.parsed_until_line:
            parsed_until_line_old = self._nodes_tree.parsed_until_line - line_offset
            line_stmt = self._get_old_line_stmt(parsed_until_line_old + 1)
            if line_stmt is None:
                # Parse 1 line at least. We don't need more, because we just
                # want to get into a state where the old parser has statements
                # again that can be copied (e.g. not lines within parentheses).
                self._parse(self._nodes_tree.parsed_until_line + 1)
            else:
                p_children = line_stmt.parent.children
                index = p_children.index(line_stmt)

                from_ = self._nodes_tree.parsed_until_line + 1
                copied_nodes = self._nodes_tree.copy_nodes(
                    p_children[index:],
                    until_line_old,
                    line_offset
                )
                # Match all the nodes that are in the wanted range.
                if copied_nodes:
                    self._copy_count += 1

                    to = self._nodes_tree.parsed_until_line

                    LOG.debug('copy old[%s:%s] new[%s:%s]',
                              copied_nodes[0].start_pos[0],
                              copied_nodes[-1].end_pos[0] - 1, from_, to)
                else:
                    # We have copied as much as possible (but definitely not
                    # too much). Therefore we just parse a bit more.
                    self._parse(self._nodes_tree.parsed_until_line + 1)
            # Since there are potential bugs that might loop here endlessly, we
            # just stop here.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _get_old_line_stmt(self, old_line):
        leaf = self._module.get_leaf_for_position((old_line, 0),
                                                  include_prefixes=True)

        if _ends_with_newline(leaf):
            leaf = leaf.get_next_leaf()
        if leaf.get_start_pos_of_prefix()[0] == old_line:
            node = leaf
            while node.parent.type not in ('file_input', 'suite'):
                node = node.parent

            # Make sure that if only the `else:` line of an if statement is
            # copied that not the whole thing is going to be copied.
            if node.start_pos[0] >= old_line:
                return node
        # Must be on the same line. Otherwise we need to parse that bit.
        return None

    def _parse(self, until_line):
        """
        Parses at least until the given line, but might just parse more until a
        valid state is reached.
        """
        last_until_line = 0
        while until_line > self._nodes_tree.parsed_until_line:
            node = self._try_parse_part(until_line)
            nodes = node.children

            self._nodes_tree.add_parsed_nodes(nodes)
            LOG.debug(
                'parse_part from %s to %s (to %s in part parser)',
                nodes[0].get_start_pos_of_prefix()[0],
                self._nodes_tree.parsed_until_line,
                node.end_pos[0] - 1
            )
            # Since the tokenizer sometimes has bugs, we cannot be sure that
            # this loop terminates. Therefore assert that there's always a
            # change.
            assert last_until_line != self._nodes_tree.parsed_until_line, last_until_line
            last_until_line = self._nodes_tree.parsed_until_line

    def _try_parse_part(self, until_line):
        """
        Sets up a normal parser that uses a specialized tokenizer to only parse
        until a certain position (or a bit longer if the statement hasn't
        ended).
        """
        self._parser_count += 1
        # TODO speed up, shouldn't copy the whole list all the time.
        # memoryview?
        parsed_until_line = self._nodes_tree.parsed_until_line
        lines_after = self._parser_lines_new[parsed_until_line:]
        tokens = self._diff_tokenize(
            lines_after,
            until_line,
            line_offset=parsed_until_line
        )
        self._active_parser = Parser(
            self._pgen_grammar,
            error_recovery=True
        )
        return self._active_parser.parse(tokens=tokens)

    def _diff_tokenize(self, lines, until_line, line_offset=0):
        is_first_token = True
        omitted_first_indent = False
        indents = []
        tokens = self._tokenizer(lines, (1, 0))
        stack = self._active_parser.stack
        for typ, string, start_pos, prefix in tokens:
            start_pos = start_pos[0] + line_offset, start_pos[1]
            if typ == PythonTokenTypes.INDENT:
                indents.append(start_pos[1])
                if is_first_token:
                    omitted_first_indent = True
                    # We want to get rid of indents that are only here because
                    # we only parse part of the file. These indents would only
                    # get parsed as error leafs, which doesn't make any sense.
                    is_first_token = False
                    continue
            is_first_token = False

            # In case of omitted_first_indent, it might not be dedented fully.
            # However this is a sign for us that a dedent happened.
            if typ == PythonTokenTypes.DEDENT \
                    or typ == PythonTokenTypes.ERROR_DEDENT \
                    and omitted_first_indent and len(indents) == 1:
                indents.pop()
                if omitted_first_indent and not indents:
                    # We are done here, only thing that can come now is an
                    # endmarker or another dedented code block.
                    typ, string, start_pos, prefix = next(tokens)
                    if '\n' in prefix or '\r' in prefix:
                        prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
                    else:
                        assert start_pos[1] >= len(prefix), repr(prefix)
                        if start_pos[1] - len(prefix) == 0:
                            prefix = ''
                    yield PythonToken(
                        PythonTokenTypes.ENDMARKER, '',
                        (start_pos[0] + line_offset, 0), prefix
                    )
                    break
            elif typ == PythonTokenTypes.NEWLINE and start_pos[0] >= until_line:
                yield PythonToken(typ, string, start_pos, prefix)
                # Check if the parser is actually in a valid suite state.
                if _suite_or_file_input_is_valid(self._pgen_grammar, stack):
                    start_pos = start_pos[0] + 1, 0

                    while len(indents) > int(omitted_first_indent):
                        indents.pop()
                        yield PythonToken(PythonTokenTypes.DEDENT, '', start_pos, '')

                    yield PythonToken(PythonTokenTypes.ENDMARKER, '', start_pos, '')
                    break
                else:
                    continue

            yield PythonToken(typ, string, start_pos, prefix)
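
# The re.sub call above trims a token prefix back to its last newline, so the
# synthesized endmarker keeps preceding comment lines but not the indentation
# of the current line. The same expression in isolation (the sample prefix is
# invented):

import re

# A prefix as the tokenizer reports it: whitespace and comments that sit in
# front of a token.
prefix = '# a comment\n    '
if '\n' in prefix or '\r' in prefix:
    # Drop everything after the last newline, keeping the comment line.
    prefix = re.sub(r'[^\n\r]+\Z', '', prefix)
print(repr(prefix))  # '# a comment\n'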
def update(self, old_lines, new_lines):
    '''
    The algorithm works as follows:

    Equal:
        - Assure that the start is a newline, otherwise parse until we get
          one.
        - Copy from parsed_until_line + 1 to max(i2 + 1)
        - Make sure that the indentation is correct (e.g. add DEDENT)
        - Add old and change positions
    Insert:
        - Parse from parsed_until_line + 1 to min(j2 + 1), hopefully not
          much more.

    Returns the new module node.
    '''
    LOG.debug('diff parser start')
    # Reset the used names cache so they get regenerated.
    self._module._used_names = None

    self._parser_lines_new = new_lines

    self._reset()

    line_length = len(new_lines)
    sm = difflib.SequenceMatcher(None, old_lines, self._parser_lines_new)
    opcodes = sm.get_opcodes()
    LOG.debug('line_lengths old: %s; new: %s' % (len(old_lines), line_length))

    for operation, i1, i2, j1, j2 in opcodes:
        LOG.debug('-> code[%s] old[%s:%s] new[%s:%s]',
                  operation, i1 + 1, i2, j1 + 1, j2)

        if j2 == line_length and new_lines[-1] == '':
            # The empty part after the last newline is not relevant.
            j2 -= 1

        if operation == 'equal':
            line_offset = j1 - i1
            self._copy_from_old_parser(line_offset, i1 + 1, i2, j2)
        elif operation == 'replace':
            self._parse(until_line=j2)
        elif operation == 'insert':
            self._parse(until_line=j2)
        else:
            assert operation == 'delete'

    # With this action all change will finally be applied and we have a
    # changed module.
    self._nodes_tree.close()

    if DEBUG_DIFF_PARSER:
        # If there is reasonable suspicion that the diff parser is not
        # behaving well, this should be enabled.
        try:
            code = ''.join(new_lines)
            assert self._module.get_code() == code
            _assert_valid_graph(self._module)
            without_diff_parser_module = Parser(
                self._pgen_grammar,
                error_recovery=True
            ).parse(self._tokenizer(new_lines))
            _assert_nodes_are_equal(self._module, without_diff_parser_module)
        except AssertionError:
            print(_get_debug_error_message(self._module, old_lines, new_lines))
            raise

    last_pos = self._module.end_pos[0]
    if last_pos != line_length:
        raise Exception(
            ('(%s != %s) ' % (last_pos, line_length))
            + _get_debug_error_message(self._module, old_lines, new_lines))

    LOG.debug('diff parser end')
    return self._module
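
# From the outside, the diff parser is normally reached through parso's
# caching layer rather than instantiated directly. A sketch of round-tripping
# an edit, assuming a recent parso release where Grammar.parse accepts the
# cache/diff_cache flags; the temporary path is invented, and whether the
# second call actually takes the diff path depends on parso's cache
# behaviour:

import os
import tempfile

import parso

old_code = 'def f():\n    return 1\n'
new_code = 'def f():\n    return 2\n'

# Use a real file path so the cache has a modification time to key on.
with tempfile.TemporaryDirectory() as tmp:
    path = os.path.join(tmp, 'example.py')
    with open(path, 'w') as f:
        f.write(old_code)

    grammar = parso.load_grammar()
    grammar.parse(old_code, path=path, cache=True, diff_cache=True)

    # Re-parse after an edit; with diff_cache the cached tree is updated in
    # place instead of being rebuilt from scratch.
    module = grammar.parse(new_code, path=path, cache=True, diff_cache=True)

    # The invariant the DEBUG_DIFF_PARSER block asserts: an exact round-trip
    # of the new source.
    assert module.get_code() == new_code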