Example #1
def format_lines(statements, lines, arcs=None):
    """Nicely format a list of line numbers.

    Format a list of line numbers for printing by coalescing groups of lines as
    long as the lines represent consecutive statements.  This will coalesce
    even if there are gaps between statements.

    For example, if `statements` is [1,2,3,4,5,10,11,12,13,14] and
    `lines` is [1,2,5,10,11,13,14] then the result will be "1-2, 5-11, 13-14".

    Both `lines` and `statements` can be any iterable. All of the elements of
    `lines` must be in `statements`, and all of the values must be positive
    integers.

    If `arcs` is provided, they are (start,[end,end,end]) pairs that will be
    included in the output as long as start isn't in `lines`.

    """
    line_items = [(pair[0], nice_pair(pair))
                  for pair in _line_ranges(statements, lines)]
    if arcs:
        line_exits = sorted(arcs)
        for line, exits in line_exits:
            for ex in sorted(exits):
                if line not in lines:
                    dest = (ex if ex > 0 else "exit")
                    line_items.append((line, "%d->%s" % (line, dest)))

    ret = ', '.join(t[-1] for t in sorted(line_items))
    return ret
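
A minimal usage sketch of the function above. Note that format_lines is an internal coverage.py helper, so the import path below (coverage/results.py in recent versions) is an assumption that may vary between releases:

# Hypothetical usage; format_lines is internal to coverage.py, and this
# import path is an assumption that depends on the installed version.
from coverage.results import format_lines

statements = [1, 2, 3, 4, 5, 10, 11, 12, 13, 14]
lines = [1, 2, 5, 10, 11, 13, 14]

# Matches the docstring example: 5 and 10 coalesce because no statement
# lies between them, even though the line numbers are not consecutive.
print(format_lines(statements, lines))  # "1-2, 5-11, 13-14"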
Example #2
    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.
        
        A handful of member fields are updated.
        
        """
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True
        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print('%10s %5s %-20r %r' % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng

                first_line = None
            if ttext.strip() and toktype != tokenize.COMMENT:
                empty = False
                if first_line is None:
                    first_line = slineno
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)
            prev_toktype = toktype

        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())
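
For context, a small standalone sketch of the token stream this loop consumes. tokenize.generate_tokens yields the same five-part tuples that the loop above unpacks:

import io
import tokenize

src = "x = (1 +\n     2)\n"

# Each token is (type, text, (start_row, start_col), (end_row, end_col),
# source_line), the same five-part shape unpacked by the loop above.
for toktype, ttext, (srow, _), (erow, _), _ in \
        tokenize.generate_tokens(io.StringIO(src).readline):
    print("%-10s %2d-%2d %r" % (
        tokenize.tok_name.get(toktype, toktype), srow, erow, ttext))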
Example #3
    def _raw_parse(self):
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True
        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print('%10s %5s %-20r %r' % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng

                first_line = None
            if ttext.strip() and toktype != tokenize.COMMENT:
                empty = False
                if first_line is None:
                    first_line = slineno
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)
            prev_toktype = toktype

        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())
Example #4
    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of attributes are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.raw_excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        excluding_decorators = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True
        first_on_line = True

        tokgen = generate_tokens(self.text)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:  # pragma: debugging
                print("%10s %5s %-20r %r" % (tokenize.tok_name.get(
                    toktype, toktype), nice_pair(
                        (slineno, elineno)), ttext, ltext))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME:
                if ttext == 'class':
                    # Class definitions look like branches in the bytecode, so
                    # we need to exclude them.  The simplest way is to note the
                    # lines with the 'class' keyword.
                    self.raw_classdefs.add(slineno)
            elif toktype == token.OP:
                if ttext == ':':
                    should_exclude = ((elineno in self.raw_excluded) or
                                      excluding_decorators)
                    if not excluding and should_exclude:
                        # Start excluding a suite.  We trigger off of the colon
                        # token so that the #pragma comment will be recognized on
                        # the same line as the colon.
                        self.raw_excluded.add(elineno)
                        exclude_indent = indent
                        excluding = True
                        excluding_decorators = False
                elif ttext == '@' and first_on_line:
                    # A decorator.
                    if elineno in self.raw_excluded:
                        excluding_decorators = True
                    if excluding_decorators:
                        self.raw_excluded.add(elineno)
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.raw_docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    for l in range(first_line, elineno + 1):
                        self._multiline[l] = first_line
                first_line = None
                first_on_line = True

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.raw_excluded.add(elineno)
                    first_on_line = False

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.raw_statements.update(self.byte_parser._find_statements())

        # The first line of modules can lie and say 1 always, even if the first
        # line of code is later. If so, map 1 to the actual first line of the
        # module.
        if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
            self._multiline[1] = min(self.raw_statements)
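
The NEWLINE branch above is the heart of multi-line statement tracking: every physical line of a statement is mapped back to the statement's first line. A compressed, standalone sketch of just that technique (a hypothetical helper, not coverage.py API):

import io
import token
import tokenize

def multiline_map(source):
    """Map each line of a multi-line statement to its first line."""
    multiline = {}
    first_line = None
    for toktype, ttext, (srow, _), (erow, _), _ in \
            tokenize.generate_tokens(io.StringIO(source).readline):
        if toktype == token.NEWLINE:
            if first_line is not None and erow != first_line:
                for lineno in range(first_line, erow + 1):
                    multiline[lineno] = first_line
            first_line = None
        elif ttext.strip() and toktype != tokenize.COMMENT:
            if first_line is None:
                first_line = srow  # first real token of the statement
    return multiline

print(multiline_map("x = (1 +\n     2)\ny = 3\n"))  # {1: 1, 2: 1}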
Example #5
    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = generate_tokens(self.text)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(slineno, elineno+1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = first_line
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())
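
The indent bookkeeping in these versions is what lets one pragma comment on a def or if line exclude the whole suite: the colon arms the exclusion, and the suite ends when a statement starts at the same or a shallower indent. A simplified standalone sketch of that technique (hypothetical helper; the real parser also handles decorators and more):

import io
import re
import token
import tokenize

def excluded_lines(source, pattern=r"#\s*pragma:\s*no cover"):
    """Lines excluded by a pragma, extended over colon-introduced suites."""
    regex = re.compile(pattern)
    excluded = {i + 1 for i, line in enumerate(source.splitlines())
                if regex.search(line)}
    indent = exclude_indent = 0
    excluding = False
    first = True  # waiting for the first real token of a statement
    for toktype, ttext, _, (erow, _), _ in \
            tokenize.generate_tokens(io.StringIO(source).readline):
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.OP and ttext == ':':
            if not excluding and erow in excluded:
                exclude_indent = indent  # the suite starts one level deeper
                excluding = True
        if ttext.strip() and toktype != tokenize.COMMENT:
            if first:
                if excluding and indent <= exclude_indent:
                    excluding = False  # back at the suite's own level: done
                if excluding:
                    excluded.add(erow)
                first = False
        if toktype == token.NEWLINE:
            first = True
    return excluded

src = "def f():  # pragma: no cover\n    return 1\nx = 2\n"
print(sorted(excluded_lines(src)))  # [1, 2]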
Example #6
    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: no cover
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                for i in range(slineno, elineno+1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())
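
The docstring trick these parsers share (credited to trace.py in the comments) is easy to check in isolation: a STRING token whose predecessor is INDENT is a docstring. A minimal sketch; seeding prev with token.INDENT, as the examples do, also catches module-level docstrings:

import io
import token
import tokenize

def docstring_lines(source):
    """Lines covered by docstrings: STRING tokens right after an INDENT."""
    doc_lines = set()
    prev = token.INDENT  # seed so a module-level docstring matches too
    for toktype, _, (srow, _), (erow, _), _ in \
            tokenize.generate_tokens(io.StringIO(source).readline):
        if toktype == token.STRING and prev == token.INDENT:
            doc_lines.update(range(srow, erow + 1))
        prev = toktype
    return doc_lines

src = 'def f():\n    """Two-line\n    docstring."""\n    return 1\n'
print(sorted(docstring_lines(src)))  # [2, 3]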
Example #7
    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of attributes are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.raw_excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        excluding_decorators = False
        prev_toktype = 'INDENT'
        first_line = None
        empty = True
        first_on_line = True
        operators = {op[1] for op in lexer.operators}

        def generate_tokens(text):
            """Use enaml lexer to generate the tokens.

            """
            lexer.input(text)
            while True:
                tok = lexer.token()
                length = 0
                if tok is None:
                    break
                if tok.type == 'STRING':
                    # HINT use a string when counting to avoid encoding issues.
                    length = tok.value.count(str('\n'))
                yield (tok.type, tok.value, (tok.lineno, 0),
                       (tok.lineno + length, 0), '')

        tokgen = generate_tokens(self.text)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: not covered
                print("%10s %5s %-20r %r" % (
                    toktype, nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == 'INDENT':
                indent += 1
            elif toktype == 'DEDENT':
                indent -= 1
            elif toktype == 'NAME':
                if ttext == 'class' or ttext == 'enamldef':
                    # Class definitions look like branches in the bytecode, so
                    # we need to exclude them.  The simplest way is to note the
                    # lines with the 'class' keyword.
                    self.raw_classdefs.add(slineno)
            elif toktype in operators:
                if ttext == ':':
                    should_exclude = ((elineno in self.raw_excluded) or
                                      excluding_decorators)
                    if not excluding and should_exclude:
                        # Start excluding a suite.  We trigger off of the colon
                        # token so that the #pragma comment will be recognized
                        # on the same line as the colon.
                        self.raw_excluded.add(elineno)
                        exclude_indent = indent
                        excluding = True
                        excluding_decorators = False
                elif ttext == '@' and first_on_line:
                    # A decorator.
                    if elineno in self.raw_excluded:
                        excluding_decorators = True
                    if excluding_decorators:
                        self.raw_excluded.add(elineno)
            elif toktype == 'STRING' and prev_toktype == 'INDENT':
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.raw_docstrings.update(range(slineno, elineno+1))
            elif toktype == 'NEWLINE':
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    for l in range(first_line, elineno+1):
                        self._multiline[l] = first_line
                first_line = None
                first_on_line = True

            if ttext is not None:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.raw_excluded.add(elineno)
                    first_on_line = False

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.raw_statements.update(self.byte_parser._find_statements())
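
Worth noting in this last example is the adapter: the inner generate_tokens turns a ply-style lexer (enaml's) into the tokenize-shaped five-tuples the rest of the parser expects. A generic sketch of the same idea, with lexer standing in for any hypothetical ply-style object:

def adapt_ply_lexer(lexer, text):
    """Yield tokenize-style 5-tuples from a ply-style lexer.

    `lexer` is hypothetical here: any object with ply's input()/token()
    interface, such as the enaml lexer used in the example above.
    """
    lexer.input(text)
    while True:
        tok = lexer.token()
        if tok is None:
            break
        # ply tokens carry only a start line, so synthesize an end line
        # from embedded newlines (as the STRING handling above does).
        extra = tok.value.count('\n') if isinstance(tok.value, str) else 0
        yield (tok.type, tok.value, (tok.lineno, 0),
               (tok.lineno + extra, 0), '')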