Example No. 1
def run(s, expected_last_future=(0, 0)):
    source_lines = s.splitlines(True)
    tokens = pytokenizer.generate_tokens(source_lines, 0)
    #
    flags, last_future_import = future.add_future_flags(
        future.futureFlags_2_7, tokens)
    assert last_future_import == expected_last_future
    return flags
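A hedged usage sketch for this helper. The consts import path follows PyPy's source layout, and the (1, 24) position is an assumption, not a value taken from the test suite:

from pypy.interpreter.astcompiler import consts

# Source with no __future__ import: no flags set, and the default
# (0, 0) "last future import" position holds.
assert run("x = 1\n") == 0

# A real future import should set the matching CO_FUTURE_* flag.  The
# (line, column) pair passed here is illustrative; a real test would
# pass the position the tokenizer actually reports.
flags = run("from __future__ import division\n",
            expected_last_future=(1, 24))  # assumed position
assert flags & consts.CO_FUTURE_DIVISION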
Example No. 2
def run(s, expected_last_future=None):
    source_lines = s.splitlines(True)
    tokens = pytokenizer.generate_tokens(source_lines, 0)
    expected_last_future = expected_last_future or tokens[-1][2:4]
    #
    flags, last_future_import = future.add_future_flags(
        future.futureFlags_2_7, tokens)
    assert last_future_import == expected_last_future
    return flags
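This variant defaults the expected position to the final token's (lineno, column) slice, tokens[-1][2:4], so callers can omit it. A hedged sketch, assuming the same consts import as above:

# With no explicit expectation, the assertion inside run() checks that
# add_future_flags reports the last token's position.
flags = run("from __future__ import division\n")
assert flags & consts.CO_FUTURE_DIVISION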
Example No. 3
def python_format(line):
    """
    Format the given text string as a Python line.

    Args:
        line - String
    """
    try:
        toks = generate_tokens(line.split('\n'), 0)
    except error.TokenError:
        # Fall back to the unformatted input if tokenization fails.
        return line

    while toks and toks[-1][0] in (tokens.ENDMARKER, tokens.NEWLINE):
        toks = toks[:-1]

    indent = ''

    if toks and toks[0][0] == tokens.INDENT:
        indent = toks[0][1]
        toks = toks[1:]

    return format_line(Line(indent, toks))
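A hedged usage sketch; the exact output depends on format_line and Line, which are defined elsewhere in this formatter:

# Leading indentation is detected via the INDENT token and preserved;
# the remaining tokens are re-rendered by format_line.
formatted = python_format("    x = 1")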
Example No. 4
    def _parse(self, textsrc, compile_info):
        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        try:
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)
            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except error.TokenIndentationError as e:
                e.filename = compile_info.filename
                raise

            newflags, last_future_import = (future.add_future_flags(
                self.future_flags, tokens))
            compile_info.last_future_import = last_future_import
            compile_info.flags |= newflags

            self.grammar = pygram.choose_grammar(
                print_function=compile_info.flags
                & consts.CO_FUTURE_PRINT_FUNCTION,
                revdb=self.space.config.translation.reverse_debugger)

            try:
                for token in tokens:
                    if self.add_token(token):
                        break
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if token.token_type == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    msg = "invalid syntax"
                    if e.expected_str is not None:
                        msg += " (expected '%s')" % e.expected_str

                # parser.ParseError(...).column is 0-based, but the offsets in the
                # exceptions in the error module are 1-based, hence the '+ 1'
                raise new_err(msg, e.token.lineno, e.token.column + 1,
                              e.token.line, compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        return tree
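Unlike the tuple-based examples elsewhere on this page, this revision iterates over token objects (token.token_type, e.token.lineno, and so on). A hedged reconstruction of the shape those attribute accesses imply, not PyPy's actual class:

class Token(object):
    # Attribute names taken from the accesses above; 'value' is assumed
    # by analogy with the 5-tuple examples on this page.
    def __init__(self, token_type, value, lineno, column, line):
        self.token_type = token_type
        self.value = value
        self.lineno = lineno
        self.column = column
        self.line = line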
Example No. 5
def tokenize(s):
    return pytokenizer.generate_tokens(s.splitlines(True) + ["\n"], 0)
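A hedged usage sketch; each token is assumed to be the 5-tuple (type, value, lineno, column, line) that the parser examples below unpack:

toks = tokenize("x = 1")
# The first token should be the NAME 'x' on line 1.
tp, value, lineno, column, line = toks[0]
assert value == 'x'
assert lineno == 1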
Example No. 6
    def parse_source(self, textsrc, compile_info):
        """Main entry point for parsing Python source.

        Everything from decoding the source to tokenizing to building the parse
        tree is handled here.
        """
        # Detect source encoding.
        enc = None
        if textsrc.startswith("\xEF\xBB\xBF"):
            textsrc = textsrc[3:]
            enc = 'utf-8'
            # If an encoding is explicitly given check that it is utf-8.
            decl_enc = _check_for_encoding(textsrc)
            if decl_enc and decl_enc != "utf-8":
                raise error.SyntaxError("UTF-8 BOM with %s coding cookie" %
                                        decl_enc,
                                        filename=compile_info.filename)
        elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
            enc = 'utf-8'
            if _check_for_encoding(textsrc) is not None:
                raise error.SyntaxError("coding declaration in unicode string",
                                        filename=compile_info.filename)
        else:
            enc = _normalize_encoding(_check_for_encoding(textsrc))
            if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
                try:
                    textsrc = recode_to_utf8(self.space, textsrc, enc)
                except OperationError as e:
                    # if the codec is not found, LookupError is raised.  we
                    # check using 'is_w' not to mask potential IndexError or
                    # KeyError
                    space = self.space
                    if e.match(space, space.w_LookupError):
                        raise error.SyntaxError(
                            "Codificación desconocida: %s" % enc,
                            filename=compile_info.filename)
                    # Transform unicode errors into SyntaxError
                    if e.match(space, space.w_UnicodeDecodeError):
                        e.normalize_exception(space)
                        w_message = space.str(e.get_w_value(space))
                        raise error.SyntaxError(space.text_w(w_message))
                    raise

        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        tp = 0
        try:
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)

                newflags, last_future_import = (future.add_future_flags(
                    self.future_flags, tokens))
                compile_info.last_future_import = last_future_import
                compile_info.flags |= newflags

                if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION:
                    self.grammar = pygram.python_grammar_no_print
                else:
                    self.grammar = pygram.python_grammar

                for tp, value, lineno, column, line in tokens:
                    if self.add_token(tp, value, lineno, column, line):
                        break
            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except error.TokenIndentationError as e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "sangría inesperado"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "esperó un bloque sangriado"
                else:
                    new_err = error.SyntaxError
                    msg = "sintaxis no válida"
                raise new_err(msg, e.lineno, e.column, e.line,
                              compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        if enc is not None:
            compile_info.encoding = enc
        return tree
Example No. 7
class PythonParser(parser.Parser):
    def __init__(self,
                 space,
                 future_flags=future.futureFlags_3_2,
                 grammar=pygram.python_grammar):
        parser.Parser.__init__(self, grammar)
        self.space = space
        self.future_flags = future_flags

    def parse_source(self, bytessrc, compile_info):
        """Main entry point for parsing Python source.

        Everything from decoding the source to tokenizing to building the parse
        tree is handled here.
        """
        # Detect source encoding.
        enc = None
        if compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
            enc = 'utf-8'

        if compile_info.flags & consts.PyCF_IGNORE_COOKIE:
            textsrc = bytessrc
        elif bytessrc.startswith("\xEF\xBB\xBF"):
            bytessrc = bytessrc[3:]
            enc = 'utf-8'
            # If an encoding is explicitly given check that it is utf-8.
            decl_enc = _check_for_encoding(bytessrc)
            if decl_enc and decl_enc != "utf-8":
                raise error.SyntaxError("UTF-8 BOM with %s coding cookie" %
                                        decl_enc,
                                        filename=compile_info.filename)
            textsrc = bytessrc
        else:
            enc = _normalize_encoding(_check_for_encoding(bytessrc))
            if enc is None:
                enc = 'utf-8'
            try:
                textsrc = recode_to_utf8(self.space, bytessrc, enc)
            except OperationError as e:
                # if the codec is not found, LookupError is raised.  we
                # check using 'is_w' not to mask potential IndexError or
                # KeyError
                space = self.space
                if e.match(space, space.w_LookupError):
                    raise error.SyntaxError("Unknown encoding: %s" % enc,
                                            filename=compile_info.filename)
                # Transform unicode errors into SyntaxError
                if e.match(space, space.w_UnicodeDecodeError):
                    e.normalize_exception(space)
                    w_message = space.str(e.get_w_value(space))
                    raise error.SyntaxError(space.str_w(w_message))
                raise

        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        tp = 0
        try:
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)

                newflags, last_future_import = (future.add_future_flags(
                    self.future_flags, tokens))
                compile_info.last_future_import = last_future_import
                compile_info.flags |= newflags
                self.grammar = pygram.python_grammar

                for tp, value, lineno, column, line in tokens:
                    if self.add_token(tp, value, lineno, column, line):
                        break
            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    msg = "invalid syntax"
                raise new_err(msg, e.lineno, e.column, e.line,
                              compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        if enc is not None:
            compile_info.encoding = enc
        return tree
Example No. 8
    def parse_source(self, textsrc, compile_info):
        """Main entry point for parsing Python source.

        Everything from decoding the source to tokenizing to building the parse
        tree is handled here.
        """
        # Detect source encoding.
        enc = None
        if textsrc.startswith("\xEF\xBB\xBF"):
            textsrc = textsrc[3:]
            enc = 'utf-8'
            # If an encoding is explicitly given check that it is utf-8.
            decl_enc = _check_for_encoding(textsrc)
            if decl_enc and decl_enc != "utf-8":
                raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc,
                                        filename=compile_info.filename)
        elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
            enc = 'utf-8'
            if _check_for_encoding(textsrc) is not None:
                raise error.SyntaxError("coding declaration in unicode string",
                                        filename=compile_info.filename)
        else:
            enc = _normalize_encoding(_check_for_encoding(textsrc))
            if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
                try:
                    textsrc = recode_to_utf8(self.space, textsrc, enc)
                except OperationError as e:
                    # if the codec is not found, LookupError is raised.  we
                    # check using 'is_w' not to mask potential IndexError or
                    # KeyError
                    space = self.space
                    if e.match(space, space.w_LookupError):
                        raise error.SyntaxError("Unknown encoding: %s" % enc,
                                                filename=compile_info.filename)
                    # Transform unicode errors into SyntaxError
                    if e.match(space, space.w_UnicodeDecodeError):
                        e.normalize_exception(space)
                        w_message = space.str(e.get_w_value(space))
                        raise error.SyntaxError(space.str_w(w_message))
                    raise

        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        tp = 0
        try:
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)

                newflags, last_future_import = (
                    future.add_future_flags(self.future_flags, tokens))
                compile_info.last_future_import = last_future_import
                compile_info.flags |= newflags

                if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION:
                    self.grammar = pygram.python_grammar_no_print
                else:
                    self.grammar = pygram.python_grammar

                for tp, value, lineno, column, line in tokens:
                    if self.add_token(tp, value, lineno, column, line):
                        break
            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    msg = "invalid syntax"
                raise new_err(msg, e.lineno, e.column, e.line,
                              compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        if enc is not None:
            compile_info.encoding = enc
        return tree
Example No. 9
    def _parse(self, textsrc, compile_info):
        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        tp = 0
        try:
            last_value_seen = None
            next_value_seen = None
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)

                newflags, last_future_import = (future.add_future_flags(
                    self.future_flags, tokens))
                compile_info.last_future_import = last_future_import
                compile_info.flags |= newflags
                self.grammar = pygram.python_grammar
                tokens_stream = iter(tokens)

                for tp, value, lineno, column, line in tokens_stream:
                    next_value_seen = value
                    if self.add_token(tp, value, lineno, column, line):
                        break
                    last_value_seen = value
                last_value_seen = None
                next_value_seen = None

                if compile_info.mode == 'single':
                    for tp, value, lineno, column, line in tokens_stream:
                        if tp == pygram.tokens.ENDMARKER:
                            break
                        if tp == pygram.tokens.NEWLINE:
                            continue

                        if tp == pygram.tokens.COMMENT:
                            for tp, _, _, _, _ in tokens_stream:
                                if tp == pygram.tokens.NEWLINE:
                                    break
                        else:
                            new_err = error.SyntaxError
                            msg = ("multiple statements found while "
                                   "compiling a single statement")
                            raise new_err(msg, lineno, column, line,
                                          compile_info.filename)

            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except error.TokenIndentationError as e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    if (last_value_seen in ('print', 'exec')
                            and bool(next_value_seen)
                            and next_value_seen != '('):
                        msg = "Missing parentheses in call to '%s'" % (
                            last_value_seen, )
                    else:
                        msg = "invalid syntax"
                    if e.expected_str is not None:
                        msg += " (expected '%s')" % e.expected_str

                # parser.ParseError(...).column is 0-based, but the offsets in the
                # exceptions in the error module are 1-based, hence the '+ 1'
                raise new_err(msg, e.lineno, e.column + 1, e.line,
                              compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        return tree
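The "Missing parentheses" hint above only fires for Python 2 style print/exec statements. A self-contained sketch of the same message selection, extracted here purely for illustration:

def syntax_error_hint(last_value_seen, next_value_seen):
    # Mirrors the message selection in the ParseError branch above.
    if (last_value_seen in ('print', 'exec')
            and bool(next_value_seen)
            and next_value_seen != '('):
        return "Missing parentheses in call to '%s'" % (last_value_seen,)
    return "invalid syntax"

assert syntax_error_hint('print', '"hello"') == \
    "Missing parentheses in call to 'print'"
assert syntax_error_hint('print', '(') == "invalid syntax"
assert syntax_error_hint('x', '+') == "invalid syntax"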
Example No. 10
class PythonParser(parser.Parser):
    def __init__(self, space, grammar=pygram.python_grammar):
        parser.Parser.__init__(self, grammar)
        self.space = space

    def parse_source(self, textsrc, compile_info):
        """Main entry point for parsing Python source.

        Everything from decoding the source to tokenizing to building the parse
        tree is handled here.
        """
        # Detect source encoding.
        enc = None
        if textsrc.startswith("\xEF\xBB\xBF"):
            textsrc = textsrc[3:]
            enc = 'utf-8'
            # If an encoding is explicitly given check that it is utf-8.
            decl_enc = _check_for_encoding(textsrc)
            if decl_enc and decl_enc != "utf-8":
                raise error.SyntaxError(
                    "UTF-8 BOM with non-utf8 coding cookie",
                    filename=compile_info.filename)
        elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
            enc = 'utf-8'
            if _check_for_encoding(textsrc) is not None:
                raise error.SyntaxError("coding declaration in unicode string",
                                        filename=compile_info.filename)
        else:
            enc = _normalize_encoding(_check_for_encoding(textsrc))
            if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
                try:
                    textsrc = recode_to_utf8(self.space, textsrc, enc)
                except OperationError as e:
                    # if the codec is not found, LookupError is raised.  we
                    # check using 'is_w' not to mask potential IndexError or
                    # KeyError
                    space = self.space
                    if space.is_w(e.w_type, space.w_LookupError):
                        raise error.SyntaxError("Unknown encoding: %s" % enc,
                                                filename=compile_info.filename)
                    raise

        flags = compile_info.flags

        # In order to not raise errors when 'as' or 'with' are used as names in
        # code that does not explicitly enable the with statement, we have two
        # grammars.  One with 'as' and 'with' and keywords and one without.
        # This is far better than CPython, where the parser is hacked up to
        # check for __future__ imports and recognize new keywords accordingly.
        if flags & consts.CO_FUTURE_WITH_STATEMENT:
            self.grammar = pygram.python_grammar
        else:
            self.grammar = pygram.python_grammar_no_with_statement

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        tp = 0
        try:
            try:
                tokens = pytokenizer.generate_tokens(source_lines, flags)
                for tp, value, lineno, column, line in tokens:
                    if self.add_token(tp, value, lineno, column, line):
                        break
            except error.TokenError as e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError as e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    msg = "invalid syntax"
                raise new_err(msg, e.lineno, e.column, e.line,
                              compile_info.filename)
            else:
                tree = self.root
        finally:
            # Avoid hanging onto the tree.
            self.root = None
        if enc is not None:
            compile_info.encoding = enc
        return tree