def run(s, expected_last_future=(0, 0)):
    """Tokenize *s* and return the __future__ compiler flags found in it.

    Asserts that the (line, column) position of the last __future__
    import equals *expected_last_future*; the default of ``(0, 0)``
    means "no __future__ import present".
    """
    lines = s.splitlines(True)
    toks = pytokenizer.generate_tokens(lines, 0)
    #
    flags, last_future = future.add_future_flags(future.futureFlags_2_7,
                                                 toks)
    assert last_future == expected_last_future
    return flags
def run(s, expected_last_future=None):
    """Tokenize *s* and return the __future__ compiler flags found in it.

    :param s: source text to tokenize.
    :param expected_last_future: expected (line, column) position of the
        last __future__ import; when ``None`` it defaults to the position
        of the final token.
    :returns: the flag set computed by ``future.add_future_flags``.
    :raises AssertionError: if the recorded position does not match.
    """
    source_lines = s.splitlines(True)
    tokens = pytokenizer.generate_tokens(source_lines, 0)
    if expected_last_future is None:
        # Test with `is None`, not `or`: an explicitly passed falsy value
        # (e.g. an empty tuple) must not be silently replaced by the
        # default position.
        expected_last_future = tokens[-1][2:4]
    #
    flags, last_future_import = future.add_future_flags(
        future.futureFlags_2_7, tokens)
    assert last_future_import == expected_last_future
    return flags
def _parse(self, textsrc, compile_info): flags = compile_info.flags # The tokenizer is very picky about how it wants its input. source_lines = textsrc.splitlines(True) if source_lines and not source_lines[-1].endswith("\n"): source_lines[-1] += '\n' if textsrc and textsrc[-1] == "\n": flags &= ~consts.PyCF_DONT_IMPLY_DEDENT self.prepare(_targets[compile_info.mode]) try: try: # Note: we no longer pass the CO_FUTURE_* to the tokenizer, # which is expected to work independently of them. It's # certainly the case for all futures in Python <= 2.7. tokens = pytokenizer.generate_tokens(source_lines, flags) except error.TokenError as e: e.filename = compile_info.filename raise except error.TokenIndentationError as e: e.filename = compile_info.filename raise newflags, last_future_import = (future.add_future_flags( self.future_flags, tokens)) compile_info.last_future_import = last_future_import compile_info.flags |= newflags self.grammar = pygram.choose_grammar( print_function=compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION, revdb=self.space.config.translation.reverse_debugger) try: for token in tokens: if self.add_token(token): break except parser.ParseError as e: # Catch parse errors, pretty them up and reraise them as a # SyntaxError. new_err = error.IndentationError if token.token_type == pygram.tokens.INDENT: msg = "unexpected indent" elif e.expected == pygram.tokens.INDENT: msg = "expected an indented block" else: new_err = error.SyntaxError msg = "invalid syntax" if e.expected_str is not None: msg += " (expected '%s')" % e.expected_str # parser.ParseError(...).column is 0-based, but the offsets in the # exceptions in the error module are 1-based, hence the '+ 1' raise new_err(msg, e.token.lineno, e.token.column + 1, e.token.line, compile_info.filename) else: tree = self.root finally: # Avoid hanging onto the tree. self.root = None return tree
def parse_source(self, textsrc, compile_info):
    """Main entry point for parsing Python source.

    Everything from decoding the source to tokenizing to building the parse
    tree is handled here.

    NOTE(review): this variant raises some of its SyntaxError messages in
    Spanish ("sangría inesperado", etc.); those strings are runtime output
    and are kept exactly as written.
    """
    # Detect source encoding.
    enc = None
    if textsrc.startswith("\xEF\xBB\xBF"):
        # UTF-8 byte-order mark: strip it and pin the encoding.
        textsrc = textsrc[3:]
        enc = 'utf-8'
        # If an encoding is explicitly given check that it is utf-8.
        decl_enc = _check_for_encoding(textsrc)
        if decl_enc and decl_enc != "utf-8":
            raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc,
                                    filename=compile_info.filename)
    elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
        enc = 'utf-8'
        # A unicode source must not additionally carry a coding cookie.
        if _check_for_encoding(textsrc) is not None:
            raise error.SyntaxError("coding declaration in unicode string",
                                    filename=compile_info.filename)
    else:
        enc = _normalize_encoding(_check_for_encoding(textsrc))
        if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
            # Recode anything that is not already utf-8/latin-1.
            try:
                textsrc = recode_to_utf8(self.space, textsrc, enc)
            except OperationError as e:
                # if the codec is not found, LookupError is raised.  we
                # check using 'is_w' not to mask potential IndexError or
                # KeyError
                space = self.space
                if e.match(space, space.w_LookupError):
                    raise error.SyntaxError(
                        "Codificación desconocida: %s" % enc,
                        filename=compile_info.filename)
                # Transform unicode errors into SyntaxError
                if e.match(space, space.w_UnicodeDecodeError):
                    e.normalize_exception(space)
                    w_message = space.str(e.get_w_value(space))
                    raise error.SyntaxError(space.text_w(w_message))
                raise

    flags = compile_info.flags
    # The tokenizer is very picky about how it wants its input.
    source_lines = textsrc.splitlines(True)
    if source_lines and not source_lines[-1].endswith("\n"):
        # Ensure the last line is newline-terminated.
        source_lines[-1] += '\n'
    if textsrc and textsrc[-1] == "\n":
        # Source ends with a newline: clear PyCF_DONT_IMPLY_DEDENT so the
        # final DEDENT may be implied.
        flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

    self.prepare(_targets[compile_info.mode])
    # `tp` must pre-exist: the ParseError handler below reads it even if
    # the token loop never ran.
    tp = 0
    try:
        try:
            # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
            # which is expected to work independently of them.  It's
            # certainly the case for all futures in Python <= 2.7.
            tokens = pytokenizer.generate_tokens(source_lines, flags)

            # Fold __future__ flags into compile_info before choosing
            # the grammar, which depends on CO_FUTURE_PRINT_FUNCTION.
            newflags, last_future_import = (future.add_future_flags(
                self.future_flags, tokens))
            compile_info.last_future_import = last_future_import
            compile_info.flags |= newflags

            if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION:
                self.grammar = pygram.python_grammar_no_print
            else:
                self.grammar = pygram.python_grammar

            # Feed tokens until add_token() signals completion.
            for tp, value, lineno, column, line in tokens:
                if self.add_token(tp, value, lineno, column, line):
                    break
        except error.TokenError as e:
            e.filename = compile_info.filename
            raise
        except error.TokenIndentationError as e:
            e.filename = compile_info.filename
            raise
        except parser.ParseError as e:
            # Catch parse errors, pretty them up and reraise them as a
            # SyntaxError.
            new_err = error.IndentationError
            if tp == pygram.tokens.INDENT:
                msg = "sangría inesperado"
            elif e.expected == pygram.tokens.INDENT:
                msg = "esperó un bloque sangriado"
            else:
                new_err = error.SyntaxError
                msg = "sintaxis no válida"
            raise new_err(msg, e.lineno, e.column, e.line,
                          compile_info.filename)
        else:
            tree = self.root
    finally:
        # Avoid hanging onto the tree.
        self.root = None
    if enc is not None:
        compile_info.encoding = enc
    return tree
class PythonParser(parser.Parser):
    """Concrete parser for Python source (Python 2-era revision; note the
    ``except X, e:`` syntax below)."""

    def __init__(self, space, future_flags=future.futureFlags_3_2,
                 grammar=pygram.python_grammar):
        parser.Parser.__init__(self, grammar)
        self.space = space
        # Set of __future__ features this parser recognizes.
        self.future_flags = future_flags

    def parse_source(self, bytessrc, compile_info):
        """Main entry point for parsing Python source.

        Everything from decoding the source to tokenizing to building the parse
        tree is handled here.
        """
        # Detect source encoding.
        enc = None
        if compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
            enc = 'utf-8'

        if compile_info.flags & consts.PyCF_IGNORE_COOKIE:
            # Caller asked us to ignore any coding cookie.
            textsrc = bytessrc
        elif bytessrc.startswith("\xEF\xBB\xBF"):
            # UTF-8 byte-order mark: strip it and pin the encoding.
            bytessrc = bytessrc[3:]
            enc = 'utf-8'
            # If an encoding is explicitly given check that it is utf-8.
            decl_enc = _check_for_encoding(bytessrc)
            if decl_enc and decl_enc != "utf-8":
                raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc,
                                        filename=compile_info.filename)
            textsrc = bytessrc
        else:
            enc = _normalize_encoding(_check_for_encoding(bytessrc))
            if enc is None:
                enc = 'utf-8'
            try:
                textsrc = recode_to_utf8(self.space, bytessrc, enc)
            except OperationError, e:
                # if the codec is not found, LookupError is raised.  we
                # check using 'is_w' not to mask potential IndexError or
                # KeyError
                space = self.space
                if e.match(space, space.w_LookupError):
                    raise error.SyntaxError("Unknown encoding: %s" % enc,
                                            filename=compile_info.filename)
                # Transform unicode errors into SyntaxError
                if e.match(space, space.w_UnicodeDecodeError):
                    e.normalize_exception(space)
                    w_message = space.str(e.get_w_value(space))
                    raise error.SyntaxError(space.str_w(w_message))
                raise

        flags = compile_info.flags

        # The tokenizer is very picky about how it wants its input.
        source_lines = textsrc.splitlines(True)
        if source_lines and not source_lines[-1].endswith("\n"):
            # Ensure the last line is newline-terminated.
            source_lines[-1] += '\n'
        if textsrc and textsrc[-1] == "\n":
            # Source ends with a newline: clear PyCF_DONT_IMPLY_DEDENT so
            # the final DEDENT may be implied.
            flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

        self.prepare(_targets[compile_info.mode])
        # `tp` must pre-exist: the ParseError handler below reads it even
        # if the token loop never ran.
        tp = 0
        try:
            try:
                # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
                # which is expected to work independently of them.  It's
                # certainly the case for all futures in Python <= 2.7.
                tokens = pytokenizer.generate_tokens(source_lines, flags)

                newflags, last_future_import = (future.add_future_flags(
                    self.future_flags, tokens))
                compile_info.last_future_import = last_future_import
                compile_info.flags |= newflags

                self.grammar = pygram.python_grammar
                for tp, value, lineno, column, line in tokens:
                    if self.add_token(tp, value, lineno, column, line):
                        break
            except error.TokenError, e:
                e.filename = compile_info.filename
                raise
            except parser.ParseError, e:
                # Catch parse errors, pretty them up and reraise them as a
                # SyntaxError.
                new_err = error.IndentationError
                if tp == pygram.tokens.INDENT:
                    msg = "unexpected indent"
                elif e.expected == pygram.tokens.INDENT:
                    msg = "expected an indented block"
                else:
                    new_err = error.SyntaxError
                    msg = "invalid syntax"
                raise new_err(msg, e.lineno, e.column, e.line,
                              compile_info.filename)
            # NOTE(review): SOURCE is truncated here — the `else:`/`finally:`
            # clauses that close the outer `try` (and presumably build and
            # return the tree, as in the sibling variants) are missing from
            # this view.  Do not assume their exact contents.
def parse_source(self, textsrc, compile_info):
    """Main entry point for parsing Python source.

    Everything from decoding the source to tokenizing to building the parse
    tree is handled here.
    """
    # Detect source encoding.
    enc = None
    if textsrc.startswith("\xEF\xBB\xBF"):
        # UTF-8 byte-order mark: strip it and pin the encoding.
        textsrc = textsrc[3:]
        enc = 'utf-8'
        # If an encoding is explicitly given check that it is utf-8.
        decl_enc = _check_for_encoding(textsrc)
        if decl_enc and decl_enc != "utf-8":
            raise error.SyntaxError("UTF-8 BOM with %s coding cookie" % decl_enc,
                                    filename=compile_info.filename)
    elif compile_info.flags & consts.PyCF_SOURCE_IS_UTF8:
        enc = 'utf-8'
        # A unicode source must not additionally carry a coding cookie.
        if _check_for_encoding(textsrc) is not None:
            raise error.SyntaxError("coding declaration in unicode string",
                                    filename=compile_info.filename)
    else:
        enc = _normalize_encoding(_check_for_encoding(textsrc))
        if enc is not None and enc not in ('utf-8', 'iso-8859-1'):
            # Recode anything that is not already utf-8/latin-1.
            try:
                textsrc = recode_to_utf8(self.space, textsrc, enc)
            except OperationError as e:
                # if the codec is not found, LookupError is raised.  we
                # check using 'is_w' not to mask potential IndexError or
                # KeyError
                space = self.space
                if e.match(space, space.w_LookupError):
                    raise error.SyntaxError("Unknown encoding: %s" % enc,
                                            filename=compile_info.filename)
                # Transform unicode errors into SyntaxError
                if e.match(space, space.w_UnicodeDecodeError):
                    e.normalize_exception(space)
                    w_message = space.str(e.get_w_value(space))
                    raise error.SyntaxError(space.str_w(w_message))
                raise

    flags = compile_info.flags
    # The tokenizer is very picky about how it wants its input.
    source_lines = textsrc.splitlines(True)
    if source_lines and not source_lines[-1].endswith("\n"):
        # Ensure the last line is newline-terminated.
        source_lines[-1] += '\n'
    if textsrc and textsrc[-1] == "\n":
        # Source ends with a newline: clear PyCF_DONT_IMPLY_DEDENT so the
        # final DEDENT may be implied.
        flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

    self.prepare(_targets[compile_info.mode])
    # `tp` must pre-exist: the ParseError handler below reads it even if
    # the token loop never ran.
    tp = 0
    try:
        try:
            # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
            # which is expected to work independently of them.  It's
            # certainly the case for all futures in Python <= 2.7.
            tokens = pytokenizer.generate_tokens(source_lines, flags)

            # Fold __future__ flags into compile_info before choosing
            # the grammar, which depends on CO_FUTURE_PRINT_FUNCTION.
            newflags, last_future_import = (
                future.add_future_flags(self.future_flags, tokens))
            compile_info.last_future_import = last_future_import
            compile_info.flags |= newflags

            if compile_info.flags & consts.CO_FUTURE_PRINT_FUNCTION:
                self.grammar = pygram.python_grammar_no_print
            else:
                self.grammar = pygram.python_grammar

            # Feed tokens until add_token() signals completion.
            for tp, value, lineno, column, line in tokens:
                if self.add_token(tp, value, lineno, column, line):
                    break
        except error.TokenError as e:
            e.filename = compile_info.filename
            raise
        except parser.ParseError as e:
            # Catch parse errors, pretty them up and reraise them as a
            # SyntaxError.
            new_err = error.IndentationError
            if tp == pygram.tokens.INDENT:
                msg = "unexpected indent"
            elif e.expected == pygram.tokens.INDENT:
                msg = "expected an indented block"
            else:
                new_err = error.SyntaxError
                msg = "invalid syntax"
            raise new_err(msg, e.lineno, e.column, e.line,
                          compile_info.filename)
        else:
            tree = self.root
    finally:
        # Avoid hanging onto the tree.
        self.root = None
    if enc is not None:
        compile_info.encoding = enc
    return tree
def _parse(self, textsrc, compile_info):
    """Tokenize *textsrc* and build a parse tree from it.

    Besides the plain tokenize-and-parse pipeline, this variant also:
    * in 'single' mode, scans the leftover tokens and rejects multiple
      statements with a dedicated SyntaxError;
    * on a parse error right after a bare ``print``/``exec``, emits the
      "Missing parentheses in call to ..." hint.

    :param textsrc: already-decoded source text.
    :param compile_info: compilation state; its ``flags`` and
        ``last_future_import`` fields are updated as a side effect.
    :returns: the root node of the parse tree (taken from ``self.root``).
    :raises error.TokenError, error.TokenIndentationError: tokenizer
        failures, re-raised with ``filename`` filled in.
    :raises error.SyntaxError, error.IndentationError: prettied-up
        re-raises of ``parser.ParseError``.
    """
    flags = compile_info.flags
    # The tokenizer is very picky about how it wants its input.
    source_lines = textsrc.splitlines(True)
    if source_lines and not source_lines[-1].endswith("\n"):
        # Ensure the last line is newline-terminated.
        source_lines[-1] += '\n'
    if textsrc and textsrc[-1] == "\n":
        # Source ends with a newline: clear PyCF_DONT_IMPLY_DEDENT so the
        # final DEDENT may be implied.
        flags &= ~consts.PyCF_DONT_IMPLY_DEDENT

    self.prepare(_targets[compile_info.mode])
    # `tp` must pre-exist: the ParseError handler below reads it even if
    # the token loop never ran.
    tp = 0
    try:
        # Track the value fed to add_token() and its predecessor; if a
        # ParseError escapes the loop these are still set and drive the
        # "Missing parentheses" hint below.  They are reset to None on a
        # clean exit so the hint only fires for genuine parse errors.
        last_value_seen = None
        next_value_seen = None
        try:
            # Note: we no longer pass the CO_FUTURE_* to the tokenizer,
            # which is expected to work independently of them.  It's
            # certainly the case for all futures in Python <= 2.7.
            tokens = pytokenizer.generate_tokens(source_lines, flags)

            newflags, last_future_import = (future.add_future_flags(
                self.future_flags, tokens))
            compile_info.last_future_import = last_future_import
            compile_info.flags |= newflags

            self.grammar = pygram.python_grammar
            # A shared iterator, so the 'single'-mode scan below resumes
            # where the main loop stopped.
            tokens_stream = iter(tokens)
            for tp, value, lineno, column, line in tokens_stream:
                next_value_seen = value
                if self.add_token(tp, value, lineno, column, line):
                    break
                last_value_seen = value
            last_value_seen = None
            next_value_seen = None

            if compile_info.mode == 'single':
                # Reject anything but trailing NEWLINEs/comments after the
                # first statement when compiling a single statement.
                for tp, value, lineno, column, line in tokens_stream:
                    if tp == pygram.tokens.ENDMARKER:
                        break
                    if tp == pygram.tokens.NEWLINE:
                        continue

                    if tp == pygram.tokens.COMMENT:
                        # Skip the comment's tokens up to its NEWLINE.
                        for tp, _, _, _, _ in tokens_stream:
                            if tp == pygram.tokens.NEWLINE:
                                break
                    else:
                        new_err = error.SyntaxError
                        msg = ("multiple statements found while "
                               "compiling a single statement")
                        raise new_err(msg, lineno, column,
                                      line, compile_info.filename)
        except error.TokenError as e:
            e.filename = compile_info.filename
            raise
        except error.TokenIndentationError as e:
            e.filename = compile_info.filename
            raise
        except parser.ParseError as e:
            # Catch parse errors, pretty them up and reraise them as a
            # SyntaxError.
            new_err = error.IndentationError
            if tp == pygram.tokens.INDENT:
                msg = "unexpected indent"
            elif e.expected == pygram.tokens.INDENT:
                msg = "expected an indented block"
            else:
                new_err = error.SyntaxError
                if (last_value_seen in ('print', 'exec') and
                        bool(next_value_seen) and
                        next_value_seen != '('):
                    # Python-2-style print/exec statement: point the user
                    # at the missing call parentheses.
                    msg = "Missing parentheses in call to '%s'" % (
                        last_value_seen,)
                else:
                    msg = "invalid syntax"
                if e.expected_str is not None:
                    msg += " (expected '%s')" % e.expected_str
            # parser.ParseError(...).column is 0-based, but the offsets in the
            # exceptions in the error module are 1-based, hence the '+ 1'
            raise new_err(msg, e.lineno, e.column + 1, e.line,
                          compile_info.filename)
        else:
            tree = self.root
    finally:
        # Avoid hanging onto the tree.
        self.root = None
    return tree