# Assumes module-level: import re, parser, symbol, token

def test_no_asserts(self):
    """bzr shouldn't use the 'assert' statement."""
    # assert causes too much variation between -O and not, and tends to
    # give bad errors to the user
    def search(x):
        # scan down through x for assert statements, report any problems
        # this is a bit cheesy; it may get some false positives
        if x[0] == symbol.assert_stmt:
            return True
        elif x[0] == token.NAME:
            # can't search further down
            return False
        for sub in x[1:]:
            if sub and search(sub):
                return True
        return False

    badfiles = []
    assert_re = re.compile(r'\bassert\b')
    for fname, text in self.get_source_file_contents():
        if not self.is_our_code(fname):
            continue
        if not assert_re.search(text):
            continue
        st = parser.suite(text)
        code = parser.st2tuple(st)
        if search(code):
            badfiles.append(fname)
    if badfiles:
        self.fail(
            "these files contain an assert statement and should not:\n%s"
            % '\n'.join(badfiles))
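# The same tree-scanning technique as a minimal standalone sketch (the
# function name and sample inputs below are illustrative, not part of the
# test suite above):

import parser
import symbol
import token

def has_assert(tree):
    """Return True if a parser.st2tuple() tree contains an assert statement."""
    if tree[0] == symbol.assert_stmt:
        return True
    if tree[0] == token.NAME:
        return False  # leaf token; nothing further to search
    return any(isinstance(sub, tuple) and has_assert(sub) for sub in tree[1:])

print(has_assert(parser.st2tuple(parser.suite("assert x == 1\n"))))  # True
print(has_assert(parser.st2tuple(parser.suite("x = 1\n"))))          # False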
def test_py_cst_visitor(self):
    transformer = mython.cst.PyConcreteToMyConcreteTransformer()
    visitor = VisitPassStmt()
    visitor.visit(
        transformer.visit(
            parser.st2tuple(
                parser.suite(TEST_SOURCE))))
    self.assertTrue(visitor.saw_pass)
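# VisitPassStmt and TEST_SOURCE come from the surrounding test module and are
# not shown here. A self-contained, stdlib-only sketch of the same idea --
# detecting a pass statement in a st2tuple() tree -- might look like this
# (the class shape and test input are assumptions):

import parser
import symbol

TEST_SOURCE = "def foo():\n    pass\n"  # hypothetical test input

class VisitPassStmt:
    """Recursively scan a parser tuple tree for a pass statement."""

    def __init__(self):
        self.saw_pass = False

    def visit(self, node):
        if isinstance(node, tuple):
            if node[0] == symbol.pass_stmt:
                self.saw_pass = True
            for child in node[1:]:
                self.visit(child)

visitor = VisitPassStmt()
visitor.visit(parser.st2tuple(parser.suite(TEST_SOURCE)))
assert visitor.saw_pass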
def __parsesingle(self, text):
    """Return a modified parse tree for the given suite text."""
    node = parser.st2tuple(parser.suite(text), line_info=1)
    n = node[0]
    if n == symbol.encoding_decl:
        self.encoding = node[2]
        node = node[1]
        n = node[0]
    return self.file_input(node[1:], False)
def extractVars(expr):
    "Return the set of all variables occurring in the given expression."
    tree = parser.expr(expr)
    treeAsTuple = parser.st2tuple(tree)
    Operators = {
        '+', '-', '*', '/', '//', '%', '**',
        '<', '>', '<=', '>=', '!=', '==',
        '|', '&', '<<', '>>', '^',
        '>>=', '<<=', '|=', '**=', '+=', '-=', '*=', '/=', '//=', '%=',
        '^=', '&=', '(', ')',
    }
    Result = extractVarsFromTree(treeAsTuple) - Operators
    return {v for v in Result if v not in dir(__builtins__)}
def extractVars(expr):
    "Return the set of all variables occurring in the given expression."
    tree = parser.expr(expr)
    treeAsTuple = parser.st2tuple(tree)
    Operators = {
        '+', '-', '*', '/', '//', '%', '**',
        '<', '>', '<=', '>=', '!=', '==',
        '|', '&', '<<', '>>', '^',
        '>>=', '<<=', '|=', '**=', '+=', '-=', '*=', '/=', '//=', '%=',
        '^=', '&=', '(', ')',
    }
    Result = extractVarsFromTree(treeAsTuple) - Operators
    return Result
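# Both variants above depend on extractVarsFromTree, which is not defined in
# these snippets. A minimal sketch, assuming it simply collects every string
# leaf of the tuple tree (keywords and operators included, hence the
# Operators subtraction above):

def extractVarsFromTree(tree):
    """Collect every string leaf from a parser.st2tuple() tree."""
    result = set()
    for child in tree[1:]:  # tree[0] is the numeric node/token type
        if isinstance(child, tuple):
            result |= extractVarsFromTree(child)
        elif isinstance(child, str) and child:
            result.add(child)
    return result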
def testChunk(t, fileName):
    global _numFailed
    print('----', fileName, end=' ')
    try:
        st = parser.suite(t)
        tup = parser.st2tuple(st)
        # this discards the first ST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        st = None
        new = parser.tuple2st(tup)
    except parser.ParserError as err:
        print()
        print('parser module raised exception on input file', fileName + ':')
        traceback.print_exc()
        _numFailed = _numFailed + 1
    else:
        if tup != parser.st2tuple(new):
            print()
            print('parser module failed on input file', fileName)
            _numFailed = _numFailed + 1
        else:
            print('o.k.')
def test_position(self):
    # An absolutely minimal test of position information.  Better
    # tests would be a big project.
    code = "def f(x):\n    return x + 1"
    st = parser.suite(code)

    def walk(tree):
        node_type = tree[0]
        next = tree[1]
        if isinstance(next, (tuple, list)):
            for elt in tree[1:]:
                for x in walk(elt):
                    yield x
        else:
            yield tree

    expected = [
        (1, 'def', 1, 0),
        (1, 'f', 1, 4),
        (7, '(', 1, 5),
        (1, 'x', 1, 6),
        (8, ')', 1, 7),
        (11, ':', 1, 8),
        (4, '', 1, 9),
        (5, '', 2, -1),
        (1, 'return', 2, 4),
        (1, 'x', 2, 11),
        (14, '+', 2, 13),
        (2, '1', 2, 15),
        (4, '', 2, 16),
        (6, '', 2, -1),
        (4, '', 2, -1),
        (0, '', 2, -1),
    ]
    self.assertEqual(list(walk(st.totuple(line_info=True, col_info=True))),
                     expected)
    self.assertEqual(list(walk(st.totuple())),
                     [(t, n) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.totuple(line_info=True))),
                     [(t, n, l) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.totuple(col_info=True))),
                     [(t, n, c) for t, n, l, c in expected])
    self.assertEqual(list(walk(st.tolist(line_info=True, col_info=True))),
                     [list(x) for x in expected])
    self.assertEqual(
        list(walk(parser.st2tuple(st, line_info=True, col_info=True))),
        expected)
    self.assertEqual(
        list(walk(parser.st2list(st, line_info=True, col_info=True))),
        [list(x) for x in expected])
def testChunk(t, fileName):
    global _numFailed
    print "----", fileName,
    try:
        st = parser.suite(t)
        tup = parser.st2tuple(st)
        # this discards the first ST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        st = None
        new = parser.tuple2st(tup)
    except parser.ParserError, err:
        print
        print "parser module raised exception on input file", fileName + ":"
        traceback.print_exc()
        _numFailed = _numFailed + 1
def main():
    """main()
    Main routine for the SyntaxTupleHandler module.  Used to run rudimentary
    unit tests from the command line.
    """
    global __DEBUG__
    import sys, symbol, parser
    fileName = None
    if len(sys.argv) > 1:
        args = sys.argv[1:]
        if "-d" in args:
            __DEBUG__ = True
            args.remove("-d")
        if len(args) > 0:
            fileName = args[0]
    if fileName is not None:
        text = open(fileName).read()
    else:
        text = sys.stdin.read()
    st = parser.suite(text)
    tup = parser.st2tuple(st)
    handler = SyntaxTupleHandler(symbol.sym_name, True)
    print handler.handle(tup)
import parser
from symbol import sym_name
import token

from block import Block


def pretty_print(node, margin=""):
    if type(node) is tuple or type(node) is list:
        for i in node:
            pretty_print(i, margin=margin + "|")
    elif type(node) is int:
        try:
            if node < token.NT_OFFSET:
                print(margin + token.tok_name[node])
            else:
                print(margin + sym_name[node])
        except KeyError:
            print("Shoot, it borked. Blame Paul.")
    elif type(node) is str:
        print(margin + "|" + node)
    else:
        print("error")


q = parser.suite("2**4*3")
tup = parser.st2tuple(q)
pretty_print(tup)
def transform(self, tree):
    """Transform an AST into a modified parse tree."""
    if not isinstance(tree, (tuple, list)):
        tree = parser.st2tuple(tree, line_info=1)
    return self.compile_node(tree)
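# This is the Transformer.transform() pattern from the old Python 2 stdlib
# compiler package: compile_node() dispatches on the numeric node type at
# tree[0]. A minimal sketch of such a dispatcher (the real module uses a
# precomputed dispatch table; the handler-name convention below is an
# assumption):

import symbol

class MiniTransformer:
    def compile_node(self, node):
        # Look up a handler method named after the grammar symbol
        # (e.g. file_input, expr_stmt); otherwise just walk the children.
        name = symbol.sym_name.get(node[0])
        handler = getattr(self, name, None) if name is not None else None
        if handler is not None:
            return handler(node[1:])
        return [self.compile_node(child)
                for child in node[1:] if isinstance(child, tuple)]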
""" # OLD FASHIOINED PARSER import parser import pprint source = """ class A: def hello(self): print 90 + "something" class Fred(Mary, Sam): pass """ o = parser.suite(source) pprint.pprint(parser.st2tuple(o)) print "-" * 88 # AST """ http://stackoverflow.com/questions/4947783/python-ast-module http://eli.thegreenplace.net/2009/11/28/python-internals-working-with-python-asts/ ** GOOD http://www.breti.org/tech/files/a7b5fcecb0596b9bf127212e847584f9-66.html ** to read """ import ast class Py2Neko(ast.NodeVisitor):
def main(argv):
    action = argv[1]
    argv = argv[2:]

    # Used at grammar BUILD time.
    OPS = {
        '.': Id.Expr_Dot,
        '->': Id.Expr_RArrow,
        '::': Id.Expr_DColon,
        '@': Id.Expr_At,
        '...': Id.Expr_Ellipsis,
        '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
    }

    # Note: We have two lists of ops because Id.Op_Semi is used, not
    # Id.Arith_Semi.
    for _, token_str, id_ in lex.EXPR_OPS:
        assert token_str not in OPS, token_str
        OPS[token_str] = id_

    # Tokens that look like / or ${ or @{
    triples = (
        meta.ID_SPEC.LexerPairs(Kind.Arith) +
        lex.OIL_LEFT_SUBS +
        lex.OIL_LEFT_UNQUOTED +
        lex.EXPR_WORDS
    )
    more_ops = {}
    for _, token_str, id_ in triples:
        assert token_str not in more_ops, token_str
        more_ops[token_str] = id_

    # Tokens that look like 'for'
    keyword_ops = {}
    for _, token_str, id_ in lex.EXPR_WORDS:  # for, in, etc.
        assert token_str not in keyword_ops, token_str
        keyword_ops[token_str] = id_

    if 0:
        from pprint import pprint
        pprint(OPS)
        print('---')
        pprint(more_ops)
        print('---')
        pprint(keyword_ops)
        print('---')

    tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

    if action == 'marshal':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]
        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_path = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_path, 'w') as out_f:
            gr.dump_nonterminals(out_f)

        log('Compiled %s -> %s and %s', grammar_path, marshal_path,
            nonterm_path)
        #gr.report()

    elif action == 'parse':  # generate the grammar and parse it
        # Remove build dependency
        from frontend import parse_lib
        from oil_lang import expr_parse

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex_ = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = parse_lib.OilParseOptions()
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr)
        try:
            pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = parse_lib.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            from oil_lang import expr_to_ast
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            else:
                ast_node = tr.VarDecl(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!
        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)
""" # OLD FASHIOINED PARSER import parser import pprint source = """ class A: def hello(self): print 90 + "something" class Fred(Mary, Sam): pass """ o = parser.suite(source) pprint.pprint(parser.st2tuple(o)) print "-"*88 # AST """ http://stackoverflow.com/questions/4947783/python-ast-module http://eli.thegreenplace.net/2009/11/28/python-internals-working-with-python-asts/ ** GOOD http://www.breti.org/tech/files/a7b5fcecb0596b9bf127212e847584f9-66.html ** to read """
def parse_tree(fname):
    with open(fname) as f:
        source = f.read()
    return parser.st2tuple(parser.suite(source))
def convert_suite(suite):
    return convert_readable(parser.st2tuple(parser.suite(suite)))
def convert_expr(expr):
    return convert_readable(parser.st2tuple(parser.expr(expr)))
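# convert_readable is not defined in these snippets. A minimal sketch,
# assuming it just replaces the numeric node/token types in the tuple tree
# with their names from the symbol and token tables:

import parser
import symbol
import token

def convert_readable(tree):
    """Replace numeric node types with symbol/token names, recursively."""
    if isinstance(tree, tuple):
        number = tree[0]
        name = symbol.sym_name.get(number) or token.tok_name.get(number, number)
        return (name,) + tuple(convert_readable(child) for child in tree[1:])
    return tree

# e.g. convert_expr("1+2") yields roughly ('eval_input', ('testlist', ...)),
# with the exact nesting depending on the Python version's grammar.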
program_list = list(tokenize_python(program))
bench("topdown", parse)
bench("topdown pretokenized", lambda program: parse(program_list))

tokenize_python = custom_tokenize_python
bench("custom topdown", parse)

if pytoken:
    tokenize_python = pytoken.token_list
    bench("built-in topdown", parse)

print

bench("built-in compile", lambda program: compile(program, "", "eval"))
bench("parser.parse", lambda program: parser.st2tuple(parser.expr(program)))

print

bench("compiler.parse", lambda program: compiler.parse(program, "eval"))
bench("compiler.compile", lambda program: compiler.compile(program, "", "eval"))

sys.exit(0)

# samples
test("1")
test("+1")
test("-1")
test("1+2")
test("1+2+3")
test("1+2*3")
def main(argv):
    action = argv[1]
    argv = argv[2:]

    # Common initialization
    arith_ops = {}
    for _, token_str, id_ in meta.ID_SPEC.LexerPairs(Kind.Arith):
        arith_ops[token_str] = id_

    if 0:
        from pprint import pprint
        pprint(arith_ops)

    tok_def = OilTokenDef(arith_ops)

    if action == 'marshal':  # generate the grammar and parse it
        grammar_path = argv[0]
        out_dir = argv[1]
        basename, _ = os.path.splitext(os.path.basename(grammar_path))

        # HACK for find:
        if basename == 'find':
            from tools.find import tokenizer as find_tokenizer
            tok_def = find_tokenizer.TokenDef()

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        marshal_path = os.path.join(out_dir, basename + '.marshal')
        with open(marshal_path, 'wb') as out_f:
            gr.dump(out_f)

        nonterm_path = os.path.join(out_dir, basename + '_nt.py')
        with open(nonterm_path, 'w') as out_f:
            gr.dump_nonterminals(out_f)

        log('Compiled %s -> %s and %s', grammar_path, marshal_path,
            nonterm_path)
        #gr.report()

    elif action == 'parse':  # generate the grammar and parse it
        # Remove build dependency
        from frontend import parse_lib
        from oil_lang import expr_parse

        grammar_path = argv[0]
        start_symbol = argv[1]
        code_str = argv[2]

        # For choosing lexer and semantic actions
        grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

        with open(grammar_path) as f:
            gr = pgen.MakeGrammar(f, tok_def=tok_def)

        arena = alloc.Arena()
        lex = MakeOilLexer(code_str, arena)

        is_expr = grammar_name in ('calc', 'grammar')

        parse_opts = parse_lib.OilParseOptions()
        parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
        p = expr_parse.ExprParser(parse_ctx, gr)
        try:
            pnode, _ = p.Parse(lex, gr.symbol2number[start_symbol])
        except parse.ParseError as e:
            log('Parse Error: %s', e)
            return 1

        names = parse_lib.MakeGrammarNames(gr)
        p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
        p_printer.Print(pnode)

        if is_expr:
            from oil_lang import expr_to_ast
            tr = expr_to_ast.Transformer(gr)
            if start_symbol == 'eval_input':
                ast_node = tr.Expr(pnode)
            else:
                ast_node = tr.OilAssign(pnode)
            ast_node.PrettyPrint()
            print()

    elif action == 'stdlib-test':
        # This shows how deep Python's parse tree is.  It doesn't use semantic
        # actions to prune on the fly!
        import parser  # builtin module
        t = parser.expr('1+2')
        print(t)
        t2 = parser.st2tuple(t)
        print(t2)

    else:
        raise RuntimeError('Invalid action %r' % action)
def display_source(self, source):
    # parse it
    q = parser.suite(source)
    tup = parser.st2tuple(q)
    # display the AST
    self.display_ast(tup)
bench("topdown", parse) bench("topdown pretokenized", lambda program: parse(program_list)) tokenize_python = custom_tokenize_python bench("custom topdown", parse) if pytoken: tokenize_python = pytoken.token_list bench("built-in topdown", parse) print bench("built-in compile", lambda program: compile(program, "", "eval")) bench("parser.parse", lambda program: parser.st2tuple(parser.expr(program))) print bench("compiler.parse", lambda program: compiler.parse(program, "eval")) bench("compiler.compile", lambda program: compiler.compile(program, "", "eval")) sys.exit(0) # samples test("1") test("+1") test("-1") test("1+2") test("1+2+3")
def testChunk(t, fileName):
    global _numFailed
    print '----', fileName,
    try:
        st = parser.suite(t)
        tup = parser.st2tuple(st)
        # this discards the first ST; a huge memory savings when running
        # against a large source file like Tkinter.py.
        st = None
        new = parser.tuple2st(tup)
    except parser.ParserError, err:
        print
        print 'parser module raised exception on input file', fileName + ':'
        traceback.print_exc()
        _numFailed = _numFailed + 1
    else:
        if tup != parser.st2tuple(new):
            print
            print 'parser module failed on input file', fileName
            _numFailed = _numFailed + 1
        else:
            print 'o.k.'


def testFile(fileName):
    t = open(fileName).read()
    testChunk(t, fileName)


def test():
    import sys
    args = sys.argv[1:]
    if not args:
        import glob