def WriteGrammar(grammar_path, marshal_path):
  """Used for py27.grammar.

  oil_lang/grammar.pgen2 uses oil_lang/grammar_gen.py
  """
  # Parse the textual grammar into pgen2's table form, then marshal the
  # tables so they can be loaded quickly at runtime.
  with open(grammar_path) as grammar_f:
    tables = pgen.MakeGrammar(grammar_f)
  with open(marshal_path, 'wb') as marshal_f:
    tables.dump(marshal_f)
  log('Compiled %s -> grammar tables in %s', grammar_path, marshal_path)
def main(argv):
  """Parse the args with the 'find' grammar and print the parse tree.

  Also prints a small hand-built AST as a demo of the target representation.
  """
  grammar_path = 'tools/find/find.pgen2'
  tok_def = TokenDef()
  with open(grammar_path) as f:
    gr = pgen.MakeGrammar(f, tok_def=tok_def)

  parser_ = parse.Parser(gr, convert=NoSingletonAction)
  tokens = Tokens(argv[1:])
  #print(list(tokens))
  start_symbol = 'start'
  pnode = driver.PushTokens(parser_, tokens, gr, start_symbol, opmap=OPMAP)

  # Build a number -> name table for printing, asserting no collisions.
  names = {}
  for num, sym in gr.number2symbol.items():
    # eval_input == 256.  Remove?
    assert num >= 256, (num, sym)
    assert num not in names, num
    names[num] = sym
  # TODO: These overlap
  for num, tok_name in token.tok_name.items():
    if num == token.NT_OFFSET:
      continue
    assert num not in names, num
    names[num] = tok_name
  for op_name, op_num in OPMAP.items():
    assert op_num not in names, op_num
    names[op_num] = op_name
  #print(pnode)

  printer = ParseTreePrinter(names)
  printer.Print(pnode)

  # TODO: Translate pnode into a tree like this.
  left = expr.True_()
  right = expr.PathTest(False, '*.py')
  ast_node = expr.Binary(op_e.And, left, right)
  ast_node.PrettyPrint()
  print()
def main(argv):
  """Grammar tool for Oil expressions.

  Actions:
    marshal GRAMMAR_PATH OUT_DIR   - compile grammar to .marshal + _nt.py
    parse GRAMMAR_PATH START CODE  - parse CODE and print the tree / AST
    stdlib-test                    - demo of Python's builtin parser module
  """
  action = argv[1]
  argv = argv[2:]

  # Used at grammar BUILD time.
  OPS = {
      '.': Id.Expr_Dot,
      '->': Id.Expr_RArrow,
      '::': Id.Expr_DColon,
      '@': Id.Expr_At,
      '...': Id.Expr_Ellipsis,
      '$': Id.Expr_Dollar,  # Only for legacy eggex /d+$/
  }

  # Note: We have two lists of ops because Id.Op_Semi is used, not
  # Id.Arith_Semi.
  # Each pair added must be unique; the asserts catch accidental collisions.
  for _, token_str, id_ in lex.EXPR_OPS:
    assert token_str not in OPS, token_str
    OPS[token_str] = id_

  # Tokens that look like / or ${ or @{
  triples = (
      meta.ID_SPEC.LexerPairs(Kind.Arith) +
      lex.OIL_LEFT_SUBS +
      lex.OIL_LEFT_UNQUOTED +
      lex.EXPR_WORDS
  )
  more_ops = {}
  for _, token_str, id_ in triples:
    assert token_str not in more_ops, token_str
    more_ops[token_str] = id_

  # Tokens that look like 'for'
  # NOTE(review): lex.EXPR_WORDS is also part of 'triples' above, so keywords
  # appear in both more_ops and keyword_ops — presumably intentional; confirm.
  keyword_ops = {}
  for _, token_str, id_ in lex.EXPR_WORDS:  # for, in, etc.
    assert token_str not in keyword_ops, token_str
    keyword_ops[token_str] = id_

  if 0:  # debugging: dump the three operator tables
    from pprint import pprint
    pprint(OPS)
    print('---')
    pprint(more_ops)
    print('---')
    pprint(keyword_ops)
    print('---')

  tok_def = OilTokenDef(OPS, more_ops, keyword_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]
    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    # Also emit a Python module with the nonterminal numbers.
    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    # lex_ (not lex) avoids shadowing the module-level 'lex' import.
    lex_ = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.VarDecl(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!
    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
def main(argv):
  """Grammar tool for the arith grammar.

  Actions:
    marshal GRAMMAR_PATH OUT_DIR   - compile grammar to .marshal + _nt.py
    parse GRAMMAR_PATH START CODE  - parse CODE and print the tree / AST
    stdlib-test                    - demo of Python's builtin parser module
  """
  action = argv[1]
  argv = argv[2:]

  # Common initialization: operator table used at grammar BUILD time.
  arith_ops = {}
  for _, token_str, id_ in meta.ID_SPEC.LexerPairs(Kind.Arith):
    arith_ops[token_str] = id_

  if 0:  # debugging: dump the operator table
    from pprint import pprint
    pprint(arith_ops)

  tok_def = OilTokenDef(arith_ops)

  if action == 'marshal':  # generate the grammar and parse it
    grammar_path = argv[0]
    out_dir = argv[1]
    basename, _ = os.path.splitext(os.path.basename(grammar_path))

    # HACK for find:
    if basename == 'find':
      from tools.find import tokenizer as find_tokenizer
      tok_def = find_tokenizer.TokenDef()

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    marshal_path = os.path.join(out_dir, basename + '.marshal')
    with open(marshal_path, 'wb') as out_f:
      gr.dump(out_f)

    # Also emit a Python module with the nonterminal numbers.
    nonterm_path = os.path.join(out_dir, basename + '_nt.py')
    with open(nonterm_path, 'w') as out_f:
      gr.dump_nonterminals(out_f)

    log('Compiled %s -> %s and %s', grammar_path, marshal_path, nonterm_path)
    #gr.report()

  elif action == 'parse':  # generate the grammar and parse it
    # Remove build dependency
    from frontend import parse_lib
    from oil_lang import expr_parse

    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    # For choosing lexer and semantic actions
    grammar_name, _ = os.path.splitext(os.path.basename(grammar_path))

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f, tok_def=tok_def)

    arena = alloc.Arena()
    # FIX: named lex_ (not lex) so it doesn't shadow the module-level 'lex'
    # import; the sibling main() in this file uses the same convention.
    lex_ = MakeOilLexer(code_str, arena)

    is_expr = grammar_name in ('calc', 'grammar')

    parse_opts = parse_lib.OilParseOptions()
    parse_ctx = parse_lib.ParseContext(arena, parse_opts, {}, gr)
    p = expr_parse.ExprParser(parse_ctx, gr)
    try:
      pnode, _ = p.Parse(lex_, gr.symbol2number[start_symbol])
    except parse.ParseError as e:
      log('Parse Error: %s', e)
      return 1

    names = parse_lib.MakeGrammarNames(gr)
    p_printer = expr_parse.ParseTreePrinter(names)  # print raw nodes
    p_printer.Print(pnode)

    if is_expr:
      from oil_lang import expr_to_ast
      tr = expr_to_ast.Transformer(gr)
      if start_symbol == 'eval_input':
        ast_node = tr.Expr(pnode)
      else:
        ast_node = tr.OilAssign(pnode)
      ast_node.PrettyPrint()
      print()

  elif action == 'stdlib-test':
    # This shows how deep Python's parse tree is.  It doesn't use semantic
    # actions to prune on the fly!
    import parser  # builtin module
    t = parser.expr('1+2')
    print(t)
    t2 = parser.st2tuple(t)
    print(t2)

  else:
    raise RuntimeError('Invalid action %r' % action)
def OpyCommandMain(argv):
  """Dispatch to the right action.

  argv[0] is the subcommand name; the rest are its arguments.  Raises
  error.Usage for a missing/invalid subcommand or bad paths.
  """
  # TODO: Use core/arg_def.
  #opts, argv = Options().parse_args(argv)

  try:
    action = argv[0]
  except IndexError:
    raise error.Usage('opy: Missing required subcommand.')

  argv = argv[1:]
  # TODO: Should I do input.ReadRequiredArg()?
  # That will shift the input.

  # Only these actions need the Python grammar / compiler; load them once.
  if action in (
      'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
      'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
    loader = pyutil.GetResourceLoader()
    f = loader.open(GRAMMAR_REL_PATH)
    contents = f.read()
    f.close()
    gr = grammar.Grammar()
    gr.loads(contents)

    # In Python 2 code, always use from __future__ import print_function.
    try:
      del gr.keywords["print"]
    except KeyError:
      pass

    symbols = Symbols(gr)
    pytree.Init(symbols)  # for type_repr() pretty printing
    transformer.Init(symbols)  # for _names and other dicts

    compiler = skeleton.Compiler(gr)
  else:
    # e.g. pgen2 doesn't use any of these.  Maybe we should make a different
    # tool.
    compiler = None

  # TODO: Also have a run_spec for 'opyc run'.
  compile_spec = arg_def.OilFlags('opy')
  compile_spec.Flag('-emit-docstring', args.Bool, default=True,
                    help='Whether to emit docstrings')
  compile_spec.Flag('-fast-ops', args.Bool, default=True,
                    help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
  compile_spec.Flag('-oil-subset', args.Bool, default=False,
                    help='Only allow the constructs necessary to implement'
                    'Oil. Example: using multiple inheritance will abort '
                    'compilation.')

  #
  # Actions
  #

  if action == 'pgen2':
    grammar_path = argv[0]
    marshal_path = argv[1]
    WriteGrammar(grammar_path, marshal_path)

  elif action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[1]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    printer = TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)
    n = CountTupleTree(tree)
    log('COUNT %d', n)

  elif action == 'lex':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      for typ, val, start, end, unused_line in tokens:
        print('%10s %10s %-10s %r' % (start, end, token.tok_name[typ], val))

  elif action == 'lex-names':  # Print all the NAME tokens.
    for py_path in argv:
      log('Lexing %s', py_path)
      with open(py_path) as f:
        tokens = tokenize.generate_tokens(f.readline)
        for typ, val, start, end, unused_line in tokens:
          if typ == token.NAME:
            print(val)

  elif action == 'parse':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      p = parse.Parser(gr)
      pnode = driver.PushTokens(p, tokens, gr, 'file_input')

    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  # Parse with an arbitrary grammar, but the Python lexer.
  elif action == 'parse-with':
    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f)

    f = cStringIO.StringIO(code_str)
    tokens = tokenize.generate_tokens(f.readline)
    p = parse.Parser(gr)  # no convert=
    try:
      pnode = driver.PushTokens(p, tokens, gr, start_symbol)
    except parse.ParseError as e:
      # Extract location information and show it.
      _, _, (lineno, offset) = e.opaque
      # extra line needed for '\n' ?
      lines = code_str.splitlines() + ['']

      line = lines[lineno-1]
      log(' %s', line)
      log(' %s^', ' '*offset)
      log('Parse Error: %s', e)
      return 1
    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  elif action == 'ast':  # output AST
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='ast')

  elif action == 'symbols':  # output symbols
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='symbols')

  elif action == 'cfg':  # output Control Flow Graph
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='cfg')

  elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
    # spec.Arg('action', ['foo', 'bar'])
    # But that leads to some duplication.

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    out_path = argv[i+1]

    with open(py_path) as f:
      co = compiler.Compile(f, opt, 'exec')

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = misc.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile-ovm':
    # NOTE: obsolete
    from ovm2 import oheap2
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    out_path = argv[i+1]

    # Compile to Python bytecode (TODO: remove ovm_codegen.py)
    mode = 'exec'
    with open(py_path) as f:
      co = compiler.Compile(f, opt, mode)

    if 1:
      with open(out_path, 'wb') as out_f:
        oheap2.Write(co, out_f)
      return 0

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      if 1:
        out_f.write(co.co_code)
      else:
        h = misc.getPycHeader(py_path)
        out_f.write(h)
        marshal.dump(co, out_f)
    log('Wrote only the bytecode to %r', out_path)

  elif action == 'eval':  # Like compile, but parses to a code object and prints it
    opt, i = compile_spec.ParseArgv(argv)
    py_expr = argv[i]
    f = skeleton.StringInput(py_expr, '<eval input>')
    co = compiler.Compile(f, opt, 'eval')
    v = dis_tool.Visitor()
    v.show_code(co)
    print()
    print('RESULT:')
    print(eval(co))

  elif action == 'repl':  # Like eval in a loop
    # FIX: 'opt' was never assigned in this branch, so the first REPL input
    # raised NameError.  Parse the flags once, like the other actions do.
    opt, _ = compile_spec.ParseArgv(argv)
    while True:
      py_expr = raw_input('opy> ')
      f = skeleton.StringInput(py_expr, '<REPL input>')

      # TODO: change this to 'single input'? Why doesn't this work?
      co = compiler.Compile(f, opt, 'eval')
      v = dis_tool.Visitor()
      v.show_code(co)

      print(eval(co))

  elif action == 'dis-tables':
    out_dir = argv[0]
    pyc_paths = argv[1:]

    out = TableOutput(out_dir)

    for pyc_path in pyc_paths:
      # FIX: open in binary mode -- .pyc is binary data ('dis' below already
      # does this; text mode corrupts on platforms with newline translation).
      with open(pyc_path, 'rb') as f:
        magic, unixtime, timestamp, code = dis_tool.unpack_pyc(f)
        WriteDisTables(pyc_path, code, out)

    out.Close()

  elif action == 'dis':
    opt, i = compile_spec.ParseArgv(argv)
    path = argv[i]
    v = dis_tool.Visitor()

    if path.endswith('.py'):
      with open(path) as f:
        co = compiler.Compile(f, opt, 'exec')

      log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
      v.show_code(co)

    else:  # assume pyc_path
      with open(path, 'rb') as f:
        v.Visit(f)

  elif action == 'dis-md5':
    pyc_paths = argv
    if not pyc_paths:
      raise error.Usage('dis-md5: At least one .pyc path is required.')

    for path in pyc_paths:
      h = hashlib.md5()
      # FIX: binary mode -- we hash raw bytes of a binary file.
      with open(path, 'rb') as f:
        magic = f.read(4)
        h.update(magic)
        ignored_timestamp = f.read(4)
        while True:
          b = f.read(64 * 1024)
          if not b:
            break
          h.update(b)
      print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

  elif action == 'run':  # Compile and run, without writing pyc file
    # TODO: Add an option like -v in __main__

    #level = logging.DEBUG if args.verbose else logging.WARNING
    #logging.basicConfig(level=level)
    #logging.basicConfig(level=logging.DEBUG)

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    opy_argv = argv[i:]

    if py_path.endswith('.py'):
      with open(py_path) as f:
        co = compiler.Compile(f, opt, 'exec')
      num_ticks = execfile.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      # FIX: binary mode -- marshal data after the 8-byte header is binary.
      with open(py_path, 'rb') as f:
        f.seek(8)  # past header. TODO: validate it!
        co = marshal.load(f)
      num_ticks = execfile.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  elif action == 'run-ovm':  # Compile and run, without writing pyc file
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    opy_argv = argv[i+1:]

    if py_path.endswith('.py'):
      #mode = 'exec'
      mode = 'ovm'  # OVM bytecode is different!
      with open(py_path) as f:
        co = compiler.Compile(f, opt, mode)
      log('Compiled to %d bytes of OVM code', len(co.co_code))
      num_ticks = ovm.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      # FIX: binary mode, matching the 'run' action above.
      with open(py_path, 'rb') as f:
        f.seek(8)  # past header. TODO: validate it!
        co = marshal.load(f)
      num_ticks = ovm.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  else:
    raise error.Usage('Invalid action %r' % action)