示例#1
0
def tokenize_wrapper(input):
    """Tokenizes a string suppressing significant whitespace."""
    skip = (token.NEWLINE, token.INDENT, token.DEDENT)
    tokens = tokenize.generate_tokens(driver.generate_lines(input).next)
    for quintuple in tokens:
        type, value, start, end, line_text = quintuple
        if type not in skip:
            yield quintuple
示例#2
0
文件: opy_main.py 项目: silky/oil
 def suite(self, text):
   #if util.PY2:
   if _READ_SOURCE_AS_UNICODE:
     f = io.StringIO(text)
   else:
     import cStringIO
     f = cStringIO.StringIO()
   tokens = tokenize.generate_tokens(f.readline)
   tree = self.driver.parse_tokens(tokens, start_symbol=self.start_symbol)
   return tree
示例#3
0
文件: pgen.py 项目: silky/oil
 def __init__(self, filename, stream=None):
     close_stream = None
     if stream is None:
         stream = open(filename)
         close_stream = stream.close
     self.filename = filename
     self.stream = stream
     self.generator = tokenize.generate_tokens(stream.readline)
     self.gettoken()  # Initialize lookahead
     self.dfas, self.startsymbol = self.parse()
     if close_stream is not None:
         close_stream()
     self.first = {}  # map from symbol name to set of tokens
     self.addfirstsets()
示例#4
0
文件: opy_main.py 项目: silky/oil
def main(argv):
  grammar_path = argv[1]
  # NOTE: This is cached as a pickle
  grammar = driver.load_grammar(grammar_path)
  FILE_INPUT = grammar.symbol2number['file_input']

  symbols = Symbols(grammar)
  pytree.Init(symbols)  # for type_repr() pretty printing
  transformer.Init(symbols)  # for _names and other dicts

  # In Python 2 code, always use from __future__ import print_function.
  try:
    del grammar.keywords["print"]
  except KeyError:
    pass

  #do_glue = False
  do_glue = True

  if do_glue:  # Make it a flag
    # Emulating parser.st structures from parsermodule.c.
    # They have a totuple() method, which outputs tuples like this.
    def py2st(grammar, raw_node):
      type, value, context, children = raw_node
      # See pytree.Leaf
      if context:
        _, (lineno, column) = context
      else:
        lineno = 0  # default in Leaf
        column = 0

      if children:
        return (type,) + tuple(children)
      else:
        return (type, value, lineno, column)
    convert = py2st
  else:
    convert = pytree.convert

  d = driver.Driver(grammar, convert=convert)

  action = argv[2]

  if action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[3]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    n = transformer.CountTupleTree(tree)
    log('COUNT %d', n)
    printer = transformer.TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)

  elif action == 'parse':
    py_path = argv[3]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      tree = d.parse_tokens(tokens, start_symbol=FILE_INPUT)

    if isinstance(tree, tuple):
      n = transformer.CountTupleTree(tree)
      log('COUNT %d', n)

      printer = transformer.TupleTreePrinter(transformer._names)
      printer.Print(tree)
    else:
      tree.PrettyPrint(sys.stdout)
      log('\tChildren: %d' % len(tree.children), file=sys.stderr)

  elif action == 'compile':
    py_path = argv[3]
    out_path = argv[4]

    if do_glue:
      py_parser = Pgen2PythonParser(d, FILE_INPUT)
      printer = transformer.TupleTreePrinter(transformer._names)
      tr = transformer.Pgen2Transformer(py_parser, printer)
    else:
      tr = transformer.Transformer()

    # for Python 2.7 compatibility:
    if _READ_SOURCE_AS_UNICODE:
      f = codecs.open(py_path, encoding='utf-8')
    else:
      f = open(py_path)

    contents = f.read()
    co = pycodegen.compile(contents, py_path, 'exec', transformer=tr)
    file_size = os.path.getsize(py_path)
    log("Code length: %d", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = pycodegen.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile2':
    in_path = argv[3]
    out_path = argv[4]

    from compiler2 import pycodegen as pycodegen2
    from misc import stdlib_compile

    stdlib_compile.compileAndWrite(in_path, out_path, pycodegen2.compile)

  else: 
    raise RuntimeError('Invalid action %r' % action)
示例#5
0
def OpyCommandMain(argv):
  """Dispatch to the right action."""

  # TODO: Use core/arg_def.
  #opts, argv = Options().parse_args(argv)

  try:
    action = argv[0]
  except IndexError:
    raise error.Usage('opy: Missing required subcommand.')

  argv = argv[1:]  # TODO: Should I do input.ReadRequiredArg()?
                   # That will shift the input.

  if action in (
      'parse', 'parse-with', 'compile', 'dis', 'ast', 'symbols', 'cfg',
      'compile-ovm', 'eval', 'repl', 'run', 'run-ovm'):
    loader = pyutil.GetResourceLoader()
    f = loader.open(GRAMMAR_REL_PATH)
    contents = f.read()
    f.close()
    gr = grammar.Grammar()
    gr.loads(contents)

    # In Python 2 code, always use from __future__ import print_function.
    try:
      del gr.keywords["print"]
    except KeyError:
      pass

    symbols = Symbols(gr)
    pytree.Init(symbols)  # for type_repr() pretty printing
    transformer.Init(symbols)  # for _names and other dicts

    compiler = skeleton.Compiler(gr)
  else:
    # e.g. pgen2 doesn't use any of these.  Maybe we should make a different
    # tool.
    compiler = None

  # TODO: Also have a run_spec for 'opyc run'.
  compile_spec = arg_def.OilFlags('opy')
  compile_spec.Flag('-emit-docstring', args.Bool, default=True,
                    help='Whether to emit docstrings')
  compile_spec.Flag('-fast-ops', args.Bool, default=True,
                    help='Whether to emit LOAD_FAST, STORE_FAST, etc.')
  compile_spec.Flag('-oil-subset', args.Bool, default=False,
                    help='Only allow the constructs necessary to implement'
                    'Oil. Example: using multiple inheritance will abort '
                    'compilation.')

  #
  # Actions
  #

  if action == 'pgen2':
    grammar_path = argv[0]
    marshal_path = argv[1]
    WriteGrammar(grammar_path, marshal_path)

  elif action == 'stdlib-parse':
    # This is what the compiler/ package was written against.
    import parser

    py_path = argv[1]
    with open(py_path) as f:
      st = parser.suite(f.read())

    tree = st.totuple()

    printer = TupleTreePrinter(HostStdlibNames())
    printer.Print(tree)
    n = CountTupleTree(tree)
    log('COUNT %d', n)

  elif action == 'lex':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      for typ, val, start, end, unused_line in tokens:
        print('%10s %10s %-10s %r' % (start, end, token.tok_name[typ], val))

  elif action == 'lex-names':  # Print all the NAME tokens.
    for py_path in argv:
      log('Lexing %s', py_path)
      with open(py_path) as f:
        tokens = tokenize.generate_tokens(f.readline)
        for typ, val, start, end, unused_line in tokens:
          if typ == token.NAME:
            print(val)

  elif action == 'parse':
    py_path = argv[0]
    with open(py_path) as f:
      tokens = tokenize.generate_tokens(f.readline)
      p = parse.Parser(gr)
      pnode  = driver.PushTokens(p, tokens, gr, 'file_input')

    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  # Parse with an arbitrary grammar, but the Python lexer.
  elif action == 'parse-with':
    grammar_path = argv[0]
    start_symbol = argv[1]
    code_str = argv[2]

    with open(grammar_path) as f:
      gr = pgen.MakeGrammar(f)

    f = cStringIO.StringIO(code_str)
    tokens = tokenize.generate_tokens(f.readline)
    p = parse.Parser(gr)  # no convert=
    try:
      pnode = driver.PushTokens(p, tokens, gr, start_symbol)
    except parse.ParseError as e:
      # Extract location information and show it.
      _, _, (lineno, offset) = e.opaque
      # extra line needed for '\n' ?
      lines = code_str.splitlines() + ['']

      line = lines[lineno-1]
      log('  %s', line)
      log('  %s^', ' '*offset)
      log('Parse Error: %s', e)
      return 1
    printer = ParseTreePrinter(transformer._names)  # print raw nodes
    printer.Print(pnode)

  elif action == 'ast':  # output AST
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='ast')

  elif action == 'symbols':  # output symbols
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='symbols')

  elif action == 'cfg':  # output Control Flow Graph
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    with open(py_path) as f:
      graph = compiler.Compile(f, opt, 'exec', print_action='cfg')

  elif action == 'compile':  # 'opyc compile' is pgen2 + compiler2
    # spec.Arg('action', ['foo', 'bar'])
    # But that leads to some duplication.

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    out_path = argv[i+1]

    with open(py_path) as f:
      co = compiler.Compile(f, opt, 'exec')

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))

    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      h = misc.getPycHeader(py_path)
      out_f.write(h)
      marshal.dump(co, out_f)

  elif action == 'compile-ovm':
    # NOTE: obsolete
    from ovm2 import oheap2
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    out_path = argv[i+1]

    # Compile to Python bytecode (TODO: remove ovm_codegen.py)
    mode = 'exec'
    with open(py_path) as f:
      co = compiler.Compile(f, opt, mode)

    if 1:
      with open(out_path, 'wb') as out_f:
        oheap2.Write(co, out_f)
      return 0

    log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
    # Write the .pyc file
    with open(out_path, 'wb') as out_f:
      if 1:
        out_f.write(co.co_code)
      else:
        h = misc.getPycHeader(py_path)
        out_f.write(h)
        marshal.dump(co, out_f)
    log('Wrote only the bytecode to %r', out_path)

  elif action == 'eval':  # Like compile, but parses to a code object and prints it
    opt, i = compile_spec.ParseArgv(argv)
    py_expr = argv[i]
    f = skeleton.StringInput(py_expr, '<eval input>')
    co = compiler.Compile(f, opt, 'eval')

    v = dis_tool.Visitor()
    v.show_code(co)
    print()
    print('RESULT:')
    print(eval(co))

  elif action == 'repl':  # Like eval in a loop
    while True:
      py_expr = raw_input('opy> ')
      f = skeleton.StringInput(py_expr, '<REPL input>')

      # TODO: change this to 'single input'?  Why doesn't this work?
      co = compiler.Compile(f, opt, 'eval')

      v = dis_tool.Visitor()
      v.show_code(co)
      print(eval(co))

  elif action == 'dis-tables':
    out_dir = argv[0]
    pyc_paths = argv[1:]

    out = TableOutput(out_dir)

    for pyc_path in pyc_paths:
      with open(pyc_path) as f:
        magic, unixtime, timestamp, code = dis_tool.unpack_pyc(f)
        WriteDisTables(pyc_path, code, out)

    out.Close()

  elif action == 'dis':
    opt, i = compile_spec.ParseArgv(argv)
    path = argv[i]
    v = dis_tool.Visitor()

    if path.endswith('.py'):
      with open(path) as f:
        co = compiler.Compile(f, opt, 'exec')

      log("Compiled to %d bytes of top-level bytecode", len(co.co_code))
      v.show_code(co)

    else:  # assume pyc_path
      with open(path, 'rb') as f:
        v.Visit(f)

  elif action == 'dis-md5':
    pyc_paths = argv
    if not pyc_paths:
      raise error.Usage('dis-md5: At least one .pyc path is required.')

    for path in pyc_paths:
      h = hashlib.md5()
      with open(path) as f:
        magic = f.read(4)
        h.update(magic)
        ignored_timestamp = f.read(4)
        while True:
          b = f.read(64 * 1024)
          if not b:
            break
          h.update(b)
      print('%6d %s %s' % (os.path.getsize(path), h.hexdigest(), path))

  elif action == 'run':  # Compile and run, without writing pyc file
    # TODO: Add an option like -v in __main__

    #level = logging.DEBUG if args.verbose else logging.WARNING
    #logging.basicConfig(level=level)
    #logging.basicConfig(level=logging.DEBUG)

    opt, i = compile_spec.ParseArgv(argv)

    py_path = argv[i]
    opy_argv = argv[i:]

    if py_path.endswith('.py'):
      with open(py_path) as f:
        co = compiler.Compile(f, opt, 'exec')
      num_ticks = execfile.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = execfile.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  elif action == 'run-ovm':  # Compile and run, without writing pyc file
    opt, i = compile_spec.ParseArgv(argv)
    py_path = argv[i]
    opy_argv = argv[i+1:]

    if py_path.endswith('.py'):
      #mode = 'exec'
      mode = 'ovm'  # OVM bytecode is different!
      with open(py_path) as f:
        co = compiler.Compile(f, opt, mode)
      log('Compiled to %d bytes of OVM code', len(co.co_code))
      num_ticks = ovm.run_code_object(co, opy_argv)

    elif py_path.endswith('.pyc') or py_path.endswith('.opyc'):
      with open(py_path) as f:
        f.seek(8)  # past header.  TODO: validate it!
        co = marshal.load(f)
      num_ticks = ovm.run_code_object(co, opy_argv)

    else:
      raise error.Usage('Invalid path %r' % py_path)

  else:
    raise error.Usage('Invalid action %r' % action)
示例#6
0
def Compile(f, opt, gr, mode, print_action=None):
  """Run the full compiler pipeline.

  Args:
    f: file handle with input source code
    opt: Parsed command line flags
    gr: Grammar
    start_symbol: name of the grammar start symbol
    mode: 'exec', 'eval', or 'single', like Python's builtin compile()
    print_action: 'ast' or 'cfg'.  Print an intermediate representation.
    opt: Command line flags
  """
  filename = f.name

  tokens = tokenize.generate_tokens(f.readline)

  p = parse.Parser(gr)
  if mode == 'single':
    start_symbol = 'single_input'
  elif mode == 'exec':
    start_symbol = 'file_input'
  elif mode == 'eval':
    start_symbol = 'eval_input'

  parse_tree = driver.PushTokens(p, tokens, gr, start_symbol)

  parse_tuples = _ParseTreeToTuples(parse_tree)

  tr = transformer.Transformer()
  as_tree = tr.transform(parse_tuples)

  if print_action == 'ast':
      print(as_tree)
      return

  # NOTE: This currently does nothing!
  v = syntax.SyntaxErrorChecker()
  v.Dispatch(as_tree)

  s = symbols.SymbolVisitor()
  s.Dispatch(as_tree)

  if print_action == 'symbols':
      _PrintScopes(s.scopes)
      return

  graph = pyassem.FlowGraph()  # Mutated by code generator

  if mode == "single":  # Not used now?
      ctx = _ModuleContext(filename, opt, s.scopes)
      # NOTE: the name of the Frame is a comment, not exposed to users.
      frame = pyassem.Frame("<interactive>", filename)  # mutated
      gen = pycodegen.InteractiveCodeGenerator(ctx, frame, graph)
      gen.set_lineno(as_tree)

  elif mode == "exec":
      # TODO: Does this need to be made more efficient?
      p1 = future.FutureParser()
      p2 = future.BadFutureParser()
      p1.Dispatch(as_tree)
      p2.Dispatch(as_tree)

      ctx = _ModuleContext(filename, opt, s.scopes, futures=p1.get_features())
      frame = pyassem.Frame("<module>", filename)  # mutated

      gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

  elif mode == "eval":
      ctx = _ModuleContext(filename, opt, s.scopes)
      frame = pyassem.Frame("<expression>", filename)  # mutated
      gen = pycodegen.TopLevelCodeGenerator(ctx, frame, graph)

  else:
      raise AssertionError('Invalid mode %r' % mode)

  # NOTE: There is no Start() or FindLocals() at the top level.
  gen.Dispatch(as_tree)  # mutates graph
  gen.Finish()

  if print_action == 'cfg':
      print(graph)
      return

  co = pyassem.MakeCodeObject(frame, graph, opt)

  # NOTE: Could call marshal.dump here?
  return co