def ParseCodeToTree(code):
    """Build a lib2to3 pytree from a string of Python source.

    The Python 3 grammar is tried first because it is the more permissive
    one (print and exec are not keywords); the Python 2 grammar is the
    fallback.

    Arguments:
      code: a string with the code to parse.

    Raises:
      SyntaxError: both grammars rejected the code and ``ast.parse`` rejects
        it as well.
      parse.ParseError: both grammars rejected the code although
        ``ast.parse`` accepts it.

    Returns:
      The root node of the parsed tree, passed through ``_WrapEndMarker``.
    """
    try:
        py3_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
        tree = py3_driver.parse_string(code, debug=False)
    except parse.ParseError:
        try:
            py2_driver = driver.Driver(_GRAMMAR_FOR_PY2,
                                       convert=pytree.convert)
            tree = py2_driver.parse_string(code, debug=False)
        except parse.ParseError:
            # Let ast.parse surface a SyntaxError for genuinely invalid
            # code; if it accepts the code, re-raise the ParseError that is
            # currently being handled.
            ast.parse(code)
            raise
    return _WrapEndMarker(tree)
def parse_string(code):
    """Parse the given code to a lib2to3 pytree.

    The code is parsed with ``_GRAMMAR_FOR_PY3`` first and, if that raises a
    ParseError, with ``_GRAMMAR_FOR_PY2``.

    Arguments:
      code: a string with the code to parse.

    Raises:
      SyntaxError: neither grammar accepts the code and ``ast.parse``
        rejects it too.
      parse.ParseError: neither grammar accepts the code but ``ast.parse``
        does.

    Returns:
      The root node of the parsed tree.
    """
    try:
        return driver.Driver(
            _GRAMMAR_FOR_PY3, convert=pytree.convert
        ).parse_string(code, debug=False)
    except parse.ParseError:
        try:
            return driver.Driver(
                _GRAMMAR_FOR_PY2, convert=pytree.convert
            ).parse_string(code, debug=False)
        except parse.ParseError:
            # A SyntaxError from ast.parse propagates as-is; otherwise the
            # ParseError being handled is re-raised.
            ast.parse(code)
            raise
def parse_code(file_path):
    """Parse the file at *file_path* and return the tree rendered as text.

    The file is read with the ``ascii`` encoding and parsed with
    ``pygram.python_grammar``; the returned string is ``str()`` of the
    resulting lib2to3 tree.
    """
    tree = driver.Driver(pygram.python_grammar, pytree.convert).parse_file(
        filename=file_path, encoding='ascii', debug=True)
    return str(tree)
def parse(code):
    """String -> AST

    Parse *code* with ``pygram.python_grammar`` and return the resulting
    tree. A trailing newline is appended when *code* does not end with one;
    whether that happened is recorded on the result as ``added_newline``.

    A ParseError from the parser is logged at debug level and re-raised.
    """
    added_newline = not code.endswith("\n")
    if added_newline:
        code += "\n"

    try:
        result = driver.Driver(
            pygram.python_grammar, pytree.convert
        ).parse_string(code, True)
    except ParseError:
        log.debug("Had problems parsing:\n%s\n" % quoted_block(code))
        raise

    # The parser may hand back a bare Leaf; callers always get a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])

    result.added_newline = added_newline
    return result
def lib2to3_parse(python_contents):
    """Parse *python_contents* with lib2to3's default grammar.

    Returns the tree produced by ``Driver.parse_string``; parser errors
    propagate to the caller.
    """
    from lib2to3 import pygram, pytree
    from lib2to3.pgen2 import driver

    parser = driver.Driver(pygram.python_grammar, pytree.convert)
    return parser.parse_string(python_contents, True)
def main():
    """Run ``walk_tree`` over every ``*.py`` file in a directory.

    The directory is taken from ``sys.argv[1]``. Each file is parsed with
    lib2to3, passed to ``walk_tree``, and the resulting tree is written next
    to the input as ``<name>.py_docstringed``. Exits with status 1 when the
    argument count is wrong.
    """
    import glob
    import os

    if len(sys.argv) != 2:
        # Parenthesised single-argument prints behave the same on Python 2
        # and are valid syntax on Python 3.
        print("usage docstringr.py PATH_TO_PYTHON_FILES")
        sys.exit(1)

    path = sys.argv[1]
    # The driver holds no per-file state, so one instance serves all files.
    drv = driver.Driver(pygram.python_grammar, pytree.convert)

    for fn in glob.glob(os.path.join(path, '*.py')):
        print('processing %s' % fn)

        print('reading...')
        with open(fn, 'r') as f:
            contents = f.read()

        print('parsing...')
        tree = drv.parse_string(contents, True)
        walk_tree(tree)

        out_file = fn + '_docstringed'
        print('writing {} ...'.format(out_file))
        with open(out_file, 'w') as f:
            f.write(str(tree))

    print('all done')
class Util(object):
    """Helpers for parsing text and inspecting lib2to3 trees."""

    # Matches a return statement that carries an expression.
    return_expr = compile_pattern("""return_stmt< 'return' any >""")

    @classmethod
    def has_return_exprs(cls, node):
        """Report whether the subtree under *node* contains 'return expr'.

        Nested function and class definitions are not descended into. A
        bare 'return' on its own does not count; as soon as one
        'return expr' is found the answer is True.
        """
        if cls.return_expr.match(node, {}):
            return True
        for child in node.children:
            if child.type in (syms.funcdef, syms.classdef):
                continue
            if cls.has_return_exprs(child):
                return True
        return False

    driver = driver.Driver(pygram.python_grammar, convert=pytree.convert)

    @classmethod
    def parse_string(cls, text):
        """Use lib2to3 to parse text into a Node."""
        stripped = text.strip()
        if stripped:
            # workaround: parsing text without trailing '\n' throws exception
            return cls.driver.parse_string(stripped + '\n')
        # cls.driver.parse_string would return just the ENDMARKER Leaf for
        # empty input; wrap it in a Node for consistency.
        return Node(syms.file_input, [Leaf(token.ENDMARKER, '')])
def process(self, source):
    """Parse *source* and return ``{"file_input": <traversal result>}``.

    The source is parsed with the grammar that has no print statement and
    the tree is handed to ``self._traverse``. Parser errors propagate.
    """
    drv = driver.Driver(pygram.python_grammar_no_print_statement,
                        pytree.convert)
    # The parser needs the input to END with a newline. Checking only for
    # the presence of a newline anywhere left multi-line input such as
    # "a = 1\nb = 2" unterminated.
    if not source.endswith("\n"):
        source += "\n"
    tree = drv.parse_string(source)
    return {"file_input": self._traverse(tree)}
def _Parse(cls, code):
    """
    Parses the given code string returning its lib2to3 AST tree.

    A newline is appended to non-empty code that does not end with one, and
    the grammar is chosen according to whether the code enables the
    "print_function" future feature.

    :param code:
        The source code to parse.

    :return lib2to3.AST:
        Returns the lib2to3 AST.
    """

    def _GetLastLeaf(node):
        # Follow the last child at every level until a Leaf is reached.
        from lib2to3.pytree import Leaf
        r_leaf = node
        while not isinstance(r_leaf, Leaf):
            r_leaf = r_leaf.children[-1]
        return r_leaf

    # Priority import.

    # Other imports
    from zerotk.reraiseit import reraise
    from lib2to3 import pygram, pytree
    from lib2to3.pgen2 import driver
    from lib2to3.pgen2.parse import ParseError
    from lib2to3.pygram import python_symbols
    from lib2to3.pytree import Leaf, Node
    from lib2to3.refactor import _detect_future_features

    # Falsy for empty code (the `and` short-circuits to the empty string),
    # so empty input is passed to the parser unchanged.
    added_newline = code and not code.endswith('\n')
    if added_newline:
        code += '\n'

    # Selects the appropriate grammar depending on the usage of
    # "print_function" future feature.
    future_features = _detect_future_features(code)
    if 'print_function' in future_features:
        grammar = pygram.python_grammar_no_print_statement
    else:
        grammar = pygram.python_grammar

    try:
        drv = driver.Driver(grammar, pytree.convert)
        result = drv.parse_string(code, True)
    except ParseError as e:
        # presumably reraise() always raises; otherwise `result` would be
        # unbound below -- verify against zerotk.reraiseit.
        reraise(e, "Had problems parsing:\n%s\n" % cls._QuotedBlock(code))

    # Always return a Node, not a Leaf.
    if isinstance(result, Leaf):
        result = Node(python_symbols.file_input, [result])

    # Remove AST-leaf for the added newline.
    # NOTE(review): the nesting below was reconstructed from a flattened
    # source; confirm that `else` pairs with the inner `if`.
    if added_newline:
        last_leaf = _GetLastLeaf(result)
        # Type 0 with an empty value looks like lib2to3's ENDMARKER leaf --
        # TODO confirm.
        if not (last_leaf.type == 0 and last_leaf.value == ''):
            if last_leaf.prefix:
                last_leaf.prefix = last_leaf.prefix[:-1]
            else:
                last_leaf.remove()

    return result
def remove_obsolete(path, moves=None):
    # type: (Text, Dict[Text, Text]) -> Text
    """Drop or rewrite top-level ``with Files(...)`` blocks in the file at *path*.

    The file is parsed with lib2to3 and every top-level statement matching
    ``with Files(<arg>, ...)`` is recorded under the literal value of its
    first argument. The directory containing *path* is then walked; a
    recorded pattern stops being "unmatched" as soon as ``match`` accepts one
    of the relative file paths for it. Patterns that ``compute_moves``
    returns for *moves* get their argument replaced by the new pattern; the
    statements of all patterns still unmatched are removed.

    Returns the modified tree as text.
    """
    from lib2to3 import (pygram,  # type: ignore
                         pytree,
                         patcomp)
    from lib2to3.pgen2 import driver

    files_pattern = (u"with_stmt< 'with' power< 'Files' "
                     "trailer< '(' arg=any any* ')' > any* > any* >")
    base_dir = os.path.dirname(path) or "."
    d = driver.Driver(pygram.python_grammar,
                      convert=pytree.convert)
    tree = d.parse_file(path)
    pc = patcomp.PatternCompiler()
    pat = pc.compile_pattern(files_pattern)

    unmatched_patterns = set()
    # Maps each pattern string to (statement node, pattern match results).
    node_patterns = {}

    for node in tree.children:
        match_values = {}  # type: Dict[Any, Any]
        if pat.match(node, match_values):
            path_pat = literal_eval(match_values['arg'].value)
            unmatched_patterns.add(path_pat)
            node_patterns[path_pat] = (node, match_values)

    for base_path, _, files in os.walk(base_dir):
        for filename in files:
            full_path = os.path.join(base_path, filename)
            # NOTE: rebinds the `path` parameter; it is not needed in its
            # original meaning past this point.
            path = os.path.relpath(full_path, base_dir)
            try:
                # The assert is raised and caught right here so that an
                # exception is active when record_exception is called.
                # NOTE(review): under `python -O` asserts are stripped and
                # this check disappears -- confirm that is acceptable.
                assert (u"../" not in path and
                        not path.endswith(u"/..")
                        ), "Path %s is outside %s" % (full_path, base_dir)
            except AssertionError:
                newrelic.agent.record_exception(params={"path": full_path})
                continue

            if path[:2] == u"./":
                path = path[2:]
            # Iterate over a copy because the set is mutated in the loop.
            for pattern in unmatched_patterns.copy():
                if match(path, pattern):
                    unmatched_patterns.remove(pattern)

    if moves:
        moved_patterns = compute_moves(moves, unmatched_patterns)
        unmatched_patterns -= set(moved_patterns.keys())
        for old_pattern, new_pattern in iteritems(moved_patterns):
            node, match_values = node_patterns[old_pattern]
            arg = match_values["arg"]
            # Swap the argument leaf for a double-quoted literal of the new
            # pattern, keeping the leaf's class and type.
            arg.replace(arg.__class__(arg.type, '"%s"' % new_pattern))

    for pattern in unmatched_patterns:
        logger.debug("Removing %s" % pattern)
        node_patterns[pattern][0].remove()

    return six.text_type(tree)
def main():
    """Round-trip test for the parser driver.

    Parses ``example.py`` with the grammar loaded from ``Grammar.txt`` and
    reports whether ``diff`` finds a difference. When any command-line
    argument is given, additionally parses the source file of every imported
    module and every top-level ``.py`` file in each ``sys.path`` directory,
    then prints the files for which ``diff`` reported a difference.
    """
    gr = driver.load_grammar("Grammar.txt")
    dr = driver.Driver(gr, convert=pytree.convert)

    fn = "example.py"
    tree = dr.parse_file(fn, debug=True)
    if not diff(fn, tree):
        print("No diffs.")
    if not sys.argv[1:]:
        return  # Pass a dummy argument to run the complete test suite below

    problems = []

    # Process every imported module. Iterate over a snapshot: parsing can
    # trigger imports, and mutating sys.modules during iteration raises
    # RuntimeError.
    for mod in list(sys.modules.values()):
        # Covers None placeholders, modules without __file__, and modules
        # whose __file__ is None.
        fn = getattr(mod, "__file__", None)
        if not fn:
            continue
        if fn.endswith(".pyc"):
            fn = fn[:-1]
        if not fn.endswith(".py"):
            continue
        print("Parsing", fn, file=sys.stderr)
        tree = dr.parse_file(fn, debug=True)
        if diff(fn, tree):
            problems.append(fn)

    # Process every single module on sys.path (but not in packages)
    for directory in sys.path:
        try:
            names = os.listdir(directory)
        except OSError:
            continue
        print("Scanning", directory, "...", file=sys.stderr)
        for name in names:
            if not name.endswith(".py"):
                continue
            print("Parsing", name, file=sys.stderr)
            fn = os.path.join(directory, name)
            try:
                tree = dr.parse_file(fn, debug=True)
            except pgen2.parse.ParseError as err:
                print("ParseError:", err)
            else:
                if diff(fn, tree):
                    problems.append(fn)

    # Show summary of problem files
    if not problems:
        print("No problems.  Congratulations!")
    else:
        print("Problems in following files:")
        for fn in problems:
            print("***", fn)
def lib2to3_parse(python_contents):
    """Parse *python_contents*, trying a py3-style grammar before a py2 one.

    The py3 grammar is lib2to3's no-print-statement grammar with the
    ``exec`` keyword removed; the py2 grammar is the default grammar with
    the ``nonlocal`` keyword removed. A ParseError from the first attempt
    triggers the second; an error from the second attempt propagates.
    """
    from lib2to3 import pygram, pytree
    from lib2to3.pgen2 import driver
    from lib2to3.pgen2 import parse

    # Roughly stolen from:
    # https://github.com/google/yapf/blob/729279/yapf/yapflib/pytree_utils.py#L70-L102
    # Work on copies so the shared lib2to3 grammars stay untouched.
    py3_grammar = pygram.python_grammar_no_print_statement.copy()
    del py3_grammar.keywords['exec']
    py2_grammar = pygram.python_grammar.copy()
    del py2_grammar.keywords['nonlocal']

    py3_driver = driver.Driver(py3_grammar, pytree.convert)
    py2_driver = driver.Driver(py2_grammar, pytree.convert)

    # Try with the more permissive py3 grammar first
    try:
        return py3_driver.parse_string(python_contents, True)
    except parse.ParseError:
        return py2_driver.parse_string(python_contents, True)
def main() -> None:
    """Time a lib2to3 parse of the file named by ``sys.argv[1]``.

    Prints the elapsed wall-clock time, the file's line count and the
    resulting lines-per-second rate, then calls ``print_memstats``.
    """
    filename = sys.argv[1]

    started = time.time()
    drv = driver.Driver(pygram.python_grammar, convert=pytree.convert)
    # The tree stays bound until the function returns, so it is still alive
    # when print_memstats() runs.
    tree = drv.parse_file(filename)
    dt = time.time() - started

    with open(filename) as file:
        nlines = sum(1 for _ in file)

    # `dt or 1e-9` guards the division when the clock reports zero elapsed.
    rate = nlines / (dt or 1e-9)
    print("%.3f seconds for %d lines; %.0f lines/sec" % (dt, nlines, rate))
    print_memstats()
def run(self):
    """Yield every error found in ``self.source``.

    The source's future features are detected and stored on
    ``self.future_features``; they select the grammar used for parsing. A
    missing trailing newline is reported first, followed by the errors from
    ``_check_tree`` and then those from ``_check_line_lengths``.
    """
    self.future_features = detect_future_features(self.source)
    grammar = grammar_for_future_features(self.future_features)
    parser = driver.Driver(grammar, convert=pytree.convert)

    tree, has_trailing_newline = parse_source(parser, self.source)
    if not has_trailing_newline:
        yield self._message_for_pos(self.lines.last_pos,
                                    Errors.no_trailing_newline)

    for tree_error in self._check_tree(tree):
        yield tree_error

    for length_error in self._check_line_lengths():
        yield length_error
def ParseCodeToTree(code):
    """Parse the given code to a lib2to3 pytree.

    'print' is first treated as a function call (3.0 behavior); if that
    fails to parse, the code is parsed again with 'print' as a statement
    (pre-3.0 behavior).

    Arguments:
      code: a string with the code to parse.

    Raises:
      parse.ParseError: the code parses under neither grammar.

    Returns:
      The root node of the parsed tree.
    """
    try:
        py3_driver = driver.Driver(pygram.python_grammar_no_print_statement,
                                   convert=pytree.convert)
        return py3_driver.parse_string(code, debug=False)
    except parse.ParseError:
        # If the print-statement grammar fails as well, there is something
        # else wrong with the code and the error propagates.
        py2_driver = driver.Driver(pygram.python_grammar,
                                   convert=pytree.convert)
        return py2_driver.parse_string(code, debug=False)
def pythoncile2(path, content=None):
    """Scan Python source and return its CIX tree as a ``codeintel`` element.

    Args:
        path: Path of the source file. It names the blob, is stored on the
            resulting ``file`` element, and is read from disk when *content*
            is None.
        content: The source text. When None, the file at *path* is read and
            a newline is appended to what was read; explicit content is
            parsed unchanged.

    Returns:
        The ``codeintel`` ElementTree element (version "2.0") populated by
        the scanner.

    Parser errors raised by lib2to3 propagate to the caller.
    """
    # NOTE: deleting pygram.python_grammar.keywords["print"] would let the
    # parser accept Python 3 content such as
    #       print("foo", file=sys.stderr)
    # but parsing Python 2 with print as a statement needs that grammar
    # item, so the stock grammar is used as-is.
    # TODO: not sure about Python 2 code with `from __future__ import
    # print_function`
    dvr = driver.Driver(pygram.python_grammar,
                        convert=pytree.convert,
                        logger=log)

    # Based on `RefactoringTool.refactor_string()`.
    if content is None:
        # Context manager so the handle is closed even if reading fails.
        with open(path, 'r') as source_file:
            data = source_file.read() + '\n'
    else:
        data = content

    tree = dvr.parse_string(data)
    if log.isEnabledFor(logging.DEBUG):
        tree.pprint(indent="`   ")

    # Traverse the AST (actually more of a concrete syntax tree).
    blob = Scope("blob", splitext(basename(path))[0],
                 lang="Python", src=path)
    scanner = Scanner(blob)
    scanner.scan(tree)

    # Build the CIX tree.
    now = time.time()
    codeintel = ET.Element("codeintel", version="2.0")
    file_elem = ET.SubElement(codeintel, "file", lang="Python",
                              mtime=str(now), path=path)
    scanner.gen_cix_tree(file_elem)

    return codeintel
def remove_obsolete(path, moves=None):
    """Drop or rewrite top-level ``with Files(...)`` blocks in the file at *path*.

    The file is parsed with lib2to3 and every top-level statement matching
    ``with Files(<arg>, ...)`` is recorded under the literal value of its
    first argument. The directory containing *path* is walked; a recorded
    pattern stops being "unmatched" once ``match`` accepts a relative file
    path for it. Patterns that ``compute_moves`` returns for *moves* have
    their argument replaced by the new pattern; the statements of all
    patterns still unmatched are removed.

    Returns the modified tree as text.
    """
    from lib2to3 import pygram, pytree, patcomp
    from lib2to3.pgen2 import driver

    files_pattern = ("with_stmt< 'with' power< 'Files' "
                     "trailer< '(' arg=any any* ')' > any* > any* >")
    base_dir = os.path.dirname(path) or "."
    d = driver.Driver(pygram.python_grammar, convert=pytree.convert)
    tree = d.parse_file(path)
    pat = patcomp.PatternCompiler().compile_pattern(files_pattern)

    unmatched_patterns = set()
    # Maps each pattern string to (statement node, pattern match results).
    node_patterns = {}

    for node in tree.children:
        match_values = {}
        if pat.match(node, match_values):
            path_pat = literal_eval(match_values['arg'].value)
            unmatched_patterns.add(path_pat)
            node_patterns[path_pat] = (node, match_values)

    for base_path, _, files in os.walk(base_dir):
        for filename in files:
            rel_path = os.path.relpath(os.path.join(base_path, filename),
                                       base_dir)
            # Only a whole ".." component leaves base_dir. A substring test
            # would also trip on ordinary names such as "a..b.txt".
            assert os.pardir not in rel_path.split(os.sep)
            if rel_path[:2] == "./":
                rel_path = rel_path[2:]
            # Iterate over a copy because the set is mutated in the loop.
            for pattern in unmatched_patterns.copy():
                if match(rel_path, pattern):
                    unmatched_patterns.remove(pattern)

    if moves:
        moved_patterns = compute_moves(moves, unmatched_patterns)
        unmatched_patterns -= set(moved_patterns.keys())
        for old_pattern, new_pattern in moved_patterns.iteritems():
            node, match_values = node_patterns[old_pattern]
            arg = match_values["arg"]
            # Swap the argument leaf for a double-quoted literal of the new
            # pattern, keeping the leaf's class and type.
            arg.replace(arg.__class__(arg.type, '"%s"' % new_pattern))

    for pattern in unmatched_patterns:
        logger.debug("Removing %s" % pattern)
        node_patterns[pattern][0].remove()

    return unicode(tree)
def lib2to3_parse(src_txt):
    """Given a string with source, return the lib2to3 Node.

    A line terminator is appended when *src_txt* does not end with a
    newline; CRLF is used if one occurs in the first 1024 characters,
    otherwise LF. Empty input is accepted.

    Raises:
        ValueError: the source cannot be parsed. The message carries the
            line number, column and text of the offending line.
    """
    grammar = pygram.python_grammar_no_print_statement
    drv = driver.Driver(grammar, pytree.convert)
    # endswith() rather than src_txt[-1], which raises IndexError on "".
    if not src_txt.endswith("\n"):
        nl = "\r\n" if "\r\n" in src_txt[:1024] else "\n"
        src_txt += nl
    try:
        result = drv.parse_string(src_txt, True)
    except ParseError as pe:
        lineno, column = pe.context[1]
        lines = src_txt.splitlines()
        try:
            faulty_line = lines[lineno - 1]
        except IndexError:
            faulty_line = "<line number missing in source>"
        # `from None` hides the parser's internal traceback from the user.
        raise ValueError(
            f"Cannot parse: {lineno}:{column}: {faulty_line}") from None

    # The parser may hand back a bare Leaf; callers always get a Node.
    if isinstance(result, Leaf):
        result = Node(syms.file_input, [result])
    return result
class Util:
    """Utility functions for working with Nodes."""

    # Matches a return statement that carries an expression.
    return_expr = compile_pattern("""return_stmt< 'return' any >""")

    @classmethod
    def has_return_exprs(cls, node):
        """Traverse the tree below node looking for 'return expr'.

        Children that are function or class definitions are skipped.

        Args:
          node: The AST node at the root of the subtree.

        Returns:
          True if a 'return expr' statement is found, False otherwise. The
          pattern needs an expression after 'return', so a bare 'return'
          alone does not make the result True.
        """
        match_results = {}
        found = cls.return_expr.match(node, match_results)
        if found:
            return True
        skipped_types = (syms.funcdef, syms.classdef)
        for child in node.children:
            if child.type not in skipped_types and cls.has_return_exprs(child):
                return True
        return False

    driver = driver.Driver(pygram.python_grammar, convert=pytree.convert)

    @classmethod
    def parse_string(cls, text):
        """Use lib2to3 to parse text into a Node."""
        text = text.strip()
        if text:
            # workaround: parsing text without trailing '\n' throws exception
            text += '\n'
            return cls.driver.parse_string(text)
        # cls.driver.parse_string just returns the ENDMARKER Leaf, wrap in
        # a Node for consistency
        return Node(syms.file_input, [Leaf(token.ENDMARKER, '')])
from lib2to3 import pytree
from lib2to3 import pygram
from lib2to3.pgen2 import driver
from lib2to3.pgen2 import token

# see: yapf.pytree_utils
# Copy of the no-print-statement grammar with the `exec` keyword removed;
# the copy keeps the shared lib2to3 grammar untouched.
_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
del _GRAMMAR_FOR_PY3.keywords['exec']

parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)


def build_ast_from_string(code):
    """Parse *code* with the module-level ``parser_driver``.

    Returns the tree produced by ``Driver.parse_string``; parser errors
    propagate. No per-call Driver is built: the shared one is reused.
    """
    return parser_driver.parse_string(code, debug=True)


def name_of(node):
    """Return the token name or grammar-symbol name for *node*'s type."""
    # Types below 256 are tokens; the rest are grammar symbols.
    if node.type < 256:
        return token.tok_name[node.type]
    else:
        return pygram.python_grammar.number2symbol[node.type]


def repr_node(node):
    """Return a readable, recursive representation of *node*.

    A Node is rendered with the representations of its children, a Leaf
    with its value. Any other object yields None.
    """
    if isinstance(node, pytree.Node):
        return '%s(%s, %r)' % (
            node.__class__.__name__,
            name_of(node),
            [repr_node(c) for c in node.children]
        )
    if isinstance(node, pytree.Leaf):
        return '%s(%s, %r)' % (node.__class__.__name__, name_of(node), node.value)
def build_ast_from_string(code):
    """Parse *code* with the module-level ``parser_driver``.

    Returns the tree produced by ``Driver.parse_string``; parser errors
    propagate. No per-call Driver is built: the shared one is reused.
    """
    return parser_driver.parse_string(code, debug=True)
import unittest import sys import os import os.path import re from textwrap import dedent # Local imports from lib2to3 import pytree, refactor from lib2to3.pgen2 import driver test_dir = os.path.dirname(__file__) proj_dir = os.path.normpath(os.path.join(test_dir, "..")) grammar_path = os.path.join(test_dir, "..", "Grammar.txt") grammar = driver.load_grammar(grammar_path) driver = driver.Driver(grammar, convert=pytree.convert) def parse_string(string): return driver.parse_string(reformat(string), debug=True) def run_all_tests(test_mod=None, tests=None): if tests is None: tests = unittest.TestLoader().loadTestsFromModule(test_mod) unittest.TextTestRunner(verbosity=2).run(tests) def reformat(string): return dedent(string) + u"\n\n" def get_refactorer(fixer_pkg="lib2to3", fixers=None, options=None): """ A convenience function for creating a RefactoringTool for tests.
import unittest import os import os.path from textwrap import dedent # Local imports from lib2to3 import pytree, refactor from lib2to3.pgen2 import driver as pgen2_driver test_dir = os.path.dirname(__file__) proj_dir = os.path.normpath(os.path.join(test_dir, "..")) grammar_path = os.path.join(test_dir, "..", "Grammar.txt") grammar = pgen2_driver.load_grammar(grammar_path) grammar_no_print_statement = pgen2_driver.load_grammar(grammar_path) del grammar_no_print_statement.keywords["print"] driver = pgen2_driver.Driver(grammar, convert=pytree.convert) driver_no_print_statement = pgen2_driver.Driver(grammar_no_print_statement, convert=pytree.convert) def parse_string(string): return driver.parse_string(reformat(string), debug=True) def run_all_tests(test_mod=None, tests=None): if tests is None: tests = unittest.TestLoader().loadTestsFromModule(test_mod) unittest.TextTestRunner(verbosity=2).run(tests) def reformat(string):
def parse_string(code: str, grammar):
    """Parse a tree from the code using the supplied lib2to3 *grammar*."""
    return driver.Driver(grammar, pytree.convert).parse_string(code, debug=True)
arg) and processed with the -f option. """ __author__ = "Collin Winter <*****@*****.**>" # Python imports import optparse from io import StringIO import sys # Local imports from lib2to3 import pytree from lib2to3.pgen2 import driver from lib2to3.pygram import python_symbols, python_grammar driver = driver.Driver(python_grammar, convert=pytree.convert) def main(args): parser = optparse.OptionParser(usage="find_pattern.py [options] [string]") parser.add_option("-f", "--file", action="store", help="Read a code snippet from the specified file") # Parse command line arguments options, args = parser.parse_args(args) if options.file: tree = driver.parse_file(options.file) elif len(args) > 1: tree = driver.parse_stream(StringIO(args[1] + "\n"))
import sys
from lib2to3 import pytree
from lib2to3 import pygram
from lib2to3.pgen2 import driver
from lib2to3.pgen2 import token
from lib2to3.pgen2.parse import ParseError

# Shared driver: print is parsed as a function, not as a statement.
default_driver = driver.Driver(pygram.python_grammar_no_print_statement, convert=pytree.convert)


def parse_string(code, parser_driver=default_driver, *, debug=True):
    """Parse *code* with *parser_driver* and return the resulting tree."""
    return parser_driver.parse_string(code, debug=debug)


def parse_file(filename, parser_driver=default_driver, *, debug=True):
    """Parse the file *filename* and return the resulting tree.

    When the parser fails with a "bad input:" ParseError, the file is read
    again and parsed as a string with a newline appended. Any other
    ParseError is re-raised.
    """
    try:
        return parser_driver.parse_file(filename, debug=debug)
    except ParseError as e:
        if "bad input:" in repr(e):
            # work around
            with open(filename) as rf:
                body = rf.read()
            return parse_string(body + "\n", parser_driver=parser_driver, debug=debug)
        raise


def node_name(node):
    """Return the token name or grammar-symbol name for *node*'s type."""
    # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
    is_token = node.type < 256
    return (
        token.tok_name[node.type]
        if is_token
        else pygram.python_grammar.number2symbol[node.type]
    )
from lib2to3 import pytree from lib2to3 import pygram from lib2to3.pgen2 import driver from lib2to3.pgen2 import token from lib2to3.pytree import Node, Leaf # noqa from lib2to3.pgen2.parse import ParseError from lib2to3.fixer_util import find_indentation # noqa import logging from ..langhelpers import reify logger = logging.getLogger(__name__) null_logger = logging.getLogger("_null") null_logger.setLevel(logging.CRITICAL) default_driver = driver.Driver( pygram.python_grammar_no_print_statement, convert=pytree.convert, logger=null_logger, # suppress default lib2.pgen2.driver's logging ) def parse_string(code, parser_driver=default_driver, *, debug=True): return parser_driver.parse_string(code, debug=debug) def parse_file(filename, parser_driver=default_driver, *, debug=True): try: return parser_driver.parse_file(filename, debug=debug) except ParseError as e: if "bad input:" not in repr(e): # work around raise with open(filename) as rf:
import unittest
import sys
import os
import os.path
import re
from textwrap import dedent

# Local imports
from lib2to3 import pytree, refactor
from lib2to3.pgen2 import driver as pgen2_driver

# Paths are resolved relative to this file; the grammar is expected one
# directory above it.
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
# Loaded once at import time and shared by every parse_string() call.
grammar = pgen2_driver.load_grammar(grammar_path)
driver = pgen2_driver.Driver(grammar, convert=pytree.convert)


def parse_string(string):
    """Parse *string*, after passing it through reformat(), with the shared driver."""
    return driver.parse_string(reformat(string), debug=True)


def run_all_tests(test_mod=None, tests=None):
    """Run *tests* with a verbose text runner.

    When *tests* is None, the tests are loaded from the module *test_mod*.
    """
    if tests is None:
        tests = unittest.TestLoader().loadTestsFromModule(test_mod)
    unittest.TextTestRunner(verbosity=2).run(tests)


def reformat(string):
    """Dedent *string* and append two newlines."""
    return dedent(string) + "\n\n"
def main():
    """Match a lib2to3 pattern against a code snippet and print the matches.

    The pattern and the source each come from either a file or a
    command-line string (exactly one of each is required). Every node of the
    parsed source is visited in post-order; for each match the source text
    of the matched node is printed, optionally with line numbers and with
    the match results, followed by a separator line.
    """
    parser = argparse.ArgumentParser()

    pattern_group = parser.add_mutually_exclusive_group(required=True)
    pattern_group.add_argument(
        "-pf", "--pattern-file", dest="pattern_file", type=str,
        help='Read pattern from the specified file')
    pattern_group.add_argument(
        "-ps", "--pattern-string", dest="pattern_string", type=str,
        help='A pattern string')

    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument(
        "-sf", "--source-file", dest="source_file", type=str,
        help="Read code snippet from the specified file")
    source_group.add_argument(
        "-ss", "--source-string", dest="source_string", type=str,
        help="A code snippet string")

    parser.add_argument(
        "--print-results", dest="print_results", action='store_true',
        default=False, help="Print match results")
    parser.add_argument(
        "--print-lineno", dest="print_lineno", action='store_true',
        default=False, help="Print match code with line number")

    # Parse command line arguments
    args = parser.parse_args()

    # parse source snippet to CST tree
    driver_ = driver.Driver(python_grammar, convert=pytree.convert)
    if args.source_file:
        tree = driver_.parse_file(args.source_file)
    else:
        tree = driver_.parse_stream(StringIO(args.source_string + "\n"))

    # compile pattern
    if args.pattern_file:
        with open(args.pattern_file, 'r') as f:
            pattern = f.read()
    else:
        pattern = args.pattern_string
    PC = PatternCompiler()
    pattern, pattern_tree = PC.compile_pattern(pattern, with_tree=True)

    for node in tree.post_order():
        results = {'node': node}
        if not pattern.match(node, results):
            continue

        match_node = results['node']
        src_lines = str(match_node).splitlines()

        if args.print_lineno:
            # Line numbers are derived from the right-most leaf: some nodes
            # include a prefix, which is not a node and has no line number
            # of its own.
            leaf = match_node
            while not isinstance(leaf, pytree.Leaf):
                leaf = leaf.children[-1]
            last_lineno = leaf.get_lineno()
            first_lineno = last_lineno - len(src_lines) + 1
            src_lines = [
                str(lineno) + ' ' + line
                for lineno, line in enumerate(src_lines, first_lineno)
            ]

        for line in src_lines:
            print(line)
        if args.print_results:
            print(results)
        print('-' * 20)