def code_repr(src: str):
    """Parse a snippet of Python source into a pytree node tree.

    e.g. src = 'path.to.api'
    output = "Node(power, [Leaf(1, 'path'), Node(trailer, [Leaf(23, '.'),
    Leaf(1, 'to')]), Node(trailer, [Leaf(23, '.'), Leaf(1, 'api')])])"
    """
    # Build a fresh driver over the module-level grammar, then parse the
    # newline-terminated snippet from an in-memory stream.
    snippet_driver = driver.Driver(python_grammar, convert=pytree.convert)
    return snippet_driver.parse_stream(StringIO(src + '\n'))
def main():
    """Smoke-test the parser driver by round-tripping Python files.

    Always parses example.py and diffs it against the regenerated source.
    If any command-line argument is given, additionally round-trips every
    already-imported module and every top-level .py file found on
    sys.path, then prints a summary of files that did not round-trip.
    """
    grammar = driver.load_grammar("Grammar.txt")
    parse_driver = driver.Driver(grammar, convert=pytree.convert)

    sample = "example.py"
    tree = parse_driver.parse_file(sample, debug=True)
    if not diff(sample, tree):
        print("No diffs.")
    if not sys.argv[1:]:
        return  # Pass a dummy argument to run the complete test suite below

    problems = []

    # Round-trip every module that has already been imported.
    for mod_name in list(sys.modules):
        module = sys.modules[mod_name]
        if module is None or not hasattr(module, "__file__"):
            continue
        path = module.__file__
        if path.endswith(".pyc"):
            path = path[:-1]  # map compiled file back to its source
        if not path.endswith(".py"):
            continue
        print("Parsing", path, file=sys.stderr)
        tree = parse_driver.parse_file(path, debug=True)
        if diff(path, tree):
            problems.append(path)

    # Round-trip every top-level .py file on sys.path (packages skipped).
    for search_dir in sys.path:
        try:
            entries = os.listdir(search_dir)
        except OSError:
            continue  # unreadable / nonexistent sys.path entry
        print("Scanning", search_dir, "...", file=sys.stderr)
        for entry in entries:
            if not entry.endswith(".py"):
                continue
            print("Parsing", entry, file=sys.stderr)
            path = os.path.join(search_dir, entry)
            try:
                tree = parse_driver.parse_file(path, debug=True)
            except pgen2.parse.ParseError as err:
                print("ParseError:", err)
            else:
                if diff(path, tree):
                    problems.append(path)

    # Show summary of problem files.
    if problems:
        print("Problems in following files:")
        for path in problems:
            print("***", path)
    else:
        print("No problems. Congratulations!")
def parse_file(raw_blob):
    """Convert a JSON blob of Python source into a flattened-AST JSON string.

    `raw_blob` is a JSON document with keys 'source_code' (Python source,
    without trailing newline) and 'from_file' (its origin).  The source is
    parsed with the pgen2 driver and the resulting tree is flattened into a
    list of {'type', 'value', 'children'} dicts, where 'children' holds
    indices into that same list.

    Returns the compact JSON string {'from_file': ..., 'ast': [...]},
    or None on any failure (deliberate best-effort contract).
    """
    try:
        as_json = json.loads(raw_blob)
        contents = as_json['source_code'] + '\n'
        from_file = as_json['from_file']
        parser = driver.Driver(pygram.python_grammar, convert=pytree.convert)
        # BUG FIX: the original aliased token.tok_name and then wrote the
        # grammar's symbol names into it, permanently mutating the shared
        # module-level dict for every other user of `token`.  Copy it instead.
        names_map = dict(token.tok_name)
        for key, value in pygram.python_grammar.symbol2number.items():
            names_map[value] = key
        the_ast = parser.parse_string(contents)
        flattened_json = []

        def _traverse(node):
            # Reserve this node's slot, then recurse; children store the
            # flat-list indices returned by the recursive calls.
            cur_idx = len(flattened_json)
            if node.type not in names_map:
                # Was `assert False` -- asserts are stripped under -O, so
                # raise explicitly (still caught below, yielding None).
                raise ValueError("Type not in map.")
            flattened_json.append({
                'type': names_map[node.type],
                # Leaves carry real token text; interior nodes just echo
                # their symbolic type name.
                'value': node.value if isinstance(node, pytree.Leaf)
                         else names_map[node.type],
                'children': []
            })
            if not isinstance(node, pytree.Leaf):
                for child in node.children:
                    flattened_json[cur_idx]["children"].append(
                        _traverse(child))
            return cur_idx

        _traverse(the_ast)
        final_tree = {'from_file': from_file, 'ast': flattened_json}
        return json.dumps(final_tree, separators=(',', ':'))
    except Exception:
        # Best-effort by design: malformed blobs / unparseable source → None.
        return None
def setUp(self):
    """Give each test a fresh grammar and a logging parse driver."""
    grammar = pygram.python_grammar
    self.grammar = grammar
    self.driver = driver.Driver(grammar, convert=pytree.convert, logger=log)
def process(target):
    """Validate, filter, and tokenize one code sample for the dataset.

    `target` is a dict with at least 'language', 'the_code', 'from_file',
    and 'split' keys.  Returns (ok, results): ok is False (with []) when
    the code fails language validation; otherwise True with a list of
    per-function record dicts ready for serialization.

    NOTE(review): reads the language name from sys.argv[1] and a mode flag
    from sys.argv[2] — assumes this runs as a CLI worker; verify caller.
    """
    # Point the shared tree-sitter parser at the requested language.
    DataProcessor.PARSER.set_language(Language('/src/build/py-tree-sitter-languages.so', sys.argv[1]))
    processor = DataProcessor(
        language=sys.argv[1],
        language_parser=LANGUAGE_METADATA[sys.argv[1]]['language_parser']
    )
    results = []
    # Validation gate: the sample must parse with a real language parser
    # before any further processing.  Failures are reported unless the
    # 'gz' mode flag suppresses the output.
    if target['language'] == 'java':
        try:
            javalang.parse.parse(target['the_code'])
        except Exception as ex:
            if sys.argv[2] != 'gz':
                print('Failed to validate: ' + target['from_file'])
                print(target['the_code'])
                print(ex)
            return False, []
    elif target['language'] == 'python':
        try:
            # Parse with both the pgen2 driver and the stdlib ast module.
            parser = driver.Driver(pygram.python_grammar, convert=pytree.convert)
            parser.parse_string(target['the_code'].strip() + '\n')
            ast.parse(target['the_code'])
        except Exception:
            if sys.argv[2] != 'gz':
                print('Failed to validate: ' + target['from_file'])
            return False, []
    # Extract individual functions from the validated blob.
    functions = processor.process_blob(target['the_code'])
    for function in functions:
        # Content hash of the stripped function body, used for ban lists.
        sha256 = hashlib.sha256(
            function["function"].strip().encode('utf-8')
        ).hexdigest()
        # Per-language reject filters: regex blocklist plus known-bad hashes.
        if target['language'] == 'java':
            if JAVA_REJECT_REGEX.search(function["function"]):
                continue
            if sha256 in BANNED_JAVA_SHAS:
                # print(" - Skipped '{}'".format(sha256))
                continue  # Spoon transformer chokes on these, so exclude
        elif target['language'] == 'python':
            if PY_REJECT_REGEX.search(function["function"]):
                continue
            if sha256 in BANNED_PY_SHAS:
                # print(" - Skipped '{}'".format(sha256))
                continue  # Spoon transformer chokes on these, so exclude
        tokens_pre, tokens_post = ([], [])
        try:
            # Split off the function's own name from its token stream.
            tokens_pre, tokens_post = remove_func_name(
                function["identifier"].split('.')[-1],
                function["function_tokens"]
            )
        # NOTE(review): bare except silently drops samples whose name
        # removal fails — presumably intentional best-effort; confirm.
        except:
            continue
        results.append({
            "language": function["language"],
            "identifier": function["identifier"].split('.')[-1],
            "target_tokens": subtokenize(function["identifier"].split('.')[-1]),
            "source_tokens": tokens_post,
            "elided_tokens": tokens_pre,
            # Java snippets get a class wrapper so they compile standalone.
            "source_code": function["function"] if function["language"] != "java" else (
                'class WRAPPER {\n' + function["function"] + '\n}\n'
            ),
            "sha256_hash": sha256,
            "split": target['split'],
            "from_file": target['from_file']
        })
    return True, results
from fissix.pgen2 import token as pgen2_token
from fissix.pygram import python_symbols
from fissix import patcomp

# it's handy to import this from here when developing
from bowler.helpers import print_tree as dumpTree

import os
from textwrap import dedent

# --------------------------------------------------------------------------------------------------
# load grammar & make a driver we can use to parse
# NOTE(review): `fissix`, `pgen2_driver`, `Leaf`, `Node`, and
# `typeNameToNum` are imported/defined elsewhere in this file.
fissix_dir = os.path.dirname( fissix.__file__ )
grammar_path = os.path.join( fissix_dir, "Grammar.txt" )
m_grammar = pgen2_driver.load_grammar( grammar_path )
driver = pgen2_driver.Driver( m_grammar, convert=fissix.pytree.convert )


def parseString( string ):
    """Parse a (possibly indented) source snippet into a fissix tree."""
    # dedent lets callers pass triple-quoted, indented test snippets;
    # the extra newlines guarantee the grammar sees a terminated input.
    return driver.parse_string( dedent( string ) + "\n\n", debug=True )


def getGrammar():
    """Return the module-level grammar loaded at import time."""
    return m_grammar


# --------------------------------------------------------------------------------------------------
def makeLeaf( type_name, value, prefix="" ):
    """Build a Leaf node from a symbolic type name, value, and prefix."""
    type_num = typeNameToNum( type_name )
    return Leaf( type_num, value, prefix=prefix )


def makeStatement():
    """Build an empty stmt Node, ready to receive children."""
    return Node( python_symbols.stmt, [] )
import unittest
import os
import os.path
from textwrap import dedent

# Local imports
from fissix import pytree, refactor
from fissix.pgen2 import driver as pgen2_driver

# Paths are resolved relative to this test module so the suite can run
# from any working directory.
test_dir = os.path.dirname(__file__)
proj_dir = os.path.normpath(os.path.join(test_dir, ".."))
grammar_path = os.path.join(test_dir, "..", "Grammar.txt")
grammar = pgen2_driver.load_grammar(grammar_path)
# A second grammar with the `print` keyword removed, for testing
# print-as-function parsing.
grammar_no_print_statement = pgen2_driver.load_grammar(grammar_path)
del grammar_no_print_statement.keywords["print"]
driver = pgen2_driver.Driver(grammar, convert=pytree.convert)
driver_no_print_statement = pgen2_driver.Driver(
    grammar_no_print_statement, convert=pytree.convert
)


def parse_string(string):
    """Parse a snippet with the default driver.

    NOTE(review): `reformat` is defined elsewhere in this file.
    """
    return driver.parse_string(reformat(string), debug=True)


def run_all_tests(test_mod=None, tests=None):
    """Run `tests`, or all tests discovered in `test_mod`, verbosely."""
    if tests is None:
        tests = unittest.TestLoader().loadTestsFromModule(test_mod)
    unittest.TextTestRunner(verbosity=2).run(tests)