def asttok(code_str: str, tree: ast.Module) -> asttokens.ASTTokens:
    """Wrap an already-parsed module *tree* in a token index for *code_str*.

    Unlike ``ASTTokens(..., parse=True)``, this reuses the caller's tree
    instead of re-parsing the source.
    """
    marked = asttokens.ASTTokens(code_str, tree=tree)
    return marked
def parse_file(filename):
    """Parse a Python file into a flat, JSON-serializable list of AST nodes.

    Each node becomes a dict with 'type', optional 'value', source-location
    keys ('lineno', 'col', 'end_line_no', 'end_col'), and optional 'children'
    holding indices into the same flat list.  Returns that list.

    NOTE(review): relies on deprecated ast.NameConstant/ast.Num/ast.Str node
    classes — removed in Python 3.12; confirm the target interpreter version.
    """
    tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree
    # Flat output list; helper functions append dicts and return their index.
    json_tree = []

    def localize(node, json_node):
        # Copy asttokens' start/end positions (1-based line, 0-based col)
        # into the JSON node as strings.
        json_node['lineno'] = str(node.first_token.start[0])
        json_node['col'] = str(node.first_token.start[1])
        json_node['end_line_no'] = str(node.last_token.end[0])
        json_node['end_col'] = str(node.last_token.end[1])

    def gen_identifier(identifier, node_type='identifier', node=None):
        # Emit a leaf node carrying a bare identifier string; *node* supplies
        # the source location.  Returns the new node's index.
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        json_node['type'] = node_type
        json_node['value'] = identifier
        localize(node, json_node)
        return pos

    def traverse_list(l, node_type='list', node=None):
        # Emit a synthetic grouping node (e.g. 'body', 'orelse') whose
        # children are the traversed elements of *l*.  Returns its index.
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        json_node['type'] = node_type
        localize(node, json_node)
        children = []
        for item in l:
            children.append(traverse(item))
        if (len(children) != 0):
            json_node['children'] = children
        return pos

    def traverse(node):
        # Emit one JSON node for *node*, then recurse into its children.
        # The node is appended BEFORE its children, so a parent's index is
        # always smaller than its children's — do not reorder the appends.
        pos = len(json_tree)
        json_node = {}
        json_tree.append(json_node)
        json_node['type'] = type(node).__name__
        localize(node, json_node)
        children = []
        # First pass: extract a 'value' for node types that carry one.
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif isinstance(node, ast.NameConstant):
            json_node['value'] = node.value
        elif isinstance(node, ast.Constant):
            json_node['value'] = node.value
        elif isinstance(node, ast.Num):
            json_node['value'] = (node.n)
        elif isinstance(node, ast.Str):
            json_node['value'] = node.s
        elif isinstance(node, ast.alias):
            json_node['value'] = (node.name)
            if node.asname:
                # 'import x as y' — record the alias as a child identifier.
                children.append(gen_identifier(node.asname, node=node))
        elif isinstance(node, ast.FunctionDef):
            json_node['value'] = (node.name)
        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                json_node['value'] = node.name
        elif isinstance(node, ast.ClassDef):
            json_node['value'] = (node.name)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = (node.module)
        elif isinstance(node, ast.Global):
            for n in node.names:
                children.append(gen_identifier(n, node=node))
        elif isinstance(node, ast.keyword):
            json_node['value'] = (node.arg)
        elif isinstance(node, ast.arg):
            json_node['value'] = (node.arg)
        # Process children.
        # Second pass: node types with hand-picked child groupings.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.If) or isinstance(node, ast.While):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.With):
            children.append(traverse_list(node.items, 'items', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.withitem):
            children.append(traverse(node.context_expr))
            if node.optional_vars:
                children.append(traverse(node.optional_vars))
        elif isinstance(node, ast.Try):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.handlers, 'handlers', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
            if node.finalbody:
                children.append(
                    traverse_list(node.finalbody, 'finalbody', node))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args', node))
            children.append(traverse_list(node.defaults, 'defaults', node))
            # NOTE(review): kwonlyargs and kw_defaults are both labeled
            # 'defaults' here — looks like a copy-paste slip, but downstream
            # consumers may depend on it; confirm before changing.
            # NOTE(review): kw_defaults may contain None entries (kw-only
            # args without defaults), which traverse() does not handle —
            # TODO confirm this path is exercised.
            children.append(traverse_list(node.kwonlyargs, 'defaults', node))
            children.append(traverse_list(node.kw_defaults, 'defaults', node))
            if node.vararg:
                children.append(
                    gen_identifier(node.vararg.arg, 'vararg', node.vararg))
            if node.kwarg:
                children.append(
                    gen_identifier(node.kwarg.arg, 'kwarg', node.kwarg))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases', node))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body', node))
            children.append(
                traverse_list(node.decorator_list, 'decorator_list', node))
        else:
            # Default handling: iterate over children.
            for child in ast.iter_child_nodes(node):
                if isinstance(child, ast.expr_context) or isinstance(
                        child, ast.operator) or isinstance(
                            child, ast.boolop) or isinstance(
                                child, ast.unaryop) or isinstance(
                                    child, ast.cmpop):
                    # Directly include expr_context, and operators into the type instead of creating a child.
                    json_node['type'] = json_node['type'] + type(
                        child).__name__
                else:
                    children.append(traverse(child))
        if isinstance(node, ast.Attribute):
            # 'x.attr' — record the attribute name as an extra child.
            children.append(gen_identifier(node.attr, 'attr', node))
        if (len(children) != 0):
            json_node['children'] = children
        return pos

    traverse(tree)
    return json_tree
def __init__(self, source: str):
    """Keep *source*, index it with asttokens, and start with no replacements."""
    # Build the token index eagerly so node positions are available at once.
    self.tokens = asttokens.ASTTokens(source, parse=True)
    self.source = source
    # Pending edits as ((start, end), replacement_text) pairs.
    self.replacements: List[Tuple[Tuple[int, int], str]] = []
def parse(self):
    """Tokenize the file's cached contents and visit its AST.

    Side effects: sets self.ast_tokenized and (via set_root_method)
    self._root_method, then dispatches the visitor over the tree.
    """
    self.ast_tokenized = asttokens.ASTTokens(
        cache.read_file(self.file_path), parse=True)
    self.set_root_method()
    self.visit(self.ast_tokenized.tree, {"method": self._root_method})
def _get_text(self, code):
    """Return the exact slice of *code* covered by this node."""
    return asttokens.ASTTokens(code, tree=self).get_text(self)
def load(self):
    """Parse self.source, collect nodes via the visitor, and return them."""
    self.marked_ast = asttokens.ASTTokens(self.source, parse=True)
    # Reset the collection before visiting so repeated loads start clean.
    self.nodes = {}
    self.visit(self.marked_ast.tree)
    return self.nodes
def loop(clauses, meta_data, back_end, code_object, *args, **kwargs):
    """Apply the OpenACC 'loop' construct to *code_object* and return new Code.

    From the docs: the loop construct can describe what type of parallelism
    to use to execute the loop and declare private variables and arrays and
    reduction operations.  Allowable clauses are:
      - collapse( n )
      - gang [( gang-arg-list )]
      - worker [( [num:]int-expr )]
      - vector [( [length:]int-expr )]
      - seq
      - auto
      - tile( size-expr-list )
      - device_type( device-type-list )
      - independent
      - private( var-list )
      - reduction( operator:var-list )

    Where gang-arg is one of [num:]int-expr or static:size-expr, gang-arg-list
    may have at most one num and one static argument, and size-expr is '*' or
    int-expr.

    Restrictions:
      - Only the collapse, gang, worker, vector, seq, auto and tile clauses
        may follow a device_type clause.
      - The int-expr argument to the worker and vector clauses must be
        invariant in the kernels region.
      - A loop associated with a loop construct that does not have a seq
        clause must be written such that the loop iteration count is
        computable when entering the loop construct.

    Side effects: populates several meta_data fields (region_source,
    region_vars, callers_mods, callers_funcs, funcs_funcs, funcs_mods).
    """
    # Consume clauses one at a time; _apply_clause signals completion with -1.
    index = 0
    while index != -1:
        index, code_object = _apply_clause(index, clauses, code_object,
                                           meta_data, back_end)

    # TODO: This is proof of concept stuff
    atok = asttokens.ASTTokens(code_object.src, parse=True)
    v = loop_visitor(atok)
    v.visit(atok.tree)
    meta_data.region_source = v.loop_code
    meta_data.region_vars = set(v.loop_vars)

    frame = meta_data.stackframe[0]  # In 3.5, this can be stackframe.frame
    func_names = util.get_function_names_from_source(code_object.src,
                                                     meta_data.funcs_name)
    meta_data.callers_mods = util.get_modules_from_stackframe(frame)
    meta_data.callers_funcs = util.get_functions_from_stackframe(
        frame, func_names)
    meta_data.funcs_funcs = util.get_functions_from_module(
        meta_data.funcs_module, func_names)
    meta_data.funcs_mods = util.get_modules_from_module(meta_data.funcs_module)
    # Fix: dropped dead locals ('funcs', 'module_vars') that concatenated the
    # lists above but were never read; the meta_data assignments they drew
    # from are kept.

    new_source = back_end.for_loop(code_object, meta_data)
    return Code(new_source)
def get_text(self, full_text=None):
    """Return this node's source text, sliced out of *full_text*."""
    # NOTE(review): full_text defaults to None, but ASTTokens needs real
    # source text — presumably callers always supply it; confirm.
    marked = asttokens.ASTTokens(full_text, tree=self)
    return marked.get_text(self)
def collect_data(filename: str, args: argparse.ArgumentParser):
    """Read two unparallel corpuses from one file: functions and docstrings.

    Runs 2to3 over the file in place, parses it, and extracts per-function
    records.

    ---
    Returns:
        data: list of [filename, function_code, tokens, comments, docstring]
            rows, or None when the file cannot be read/parsed.
        is_appropriate: bool — True when the file yielded at least one
            function with a large-enough scope and parsed without errors.

    Side effects: increments the module-global error_counter on failures.
    """
    global error_counter

    # Convert Python 2 to Python 3 in place.
    # NOTE(review): hard-coded interpreter path — breaks on other machines.
    run(["/home/marat/anaconda3/envs/scs-ext/bin/2to3", filename, "-w", "-n"],
        stdout=DEVNULL, stderr=STDOUT)

    print("Building AST tree from a filename:", filename)
    try:
        code = read_file_to_string(filename)
    except Exception:  # fix: was a bare except (caught KeyboardInterrupt too)
        print("File with bad encoding:", filename)
        error_counter += 1
        is_appropriate = False
        return None, is_appropriate

    # let's replace tabs for spaces in the future
    code = code.replace('\t', ' ' * 4)  # fix: str.replace, regex not needed
    code_lines = code.splitlines()

    try:
        atok = asttokens.ASTTokens(code, parse=True)
        astree = atok.tree
    except Exception:  # fix: was a bare except
        print("Files with an error:", error_counter)
        error_counter += 1
        is_appropriate = False
        return None, is_appropriate

    data = []
    # Global loop: iterating over functions from file
    for fun_ind, fun in enumerate(ast.walk(astree)):
        if isinstance(fun, ast.FunctionDef) and len(fun.body) > 0:
            fun_begin = fun.first_token.startpos
            fun_end = fun.last_token.endpos
            prev_comment = get_previous_comments(fun, code_lines)

            docstring = ast.get_docstring(fun)
            if not docstring:
                docstring = ""
            else:
                docstring = DOCSTRING_PREFIX + docstring + "\n"

            # Forming scope -- set of node ids (variables): parameters plus
            # every name the function stores to.
            scope = [arg.arg for arg in fun.args.args]
            for node in ast.walk(fun):
                if isinstance(node, ast.Name) and \
                        isinstance(node.ctx, ast.Store):
                    scope.append(node.id)
            scope = set(scope)
            if len(scope) < 2:
                # Too small a scope to be an interesting sample.
                continue

            function_code = code[fun_begin:fun_end]
            # if met @classmethod keyword, should relax tabulation
            start_def = function_code.find("def")
            function_code = function_code[start_def:]

            function_code, tokens, comments, docstring, stopwords_count, \
                is_tokenizable = get_tokens(function_code)
            if not is_tokenizable:
                error_counter += 1
                function_code = ""
                tokens = []
            if len(prev_comment) > 0:
                comments = [prev_comment] + comments

            data.append([filename, function_code, tokens, comments, docstring])

    is_appropriate = len(data) > 0
    return data, is_appropriate
def _collapse(index, clause_list, code_object, meta_data, back_end): """ The 'collapse' clause is used to specify how many tightly nested loops are associated with the 'loop' construct. The argument to the 'collapse' clause must be a constant positive integer expression. If no 'collapse' clause is present, only the immediately following loop is associated with the 'loop' construct. If more than one loop is associated with the 'loop' construct, the iterations of all the associated loops are all scheduled according to the rest of the clauses. The trip count for all loops associated with the 'collapse' clause must be computable and invariant in all the loops. It is implementation-defined whether a 'gang', 'worker' or 'vector' clause on the construct is applied ot each loop, or to the linearized iteration space. """ #TODO: This one's easy enough: just make sure that the number of iterations # on each of the loops is invariant and countable and then set in the # code_object a value to tell it which loops are talked about by the # rest of the clauses. # for node in tree: # if node is not a loop: # break # elif not loop is invariant and countable: # break # else: # num_loops += 1 # if num_loops != n: # raise some sort of error that explains how many and which loops were # found, and that you want n loops collapsed, but we could only # guarantee num_loops # # code_object.num_loops = num_loops class _visitor(ast.NodeVisitor): def __init__(self, atok): self.atok = atok self._seen = set() def generic_visit(self, node): type_name = type(node).__name__ if type_name == "comprehension": # TODO pass elif type_name == "For": # TODO pass ast.NodeVisitor.generic_visit(self, node) atok = asttokens.ASTTokens(code_object.src, parse=True) tree = atok.tree v = _visitor(atok) v.visit(tree) print("Done; exiting") exit() return -1, code_object
def python2tree(line):
    """Parse *line* with asttokens; return the (index, tree) pair."""
    marked = asttokens.ASTTokens(line, parse=True)
    return marked, marked.tree
def get_ranges(code):
    """Visit *code*'s AST with a RangeFinder and return the ranges it found."""
    finder = RangeFinder()
    finder.visit(asttokens.ASTTokens(code, parse=True).tree)
    return finder.ranges
def _parse_source(path_, obj):
    """Parse a serialized model module at *path_* into _Instruction objects.

    Each top-level statement maps to instructions against *obj*: function
    defs become formula/cell definitions, and assignments to the special
    names _name/_formula/_refs/_bases/_method/_allow_none become the
    corresponding method calls.  A leading string expression is treated as
    the object's docstring.  Returns the accumulated instruction list.

    NOTE(review): a '_name' assignment runs its rename instruction
    immediately (.run()) rather than returning it — presumably so later
    statements see the new name; confirm.
    """
    with open(path_, "r") as f:
        src = f.read()
    atok = asttokens.ASTTokens(src, parse=True)

    def parse_stmt(node):
        """Return (list of) instructions"""
        if isinstance(node, ast.FunctionDef):
            # A def named '_formula' sets the object's own formula; any other
            # def creates a new cells object of that name.
            if node.name == "_formula":
                method = "set_formula"
            else:
                method = "new_cells"
            funcdef = atok.get_text(node)
            # The code below is just for adding back comment in the last line
            # such as:
            #     def foo():
            #         return 0  # Comment
            nxtok = node.last_token.index + 1
            if nxtok < len(atok.tokens) and (
                atok.tokens[nxtok].type == tokenize.COMMENT
            ) and node.last_token.line == atok.tokens[nxtok].line:
                deflines = funcdef.splitlines()
                deflines.pop()
                deflines.append(node.last_token.line.rstrip())
                funcdef = "\n".join(deflines)
            return [
                _Instruction(obj=obj, method=method,
                             kwargs={"formula": funcdef})
            ]

        if isinstance(node, ast.Assign):
            if node.first_token.string == "_name":
                # Rename immediately (note .run(), not deferred).
                method = "rename"
                val = ast.literal_eval(atok.get_text(node.value))
                _Instruction(obj=obj, method=method, args=(val, ), kwargs={
                    "rename_old": True
                }).run()
                return []
            elif node.first_token.string == "_formula":
                # lambda formula definition
                method = "set_formula"
                val = atok.get_text(node.value)
                if val == "None":
                    val = None
                kwargs = {"formula": val}
                return [_Instruction(obj=obj, method=method, kwargs=kwargs)]
            elif node.first_token.string == "_refs":
                # References are stored as JSON; decode them relative to
                # this object's fullname.
                def bound_decode_refs(data):
                    return _decode_refs(data, obj.fullname)

                refs = json.loads(atok.get_text(node.value),
                                  object_hook=bound_decode_refs)

                def refhook(args, kwargs):
                    # Restore the ref value lazily, at instruction run time.
                    if args:
                        key, val = args
                        val = _restore_ref(val)
                        args = (key, val)
                    return args, kwargs

                result = []
                for key, val in refs.items():
                    result.append(
                        _Instruction(obj=obj, method="__setattr__",
                                     args=(key, val), arghook=refhook))
                return result
            elif node.first_token.string == "_bases":
                # Base names are stored relative; make them absolute.
                bases = [
                    _RefData(rel_to_abs(base, obj.fullname))
                    for base in ast.literal_eval(atok.get_text(node.value))
                ]

                def basehook(args, kwargs):
                    if args:
                        args = _restore_ref(args)
                    return args, kwargs

                return [
                    _Instruction(obj=obj, method="add_bases", args=bases,
                                 arghook=basehook)
                ]
            elif node.first_token.string == "_method":
                def excelhook(args, kwargs):
                    # path_ is free variable
                    # Add path to file name
                    args[0] = str(path_.with_name(args[0]))
                    return args, kwargs

                _method = json.loads(atok.get_text(node.value))
                return [
                    _Instruction(obj=obj,
                                 method=_method["method"],
                                 args=_method["args"],
                                 kwargs=_method["kwargs"],
                                 arghook=excelhook)
                ]
            elif node.first_token.string == "_allow_none":
                args = json.loads(atok.get_text(node.value))
                return [
                    _Instruction(obj=obj, method="set_property",
                                 args=["allow_none", args])
                ]
            else:
                # lambda cells definition
                return [
                    _Instruction(obj=obj, method="new_cells", kwargs={
                        "name": atok.get_text(node.targets[0]),
                        "formula": atok.get_text(node.value)
                    })
                ]

    result = []
    for i, stmt in enumerate(atok.tree.body):
        if (i == 0 and isinstance(stmt, ast.Expr)
                and isinstance(stmt.value, ast.Str)):
            # A leading string literal is the module/object docstring.
            inst = _Instruction(obj=type(obj).doc, method="fset",
                                args=(obj, stmt.value.s))
            result.append(inst)
        else:
            result.extend(parse_stmt(stmt))
    return result
def find_missing_trailing_commas(source_code, *, filename='<unknown>'):
    """Scan *source_code* for missing trailing commas; return the finder."""
    marked = asttokens.ASTTokens(source_code, filename=filename, parse=True)
    finder = MissingTrailingCommaFinder(marked)
    finder.visit(marked.tree)
    return finder
def parse(source):
    """Parse *source* (a str) and return its ASTTokens index."""
    # Guard kept as an assert to preserve the original failure mode.
    assert isinstance(source, str)
    return asttokens.ASTTokens(source, parse=True)