def _get_assign_records(self, node):
    """Record "callable" hints for calls whose result is later called by name.

    First pass: every single-target assignment of the form ``name = call(...)``
    found under *node* is remembered as ``name -> first call name`` (the call
    name is whatever ``get_func_calls`` reports first for the right-hand side).

    Second pass: for every call name reported for *node* as a whole, if that
    name is one of the remembered assignment targets, the call that produced
    the target is appended to ``self.type_hint_pairs`` with the hint
    ``"callable"``.
    """
    producer_of = {}
    for stmt in ast.walk(node):
        if not isinstance(stmt, ast.Assign):
            continue
        if len(stmt.targets) != 1:
            continue
        target, value = stmt.targets[0], stmt.value
        if not (isinstance(target, ast.Name) and isinstance(value, ast.Call)):
            continue
        # A later assignment to the same name overrides an earlier one.
        producer_of[target.id] = get_func_calls(value)[0]

    for called_name in get_func_calls(node):
        if called_name not in producer_of:
            continue
        # The assigned name is itself invoked, so the producing call
        # must have returned something callable.
        self.type_hint_pairs += [(producer_of[called_name], "callable")]
def visit_IfExp(self, node):
    """Visit a conditional expression (``a if test else b``).

    When the test is a direct call, the first call name reported by
    ``get_func_calls`` is recorded in ``self.type_hint_pairs`` with the hint
    ``"bool"``. Children are then visited generically and *node* is returned.
    """
    condition = node.test
    if isinstance(condition, ast.Call):
        callee = get_func_calls(condition)[0]
        self.type_hint_pairs += [(callee, "bool")]
    self.generic_visit(node)
    return node
def visit_While(self, node):
    """Visit a ``while`` statement.

    When the loop test is a direct call, the first call name reported by
    ``get_func_calls`` is appended to ``self.bo_test``. Children are then
    visited generically and *node* is returned.
    """
    condition = node.test
    if isinstance(condition, ast.Call):
        callee = get_func_calls(condition)[0]
        self.bo_test += [callee]
    self.generic_visit(node)
    return node
def visit_Compare(self, node):
    """Visit a comparison and derive type hints from its first two operands.

    Only ``node.left`` and the first comparator are inspected; further
    comparators of a chained comparison (``a < b < c``) are ignored here but
    still reached through ``generic_visit``.

    * If one side is a call and the other side has an informative type
      (anything ``tell_type`` reports other than "unknown", "ID",
      "subscript" or "attr"), the pair ``(call name, other side's type)`` is
      added to ``self.type_hint_pairs``.
    * If both sides are calls, the pair of call names is added to
      ``self.call_links``.

    Returns *node* after visiting its children.
    """
    # Operand kinds that say nothing about the concrete type of the value.
    uninformative = ["unknown", "ID", "subscript", "attr"]

    left = node.left
    right = node.comparators[0]
    left_type = tell_type(left)
    right_type = tell_type(right)

    if left_type not in uninformative and right_type == "call":
        self.type_hint_pairs += [(get_func_calls(right)[0], left_type)]
    if right_type not in uninformative and left_type == "call":
        self.type_hint_pairs += [(get_func_calls(left)[0], right_type)]

    if left_type == "call" and right_type == "call":
        link = (get_func_calls(left)[0], get_func_calls(right)[0])
        # Accumulate links instead of overwriting them: a plain assignment
        # here discarded every link found by an earlier comparison.  The
        # attribute is still rebound to a fresh list, and a missing attribute
        # is tolerated, so this also works where the old assignment did.
        previous = getattr(self, "call_links", None) or []
        self.call_links = list(previous) + [link]

    self.generic_visit(node)
    return node
def type_infer(self, node):
    """Classify *node* and record the outcome on the visitor.

    Depending on what ``tell_type`` reports for *node*:

    * ``'ID'``: the name is looked up with ``query_assign_records`` and, if a
      value is found, inference recurses into it; otherwise nothing is
      recorded.
    * ``'call'``: ``'input'`` is added to ``self.r_types`` when the call name
      (or its first dotted component) is a function argument or an
      ``__init__`` argument name; copy helpers are ignored; any other call
      name is appended to ``self.stem_from``.
    * ``"subscript"``: ``'input'`` is recorded when the subscripted value is a
      plain name listed in ``self.args``; otherwise nothing is recorded.
    * anything else: the reported type itself is added to ``self.r_types``.
    """
    kind = tell_type(node)

    if kind == 'ID':
        assigned = self.query_assign_records(node.id)
        if assigned is not None:
            self.type_infer(assigned)
        # Names without an assignment record are left unresolved for now.
        # TODO: include the entire module rather than splitting it up.
        # TODO: build a better data flow to track the definition.
        # TODO: handle Name objects that are not in the assign records.
        # TODO: restructure the implementation.
        # TODO: add alias analysis for copy() and ==.
        return

    if kind == 'call':
        callee = get_func_calls(node)[0]
        head = callee.split('.')[0]
        # Same short-circuit order as separate branches would give: the
        # init-argument list is only consulted when the argument list misses.
        comes_from_input = (
            head in self.args
            or head in self.class_assign_records["init_arg_name_lst"]
            or callee in self.args
            or callee in self.class_assign_records["init_arg_name_lst"]
        )
        if comes_from_input:
            self.r_types += ['input']
        elif callee not in ['copy', 'deepcopy', 'copy.copy', 'copy.deepcopy']:
            # An ordinary call: remember which function the value stems from.
            self.stem_from.append(callee)
        return

    if kind == "subscript":
        base = getattr(node, "value", None)
        if isinstance(base, ast.Name) and base.id in self.args:
            self.r_types += ['input']
        return

    # A concrete, already-known type.
    self.r_types += [kind]
def visit_Call(self, node):
    """Record the name of a call on the block currently being built.

    The first call name reported by ``get_func_calls`` for *node* is appended
    to ``self.current_block.func_calls``. Children of the call are not
    visited from here and nothing is returned.
    """
    # A nested name-resolution helper used to live here; it was never called
    # (name resolution is delegated to ``get_func_calls``), so it was removed.
    func_name = get_func_calls(node)[0]
    self.current_block.func_calls.append(func_name)
def backward(self, cfg, block, return_value):
    """Infer what a returned expression is and record it on the visitor.

    Results are accumulated in ``self.r_types`` (type names / markers such as
    "empty", "self", "callable", "input") and ``self.stem_from`` (names of
    calls the value originates from). Nothing is returned.

    Args:
        cfg: object whose ``backward(block, value, visited, None)`` is asked
            for the defining value of *return_value*; when it yields ``None``
            the returned expression itself is analysed.
            NOTE(review): presumably a control-flow graph doing a backward
            definition search -- confirm against its implementation.
        block: the block handed through to ``cfg.backward``.
        return_value: the AST node being returned, or ``None`` for a bare
            ``return``.
    """
    if return_value is None:
        self.r_types += ["empty"]
        return
    if isinstance(return_value, ast.Name):
        if return_value.id == "self":
            self.r_types += ["self"]
            return
        if return_value.id in self.inner_fun_names:
            # Returning a nested function by name.
            self.r_types += ["callable"]
            return

    is_visited = set()
    init_val = cfg.backward(block, return_value, is_visited, None)
    if init_val is None:
        init_val = return_value
    type_val = tell_type(init_val)
    # An ``init_val is None`` branch used to follow here. It could never run,
    # because ``init_val`` has just been replaced by the non-None
    # ``return_value``, so it was removed.

    if type_val in ["ID", "attr"]:
        # Name or attribute: continue with the most recent value assigned to
        # it, trying local records first, then class-level ones, then the
        # general records.
        if type_val == "ID":
            lookup_name = init_val.id
        else:
            lookup_name = get_attr_name(init_val)

        history = None
        if lookup_name in self.local_assign_records:
            history = self.local_assign_records[lookup_name]
        elif (lookup_name[0:5] == "self."
              and lookup_name[5:] in self.class_assign_records):
            history = self.class_assign_records[lookup_name[5:]]
        elif lookup_name in self.class_assign_records:
            history = self.class_assign_records[lookup_name]
        elif lookup_name in self.assign_records:
            history = self.assign_records[lookup_name]

        if history is not None:
            self.type_infer(history[-1])

    elif type_val == "call":
        func_name = get_func_calls(init_val)[0]
        first_part = func_name.split('.')[0]
        if func_name == "self.__class__":
            # Instantiating the own class is the same as returning self.
            self.r_types += ['self']
        elif first_part != 'self' and first_part in self.args:
            self.r_types += ['input']
        elif (first_part != 'self' and first_part
              in self.class_assign_records["init_arg_name_lst"]):
            self.r_types += ['input']
        else:
            # An ordinary call: remember which function the value stems from.
            self.stem_from.append(func_name)

    elif type_val == "subscript":
        # Inspect the subscripted base (``base[...]``). The previous check
        # tested whether the subscript node itself was a Name, which can
        # never hold, so subscripts of arguments were silently dropped.
        base = getattr(init_val, "value", None)
        if isinstance(base, ast.Name):
            if (base.id in self.args or base.id
                    in self.class_assign_records["init_arg_name_lst"]):
                self.r_types += ['input']

    else:
        # A concrete, already-known type.
        self.r_types += [type_val]
# Node classes whose reported type depends on the node class alone.
_NODE_CLASS_TYPES = (
    (ast.List, "list"),
    (ast.ListComp, "list"),
    (ast.Subscript, "subscript"),
    (ast.Tuple, "tuple"),
    (ast.Dict, "dict"),
    (ast.DictComp, "dict"),
    (ast.Set, "set"),
    (ast.SetComp, "set"),
    (ast.JoinedStr, "str"),
    (ast.Lambda, "lambda"),
    (ast.GeneratorExp, "generator"),
    (ast.Attribute, "attr"),
)

# Reported type for calls to these plain (undotted) names.
_BUILTIN_CALL_TYPES = {
    "dict": "dict",
    "list": "list",
    "tuple": "tuple",
    "set": "set",
    "str": "str",
    "id": "num",
    "sum": "num",
    "len": "num",
    "int": "num",
    "float": "num",
    "ceil": "num",
    "floor": "num",
    "max": "num",
    "min": "num",
    "all": "NC",
    "any": "NC",
    "assert": "NC",
    "bool": "NC",
    "isinstance": "NC",
    "iter": "iterator",
    "bytes": "bytes",
}

# Operands of ``+`` that decide the result type on their own.
_ADD_DECIDING_OPERANDS = (ast.Constant, ast.List, ast.ListComp, ast.Set,
                          ast.SetComp, ast.Dict, ast.DictComp)

# Operand types that carry no information about the result of ``+``.
_UNINFORMATIVE_OPERAND_TYPES = ("unknown", "ID", "attr")


def _literal_value(node):
    """Return ``(True, value)`` when *node* is a literal node, else ``(False, None)``.

    ``ast.Constant`` is what the parser produces on Python 3.8+. The pre-3.8
    literal classes are recognised by class name so that the deprecated
    (and, from Python 3.14, removed) ``ast.Num`` / ``ast.Str`` /
    ``ast.NameConstant`` attributes are never touched.
    """
    if isinstance(node, ast.Constant):
        return True, node.value
    if isinstance(node, ast.AST):
        legacy_name = type(node).__name__
        if legacy_name == "NameConstant":
            return True, node.value
        if legacy_name == "Num":
            return True, node.n
        if legacy_name in ("Str", "Bytes"):
            return True, node.s
    return False, None


def _literal_type(value):
    """Map the Python value held by a literal node to its type name."""
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(value, bool):
        return "bool"
    if value is None:
        return "NC"
    if isinstance(value, int):
        return "int"
    if isinstance(value, float):
        return "float"
    if isinstance(value, complex):
        return "num"
    if isinstance(value, str):
        return "str"
    if isinstance(value, bytes):
        return "bytes"
    return "unknown"


def _binop_type(node):
    """Guess the result type of a binary operation, or "unknown"."""
    op = node.op
    if isinstance(op, (ast.Div, ast.Mult)):
        return "float"
    if isinstance(op, ast.Mod):
        # '(step %d of %d) Processing %s' % (idx, total, name)
        if (isinstance(node.left, ast.Constant)
                and isinstance(node.left.value, str)):
            return "str"
        # template % {"key": value}
        if isinstance(node.left, ast.Name) and isinstance(node.right, ast.Dict):
            return "str"
        return "unknown"
    if isinstance(op, ast.Add):
        operands = (node.left, node.right)
        # A literal or container operand decides the type outright.
        for operand in operands:
            if (isinstance(operand, _ADD_DECIDING_OPERANDS)
                    or type(operand).__name__ == "Num"):
                return tell_type(operand)
        # Otherwise take the first operand with an informative type.
        for operand in operands:
            operand_type = tell_type(operand)
            if operand_type not in _UNINFORMATIVE_OPERAND_TYPES:
                return operand_type
    return "unknown"


def _call_type(node):
    """Guess the result type of a call node."""
    func_name = get_func_calls(node)[0]
    if isinstance(node.func, ast.Name):
        builtin_type = _BUILTIN_CALL_TYPES.get(node.func.id)
        if builtin_type is not None:
            return builtin_type
        # A CamelCase name is treated as a class being instantiated.
        return func_name if is_camel_case(func_name) else "call"
    if is_camel_case(func_name.split(".")[-1]):
        return func_name
    if func_name in ['join', 'format']:
        return "str"
    return "call"


def tell_type(node):
    """Return a short type name (or marker) for an AST node.

    Possible results include concrete names ("int", "float", "num", "str",
    "bytes", "bool", "list", "tuple", "dict", "set", "lambda", "generator",
    "iterator"), the name of a CamelCase callable being called, and the
    markers "NC" (``None`` / no concrete type), "self", "ID" (plain name),
    "attr", "subscript", "call" and "unknown".

    Two non-node inputs are accepted: ``None`` yields "NC", and a string
    starting with "org" yields everything after its fourth character.

    Literals are classified through ``ast.Constant`` by the type of the value
    they hold, so the function does not depend on the ``ast.Num`` /
    ``ast.Str`` / ``ast.NameConstant`` aliases that newer Python versions no
    longer provide. Bytes literals are reported as "bytes", matching what a
    ``bytes(...)`` call is reported as.
    """
    if node is None:
        return "NC"
    if isinstance(node, str) and node[0:3] == "org":
        return node[4:]

    # Boolean operators, comparison operators/expressions and ``not x``.
    if isinstance(node, (ast.BoolOp, ast.cmpop, ast.Compare)):
        return "bool"
    if isinstance(node, ast.UnaryOp) and isinstance(node.op, ast.Not):
        return "bool"

    if isinstance(node, ast.BinOp):
        return _binop_type(node)

    if isinstance(node, ast.Name):
        return "self" if node.id == 'self' else 'ID'

    is_literal, value = _literal_value(node)
    if is_literal:
        return _literal_type(value)

    if isinstance(node, ast.Call):
        return _call_type(node)

    for node_class, type_name in _NODE_CLASS_TYPES:
        if isinstance(node, node_class):
            return type_name
    return "unknown"