def inline_calls(func_ir, _locals):
    """Inline calls to ``CPUDispatcher`` functions found in ``func_ir``.

    Every block of ``func_ir`` is scanned for assignments whose right-hand
    side is a ``'call'`` expression. When the called object resolves (via
    ``get_definition``) to an ``ir.Global`` or ``ir.FreeVar`` holding a
    ``CPUDispatcher``, the dispatcher's ``py_func`` is inlined at the call
    site with ``inline_closure_call``.

    ``_locals`` is updated in place: if the inliner returns a tuple, its
    second element is treated as a mapping from the callee's variable names
    to their renamed variables, and the dispatcher's ``locals`` entries are
    re-registered under the renamed variable names.

    After all blocks are processed, ``func_ir.blocks`` is replaced by its
    simplified CFG.
    """
    pending = list(func_ir.blocks.items())
    while pending:
        _, cur_block = pending.pop()
        for idx, stmt in enumerate(cur_block.body):
            if not isinstance(stmt, ir.Assign):
                continue
            rhs = stmt.value
            if not (isinstance(rhs, ir.Expr) and rhs.op == 'call'):
                continue
            func_def = guard(get_definition, func_ir, rhs.func)
            if not isinstance(func_def, (ir.Global, ir.FreeVar)):
                continue
            if not isinstance(func_def.value, CPUDispatcher):
                continue

            py_func = func_def.value.py_func
            inline_out = inline_closure_call(
                func_ir, py_func.__globals__, cur_block, idx, py_func,
                work_list=pending)

            # TODO remove if when inline_closure_call() output fix
            # is merged in Numba
            if isinstance(inline_out, tuple):
                renamed = inline_out[1]
                # TODO: update '##distributed' and '##threaded' in _locals
                _locals.update({
                    renamed[name].name: typ
                    for name, typ in func_def.value.locals.items()
                    if name in renamed
                })

            # The current block was split by the inliner; the remainder of
            # its statements now lives in a new block that the inliner
            # pushed onto the work list, so stop scanning this one.
            break

    # sometimes type inference fails after inlining since blocks are inserted
    # at the end and there are agg constraints (categorical_split case)
    # CFG simplification fixes this case
    func_ir.blocks = ir_utils.simplify_CFG(func_ir.blocks)
def inline_new_blocks(func_ir, block, i, callee_blocks, work_list=None):
    """Splice already-built ``callee_blocks`` into ``func_ir`` at a call site.

    Adopted from ``inline_closure_call``. The ``i``-th statement of ``block``
    is the call being replaced: the statements after it move to a fresh
    block, ``block`` is truncated and ends with a jump to the first callee
    block (in topological order), and callee returns are rewritten to
    assign to the call's target and continue in the fresh block.

    ``ir_utils._max_label`` is reset to the largest relabeled callee label.
    If ``work_list`` is given, every newly inserted ``(label, block)`` pair
    is appended to it. Returns the relabeled, CFG-simplified callee blocks.
    """
    caller_scope = block.scope
    call_stmt = block.body[i]

    # 1. relabel callee_ir by adding an offset
    callee_blocks = add_offset_to_labels(callee_blocks,
                                         ir_utils._max_label + 1)
    callee_blocks = ir_utils.simplify_CFG(callee_blocks)
    # reset globals in ir_utils before we use it
    ir_utils._max_label = max(callee_blocks.keys())
    topo_order = find_topo_order(callee_blocks)

    # 5. split caller blocks into two
    tail_block = ir.Block(caller_scope, block.loc)
    tail_block.body = block.body[i + 1:]
    tail_label = ir_utils.next_label()
    func_ir.blocks[tail_label] = tail_block
    inserted = [(tail_label, tail_block)]
    block.body = block.body[:i]
    entry_label = topo_order[0]
    block.body.append(ir.Jump(entry_label, call_stmt.loc))

    # 6. replace Return with assignment to LHS
    numba.inline_closurecall._replace_returns(
        callee_blocks, call_stmt.target, tail_label)
    # remove the old definition of instr.target too
    target_name = call_stmt.target.name
    if target_name in func_ir._definitions:
        func_ir._definitions[target_name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        callee_block = callee_blocks[label]
        # block scope must point to parent's
        callee_block.scope = caller_scope
        numba.inline_closurecall._add_definitions(func_ir, callee_block)
        func_ir.blocks[label] = callee_block
        inserted.append((label, callee_block))

    if work_list is not None:
        work_list.extend(inserted)
    return callee_blocks
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """Converts a set of stencil kernel blocks to a parfor.

    Parameters
    ----------
    label
        Passed to ``self.array_analysis.get_equiv_set`` to obtain the
        equivalence set used for the input array's shape.
    in_args
        Stencil arguments; ``in_args[0]`` is used as the input array
        variable whose type, scope, loc and shape drive the parfor.
    out_arr
        Output array variable, or ``None``. When ``None``, an output array
        is allocated in the parfor's init block with ``np.full`` using the
        input array's shape and a fill value of ``cval`` (if present in
        ``stencil_func.options``) or ``0``.
    stencil_ir
        IR of the stencil kernel; its ``blocks`` are rewritten in place and
        become the parfor body.
    index_offsets, arg_to_arr_dict
        Forwarded unchanged to ``self._replace_stencil_accesses``.
    target
        Variable that is assigned ``out_arr`` by the last generated node.
    return_type
        Type of the stencil result; only its ``dtype`` is used here.
    stencil_func
        Stencil function object; forwarded to
        ``self._replace_stencil_accesses`` and consulted for
        ``options["cval"]``.

    Returns
    -------
    list
        ``gen_nodes``: whatever the start/last index helpers put into it
        (the list is handed to them), followed by the ``Parfor`` node and
        the final ``target = out_arr`` assignment.

    Raises
    ------
    ValueError
        If ``cval`` is given and its inferred type differs from
        ``return_type.dtype``.
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT == 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)

    ir_utils.apply_copy_propagate(
        stencil_blocks,
        in_cps,
        name_var_table,
        self.typemap,
        self.calltypes)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars: one intp index variable per input dimension
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_blocks, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(
            start_lengths[i], gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(numba.parfor.LoopNest(parfor_vars[i],
                                               start_ind, last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        # Multi-dimensional case: the setitem index is a tuple built from
        # the per-dimension index variables inside the exit block.
        parfor_ind_var = ir.Var(scope, mk_unique_var(
            "$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    # Identity test: ``out_arr`` is either None or an IR variable, and
    # ``== None`` would dispatch to whatever __eq__ that variable defines.
    if out_arr is None:
        in_arr_typ = self.typemap[in_arr.name]

        # shape_var = in_arr.shape
        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.append(ir.Assign(shape_getattr, shape_var, loc))

        # zero_var = cval (or 0), typed as the stencil's result dtype
        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.append(ir.Assign(full_const, zero_var, loc))

        # out_arr = np.full(shape_var, zero_var, np.<dtype name>)
        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.types.npytypes.Array(
            return_type.dtype,
            in_arr_typ.ndim,
            in_arr_typ.layout)
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(
            dtype_g_np_var, return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(
            dtype_np_attr_call, dtype_attr_var, loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full",
                                     np.full,
                                     out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx,
                                     self.typemap,
                                     self.calltypes)
        # Record that the new output array has the input array's shape.
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # out_arr[parfor_ind_var] = exit_value_var, placed in the exit block
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype
    )
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(0, ir.Loc("stencilparfor_dummy", -1)))
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # drop the dummy return again from the highest-labelled block
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT == 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfor.Parfor(loopnests, init_block, stencil_blocks,
                                 loc, parfor_ind_var, equiv_set, pattern,
                                 self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None,
                        arg_typs=None, typemap=None, calltypes=None,
                        work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of `block`)

    `func_ir` is the func_ir object of the caller function and `glbls` is its
    global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node. `callee` is either the called function or a
    make_function node. `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.

    If `work_list` is not None, every newly created ``(label, block)`` pair
    (the tail of the split caller block plus all callee blocks) is appended
    to it. `typemap` and `calltypes` are updated in place when `typingctx`
    is given, and ``ir_utils._max_label`` is reset to the largest relabeled
    callee label.

    Returns the relabeled callee blocks that were merged into `func_ir`.

    Raises NotImplementedError when the callee's defaults are neither a
    tuple, an ``ir.Var`` nor a variable name.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)
    # support both function object and make_function Expr
    callee_code = callee.code if hasattr(callee, 'code') else callee.__code__
    callee_defaults = (callee.defaults if hasattr(callee, 'defaults')
                       else callee.__defaults__)
    callee_closure = (callee.closure if hasattr(callee, 'closure')
                      else callee.__closure__)
    # first, get the IR of the callee
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. relabel callee_ir by adding an offset
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    #    reset globals in ir_utils before we use it
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. rename all local variables in callee_ir with new locals created in
    #    func_ir
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    #    one function should only have one local scope
    assert(len(callee_scopes) == 1)
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        # free variables are substituted in step 4, not renamed
        if var.name not in callee_code.co_freevars:
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. replace formal parameters with actual arguments
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple):  # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, (ir.Var, str)):
            defaults = func_ir.get_definition(callee_defaults)
            assert(isinstance(defaults, ir.Const))
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # Report the offending object itself: the local ``defaults`` is
            # only bound in the branch above, so formatting it here would
            # raise UnboundLocalError instead of this error.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee_defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. replace freevar with actual closure var
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # tuple of cell objects: read each cell's content via the
            # C API function PyCell_Get
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert(isinstance(closure, ir.Expr)
                   and closure.op == 'build_tuple')
            items = closure.items
        assert(len(callee_code.co_freevars) == len(items))
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        # typed pass: infer types for the callee and merge them into the
        # caller's typing tables
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = compiler.type_inference_stage(
            typingctx, callee_ir, arg_typs, None)
        canonicalize_array_math(callee_ir, f_typemap,
                                f_calltypes, typingctx)
        # remove argument entries like arg.a from typemap
        arg_names = [vname for vname in f_typemap if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)

    # 5. split caller blocks into two
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. replace Return with assignment to LHS
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    #    remove the old definition of instr.target too
    if instr.target.name in func_ir._definitions:
        func_ir._definitions[instr.target.name] = []

    # 7. insert all new blocks, and add back definitions
    for label in topo_order:
        callee_block = callee_blocks[label]
        # block scope must point to parent's
        callee_block.scope = scope
        _add_definitions(func_ir, callee_block)
        func_ir.blocks[label] = callee_block
        new_blocks.append((label, callee_block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for new_entry in new_blocks:
            work_list.append(new_entry)
    return callee_blocks