def test1(self):
    """Check that copy propagation followed by dead-code removal drops ``x``.

    The IR of ``test_will_propagate`` is typed for three ``int64`` arguments,
    copies are propagated, dead code is removed, and the test then asserts
    that no assignment to ``x`` is left in the IR.
    """
    typing_context = typing.Context()
    target_context = cpu.CPUContext(typing_context)
    func_ir = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(typing_context, target_context):
        typing_context.refresh()
        target_context.refresh()

        arg_types = (types.int64,) * 3
        inferred = type_inference_stage(
            typing_context, func_ir, arg_types, None)
        typemap, return_type, calltypes = inferred

        # The annotation object is built but not consulted afterwards.
        type_annotation = type_annotations.TypeAnnotation(
            func_ir=func_ir,
            typemap=typemap,
            calltypes=calltypes,
            lifted=(),
            lifted_from=None,
            args=arg_types,
            return_type=return_type,
            html_output=config.HTML)

        blocks = func_ir.blocks
        remove_dels(blocks)
        in_cps, _ = copy_propagate(func_ir.blocks, typemap)
        name_var_table = get_name_var_table(func_ir.blocks)
        apply_copy_propagate(
            func_ir.blocks, in_cps, name_var_table, typemap, calltypes)

        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
        x_is_assigned = findLhsAssign(func_ir, "x")
        self.assertFalse(x_is_assigned)
def run_pass(self, state):
    """Run array analysis and loop conversion on ``state.func_ir``.

    A ``ParforPass`` is built from the compiler state only to reuse its
    array analysis and its ``_convert_loop`` step. Dead code is removed
    afterwards and the parfor parameters are computed.

    Returns:
        Always ``True``.
    """
    func_ir = state.func_ir
    annotation = state.type_annotation
    parfor_module = numba.parfors.parfor

    converter = parfor_module.ParforPass(
        func_ir,
        annotation.typemap,
        annotation.calltypes,
        state.return_type,
        state.typingctx,
        state.flags.auto_parallel,
        state.flags,
        state.parfor_diagnostics,
    )

    remove_dels(func_ir.blocks)

    # Array analysis runs before the loop conversion step.
    converter.array_analysis.run(func_ir.blocks)
    converter._convert_loop(func_ir.blocks)

    remove_dead(
        func_ir.blocks,
        func_ir.arg_names,
        func_ir,
        annotation.typemap,
    )

    parfor_module.get_parfor_params(
        func_ir.blocks,
        converter.options.fusion,
        converter.nested_fusion_info,
    )

    return True
def compare_ir(self, ir_list):
    """Assert that every IR in ``ir_list`` dumps to the same text.

    ``remove_dead`` is applied to each IR first, so the IR objects passed in
    are modified by this call. Each IR is then dumped to text and the dumps
    are compared against the first one, so a mismatch is reported with the
    offending position and a textual diff instead of a bare
    "False is not true".

    Args:
        ir_list: Iterable of function IR objects exposing ``blocks``,
            ``arg_names`` and ``dump(file=...)``.

    Raises:
        AssertionError: If ``ir_list`` is empty or any dump differs from the
            first one.
    """
    outputs = []
    for func_ir in ir_list:
        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
        with StringIO() as buffer:
            func_ir.dump(file=buffer)
            outputs.append(buffer.getvalue())

    # An empty list has nothing to agree on; the set-based check this replaces
    # failed in that case as well, so keep failing explicitly.
    self.assertTrue(outputs, "compare_ir needs at least one IR to compare")

    reference = outputs[0]
    for position, dumped in enumerate(outputs[1:], start=1):
        self.assertEqual(
            reference,
            dumped,
            "IR #%d differs from IR #0 after dead-code removal" % position,
        )
def run_pass(self, state):
    """Rewrite ndarray function names in ``state.func_ir``.

    Runs ``RewriteNdarrayFunctions`` with ``rewrite_function_name_map``. When
    the rewrite changed the IR, dead code is removed. The control-flow graph
    is simplified afterwards and stored back on the function IR.

    Returns:
        The value returned by the rewrite's ``run()``.
    """
    rewriter = RewriteNdarrayFunctions(state, rewrite_function_name_map)
    mutated = rewriter.run()

    func_ir = state.func_ir
    if mutated:
        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)

    simplified_blocks = simplify_CFG(func_ir.blocks)
    func_ir.blocks = simplified_blocks

    return mutated
def test2(self):
    """Check that ``remove_dead`` keeps a call to ``np.random.seed``."""

    def call_np_random_seed():
        np.random.seed(2)

    def is_seed_call(func_ir, inst):
        # True only for an assignment whose value is a call expression and
        # whose callee definition has the attribute name 'seed'.
        if not isinstance(inst, ir.Assign):
            return False
        expr = inst.value
        if not isinstance(expr, ir.Expr):
            return False
        if expr.op != 'call':
            return False
        return func_ir.get_definition(expr.func).attr == 'seed'

    def seed_call_exists(func_ir):
        # Only the statements of block 0 are inspected.
        first_block = func_ir.blocks[0]
        return any(is_seed_call(func_ir, inst) for inst in first_block.body)

    func_ir = compiler.run_frontend(call_np_random_seed)
    remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
    self.assertTrue(seed_call_exists(func_ir))
def test1(self):
    """Check that copy propagation followed by dead-code removal drops ``x``.

    The IR of ``test_will_propagate`` is typed for three ``int64`` arguments,
    copies are propagated, dead code is removed, and the test then asserts
    that no assignment to ``x`` is left in the IR.
    """
    typing_context = typing.Context()
    target_context = cpu.CPUContext(typing_context)
    func_ir = compiler.run_frontend(test_will_propagate)

    with cpu_target.nested_context(typing_context, target_context):
        typing_context.refresh()
        target_context.refresh()

        arg_types = (types.int64,) * 3
        # Only the typemap and the call types of the four results are used.
        inferred = type_inference_stage(
            typing_context, target_context, func_ir, arg_types, None)
        typemap, _, calltypes, _ = inferred

        remove_dels(func_ir.blocks)
        in_cps, _ = copy_propagate(func_ir.blocks, typemap)
        name_var_table = get_name_var_table(func_ir.blocks)
        apply_copy_propagate(
            func_ir.blocks, in_cps, name_var_table, typemap, calltypes)

        remove_dead(func_ir.blocks, func_ir.arg_names, func_ir)
        x_is_assigned = findLhsAssign(func_ir, "x")
        self.assertFalse(x_is_assigned)
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
                       index_offsets, target, return_type, stencil_func,
                       arg_to_arr_dict):
    """Convert a set of stencil kernel blocks to a parfor.

    The kernel blocks in ``stencil_ir`` are cleaned up (copy propagation and
    dead-code removal), their stencil accesses are rewritten in terms of new
    parfor index variables, and every return is redirected to a single exit
    block that stores the result into the output array.

    Args:
        label: Block label used to look up the array-analysis equivalence set.
        in_args: Stencil input variables. ``in_args[0]`` supplies the
            dimensionality, shape, scope and location of the generated nodes.
        out_arr: Variable of a caller-provided output array, or ``None`` to
            have an output array created with ``np.full`` in the init block.
        stencil_ir: IR of the stencil kernel; its blocks become the parfor
            body.
        index_offsets: Forwarded to ``_replace_stencil_accesses``.
        target: Variable that receives the output array in the final
            assignment.
        return_type: Stencil return type. ``return_type.dtype`` is used as the
            type of the exit value, of the fill constant and of the elements
            of an allocated output array.
        stencil_func: Stencil object whose ``options`` may contain ``"cval"``.
        arg_to_arr_dict: Forwarded to ``_replace_stencil_accesses``.

    Returns:
        The ``gen_nodes`` list: it is handed to the start/last index helpers
        first, then the parfor and an assignment of the output array to
        ``target`` are appended.

    Raises:
        ValueError: If a ``cval`` option is present and its type differs from
            ``return_type.dtype`` (no ``out_arr`` given) or cannot be
            converted to it (``out_arr`` given).
    """
    gen_nodes = []
    stencil_blocks = stencil_ir.blocks

    if config.DEBUG_ARRAY_OPT >= 1:
        print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
              return_type, stencil_func, stencil_blocks)
        ir_utils.dump_blocks(stencil_blocks)

    in_arr = in_args[0]
    # run copy propagate to replace in_args copies (e.g. a = A)
    in_arr_typ = self.typemap[in_arr.name]
    in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
    name_var_table = ir_utils.get_name_var_table(stencil_blocks)

    ir_utils.apply_copy_propagate(stencil_blocks, in_cps, name_var_table,
                                  self.typemap, self.calltypes)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after copy_propagate")
        ir_utils.dump_blocks(stencil_blocks)
    ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
                         self.typemap)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after removing dead code")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor vars: one intp-typed index variable per input dimension
    ndims = self.typemap[in_arr.name].ndim
    scope = in_arr.scope
    loc = in_arr.loc
    parfor_vars = []
    for i in range(ndims):
        parfor_var = ir.Var(scope, mk_unique_var("$parfor_index_var"), loc)
        self.typemap[parfor_var.name] = types.intp
        parfor_vars.append(parfor_var)

    start_lengths, end_lengths = self._replace_stencil_accesses(
        stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
        arg_to_arr_dict)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replace stencil accesses")
        ir_utils.dump_blocks(stencil_blocks)

    # create parfor loop nests
    loopnests = []
    equiv_set = self.array_analysis.get_equiv_set(label)
    in_arr_dim_sizes = equiv_set.get_shape(in_arr)

    assert ndims == len(in_arr_dim_sizes)
    for i in range(ndims):
        # Both helpers receive gen_nodes, scope and loc along with the
        # per-dimension start/end lengths.
        last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
                                              end_lengths[i], gen_nodes,
                                              scope, loc)
        start_ind = self._get_stencil_start_ind(start_lengths[i],
                                                gen_nodes, scope, loc)
        # start from stencil size to avoid invalid array access
        loopnests.append(
            numba.parfors.parfor.LoopNest(parfor_vars[i], start_ind,
                                          last_ind, 1))

    # We have to guarantee that the exit block has maximum label and that
    # there's only one exit block for the parfor body.
    # So, all return statements will change to jump to the parfor exit block.
    parfor_body_exit_label = max(stencil_blocks.keys()) + 1
    stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
    exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
    self.typemap[exit_value_var.name] = return_type.dtype

    # create parfor index var
    # For one dimension the single index variable is used directly; otherwise
    # a tuple of all index variables is built, and that build statement is
    # queued for insertion into the exit block.
    for_replacing_ret = []
    if ndims == 1:
        parfor_ind_var = parfor_vars[0]
    else:
        parfor_ind_var = ir.Var(scope,
                                mk_unique_var("$parfor_index_tuple_var"), loc)
        self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
            types.intp, ndims)
        tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
        tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
        for_replacing_ret.append(tuple_assign)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after creating parfor index var")
        ir_utils.dump_blocks(stencil_blocks)

    # empty init block
    init_block = ir.Block(scope, loc)
    if out_arr is None:
        # No output array supplied: emit IR that creates one via np.full with
        # the input array's shape, filled with cval (or 0 when no cval is set).
        in_arr_typ = self.typemap[in_arr.name]

        shape_name = ir_utils.mk_unique_var("in_arr_shape")
        shape_var = ir.Var(scope, shape_name, loc)
        shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
        self.typemap[shape_name] = types.containers.UniTuple(
            types.intp, in_arr_typ.ndim)
        init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])

        zero_name = ir_utils.mk_unique_var("zero_val")
        zero_var = ir.Var(scope, zero_name, loc)
        if "cval" in stencil_func.options:
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            # NOTE(review): this branch requires an exact type match, while
            # the out_arr branch below only requires convertibility.
            if return_type.dtype != typing.typeof.typeof(cval):
                raise ValueError(
                    "cval type does not match stencil return type.")

            temp2 = return_type.dtype(cval)
        else:
            temp2 = return_type.dtype(0)
        full_const = ir.Const(temp2, loc)
        self.typemap[zero_name] = return_type.dtype
        init_block.body.extend([ir.Assign(full_const, zero_var, loc)])

        so_name = ir_utils.mk_unique_var("stencil_output")
        out_arr = ir.Var(scope, so_name, loc)
        self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
            return_type.dtype, in_arr_typ.ndim, in_arr_typ.layout)
        # Load the `np` module as a global and take the attribute named after
        # the return dtype; it is passed as the third argument to np.full.
        dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
        self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
        dtype_g_np = ir.Global('np', np, loc)
        dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
        init_block.body.append(dtype_g_np_assign)

        dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var,
                                             return_type.dtype.name, loc)
        dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
        self.typemap[dtype_attr_var.name] = types.functions.NumberClass(
            return_type.dtype)
        dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var,
                                      loc)
        init_block.body.append(dtype_attr_assign)

        stmts = ir_utils.gen_np_call("full", np.full, out_arr,
                                     [shape_var, zero_var, dtype_attr_var],
                                     self.typingctx, self.typemap,
                                     self.calltypes)
        # Record the new output array with the input's dimension sizes.
        equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
        init_block.body.extend(stmts)
    else:  # out is present
        if "cval" in stencil_func.options:  # do out[:] = cval
            cval = stencil_func.options["cval"]
            # TODO: Loosen this restriction to adhere to casting rules.
            cval_ty = typing.typeof.typeof(cval)
            if not self.typingctx.can_convert(cval_ty, return_type.dtype):
                msg = "cval type does not match stencil return type."
                raise ValueError(msg)

            # get slice ref
            slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
            slice_fn_ty = self.typingctx.resolve_value_type(slice)
            self.typemap[slice_var.name] = slice_fn_ty
            slice_g = ir.Global('slice', slice, loc)
            slice_assigned = ir.Assign(slice_g, slice_var, loc)
            init_block.body.append(slice_assigned)

            sig = self.typingctx.resolve_function_type(
                slice_fn_ty, (types.none, ) * 2, {})

            # NOTE(review): the call expression carries no arguments while
            # the signature recorded for it was resolved for two `none`
            # arguments - confirm this is intended.
            callexpr = ir.Expr.call(func=slice_var, args=(), kws=(), loc=loc)

            self.calltypes[callexpr] = sig
            slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"), loc)
            self.typemap[slice_inst_var.name] = types.slice2_type
            slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
            init_block.body.append(slice_assign)

            # get const val for cval
            cval_const_val = ir.Const(return_type.dtype(cval), loc)
            cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"), loc)
            self.typemap[cval_const_var.name] = return_type.dtype
            cval_const_assign = ir.Assign(cval_const_val, cval_const_var,
                                          loc)
            init_block.body.append(cval_const_assign)

            # do setitem on `out` array
            setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
                                           slice_inst_var, cval_const_var,
                                           loc)
            init_block.body.append(setitemexpr)
            sig = signature(types.none, self.typemap[out_arr.name],
                            self.typemap[slice_inst_var.name],
                            self.typemap[out_arr.name].dtype)
            self.calltypes[setitemexpr] = sig

    self.replace_return_with_setitem(stencil_blocks, exit_value_var,
                                     parfor_body_exit_label)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after replacing return")
        ir_utils.dump_blocks(stencil_blocks)

    # The exit block stores the exit value into out_arr at the parfor index;
    # the index-tuple build (if any) is placed in front of that store.
    setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
    self.calltypes[setitem_call] = signature(
        types.none, self.typemap[out_arr.name],
        self.typemap[parfor_ind_var.name],
        self.typemap[out_arr.name].dtype)
    stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
    stencil_blocks[parfor_body_exit_label].body.append(setitem_call)

    # simplify CFG of parfor body (exit block could be simplified often)
    # add dummy return to enable CFG
    dummy_loc = ir.Loc("stencilparfor_dummy", -1)
    ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
    cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc), ret_const_var,
                                  dummy_loc)
    stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)

    stencil_blocks[parfor_body_exit_label].body.append(
        ir.Return(ret_const_var, dummy_loc),
    )
    stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
    # Drop the final statement of the highest-labelled block again
    # (presumably the dummy return appended above - verify that
    # simplify_CFG keeps it last in that block).
    stencil_blocks[max(stencil_blocks.keys())].body.pop()

    if config.DEBUG_ARRAY_OPT >= 1:
        print("stencil_blocks after adding SetItem")
        ir_utils.dump_blocks(stencil_blocks)

    pattern = ('stencil', [start_lengths, end_lengths])
    parfor = numba.parfors.parfor.Parfor(loopnests, init_block,
                                         stencil_blocks, loc, parfor_ind_var,
                                         equiv_set, pattern, self.flags)
    gen_nodes.append(parfor)
    gen_nodes.append(ir.Assign(out_arr, target, loc))
    return gen_nodes
def _create_gufunc_for_parfor_body(
    lowerer,
    parfor,
    typemap,
    typingctx,
    targetctx,
    flags,
    loop_ranges,
    locals,
    has_aliases,
    index_var_typ,
    races,
):
    """
    Takes a parfor and creates a gufunc function for its body.

    There are two parts to this function:

        1) Code to iterate across the iteration space as defined by
           the schedule.
        2) The parfor body that does the work for a single point in
           the iteration space.

    Part 1 is created as Python text for simplicity with a sentinel
    assignment to mark the point in the IR where the parfor body
    should be added. This Python text is 'exec'ed into existence and its
    IR retrieved with run_frontend. The IR is scanned for the sentinel
    assignment where that basic block is split and the IR for the parfor
    body inserted.

    Args:
        lowerer: Lowerer whose ``fndesc`` (typemap, calltypes), ``func_ir``,
            ``context`` and ``metadata`` are used.
        parfor: The parfor to outline. Its ``params`` set is extended in
            place with the names of variable loop bounds.
        typemap: Ignored; the typemap is re-read from
            ``lowerer.fndesc.typemap``.
        typingctx: Not used by this function.
        targetctx: Not used by this function.
        flags: Compiler flags. ``noalias`` is set to ``True`` while the
            kernel is compiled when ``has_aliases`` is false, and is always
            restored before returning or raising.
        loop_ranges: Iterable of ``(start, stop, step)`` triples, one per
            loop nest.
        locals: Not used by this function.
        has_aliases: Whether aliasing was found for the parfor.
        index_var_typ: Not used by this function.
        races: Names of variables that may be written by several workers; a
            ``NumbaParallelSafetyWarning`` is issued for each.

    Returns:
        A tuple ``(kernel_func, parfor_args, kernel_sig, func_arg_types,
        setitems)``: the compiled kernel, the parameter names before
        legalization, the kernel signature, the types of the arguments
        passed to the gufunc, and the set filled by ``find_setitems_body``.
    """

    loc = parfor.init_block.loc

    # The parfor body and the main function body share ir.Var nodes.
    # We have to do some replacements of Var names in the parfor body
    # to make them legal parameter names. If we don't copy then the
    # Vars in the main function also would incorrectly change their name.
    loop_body = copy.copy(parfor.loop_body)
    remove_dels(loop_body)

    parfor_dim = len(parfor.loop_nests)
    loop_indices = [l.index_variable.name for l in parfor.loop_nests]

    # Get all the parfor params.
    # NOTE: this is the parfor's own params object, so the add() calls below
    # also change parfor.params.
    parfor_params = parfor.params

    for start, stop, step in loop_ranges:
        if isinstance(start, ir.Var):
            parfor_params.add(start.name)
        if isinstance(stop, ir.Var):
            parfor_params.add(stop.name)

    # Get just the outputs of the parfor.
    parfor_outputs = numba.parfors.parfor.get_parfor_outputs(
        parfor, parfor_params)

    # Get all parfor reduction vars, and operators.
    typemap = lowerer.fndesc.typemap

    parfor_redvars, parfor_reddict = numba.parfors.parfor.get_parfor_reductions(
        lowerer.func_ir, parfor, parfor_params, lowerer.fndesc.calltypes)
    has_reduction = len(parfor_redvars) != 0

    if has_reduction:
        _create_gufunc_for_reduction_parfor()

    # Compute just the parfor inputs as a set difference.
    parfor_inputs = sorted(set(parfor_params) - set(parfor_outputs))

    for race in races:
        msg = ("Variable %s used in parallel loop may be written "
               "to simultaneously by multiple workers and may result "
               "in non-deterministic or unintended results." % race)
        warnings.warn(NumbaParallelSafetyWarning(msg, loc))
    replace_var_with_array(races, loop_body, typemap, lowerer.fndesc.calltypes)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("parfor_params = ", parfor_params, type(parfor_params))
        print("parfor_outputs = ", parfor_outputs, type(parfor_outputs))
        print("parfor_inputs = ", parfor_inputs, type(parfor_inputs))

    # Reorder all the params so that inputs go first then outputs.
    parfor_params = parfor_inputs + parfor_outputs

    def addrspace_from(params, def_addr):
        # Array-typed params get `def_addr`; every other param gets None.
        addrspaces = []
        for p in params:
            if isinstance(to_scalar_from_0d(typemap[p]), types.npytypes.Array):
                addrspaces.append(def_addr)
            else:
                addrspaces.append(None)
        return addrspaces

    addrspaces = addrspace_from(parfor_params, address_space.GLOBAL)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("parfor_params = ", parfor_params, type(parfor_params))
        print("loop_indices = ", loop_indices, type(loop_indices))
        print("loop_body = ", loop_body, type(loop_body))
        _print_body(loop_body)

    # Some Var are not legal parameter names so create a dict of
    # potentially illegal param name to guaranteed legal name.
    param_dict = legalize_names_with_typemap(parfor_params, typemap)
    if config.DEBUG_ARRAY_OPT >= 1:
        print("param_dict = ", sorted(param_dict.items()), type(param_dict))

    # Some loop_indices are not legal parameter names so create a dict
    # of potentially illegal loop index to guaranteed legal name.
    ind_dict = legalize_names_with_typemap(loop_indices, typemap)
    # Compute a new list of legal loop index names.
    legal_loop_indices = [ind_dict[v] for v in loop_indices]

    if config.DEBUG_ARRAY_OPT >= 1:
        print("ind_dict = ", sorted(ind_dict.items()), type(ind_dict))
        print(
            "legal_loop_indices = ",
            legal_loop_indices,
            type(legal_loop_indices),
        )

        for pd in parfor_params:
            print("pd = ", pd)
            print("pd type = ", typemap[pd], type(typemap[pd]))

    # Get the types of each parameter.
    param_types = [to_scalar_from_0d(typemap[v]) for v in parfor_params]

    param_types_addrspaces = copy.copy(param_types)

    # Calculate types of args passed to gufunc.
    func_arg_types = [typemap[v] for v in (parfor_inputs + parfor_outputs)]
    assert len(param_types_addrspaces) == len(addrspaces)
    for i, addrspace in enumerate(addrspaces):
        if addrspace is not None:
            # Convert Numba's npytype.Array to DPPYArray data type. DPPYArray
            # allows us to specify an address space for the data and other
            # pointer arguments for the array.
            param_types_addrspaces[i] = npytypes_array_to_dppy_array(
                param_types_addrspaces[i], addrspace)

    def print_arg_with_addrspaces(args):
        for a in args:
            print(a, type(a))
            if isinstance(a, types.npytypes.Array):
                print("addrspace:", a.addrspace)

    if config.DEBUG_ARRAY_OPT >= 1:
        print_arg_with_addrspaces(param_types)
        print("func_arg_types = ", func_arg_types, type(func_arg_types))

    # Replace illegal parameter names in the loop body with legal ones.
    replace_var_names(loop_body, param_dict)
    # remember the name before legalizing as the actual arguments
    parfor_args = parfor_params
    # Change parfor_params to be legal names.
    parfor_params = [param_dict[v] for v in parfor_params]

    parfor_params_orig = parfor_params

    parfor_params = []
    # `ascontig` is hard-wired to False, so the "param" suffix branch below is
    # currently never taken and every legal name is copied through unchanged.
    ascontig = False
    for pindex in range(len(parfor_params_orig)):
        if (ascontig and pindex < len(parfor_inputs)
                and isinstance(param_types[pindex], types.npytypes.Array)):
            parfor_params.append(parfor_params_orig[pindex] + "param")
        else:
            parfor_params.append(parfor_params_orig[pindex])

    # Change parfor body to replace illegal loop index vars with legal ones.
    replace_var_names(loop_body, ind_dict)
    loop_body_var_table = get_name_var_table(loop_body)
    sentinel_name = get_unused_var_name("__sentinel__", loop_body_var_table)

    if config.DEBUG_ARRAY_OPT >= 1:
        print("legal parfor_params = ", parfor_params, type(parfor_params))

    # Determine the unique names of the scheduling and gufunc functions.
    gufunc_name = "__numba_parfor_gufunc_%s" % (parfor.id)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_name ", type(gufunc_name), gufunc_name)

    gufunc_txt = ""

    # Create the gufunc function.
    gufunc_txt += "def " + gufunc_name
    gufunc_txt += "(" + (", ".join(parfor_params)) + "):\n"

    gufunc_txt += _schedule_loop(parfor_dim, legal_loop_indices, loop_ranges,
                                 param_dict)

    # Add the sentinel assignment so that we can find the loop body position
    # in the IR.
    # NOTE(review): the leading whitespace of the next two generated lines
    # has to match the indentation of the text produced by _schedule_loop -
    # confirm against that helper.
    gufunc_txt += " "
    gufunc_txt += sentinel_name + " = 0\n"

    # gufunc returns nothing
    gufunc_txt += " return None\n"

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_txt = ", type(gufunc_txt), "\n", gufunc_txt)
        sys.stdout.flush()
    # Force gufunc outline into existence.
    # The exec'ed text is assembled above from legalized names only.
    globls = {"np": np, "numba": numba, "dppy": dppy}
    locls = {}
    exec(gufunc_txt, globls, locls)
    gufunc_func = locls[gufunc_name]

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_func = ", type(gufunc_func), "\n", gufunc_func)
    # Get the IR for the gufunc outline.
    gufunc_ir = compiler.run_frontend(gufunc_func)

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump ", type(gufunc_ir))
        gufunc_ir.dump()
        print("loop_body dump ", type(loop_body))
        _print_body(loop_body)

    # rename all variables in gufunc_ir afresh
    var_table = get_name_var_table(gufunc_ir.blocks)
    new_var_dict = {}
    reserved_names = ([sentinel_name] + list(param_dict.values())
                      + legal_loop_indices)
    for name in var_table:
        if name not in reserved_names:
            new_var_dict[name] = mk_unique_var(name)
    replace_var_names(gufunc_ir.blocks, new_var_dict)
    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir dump after renaming ")
        gufunc_ir.dump()

    gufunc_param_types = param_types

    if config.DEBUG_ARRAY_OPT:
        print(
            "gufunc_param_types = ",
            type(gufunc_param_types),
            "\n",
            gufunc_param_types,
        )

    gufunc_stub_last_label = max(gufunc_ir.blocks.keys()) + 1

    # Add gufunc stub last label to each parfor.loop_body label to prevent
    # label conflicts.
    loop_body = add_offset_to_labels(loop_body, gufunc_stub_last_label)
    # new label for splitting sentinel block
    new_label = max(loop_body.keys()) + 1

    # If enabled, add a print statement after every assignment.
    if config.DEBUG_ARRAY_OPT_RUNTIME:
        _dbgprint_after_each_array_assignments(lowerer, loop_body, typemap)

    if config.DEBUG_ARRAY_OPT:
        print("parfor loop body")
        _print_body(loop_body)

    # The result is not used; the call is kept because it is paired with
    # unwrap_loop_body() below.
    wrapped_blocks = wrap_loop_body(loop_body)
    # hoisted, not_hoisted = hoist(parfor_params, loop_body,
    #                              typemap, wrapped_blocks)
    setitems = set()
    find_setitems_body(setitems, loop_body, typemap)

    # Hoisting is disabled (see the commented-out call above), so both lists
    # stay empty and the start block is rebuilt unchanged.
    hoisted = []
    not_hoisted = []

    start_block = gufunc_ir.blocks[min(gufunc_ir.blocks.keys())]
    start_block.body = start_block.body[:-1] + hoisted + [start_block.body[-1]]
    unwrap_loop_body(loop_body)

    # store hoisted into diagnostics
    diagnostics = lowerer.metadata["parfor_diagnostics"]
    diagnostics.hoist_info[parfor.id] = {
        "hoisted": hoisted,
        "not_hoisted": not_hoisted,
    }

    lowerer.metadata["parfor_diagnostics"].extra_info[str(parfor.id)] = str(
        dpctl.get_current_queue().get_sycl_device().name)

    if config.DEBUG_ARRAY_OPT:
        print("After hoisting")
        _print_body(loop_body)

    # Search all the block in the gufunc outline for the sentinel assignment.
    # The blocks dict is modified inside the loop, which is safe only because
    # both loops are left immediately afterwards.
    for label, block in gufunc_ir.blocks.items():
        for i, inst in enumerate(block.body):
            if (isinstance(inst, ir.Assign)
                    and inst.target.name == sentinel_name):
                # We found the sentinel assignment.
                loc = inst.loc
                scope = block.scope
                # split block across __sentinel__
                # A new block is allocated for the statements prior to the
                # sentinel but the new block maintains the current block label.
                prev_block = ir.Block(scope, loc)
                prev_block.body = block.body[:i]
                # The current block is used for statements after the sentinel.
                block.body = block.body[i + 1:]
                # But the current block gets a new label.
                body_first_label = min(loop_body.keys())

                # The previous block jumps to the minimum labelled block of the
                # parfor body.
                prev_block.append(ir.Jump(body_first_label, loc))
                # Add all the parfor loop body blocks to the gufunc function's
                # IR.
                for (l, b) in loop_body.items():
                    gufunc_ir.blocks[l] = b
                body_last_label = max(loop_body.keys())
                gufunc_ir.blocks[new_label] = block
                gufunc_ir.blocks[label] = prev_block
                # Add a jump from the last parfor body block to the block
                # containing statements after the sentinel.
                gufunc_ir.blocks[body_last_label].append(
                    ir.Jump(new_label, loc))
                break
        else:
            continue
        break

    if config.DEBUG_ARRAY_OPT:
        print("gufunc_ir last dump before renaming")
        gufunc_ir.dump()

    gufunc_ir.blocks = rename_labels(gufunc_ir.blocks)
    remove_dels(gufunc_ir.blocks)

    if config.DEBUG_ARRAY_OPT:
        sys.stdout.flush()
        print("gufunc_ir last dump")
        gufunc_ir.dump()
        print("flags", flags)
        print("typemap", typemap)

    # `flags` belongs to the caller. Restore `noalias` in a finally block so
    # a failure anywhere below does not leave the caller's flags modified.
    old_alias = flags.noalias
    try:
        if not has_aliases:
            if config.DEBUG_ARRAY_OPT:
                print("No aliases found so adding noalias flag.")
            flags.noalias = True

        remove_dead(gufunc_ir.blocks, gufunc_ir.arg_names, gufunc_ir, typemap)

        if config.DEBUG_ARRAY_OPT:
            print("gufunc_ir after remove dead")
            gufunc_ir.dump()

        kernel_sig = signature(types.none, *gufunc_param_types)

        if config.DEBUG_ARRAY_OPT:
            sys.stdout.flush()
            print("before DUFunc inlining".center(80, "-"))
            gufunc_ir.dump()

        # Inlining all DUFuncs
        dufunc_inliner(
            gufunc_ir,
            lowerer.fndesc.calltypes,
            typemap,
            lowerer.context.typing_context,
            lowerer.context,
        )

        if config.DEBUG_ARRAY_OPT:
            print("after DUFunc inline".center(80, "-"))
            gufunc_ir.dump()

        kernel_func = dppy.compiler.compile_kernel_parfor(
            dpctl.get_current_queue(),
            gufunc_ir,
            gufunc_param_types,
            param_types_addrspaces,
            debug=flags.debuginfo,
        )
    finally:
        flags.noalias = old_alias

    if config.DEBUG_ARRAY_OPT:
        print("kernel_sig = ", kernel_sig)

    return kernel_func, parfor_args, kernel_sig, func_arg_types, setitems