def _inline_stencil(self, instr, call_name, func_def):
    """Rewrite a ``numba.stencil(kernel, ...)`` call into a StencilFunc global.

    `instr` is the assignment whose value is the call expression,
    `call_name` is the resolved ``(name, module)`` pair of the callee and
    `func_def` is the definition of the called object.

    Two situations are handled:

    * `func_def` is already an ``ir.Global`` named ``'stencil'`` wrapping a
      ``StencilFunc``: the keyword arguments stored on that StencilFunc are
      appended to the call so the variables they reference stay alive.
    * The call targets ``numba.stencil`` itself: the kernel closure is
      turned into IR, a ``StencilFunc`` is built from it and `instr` is
      rewritten to load that object as a global.

    Returns True in both cases.  Non-matching calls are rejected through
    `require` (defined outside this block).

    Raises ValueError when the call does not have exactly one positional
    argument, or when the ``neighborhood`` / ``index_offsets`` options
    cannot be fixed up into a constant structure.
    """
    from numba.stencil import StencilFunc

    target = instr.target
    call = instr.value

    # Escaping variables of the stencil kernel are kept alive by passing
    # them to the actual kernel call as extra keyword arguments; the call
    # itself ignores them.
    is_stencil_global = (isinstance(func_def, ir.Global)
                         and func_def.name == 'stencil'
                         and isinstance(func_def.value, StencilFunc))
    if is_stencil_global:
        if call.kws:
            call.kws += func_def.value.kws
        else:
            call.kws = func_def.value.kws
        return True

    # Not an existing StencilFunc: it has to be a call to numba.stencil,
    # and one we have not rewritten before.
    require(call_name in (('stencil', 'numba.stencil'),
                          ('stencil', 'numba')))
    require(call not in self._processed_stencils)
    self._processed_stencils.append(call)

    if len(call.args) != 1:
        raise ValueError("As a minimum Stencil requires"
                         " a kernel as an argument")

    kernel_def = guard(get_definition, self.func_ir, call.args[0])
    require(isinstance(kernel_def, ir.Expr)
            and kernel_def.op == "make_function")
    caller_globals = self.func_ir.func_id.func.__globals__
    kernel_ir = get_ir_of_code(caller_globals, kernel_def.code)

    options = dict(call.kws)
    # (option name, fix-up routine, error raised when the fix-up fails)
    option_fixers = (
        ('neighborhood',
         self._fix_stencil_neighborhood,
         "stencil neighborhood option should be a tuple"
         " with constant structure such as ((-w, w),)"),
        ('index_offsets',
         self._fix_stencil_index_offsets,
         "stencil index_offsets option should be a tuple"
         " with constant structure such as (offset, )"),
    )
    for opt_name, fixer, message in option_fixers:
        if opt_name in options and not guard(fixer, options):
            raise ValueError(message)

    stencil_func = StencilFunc(kernel_ir, 'constant', options)
    stencil_func.kws = call.kws  # hack to keep variables live
    stencil_global = ir.Global('stencil', stencil_func, call.loc)
    self.func_ir._definitions[target.name] = [stencil_global]
    instr.value = stencil_global
    return True
def _inline_stencil(self, instr, call_name, func_def):
    """Replace a call to ``numba.stencil`` with a ready-made StencilFunc.

    When `func_def` is an ``ir.Global`` called ``'stencil'`` that holds a
    ``StencilFunc``, the keyword arguments recorded on it are merged into
    the call expression of `instr` and True is returned.

    Otherwise `call_name` must be ``('stencil', 'numba.stencil')`` or
    ``('stencil', 'numba')`` and the call must not have been processed
    before (both checked with `require`, which is defined outside this
    block).  The single positional argument must resolve to a
    ``make_function`` expression; its code is converted to IR, wrapped in a
    ``StencilFunc`` and stored back into `instr` as an ``ir.Global``.
    True is returned.

    Raises ValueError if the number of positional arguments is not one or
    if the ``neighborhood`` / ``index_offsets`` options could not be fixed.
    """
    from numba.stencil import StencilFunc

    assigned_var = instr.target
    stencil_call = instr.value

    # The kernel's escaping variables are kept alive by adding them to the
    # actual kernel call as extra keyword arguments, which are ignored.
    if isinstance(func_def, ir.Global) and func_def.name == 'stencil':
        if isinstance(func_def.value, StencilFunc):
            if stencil_call.kws:
                stencil_call.kws += func_def.value.kws
            else:
                stencil_call.kws = func_def.value.kws
            return True

    # From here on the call has to be numba.stencil(...) itself.
    accepted_names = [('stencil', 'numba.stencil'), ('stencil', 'numba')]
    require(call_name in accepted_names)
    require(stencil_call not in self._processed_stencils)
    self._processed_stencils.append(stencil_call)

    n_positional = len(stencil_call.args)
    if n_positional != 1:
        raise ValueError("As a minimum Stencil requires"
                         " a kernel as an argument")

    make_func = guard(get_definition, self.func_ir, stencil_call.args[0])
    require(isinstance(make_func, ir.Expr)
            and make_func.op == "make_function")
    kernel_ir = get_ir_of_code(
        self.func_ir.func_id.func.__globals__, make_func.code)

    options = dict(stencil_call.kws)
    if ('neighborhood' in options
            and not guard(self._fix_stencil_neighborhood, options)):
        raise ValueError("stencil neighborhood option should be a tuple"
                         " with constant structure such as ((-w, w),)")
    if ('index_offsets' in options
            and not guard(self._fix_stencil_index_offsets, options)):
        raise ValueError("stencil index_offsets option should be a tuple"
                         " with constant structure such as (offset, )")

    new_stencil = StencilFunc(kernel_ir, 'constant', options)
    new_stencil.kws = stencil_call.kws  # hack to keep variables live
    replacement = ir.Global('stencil', new_stencil, stencil_call.loc)
    self.func_ir._definitions[assigned_var.name] = [replacement]
    instr.value = replacement
    return True
def inline_closure_call(func_ir, glbls, block, i, callee, typingctx=None,
                        arg_typs=None, typemap=None, calltypes=None,
                        work_list=None):
    """Inline the body of `callee` at its callsite (`i`-th instruction of
    `block`).

    `func_ir` is the func_ir object of the caller function and `glbls` is
    its global variable environment (func_ir.func_id.func.__globals__).
    `block` is the IR block of the callsite and `i` is the index of the
    callsite's node.  `callee` is either the called function or a
    make_function node.  `typingctx`, `typemap` and `calltypes` are typing
    data structures of the caller, available if we are in a typed pass.
    `arg_typs` includes the types of the arguments at the callsite.

    If `work_list` is not None, every newly created block (the tail of the
    split caller block plus all inlined callee blocks) is appended to it as
    a ``(label, block)`` pair.

    Returns the relabelled and renamed blocks of the callee, which have
    also been inserted into ``func_ir.blocks``.

    Raises NotImplementedError if the callee's defaults are neither a
    tuple nor an ``ir.Var`` / variable name.
    """
    scope = block.scope
    instr = block.body[i]
    call_expr = instr.value
    debug_print = _make_debug_print("inline_closure_call")
    debug_print("Found closure call: ", instr, " with callee = ", callee)

    # Support both function objects and make_function Exprs.
    if hasattr(callee, 'code'):
        callee_code = callee.code
    else:
        callee_code = callee.__code__
    if hasattr(callee, 'defaults'):
        callee_defaults = callee.defaults
    else:
        callee_defaults = callee.__defaults__
    if hasattr(callee, 'closure'):
        callee_closure = callee.closure
    else:
        callee_closure = callee.__closure__

    # First, get the IR of the callee.
    callee_ir = get_ir_of_code(glbls, callee_code)
    callee_blocks = callee_ir.blocks

    # 1. Relabel callee_ir by adding an offset.
    max_label = max(func_ir.blocks.keys())
    callee_blocks = add_offset_to_labels(callee_blocks, max_label + 1)
    callee_blocks = simplify_CFG(callee_blocks)
    callee_ir.blocks = callee_blocks
    min_label = min(callee_blocks.keys())
    max_label = max(callee_blocks.keys())
    # Reset globals in ir_utils before we use it.
    ir_utils._max_label = max_label
    debug_print("After relabel")
    _debug_dump(callee_ir)

    # 2. Rename all local variables in callee_ir with new locals created in
    #    func_ir.
    callee_scopes = _get_all_scopes(callee_blocks)
    debug_print("callee_scopes = ", callee_scopes)
    # One function should only have one local scope.
    assert len(callee_scopes) == 1
    callee_scope = callee_scopes[0]
    var_dict = {}
    for var in callee_scope.localvars._con.values():
        # Free variables are substituted in step 4, not renamed here.
        if var.name not in callee_code.co_freevars:
            new_var = scope.define(mk_unique_var(var.name), loc=var.loc)
            var_dict[var.name] = new_var
    debug_print("var_dict = ", var_dict)
    replace_vars(callee_blocks, var_dict)
    debug_print("After local var rename")
    _debug_dump(callee_ir)

    # 3. Replace formal parameters with actual arguments.
    args = list(call_expr.args)
    if callee_defaults:
        debug_print("defaults = ", callee_defaults)
        if isinstance(callee_defaults, tuple):  # Python 3.5
            args = args + list(callee_defaults)
        elif isinstance(callee_defaults, (ir.Var, str)):
            defaults = func_ir.get_definition(callee_defaults)
            assert isinstance(defaults, ir.Const)
            loc = defaults.loc
            args = args + [ir.Const(value=v, loc=loc)
                           for v in defaults.value]
        else:
            # Report `callee_defaults`: the local `defaults` is only bound
            # in the branch above, so formatting it here would raise
            # UnboundLocalError instead of the intended error.
            raise NotImplementedError(
                "Unsupported defaults to make_function: {}".format(
                    callee_defaults))
    debug_print("After arguments rename: ")
    _debug_dump(callee_ir)

    # 4. Replace freevar with actual closure var.
    if callee_closure:
        closure = func_ir.get_definition(callee_closure)
        debug_print("callee's closure = ", closure)
        if isinstance(closure, tuple):
            # A tuple of cells: read each cell's content via the C API.
            cellget = ctypes.pythonapi.PyCell_Get
            cellget.restype = ctypes.py_object
            cellget.argtypes = (ctypes.py_object,)
            items = tuple(cellget(x) for x in closure)
        else:
            assert (isinstance(closure, ir.Expr)
                    and closure.op == 'build_tuple')
            items = closure.items
        assert len(callee_code.co_freevars) == len(items)
        _replace_freevars(callee_blocks, items)
        debug_print("After closure rename")
        _debug_dump(callee_ir)

    if typingctx:
        from numba import compiler
        f_typemap, f_return_type, f_calltypes = \
            compiler.type_inference_stage(typingctx, callee_ir, arg_typs,
                                          None)
        canonicalize_array_math(callee_ir, f_typemap, f_calltypes,
                                typingctx)
        # Remove argument entries like arg.a from typemap.
        arg_names = [vname for vname in f_typemap
                     if vname.startswith("arg.")]
        for a in arg_names:
            f_typemap.pop(a)
        typemap.update(f_typemap)
        calltypes.update(f_calltypes)

    _replace_args_with(callee_blocks, args)

    # 5. Split caller blocks into two: everything after the call moves to a
    #    new block, and the original block now jumps into the callee.
    new_blocks = []
    new_block = ir.Block(scope, block.loc)
    new_block.body = block.body[i + 1:]
    new_label = next_label()
    func_ir.blocks[new_label] = new_block
    new_blocks.append((new_label, new_block))
    block.body = block.body[:i]
    block.body.append(ir.Jump(min_label, instr.loc))

    # 6. Replace Return with assignment to LHS.
    topo_order = find_topo_order(callee_blocks)
    _replace_returns(callee_blocks, instr.target, new_label)
    # Remove the old definition of instr.target too.
    if instr.target.name in func_ir._definitions:
        func_ir._definitions[instr.target.name] = []

    # 7. Insert all new blocks, and add back definitions.
    for label in topo_order:
        # Block scope must point to parent's.
        callee_block = callee_blocks[label]
        callee_block.scope = scope
        _add_definitions(func_ir, callee_block)
        func_ir.blocks[label] = callee_block
        new_blocks.append((label, callee_block))
    debug_print("After merge in")
    _debug_dump(func_ir)

    if work_list is not None:
        for labelled_block in new_blocks:
            work_list.append(labelled_block)
    return callee_blocks