def _build_wrapper(self, library, name): """ The LLVM IRBuilder code to create the gufunc wrapper. The *library* arg is the CodeLibrary for which the wrapper should be added to. The *name* arg is the name of the wrapper function being created. """ byte_t = Type.int(8) byte_ptr_t = Type.pointer(byte_t) byte_ptr_ptr_t = Type.pointer(byte_ptr_t) intp_t = self.context.get_value_type(types.intp) intp_ptr_t = Type.pointer(intp_t) fnty = Type.function( Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]) wrapper_module = library.create_ir_module('') func_type = self.call_conv.get_function_type(self.fndesc.restype, self.fndesc.argtypes) fname = self.fndesc.llvm_func_name func = wrapper_module.add_function(func_type, name=fname) func.attributes.add("alwaysinline") wrapper = wrapper_module.add_function(fnty, name) arg_args, arg_dims, arg_steps, arg_data = wrapper.args arg_args.name = "args" arg_dims.name = "dims" arg_steps.name = "steps" arg_data.name = "data" builder = Builder(wrapper.append_basic_block("entry")) loopcount = builder.load(arg_dims, name="loopcount") pyapi = self.context.get_python_api(builder) # Unpack shapes unique_syms = set() for grp in (self.sin, self.sout): for syms in grp: unique_syms |= set(syms) sym_map = {} for syms in self.sin: for s in syms: if s not in sym_map: sym_map[s] = len(sym_map) sym_dim = {} for s, i in sym_map.items(): sym_dim[s] = builder.load( builder.gep(arg_dims, [self.context.get_constant(types.intp, i + 1)])) # Prepare inputs arrays = [] step_offset = len(self.sin) + len(self.sout) for i, (typ, sym) in enumerate( zip(self.signature.args, self.sin + self.sout)): ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i, step_offset, typ, sym, sym_dim) step_offset += len(sym) arrays.append(ary) bbreturn = builder.append_basic_block('.return') # Prologue self.gen_prologue(builder, pyapi) # Loop with cgutils.for_range(builder, loopcount, intp=intp_t) as loop: args = [a.get_array_at_offset(loop.index) for a in arrays] innercall, error = self.gen_loop_body(builder, pyapi, func, args) # If error, escape cgutils.cbranch_or_continue(builder, error, bbreturn) builder.branch(bbreturn) builder.position_at_end(bbreturn) # Epilogue self.gen_epilogue(builder, pyapi) builder.ret_void() # Link library.add_ir_module(wrapper_module) library.add_linking_library(self.library)
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature, args, env): mod = builder.module bb_core_return = builder.append_basic_block('ufunc.core.return') # Call to # PyObject* ndarray_new(int nd, # npy_intp *dims, /* shape */ # npy_intp *strides, # void* data, # int type_num, # int itemsize) ll_int = context.get_value_type(types.int32) ll_intp = context.get_value_type(types.intp) ll_intp_ptr = Type.pointer(ll_intp) ll_voidptr = context.get_value_type(types.voidptr) ll_pyobj = context.get_value_type(types.pyobject) fnty = Type.function( ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int]) fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new") # Convert each llarray into pyobject error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error') builder.store(cgutils.true_bit, error_pointer) # The PyObject* arguments to the kernel function object_args = [] object_pointers = [] for i, (arg, argty) in enumerate(zip(args, signature.args)): # Allocate NULL-initialized slot for this argument objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True) object_pointers.append(objptr) if isinstance(argty, types.Array): # Special case arrays: we don't need full-blown NRT reflection # since the argument will be gone at the end of the kernel arycls = context.make_array(argty) array = arycls(context, builder, value=arg) zero = Constant.int(ll_int, 0) # Extract members of the llarray nd = Constant.int(ll_int, argty.ndim) dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero]) strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero]) data = builder.bitcast(array.data, ll_voidptr) dtype = np.dtype(str(argty.dtype)) # Prepare other info for reconstruction of the PyArray type_num = Constant.int(ll_int, dtype.num) itemsize = Constant.int(ll_int, dtype.itemsize) # Call helper to reconstruct PyArray objects obj = builder.call(fn_array_new, [nd, dims, strides, data, type_num, itemsize]) else: # Other argument types => use generic boxing obj = pyapi.from_native_value(argty, arg) builder.store(obj, objptr) object_args.append(obj) obj_is_null = cgutils.is_null(builder, obj) builder.store(obj_is_null, error_pointer) cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return) # Call ufunc core function object_sig = [types.pyobject] * len(object_args) status, retval = context.call_conv.call_function(builder, func, types.pyobject, object_sig, object_args, env=env) builder.store(status.is_error, error_pointer) # Release returned object pyapi.decref(retval) builder.branch(bb_core_return) # At return block builder.position_at_end(bb_core_return) # Release argument objects for objptr in object_pointers: pyapi.decref(builder.load(objptr)) innercall = status.code return innercall, builder.load(error_pointer)
def build(self): byte_t = Type.int(8) byte_ptr_t = Type.pointer(byte_t) byte_ptr_ptr_t = Type.pointer(byte_ptr_t) intp_t = self.context.get_value_type(types.intp) intp_ptr_t = Type.pointer(intp_t) fnty = Type.function( Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]) wrapper_module = self.library.create_ir_module('') func_type = self.call_conv.get_function_type(self.fndesc.restype, self.fndesc.argtypes) func = wrapper_module.add_function(func_type, name=self.func.name) func.attributes.add("alwaysinline") wrapper = wrapper_module.add_function(fnty, "__gufunc__." + self.func.name) arg_args, arg_dims, arg_steps, arg_data = wrapper.args arg_args.name = "args" arg_dims.name = "dims" arg_steps.name = "steps" arg_data.name = "data" builder = Builder.new(wrapper.append_basic_block("entry")) loopcount = builder.load(arg_dims, name="loopcount") # Unpack shapes unique_syms = set() for grp in (self.sin, self.sout): for syms in grp: unique_syms |= set(syms) sym_map = {} for syms in self.sin: for s in syms: if s not in sym_map: sym_map[s] = len(sym_map) sym_dim = {} for s, i in sym_map.items(): sym_dim[s] = builder.load( builder.gep(arg_dims, [self.context.get_constant(types.intp, i + 1)])) # Prepare inputs arrays = [] step_offset = len(self.sin) + len(self.sout) for i, (typ, sym) in enumerate( zip(self.signature.args, self.sin + self.sout)): ary = GUArrayArg(self.context, builder, arg_args, arg_dims, arg_steps, i, step_offset, typ, sym, sym_dim) if not ary.as_scalar: step_offset += ary.ndim arrays.append(ary) bbreturn = cgutils.get_function(builder).append_basic_block('.return') # Prologue self.gen_prologue(builder) # Loop with cgutils.for_range(builder, loopcount, intp=intp_t) as ind: args = [a.array_value for a in arrays] innercall, error = self.gen_loop_body(builder, func, args) # If error, escape cgutils.cbranch_or_continue(builder, error, bbreturn) for a in arrays: a.next(ind) builder.branch(bbreturn) builder.position_at_end(bbreturn) # Epilogue self.gen_epilogue(builder) builder.ret_void() self.library.add_ir_module(wrapper_module) wrapper = self.library.get_function(wrapper.name) # Set core function to internal so that it is not generated self.func.linkage = LINKAGE_INTERNAL return wrapper, self.env
def _prepare_call_to_object_mode(context, builder, func, signature, args, env): mod = cgutils.get_module(builder) thisfunc = cgutils.get_function(builder) bb_core_return = thisfunc.append_basic_block('ufunc.core.return') pyapi = context.get_python_api(builder) # Call to # PyObject* ndarray_new(int nd, # npy_intp *dims, /* shape */ # npy_intp *strides, # void* data, # int type_num, # int itemsize) ll_int = context.get_value_type(types.int32) ll_intp = context.get_value_type(types.intp) ll_intp_ptr = Type.pointer(ll_intp) ll_voidptr = context.get_value_type(types.voidptr) ll_pyobj = context.get_value_type(types.pyobject) fnty = Type.function( ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int]) fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new") # Convert each llarray into pyobject error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error') builder.store(cgutils.true_bit, error_pointer) ndarray_pointers = [] ndarray_objects = [] for i, (arr, arrtype) in enumerate(zip(args, signature.args)): ptr = cgutils.alloca_once(builder, ll_pyobj) ndarray_pointers.append(ptr) builder.store(Constant.null(ll_pyobj), ptr) # initialize to NULL arycls = context.make_array(arrtype) array = arycls(context, builder, ref=arr) zero = Constant.int(ll_int, 0) # Extract members of the llarray nd = Constant.int(ll_int, arrtype.ndim) dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero]) strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero]) data = builder.bitcast(array.data, ll_voidptr) dtype = np.dtype(str(arrtype.dtype)) # Prepare other info for reconstruction of the PyArray type_num = Constant.int(ll_int, dtype.num) itemsize = Constant.int(ll_int, dtype.itemsize) # Call helper to reconstruct PyArray objects obj = builder.call(fn_array_new, [nd, dims, strides, data, type_num, itemsize]) builder.store(obj, ptr) ndarray_objects.append(obj) obj_is_null = cgutils.is_null(builder, obj) builder.store(obj_is_null, error_pointer) cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return) # Call ufunc core function object_sig = [types.pyobject] * len(ndarray_objects) status, retval = context.call_conv.call_function(builder, func, ll_pyobj, object_sig, ndarray_objects, env=env) builder.store(status.is_error, error_pointer) # Release returned object pyapi.decref(retval) builder.branch(bb_core_return) # At return block builder.position_at_end(bb_core_return) # Release argument object for ndary_ptr in ndarray_pointers: pyapi.decref(builder.load(ndary_ptr)) innercall = status.code return innercall, builder.load(error_pointer)
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature, args, env): mod = builder.module bb_core_return = builder.append_basic_block('ufunc.core.return') # Call to # PyObject* ndarray_new(int nd, # npy_intp *dims, /* shape */ # npy_intp *strides, # void* data, # int type_num, # int itemsize) ll_int = context.get_value_type(types.int32) ll_intp = context.get_value_type(types.intp) ll_intp_ptr = Type.pointer(ll_intp) ll_voidptr = context.get_value_type(types.voidptr) ll_pyobj = context.get_value_type(types.pyobject) fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int]) fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new") # Convert each llarray into pyobject error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error') builder.store(cgutils.true_bit, error_pointer) ndarray_pointers = [] ndarray_objects = [] for i, (arr, arrtype) in enumerate(zip(args, signature.args)): ptr = cgutils.alloca_once(builder, ll_pyobj) ndarray_pointers.append(ptr) builder.store(Constant.null(ll_pyobj), ptr) # initialize to NULL arycls = context.make_array(arrtype) array = arycls(context, builder, value=arr) zero = Constant.int(ll_int, 0) # Extract members of the llarray nd = Constant.int(ll_int, arrtype.ndim) dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero]) strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero]) data = builder.bitcast(array.data, ll_voidptr) dtype = np.dtype(str(arrtype.dtype)) # Prepare other info for reconstruction of the PyArray type_num = Constant.int(ll_int, dtype.num) itemsize = Constant.int(ll_int, dtype.itemsize) # Call helper to reconstruct PyArray objects obj = builder.call(fn_array_new, [nd, dims, strides, data, type_num, itemsize]) builder.store(obj, ptr) ndarray_objects.append(obj) obj_is_null = cgutils.is_null(builder, obj) builder.store(obj_is_null, error_pointer) cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return) # Call ufunc core function object_sig = [types.pyobject] * len(ndarray_objects) status, retval = context.call_conv.call_function( builder, func, types.pyobject, object_sig, ndarray_objects, env=env) builder.store(status.is_error, error_pointer) # Release returned object pyapi.decref(retval) builder.branch(bb_core_return) # At return block builder.position_at_end(bb_core_return) # Release argument object for ndary_ptr in ndarray_pointers: pyapi.decref(builder.load(ndary_ptr)) innercall = status.code return innercall, builder.load(error_pointer)
def build(self): byte_t = Type.int(8) byte_ptr_t = Type.pointer(byte_t) byte_ptr_ptr_t = Type.pointer(byte_ptr_t) intp_t = self.context.get_value_type(types.intp) intp_ptr_t = Type.pointer(intp_t) fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]) wrapper_module = self.library.create_ir_module('') func_type = self.call_conv.get_function_type(self.fndesc.restype, self.fndesc.argtypes) func = wrapper_module.add_function(func_type, name=self.func.name) func.attributes.add("alwaysinline") wrapper = wrapper_module.add_function(fnty, "__gufunc__." + self.func.name) arg_args, arg_dims, arg_steps, arg_data = wrapper.args arg_args.name = "args" arg_dims.name = "dims" arg_steps.name = "steps" arg_data.name = "data" builder = Builder.new(wrapper.append_basic_block("entry")) loopcount = builder.load(arg_dims, name="loopcount") pyapi = self.context.get_python_api(builder) # Unpack shapes unique_syms = set() for grp in (self.sin, self.sout): for syms in grp: unique_syms |= set(syms) sym_map = {} for syms in self.sin: for s in syms: if s not in sym_map: sym_map[s] = len(sym_map) sym_dim = {} for s, i in sym_map.items(): sym_dim[s] = builder.load(builder.gep(arg_dims, [self.context.get_constant( types.intp, i + 1)])) # Prepare inputs arrays = [] step_offset = len(self.sin) + len(self.sout) for i, (typ, sym) in enumerate(zip(self.signature.args, self.sin + self.sout)): ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i, step_offset, typ, sym, sym_dim) step_offset += len(sym) arrays.append(ary) bbreturn = builder.append_basic_block('.return') # Prologue self.gen_prologue(builder, pyapi) # Loop with cgutils.for_range(builder, loopcount, intp=intp_t) as loop: args = [a.get_array_at_offset(loop.index) for a in arrays] innercall, error = self.gen_loop_body(builder, pyapi, func, args) # If error, escape cgutils.cbranch_or_continue(builder, error, bbreturn) builder.branch(bbreturn) builder.position_at_end(bbreturn) # Epilogue self.gen_epilogue(builder, pyapi) builder.ret_void() self.library.add_ir_module(wrapper_module) wrapper = self.library.get_function(wrapper.name) # Set core function to internal so that it is not generated self.func.linkage = LINKAGE_INTERNAL return wrapper, self.env
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature, args, env): mod = builder.module bb_core_return = builder.append_basic_block('ufunc.core.return') # Call to # PyObject* ndarray_new(int nd, # npy_intp *dims, /* shape */ # npy_intp *strides, # void* data, # int type_num, # int itemsize) ll_int = context.get_value_type(types.int32) ll_intp = context.get_value_type(types.intp) ll_intp_ptr = Type.pointer(ll_intp) ll_voidptr = context.get_value_type(types.voidptr) ll_pyobj = context.get_value_type(types.pyobject) fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr, ll_voidptr, ll_int, ll_int]) fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new") # Convert each llarray into pyobject error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error') builder.store(cgutils.true_bit, error_pointer) # The PyObject* arguments to the kernel function object_args = [] object_pointers = [] for i, (arg, argty) in enumerate(zip(args, signature.args)): # Allocate NULL-initialized slot for this argument objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True) object_pointers.append(objptr) if isinstance(argty, types.Array): # Special case arrays: we don't need full-blown NRT reflection # since the argument will be gone at the end of the kernel arycls = context.make_array(argty) array = arycls(context, builder, value=arg) zero = Constant.int(ll_int, 0) # Extract members of the llarray nd = Constant.int(ll_int, argty.ndim) dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero]) strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero]) data = builder.bitcast(array.data, ll_voidptr) dtype = np.dtype(str(argty.dtype)) # Prepare other info for reconstruction of the PyArray type_num = Constant.int(ll_int, dtype.num) itemsize = Constant.int(ll_int, dtype.itemsize) # Call helper to reconstruct PyArray objects obj = builder.call(fn_array_new, [nd, dims, strides, data, type_num, itemsize]) else: # Other argument types => use generic boxing obj = pyapi.from_native_value(argty, arg) builder.store(obj, objptr) object_args.append(obj) obj_is_null = cgutils.is_null(builder, obj) builder.store(obj_is_null, error_pointer) cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return) # Call ufunc core function object_sig = [types.pyobject] * len(object_args) status, retval = context.call_conv.call_function( builder, func, types.pyobject, object_sig, object_args, env=env) builder.store(status.is_error, error_pointer) # Release returned object pyapi.decref(retval) builder.branch(bb_core_return) # At return block builder.position_at_end(bb_core_return) # Release argument objects for objptr in object_pointers: pyapi.decref(builder.load(objptr)) innercall = status.code return innercall, builder.load(error_pointer)
def _build_wrapper(self, library, name): """ The LLVM IRBuilder code to create the gufunc wrapper. The *library* arg is the CodeLibrary for which the wrapper should be added to. The *name* arg is the name of the wrapper function being created. """ byte_t = Type.int(8) byte_ptr_t = Type.pointer(byte_t) byte_ptr_ptr_t = Type.pointer(byte_ptr_t) intp_t = self.context.get_value_type(types.intp) intp_ptr_t = Type.pointer(intp_t) fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]) wrapper_module = library.create_ir_module('') func_type = self.call_conv.get_function_type(self.fndesc.restype, self.fndesc.argtypes) fname = self.fndesc.llvm_func_name func = wrapper_module.add_function(func_type, name=fname) func.attributes.add("alwaysinline") wrapper = wrapper_module.add_function(fnty, name) arg_args, arg_dims, arg_steps, arg_data = wrapper.args arg_args.name = "args" arg_dims.name = "dims" arg_steps.name = "steps" arg_data.name = "data" builder = Builder(wrapper.append_basic_block("entry")) loopcount = builder.load(arg_dims, name="loopcount") pyapi = self.context.get_python_api(builder) # Unpack shapes unique_syms = set() for grp in (self.sin, self.sout): for syms in grp: unique_syms |= set(syms) sym_map = {} for syms in self.sin: for s in syms: if s not in sym_map: sym_map[s] = len(sym_map) sym_dim = {} for s, i in sym_map.items(): sym_dim[s] = builder.load(builder.gep(arg_dims, [self.context.get_constant( types.intp, i + 1)])) # Prepare inputs arrays = [] step_offset = len(self.sin) + len(self.sout) for i, (typ, sym) in enumerate(zip(self.signature.args, self.sin + self.sout)): ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i, step_offset, typ, sym, sym_dim) step_offset += len(sym) arrays.append(ary) bbreturn = builder.append_basic_block('.return') # Prologue self.gen_prologue(builder, pyapi) # Loop with cgutils.for_range(builder, loopcount, intp=intp_t) as loop: args = [a.get_array_at_offset(loop.index) for a in arrays] innercall, error = self.gen_loop_body(builder, pyapi, func, args) # If error, escape cgutils.cbranch_or_continue(builder, error, bbreturn) builder.branch(bbreturn) builder.position_at_end(bbreturn) # Epilogue self.gen_epilogue(builder, pyapi) builder.ret_void() # Link library.add_ir_module(wrapper_module) library.add_linking_library(self.library)
def build(self): module = self.func.module byte_t = Type.int(8) byte_ptr_t = Type.pointer(byte_t) byte_ptr_ptr_t = Type.pointer(byte_ptr_t) intp_t = self.context.get_value_type(types.intp) intp_ptr_t = Type.pointer(intp_t) fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t, intp_ptr_t, byte_ptr_t]) wrapper = module.add_function(fnty, "__gufunc__." + self.func.name) arg_args, arg_dims, arg_steps, arg_data = wrapper.args arg_args.name = "args" arg_dims.name = "dims" arg_steps.name = "steps" arg_data.name = "data" builder = Builder.new(wrapper.append_basic_block("entry")) loopcount = builder.load(arg_dims, name="loopcount") # Unpack shapes unique_syms = set() for grp in (self.sin, self.sout): for syms in grp: unique_syms |= set(syms) sym_map = {} for syms in self.sin: for s in syms: if s not in sym_map: sym_map[s] = len(sym_map) sym_dim = {} for s, i in sym_map.items(): sym_dim[s] = builder.load(builder.gep(arg_dims, [self.context.get_constant( types.intp, i + 1)])) # Prepare inputs arrays = [] step_offset = len(self.sin) + len(self.sout) for i, (typ, sym) in enumerate(zip(self.signature.args, self.sin + self.sout)): ary = GUArrayArg(self.context, builder, arg_args, arg_dims, arg_steps, i, step_offset, typ, sym, sym_dim) if not ary.as_scalar: step_offset += ary.ndim arrays.append(ary) bbreturn = cgutils.get_function(builder).append_basic_block('.return') # Prologue self.gen_prologue(builder) # Loop with cgutils.for_range(builder, loopcount, intp=intp_t) as ind: args = [a.array_value for a in arrays] innercall, error = self.gen_loop_body(builder, args) # If error, escape cgutils.cbranch_or_continue(builder, error, bbreturn) for a in arrays: a.next(ind) builder.branch(bbreturn) builder.position_at_end(bbreturn) # Epilogue self.gen_epilogue(builder) builder.ret_void() module.verify() # Set core function to internal so that it is not generated self.func.linkage = LINKAGE_INTERNAL # Force inline of code function inline_function(innercall) # Run optimizer self.context.optimize(module) if config.DUMP_OPTIMIZED: print(module) wrapper.verify() return wrapper, self.env