def parse_tuple_and_keywords(self, args, kws, fmt, keywords, *objs):
    """Emit a call to PyArg_ParseTupleAndKeywords(args, kws, fmt,
    keywords, *objs) and return the resulting int status value."""
    i8ptr = Type.pointer(Type.int(8))
    fnty = Type.function(Type.int(),
                         [self.pyobj, self.pyobj, i8ptr, Type.pointer(i8ptr)],
                         var_arg=True)
    fn = self._get_function(fnty, name="PyArg_ParseTupleAndKeywords")
    call_args = [args, kws, fmt, keywords]
    call_args.extend(objs)
    return self.builder.call(fn, call_args)
def __init__(self, context, builder):
    """
    Note: Maybe called multiple times when lowering a function

    *context* is the target context, *builder* the llvmlite IR builder
    positioned in the function currently being lowered.
    """
    from numba.targets import boxing
    self.context = context
    self.builder = builder

    self.module = builder.basic_block.function.module
    # A unique mapping of serialized objects in this module
    # NOTE(review): "__serialized" is name-mangled to a class-private
    # attribute name here; presumably all accesses occur inside this
    # class so lookups stay consistent — confirm against other methods.
    try:
        self.module.__serialized
    except AttributeError:
        self.module.__serialized = {}

    # Initialize types
    self.pyobj = self.context.get_argument_type(types.pyobject)
    self.voidptr = Type.pointer(Type.int(8))
    # Integer widths mirror the platform C types via ctypes.sizeof
    self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8)
    self.ulonglong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8)
    self.longlong = self.ulonglong
    self.double = Type.double()
    self.py_ssize_t = self.context.get_value_type(types.intp)
    self.cstring = Type.pointer(Type.int(8))
    # Opaque storage sized to hold a PyGILState_STATE
    self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
    # Opaque storage sized to hold a Py_buffer
    self.py_buffer_t = ir.ArrayType(ir.IntType(8), _helperlib.py_buffer_size)
def make_keywords(self, kws):
    """Build a NULL-terminated global ``char *[]`` from the keyword
    names *kws* and return it bitcast to ``char **`` (the *keywords*
    argument of PyArg_ParseTupleAndKeywords)."""
    stringtype = Type.pointer(Type.int(8))
    entries = [self.make_const_string(k) for k in kws]
    entries.append(Constant.null(stringtype))   # NULL terminator
    kwlist = cgutils.global_constant(self.module, ".kwlist",
                                     Constant.array(stringtype, entries))
    return Constant.bitcast(kwlist, Type.pointer(stringtype))
def numba_array_adaptor(self, ary, ptr):
    """Emit a call to numba_adapt_ndarray(ary, ptr) and return the int
    status value (non-zero on failure)."""
    voidptr = Type.pointer(Type.int(8))
    fnty = Type.function(Type.int(), [self.pyobj, voidptr])
    fn = self._get_function(fnty, name="numba_adapt_ndarray")
    # Neither pointer escapes the callee
    for arg in fn.args:
        arg.add_attribute(lc.ATTR_NO_CAPTURE)
    return self.builder.call(fn, (ary, ptr))
def get_argument_type(self, ty):
    """Return the LLVM type used to pass a value of Numba type *ty* as a
    function argument: booleans use their data representation, structs
    are passed by pointer, everything else by value."""
    if ty == types.boolean:
        return self.get_data_type(ty)
    if self.is_struct_type(ty):
        return Type.pointer(self.get_value_type(ty))
    return self.get_value_type(ty)
def insert_string_const_addrspace(self, builder, string):
    """
    Insert a constant string in the constant addresspace and return
    a generic i8 pointer to the data.

    This function attempts to deduplicate.
    """
    lmod = builder.basic_block.function.module
    text = Constant.stringz(string)
    name = "__conststring__.%s" % string
    charty = Type.int(8)

    # Reuse an existing global with the same name and string type
    for gv in lmod.global_variables:
        if gv.name == name and gv.type.pointee == text.type:
            break
    else:
        # Not found: create it in the NVVM constant address space
        gv = lmod.add_global_variable(text.type, name=name,
                                      addrspace=nvvm.ADDRSPACE_CONSTANT)
        gv.linkage = LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = text

    # Cast to a constant-addrspace char pointer, then convert to the
    # generic address space before returning
    constcharptrty = Type.pointer(charty, nvvm.ADDRSPACE_CONSTANT)
    charptr = builder.bitcast(gv, constcharptrty)

    conv = nvvmutils.insert_addrspace_conv(lmod, charty,
                                           nvvm.ADDRSPACE_CONSTANT)
    return builder.call(conv, [charptr])
def call_function_pointer(self, builder, funcptr, signature, args, cconv=None):
    """Call the raw function address *funcptr* (a Python int) with
    *args*; the return type comes from *signature*."""
    retty = self.get_value_type(signature.return_type)
    fnptrty = Type.pointer(Type.function(retty, [a.type for a in args]))
    # Materialize the address as an intp constant, then cast to pointer
    addr = self.get_constant(types.intp, funcptr)
    fnptr = builder.inttoptr(addr, fnptrty)
    return builder.call(fnptr, args, cconv=cconv)
def __init__(self, context, builder, value=None, ref=None, cast_ref=False):
    """Wrap a structure value for field access.

    Either *value* (an LLVM struct value stored into fresh stack
    storage) or *ref* (a pointer to existing storage) may be given,
    not both.  With *cast_ref*, a *ref* of mismatching pointee type is
    bitcast to the expected struct pointer instead of raising.
    """
    self._type = context.get_struct_type(self)
    self._context = context
    self._builder = builder
    if ref is None:
        # Own storage: allocate a stack slot for the struct
        self._value = alloca_once(builder, self._type)
        if value is not None:
            assert not is_pointer(value.type)
            assert value.type == self._type, (value.type, self._type)
            builder.store(value, self._value)
    else:
        # Borrow storage through *ref*
        assert value is None
        assert is_pointer(ref.type)
        if self._type != ref.type.pointee:
            if cast_ref:
                ref = builder.bitcast(ref, Type.pointer(self._type))
            else:
                raise TypeError(
                    "mismatching pointer type: got %s, expected %s"
                    % (ref.type.pointee, self._type))
        self._value = ref

    # Map field names to indices and precompute (0, i) GEP index pairs
    self._namemap = {}
    self._fdmap = []
    self._typemap = []
    base = Constant.int(Type.int(), 0)
    for i, (k, tp) in enumerate(self._fields):
        self._namemap[k] = i
        self._fdmap.append((base, Constant.int(Type.int(), i)))
        self._typemap.append(tp)
def to_native_generator(self, obj, typ):
    """
    Extract the generator structure pointer from a generator *obj*
    (a _dynfunc.Generator instance).
    """
    statety = Type.pointer(self.context.get_data_type(typ))
    state = self.context.get_generator_state(self.builder, obj, statety)
    return NativeValue(state)
def nrt_adapt_buffer_from_python(self, buf, ptr):
    """Emit a call to NRT_adapt_buffer_from_python(buf, ptr); requires
    NRT to be enabled on the target context."""
    assert self.context.enable_nrt
    argtypes = [Type.pointer(self.py_buffer_t), self.voidptr]
    fn = self._get_function(Type.function(Type.void(), argtypes),
                            name="NRT_adapt_buffer_from_python")
    # Neither pointer escapes the callee
    for arg in fn.args:
        arg.add_attribute(lc.ATTR_NO_CAPTURE)
    return self.builder.call(fn, (buf, ptr))
def get_return_type(self, ty):
    """Return the LLVM type used for returning a value of Numba type
    *ty*: Optionals unwrap to their payload type, structs reuse their
    argument (pointer) form, scalars return via pointer out-param."""
    if isinstance(ty, types.Optional):
        return self.get_return_type(ty.type)
    if self.is_struct_type(ty):
        return self.get_argument_type(ty)
    return Type.pointer(self.get_argument_type(ty))
def gil_release(self, gil):
    """
    Release the acquired GIL by gil_ensure().
    Must be paired with a gil_ensure().
    """
    fnty = Type.function(Type.void(), [Type.pointer(self.gil_state)])
    fn = self._get_function(fnty, "numba_gil_release")
    return self.builder.call(fn, [gil])
def unpack_tuple(self, args, name, n_min, n_max, *objs):
    """Emit a call to PyArg_UnpackTuple(args, name, n_min, n_max,
    *objs).  A Python str *name* is interned as a module constant."""
    charptr = Type.pointer(Type.int(8))
    fnty = Type.function(
        Type.int(),
        [self.pyobj, charptr, self.py_ssize_t, self.py_ssize_t],
        var_arg=True)
    fn = self._get_function(fnty, name="PyArg_UnpackTuple")
    if isinstance(name, str):
        name = self.context.insert_const_string(self.builder.module, name)
    bounds = [Constant.int(self.py_ssize_t, n) for n in (n_min, n_max)]
    return self.builder.call(fn, [args, name] + bounds + list(objs))
def __init__(self, context, builder):
    """
    Note: Maybe called multiple times when lowering a function

    *context* is the target context, *builder* the llvmlite IR builder
    positioned in the function currently being lowered.
    """
    fix_python_api()
    self.context = context
    self.builder = builder

    self.module = builder.basic_block.function.module
    # Initialize types
    self.pyobj = self.context.get_argument_type(types.pyobject)
    self.voidptr = Type.pointer(Type.int(8))
    # Integer widths mirror the platform C types via ctypes.sizeof
    self.long = Type.int(ctypes.sizeof(ctypes.c_long) * 8)
    self.ulonglong = Type.int(ctypes.sizeof(ctypes.c_ulonglong) * 8)
    self.longlong = self.ulonglong
    self.double = Type.double()
    self.py_ssize_t = self.context.get_value_type(types.intp)
    self.cstring = Type.pointer(Type.int(8))
    # Opaque storage sized to hold a PyGILState_STATE
    self.gil_state = Type.int(_helperlib.py_gil_state_size * 8)
def frexp_impl(context, builder, sig, args):
    """Lower math.frexp(x) by calling the numba_frexp / numba_frexpf
    C helper; returns an anonymous (mantissa, exponent) struct."""
    [val] = args
    fltty = context.get_data_type(sig.args[0])
    intty = context.get_data_type(sig.return_type[1])
    expptr = cgutils.alloca_once(builder, intty, name="exp")
    # Pick the helper matching the float width
    fname = {"float": "numba_frexpf", "double": "numba_frexp"}[str(fltty)]
    fnty = Type.function(fltty, (fltty, Type.pointer(intty)))
    fn = builder.module.get_or_insert_function(fnty, name=fname)
    mantissa = builder.call(fn, (val, expptr))
    res = cgutils.make_anonymous_struct(
        builder, (mantissa, builder.load(expptr)))
    return impl_ret_untracked(context, builder, sig.return_type, res)
def print_charseq(context, builder, sig, args):
    """Print a fixed-size char sequence by turning its bytes into a
    Python bytes object and printing that."""
    [x] = args
    py = context.get_python_api(builder)
    # Spill the value so we can take a byte pointer to it
    storage = cgutils.alloca_once(builder, x.type)
    builder.store(x, storage)
    byteptr = builder.bitcast(storage, Type.pointer(Type.int(8)))
    nbytes = context.get_constant(types.intp, x.type.elements[0].count)
    cstr = py.bytes_from_string_and_size(byteptr, nbytes)
    py.print_object(cstr)
    py.decref(cstr)
    return context.get_dummy_value()
def gil_ensure(self):
    """
    Ensure the GIL is acquired.
    The returned value must be consumed by gil_release().
    """
    fnty = Type.function(Type.void(), [Type.pointer(self.gil_state)])
    fn = self._get_function(fnty, "numba_gil_ensure")
    # Stack slot receiving the opaque GIL state
    gilptr = cgutils.alloca_once(self.builder, self.gil_state)
    self.builder.call(fn, [gilptr])
    return gilptr
def get_value_type(self, ty):
    """Return the LLVM type used to hold a value of Numba type *ty*;
    booleans become i1, Records a struct wrapping a pointer to their
    byte-array storage."""
    if ty == types.boolean:
        return Type.int(1)
    dataty = self.get_data_type(ty)

    if isinstance(ty, types.Record):
        # Record data are passed by reference
        memory = dataty.elements[0]
        return Type.struct([Type.pointer(memory)])

    return dataty
def from_native_generator(self, val, typ, env=None):
    """
    Make a Numba generator (a _dynfunc.Generator instance) from a
    generator structure pointer *val*.
    *env* is an optional _dynfunc.Environment instance to be wrapped in
    the generator.
    """
    llty = self.context.get_data_type(typ)
    assert not llty.is_pointer
    gen_struct_size = self.context.get_abi_sizeof(llty)

    gendesc = self.context.get_generator_desc(typ)

    # This is the PyCFunctionWithKeywords generated by PyCallWrapper
    genfnty = Type.function(self.pyobj, [self.pyobj, self.pyobj, self.pyobj])
    genfn = self._get_function(genfnty, name=gendesc.llvm_cpython_wrapper_name)

    # This is the raw finalizer generated by _lower_generator_finalize_func()
    finalizerty = Type.function(Type.void(), [self.voidptr])
    if typ.has_finalizer:
        finalizer = self._get_function(finalizerty,
                                       name=gendesc.llvm_finalizer_name)
    else:
        # No finalizer: pass a NULL function pointer
        finalizer = Constant.null(Type.pointer(finalizerty))

    # PyObject *numba_make_generator(state_size, initial_state, nextfunc,
    #                                finalizer, env)
    fnty = Type.function(self.pyobj, [self.py_ssize_t,
                                      self.voidptr,
                                      Type.pointer(genfnty),
                                      Type.pointer(finalizerty),
                                      self.voidptr])
    fn = self._get_function(fnty, name="numba_make_generator")

    state_size = ir.Constant(self.py_ssize_t, gen_struct_size)
    initial_state = self.builder.bitcast(val, self.voidptr)
    if env is None:
        env = self.get_null_object()
    env = self.builder.bitcast(env, self.voidptr)

    return self.builder.call(fn,
                             (state_size, initial_state, genfn, finalizer,
                              env))
def ptx_cmem_arylike(context, builder, sig, args):
    """Lower a constant host array into CUDA constant memory and return
    a native array structure viewing it.

    Complex dtypes are flattened into interleaved (real, imag) scalar
    constants; other numeric dtypes become scalar constants directly.

    Raises TypeError for non-numeric dtypes.
    """
    lmod = builder.module
    [arr] = args
    flat = arr.flatten(order='A')
    aryty = sig.return_type
    dtype = aryty.dtype

    if isinstance(dtype, types.Complex):
        elemtype = (types.float32
                    if dtype == types.complex64
                    else types.float64)
        constvals = []
        for i in range(flat.size):
            elem = flat[i]
            real = context.get_constant(elemtype, elem.real)
            imag = context.get_constant(elemtype, elem.imag)
            constvals.extend([real, imag])
    elif dtype in types.number_domain:
        constvals = [context.get_constant(dtype, flat[i])
                     for i in range(flat.size)]
    else:
        # Fixed message typo ("unsupport") to match the sibling
        # "unsupported type: %s" wording used elsewhere in this code.
        raise TypeError("unsupported type: %s" % dtype)

    constary = lc.Constant.array(constvals[0].type, constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)
    return ary._getvalue()
def pack_value(self, builder, ty, value, ptr):
    """Pack data for array storage
    """
    if isinstance(ty, types.Record):
        # Records: copy the raw byte payload wholesale into *ptr*.
        record_ptr = cgutils.get_record_data(builder, value)
        payload = builder.load(record_ptr)
        dest = builder.bitcast(ptr, Type.pointer(payload.type))
        builder.store(payload, dest)
        return
    if ty == types.boolean:
        # Booleans are stored as a byte.
        value = cgutils.as_bool_byte(builder, value)
    assert value.type == ptr.type.pointee
    builder.store(value, ptr)
def to_native_array(self, typ, ary):
    """Convert the Python ndarray object *ary* into a native array
    structure of Numba array type *typ* and return the loaded struct."""
    # TODO check matching dtype.
    #      currently, mismatching dtype will still work and causes
    #      potential memory corruption
    voidptr = Type.pointer(Type.int(8))
    nativearycls = self.context.make_array(typ)
    nativeary = nativearycls(self.context, self.builder)
    aryptr = nativeary._getpointer()

    ptr = self.builder.bitcast(aryptr, voidptr)
    # Fill the native struct via the C adaptor; non-zero means failure
    errcode = self.numba_array_adaptor(ary, ptr)
    failed = cgutils.is_not_null(self.builder, errcode)
    with cgutils.if_unlikely(self.builder, failed):
        # TODO: no error propagation path yet — trap on failure
        self.builder.unreachable()
    return self.builder.load(aryptr)
def __init__(self, context, builder, args, steps, i, argtype):
    """Wrap the i-th ufunc argument: load its data pointer from *args*
    and its stride from *steps*."""
    # Get data
    p = builder.gep(args, [context.get_constant(types.intp, i)])
    if cgutils.is_struct_ptr(argtype):
        # Argument is passed by reference (struct pointer)
        self.byref = True
        self.data = builder.bitcast(builder.load(p), argtype)
    else:
        self.byref = False
        self.data = builder.bitcast(builder.load(p), Type.pointer(argtype))
    # Get step
    p = builder.gep(steps, [context.get_constant(types.intp, i)])
    abisize = context.get_constant(types.intp,
                                   context.get_abi_sizeof(argtype))
    self.step = builder.load(p)
    # Contiguous iff the stride equals the element ABI size
    self.is_unit_strided = builder.icmp(ICMP_EQ, abisize, self.step)
    self.builder = builder
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace,
                   can_dynsized=False):
    """Allocate an array of *shape* x *dtype* in *addrspace* (local via
    alloca, otherwise a module-level global in that address space) and
    wrap it as a native array structure."""
    elemcount = reduce(operator.mul, shape)
    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local addrespace allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address-space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        gvmem.align = context.get_abi_sizeof(lldtype)

        if elemcount <= 0:
            if can_dynsized:    # dynamic shared memory
                gvmem.linkage = lc.LINKAGE_EXTERNAL
            else:
                raise ValueError("array length <= 0")
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL
            gvmem.initializer = lc.Constant.undef(laryty)

        if dtype not in types.number_domain:
            raise TypeError("unsupported type: %s" % dtype)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    return _make_array(context, builder, dataptr, dtype, shape)
def make_constant_array(self, builder, aryty, arr):
    """
    Unlike the parent version.  This returns a a pointer in the constant
    addrspace.

    The host array *arr* is embedded byte-for-byte as a module-level
    constant and viewed through a native array structure.
    """
    lmod = builder.module

    # Embed the raw bytes of the array (order-preserving flatten)
    constvals = [
        self.get_constant(types.byte, i)
        for i in iter(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = self.get_data_type(aryty.dtype)
    align = self.get_abi_sizeof(lldtype)
    # Round up to the next power of two
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = self.make_array(aryty)(self, builder)
    kshape = [self.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
    self.populate_array(ary,
                        data=builder.bitcast(genptr, ary.data.type),
                        shape=kshape,
                        strides=kstrides,
                        itemsize=ary.itemsize,
                        parent=ary.parent,
                        meminfo=None)

    return ary._getvalue()
def ptx_cmem_arylike(context, builder, sig, args):
    """Lower a constant host array into CUDA constant memory (embedded
    byte-for-byte) and return a native array structure viewing it."""
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    # Embed the raw bytes of the array (order-preserving flatten)
    constvals = [
        context.get_constant(types.byte, i)
        for i in iter(arr.tobytes(order="A"))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    # Round up to the next power of two
    gv.align = 2**(align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(
        ary,
        data=builder.bitcast(genptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=ary.itemsize,
        parent=ary.parent,
        meminfo=None,
    )
    return ary._getvalue()
def test_inline_rsqrt(self):
    """PTX compiled from an inline-asm rsqrt call must contain the
    rsqrt.approx.f32 instruction."""
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, "cu_rsqrt")
    builder = Builder.new(fn.append_basic_block("entry"))

    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              "rsqrt.approx.f32 $0, $1;",
                              "=f,f", side_effect=True)
    # Load, approximate, store back in place
    loaded = builder.load(fn.args[0])
    result = builder.call(inlineasm, [loaded])
    builder.store(result, fn.args[0])
    builder.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)
    self.assertTrue("rsqrt.approx.f32" in str(ptx))
def real_divmod(context, builder, x, y):
    """Compute Python-style float divmod of *x* and *y* via a helper
    function emitted once per module; returns (quotient, remainder)."""
    assert x.type == y.type
    floatty = x.type

    module = builder.module
    fname = ".numba.python.rem.%s" % x.type
    fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty)))
    fn = module.get_or_insert_function(fnty, fname)

    if fn.is_declaration:
        # First use in this module: emit the helper body
        fn.linkage = lc.LINKAGE_LINKONCE_ODR
        fnbuilder = lc.Builder.new(fn.append_basic_block('entry'))
        fx, fy, pmod = fn.args
        div, mod = real_divmod_func_body(context, fnbuilder, fx, fy)
        fnbuilder.store(mod, pmod)
        fnbuilder.ret(div)

    # Remainder comes back through an out-parameter
    pmod = cgutils.alloca_once(builder, floatty)
    quotient = builder.call(fn, (x, y, pmod))
    return quotient, builder.load(pmod)
def printf(builder, format_string, *values):
    """Emit a call to C printf with *format_string* and *values*.

    Python ints become i32 constants and floats become double
    constants; any other value is passed through unchanged.
    """
    fmt = Constant.stringz(format_string)
    mod = get_module(builder)
    gv = mod.add_global_variable(fmt.type, '')
    gv.initializer = fmt
    # GEP to the first character of the format string
    zero = Constant.int(Type.int(32), 0)
    str_addr = gv.gep([zero, zero])

    args = []
    for v in values:
        if isinstance(v, int):
            args.append(Constant.int(Type.int(), v))
        elif isinstance(v, float):
            args.append(Constant.real(Type.double(), v))
        else:
            args.append(v)
    functype = Type.function(Type.int(32), [Type.pointer(Type.int(8))], True)
    fn = mod.get_or_insert_function(functype, 'printf')
    builder.call(fn, [str_addr] + args)
def real_divmod(context, builder, x, y):
    """Compute Python-style float divmod of *x* and *y* via a helper
    function emitted once per module; returns (quotient, remainder)."""
    assert x.type == y.type
    floatty = x.type

    module = builder.module
    fname = context.mangler(".numba.python.rem", [x.type])
    fnty = Type.function(floatty, (floatty, floatty, Type.pointer(floatty)))
    fn = module.get_or_insert_function(fnty, fname)

    if fn.is_declaration:
        # First use in this module: emit the helper body
        fn.linkage = lc.LINKAGE_LINKONCE_ODR
        fnbuilder = lc.Builder(fn.append_basic_block('entry'))
        fx, fy, pmod = fn.args
        div, mod = real_divmod_func_body(context, fnbuilder, fx, fy)
        fnbuilder.store(mod, pmod)
        fnbuilder.ret(div)

    # Remainder comes back through an out-parameter
    pmod = cgutils.alloca_once(builder, floatty)
    quotient = builder.call(fn, (x, y, pmod))
    return quotient, builder.load(pmod)
def printf(builder, format_string, *values):
    """Emit a call to C printf with *format_string* and *values*; ints
    become i32 constants, floats become doubles, everything else is
    passed through unchanged."""
    str_const = Constant.stringz(format_string)
    global_str_const = get_module(builder).add_global_variable(
        str_const.type, '')
    global_str_const.initializer = str_const

    # GEP to the first character of the format string
    idx = [Constant.int(Type.int(32), 0), Constant.int(Type.int(32), 0)]
    str_addr = global_str_const.gep(idx)

    args = []
    for v in values:
        if isinstance(v, int):
            args.append(Constant.int(Type.int(), v))
        elif isinstance(v, float):
            args.append(Constant.real(Type.double(), v))
        else:
            args.append(v)
    # Variadic declaration of printf
    functype = Type.function(Type.int(32), [Type.pointer(Type.int(8))], True)
    fn = get_module(builder).get_or_insert_function(functype, 'printf')
    builder.call(fn, [str_addr] + args)
def ptx_cmem_arylike(context, builder, sig, args):
    """Lower a constant host array into CUDA constant memory (embedded
    byte-for-byte) and return a native array structure viewing it."""
    lmod = builder.module
    [arr] = args
    aryty = sig.return_type

    # Embed the raw bytes of the array (order-preserving flatten)
    constvals = [
        context.get_constant(types.byte, i)
        for i in six.iterbytes(arr.tobytes(order='A'))
    ]
    constary = lc.Constant.array(Type.int(8), constvals)

    addrspace = nvvm.ADDRSPACE_CONSTANT
    gv = lmod.add_global_variable(constary.type, name="_cudapy_cmem",
                                  addrspace=addrspace)
    gv.linkage = lc.LINKAGE_INTERNAL
    gv.global_constant = True
    gv.initializer = constary

    # Preserve the underlying alignment
    lldtype = context.get_data_type(aryty.dtype)
    align = context.get_abi_sizeof(lldtype)
    # Round up to the next power of two
    gv.align = 2 ** (align - 1).bit_length()

    # Convert to generic address-space
    conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
    addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
    genptr = builder.call(conv, [addrspaceptr])

    # Create array object
    ary = context.make_array(aryty)(context, builder)
    kshape = [context.get_constant(types.intp, s) for s in arr.shape]
    kstrides = [context.get_constant(types.intp, s) for s in arr.strides]
    context.populate_array(ary,
                           data=builder.bitcast(genptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=ary.itemsize,
                           parent=ary.parent,
                           meminfo=None)
    return ary._getvalue()
def test_inline_rsqrt(self):
    """PTX compiled from an inline-asm rsqrt call must contain the
    rsqrt.approx.f32 instruction."""
    mod = Module.new(__name__)
    fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
    fn = mod.add_function(fnty, 'cu_rsqrt')
    bldr = Builder.new(fn.append_basic_block('entry'))

    rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
    inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                              'rsqrt.approx.f32 $0, $1;',
                              '=f,f', side_effect=True)
    # Load, approximate, store back in place
    val = bldr.load(fn.args[0])
    res = bldr.call(inlineasm, [val])
    bldr.store(res, fn.args[0])
    bldr.ret_void()

    # generate ptx
    nvvm.fix_data_layout(mod)
    nvvm.set_cuda_kernel(fn)
    nvvmir = str(mod)
    ptx = nvvm.llvm_to_ptx(nvvmir)
    self.assertTrue('rsqrt.approx.f32' in str(ptx))
def _python_array_obj_to_native_list(typ, obj, c, size, listptr, errorptr):
    """
    Construct a new native list from a Python array of objects.
    copied from _python_list_to_native but list_getitem is converted to
    array getitem.

    On any failure *errorptr* is set to true; on success the new list
    value is stored through *listptr*.
    """
    def check_element_type(nth, itemobj, expected_typobj):
        # Currently unused (see the commented-out call below); kept for
        # when heterogeneous-list type checking is re-enabled.
        typobj = nth.typeof(itemobj)
        # Check if *typobj* is NULL
        with c.builder.if_then(
                cgutils.is_null(c.builder, typobj),
                likely=False,
                ):
            c.builder.store(cgutils.true_bit, errorptr)
            loop.do_break()
        # Mandate that objects all have the same exact type
        type_mismatch = c.builder.icmp_signed('!=', typobj, expected_typobj)

        with c.builder.if_then(type_mismatch, likely=False):
            c.builder.store(cgutils.true_bit, errorptr)
            c.pyapi.err_format(
                "PyExc_TypeError",
                "can't unbox heterogeneous list: %S != %S",
                expected_typobj, typobj,
                )
            c.pyapi.decref(typobj)
            loop.do_break()
        c.pyapi.decref(typobj)

    # Allocate a new native list
    ok, list = listobj.ListInstance.allocate_ex(c.context, c.builder, typ, size)

    # Array getitem call
    arr_get_fnty = LLType.function(LLType.pointer(c.pyapi.pyobj),
                                   [c.pyapi.pyobj, c.pyapi.py_ssize_t])
    arr_get_fn = c.pyapi._get_function(arr_get_fnty, name="array_getptr1")

    with c.builder.if_else(ok, likely=True) as (if_ok, if_not_ok):
        with if_ok:
            list.size = size
            zero = lir.Constant(size.type, 0)
            with c.builder.if_then(c.builder.icmp_signed('>', size, zero),
                                   likely=True):
                # Traverse Python list and unbox objects into native list
                with _NumbaTypeHelper(c) as nth:
                    # Note: *expected_typobj* can't be NULL
                    # TODO: enable type checking when emty list item in
                    # list(list(str)) case can be handled
                    # expected_typobj = nth.typeof(c.builder.load(
                    #     c.builder.call(arr_get_fn, [obj, zero])))
                    with cgutils.for_range(c.builder, size) as loop:
                        itemobj = c.builder.call(arr_get_fn,
                                                 [obj, loop.index])
                        # extra load since we have ptr to object
                        itemobj = c.builder.load(itemobj)
                        # c.pyapi.print_object(itemobj)
                        # check_element_type(nth, itemobj, expected_typobj)
                        # XXX we don't call native cleanup for each list
                        # element, since that would require keeping track
                        # of which unboxings have been successful.
                        native = c.unbox(typ.dtype, itemobj)
                        with c.builder.if_then(native.is_error,
                                               likely=False):
                            c.builder.store(cgutils.true_bit, errorptr)
                            loop.do_break()
                        # The object (e.g. string) is stored so incref=True
                        list.setitem(loop.index, native.value, incref=True)
                    # c.pyapi.decref(expected_typobj)
            if typ.reflected:
                list.parent = obj
            # Stuff meminfo pointer into the Python object for
            # later reuse.
            with c.builder.if_then(
                    c.builder.not_(c.builder.load(errorptr)),
                    likely=False):
                c.pyapi.object_set_private_data(obj, list.meminfo)
            list.set_dirty(False)
            c.builder.store(list.value, listptr)
        with if_not_ok:
            c.builder.store(cgutils.true_bit, errorptr)

    # If an error occurred, drop the whole native list
    with c.builder.if_then(c.builder.load(errorptr)):
        c.context.nrt.decref(c.builder, typ, list.value)
def _prepare_call_to_object_mode(context, builder, func, signature, args, env):
    """Box each native array argument into a PyArray object and call the
    object-mode ufunc core *func*; returns (status_code, error_bit)."""
    mod = cgutils.get_module(builder)

    thisfunc = cgutils.get_function(builder)
    bb_core_return = thisfunc.append_basic_block('ufunc.core.return')

    pyapi = context.get_python_api(builder)

    # Call to
    # PyObject* ndarray_new(int nd,
    #       npy_intp *dims,   /* shape */
    #       npy_intp *strides,
    #       void* data,
    #       int type_num,
    #       int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr,
                                    ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)
    ndarray_pointers = []
    ndarray_objects = []
    for i, (arr, arrtype) in enumerate(zip(args, signature.args)):
        ptr = cgutils.alloca_once(builder, ll_pyobj)
        ndarray_pointers.append(ptr)

        builder.store(Constant.null(ll_pyobj), ptr)   # initialize to NULL

        arycls = context.make_array(arrtype)
        array = arycls(context, builder, ref=arr)

        zero = Constant.int(ll_int, 0)

        # Extract members of the llarray
        nd = Constant.int(ll_int, arrtype.ndim)
        dims = builder.gep(array._get_ptr_by_name('shape'), [zero, zero])
        strides = builder.gep(array._get_ptr_by_name('strides'), [zero, zero])
        data = builder.bitcast(array.data, ll_voidptr)
        dtype = np.dtype(str(arrtype.dtype))

        # Prepare other info for reconstruction of the PyArray
        type_num = Constant.int(ll_int, dtype.num)
        itemsize = Constant.int(ll_int, dtype.itemsize)

        # Call helper to reconstruct PyArray objects
        obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                          type_num, itemsize])
        builder.store(obj, ptr)
        ndarray_objects.append(obj)

        # Bail out to the return block if boxing failed
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(ndarray_objects)

    status, retval = context.call_conv.call_function(
        builder, func, ll_pyobj, object_sig, ndarray_objects, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument object
    for ndary_ptr in ndarray_pointers:
        pyapi.decref(builder.load(ndary_ptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def build(self):
    """Build the gufunc wrapper function matching NumPy's inner-loop
    signature (args, dims, steps, data); returns (wrapper, env)."""
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    # NumPy gufunc loop signature: void(args, dims, steps, data)
    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = self.library.create_ir_module('')
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    # Declare the core function in the wrapper module
    func = wrapper_module.add_function(func_type, name=self.func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty,
                                          "__gufunc__." + self.func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))
    # dims[0] is the outer loop count
    loopcount = builder.load(arg_dims, name="loopcount")

    # Unpack shapes
    unique_syms = set()
    for grp in (self.sin, self.sout):
        for syms in grp:
            unique_syms |= set(syms)

    # Assign each input dimension symbol an index in first-seen order
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Load each symbol's extent from dims[i + 1] (dims[0] is loopcount)
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(
            builder.gep(arg_dims,
                        [self.context.get_constant(types.intp, i + 1)]))

    # Prepare inputs
    arrays = []
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_dims,
                         arg_steps, i, step_offset, typ, sym, sym_dim)
        if not ary.as_scalar:
            step_offset += ary.ndim
        arrays.append(ary)

    bbreturn = cgutils.get_function(builder).append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as ind:
        args = [a.array_value for a in arrays]
        innercall, error = self.gen_loop_body(builder, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

        # Advance each array by its outer step
        for a in arrays:
            a.next(ind)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder)

    builder.ret_void()

    self.library.add_ir_module(wrapper_module)
    wrapper = self.library.get_function(wrapper.name)

    # Set core function to internal so that it is not generated
    self.func.linkage = LINKAGE_INTERNAL

    return wrapper, self.env
def build_ufunc_wrapper(library, context, func, signature, objmode, env):
    """
    Wrap the scalar function with a loop that iterates over the
    arguments.

    In *objmode* the loop body calls the object-mode core under the GIL;
    otherwise a fast path is emitted for unit-strided inputs alongside a
    general strided loop.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    # NumPy ufunc loop signature: void(args, dims, steps, data)
    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    if objmode:
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)
    oldfunc = func
    # Declare the core function in the wrapper module
    func = wrapper_module.add_function(func_type, name=func.name)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder.new(wrapper.append_basic_block("entry"))

    # dims[0] is the loop count
    loopcount = builder.load(arg_dims, name="loopcount")

    actual_args = context.call_conv.get_arguments(func)

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                context.get_argument_type(typ)))

    # Prepare output
    valty = context.get_data_type(signature.return_type)
    out = UArrayArg(context, builder, arg_args, arg_steps,
                    len(actual_args), valty)

    # Setup indices
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # All inputs unit-strided => fast path applies
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    if objmode:
        # General loop
        pyapi = context.get_python_api(builder)
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder,
                                           arrays, out, offsets,
                                           store_offset, signature,
                                           pyapi, env)
        pyapi.gil_release(gil)
        builder.ret_void()
    else:
        with cgutils.ifelse(builder, unit_strided) as (is_unit_strided,
                                                       is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as ind:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    ind)
                builder.ret_void()

            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature)
                builder.ret_void()

        builder.ret_void()
    del builder

    # Run optimizer
    library.add_ir_module(wrapper_module)
    wrapper = library.get_function(wrapper.name)
    # Hide the core function now that it is inlined into the wrapper
    oldfunc.linkage = LINKAGE_INTERNAL

    return wrapper
def recreate_record(self, pdata, size, dtypeaddr):
    """
    Emit a call to the ``numba_recreate_record`` C helper, building a
    Python object from *size* bytes of record data at *pdata* using the
    dtype object pointed to by *dtypeaddr*.  Returns the call instruction
    (a pyobject value).
    """
    byte_ptr = Type.pointer(Type.int(8))
    helper_sig = Type.function(self.pyobj,
                               [byte_ptr, Type.int(), self.pyobj])
    helper = self._get_function(helper_sig, name="numba_recreate_record")
    call_args = [pdata, size, dtypeaddr]
    return self.builder.call(helper, call_args)
from llvmlite import ir as llvmir import llvmlite.llvmpy.core as lc from llvmlite.llvmpy.core import Type, Constant, LLVMException import llvmlite.binding as ll from numba import types, utils, cgutils, typing, funcdesc, debuginfo from numba import _dynfunc, _helperlib from numba.compiler_lock import global_compiler_lock from numba.pythonapi import PythonAPI from . import arrayobj, builtins, imputils from .imputils import (user_function, user_generator, builtin_registry, impl_ret_borrowed, RegistryLoader) from numba import datamodel GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER void_ptr = GENERIC_POINTER class OverloadSelector(object): """ An object matching an actual signature against a registry of formal signatures and choosing the best candidate, if any. In the current implementation: - a "signature" is a tuple of type classes or type instances - the "best candidate" is the most specific match """ def __init__(self):
def get_data_type(self, ty):
    """
    Get a LLVM data representation of the Numba type *ty* that is safe
    for storage.  Record data are stored as byte array.

    The return value is a llvmlite.ir.Type object, or None if the type
    is an opaque pointer (???).
    """
    # Registered factories take precedence over the hard-coded dispatch
    # below; a KeyError means no factory is registered for this type.
    try:
        fac = type_registry.match(ty)
    except KeyError:
        pass
    else:
        return fac(self, ty)

    # Types that are only meaningful at the Python level are represented
    # as a generic PyObject pointer.
    if (isinstance(ty, types.Dummy) or
            isinstance(ty, types.Module) or
            isinstance(ty, types.Function) or
            isinstance(ty, types.Dispatcher) or
            isinstance(ty, types.Object) or
            isinstance(ty, types.Macro)):
        return PYOBJECT
    elif isinstance(ty, types.CPointer):
        dty = self.get_data_type(ty.dtype)
        return Type.pointer(dty)
    elif isinstance(ty, types.Optional):
        return self.get_struct_type(self.make_optional(ty))
    elif isinstance(ty, types.Array):
        return self.get_struct_type(self.make_array(ty))
    elif isinstance(ty, types.UniTuple):
        # Homogeneous tuple -> LLVM array of the element type.
        dty = self.get_value_type(ty.dtype)
        return Type.array(dty, ty.count)
    elif isinstance(ty, types.Tuple):
        # Heterogeneous tuple -> LLVM struct of per-element types.
        dtys = [self.get_value_type(t) for t in ty]
        return Type.struct(dtys)
    elif isinstance(ty, types.Record):
        # Record are represented as byte array
        return Type.struct([Type.array(Type.int(8), ty.size)])
    elif isinstance(ty, types.UnicodeCharSeq):
        charty = Type.int(numpy_support.sizeof_unicode_char * 8)
        return Type.struct([Type.array(charty, ty.count)])
    elif isinstance(ty, types.CharSeq):
        charty = Type.int(8)
        return Type.struct([Type.array(charty, ty.count)])
    elif ty in STRUCT_TYPES:
        return self.get_struct_type(STRUCT_TYPES[ty])
    else:
        # Secondary registry of struct implementations; falls through to
        # the remaining cases when no entry exists.
        try:
            impl = struct_registry.match(ty)
        except KeyError:
            pass
        else:
            return self.get_struct_type(impl(ty))
        if isinstance(ty, types.Pair):
            pairty = self.make_pair(ty.first_type, ty.second_type)
            return self.get_struct_type(pairty)
        else:
            # Final fallback: simple scalar types looked up in LTYPEMAP
            # (raises KeyError for unknown types).
            return LTYPEMAP[ty]
from functools import singledispatch from llvmlite.llvmpy.core import Type from numba.core import types, cgutils from numba.core.imputils import Registry from numba.cuda import nvvmutils registry = Registry() lower = registry.lower voidptr = Type.pointer(Type.int(8)) # NOTE: we don't use @lower here since print_item() doesn't return a LLVM value @singledispatch def print_item(ty, context, builder, val): """ Handle printing of a single value of the given Numba type. A (format string, [list of arguments]) is returned that will allow forming the final printf()-like call. """ raise NotImplementedError("printing unimplemented for values of type %s" % (ty, )) @print_item.register(types.Integer) @print_item.register(types.IntegerLiteral) def int_print_impl(ty, context, builder, val): if ty in types.unsigned_domain: rawfmt = "%llu" dsttype = types.uint64
def recreate_record(self, pdata, size, dtype, env_manager):
    """
    Emit a call to the ``numba_recreate_record`` C helper, building a
    Python object from *size* bytes of record data at *pdata*.  The
    *dtype* Python object is pinned in the environment via *env_manager*
    and its address is passed to the helper.  Returns the call
    instruction (a pyobject value).
    """
    byte_ptr = Type.pointer(Type.int(8))
    helper_sig = Type.function(self.pyobj,
                               [byte_ptr, Type.int(), self.pyobj])
    helper = self._get_function(helper_sig, name="numba_recreate_record")
    # Register the dtype as an environment constant, then load it.
    const_index = env_manager.add_const(dtype)
    dtypeaddr = env_manager.read_const(const_index)
    return self.builder.call(helper, [pdata, size, dtypeaddr])
def parse_tuple(self, args, fmt, *objs):
    """
    Emit a call to ``PyArg_ParseTuple(args, fmt, *objs)`` and return the
    resulting int status value.
    """
    char_ptr = Type.pointer(Type.int(8))
    fixed_argtypes = [self.pyobj, char_ptr]
    # The C function is variadic: output pointers follow the format string.
    helper_sig = Type.function(Type.int(), fixed_argtypes, var_arg=True)
    helper = self._get_function(helper_sig, name="PyArg_ParseTuple")
    call_args = [args, fmt]
    call_args.extend(objs)
    return self.builder.call(helper, call_args)
from llvmlite import ir as llvmir import llvmlite.llvmpy.core as lc from llvmlite.llvmpy.core import Type, Constant, LLVMException import llvmlite.binding as ll from numba import types, utils, cgutils, typing, funcdesc, debuginfo from numba import _dynfunc, _helperlib from numba.pythonapi import PythonAPI from . import arrayobj, builtins, imputils from .imputils import (user_function, user_generator, builtin_registry, impl_ret_borrowed, RegistryLoader) from numba import datamodel GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER void_ptr = GENERIC_POINTER class OverloadSelector(object): """ An object matching an actual signature against a registry of formal signatures and choosing the best candidate, if any. In the current implementation: - a "signature" is a tuple of type classes or type instances - the "best candidate" is the most specific match """ def __init__(self):
def build_ufunc_wrapper(library, context, fname, signature, objmode, envptr,
                        env):
    """
    Wrap the scalar function with a loop that iterates over the arguments.

    Emits an LLVM function matching the NumPy ufunc inner-loop signature
    ``void(char **args, intp *dims, intp *steps, char *data)`` that calls
    the kernel named *fname* once per element, and returns the address of
    the compiled wrapper.

    :param library: code library containing the compiled kernel; linked
                    into the new wrapper library
    :param context: target context used for types, constants and call conv
    :param fname: LLVM symbol name of the scalar kernel (must be a str)
    :param signature: Numba signature of the scalar kernel
    :param objmode: if true, the kernel runs in object (pyobject) mode
                    under the GIL
    :param envptr: environment pointer forwarded to the object-mode loop
    :param env: environment object forwarded to the object-mode loop
    """
    assert isinstance(fname, str)
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    # The wrapper gets its own library; the kernel library is linked in
    # at the end.
    wrapperlib = context.codegen().create_library('ufunc_wrapper')
    wrapper_module = wrapperlib.create_ir_module('')
    if objmode:
        # Object mode: every argument and the return are generic pyobjects.
        func_type = context.call_conv.get_function_type(
            types.pyobject, [types.pyobject] * len(signature.args))
    else:
        func_type = context.call_conv.get_function_type(
            signature.return_type, signature.args)

    # Declare the kernel inside the wrapper module so it can be called;
    # alwaysinline lets the optimizer fold it into the loop.
    func = wrapper_module.add_function(func_type, name=fname)
    func.attributes.add("alwaysinline")

    wrapper = wrapper_module.add_function(fnty, "__ufunc__." + func.name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))

    # dims[0] is the element count for a 1-d inner loop.
    loopcount = builder.load(arg_dims, name="loopcount")

    # Prepare inputs
    arrays = []
    for i, typ in enumerate(signature.args):
        arrays.append(UArrayArg(context, builder, arg_args, arg_steps, i,
                                typ))

    # Prepare output: the output operand follows all inputs in args[].
    out = UArrayArg(context, builder, arg_args, arg_steps, len(arrays),
                    signature.return_type)

    # Setup indices: one running byte-offset slot per input plus one for
    # the output store position.
    offsets = []
    zero = context.get_constant(types.intp, 0)
    for _ in arrays:
        p = cgutils.alloca_once(builder, intp_t)
        offsets.append(p)
        builder.store(zero, p)

    store_offset = cgutils.alloca_once(builder, intp_t)
    builder.store(zero, store_offset)

    # AND together per-array unit-stride flags to pick the fast loop.
    unit_strided = cgutils.true_bit
    for ary in arrays:
        unit_strided = builder.and_(unit_strided, ary.is_unit_strided)

    pyapi = context.get_python_api(builder)
    if objmode:
        # General loop: object mode must hold the GIL around the whole loop.
        gil = pyapi.gil_ensure()
        with cgutils.for_range(builder, loopcount, intp=intp_t):
            slowloop = build_obj_loop_body(context, func, builder,
                                           arrays, out, offsets,
                                           store_offset, signature,
                                           pyapi, envptr, env)
        pyapi.gil_release(gil)
        builder.ret_void()
    else:
        # Native mode: branch at runtime between the unit-strided fast
        # loop and the generic strided loop.
        with builder.if_else(unit_strided) as (is_unit_strided, is_strided):
            with is_unit_strided:
                with cgutils.for_range(builder, loopcount,
                                       intp=intp_t) as loop:
                    fastloop = build_fast_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    loop.index, pyapi)
            with is_strided:
                # General loop
                with cgutils.for_range(builder, loopcount, intp=intp_t):
                    slowloop = build_slow_loop_body(context, func, builder,
                                                    arrays, out, offsets,
                                                    store_offset, signature,
                                                    pyapi)
        builder.ret_void()

    del builder

    # Link and finalize
    wrapperlib.add_ir_module(wrapper_module)
    wrapperlib.add_linking_library(library)
    return wrapperlib.get_pointer_to_function(wrapper.name)
def get_record_member(builder, record, offset, typ):
    """
    Return a pointer of LLVM type ``typ*`` to the member located *offset*
    bytes into *record*.
    """
    member_ptr = gep_inbounds(builder, record, 0, offset)
    # Members are stored by value; a pointer-typed pointee would indicate
    # a malformed record layout.
    assert not is_pointer(member_ptr.type.pointee)
    return builder.bitcast(member_ptr, Type.pointer(typ))
def _prepare_call_to_object_mode(context, builder, pyapi, func, signature,
                                 args, env):
    """
    Emit IR that boxes the native *args* into Python objects, calls the
    object-mode ufunc core *func* with them, and releases all temporary
    objects.

    Returns ``(innercall, error)``: the call-status code value and an i1
    flag that is true when boxing failed or the core call errored.
    Control is left positioned at the shared return block.
    """
    mod = builder.module

    # Shared exit block: reached on boxing failure or after the core call.
    bb_core_return = builder.append_basic_block('ufunc.core.return')

    # Call to
    # PyObject* ndarray_new(int nd,
    #                       npy_intp *dims,   /* shape */
    #                       npy_intp *strides,
    #                       void* data,
    #                       int type_num,
    #                       int itemsize)
    ll_int = context.get_value_type(types.int32)
    ll_intp = context.get_value_type(types.intp)
    ll_intp_ptr = Type.pointer(ll_intp)
    ll_voidptr = context.get_value_type(types.voidptr)
    ll_pyobj = context.get_value_type(types.pyobject)
    fnty = Type.function(ll_pyobj, [ll_int, ll_intp_ptr, ll_intp_ptr,
                                    ll_voidptr, ll_int, ll_int])

    fn_array_new = mod.get_or_insert_function(fnty, name="numba_ndarray_new")

    # Convert each llarray into pyobject
    error_pointer = cgutils.alloca_once(builder, Type.int(1), name='error')
    builder.store(cgutils.true_bit, error_pointer)

    # The PyObject* arguments to the kernel function
    object_args = []
    # Pointers to the boxed objects, kept so they can all be decref'ed at
    # the exit block even when boxing aborts partway through.
    object_pointers = []

    for i, (arg, argty) in enumerate(zip(args, signature.args)):
        # Allocate NULL-initialized slot for this argument
        objptr = cgutils.alloca_once(builder, ll_pyobj, zfill=True)
        object_pointers.append(objptr)

        if isinstance(argty, types.Array):
            # Special case arrays: we don't need full-blown NRT reflection
            # since the argument will be gone at the end of the kernel
            arycls = context.make_array(argty)
            array = arycls(context, builder, value=arg)

            zero = Constant.int(ll_int, 0)

            # Extract members of the llarray
            nd = Constant.int(ll_int, argty.ndim)
            dims = builder.gep(array._get_ptr_by_name('shape'),
                               [zero, zero])
            strides = builder.gep(array._get_ptr_by_name('strides'),
                                  [zero, zero])
            data = builder.bitcast(array.data, ll_voidptr)
            dtype = np.dtype(str(argty.dtype))

            # Prepare other info for reconstruction of the PyArray
            type_num = Constant.int(ll_int, dtype.num)
            itemsize = Constant.int(ll_int, dtype.itemsize)

            # Call helper to reconstruct PyArray objects
            obj = builder.call(fn_array_new, [nd, dims, strides, data,
                                              type_num, itemsize])
        else:
            # Other argument types => use generic boxing
            obj = pyapi.from_native_value(argty, arg)

        builder.store(obj, objptr)
        object_args.append(obj)

        # NULL result from boxing means a Python error is set; record it
        # and jump straight to the exit block.
        obj_is_null = cgutils.is_null(builder, obj)
        builder.store(obj_is_null, error_pointer)
        cgutils.cbranch_or_continue(builder, obj_is_null, bb_core_return)

    # Call ufunc core function
    object_sig = [types.pyobject] * len(object_args)

    status, retval = context.call_conv.call_function(
        builder, func, types.pyobject, object_sig,
        object_args, env=env)
    builder.store(status.is_error, error_pointer)

    # Release returned object
    pyapi.decref(retval)

    builder.branch(bb_core_return)
    # At return block
    builder.position_at_end(bb_core_return)

    # Release argument objects; decref(NULL) is assumed safe for slots
    # never filled — TODO confirm against pyapi.decref semantics.
    for objptr in object_pointers:
        pyapi.decref(builder.load(objptr))

    innercall = status.code
    return innercall, builder.load(error_pointer)
def _generic_array(context, builder, shape, dtype, symbol_name, addrspace, can_dynsized=False): elemcount = reduce(operator.mul, shape, 1) # Check for valid shape for this type of allocation. # Only 1d arrays can be dynamic. dynamic_smem = elemcount <= 0 and can_dynsized and len(shape) == 1 if elemcount <= 0 and not dynamic_smem: raise ValueError("array length <= 0") # Check that we support the requested dtype other_supported_type = isinstance(dtype, (types.Record, types.Boolean)) if dtype not in types.number_domain and not other_supported_type: raise TypeError("unsupported type: %s" % dtype) lldtype = context.get_data_type(dtype) laryty = Type.array(lldtype, elemcount) if addrspace == nvvm.ADDRSPACE_LOCAL: # Special case local address space allocation to use alloca # NVVM is smart enough to only use local memory if no register is # available dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name) else: lmod = builder.module # Create global variable in the requested address space gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace) # Specify alignment to avoid misalignment bug align = context.get_abi_sizeof(lldtype) # Alignment is required to be a power of 2 for shared memory. If it is # not a power of 2 (e.g. for a Record array) then round up accordingly. gvmem.align = 1 << (align - 1).bit_length() if dynamic_smem: gvmem.linkage = lc.LINKAGE_EXTERNAL else: ## Comment out the following line to workaround a NVVM bug ## which generates a invalid symbol name when the linkage ## is internal and in some situation. 
## See _get_unique_smem_id() # gvmem.linkage = lc.LINKAGE_INTERNAL gvmem.initializer = lc.Constant.undef(laryty) # Convert to generic address-space conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace) addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace)) dataptr = builder.call(conv, [addrspaceptr]) targetdata = _get_target_data(context) lldtype = context.get_data_type(dtype) itemsize = lldtype.get_abi_size(targetdata) # Compute strides laststride = itemsize rstrides = [] for i, lastsize in enumerate(reversed(shape)): rstrides.append(laststride) laststride *= lastsize strides = [s for s in reversed(rstrides)] kstrides = [context.get_constant(types.intp, s) for s in strides] # Compute shape if dynamic_smem: # Compute the shape based on the dynamic shared memory configuration. # Unfortunately NVVM does not provide an intrinsic for the # %dynamic_smem_size register, so we must read it using inline # assembly. get_dynshared_size = InlineAsm.get(Type.function(Type.int(), []), "mov.u32 $0, %dynamic_smem_size;", '=r', side_effect=True) dynsmem_size = builder.zext(builder.call(get_dynshared_size, []), Type.int(width=64)) # Only 1-D dynamic shared memory is supported so the following is a # sufficient construction of the shape kitemsize = context.get_constant(types.intp, itemsize) kshape = [builder.udiv(dynsmem_size, kitemsize)] else: kshape = [context.get_constant(types.intp, s) for s in shape] # Create array object ndim = len(shape) aryty = types.Array(dtype=dtype, ndim=ndim, layout='C') ary = context.make_array(aryty)(context, builder) context.populate_array(ary, data=builder.bitcast(dataptr, ary.data.type), shape=kshape, strides=kstrides, itemsize=context.get_constant(types.intp, itemsize), meminfo=None) return ary._getvalue()
def get_record_member(builder, record, offset, typ):
    """
    Return a pointer of LLVM type ``typ*`` to the member located *offset*
    bytes into the data of *record*.
    """
    data_ptr = get_record_data(builder, record)
    member = inbound_gep(builder, data_ptr, 0, offset)
    # Members are stored by value; a pointer-typed pointee would indicate
    # a malformed record layout.
    assert not is_pointer(member.type.pointee)
    return builder.bitcast(member, Type.pointer(typ))
def _build_wrapper(self, library, name):
    """
    The LLVM IRBuilder code to create the gufunc wrapper.
    The *library* arg is the CodeLibrary for which the wrapper should
    be added to.  The *name* arg is the name of the wrapper function being
    created.

    Emits a function with the NumPy (g)ufunc inner-loop signature
    ``void(char **args, intp *dims, intp *steps, char *data)`` that calls
    the compiled kernel once per outer-loop iteration.
    """
    byte_t = Type.int(8)
    byte_ptr_t = Type.pointer(byte_t)
    byte_ptr_ptr_t = Type.pointer(byte_ptr_t)
    intp_t = self.context.get_value_type(types.intp)
    intp_ptr_t = Type.pointer(intp_t)

    fnty = Type.function(Type.void(), [byte_ptr_ptr_t, intp_ptr_t,
                                       intp_ptr_t, byte_ptr_t])

    wrapper_module = library.create_ir_module('')
    func_type = self.call_conv.get_function_type(self.fndesc.restype,
                                                 self.fndesc.argtypes)
    fname = self.fndesc.llvm_func_name
    # Declare the kernel inside the wrapper module; alwaysinline lets the
    # optimizer fold it into the loop.
    func = wrapper_module.add_function(func_type, name=fname)

    func.attributes.add("alwaysinline")
    wrapper = wrapper_module.add_function(fnty, name)
    arg_args, arg_dims, arg_steps, arg_data = wrapper.args
    arg_args.name = "args"
    arg_dims.name = "dims"
    arg_steps.name = "steps"
    arg_data.name = "data"

    builder = Builder(wrapper.append_basic_block("entry"))
    # dims[0] is the outer-loop count.
    loopcount = builder.load(arg_dims, name="loopcount")
    pyapi = self.context.get_python_api(builder)

    # Unpack shapes
    unique_syms = set()
    for grp in (self.sin, self.sout):
        for syms in grp:
            unique_syms |= set(syms)

    # Assign each input dimension symbol an index in first-seen order.
    sym_map = {}
    for syms in self.sin:
        for s in syms:
            if s not in sym_map:
                sym_map[s] = len(sym_map)

    # Load each symbol's runtime extent from dims[i + 1] (dims[0] holds
    # the loop count).
    sym_dim = {}
    for s, i in sym_map.items():
        sym_dim[s] = builder.load(builder.gep(arg_dims,
                                              [self.context.get_constant(
                                                  types.intp,
                                                  i + 1)]))

    # Prepare inputs
    arrays = []
    # Per-core-dimension steps follow the per-operand steps in steps[].
    step_offset = len(self.sin) + len(self.sout)
    for i, (typ, sym) in enumerate(zip(self.signature.args,
                                       self.sin + self.sout)):
        ary = GUArrayArg(self.context, builder, arg_args, arg_steps, i,
                         step_offset, typ, sym, sym_dim)
        step_offset += len(sym)
        arrays.append(ary)

    bbreturn = builder.append_basic_block('.return')

    # Prologue
    self.gen_prologue(builder, pyapi)

    # Loop
    with cgutils.for_range(builder, loopcount, intp=intp_t) as loop:
        args = [a.get_array_at_offset(loop.index) for a in arrays]
        innercall, error = self.gen_loop_body(builder, pyapi, func, args)
        # If error, escape
        cgutils.cbranch_or_continue(builder, error, bbreturn)

    builder.branch(bbreturn)
    builder.position_at_end(bbreturn)

    # Epilogue
    self.gen_epilogue(builder, pyapi)

    builder.ret_void()

    # Link
    library.add_ir_module(wrapper_module)
    library.add_linking_library(self.library)
def get_record_member(builder, record, offset, typ):
    """
    Return a pointer of LLVM type ``typ*`` to the member located *offset*
    bytes into *record*.
    """
    member = inbound_gep(builder, record, 0, offset)
    # Members are stored by value; a pointer-typed pointee would indicate
    # a malformed record layout.
    assert not is_pointer(member.type.pointee)
    return builder.bitcast(member, Type.pointer(typ))
import sys import ctypes import struct as struct_ from llvmlite.llvmpy.core import Type, Constant _trace_refs_ = hasattr(sys, "getobjects") _plat_bits = struct_.calcsize("@P") * 8 _int8 = Type.int(8) _int32 = Type.int(32) _void_star = Type.pointer(_int8) _int8_star = _void_star _sizeof_py_ssize_t = ctypes.sizeof(getattr(ctypes, "c_size_t")) _llvm_py_ssize_t = Type.int(_sizeof_py_ssize_t * 8) if _trace_refs_: _pyobject_head = Type.struct( [_void_star, _void_star, _llvm_py_ssize_t, _void_star]) _pyobject_head_init = Constant.struct([ Constant.null(_void_star), # _ob_next Constant.null(_void_star), # _ob_prev Constant.int(_llvm_py_ssize_t, 1), # ob_refcnt Constant.null(_void_star), # ob_type ]) else: _pyobject_head = Type.struct([_llvm_py_ssize_t, _void_star]) _pyobject_head_init = Constant.struct([
def from_native_value(self, val, typ):
    """
    Box the native LLVM value *val* of Numba type *typ* into a Python
    object (a pyobject value at the IR level) and return it.

    Raises NotImplementedError for unsupported types.
    """
    if typ == types.pyobject:
        # Already a Python object; pass through unchanged.
        return val

    elif typ == types.boolean:
        longval = self.builder.zext(val, self.long)
        return self.bool_from_long(longval)

    elif typ in types.unsigned_domain:
        # Widen (zero-extend) to unsigned long long before boxing.
        ullval = self.builder.zext(val, self.ulonglong)
        return self.long_from_ulonglong(ullval)

    elif typ in types.signed_domain:
        # Widen (sign-extend) to long long before boxing.
        ival = self.builder.sext(val, self.longlong)
        return self.long_from_longlong(ival)

    elif typ == types.float32:
        # Promote to double; Python floats are doubles.
        dbval = self.builder.fpext(val, self.double)
        return self.float_from_double(dbval)

    elif typ == types.float64:
        return self.float_from_double(val)

    elif typ == types.complex128:
        cmplxcls = self.context.make_complex(typ)
        cval = cmplxcls(self.context, self.builder, value=val)
        return self.complex_from_doubles(cval.real, cval.imag)

    elif typ == types.complex64:
        # Promote both components to float64 before boxing.
        cmplxcls = self.context.make_complex(typ)
        cval = cmplxcls(self.context, self.builder, value=val)
        freal = self.context.cast(self.builder, cval.real,
                                  types.float32, types.float64)
        fimag = self.context.cast(self.builder, cval.imag,
                                  types.float32, types.float64)
        return self.complex_from_doubles(freal, fimag)

    elif isinstance(typ, types.NPDatetime):
        return self.create_np_datetime(val, typ.unit_code)

    elif isinstance(typ, types.NPTimedelta):
        return self.create_np_timedelta(val, typ.unit_code)

    elif typ == types.none:
        ret = self.make_none()
        return ret

    elif isinstance(typ, types.Optional):
        return self.from_native_return(val, typ.type)

    elif isinstance(typ, types.Array):
        return self.from_native_array(typ, val)

    elif isinstance(typ, types.Record):
        # Note we will create a copy of the record
        # This is the only safe way.
        pdata = cgutils.get_record_data(self.builder, val)
        size = Constant.int(Type.int(), pdata.type.pointee.count)
        ptr = self.builder.bitcast(pdata, Type.pointer(Type.int(8)))
        # Note: this will only work for CPU mode
        # The following requires access to python object
        # (the dtype's address is baked in as a compile-time constant).
        dtype_addr = Constant.int(self.py_ssize_t, id(typ.dtype))
        dtypeobj = dtype_addr.inttoptr(self.pyobj)
        return self.recreate_record(ptr, size, dtypeobj)

    elif isinstance(typ, (types.Tuple, types.UniTuple)):
        return self.from_tuple(typ, val)

    raise NotImplementedError(typ)