def atan2_f32_impl(context, builder, sig, args): assert len(args) == 2 mod = builder.module fnty = Type.function(Type.float(), [Type.float(), Type.float()]) fn = cgutils.insert_pure_function(builder.module, fnty, name="atan2f") res = builder.call(fn, args) return impl_ret_untracked(context, builder, sig.return_type, res)
def atan2_f32_impl(context, builder, sig, args): assert len(args) == 2 mod = builder.module fnty = Type.function(Type.float(), [Type.float(), Type.float()]) fn = cgutils.insert_pure_function(builder.module, fnty, name="atan2f") res = builder.call(fn, args) return impl_ret_untracked(context, builder, sig.return_type, res)
def round_impl_f32(context, builder, sig, args): module = cgutils.get_module(builder) fnty = Type.function(Type.float(), [Type.float()]) if utils.IS_PY3: fn = module.get_or_insert_function(fnty, name="numba.roundf") else: fn = module.get_or_insert_function(fnty, name="roundf") assert fn.is_declaration return builder.call(fn, args)
def is_scalar_neg(builder, value): """is _value_ negative?. Assumes _value_ is signed""" nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isneg = builder.fcmp(lc.FCMP_OLT, value, nullval) else: isneg = builder.icmp(lc.ICMP_SLT, value, nullval) return isneg
def is_scalar_neg(builder, value): """is _value_ negative?. Assumes _value_ is signed""" nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isneg = builder.fcmp(lc.FCMP_OLT, value, nullval) else: isneg = builder.icmp(lc.ICMP_SLT, value, nullval) return isneg
def test_inline_rsqrt(self): mod = Module.new(__name__) fnty = Type.function(Type.void(), [Type.pointer(Type.float())]) fn = mod.add_function(fnty, "cu_rsqrt") bldr = Builder.new(fn.append_basic_block("entry")) rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()]) inlineasm = InlineAsm.get(rsqrt_approx_fnty, "rsqrt.approx.f32 $0, $1;", "=f,f", side_effect=True) val = bldr.load(fn.args[0]) res = bldr.call(inlineasm, [val]) bldr.store(res, fn.args[0]) bldr.ret_void() # generate ptx nvvm.fix_data_layout(mod) nvvm.set_cuda_kernel(fn) nvvmir = str(mod) ptx = nvvm.llvm_to_ptx(nvvmir) self.assertTrue("rsqrt.approx.f32" in str(ptx))
def is_scalar_zero(builder, value): """ Return a predicate representing whether *value* is equal to zero. """ assert not is_pointer(value.type) assert not is_struct(value.type) nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isnull = builder.fcmp(lc.FCMP_OEQ, nullval, value) else: isnull = builder.icmp(lc.ICMP_EQ, nullval, value) return isnull
def is_scalar_zero(builder, value): """ Return a predicate representing whether *value* is equal to zero. """ assert not is_pointer(value.type) assert not is_struct(value.type) nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isnull = builder.fcmp(lc.FCMP_OEQ, nullval, value) else: isnull = builder.icmp(lc.ICMP_EQ, nullval, value) return isnull
def is_not_scalar_zero(builder, value): """ Return a predicate representin whether a *value* is not equal to zero. not exactly "not is_scalar_zero" because of nans """ assert not is_pointer(value.type) assert not is_struct(value.type) nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isnull = builder.fcmp(lc.FCMP_UNE, nullval, value) else: isnull = builder.icmp(lc.ICMP_NE, nullval, value) return isnull
def test_inline_rsqrt(self): mod = Module.new(__name__) fnty = Type.function(Type.void(), [Type.pointer(Type.float())]) fn = mod.add_function(fnty, 'cu_rsqrt') bldr = Builder.new(fn.append_basic_block('entry')) rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()]) inlineasm = InlineAsm.get(rsqrt_approx_fnty, 'rsqrt.approx.f32 $0, $1;', '=f,f', side_effect=True) val = bldr.load(fn.args[0]) res = bldr.call(inlineasm, [val]) bldr.store(res, fn.args[0]) bldr.ret_void() # generate ptx nvvm.fix_data_layout(mod) nvvm.set_cuda_kernel(fn) nvvmir = str(mod) ptx = nvvm.llvm_to_ptx(nvvmir) self.assertTrue('rsqrt.approx.f32' in str(ptx))
def is_not_scalar_zero(builder, value): """ Return a predicate representin whether a *value* is not equal to zero. not exactly "not is_scalar_zero" because of nans """ assert not is_pointer(value.type) assert not is_struct(value.type) nullval = Constant.null(value.type) if value.type in (Type.float(), Type.double()): isnull = builder.fcmp(lc.FCMP_UNE, nullval, value) else: isnull = builder.icmp(lc.ICMP_NE, nullval, value) return isnull
def ptx_shfl_sync_i32(context, builder, sig, args): """ The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic function supports both 32 and 64 bit ints and floats, so for feature parity, i64, f32, and f64 are implemented. Floats by way of bitcasting the float to an int, then shuffling, then bitcasting back. And 64-bit values by packing them into 2 32bit values, shuffling thoose, and then packing back together. """ mask, mode, value, index, clamp = args value_type = sig.args[2] if value_type in types.real_domain: value = builder.bitcast(value, Type.int(value_type.bitwidth)) fname = 'llvm.nvvm.shfl.sync.i32' lmod = builder.module fnty = Type.function( Type.struct((Type.int(32), Type.int(1))), (Type.int(32), Type.int(32), Type.int(32), Type.int(32), Type.int(32)) ) func = lmod.get_or_insert_function(fnty, name=fname) if value_type.bitwidth == 32: ret = builder.call(func, (mask, mode, value, index, clamp)) if value_type == types.float32: rv = builder.extract_value(ret, 0) pred = builder.extract_value(ret, 1) fv = builder.bitcast(rv, Type.float()) ret = cgutils.make_anonymous_struct(builder, (fv, pred)) else: value1 = builder.trunc(value, Type.int(32)) value_lshr = builder.lshr(value, context.get_constant(types.i8, 32)) value2 = builder.trunc(value_lshr, Type.int(32)) ret1 = builder.call(func, (mask, mode, value1, index, clamp)) ret2 = builder.call(func, (mask, mode, value2, index, clamp)) rv1 = builder.extract_value(ret1, 0) rv2 = builder.extract_value(ret2, 0) pred = builder.extract_value(ret1, 1) rv1_64 = builder.zext(rv1, Type.int(64)) rv2_64 = builder.zext(rv2, Type.int(64)) rv_shl = builder.shl(rv2_64, context.get_constant(types.i8, 32)) rv = builder.or_(rv_shl, rv1_64) if value_type == types.float64: rv = builder.bitcast(rv, Type.double()) ret = cgutils.make_anonymous_struct(builder, (rv, pred)) return ret
def ptx_shfl_sync_i32(context, builder, sig, args): """ The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic function supports both 32 and 64 bit ints and floats, so for feature parity, i64, f32, and f64 are implemented. Floats by way of bitcasting the float to an int, then shuffling, then bitcasting back. And 64-bit values by packing them into 2 32bit values, shuffling thoose, and then packing back together. """ mask, mode, value, index, clamp = args value_type = sig.args[2] if value_type in types.real_domain: value = builder.bitcast(value, Type.int(value_type.bitwidth)) fname = 'llvm.nvvm.shfl.sync.i32' lmod = builder.module fnty = Type.function( Type.struct((Type.int(32), Type.int(1))), (Type.int(32), Type.int(32), Type.int(32), Type.int(32), Type.int(32)) ) func = lmod.get_or_insert_function(fnty, name=fname) if value_type.bitwidth == 32: ret = builder.call(func, (mask, mode, value, index, clamp)) if value_type == types.float32: rv = builder.extract_value(ret, 0) pred = builder.extract_value(ret, 1) fv = builder.bitcast(rv, Type.float()) ret = cgutils.make_anonymous_struct(builder, (fv, pred)) else: value1 = builder.trunc(value, Type.int(32)) value_lshr = builder.lshr(value, context.get_constant(types.i8, 32)) value2 = builder.trunc(value_lshr, Type.int(32)) ret1 = builder.call(func, (mask, mode, value1, index, clamp)) ret2 = builder.call(func, (mask, mode, value2, index, clamp)) rv1 = builder.extract_value(ret1, 0) rv2 = builder.extract_value(ret2, 0) pred = builder.extract_value(ret1, 1) rv1_64 = builder.zext(rv1, Type.int(64)) rv2_64 = builder.zext(rv2, Type.int(64)) rv_shl = builder.shl(rv2_64, context.get_constant(types.i8, 32)) rv = builder.or_(rv_shl, rv1_64) if value_type == types.float64: rv = builder.bitcast(rv, Type.double()) ret = cgutils.make_anonymous_struct(builder, (rv, pred)) return ret
def __init__(self): """Initialize the instance.""" super(MiddleIrTypeFloat, self).__init__(Type.float())
GENERIC_POINTER = Type.pointer(Type.int(8)) PYOBJECT = GENERIC_POINTER LTYPEMAP = { types.pyobject: PYOBJECT, types.boolean: Type.int(8), types.uint8: Type.int(8), types.uint16: Type.int(16), types.uint32: Type.int(32), types.uint64: Type.int(64), types.int8: Type.int(8), types.int16: Type.int(16), types.int32: Type.int(32), types.int64: Type.int(64), types.float32: Type.float(), types.float64: Type.double(), } STRUCT_TYPES = { types.complex64: builtins.Complex64, types.complex128: builtins.Complex128, types.slice3_type: builtins.Slice, } class Overloads(object): def __init__(self): self.versions = [] def find(self, sig):
from numpile.lang import TCon, TApp, ftv int32 = TCon("Int32") int64 = TCon("Int64") float32 = TCon("Float") double64 = TCon("Double") void = TCon("Void") array = lambda t: TApp(TCon("Array"), t) array_int32 = array(int32) array_int64 = array(int64) array_double64 = array(double64) pointer = Type.pointer int_type = Type.int() float_type = Type.float() double_type = Type.double() bool_type = Type.int(1) void_type = Type.void() void_ptr = pointer(Type.int(8)) struct_type = Type.struct([]) def array_type(elt_type): return Type.struct([ pointer(elt_type), # data int_type, # dimensions pointer(int_type), # shape 'ndarray_' + str(elt_type), # name ])
def atan2_f32_impl(context, builder, sig, args): assert len(args) == 2 mod = cgutils.get_module(builder) fnty = Type.function(Type.float(), [Type.float(), Type.float()]) fn = mod.get_or_insert_function(fnty, name="atan2f") return builder.call(fn, args)
LTYPEMAP = { types.pyobject: PYOBJECT, types.boolean: Type.int(8), types.uint8: Type.int(8), types.uint16: Type.int(16), types.uint32: Type.int(32), types.uint64: Type.int(64), types.int8: Type.int(8), types.int16: Type.int(16), types.int32: Type.int(32), types.int64: Type.int(64), types.float32: Type.float(), types.float64: Type.double(), } STRUCT_TYPES = { types.complex64: builtins.Complex64, types.complex128: builtins.Complex128, types.slice3_type: builtins.Slice, } class Overloads(object): def __init__(self): # A list of (signature, implementation) self.versions = []
def atan2_f32_impl(context, builder, sig, args): assert len(args) == 2 mod = cgutils.get_module(builder) fnty = Type.function(Type.float(), [Type.float(), Type.float()]) fn = mod.get_or_insert_function(fnty, name="atan2f") return builder.call(fn, args)
def int32_as_f32(builder, val): """ Bitcast a 32-bit integer into a float. """ assert val.type == Type.int(32) return builder.bitcast(val, Type.float())
def int32_as_f32(builder, val): """ Bitcast a 32-bit integer into a float. """ assert val.type == Type.int(32) return builder.bitcast(val, Type.float())