Пример #1
0
def atan2_f32_impl(context, builder, sig, args):
    assert len(args) == 2
    mod = builder.module
    fnty = Type.function(Type.float(), [Type.float(), Type.float()])
    fn = cgutils.insert_pure_function(builder.module, fnty, name="atan2f")
    res = builder.call(fn, args)
    return impl_ret_untracked(context, builder, sig.return_type, res)
Пример #2
0
def atan2_f32_impl(context, builder, sig, args):
    assert len(args) == 2
    mod = builder.module
    fnty = Type.function(Type.float(), [Type.float(), Type.float()])
    fn = cgutils.insert_pure_function(builder.module, fnty, name="atan2f")
    res = builder.call(fn, args)
    return impl_ret_untracked(context, builder, sig.return_type, res)
Пример #3
0
def round_impl_f32(context, builder, sig, args):
    module = cgutils.get_module(builder)
    fnty = Type.function(Type.float(), [Type.float()])
    if utils.IS_PY3:
        fn = module.get_or_insert_function(fnty, name="numba.roundf")
    else:
        fn = module.get_or_insert_function(fnty, name="roundf")
    assert fn.is_declaration
    return builder.call(fn, args)
Пример #4
0
def is_scalar_neg(builder, value):
    """is _value_ negative?. Assumes _value_ is signed"""
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isneg = builder.fcmp(lc.FCMP_OLT, value, nullval)
    else:
        isneg = builder.icmp(lc.ICMP_SLT, value, nullval)
    return isneg
Пример #5
0
def is_scalar_neg(builder, value):
    """is _value_ negative?. Assumes _value_ is signed"""
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isneg = builder.fcmp(lc.FCMP_OLT, value, nullval)
    else:
        isneg = builder.icmp(lc.ICMP_SLT, value, nullval)
    return isneg
Пример #6
0
    def test_inline_rsqrt(self):
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, "cu_rsqrt")
        bldr = Builder.new(fn.append_basic_block("entry"))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty, "rsqrt.approx.f32 $0, $1;", "=f,f", side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        self.assertTrue("rsqrt.approx.f32" in str(ptx))
Пример #7
0
def is_scalar_zero(builder, value):
    """
    Return a predicate representing whether *value* is equal to zero.
    """
    assert not is_pointer(value.type)
    assert not is_struct(value.type)
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isnull = builder.fcmp(lc.FCMP_OEQ, nullval, value)
    else:
        isnull = builder.icmp(lc.ICMP_EQ, nullval, value)
    return isnull
Пример #8
0
def is_scalar_zero(builder, value):
    """
    Return a predicate representing whether *value* is equal to zero.
    """
    assert not is_pointer(value.type)
    assert not is_struct(value.type)
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isnull = builder.fcmp(lc.FCMP_OEQ, nullval, value)
    else:
        isnull = builder.icmp(lc.ICMP_EQ, nullval, value)
    return isnull
Пример #9
0
def is_not_scalar_zero(builder, value):
    """
    Return a predicate representin whether a *value* is not equal to zero.
    not exactly "not is_scalar_zero" because of nans
    """
    assert not is_pointer(value.type)
    assert not is_struct(value.type)
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isnull = builder.fcmp(lc.FCMP_UNE, nullval, value)
    else:
        isnull = builder.icmp(lc.ICMP_NE, nullval, value)
    return isnull
Пример #10
0
    def test_inline_rsqrt(self):
        mod = Module.new(__name__)
        fnty = Type.function(Type.void(), [Type.pointer(Type.float())])
        fn = mod.add_function(fnty, 'cu_rsqrt')
        bldr = Builder.new(fn.append_basic_block('entry'))

        rsqrt_approx_fnty = Type.function(Type.float(), [Type.float()])
        inlineasm = InlineAsm.get(rsqrt_approx_fnty,
                                  'rsqrt.approx.f32 $0, $1;',
                                  '=f,f', side_effect=True)
        val = bldr.load(fn.args[0])
        res = bldr.call(inlineasm, [val])

        bldr.store(res, fn.args[0])
        bldr.ret_void()

        # generate ptx
        nvvm.fix_data_layout(mod)
        nvvm.set_cuda_kernel(fn)
        nvvmir = str(mod)
        ptx = nvvm.llvm_to_ptx(nvvmir)
        self.assertTrue('rsqrt.approx.f32' in str(ptx))
Пример #11
0
def is_not_scalar_zero(builder, value):
    """
    Return a predicate representin whether a *value* is not equal to zero.
    not exactly "not is_scalar_zero" because of nans
    """
    assert not is_pointer(value.type)
    assert not is_struct(value.type)
    nullval = Constant.null(value.type)
    if value.type in (Type.float(), Type.double()):
        isnull = builder.fcmp(lc.FCMP_UNE, nullval, value)
    else:
        isnull = builder.icmp(lc.ICMP_NE, nullval, value)
    return isnull
Пример #12
0
def ptx_shfl_sync_i32(context, builder, sig, args):
    """
    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic
    function supports both 32 and 64 bit ints and floats, so for feature parity,
    i64, f32, and f64 are implemented. Floats by way of bitcasting the float to
    an int, then shuffling, then bitcasting back. And 64-bit values by packing
    them into 2 32bit values, shuffling thoose, and then packing back together.
    """
    mask, mode, value, index, clamp = args
    value_type = sig.args[2]
    if value_type in types.real_domain:
        value = builder.bitcast(value, Type.int(value_type.bitwidth))
    fname = 'llvm.nvvm.shfl.sync.i32'
    lmod = builder.module
    fnty = Type.function(
        Type.struct((Type.int(32), Type.int(1))),
        (Type.int(32), Type.int(32), Type.int(32), Type.int(32), Type.int(32))
    )
    func = lmod.get_or_insert_function(fnty, name=fname)
    if value_type.bitwidth == 32:
        ret = builder.call(func, (mask, mode, value, index, clamp))
        if value_type == types.float32:
            rv = builder.extract_value(ret, 0)
            pred = builder.extract_value(ret, 1)
            fv = builder.bitcast(rv, Type.float())
            ret = cgutils.make_anonymous_struct(builder, (fv, pred))
    else:
        value1 = builder.trunc(value, Type.int(32))
        value_lshr = builder.lshr(value, context.get_constant(types.i8, 32))
        value2 = builder.trunc(value_lshr, Type.int(32))
        ret1 = builder.call(func, (mask, mode, value1, index, clamp))
        ret2 = builder.call(func, (mask, mode, value2, index, clamp))
        rv1 = builder.extract_value(ret1, 0)
        rv2 = builder.extract_value(ret2, 0)
        pred = builder.extract_value(ret1, 1)
        rv1_64 = builder.zext(rv1, Type.int(64))
        rv2_64 = builder.zext(rv2, Type.int(64))
        rv_shl = builder.shl(rv2_64, context.get_constant(types.i8, 32))
        rv = builder.or_(rv_shl, rv1_64)
        if value_type == types.float64:
            rv = builder.bitcast(rv, Type.double())
        ret = cgutils.make_anonymous_struct(builder, (rv, pred))
    return ret
Пример #13
0
def ptx_shfl_sync_i32(context, builder, sig, args):
    """
    The NVVM intrinsic for shfl only supports i32, but the cuda intrinsic function supports
    both 32 and 64 bit ints and floats, so for feature parity, i64, f32, and f64 are implemented.
    Floats by way of bitcasting the float to an int, then shuffling, then bitcasting back.
    And 64-bit values by packing them into 2 32bit values, shuffling thoose, and then packing back together.
    """
    mask, mode, value, index, clamp = args
    value_type = sig.args[2]
    if value_type in types.real_domain:
        value = builder.bitcast(value, Type.int(value_type.bitwidth))
    fname = 'llvm.nvvm.shfl.sync.i32'
    lmod = builder.module
    fnty = Type.function(
        Type.struct((Type.int(32), Type.int(1))),
        (Type.int(32), Type.int(32), Type.int(32), Type.int(32), Type.int(32))
    )
    func = lmod.get_or_insert_function(fnty, name=fname)
    if value_type.bitwidth == 32:
        ret = builder.call(func, (mask, mode, value, index, clamp))
        if value_type == types.float32:
            rv = builder.extract_value(ret, 0)
            pred = builder.extract_value(ret, 1)
            fv = builder.bitcast(rv, Type.float())
            ret = cgutils.make_anonymous_struct(builder, (fv, pred))
    else:
        value1 = builder.trunc(value, Type.int(32))
        value_lshr = builder.lshr(value, context.get_constant(types.i8, 32))
        value2 = builder.trunc(value_lshr, Type.int(32))
        ret1 = builder.call(func, (mask, mode, value1, index, clamp))
        ret2 = builder.call(func, (mask, mode, value2, index, clamp))
        rv1 = builder.extract_value(ret1, 0)
        rv2 = builder.extract_value(ret2, 0)
        pred = builder.extract_value(ret1, 1)
        rv1_64 = builder.zext(rv1, Type.int(64))
        rv2_64 = builder.zext(rv2, Type.int(64))
        rv_shl = builder.shl(rv2_64, context.get_constant(types.i8, 32))
        rv = builder.or_(rv_shl, rv1_64)
        if value_type == types.float64:
            rv = builder.bitcast(rv, Type.double())
        ret = cgutils.make_anonymous_struct(builder, (rv, pred))
    return ret
Пример #14
0
 def __init__(self):
     """Initialize the instance."""
     super(MiddleIrTypeFloat, self).__init__(Type.float())
Пример #15
0
GENERIC_POINTER = Type.pointer(Type.int(8))
PYOBJECT = GENERIC_POINTER

LTYPEMAP = {
    types.pyobject: PYOBJECT,
    types.boolean: Type.int(8),
    types.uint8: Type.int(8),
    types.uint16: Type.int(16),
    types.uint32: Type.int(32),
    types.uint64: Type.int(64),
    types.int8: Type.int(8),
    types.int16: Type.int(16),
    types.int32: Type.int(32),
    types.int64: Type.int(64),
    types.float32: Type.float(),
    types.float64: Type.double(),
}

STRUCT_TYPES = {
    types.complex64: builtins.Complex64,
    types.complex128: builtins.Complex128,
    types.slice3_type: builtins.Slice,
}


class Overloads(object):
    def __init__(self):
        self.versions = []

    def find(self, sig):
Пример #16
0
from numpile.lang import TCon, TApp, ftv

int32 = TCon("Int32")
int64 = TCon("Int64")
float32 = TCon("Float")
double64 = TCon("Double")
void = TCon("Void")
array = lambda t: TApp(TCon("Array"), t)

array_int32 = array(int32)
array_int64 = array(int64)
array_double64 = array(double64)

pointer = Type.pointer
int_type = Type.int()
float_type = Type.float()
double_type = Type.double()
bool_type = Type.int(1)
void_type = Type.void()
void_ptr = pointer(Type.int(8))
struct_type = Type.struct([])


def array_type(elt_type):
    return Type.struct([
        pointer(elt_type),  # data
        int_type,  # dimensions
        pointer(int_type),  # shape
        'ndarray_' + str(elt_type),  # name
    ])
Пример #17
0
def atan2_f32_impl(context, builder, sig, args):
    assert len(args) == 2
    mod = cgutils.get_module(builder)
    fnty = Type.function(Type.float(), [Type.float(), Type.float()])
    fn = mod.get_or_insert_function(fnty, name="atan2f")
    return builder.call(fn, args)
Пример #18
0
LTYPEMAP = {
    types.pyobject: PYOBJECT,

    types.boolean: Type.int(8),

    types.uint8: Type.int(8),
    types.uint16: Type.int(16),
    types.uint32: Type.int(32),
    types.uint64: Type.int(64),

    types.int8: Type.int(8),
    types.int16: Type.int(16),
    types.int32: Type.int(32),
    types.int64: Type.int(64),

    types.float32: Type.float(),
    types.float64: Type.double(),
}

STRUCT_TYPES = {
    types.complex64: builtins.Complex64,
    types.complex128: builtins.Complex128,
    types.slice3_type: builtins.Slice,
}


class Overloads(object):
    def __init__(self):
        # A list of (signature, implementation)
        self.versions = []
Пример #19
0
def atan2_f32_impl(context, builder, sig, args):
    assert len(args) == 2
    mod = cgutils.get_module(builder)
    fnty = Type.function(Type.float(), [Type.float(), Type.float()])
    fn = mod.get_or_insert_function(fnty, name="atan2f")
    return builder.call(fn, args)
Пример #20
0
def int32_as_f32(builder, val):
    """
    Bitcast a 32-bit integer into a float.
    """
    assert val.type == Type.int(32)
    return builder.bitcast(val, Type.float())
Пример #21
0
def int32_as_f32(builder, val):
    """
    Bitcast a 32-bit integer into a float.
    """
    assert val.type == Type.int(32)
    return builder.bitcast(val, Type.float())