Пример #1
0
def hsail_atomic_add_tuple(context, builder, sig, args):
    """Lower an atomic add on a single array element.

    ``sig.args`` is unpacked as (array type, index type, value type) and
    ``args`` as the corresponding (array, index, value) lowered values.
    The index is either a single ``types.intp`` value or a tuple; tuple
    members are each cast to ``types.intp`` before use.

    Raises:
        TypeError: if the value type differs from the array dtype, or if
            the number of indices differs from the array's ``ndim``.

    Returns:
        The result of ``builder.atomic_rmw("add", ...)`` issued with
        ``ordering='monotonic'`` on the addressed element.
    """
    array_type, index_type, value_type = sig.args
    array_val, index_val, addend = args
    elem_type = array_type.dtype

    if index_type == types.intp:
        # Scalar index: wrap both the value and its type in one-item lists
        # so the checks below can treat every index uniformly.
        index_list = [index_val]
        index_type = [index_type]
    else:
        # Tuple index: split it apart and bring each member to intp.
        members = cgutils.unpack_tuple(
            builder, index_val, count=len(index_type))
        index_list = [
            context.cast(builder, member, member_type, types.intp)
            for member_type, member in zip(index_type, members)
        ]

    if elem_type != value_type:
        raise TypeError("expecting %s but got %s" % (elem_type, value_type))

    index_count = len(index_type)
    if array_type.ndim != index_count:
        raise TypeError("indexing %d-D array with %d-D index" %
                        (array_type.ndim, index_count))

    array_struct = context.make_array(array_type)(context, builder, array_val)
    elem_ptr = cgutils.get_item_pointer(
        context, builder, array_type, array_struct, index_list)

    return builder.atomic_rmw("add", elem_ptr, addend, ordering='monotonic')
Пример #2
0
def atomic_add(context, builder, sig, args, name):
    """Lower an atomic operation by calling ``insert_and_call_atomic_fn``.

    ``sig.args`` is unpacked as (array type, index type, value type) and
    ``args`` as the corresponding (array, index, value) lowered values.
    ``name`` is forwarded unchanged to ``insert_and_call_atomic_fn``.

    When atomic support is available, ``target.LINK_ATOMIC`` is set to
    ``True`` in ``context.extra_compile_options`` before anything else is
    lowered.

    Raises:
        ImportError: if ``atomic_support_present()`` reports no support.
        TypeError: if the value type differs from the array dtype, or if
            the number of indices differs from the array's ``ndim``.

    Returns:
        Whatever ``insert_and_call_atomic_fn`` returns.
    """
    from .atomics import atomic_support_present

    if not atomic_support_present():
        raise ImportError(
            "Atomic support is not present, can not perform atomic_add")

    context.extra_compile_options[target.LINK_ATOMIC] = True

    array_type, index_type, value_type = sig.args
    array_val, index_val, addend = args
    elem_type = array_type.dtype

    if index_type == types.intp:
        # Scalar index: normalise to one-item lists.
        index_list = [index_val]
        index_type = [index_type]
    else:
        # Tuple index: unpack and cast every member to intp.
        members = cgutils.unpack_tuple(
            builder, index_val, count=len(index_type))
        index_list = [
            context.cast(builder, member, member_type, types.intp)
            for member_type, member in zip(index_type, members)
        ]

    if elem_type != value_type:
        raise TypeError("expecting %s but got %s" % (elem_type, value_type))

    index_count = len(index_type)
    if array_type.ndim != index_count:
        raise TypeError("indexing %d-D array with %d-D index" %
                        (array_type.ndim, index_count))

    array_struct = context.make_array(array_type)(context, builder, array_val)
    elem_ptr = cgutils.get_item_pointer(
        context, builder, array_type, array_struct, index_list)

    # Only a DPPYArray explicitly tagged LOCAL uses the local address space;
    # every other array falls back to GLOBAL.
    in_local_space = (isinstance(array_type, DPPYArray)
                      and array_type.addrspace == address_space.LOCAL)
    if in_local_space:
        space = address_space.LOCAL
    else:
        space = address_space.GLOBAL

    return insert_and_call_atomic_fn(
        context,
        builder,
        sig,
        name,
        elem_type,
        elem_ptr,
        addend,
        space,
    )
Пример #3
0
def ptx_atomic_cas_tuple(context, builder, sig, args):
    """Lower an atomic compare-and-swap on element 0 of an int32 array.

    ``args`` is unpacked as (array, old, val); the element pointer is
    always computed for index ``(0,)``.  Only arrays whose dtype is
    ``types.int32`` are handled.

    Raises:
        TypeError: if the array dtype is not ``types.int32``.

    Returns:
        The result of calling the function declared by
        ``nvvmutils.declare_atomic_cas_int32`` with (pointer, old, val).
    """
    array_type, _old_type, _val_type = sig.args
    array_val, old_arg, val_arg = args
    elem_type = array_type.dtype

    array_struct = context.make_array(array_type)(context, builder, array_val)
    index_zero = context.get_constant(types.intp, 0)
    elem_ptr = cgutils.get_item_pointer(
        context, builder, array_type, array_struct, (index_zero, ))

    if elem_type == types.int32:
        cas_fn = nvvmutils.declare_atomic_cas_int32(builder.module)
        return builder.call(cas_fn, (elem_ptr, old_arg, val_arg))

    raise TypeError('Unimplemented atomic compare_and_swap '
                    'with %s array' % elem_type)
Пример #4
0
def ptx_atomic_cas_tuple(context, builder, sig, args):
    """Lower an atomic compare-and-swap on element 0 of an integer array.

    ``args`` is unpacked as (array, old, val); the element pointer is
    always computed for index ``(0,)``.  Any dtype listed in
    ``cuda.cudadecl.integer_numba_types`` is accepted, and its
    ``bitwidth`` is passed on to ``nvvmutils.atomic_cmpxchg``.

    Raises:
        TypeError: if the array dtype is not one of the accepted integer
            types.

    Returns:
        Whatever ``nvvmutils.atomic_cmpxchg`` returns.
    """
    array_type, _old_type, _val_type = sig.args
    array_val, old_arg, val_arg = args
    elem_type = array_type.dtype

    array_struct = context.make_array(array_type)(context, builder, array_val)
    index_zero = context.get_constant(types.intp, 0)
    elem_ptr = cgutils.get_item_pointer(
        context, builder, array_type, array_struct, (index_zero, ))

    if elem_type in cuda.cudadecl.integer_numba_types:
        return nvvmutils.atomic_cmpxchg(
            builder, builder.module, elem_type.bitwidth,
            elem_ptr, old_arg, val_arg)

    raise TypeError('Unimplemented atomic compare_and_swap '
                    'with %s array' % elem_type)
Пример #5
0
    def imp(context, builder, sig, args):
        """Validate the (array, index, value) arguments and dispatch.

        Unpacks ``sig.args`` / ``args``, normalises the index through
        ``_normalize_indices``, checks that the value type equals the
        array dtype and that the index count equals the array's ``ndim``,
        computes the element pointer, and hands off to ``dispatch_fn``.

        Raises:
            TypeError: on a dtype mismatch or an index/ndim mismatch.
        """
        array_type, index_type, value_type = sig.args
        array_val, index_val, operand = args
        elem_type = array_type.dtype

        index_type, index_list = _normalize_indices(
            context, builder, index_type, index_val)

        if elem_type != value_type:
            raise TypeError("expect %s but got %s" % (elem_type, value_type))

        index_count = len(index_type)
        if array_type.ndim != index_count:
            raise TypeError("indexing %d-D array with %d-D index" %
                            (array_type.ndim, index_count))

        array_struct = context.make_array(array_type)(
            context, builder, array_val)
        elem_ptr = cgutils.get_item_pointer(
            context, builder, array_type, array_struct, index_list)

        # The dtype-specific implementation is chosen by dispatch_fn.
        return dispatch_fn(context, builder, elem_type, elem_ptr, operand)
Пример #6
0
def native_atomic_add(context, builder, sig, args):
    """Lower an atomic add to a call to a mangled ``__spirv_Atomic*`` function.

    ``sig.args`` is unpacked as (array type, index type, value type) and
    ``args`` as the corresponding (array, index, value) lowered values.
    float32/float64 dtypes select ``__spirv_AtomicFAddEXT`` and also set
    ``target.LLVM_SPIRV_ARGS`` in ``context.extra_compile_options``;
    int32/int64 dtypes select ``__spirv_AtomicIAdd``.

    The call is issued with the scope and memory-semantics constants that
    ``atomic_helper`` derives from the ``device`` memory scope and the
    ``relaxed`` memory order.

    Raises:
        TypeError: if the value type differs from the array dtype, if the
            number of indices differs from the array's ``ndim``, or if the
            dtype is none of float32, float64, int32, int64.

    Returns:
        The result of ``builder.call`` on the declared function.
    """
    array_type, index_type, value_type = sig.args
    array_val, index_val, addend = args
    elem_type = array_type.dtype

    if index_type == types.intp:
        # Scalar index: normalise to one-item lists.
        index_list = [index_val]
        index_type = [index_type]
    else:
        # Tuple index: unpack and cast every member to intp.
        members = cgutils.unpack_tuple(
            builder, index_val, count=len(index_type))
        index_list = [
            context.cast(builder, member, member_type, types.intp)
            for member_type, member in zip(index_type, members)
        ]

    if elem_type != value_type:
        raise TypeError("expecting %s but got %s" % (elem_type, value_type))

    index_count = len(index_type)
    if array_type.ndim != index_count:
        raise TypeError("indexing %d-D array with %d-D index" %
                        (array_type.ndim, index_count))

    array_struct = context.make_array(array_type)(context, builder, array_val)
    elem_ptr = cgutils.get_item_pointer(
        context, builder, array_type, array_struct, index_list)

    # Choose the SPIR-V function by element type.
    if elem_type in (types.float32, types.float64):
        # The float variant needs this extension flag passed along in the
        # compile options.
        context.extra_compile_options[target.LLVM_SPIRV_ARGS] = [
            "--spirv-ext=+SPV_EXT_shader_atomic_float_add"
        ]
        spirv_name = "__spirv_AtomicFAddEXT"
    elif elem_type in (types.int32, types.int64):
        spirv_name = "__spirv_AtomicIAdd"
    else:
        raise TypeError("Unsupported type")

    # Sanity check: every branch that reaches here assigned a name.
    assert spirv_name != ""

    # LLVM pointer to the element type, tagged with the array's address
    # space before it is used in the signature and the mangled name.
    llvm_ptr_type = context.get_value_type(elem_type).as_pointer()
    llvm_ptr_type.addrspace = array_type.addrspace

    llvm_ret_type = context.get_value_type(sig.return_type)
    llvm_arg_types = [
        llvm_ptr_type,
        ir.IntType(32),
        ir.IntType(32),
        context.get_value_type(value_type),
    ]

    from numba_dppy import extended_numba_itanium_mangler as ext_itanium_mangler

    pointer_arg_type = types.CPointer(
        elem_type, addrspace=llvm_ptr_type.addrspace)
    mangled_name = ext_itanium_mangler.mangle(
        spirv_name,
        [
            pointer_arg_type,
            "__spv.Scope.Flag",
            "__spv.MemorySemanticsMask.Flag",
            value_type,
        ],
    )

    callee_type = ir.FunctionType(llvm_ret_type, llvm_arg_types)
    callee = cgutils.get_or_insert_function(
        builder.module, callee_type, mangled_name)
    callee.calling_convention = target.CC_SPIR_FUNC

    # Translate the SYCL scope/order into the integer constants passed as
    # the second and third call arguments.
    scope_value = atomic_helper.get_scope(
        atomic_helper.sycl_memory_scope.device)
    semantics_value = atomic_helper.get_memory_semantics_mask(
        atomic_helper.sycl_memory_order.relaxed)

    call_args = [
        elem_ptr,
        context.get_constant(types.int32, scope_value),
        context.get_constant(types.int32, semantics_value),
        addend,
    ]

    return builder.call(callee, call_args)
Пример #7
0
    def codegen(context, builder, signature, args):
        """Emit IR that builds a one-dimensional view along a runtime axis.

        ``args`` is unpacked as (array, idx, axis, extent).  For every
        static axis number the generated code compares it against the
        runtime ``axis``: non-matching axes take their index from ``idx``,
        while the matching axis gets index zero and contributes the view's
        shape and stride.  When ``have_extent`` is true the view length is
        the smaller of the axis size and ``extent`` (signed comparison).

        NOTE(review): ``have_extent``, ``return_type`` and ``make_view``
        are not defined in this block; presumably they come from the
        enclosing scope -- confirm.

        Returns:
            The result of ``impl_ret_borrowed`` applied to the view value.
        """
        array_type, _idx_type, _axis_type, _extent_type = signature.args
        array, idx, axis, extent = args
        array = context.make_array(array_type)(context, builder, array)

        zero = context.get_constant(types.intp, 0)
        intp_llvm_type = context.get_value_type(types.intp)
        ndim = array_type.ndim

        # Slots filled in by whichever branch matches the runtime axis.
        shape_slot = cgutils.alloca_once(builder, intp_llvm_type)
        stride_slot = cgutils.alloca_once(builder, intp_llvm_type)

        # The slicing axis is only known at run time, so the full index is
        # rebuilt in a scratch buffer with zero written at that axis.
        index_buffer = cgutils.alloca_once(builder,
                                           intp_llvm_type,
                                           size=array_type.ndim)

        for axis_number in range(array_type.ndim):
            axis_const = context.get_constant(types.intp, axis_number)
            differs = builder.icmp_unsigned("!=", axis_const, axis)

            with builder.if_else(differs) as (other_axis, slice_axis):
                with other_axis:
                    # Not the slicing axis: copy the matching member of
                    # the index tuple.
                    member = builder.extract_value(idx, axis_number)
                    builder.store(member,
                                  builder.gep(index_buffer, [axis_const]))

                with slice_axis:
                    # Slicing axis: index zero, and remember this axis's
                    # size and stride for the view.
                    builder.store(zero,
                                  builder.gep(index_buffer, [axis_const]))
                    length = builder.extract_value(array.shape, axis_number)
                    step = builder.extract_value(array.strides, axis_number)

                    if have_extent:
                        # Keep the axis size when extent >= size,
                        # otherwise use the extent.
                        too_long = builder.icmp_signed(">=", extent, length)
                        length = builder.select(too_long, length, extent)

                    builder.store(length, shape_slot)
                    builder.store(step, stride_slot)

        # Read the scratch buffer back into a Python list of IR values.
        loaded_indices = [
            builder.load(
                builder.gep(index_buffer,
                            [context.get_constant(types.intp, position)]))
            for position in range(ndim)
        ]

        # Pointer to the element addressed by the rebuilt index.
        dataptr = cgutils.get_item_pointer(context,
                                           builder,
                                           array_type,
                                           array,
                                           loaded_indices,
                                           wraparound=True,
                                           boundscheck=True)

        # The view has exactly one dimension: the sliced axis.
        view_shapes = [builder.load(shape_slot)]
        view_strides = [builder.load(stride_slot)]

        # Assemble the view from the data pointer, shape and stride.
        view = make_view(context, builder, array_type, array, return_type,
                         dataptr, view_shapes, view_strides)

        return impl_ret_borrowed(context, builder, return_type,
                                 view._getvalue())