示例#1
0
def _make_array(context, builder, dataptr, dtype, shape, layout='C'):
    ndim = len(shape)
    # Create array object
    aryty = types.Array(dtype=dtype, ndim=ndim, layout='C')
    ary = context.make_array(aryty)(context, builder)

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)
    # Compute strides
    rstrides = [itemsize]
    for i, lastsize in enumerate(reversed(shape[1:])):
        rstrides.append(lastsize * rstrides[-1])
    strides = [s for s in reversed(rstrides)]

    kshape = [context.get_constant(types.intp, s) for s in shape]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    context.populate_array(ary,
                           data=builder.bitcast(dataptr, ary.data.type),
                           shape=cgutils.pack_array(builder, kshape),
                           strides=cgutils.pack_array(builder, kstrides),
                           itemsize=context.get_constant(types.intp, itemsize),
                           meminfo=None)

    return ary._getvalue()
示例#2
0
 def _shape_and_strides(self, context, builder):
     # Set shape and strides for a 1D size 1 array
     one = context.get_constant(types.intp, 1)
     zero = context.get_constant(types.intp, 0)
     shape = cgutils.pack_array(builder, [one])
     strides = cgutils.pack_array(builder, [zero])
     return shape, strides
示例#3
0
def cuda_gridsize(context, builder, sig, args):
    restype = sig.return_type
    nx = _nthreads_for_dim(builder, 'x')

    if restype == types.int32:
        return nx
    elif isinstance(restype, types.UniTuple):
        ny = _nthreads_for_dim(builder, 'y')

        if restype.count == 2:
            return cgutils.pack_array(builder, (nx, ny))
        elif restype.count == 3:
            nz = _nthreads_for_dim(builder, 'z')
            return cgutils.pack_array(builder, (nx, ny, nz))

    # Fallthrough to here indicates unexpected return type or tuple length
    raise ValueError('Unexpected return type %s of cuda.gridsize' % restype)
示例#4
0
    def make_constant_array(self, builder, aryty, arr):
        """
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        """

        lmod = builder.module

        constvals = [
            self.get_constant(types.byte, i)
            for i in iter(arr.tobytes(order='A'))
        ]
        constary = lc.Constant.array(Type.int(8), constvals)

        addrspace = nvvm.ADDRSPACE_CONSTANT
        gv = lmod.add_global_variable(constary.type,
                                      name="_cudapy_cmem",
                                      addrspace=addrspace)
        gv.linkage = lc.LINKAGE_INTERNAL
        gv.global_constant = True
        gv.initializer = constary

        # Preserve the underlying alignment
        lldtype = self.get_data_type(aryty.dtype)
        align = self.get_abi_sizeof(lldtype)
        gv.align = 2**(align - 1).bit_length()

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gv.bitcast(Type.pointer(Type.int(8), addrspace))
        genptr = builder.call(conv, [addrspaceptr])

        # Create array object
        ary = self.make_array(aryty)(self, builder)
        kshape = [self.get_constant(types.intp, s) for s in arr.shape]
        kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
        self.populate_array(ary,
                            data=builder.bitcast(genptr, ary.data.type),
                            shape=cgutils.pack_array(builder, kshape),
                            strides=cgutils.pack_array(builder, kstrides),
                            itemsize=ary.itemsize,
                            parent=ary.parent,
                            meminfo=None)

        return ary._getvalue()
示例#5
0
def cuda_grid(context, builder, sig, args):
    restype = sig.return_type
    if restype == types.int32:
        return nvvmutils.get_global_id(builder, dim=1)
    elif isinstance(restype, types.UniTuple):
        ids = nvvmutils.get_global_id(builder, dim=restype.count)
        return cgutils.pack_array(builder, ids)
    else:
        raise ValueError('Unexpected return type %s from cuda.grid' % restype)
示例#6
0
def unituple_constant(context, builder, ty, pyval):
    """
    Create a homogeneous tuple constant.
    """
    consts = [context.get_constant_generic(builder, ty.dtype, v)
              for v in pyval]
    return impl_ret_borrowed(
        context, builder, ty, cgutils.pack_array(builder, consts),
    )
示例#7
0
def ptx_gridsize2d(context, builder, sig, args):
    assert len(args) == 1
    ntidx = nvvmutils.call_sreg(builder, "ntid.x")
    nctaidx = nvvmutils.call_sreg(builder, "nctaid.x")

    ntidy = nvvmutils.call_sreg(builder, "ntid.y")
    nctaidy = nvvmutils.call_sreg(builder, "nctaid.y")

    r1 = builder.mul(ntidx, nctaidx)
    r2 = builder.mul(ntidy, nctaidy)
    return cgutils.pack_array(builder, [r1, r2])
示例#8
0
    def core(context, builder, sig, args):
        lmod = builder.module

        fargtys = []
        for arg in prototype_args:
            ty = context.get_value_type(arg.ty)
            if arg.is_ptr:
                ty = ty.as_pointer()
            fargtys.append(ty)

        fretty = context.get_value_type(retty)

        fnty = Type.function(fretty, fargtys)
        fn = lmod.get_or_insert_function(fnty, name=func)

        # For returned values that are returned through a pointer, we need to
        # allocate variables on the stack and pass a pointer to them.
        actual_args = []
        virtual_args = []
        arg_idx = 0
        for arg in prototype_args:
            if arg.is_ptr:
                # Allocate space for return value and add to args
                tmp_arg = cgutils.alloca_once(builder,
                                              context.get_value_type(arg.ty))
                actual_args.append(tmp_arg)
                virtual_args.append(tmp_arg)
            else:
                actual_args.append(args[arg_idx])
                arg_idx += 1

        ret = builder.call(fn, actual_args)

        # Following the call, we need to assemble the returned values into a
        # tuple for returning back to the caller.
        tuple_args = []
        if retty != types.void:
            tuple_args.append(ret)
        for arg in virtual_args:
            tuple_args.append(builder.load(arg))

        if isinstance(nb_retty, types.UniTuple):
            return cgutils.pack_array(builder, tuple_args)
        else:
            return cgutils.pack_struct(builder, tuple_args)
示例#9
0
def real_divmod_impl(context, builder, sig, args, loc=None):
    x, y = args
    quot = cgutils.alloca_once(builder, x.type, name="quot")
    rem = cgutils.alloca_once(builder, x.type, name="rem")

    with builder.if_else(cgutils.is_scalar_zero(builder, y),
                         likely=False) as (if_zero, if_non_zero):
        with if_zero:
            if not context.error_model.fp_zero_division(
                    builder, ("modulo by zero", ), loc):
                # No exception raised => compute the nan result,
                # and set the FP exception word for Numpy warnings.
                q = builder.fdiv(x, y)
                r = builder.frem(x, y)
                builder.store(q, quot)
                builder.store(r, rem)
        with if_non_zero:
            q, r = real_divmod(context, builder, x, y)
            builder.store(q, quot)
            builder.store(r, rem)

    return cgutils.pack_array(builder, (builder.load(quot), builder.load(rem)))
示例#10
0
文件: models.py 项目: zsoltc89/numba
 def as_data(self, builder, value):
     values = [
         builder.load(cgutils.gep_inbounds(builder, value, i))
         for i in range(self._fe_type.count)
     ]
     return cgutils.pack_array(builder, values)
示例#11
0
 def _shape_and_strides(self, context, builder):
     shape = cgutils.pack_array(builder, self.shape)
     strides = cgutils.pack_array(builder, self.strides)
     return shape, strides
示例#12
0
def int_divmod_impl(context, builder, sig, args):
    quot, rem = _int_divmod_impl(context, builder, sig, args,
                                 "integer divmod by zero")

    return cgutils.pack_array(builder, (builder.load(quot), builder.load(rem)))
示例#13
0
def _generic_array(context,
                   builder,
                   shape,
                   dtype,
                   symbol_name,
                   addrspace,
                   can_dynsized=False):
    elemcount = reduce(operator.mul, shape)

    # Check for valid shape for this type of allocation
    dynamic_smem = elemcount <= 0 and can_dynsized
    if elemcount <= 0 and not dynamic_smem:
        raise ValueError("array length <= 0")

    # Check that we support the requested dtype
    other_supported_type = isinstance(dtype, (types.Record, types.Boolean))
    if dtype not in types.number_domain and not other_supported_type:
        raise TypeError("unsupported type: %s" % dtype)

    lldtype = context.get_data_type(dtype)
    laryty = Type.array(lldtype, elemcount)

    if addrspace == nvvm.ADDRSPACE_LOCAL:
        # Special case local address space allocation to use alloca
        # NVVM is smart enough to only use local memory if no register is
        # available
        dataptr = cgutils.alloca_once(builder, laryty, name=symbol_name)
    else:
        lmod = builder.module

        # Create global variable in the requested address space
        gvmem = lmod.add_global_variable(laryty, symbol_name, addrspace)
        # Specify alignment to avoid misalignment bug
        align = context.get_abi_sizeof(lldtype)
        # Alignment is required to be a power of 2 for shared memory. If it is
        # not a power of 2 (e.g. for a Record array) then round up accordingly.
        gvmem.align = 1 << (align - 1).bit_length()

        if dynamic_smem:
            gvmem.linkage = lc.LINKAGE_EXTERNAL
        else:
            ## Comment out the following line to workaround a NVVM bug
            ## which generates a invalid symbol name when the linkage
            ## is internal and in some situation.
            ## See _get_unique_smem_id()
            # gvmem.linkage = lc.LINKAGE_INTERNAL

            gvmem.initializer = lc.Constant.undef(laryty)

        # Convert to generic address-space
        conv = nvvmutils.insert_addrspace_conv(lmod, Type.int(8), addrspace)
        addrspaceptr = gvmem.bitcast(Type.pointer(Type.int(8), addrspace))
        dataptr = builder.call(conv, [addrspaceptr])

    targetdata = _get_target_data(context)
    lldtype = context.get_data_type(dtype)
    itemsize = lldtype.get_abi_size(targetdata)

    # Compute strides
    rstrides = [itemsize]
    for i, lastsize in enumerate(reversed(shape[1:])):
        rstrides.append(lastsize * rstrides[-1])
    strides = [s for s in reversed(rstrides)]
    kstrides = [context.get_constant(types.intp, s) for s in strides]

    # Compute shape
    if dynamic_smem:
        # Compute the shape based on the dynamic shared memory configuration.
        # Unfortunately NVVM does not provide an intrinsic for the
        # %dynamic_smem_size register, so we must read it using inline
        # assembly.
        get_dynshared_size = InlineAsm.get(
            Type.function(Type.int(), []),
            "mov.u32 $0, %dynamic_smem_size;",
            "=r",
            side_effect=True,
        )
        dynsmem_size = builder.zext(builder.call(get_dynshared_size, []),
                                    Type.int(width=64))
        # Only 1-D dynamic shared memory is supported so the following is a
        # sufficient construction of the shape
        kitemsize = context.get_constant(types.intp, itemsize)
        kshape = [builder.udiv(dynsmem_size, kitemsize)]
    else:
        kshape = [context.get_constant(types.intp, s) for s in shape]

    # Create array object
    ndim = len(shape)
    aryty = types.Array(dtype=dtype, ndim=ndim, layout="C")
    ary = context.make_array(aryty)(context, builder)

    context.populate_array(
        ary,
        data=builder.bitcast(dataptr, ary.data.type),
        shape=cgutils.pack_array(builder, kshape),
        strides=cgutils.pack_array(builder, kstrides),
        itemsize=context.get_constant(types.intp, itemsize),
        meminfo=None,
    )
    return ary._getvalue()
示例#14
0
def ptx_grid3d(context, builder, sig, args):
    assert len(args) == 1
    r1, r2, r3 = nvvmutils.get_global_id(builder, dim=3)
    return cgutils.pack_array(builder, [r1, r2, r3])