Пример #1
0
 def exe(self, idata, odata, dir):
     """Dispatch an FFT execution call on ``self._api`` for this plan.

     The API function name is built from a fixed prefix plus the suffix
     that ``cufft_dtype_to_name`` maps ``self._dtype`` to.

     * When ``self._ngpu <= 1`` the ``cufftExec<suffix>`` function is
       called with device pointers obtained through ``device_pointer``.
       If ``idata`` is a ``PlanDataHelper``, the pointers are taken from
       the ``_d_data`` attribute of ``idata`` and ``odata``; otherwise
       from ``idata`` and ``odata`` themselves.
     * Otherwise the ``cufftXtExecDescriptor<suffix>`` function is called
       with ``idata._d_data`` and ``odata._d_data`` passed as-is. The
       direction argument is only forwarded for the 'C2C' and 'Z2Z'
       suffixes.

     Returns whatever the selected API function returns.
     """
     suffix = cufft_dtype_to_name[self._dtype]
     plan = self._handle

     if self._ngpu <= 1:
         single_exec = getattr(self._api, 'cufftExec' + suffix)
         if isinstance(idata, PlanDataHelper):
             # Helper objects carry their device buffer in ``_d_data``.
             in_ptr = device_pointer(idata._d_data)
             out_ptr = device_pointer(odata._d_data)
         else:
             in_ptr = device_pointer(idata)
             out_ptr = device_pointer(odata)
         return single_exec(plan, in_ptr, out_ptr, int(dir))

     xt_exec = getattr(self._api, 'cufftXtExecDescriptor' + suffix)
     if suffix in ('C2C', 'Z2Z'):
         return xt_exec(plan, idata._d_data, odata._d_data, int(dir))

     # Remaining suffixes are called without a direction argument.
     return xt_exec(plan, idata._d_data, odata._d_data)
Пример #2
0
    def runsort(temp, keys, vals, begin_bit=0, end_bit=None):
        """Invoke the bound double-precision radix sort on ``keys``.

        Parameters
        ----------
        temp
            Passed through unchanged as the first argument of the bound
            sort function.
        keys
            Array-like whose ``size`` gives the element count and whose
            device pointer is handed to the sort. When ``None`` the count
            falls back to the enclosing scope's ``maxcount``.
        vals
            Accepted for signature compatibility; this wrapper always
            passes ``None`` for both value-buffer arguments.
        begin_bit
            First bit of the key range handed to the sort (default 0).
        end_bit
            End bit of the key range handed to the sort. ``None`` (the
            default) selects the full key width, ``itemsize * 8`` of the
            enclosing scope's ``dtype``.

        Returns whatever the bound sort function returns.
        """
        stream = 0
        descending = 0
        dtty = np.dtype(dtype)

        # Honour the caller's bit range; previously both arguments were
        # overwritten unconditionally and therefore silently ignored.
        if end_bit is None:
            end_bit = dtty.itemsize * 8

        # Explicit None check: truthiness of an array-like depends on its
        # length (or is ambiguous), which is not what is being asked here.
        count = maxcount if keys is None else keys.size

        # NOTE(review): the scratch key buffer is sized from ``maxcount``,
        # so ``keys.size`` is presumably expected not to exceed it - confirm.
        _arysize = int(maxcount * dtty.itemsize)
        _sort = _bind_radixsort_double()

        ctx = cuda.current_context()
        _temp_keys = ctx.memalloc(_arysize)

        return _sort(temp, ctypes.c_uint(count), device_pointer(keys),
                     device_pointer(_temp_keys), None, None, stream,
                     descending, begin_bit, end_bit)
Пример #3
0
 def __float_or_double(self, devary, floatfn, doublefn):
     """Select the function and pointer type matching ``devary``'s dtype.

     Returns a ``(fn, ptr)`` pair: ``fn`` is ``floatfn`` for a float32
     array or ``doublefn`` for a float64 array, and ``ptr`` is the value
     of ``device_pointer(devary)`` cast to a ctypes pointer to
     ``c_float`` or ``c_double`` respectively.

     Raises ``ValueError`` for any other dtype.
     """
     kind = devary.dtype
     if kind == np.float32:
         chosen, elem_type = floatfn, c_float
     elif kind == np.float64:
         chosen, elem_type = doublefn, c_double
     else:
         raise ValueError("Only accept float or double arrays.")

     address = c_void_p(device_pointer(devary))
     return chosen, cast(address, POINTER(elem_type))
Пример #4
0
 def __uint32_or_uint64(self, devary, fn32, fn64):
     """Select the function and pointer type matching ``devary``'s dtype.

     Parameters
     ----------
     devary
         Array-like with a ``dtype``; must be int32/uint32 or
         int64/uint64.
     fn32
         Function returned for 32-bit integer arrays.
     fn64
         Function returned for 64-bit integer arrays.

     Returns a ``(fn, ptr)`` pair where ``ptr`` is the value of
     ``device_pointer(devary)`` cast to a ctypes pointer to ``c_uint``
     (32-bit) or ``c_ulonglong`` (64-bit).

     Raises ``ValueError`` for any other dtype.
     """
     # Return the caller-supplied functions, mirroring the float/double
     # selector; previously ``fn32``/``fn64`` were ignored in favour of
     # hard-coded ``self._api`` attributes.
     if devary.dtype in (np.dtype(np.uint32), np.dtype(np.int32)):
         fn = fn32
         ity = c_uint
     elif devary.dtype in (np.dtype(np.uint64), np.dtype(np.int64)):
         fn = fn64
         ity = c_ulonglong
     else:
         raise ValueError("Only accept int32, int64, "
                          "uint32 or uint64 arrays")
     dptr = device_pointer(devary)
     ptr = cast(c_void_p(dptr), POINTER(ity))
     return fn, ptr
def make_array_args(arr):
    """Flatten ``arr`` into a list of ctypes values.

    The returned list holds, in order: two null ``c_void_p`` values (the
    meminfo and parent slots), the item count, the item size in bytes,
    the data pointer from ``device_pointer(arr)``, then one
    ``c_ssize_t`` per dimension for the shape followed by one per
    dimension for the strides.
    """
    intp = ctypes.c_ssize_t

    packed = [
        ctypes.c_void_p(0),                    # meminfo
        ctypes.c_void_p(0),                    # parent
        intp(arr.size),                        # nitems
        intp(arr.dtype.itemsize),              # itemsize
        ctypes.c_void_p(device_pointer(arr)),  # data
    ]
    packed.extend(intp(arr.shape[axis]) for axis in range(arr.ndim))
    packed.extend(intp(arr.strides[axis]) for axis in range(arr.ndim))
    return packed
Пример #6
0
def _prepare_array(self, val):
    """Return ``(device_pointer(val), val)`` for the given array."""
    address = device_pointer(val)
    return address, val
Пример #7
0
def _segmentedsort(d_keys, d_vals, d_segments, stream):
    """Call the sort overload registered for ``d_keys.dtype``.

    The overload receives the device pointers of the keys, values and
    segments arrays together with the key and segment counts, plus
    ``stream.handle`` (or ``0`` when ``stream`` is falsy). Nothing is
    returned.
    """
    sorter = _overloads[d_keys.dtype]
    call_args = (
        device_pointer(d_keys),
        device_pointer(d_vals),
        d_keys.size,
        device_pointer(d_segments),
        d_segments.size,
        stream.handle if stream else 0,
    )
    sorter(*call_args)
Пример #8
0
    def _prepare_args(self, ty, val, stream, retr, kernelargs):
        """
        Marshal one argument into ctypes values appended to ``kernelargs``.

        ``ty`` is the argument's type and ``val`` its Python value. Each
        entry of ``self.extensions`` (visited in reverse order) may first
        rewrite the ``(ty, val)`` pair. The result is then dispatched on
        ``ty``:

        * arrays are moved to the device via ``wrap_arg(...).to_device``
          and expanded to meminfo, parent, nitems, itemsize, data, then
          shape and strides entries;
        * integer, float, boolean, complex and datetime/timedelta scalars
          become the matching ctypes scalar(s);
        * records are moved to the device and passed as a pointer;
        * tuples are expanded element by element, recursively.

        Raises ``NotImplementedError`` for any other type.
        """

        # Let every registered extension transform the argument first.
        for ext in reversed(self.extensions):
            ty, val = ext.prepare_args(ty, val, stream=stream, retr=retr)

        if isinstance(ty, types.Array):
            device_array = wrap_arg(val).to_device(retr, stream)
            intp = ctypes.c_ssize_t

            null_meminfo = ctypes.c_void_p(0)
            null_parent = ctypes.c_void_p(0)
            item_count = intp(device_array.size)
            item_bytes = intp(device_array.dtype.itemsize)

            raw_ptr = driver.device_pointer(device_array)
            if driver.USE_NV_BINDING:
                # Convert the binding's pointer object to a plain integer.
                raw_ptr = int(raw_ptr)
            data_ptr = ctypes.c_void_p(raw_ptr)

            kernelargs.extend(
                [null_meminfo, null_parent, item_count, item_bytes, data_ptr]
            )
            kernelargs.extend(
                intp(device_array.shape[axis])
                for axis in range(device_array.ndim)
            )
            kernelargs.extend(
                intp(device_array.strides[axis])
                for axis in range(device_array.ndim)
            )

        elif isinstance(ty, types.Integer):
            # The ctypes class is looked up by the type's string form.
            ctype_cls = getattr(ctypes, "c_%s" % ty)
            kernelargs.append(ctype_cls(val))

        elif ty == types.float16:
            # Half floats are passed as their raw 16-bit pattern.
            half_bits = np.float16(val).view(np.uint16)
            kernelargs.append(ctypes.c_uint16(half_bits))

        elif ty == types.float64:
            kernelargs.append(ctypes.c_double(val))

        elif ty == types.float32:
            kernelargs.append(ctypes.c_float(val))

        elif ty == types.boolean:
            kernelargs.append(ctypes.c_uint8(int(val)))

        elif ty == types.complex64:
            # Real part first, then imaginary part.
            kernelargs.append(ctypes.c_float(val.real))
            kernelargs.append(ctypes.c_float(val.imag))

        elif ty == types.complex128:
            kernelargs.append(ctypes.c_double(val.real))
            kernelargs.append(ctypes.c_double(val.imag))

        elif isinstance(ty, (types.NPDatetime, types.NPTimedelta)):
            as_int64 = val.view(np.int64)
            kernelargs.append(ctypes.c_int64(as_int64))

        elif isinstance(ty, types.Record):
            device_record = wrap_arg(val).to_device(retr, stream)
            record_ptr = device_record.device_ctypes_pointer
            if driver.USE_NV_BINDING:
                record_ptr = ctypes.c_void_p(int(record_ptr))
            kernelargs.append(record_ptr)

        elif isinstance(ty, types.BaseTuple):
            assert len(ty) == len(val)
            for member_ty, member_val in zip(ty, val):
                self._prepare_args(member_ty, member_val, stream, retr,
                                   kernelargs)

        else:
            raise NotImplementedError(ty, val)
Пример #9
0
 def generate_poisson(self, devout, num, lmbd):
     """Call ``curandGeneratePoisson`` on ``self._api`` for ``devout``.

     ``devout`` must have dtype int32 or uint32; its device pointer is
     cast to a ctypes pointer to ``c_uint`` and passed along with
     ``self._handle``, ``num`` and ``lmbd``.

     Returns whatever the API call returns. Raises ``ValueError`` for
     any other dtype.
     """
     received = devout.dtype
     accepted = (np.dtype(np.uint32), np.dtype(np.int32))
     if received not in accepted:
         raise ValueError("Only accept int32 or uint32 arrays")

     out_ptr = cast(c_void_p(device_pointer(devout)), POINTER(c_uint))
     return self._api.curandGeneratePoisson(self._handle, out_ptr, num, lmbd)
Пример #10
0
def _devptr(p):
    """Return ``device_pointer(p)``, or ``None`` when ``p`` is ``None``."""
    return None if p is None else device_pointer(p)
Пример #11
0
 def exe(self, idata, odata, dir):
     """Call the ``cufftExec<suffix>`` function on ``self._api``.

     The suffix is what ``cufft_dtype_to_name`` maps ``self.dtype`` to.
     The function receives ``self._handle``, the device pointers of
     ``idata`` and ``odata``, and ``dir`` converted to ``int``; its
     return value is passed back to the caller.
     """
     exec_name = 'cufftExec' + cufft_dtype_to_name[self.dtype]
     runner = getattr(self._api, exec_name)
     return runner(
         self._handle,
         device_pointer(idata),
         device_pointer(odata),
         int(dir),
     )
Пример #12
0
 def runsort(d_keys, d_vals, d_seg):
     """Run the bound double-precision segmented sort.

     The function obtained from ``_bind_segsort_double()`` is called
     with the device pointers of the keys, values and segments arrays,
     the key and segment counts, and a trailing ``0``. Nothing is
     returned.
     """
     segsort = _bind_segsort_double()
     call_args = (
         device_pointer(d_keys),
         device_pointer(d_vals),
         d_keys.size,
         device_pointer(d_seg),
         d_seg.size,
         0,
     )
     segsort(*call_args)