示例#1
0
def _univariate_pdf_call(cu_func, data, packed_params, get):
    """Run a univariate density kernel for every datum / parameter-set pair.

    Parameters
    ----------
    cu_func : callable
        Kernel handle. It must expose ``num_regs`` and accept the
        ``block``, ``grid`` and ``shared`` keyword arguments
        (presumably a compiled PyCUDA function -- confirm with caller).
    data : sequence or GPUArray
        Input observations. A ``GPUArray`` is passed to the kernel as-is;
        anything else is uploaded with ``to_gpu``.
    packed_params : array-like
        Parameter sets, one per leading entry. Normalised with
        ``util.prep_ndarray`` before being uploaded.
    get : bool
        If true, copy the result back to the host; otherwise return the
        device array.

    Returns
    -------
    numpy.ndarray or GPUArray
        With ``get`` true: the host copy of the result, re-read as a
        C-ordered ``(nparams, ndata)`` array and transposed when there is
        more than one parameter set. With ``get`` false: the device
        buffer, allocated with shape ``(ndata, nparams)``.
    """
    # Both counts are taken from the arguments as passed in, i.e. before
    # packed_params goes through util.prep_ndarray.
    n_obs = len(data)
    n_sets = len(packed_params)

    kernel_regs = cu_func.num_regs

    packed_params = util.prep_ndarray(packed_params)

    # Work split per block, and the shared memory that split requires.
    data_per, params_per = util.tune_blocksize(data, packed_params,
                                               kernel_regs)
    shmem = util.compute_shmem(data, packed_params, data_per, params_per)

    threads = (data_per * params_per, 1, 1)
    blocks = (util.get_boxes(n_obs, data_per),
              util.get_boxes(n_sets, params_per))

    # see cufiles/univcaller.cu

    # Destination comes from gpu_empty (an earlier variant uploaded a
    # zero-filled host array instead).
    result_gpu = gpu_empty((n_obs, n_sets), dtype=np.float32)
    if isinstance(data, GPUArray):
        data_gpu = data
    else:
        data_gpu = to_gpu(data)
    params_gpu = to_gpu(packed_params)

    # int32 launch description: block design, data length, params shape.
    launch_spec = np.array(
        (data_per, params_per, n_obs) + packed_params.shape,
        dtype=np.int32)

    # Exactly five design values are forwarded to the kernel.
    cu_func(result_gpu, data_gpu, params_gpu,
            launch_spec[0], launch_spec[1], launch_spec[2],
            launch_spec[3], launch_spec[4],
            block=threads, grid=blocks, shared=shmem)

    if not get:
        return result_gpu

    host = result_gpu.get()
    if n_sets > 1:
        host = host.reshape((n_sets, n_obs), order='C').T
    return host
示例#2
0
def _multivariate_pdf_call(cu_func, data, packed_params, get, order,
                           datadim=None):
    """Run a multivariate density kernel for every datum / parameter-set pair.

    Parameters
    ----------
    cu_func : callable
        Kernel handle. It must expose ``num_regs`` and accept the
        ``block``, ``grid`` and ``shared`` keyword arguments
        (presumably a compiled PyCUDA function -- confirm with caller).
    data : 2-d array or GPUArray
        Observations, one per row. Host data is padded with
        ``util.pad_data`` and uploaded. A ``GPUArray`` is used as-is
        (assumed to be already padded -- confirm with caller).
    packed_params : array-like
        Parameter sets, normalised with ``util.prep_ndarray`` and
        uploaded for the duration of the kernel call.
    get : bool
        If true, return a host array; otherwise return a device array.
    order : str
        ``'F'`` returns the result exactly as the kernel laid it out;
        any other value requests a C-ordered result.
    datadim : int, optional
        Non-padded number of data columns. Only consulted when ``data``
        is a ``GPUArray``; when omitted the second entry of
        ``data.shape`` is used.

    Returns
    -------
    numpy.ndarray or GPUArray
        With ``get`` true: ``gpu_dest.get()`` for ``order == 'F'``,
        otherwise that array converted to C order. With ``get`` false:
        the destination buffer itself when ``order == 'F'`` or there is
        a single parameter set; otherwise a transposed copy, with the
        original destination buffer freed.
    """
    packed_params = util.prep_ndarray(packed_params)
    func_regs = cu_func.num_regs

    # Prep the data. Skip padding if it already lives on the GPU.
    if isinstance(data, GPUArray):
        padded_data = data
        if datadim is None:
            ndata, dim = data.shape
        else:
            ndata, dim = data.shape[0], datadim
    else:
        ndata, dim = data.shape
        padded_data = util.pad_data(data)

    nparams = len(packed_params)
    data_per, params_per = util.tune_blocksize(padded_data,
                                               packed_params,
                                               func_regs)

    shared_mem = util.compute_shmem(padded_data, packed_params,
                                    data_per, params_per)
    block_design = (data_per * params_per, 1, 1)
    grid_design = (util.get_boxes(ndata, data_per),
                   util.get_boxes(nparams, params_per))

    # see cufiles/mvcaller.cu
    design = np.array(((data_per, params_per) +  # block design
                       padded_data.shape +  # data spec
                       (dim,) +  # non-padded number of data columns
                       packed_params.shape),  # params spec
                      dtype=np.int32)

    # A single parameter set yields a flat vector; several yield a
    # Fortran-ordered (ndata, nparams) matrix.
    if nparams == 1:
        gpu_dest = gpu_empty(ndata, dtype=np.float32)
    else:
        gpu_dest = gpu_empty((ndata, nparams), dtype=np.float32, order='F')

    # Upload data if not already uploaded
    if not isinstance(padded_data, GPUArray):
        gpu_padded_data = to_gpu(padded_data)
    else:
        gpu_padded_data = padded_data

    gpu_packed_params = to_gpu(packed_params)

    params = (gpu_dest, gpu_padded_data, gpu_packed_params) + tuple(design)
    kwds = dict(block=block_design, grid=grid_design, shared=shared_mem)
    cu_func(*params, **kwds)

    # The uploaded parameters are only needed for the kernel call.
    gpu_packed_params.gpudata.free()

    if get:
        if order == 'F':
            return gpu_dest.get()
        return np.asarray(gpu_dest.get(), dtype=np.float32, order='C')

    if order == 'F' or nparams == 1:
        return gpu_dest

    # Re-read the buffer as a C-ordered (nparams, ndata) array and
    # transpose it on the device, then release the original buffer.
    res = gpu_transpose(
        util.GPUarray_reshape(gpu_dest, (nparams, ndata), "C"))
    gpu_dest.gpudata.free()
    return res