Пример #1
0
def sample_discrete(densities, logged=False, return_gpuarray=False):
    """
    Draw one categorical sample per row of 'densities' using the
    'sample_discrete' CUDA kernel. The rows hold unnormalized
    univariate densities.

    Parameters
    ---------
    densities : ndarray or gpuarray (n, k)
    logged: boolean indicating whether densities is on the
    log scale; forwarded to the kernel as an int32 flag
    return_gpuarray: boolean; when True the result stays on the
    device, otherwise it is copied to the host and the device
    buffer is freed

    Returns
    -------
    indices : ndarray or gpuarray (if return_gpuarray=True)
    of length n and dtype = int32
    """

    from gpustats.util import info

    n_rows, n_cols = densities.shape

    # Get the densities onto the device. Host arrays are prepared and
    # uploaded; Fortran-ordered device arrays go through util.transpose;
    # any other device array is used as-is.
    if not isinstance(densities, GPUArray):
        densities = util.prep_ndarray(densities)
        device_densities = to_gpu(densities)
    elif densities.flags.f_contiguous:
        device_densities = util.transpose(densities)
    else:
        device_densities = densities

    kernel = cu_module.get_function('sample_discrete')

    # One uniform draw per row, generated on the host and uploaded
    host_uniforms = np.asarray(np.random.rand(n_rows), dtype=np.float32)
    device_uniforms = to_gpu(host_uniforms)
    device_out = gpu_empty(n_rows, dtype=np.int32)
    rows_arg, cols_arg, logged_arg = np.array([n_rows, n_cols, logged],
                                              dtype=np.int32)

    # Launch geometry: a narrower block on devices that report fewer
    # than 1024 threads per block
    block_x = 16 if info.max_block_threads < 1024 else 32
    block_y = 16
    launch_block = (block_x, block_y, 1)
    launch_grid = (int(n_rows / block_y) + 1, 1)

    # Shared memory request; the factor 4 is presumably the byte size
    # of a 32-bit element -- confirm against the kernel source
    shared_elems = (block_x + 1) * block_y + 2 * block_y
    shared_bytes = 4 * shared_elems

    kernel(device_densities,
           device_uniforms,
           device_out,
           rows_arg,
           cols_arg,
           logged_arg,
           block=launch_block,
           grid=launch_grid,
           shared=shared_bytes)

    device_uniforms.gpudata.free()
    if return_gpuarray:
        return device_out

    host_out = device_out.get()
    device_out.gpudata.free()
    return host_out
Пример #2
0
def sample_discrete(in_densities, logged=False, pad=False,
                    return_gpuarray=False):
    """
    Takes a categorical sample from the unnormalized univariate
    densities defined in the rows of 'in_densities'

    Parameters
    ---------
    in_densities : ndarray or gpuarray (n, k)
    logged: boolean indicating whether densities is on the
    log scale; selects the 'sample_discrete_logged' kernel
    instead of 'sample_discrete'
    pad: boolean; when True the input is first passed through
    util.pad_data_mult16 (fill=1 if logged, fill=0 otherwise)
    return_gpuarray: boolean; when True the result stays on the
    device, otherwise it is copied to the host and the device
    buffer is freed

    Returns
    -------
    indices : ndarray or gpuarray (if return_gpuarray=True)
    of length n, where n is the row count of the (possibly padded)
    densities.
    NOTE(review): the output buffer is allocated as float32 here,
    not int32 -- confirm which dtype the kernel actually writes.
    """

    if pad:
        fill_value = 1 if logged else 0
        densities = util.pad_data_mult16(in_densities, fill=fill_value)
    else:
        densities = in_densities

    n, k = densities.shape

    kernel_name = 'sample_discrete_logged' if logged else 'sample_discrete'
    cu_func = cu_module.get_function(kernel_name)

    if isinstance(densities, GPUArray):
        if densities.flags.f_contiguous:
            # Transpose directly. GPUArray.reshape returns a new array
            # rather than modifying its receiver, so a bare reshape call
            # whose result is discarded cannot change 'densities'.
            gpu_densities = util.transpose(densities)
        else:
            gpu_densities = densities
    else:
        densities = util.prep_ndarray(densities)
        gpu_densities = to_gpu(densities)

    # setup GPU data: one host-generated uniform draw per row
    gpu_random = to_gpu(np.asarray(np.random.rand(n), dtype=np.float32))
    gpu_dest = gpu_empty(n, dtype=np.float32)

    # Force the stride passed to the kernel to be odd.
    # NOTE(review): presumably to avoid shared-memory bank conflicts --
    # confirm against the kernel source.
    stride = gpu_densities.shape[1]
    if stride % 2 == 0:
        stride += 1
    dims = np.array([n, k, gpu_densities.shape[1], stride], dtype=np.int32)

    # optimize design ...
    grid_design, block_design = _tune_sfm(n, stride, cu_func.num_regs)

    # block_design[0] * (stride + 1) elements; the factor 4 is presumably
    # the byte size of a float32 element -- confirm against the kernel
    shared_mem = 4 * (block_design[0] * stride +
                      1 * block_design[0])

    cu_func(gpu_densities, gpu_random, gpu_dest,
            dims[0], dims[1], dims[2], dims[3],
            block=block_design, grid=grid_design, shared=shared_mem)

    gpu_random.gpudata.free()
    if return_gpuarray:
        return gpu_dest
    else:
        res = gpu_dest.get()
        gpu_dest.gpudata.free()
        return res
Пример #3
0
def sample_discrete(densities, logged=False, return_gpuarray=False):
    """
    Sample one category index for every row of 'densities', where
    each row holds unnormalized univariate densities. The sampling
    is done by the 'sample_discrete' CUDA kernel.

    Parameters
    ---------
    densities : ndarray or gpuarray (n, k)
    logged: boolean indicating whether densities is on the
    log scale; handed to the kernel as an int32 value
    return_gpuarray: boolean; if True the device array is returned
    directly, otherwise the result is fetched to the host and the
    device buffer is released

    Returns
    -------
    indices : ndarray or gpuarray (if return_gpuarray=True)
    of length n and dtype = int32
    """

    from gpustats.util import info

    num_rows, num_cats = densities.shape

    # Resolve the device-side densities
    on_device = isinstance(densities, GPUArray)
    if on_device and densities.flags.f_contiguous:
        # Fortran-ordered device input is routed through util.transpose
        dev_densities = util.transpose(densities)
    elif on_device:
        dev_densities = densities
    else:
        # Host input: prepare, then upload
        densities = util.prep_ndarray(densities)
        dev_densities = to_gpu(densities)

    sampler = cu_module.get_function("sample_discrete")

    # Device buffers: per-row uniform draws (made on the host) and output
    dev_uniform = to_gpu(np.asarray(np.random.rand(num_rows), dtype=np.float32))
    dev_result = gpu_empty(num_rows, dtype=np.int32)
    scalar_args = np.array([num_rows, num_cats, logged], dtype=np.int32)

    # Block width depends on the thread-per-block limit reported by info
    if info.max_block_threads < 1024:
        block_w = 16
    else:
        block_w = 32
    block_h = 16

    threads = (block_w, block_h, 1)
    blocks = (int(num_rows / block_h) + 1, 1)

    # Shared memory request; the leading 4 is presumably bytes per
    # 32-bit element -- confirm against the kernel source
    shared_request = 4 * ((block_w + 1) * block_h + 2 * block_h)

    sampler(
        dev_densities,
        dev_uniform,
        dev_result,
        scalar_args[0],
        scalar_args[1],
        scalar_args[2],
        block=threads,
        grid=blocks,
        shared=shared_request,
    )

    dev_uniform.gpudata.free()

    if not return_gpuarray:
        fetched = dev_result.get()
        dev_result.gpudata.free()
        return fetched
    return dev_result