Exemplo n.º 1
0
 def pairwise_difference(self, in_gpu, N):
     """Launch the kernel from ``self.pairwised`` and return its output.

     An uninitialised ``(N, N)`` GPU array with the dtype of ``in_gpu`` is
     allocated, the kernel is launched with the output buffer, the input
     buffer and the output element count (as ``uint32``), and the output
     array is returned.

     Parameters
     ----------
     in_gpu : GPU array
         Input whose ``dtype`` and ``gpudata`` are read.
         NOTE(review): presumably holds N elements -- confirm with caller.
     N : int
         Side length of the square output.

     Returns
     -------
     GPU array of shape ``(N, N)`` with the dtype of ``in_gpu``.
     """
     result = gpuarray.empty((N, N), in_gpu.dtype)
     launch_block, launch_grid = misc.select_block_grid_sizes(
         self.device, (N, N))

     # The kernel is requested by C type name: 8-byte items are treated as
     # 'double', everything else as 'float'.
     c_type = 'double' if in_gpu.dtype.itemsize == 8 else 'float'

     kernel = self.pairwised(N, c_type)
     kernel(result.gpudata, in_gpu.gpudata, np.uint32(result.size),
            block=launch_block, grid=launch_grid)
     return result
Exemplo n.º 2
0
 def gamma_exponential_correlation_cuda(self, image, l, y):
     """Launch the kernel from ``self.gamma_exp_kernel`` on ``image``.

     If ``image`` is already a ``pycuda.gpuarray.GPUArray`` it is used as
     is and ``N`` is its first dimension.  Otherwise it is flattened to an
     ``(N, 1)`` array, with ``N = shape[0] * shape[1]``, and uploaded to
     the GPU.

     Parameters
     ----------
     image : GPUArray or host array
         Input data; a host array must expose at least two dimensions.
     l, y : scalar
         Kernel parameters, uploaded together as a two-element array cast
         to the dtype of ``image``.

     Returns
     -------
     A copy of the ``(N, N)`` GPU array written by the kernel.
     """
     if not isinstance(image, pycuda.gpuarray.GPUArray):
         # Host input: flatten to a column and upload it.
         N = image.shape[0] * image.shape[1]
         image = image.ravel().reshape(N, 1)
         image_g = gpuarray.to_gpu(image)
     else:
         N = image.shape[0]
         image_g = image

     elem_dtype = image.dtype
     host_params = np.array([l, y]).astype(elem_dtype)
     params = gpuarray.to_gpu(host_params)

     result = gpuarray.empty((N, N), image_g.dtype)
     launch_block, launch_grid = misc.select_block_grid_sizes(
         self.device, (N, N))

     # The kernel is requested by C type name: 8-byte items are treated as
     # 'double', everything else as 'float'.
     c_type = 'double' if elem_dtype.itemsize == 8 else 'float'

     kernel_func = self.gamma_exp_kernel(N, c_type)
     kernel_func(result.gpudata, image_g.gpudata, params.gpudata,
                 np.uint64(result.size),
                 block=launch_block, grid=launch_grid)
     return result.copy()
    unsigned int a = idx/(${B}*${C}*${D});
    unsigned int b = (idx%(${B}*${C}*${D}))/(${C}*${D});
    unsigned int c = ((idx%(${B}*${C}*${D}))%(${C}*${D}))/${D};
    unsigned int d = ((idx%(${B}*${C}*${D}))%(${C}*${D}))%${D};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (c == 0)
           x[INDEX(a,b,c,d)] = 100;
    }
}
""")

# Query the device attributes and let the helper pick block/grid sizes for
# an array with the shape of x.
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(
    pycuda.autoinit.device)
block_dim, grid_dim = misc.select_block_grid_sizes(pycuda.autoinit.device,
                                                   x.shape)
max_blocks_per_grid = max(max_grid_dim)

# Fill in the kernel template with the device limits and the array
# dimensions, then compile it and look up the kernel entry point.
func_mod = SourceModule(
    func_mod_template.substitute(max_threads_per_block=max_threads_per_block,
                                 max_blocks_per_grid=max_blocks_per_grid,
                                 A=A, B=B, C=C, D=D))
func = func_mod.get_function('func')

# Run the kernel on a GPU copy of x.
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size), block=block_dim, grid=grid_dim)

# Host-side reference: the kernel stores 100 wherever the third subscript
# is 0, so apply the same assignment with NumPy slicing.
x_np = x.copy()
x_np[:, :, 0, :] = 100

# print() as a function call: the Python 2 print statement is a syntax
# error on Python 3.
print('Success status: ', np.allclose(x_np, x_gpu.get()))
Exemplo n.º 4
0
    // Convert the linear index to subscripts:
    unsigned int a = idx/(${B}*${C});
    unsigned int b = (idx%(${B}*${C}))/${C};
    unsigned int c = (idx%(${B}*${C}))%${C};

    // Use the subscripts to access the array:
    if (idx < N) {
        if (b == 0)
           x[INDEX(a,b,c)] = 100;
    }
}
""")

# Query the device attributes and let the helper pick block/grid sizes for
# an array with the shape of x.
device = pycuda.autoinit.device
max_threads_per_block, max_block_dim, max_grid_dim = misc.get_dev_attrs(device)
block_dim, grid_dim = misc.select_block_grid_sizes(device, x.shape)
max_blocks_per_grid = max(max_grid_dim)

# Fill in the kernel template with the device limits and the array
# dimensions, then compile it and look up the kernel entry point.
kernel_source = func_mod_template.substitute(
    max_threads_per_block=max_threads_per_block,
    max_blocks_per_grid=max_blocks_per_grid,
    A=A, B=B, C=C)
func_mod = SourceModule(kernel_source)
func = func_mod.get_function('func')

# Run the kernel on a GPU copy of x.
x_gpu = gpuarray.to_gpu(x)
func(x_gpu.gpudata, np.uint32(x_gpu.size), block=block_dim, grid=grid_dim)

# Host-side reference: the kernel stores 100 wherever the second subscript
# is 0, so apply the same assignment with NumPy slicing.
x_np = x.copy()
x_np[:, 0, :] = 100

print('Success status: ', np.allclose(x_np, x_gpu.get()))