def _filt(data_g, size=(3, 3, 3), res_g=None):
    """Apply the templated separable filter kernels along x, y and z.

    The OpenCL source is rendered from ``kernels/generic_separable_filter.cl``
    using the filter sizes, the buffer type looked up for ``data_g`` and the
    names ``FUNC`` and ``DEFAULT``, which are resolved outside this function.

    Parameters
    ----------
    data_g: OCLArray
        input buffer, its dtype has to be a key of cl_buffer_datatype_dict
    size: sequence
        filter sizes, ``size[-1]`` is used for x, ``size[-2]`` for y and
        ``size[-3]`` for z
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    Returns
    -------
    res_g, as written by the last (z) pass

    Raises
    ------
    ValueError
        if the dtype of data_g is not in cl_buffer_datatype_dict
    """
    elem_type = data_g.dtype.type
    if elem_type not in cl_buffer_datatype_dict:
        raise ValueError("dtype %s not supported"%elem_type)
    cl_type = cl_buffer_datatype_dict[elem_type]

    with open(abspath("kernels/generic_separable_filter.cl"), "r") as src:
        template = Template(src.read())

    source = template.render(FSIZE_X=size[-1],
                             FSIZE_Y=size[-2],
                             FSIZE_Z=size[-3],
                             FUNC=FUNC,
                             DEFAULT=DEFAULT,
                             DTYPE=cl_type)
    prog = OCLProgram(src_str=source,
                      build_options=["-cl-unsafe-math-optimizations"])

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # x: data -> res, y: res -> tmp, z: tmp -> res
    global_size = data_g.shape[::-1]
    passes = (("filter_3_x", data_g, res_g),
              ("filter_3_y", res_g, tmp_g),
              ("filter_3_z", tmp_g, res_g))
    for kernel_name, src_g, dst_g in passes:
        prog.run_kernel(kernel_name, global_size, None, src_g.data, dst_g.data)
    return res_g
def _filt(data_g, size=(3, 3, 3), res_g=None):
    """Apply the templated reduce filter kernels along x, y and z.

    The OpenCL source is rendered from ``kernels/generic_reduce_filter.cl``
    using the filter sizes and the names ``FUNC`` and ``DEFAULT``, which are
    resolved outside this function.

    Parameters
    ----------
    data_g: OCLArray
        input buffer, checked against np.float32 via assert_bufs_type
    size: sequence
        filter sizes, ``size[-1]`` is used for x, ``size[-2]`` for y and
        ``size[-3]`` for z
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    Returns
    -------
    res_g, as written by the last (z) pass
    """
    assert_bufs_type(np.float32, data_g)

    with open(abspath("kernels/generic_reduce_filter.cl"), "r") as src:
        template = Template(src.read())

    source = template.render(FSIZE_X=size[-1],
                             FSIZE_Y=size[-2],
                             FSIZE_Z=size[-3],
                             FUNC=FUNC,
                             DEFAULT=DEFAULT)
    prog = OCLProgram(src_str=source)

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # x: data -> res, y: res -> tmp, z: tmp -> res
    global_size = data_g.shape[::-1]
    passes = (("filter_3_x", data_g, res_g),
              ("filter_3_y", res_g, tmp_g),
              ("filter_3_z", tmp_g, res_g))
    for kernel_name, src_g, dst_g in passes:
        prog.run_kernel(kernel_name, global_size, None, src_g.data, dst_g.data)
    return res_g
def bilateral3(data, size_filter, sigma_p, sigma_x = 10.):
    """bilateral filter

    Runs the ``bilat3_float`` kernel of ``kernels/bilateral3.cl`` on data.

    Parameters
    ----------
    data: numpy array
        the input, anything that is not float32 is cast to float32 first
    size_filter: int
        handed to the kernel as int32
    sigma_p: float
        handed to the kernel as float32 (last kernel argument)
    sigma_x: float
        handed to the kernel as float32
        NOTE(review): presumably sigma_x is the spatial and sigma_p the
        intensity width - confirm against the kernel source

    Returns
    -------
    the content of the result buffer as returned by its get()
    """
    dtype = data.dtype.type
    dtypes_kernels = {np.float32: "bilat3_float",}

    if dtype not in dtypes_kernels:
        logger.info("data type %s not supported yet (%s), casting to float:"
                    % (dtype, list(dtypes_kernels.keys())))
        data = data.astype(np.float32)
        dtype = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)

    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    # was a Python 2 print statement (a syntax error on Python 3);
    # report the shape through the logger instead of stdout
    logger.debug("in bilateral3, image shape: {}".format(img.shape))

    # only the first two entries of img.shape are passed on explicitly
    prog.run_kernel(dtypes_kernels[dtype],
                    img.shape, None,
                    img, res.data,
                    np.int32(img.shape[0]), np.int32(img.shape[1]),
                    np.int32(size_filter), np.float32(sigma_x), np.float32(sigma_p))

    return res.get()
def _fft_convolve_numpy(data, h, plan = None,
                        kernel_is_fft = False,
                        kernel_is_fftshifted = False):
    """ fft based convolution of two numpy arrays on the gpu

    data and h have to be of identical shape. Both are uploaded as
    complex64, the actual work is delegated to _fft_convolve_gpu and the
    absolute value of the downloaded result is returned.

    Parameters
    ----------
    data: numpy array
        the array to be convolved
    h: numpy array
        the kernel, it is passed through np.fft.fftshift unless
        kernel_is_fftshifted is set
    plan:
        forwarded unchanged to _fft_convolve_gpu
    kernel_is_fft: bool
        forwarded unchanged to _fft_convolve_gpu
    kernel_is_fftshifted: bool
        if True, h is uploaded as given

    Raises
    ------
    ValueError
        if data and h differ in shape
    """
    # the returned device handle is not needed below
    get_device()

    if data.shape != h.shape:
        raise ValueError("data and kernel must have same size! %s vs %s "%(str(data.shape),str(h.shape)))

    data_g = OCLArray.from_array(data.astype(np.complex64))

    kernel = h if kernel_is_fftshifted else np.fft.fftshift(h)
    h_g = OCLArray.from_array(kernel.astype(np.complex64))

    res_g = OCLArray.empty_like(data_g)

    _fft_convolve_gpu(data_g, h_g,
                      res_g = res_g,
                      plan = plan,
                      kernel_is_fft = kernel_is_fft)

    magnitude = abs(res_g.get())

    # drop the references to the device buffers before returning
    del data_g, h_g, res_g

    return magnitude
def _filter_max_2_gpu(data_g, size=10, res_g=None):
    """Run the ``max_2_x`` and ``max_2_y`` kernels of ``kernels/minmax_filter.cl``.

    Parameters
    ----------
    data_g: OCLArray
        input buffer, checked against np.float32 via assert_bufs_type
    size: number or sequence
        filter size; for a sequence ``size[-1]`` is used for the x pass and
        ``size[-2]`` for the y pass, a single number is used for both
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    Returns
    -------
    res_g, as written by the y pass
    """
    assert_bufs_type(np.float32, data_g)

    # the default (and any other bare number) cannot be indexed below,
    # so expand it to one entry per filtered axis
    if np.isscalar(size):
        size = (size, size)

    prog = OCLProgram(abspath("kernels/minmax_filter.cl"))

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # x: data -> tmp, y: tmp -> res
    prog.run_kernel("max_2_x", data_g.shape[::-1], None,
                    data_g.data, tmp_g.data, np.int32(size[-1]))
    prog.run_kernel("max_2_y", data_g.shape[::-1], None,
                    tmp_g.data, res_g.data, np.int32(size[-2]))
    return res_g
def bilateral3(data, size_filter, sigma_p, sigma_x=10.):
    """bilateral filter

    Runs the ``bilat3_float`` kernel of ``kernels/bilateral3.cl`` on data.

    Parameters
    ----------
    data: numpy array
        the input, anything that is not float32 is cast to float32 first
    size_filter: int
        handed to the kernel as int32
    sigma_p: float
        handed to the kernel as float32 (last kernel argument)
    sigma_x: float
        handed to the kernel as float32

    Returns
    -------
    the content of the result buffer as returned by its get()
    """
    kernel_by_dtype = {np.float32: "bilat3_float"}

    elem_type = data.dtype.type
    if elem_type not in kernel_by_dtype:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (elem_type, list(kernel_by_dtype.keys())))
        data = data.astype(np.float32)
        elem_type = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)
    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    logger.debug("in bilateral3, image shape: {}".format(img.shape))

    # only the first two entries of img.shape are passed on explicitly
    kernel_args = (img, res.data,
                   np.int32(img.shape[0]), np.int32(img.shape[1]),
                   np.int32(size_filter),
                   np.float32(sigma_x), np.float32(sigma_p))
    prog.run_kernel(kernel_by_dtype[elem_type], img.shape, None, *kernel_args)

    return res.get()
def _ocl_fft_gpu(plan, ocl_arr, res_arr=None, inverse=False):
    """Execute an fft plan on a device buffer.

    Parameters
    ----------
    plan: callable
        invoked as ``plan(ocl_arr, out, inverse=inverse)``
    ocl_arr: OCLArray
        input buffer, checked against np.complex64 via assert_bufs_type
    res_arr: OCLArray or None
        if given, handed to the plan as output, else a new buffer like
        ocl_arr is created
    inverse: bool
        forwarded to the plan

    Returns
    -------
    the output buffer that was handed to the plan
    """
    assert_bufs_type(np.complex64, ocl_arr)

    out = OCLArray.empty_like(ocl_arr) if res_arr is None else res_arr
    plan(ocl_arr, out, inverse=inverse)
    return out
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g = None):
    """Run the ``conv_sep2_x`` and ``conv_sep2_y`` kernels of ``kernels/convolve_sep.cl``.

    Parameters
    ----------
    data_g: OCLArray
        input buffer
    hx_g: OCLArray
        kernel buffer for the x pass, its first dimension is passed as length
    hy_g: OCLArray
        kernel buffer for the y pass, its first dimension is passed as length
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    All three input buffers are checked against np.float32 via assert_bufs_type.

    Returns
    -------
    res_g, as written by the y pass
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    len_y = hy_g.shape[0]
    len_x = hx_g.shape[0]

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]

    # x: data -> tmp
    prog.run_kernel("conv_sep2_x", global_size, None,
                    data_g.data, hx_g.data, tmp_g.data, np.int32(len_x))
    # y: tmp -> res
    prog.run_kernel("conv_sep2_y", global_size, None,
                    tmp_g.data, hy_g.data, res_g.data, np.int32(len_y))

    return res_g
def test_bessel(n,x):
    """Run the ``bessel_fill`` kernel of ``kernels/bessel.cl`` on x.

    Parameters
    ----------
    n:
        handed to the kernel as int32
    x: numpy array
        uploaded as float32

    Returns
    -------
    the content of the result buffer as returned by its get()
    """
    x32 = x.astype(float32)

    x_g = OCLArray.from_array(x32)
    res_g = OCLArray.empty_like(x32)

    prog = OCLProgram(absPath("kernels/bessel.cl"))
    prog.run_kernel("bessel_fill", x_g.shape, None,
                    x_g.data, res_g.data, int32(n))

    return res_g.get()
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None):
    """Separable 2d convolution on device buffers.

    Runs ``conv_sep2_x`` with hx_g and then ``conv_sep2_y`` with hy_g (both
    from ``kernels/convolve_sep.cl``), using an intermediate buffer.

    Parameters
    ----------
    data_g: OCLArray
        input buffer
    hx_g, hy_g: OCLArray
        kernel buffers for the x and y pass; the size of their first
        dimension is handed to the respective pass as int32
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    data_g, hx_g and hy_g are checked against np.float32 via assert_bufs_type.

    Returns
    -------
    res_g, as written by the y pass
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    n_y = hy_g.shape[0]
    n_x = hx_g.shape[0]

    tmp_g = OCLArray.empty_like(data_g)
    out_g = OCLArray.empty_like(data_g) if res_g is None else res_g

    work_size = data_g.shape[::-1]
    prog.run_kernel("conv_sep2_x", work_size, None,
                    data_g.data, hx_g.data, tmp_g.data, np.int32(n_x))
    prog.run_kernel("conv_sep2_y", work_size, None,
                    tmp_g.data, hy_g.data, out_g.data, np.int32(n_y))
    return out_g
def test_bessel(n, x):
    """Evaluate the ``bessel_fill`` kernel (``kernels/bessel.cl``) for x.

    Parameters
    ----------
    n:
        converted to int32 and passed as last kernel argument
    x: numpy array
        converted to float32 before the upload

    Returns
    -------
    the content of the result buffer as returned by its get()
    """
    host_in = x.astype(float32)

    in_g = OCLArray.from_array(host_in)
    out_g = OCLArray.empty_like(host_in)

    program = OCLProgram(absPath("kernels/bessel.cl"))
    program.run_kernel("bessel_fill", in_g.shape, None,
                       in_g.data, out_g.data, int32(n))
    return out_g.get()
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
        (numpy arrays are uploaded as complex64 if complex, else as float32)
    axes: int, list or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the type of the return value, see below

    Returns
    -------
        if return_buffer, returns the result as (well :) OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for OCLArrays whose dtype is not in DTYPE_KERNEL_NAMES, or if one of
        the selected axes has odd length
    ValueError
        if arr_obj is neither a numpy array nor an OCLArray
    """

    if axes is None:
        axes = list(range(arr_obj.ndim))
    elif np.isscalar(axes):
        # np.fft.fftshift accepts a single axis as well, a bare int
        # could not be iterated over below
        axes = [axes]

    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if not np.all([arr_obj.shape[a]%2==0 for a in axes]):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    # iterate over all axes
    # FIXME: this is still rather inefficient
    # the first pass reads the input, every further pass reads and writes res_g
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()
def fftshift(arr_obj, axes = None, res_g = None, return_buffer = False):
    """
    gpu version of fftshift for numpy arrays or OCLArrays

    Parameters
    ----------
    arr_obj: numpy array or OCLArray (float32/complex64)
        the array to be fftshifted
        (numpy arrays are uploaded as complex64 if complex, else as float32)
    axes: int, list or None
        the axes over which to shift (like np.fft.fftshift)
        if None, all axes are taken
    res_g:
        if given, fills it with the result (has to be same shape and dtype as arr_obj)
        else internally creates a new one
    return_buffer: bool
        selects the type of the return value, see below

    Returns
    -------
        if return_buffer, returns the result as (well :) OCLArray
        else returns the result as numpy array

    Raises
    ------
    NotImplementedError
        for OCLArrays whose dtype is not in DTYPE_KERNEL_NAMES, or if one of
        the selected axes has odd length
    ValueError
        if arr_obj is neither a numpy array nor an OCLArray
    """

    if axes is None:
        axes = list(range(arr_obj.ndim))
    elif np.isscalar(axes):
        # a single axis given as a bare int (allowed by np.fft.fftshift)
        # is wrapped so that it can be iterated over below
        axes = [axes]

    if isinstance(arr_obj, OCLArray):
        if arr_obj.dtype.type not in DTYPE_KERNEL_NAMES:
            raise NotImplementedError("only works for float32 or complex64")
    elif isinstance(arr_obj, np.ndarray):
        if np.iscomplexobj(arr_obj):
            arr_obj = OCLArray.from_array(arr_obj.astype(np.complex64,copy = False))
        else:
            arr_obj = OCLArray.from_array(arr_obj.astype(np.float32,copy = False))
    else:
        raise ValueError("unknown type (%s)"%(type(arr_obj)))

    if not np.all([arr_obj.shape[a]%2==0 for a in axes]):
        raise NotImplementedError("only works on axes of even dimensions")

    if res_g is None:
        res_g = OCLArray.empty_like(arr_obj)

    # iterate over all axes
    # FIXME: this is still rather inefficient
    # the first pass reads the input, every further pass reads and writes res_g
    in_g = arr_obj
    for ax in axes:
        _fftshift_single(in_g, res_g, ax)
        in_g = res_g

    if return_buffer:
        return res_g
    else:
        return res_g.get()
def _filt(data_g, size=(3, 3, 3), cval = 0, res_g=None):
    """Run the templated ``median_3`` kernel on a device buffer.

    The OpenCL source is rendered from ``kernels/median_filter.cl`` with the
    buffer type looked up for data_g, the filter sizes and cval.

    Parameters
    ----------
    data_g: OCLArray
        input buffer, its dtype has to be a key of cl_buffer_datatype_dict
    size: sequence
        filter sizes, ``size[2]`` is used for x, ``size[1]`` for y and
        ``size[0]`` for z
    cval:
        rendered into the kernel source as CVAL
    res_g: OCLArray or None
        if given, receives the result, else a new buffer like data_g is created

    Returns
    -------
    res_g

    Raises
    ------
    ValueError
        if the dtype of data_g is not in cl_buffer_datatype_dict
    """
    if data_g.dtype.type not in cl_buffer_datatype_dict:
        raise ValueError("dtype %s not supported" % data_g.dtype.type)

    DTYPE = cl_buffer_datatype_dict[data_g.dtype.type]

    with open(abspath("kernels/median_filter.cl"), "r") as f:
        tpl = Template(f.read())

    rendered = tpl.render(DTYPE = DTYPE,
                          FSIZE_X=size[2], FSIZE_Y=size[1], FSIZE_Z=size[0],
                          CVAL = cval)

    prog = OCLProgram(src_str=rendered)

    # the single kernel call writes straight into res_g, so no
    # intermediate buffer is allocated here
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    prog.run_kernel("median_3", data_g.shape[::-1], None, data_g.data, res_g.data)
    return res_g
def transfer(data):
    """transfers data

    Uploads data to a buffer, copies that buffer into an image, copies the
    image into a second buffer and downloads the latter.

    Parameters
    ----------
    data: numpy array (float32/complex64)
        complex64 input uses a float32 image with two channels

    Returns
    -------
    the content of the second buffer as returned by its get()

    Raises
    ------
    ValueError
        if data is neither float32 nor complex64
    """
    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    if data.dtype.type == np.float32:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32)
    elif data.dtype.type == np.complex64:
        im = OCLImage.empty(data.shape[::1],dtype = np.float32, num_channels=2)
    else:
        # without this branch the copy below would fail on an unbound name
        raise ValueError("dtype %s not supported" % data.dtype.type)

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
def transfer(data):
    """transfers data

    Uploads data to a buffer, copies that buffer into an image, copies the
    image into a second buffer and downloads the latter.

    Parameters
    ----------
    data: numpy array (float32/complex64)
        complex64 input uses a float32 image with two channels

    Returns
    -------
    the content of the second buffer as returned by its get()

    Raises
    ------
    ValueError
        if data is neither float32 nor complex64
    """
    d1_g = OCLArray.from_array(data)
    d2_g = OCLArray.empty_like(data)

    if data.dtype.type == np.float32:
        im = OCLImage.empty(data.shape[::1], dtype=np.float32)
    elif data.dtype.type == np.complex64:
        im = OCLImage.empty(data.shape[::1], dtype=np.float32, num_channels=2)
    else:
        # fail with a clear message instead of an unbound name further down
        raise ValueError("dtype %s not supported" % data.dtype.type)

    im.copy_buffer(d1_g)
    d2_g.copy_image(im)

    return d2_g.get()
def get_gpu(N = 256, niter=100, sig = 1.):
    """Record the relative error of repeated fft / inverse fft round trips.

    A seeded (seed 0) normal random N x N array is uploaded as complex64 and
    then transformed forth and back niter times with the same plan. After
    each round trip the maximal absolute deviation from the start array,
    divided by the maximal absolute value of the start array, is recorded.

    Parameters
    ----------
    N: int
        side length of the square array
    niter: int
        number of round trips
    sig: float
        scale of the normal distribution

    Returns
    -------
    numpy array with one relative error per round trip
    """
    np.random.seed(0)
    reference = np.random.normal(0,sig,(N,N)).astype(np.complex64)
    start = (1.*reference.copy()).astype(np.complex64)

    c_g = OCLArray.empty_like(start)
    b_g = OCLArray.from_array(start)
    plan = fft_plan((N,N), fast_math = False)

    # does not change between iterations
    reference_peak = np.amax(np.abs(reference))

    rels = []
    for _ in range(niter):
        # forward into c_g, inverse back into b_g
        fft(b_g, res_g = c_g, plan = plan)
        fft(c_g, res_g = b_g, inverse = True, plan = plan)

        deviation = np.amax(np.abs(reference-b_g.get()))
        rels.append(deviation/reference_peak)

    return np.array(rels)
def get_gpu(N=256, niter=100, sig=1.):
    """Measure how the error grows over repeated fft / inverse fft pairs.

    Starting from a seeded (seed 0) normal random N x N complex64 array, the
    device copy is transformed forward and inverse niter times with one
    plan. Each pass appends max|a - current| / max|a|, where a is the
    start array on the host.

    Parameters
    ----------
    N: int
        side length of the square array
    niter: int
        number of forward/inverse pairs
    sig: float
        scale of the normal distribution

    Returns
    -------
    numpy array of length niter with the relative errors
    """
    np.random.seed(0)
    a = np.random.normal(0, sig, (N, N)).astype(np.complex64)
    host_copy = (1. * a.copy()).astype(np.complex64)

    scratch_g = OCLArray.empty_like(host_copy)
    work_g = OCLArray.from_array(host_copy)

    plan = fft_plan((N, N), fast_math=False)

    rels = []
    for _ in range(niter):
        fft(work_g, res_g=scratch_g, plan=plan)
        fft(scratch_g, res_g=work_g, inverse=True, plan=plan)

        max_err = np.amax(np.abs(a - work_g.get()))
        max_ref = np.amax(np.abs(a))
        rels.append(max_err / max_ref)

    return np.array(rels)
def bench(description, dshape, dtype, func_cpu, func_gpu, func_gpu_notransfer=None, niter=2):
    """Time a cpu and a gpu implementation on random data and print one result row.

    Every candidate is called once untimed, then the wall clock time of
    niter further calls is averaged.

    Parameters
    ----------
    description: str
        label printed in the first column
    dshape: tuple
        shape of the random integer test array (values from [0, 100))
    dtype:
        the test array is cast to it, also used as key into type_name_dict
    func_cpu, func_gpu: callable
        called with the host array
    func_gpu_notransfer: callable or None
        if given, called with two device buffers (the uploaded input and an
        empty one of the same layout); the device queue is finished before
        the clock is read
    niter: int
        number of timed calls per candidate

    Returns
    -------
    (t_cpu, t_gpu, t_gpu_notransfer) in seconds per call, the last entry is
    None if func_gpu_notransfer was not given
    """
    x = np.random.randint(0, 100, dshape).astype(dtype)

    # cpu
    func_cpu(x)
    tic = time()
    for _ in range(niter):
        last = func_cpu(x)
    t_cpu = (time() - tic) / niter

    # gpu, including the transfers done by func_gpu itself
    func_gpu(x)
    tic = time()
    for _ in range(niter):
        last = func_gpu(x)
    t_gpu = (time() - tic) / niter

    # gpu, operating on buffers that are already on the device
    t_gpu_notransfer = None
    if func_gpu_notransfer is not None:
        x_g = OCLArray.from_array(x)
        tmp_g = OCLArray.empty_like(x)

        func_gpu_notransfer(x_g, tmp_g)
        get_device().queue.finish()

        tic = time()
        for _ in range(niter):
            func_gpu_notransfer(x_g, tmp_g)
        get_device().queue.finish()
        t_gpu_notransfer = (time() - tic) / niter

    if t_gpu_notransfer is None:
        notransfer_col = "-"
    else:
        notransfer_col = "%d ms" % (1000 * t_gpu_notransfer)

    print("%s| %s %s | %d ms | %d ms | %s" % (
        description, dshape, type_name_dict[dtype],
        1000 * t_cpu, 1000 * t_gpu, notransfer_col))

    return t_cpu, t_gpu, t_gpu_notransfer
def bilateral2(data, fSize, sigma_p, sigma_x=10.):
    """bilateral filter

    Runs a kernel of ``kernels/bilateral2.cl`` on data: ``bilat2_float`` for
    float32 and ``bilat2_short`` for uint16 input.

    Parameters
    ----------
    data: numpy array
        the input, anything that is neither float32 nor uint16 is cast to
        float32 first
    fSize: int
        handed to the kernel as int32
    sigma_p: float
        handed to the kernel as float32 (last kernel argument)
    sigma_x: float
        handed to the kernel as float32

    Returns
    -------
    the content of the result buffer as returned by its get()
    """
    kernel_by_dtype = {np.float32: "bilat2_float",
                       np.uint16: "bilat2_short"}

    elem_type = data.dtype.type
    if elem_type not in kernel_by_dtype.keys():
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (elem_type, kernel_by_dtype.keys()))
        data = data.astype(np.float32)
        elem_type = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)
    prog = OCLProgram(abspath("kernels/bilateral2.cl"))

    kernel_args = (img, res.data,
                   np.int32(img.shape[0]), np.int32(img.shape[1]),
                   np.int32(fSize),
                   np.float32(sigma_x), np.float32(sigma_p))
    prog.run_kernel(kernel_by_dtype[elem_type], img.shape, None, *kernel_args)

    return res.get()
#     dtype = d_g.dtype.type
#
#     if not isinstance(d_g, OCLArray):
#         raise ValueError("only works on OCLArrays")
#
#     if not dtype in dtype_kernel_name.keys():
#         raise NotImplementedError("only works for float32 or complex64")
#
#     if not np.all([n%2==0 for n in d_g.shape]):
#         raise NotImplementedError("only works on even length arryas")
#
#     prog = OCLProgram(abspath("kernels/fftshift.cl"))
#     prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None,
#                     d_g.data, d_g.data,
#                     np.int32(Nx), np.int32(Ny))
#     return d_g


if __name__ == '__main__':
    # manual check: a linear ramp with a constant box around the center,
    # shifted along all three axes
    Nx, Ny, Nz = (256,)*3
    d = np.linspace(0,1,Nx*Ny*Nz).reshape(Nz, Ny,Nx).astype(np.float32)

    # floor division keeps the slice bounds integers, true division would
    # produce floats that are rejected as indices
    d[Nz//2-30:Nz//2+30,Ny//2-20:Ny//2+20,Nx//2-20:Nx//2+20] = 2.

    d_g = OCLArray.from_array(d)
    out_g = OCLArray.empty_like(d)

    out = fftshift(d, axes= (0,1,2))
#     dtype = d_g.dtype.type
#
#     if not isinstance(d_g, OCLArray):
#         raise ValueError("only works on OCLArrays")
#
#     if not dtype in dtype_kernel_name.keys():
#         raise NotImplementedError("only works for float32 or complex64")
#
#     if not np.all([n%2==0 for n in d_g.shape]):
#         raise NotImplementedError("only works on even length arryas")
#
#     prog = OCLProgram(abspath("kernels/fftshift.cl"))
#     prog.run_kernel(dtype_kernel_name[dtype],(Nx,Ny,),None,
#                     d_g.data, d_g.data,
#                     np.int32(Nx), np.int32(Ny))
#     return d_g


if __name__ == '__main__':
    # manual check: a linear ramp with a constant box around the center,
    # shifted along all three axes
    Nx = Ny = Nz = 256

    d = np.linspace(0, 1, Nx * Ny * Nz).astype(np.float32).reshape(Nz, Ny, Nx)
    d[Nz // 2 - 30:Nz // 2 + 30,
      Ny // 2 - 20:Ny // 2 + 20,
      Nx // 2 - 20:Nx // 2 + 20] = 2.

    d_g = OCLArray.from_array(d)
    out_g = OCLArray.empty_like(d)

    out = fftshift(d, axes=(0, 1, 2))