def create_dn_buffer(size, units, points, dn_inner=.0, rad_inner=0, dn_outer=.1, rad_outer=.4):
    """Allocate a float32 device volume and fill it with the ``fill_dn`` kernel.

    Parameters
    ----------
    size : (Nx, Ny, Nz)
        Grid dimensions; the buffer is allocated with shape ``(Nz, Ny, Nx)``.
    units : (dx, dy, dz)
        Passed to the kernel as float32 scalars.
    points : array-like
        2d array-like; rows are sorted by their third column and uploaded
        flattened as float32.
    dn_inner, rad_inner, dn_outer, rad_outer : float
        Passed to the kernel as float32 scalars (presumably value and radius
        of an inner and an outer shell around each point -- confirm against
        kernels/bpm_3d_spheres.cl).

    Returns
    -------
    OCLArray
        The filled device buffer (it is not copied back to the host).
    """
    Nx, Ny, Nz = size
    dx, dy, dz = units

    program = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))
    dn_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)

    # upload the points ordered by z (third column)
    pts = np.array(points)
    z_order = np.argsort(pts[:, 2])
    pts = pts[z_order, :]
    n_points = pts.shape[0]
    pts_g = OCLArray.from_array(pts.flatten().astype(np.float32))

    scalars = [np.float32(v) for v in (dx, dy, dz, dn_inner, rad_inner, dn_outer, rad_outer)]
    program.run_kernel("fill_dn", (Nx, Ny, Nz), None,
                       dn_g.data, pts_g.data, np.int32(n_points), *scalars)
    return dn_g
def rebuild_program(self, interpolation="linear"):
    """Compile the render kernels and store the program in ``self.proc``.

    The program is first built with aggressive floating point flags; if that
    build raises, the error is logged at debug level and the program is built
    again with the basic options only.

    Parameters
    ----------
    interpolation : str
        Key into ``VolumeRenderer.interpolation_defines``; the associated
        options are appended to the build options.

    Raises
    ------
    KeyError
        If ``interpolation`` is not a key of
        ``VolumeRenderer.interpolation_defines``.
    """
    options = [
        "-I", "%s" % absPath("kernels/"),
        "-D", "maxSteps=%s" % spimagine.config.__DEFAULTMAXSTEPS__,
    ]
    if spimagine.config.__QUALIFIER_CONSTANT_TO_GLOBAL__:
        options.extend(["-D", "QUALIFIER_CONSTANT_TO_GLOBAL"])

    if interpolation not in VolumeRenderer.interpolation_defines:
        raise KeyError("interpolation = '%s' not defined ,valid: %s" %
                       (interpolation, list(VolumeRenderer.interpolation_defines.keys())))
    options.extend(VolumeRenderer.interpolation_defines[interpolation])

    fast_math = [
        "-cl-finite-math-only",
        "-cl-fast-relaxed-math",
        "-cl-unsafe-math-optimizations",
        "-cl-mad-enable",
    ]

    def _build(build_options):
        return OCLProgram(absPath("kernels/all_render_kernels.cl"),
                          build_options=build_options)

    try:
        self.proc = _build(options + fast_math)
    except Exception as e:
        # best effort: not every OpenCL platform accepts the fast-math flags
        logger.debug(str(e))
        self.proc = _build(options)

    # NOTE(review): bare attribute access kept from the original; it has no
    # effect for a plain attribute and may be the remainder of a truncated
    # statement.
    self.proc
def bilateral3(data, size_filter, sigma_p, sigma_x=10.):
    """Run the ``bilat3_float`` bilateral filter kernel on ``data``.

    Parameters
    ----------
    data : ndarray
        Input volume. Any dtype other than float32 is cast to float32
        (an info message is logged).
    size_filter : int
        Passed to the kernel as int32.
    sigma_p, sigma_x : float
        Passed to the kernel as float32 (``sigma_x`` first, then ``sigma_p``).

    Returns
    -------
    ndarray
        The kernel output copied back to the host.
    """
    dtypes_kernels = {np.float32: "bilat3_float"}

    dtype = data.dtype.type
    if dtype not in dtypes_kernels:
        logger.info("data type %s not supported yet (%s), casting to float:" % (dtype, dtypes_kernels.keys()))
        data = data.astype(np.float32)
        dtype = data.dtype.type

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)
    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    # This used to be a Python 2 ``print`` statement (a syntax error on
    # Python 3 and unconditional stdout noise); log it at debug level instead.
    logger.debug("in bilateral3, image shape: {}".format(img.shape))

    prog.run_kernel(dtypes_kernels[dtype],
                    img.shape, None,
                    img, res.data,
                    np.int32(img.shape[0]), np.int32(img.shape[1]),
                    np.int32(size_filter), np.float32(sigma_x), np.float32(sigma_p))
    return res.get()
def gpu_kuwahara(data, N=5):
    """Filter a 2d image with the ``kuwahara`` OpenCL kernel.

    Parameters
    ----------
    data : ndarray
        Input image; it is uploaded as float32.
    N : int
        Passed to the kernel as int32; must be odd.

    Returns
    -------
    ndarray
        float32 array of shape ``(data.shape[0], data.shape[1])``.

    Raises
    ------
    ValueError
        If ``N`` is even.
    """
    if N % 2 == 0:
        raise ValueError("Data has to be a (2n+1)x(2n+1) array.")

    data_g = OCLArray.from_array(data.astype(float32))
    out_shape = (data.shape[0], data.shape[1])
    res_g = OCLArray.empty(out_shape, float32)

    # NOTE: the kernel path is relative to the current working directory
    prog = OCLProgram("./OpenCL/gpu_kernels/gpu_kuwahara.cl")

    global_size = data_g.shape[::-1]  # one work item per pixel
    local_size = None                 # let the runtime choose
    prog.run_kernel("kuwahara", global_size, local_size,
                    data_g.data, res_g.data, int32(N))
    return res_g.get()
def scale(data, scale=(1., 1., 1.), interp="linear"):
    """Return an interpolated, scaled version of ``data``.

    Parameters
    ----------
    data : ndarray
        Input array (uploaded as an OpenCL image).
    scale : float or sequence of 3 floats
        ``(scale_z, scale_y, scale_x)``, or a single factor for all axes.
    interp : str
        ``"linear"`` or ``"nearest"``.

    Returns
    -------
    ndarray
        float32 array whose shape is ``data.shape * scale`` truncated to int.

    Raises
    ------
    KeyError
        If ``interp`` is not a known interpolation mode.
    ValueError
        If ``scale`` is a sequence whose length is not 3.
    """
    bop = {"linear": "", "nearest": "-D USENEAREST"}

    if interp not in bop:
        raise KeyError("interp = '%s' not defined ,valid: %s" % (interp, bop.keys()))

    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = (scale, ) * 3

    if len(scale) != 3:
        # wrap in a 1-tuple: a bare tuple on the right of % is unpacked as
        # the format arguments and raised TypeError instead of this error
        raise ValueError("scale = %s misformed" % (scale, ))

    d_im = OCLImage.from_array(data)

    nshape = np.array(data.shape) * np.array(scale)
    # ``np.int`` was only an alias of the builtin and is gone in NumPy >= 1.24
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape, np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=[bop[interp]])
    prog.run_kernel("scale", res_g.shape[::-1], None, d_im, res_g.data)

    return res_g.get()
def bilateral3(data, size_filter, sigma_p, sigma_x=10.):
    """Apply the 3d bilateral filter kernel to ``data`` and return the result.

    Only float32 input has a kernel; any other dtype is cast to float32 and
    an info message is logged. ``size_filter`` is forwarded as int32,
    ``sigma_x`` and ``sigma_p`` as float32. Returns a host ndarray.
    """
    kernels_by_dtype = {
        np.float32: "bilat3_float",
    }

    if data.dtype.type not in kernels_by_dtype:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (data.dtype.type, list(kernels_by_dtype.keys())))
        data = data.astype(np.float32)
    kernel_name = kernels_by_dtype[data.dtype.type]

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)
    prog = OCLProgram(abspath("kernels/bilateral3.cl"))

    logger.debug("in bilateral3, image shape: {}".format(img.shape))

    scalar_args = (
        np.int32(img.shape[0]),
        np.int32(img.shape[1]),
        np.int32(size_filter),
        np.float32(sigma_x),
        np.float32(sigma_p),
    )
    prog.run_kernel(kernel_name, img.shape, None, img, res.data, *scalar_args)
    return res.get()
def create_dn_buffer(size, units, points,
                     dn_inner=.0, rad_inner=0,
                     dn_outer=.1, rad_outer=.4):
    """Run the ``fill_dn`` kernel for ``points`` and return the device buffer.

    ``size`` is ``(Nx, Ny, Nz)`` and ``units`` is ``(dx, dy, dz)``. The points
    are sorted by their third column (z) before being uploaded as a flat
    float32 buffer. ``dn_inner``, ``rad_inner``, ``dn_outer`` and
    ``rad_outer`` are forwarded to the kernel as float32 scalars.

    Returns the float32 ``OCLArray`` of shape ``(Nz, Ny, Nx)``; nothing is
    copied back to the host.
    """
    Nx, Ny, Nz = size
    dx, dy, dz = units

    program = OCLProgram(absPath("kernels/bpm_3d_spheres.cl"))
    dn_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)

    # sort by z
    sorted_points = np.array(points)
    sorted_points = sorted_points[np.argsort(sorted_points[:, 2]), :]
    point_count = sorted_points.shape[0]
    points_g = OCLArray.from_array(sorted_points.flatten().astype(np.float32))

    kernel_args = (
        dn_g.data,
        points_g.data,
        np.int32(point_count),
        np.float32(dx), np.float32(dy), np.float32(dz),
        np.float32(dn_inner), np.float32(rad_inner),
        np.float32(dn_outer), np.float32(rad_outer),
    )
    program.run_kernel("fill_dn", (Nx, Ny, Nz), None, *kernel_args)
    return dn_g
def test_bessel(n, x):
    """Run the ``bessel_fill`` kernel on ``x`` (as float32) with order ``n``.

    Returns the kernel output as a host array with the shape of ``x``.
    """
    x32 = x.astype(float32)
    x_g = OCLArray.from_array(x32)
    res_g = OCLArray.empty_like(x32)

    program = OCLProgram(absPath("kernels/bessel.cl"))
    program.run_kernel("bessel_fill", x_g.shape, None,
                       x_g.data, res_g.data, int32(n))
    return res_g.get()
def test_bessel(n, x):
    """Evaluate the ``bessel_fill`` kernel for every element of ``x``.

    ``x`` is converted to float32 and ``n`` is passed as int32; the result
    is copied back from the device and returned.
    """
    host_in = x.astype(float32)
    x_g = OCLArray.from_array(host_in)
    res_g = OCLArray.empty_like(host_in)

    p = OCLProgram(absPath("kernels/bessel.cl"))
    kernel_args = (x_g.data, res_g.data, int32(n))
    p.run_kernel("bessel_fill", x_g.shape, None, *kernel_args)

    return res_g.get()
def _ocl_star_dist(a, n_rays=32):
    """Run the ``star_dist`` OpenCL kernel on the image ``a``.

    ``a`` is uploaded as a uint16 image; the result is a float32 array of
    shape ``a.shape + (n_rays,)``. ``n_rays`` must be a positive scalar,
    otherwise ``_raise(ValueError())`` is invoked.
    """
    from gputools import OCLProgram, OCLArray, OCLImage

    if not (np.isscalar(n_rays) and 0 < int(n_rays)):
        _raise(ValueError())
    n_rays = int(n_rays)

    src = OCLImage.from_array(a.astype(np.uint16, copy=False))
    out_shape = a.shape + (n_rays, )
    dst = OCLArray.empty(out_shape, dtype=np.float32)

    # the ray count is a compile-time constant of the kernel
    build_options = ['-D', 'N_RAYS=%d' % n_rays]
    program = OCLProgram(path_absolute("kernels/stardist2d.cl"),
                         build_options=build_options)
    program.run_kernel('star_dist', src.shape, None, dst.data, src)
    return dst.get()
def scale_bicubic(data, scale=(1., 1., 1.)):
    """Return a scaled version of ``data`` computed by the ``scale_bicubic`` kernel.

    The output shape is scaled too (via ``_scale_shape``).

    Parameters
    ----------
    data : ndarray
        3d input array of dtype uint8, uint16 or float32.
    scale : float or sequence of 3 floats
        Scaling factor per axis, or one factor for all axes.

    Returns
    -------
    ndarray
        Scaled output with the same dtype as ``data``.

    Raises
    ------
    ValueError
        If ``data`` is not a 3d ndarray, its dtype is unsupported, or
        ``scale`` is a sequence whose length is not 3.
    """
    if not (isinstance(data, np.ndarray) and data.ndim == 3):
        raise ValueError("input data has to be a 3d array!")

    options_types = {
        np.uint8: ["-D", "TYPENAME=uchar", "-D", "READ_IMAGE=read_imageui"],
        np.uint16: ["-D", "TYPENAME=short", "-D", "READ_IMAGE=read_imageui"],
        np.float32: ["-D", "TYPENAME=float", "-D", "READ_IMAGE=read_imagef"],
    }

    dtype = data.dtype.type
    if dtype not in options_types:
        raise ValueError("type %s not supported! Available: %s" % (dtype, str(list(options_types.keys()))))

    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = (scale, ) * 3

    if len(scale) != 3:
        # wrap in a 1-tuple: a bare tuple on the right of % is unpacked as
        # the format arguments and raised TypeError instead of this error
        raise ValueError("scale = %s misformed" % (scale, ))

    d_im = OCLImage.from_array(data)
    nshape = _scale_shape(data.shape, scale)
    res_g = OCLArray.empty(nshape, dtype)

    prog = OCLProgram(abspath("kernels/scale.cl"), build_options=options_types[dtype])
    prog.run_kernel("scale_bicubic", res_g.shape[::-1], None, d_im, res_g.data)

    return res_g.get()
def focus_field_lattice(shape, units, lam=.5, NA1=.4, NA2=.5, sigma=.1, Npoly=6, n0=1., n_integration_steps=100):
    """Run the ``debye_wolf_lattice`` kernel and return the x field component.

    Parameters
    ----------
    shape : (Nx, Ny, Nz)
        Grid size; device buffers are allocated as ``(Nz, Ny, Nx)``.
    units : (dx, dy, dz)
        Voxel size; the kernel receives the symmetric extent
        ``+-d*(N-1)/2`` along each axis.
    lam : float
        Wavelength; ``lam / n0`` is passed to the kernel.
    NA1, NA2 : float
        Converted to the angles ``arcsin(NA / n0)``; their mean also scales
        the points returned by ``poly_points(Npoly)``.
    sigma : float
        Passed to the kernel as float32.
    Npoly : int
        Argument of ``poly_points``.
    n0 : float
        Refractive index used in the conversions above.
    n_integration_steps : int
        Compiled into the kernel as ``INT_STEPS``.

    Returns
    -------
    ndarray
        complex64 array ``ex`` of shape ``(Nz, Ny, Nx)``. The other kernel
        outputs (ey, ez, u) are computed on the device but not returned.
    """
    kxs, kys = .5 * (NA1 + NA2) * poly_points(Npoly)

    p = OCLProgram(absPath("kernels/psf_lattice.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s" % n_integration_steps])

    kxs = np.array(kxs)
    kys = np.array(kys)

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha1 = np.arcsin(NA1 / n0)
    alpha2 = np.arcsin(NA2 / n0)

    u_g = OCLArray.empty((Nz, Ny, Nx), np.float32)
    ex_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)

    kxs_g = OCLArray.from_array(kxs.astype(np.float32))
    kys_g = OCLArray.from_array(kys.astype(np.float32))

    t = time.time()

    p.run_kernel("debye_wolf_lattice", (Nx, Ny, Nz), None,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(-dx * (Nx - 1) / 2.), np.float32(dx * (Nx - 1) / 2.),
                 np.float32(-dy * (Ny - 1) / 2.), np.float32(dy * (Ny - 1) / 2.),
                 np.float32(-dz * (Nz - 1) / 2.), np.float32(dz * (Nz - 1) / 2.),
                 np.float32(1. * lam / n0),
                 np.float32(alpha1), np.float32(alpha2),
                 kxs_g.data, kys_g.data, np.int32(len(kxs)),
                 np.float32(sigma))

    ex = ex_g.get()

    # single-argument print call: same output as the former Python 2 print
    # statement, and valid on both Python 2 and Python 3
    print("time in secs: %s" % (time.time() - t))
    return ex
def perlin2(size, units, repeat=(10., ) * 2):
    """Fill a 2d float32 array using the ``perlin2d`` kernel.

    ``size`` is given in kernel order and reversed for the array shape;
    ``units = (dx, dy)`` and ``repeat = (wx, wy)`` are passed to the kernel
    as float32 scalars. Returns the result as a host ndarray.
    """
    dx, dy = units
    wx, wy = repeat

    prog = OCLProgram(abspath("perlin.cl"))
    noise_g = OCLArray.empty(size[::-1], np.float32)

    scalars = [np.float32(v) for v in (dx, dy, wx, wy)]
    prog.run_kernel("perlin2d", noise_g.shape[::-1], None, noise_g.data, *scalars)
    return noise_g.get()
def stardist_from_labels(a, n_rays=32):
    """Run the ``star_dist`` kernel on a label image.

    Assumes ``a`` is a label image with integer values that encode object
    ids; id 0 denotes background. The image is uploaded as uint16 and the
    kernel source is taken from the module-level ``kernel`` string.

    Returns a float32 array of shape ``a.shape + (n_rays,)``.
    """
    src = OCLImage.from_array(a.astype(np.uint16, copy=False))
    dst = OCLArray.empty(a.shape + (n_rays, ), dtype=np.float32)

    # the number of rays is baked into the kernel at build time
    ray_define = "N_RAYS=%d" % n_rays
    program = OCLProgram(src_str=kernel, build_options=["-D", ray_define])
    program.run_kernel('star_dist', src.shape, None, dst.data, src)

    return dst.get()
def perlin2(size, units, repeat=(10.,)*2):
    """Compute a 2d array with the ``perlin2d`` OpenCL kernel.

    Parameters
    ----------
    size : sequence of 2 ints
        Reversed to obtain the shape of the output array.
    units : (dx, dy)
        Passed to the kernel as float32.
    repeat : (wx, wy)
        Passed to the kernel as float32.

    Returns
    -------
    ndarray
        float32 array of shape ``size[::-1]``.
    """
    wx, wy = repeat
    dx, dy = units

    prog = OCLProgram(abspath("perlin.cl"))
    out_g = OCLArray.empty(size[::-1], np.float32)

    kernel_args = (out_g.data,
                   np.float32(dx), np.float32(dy),
                   np.float32(wx), np.float32(wy))
    prog.run_kernel("perlin2d", out_g.shape[::-1], None, *kernel_args)

    return out_g.get()
def _ocl_star_dist(lbl, n_rays=32, grid=(1, 1)):
    """Run the ``star_dist`` kernel on ``lbl``, subsampled by ``grid``.

    ``lbl`` is uploaded as a uint16 image. The output has shape
    ``res_shape + (n_rays,)`` where ``res_shape`` is the shape obtained by
    slicing ``lbl`` with step ``g`` along each axis. ``n_rays`` must be a
    positive scalar, otherwise ``_raise(ValueError())`` is invoked.
    """
    from gputools import OCLProgram, OCLArray, OCLImage

    if not (np.isscalar(n_rays) and 0 < int(n_rays)):
        _raise(ValueError())
    n_rays = int(n_rays)

    # slicing with grid is done with tuple(slice(0, None, g) for g in grid)
    res_shape = tuple((extent - 1) // step + 1
                      for extent, step in zip(lbl.shape, grid))

    src = OCLImage.from_array(lbl.astype(np.uint16, copy=False))
    dst = OCLArray.empty(res_shape + (n_rays, ), dtype=np.float32)

    program = OCLProgram(path_absolute("kernels/stardist2d.cl"),
                         build_options=['-D', 'N_RAYS=%d' % n_rays])
    grid_args = (np.int32(grid[0]), np.int32(grid[1]))
    program.run_kernel('star_dist', res_shape[::-1], None,
                       dst.data, src, *grid_args)
    return dst.get()
def _filt(data_g, size=(3, 3, 3), res_g=None):
    """Apply the templated reduce filter along x, y and z in three passes.

    ``data_g`` must be a float32 device buffer. The kernel source is rendered
    from ``kernels/generic_reduce_filter.cl`` with the filter sizes taken
    from the last three entries of ``size`` and with the enclosing scope's
    ``FUNC`` and ``DEFAULT``. The result is written to ``res_g`` (allocated
    if not given), which is returned.
    """
    assert_bufs_type(np.float32, data_g)

    with open(abspath("kernels/generic_reduce_filter.cl"), "r") as f:
        tpl = Template(f.read())

    fsize_z, fsize_y, fsize_x = size[-3], size[-2], size[-1]
    rendered = tpl.render(FSIZE_X=fsize_x, FSIZE_Y=fsize_y, FSIZE_Z=fsize_z,
                          FUNC=FUNC, DEFAULT=DEFAULT)
    prog = OCLProgram(src_str=rendered)

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # ping-pong between the buffers so the last pass ends in res_g
    passes = (
        ("filter_3_x", data_g, res_g),
        ("filter_3_y", res_g, tmp_g),
        ("filter_3_z", tmp_g, res_g),
    )
    for kernel_name, src_g, dst_g in passes:
        prog.run_kernel(kernel_name, data_g.shape[::-1], None, src_g.data, dst_g.data)
    return res_g
def focus_field_cylindrical_plane(shape=(128, 128), units=(.1, .1), z=0., lam=.5, NA=.6, n0=1., ex_g=None, n_integration_steps=200):
    """Compute the x component of the electric field at axial position ``z``.

    The field is that of a perfect, aberration free cylindrical lens,
    evaluated with the vectorial Debye diffraction integral, see
    Colin J. R. Sheppard, "Cylindrical lenses - focusing and imaging: a
    review", Appl. Opt. 52, 538-545 (2013).

    If ``ex_g`` is given it must be an OCLArray whose reversed shape equals
    ``shape``; it is filled in place and ``None`` is returned. Otherwise a
    complex64 buffer of shape ``(Ny, Nx)`` is allocated and its content is
    returned as a numpy array.
    """
    build_options = str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps))
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"), build_options=build_options)

    Nx, Ny = shape
    dx, dy = units  # only dy enters the kernel call
    alpha = np.arcsin(NA / n0)

    owns_buffer = ex_g is None
    if owns_buffer:
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)
    else:
        assert ex_g.shape[::-1] == shape

    half_extent_y = dy * (Ny - 1) / 2.
    p.run_kernel("psf_cylindrical_plane", (Nx, Ny), None,
                 ex_g.data,
                 np.float32(-dy * (Ny - 1) / 2.), np.float32(half_extent_y),
                 np.float32(z),
                 np.float32(lam / n0),
                 np.float32(alpha))

    if owns_buffer:
        return ex_g.get()
    return None
def focus_field_cylindrical(shape, units, lam=.5, NA=.3, n0=1., n_integration_steps=100):
    """Compute the focus field of a cylindrical lens with the given NA.

    See Colin J. R. Sheppard, "Cylindrical lenses - focusing and imaging: a
    review", Appl. Opt. 52, 538-545 (2013).

    Parameters
    ----------
    shape : (Nx, Ny, Nz)
        Output grid size. The kernel runs on a ``(Nz, Ny)`` plane and the
        result is repeated ``Nx`` times along the last axis.
    units : (dx, dy, dz)
        Voxel size; only ``dy`` and ``dz`` enter the kernel call.
    lam : float
        Wavelength; ``lam / n0`` is passed to the kernel.
    NA : float
        Converted to the angle ``arcsin(NA / n0)``.
    n0 : float
        Refractive index used in the conversions above.
    n_integration_steps : int
        Compiled into the kernel as ``INT_STEPS``.

    Returns
    -------
    u, ex, ey, ez : ndarray
        Arrays of shape ``(Nz, Ny, Nx)``, with u being the intensity.
    """
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options=str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps)))

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA / n0)

    u_g = OCLArray.empty((Nz, Ny), np.float32)
    ex_g = OCLArray.empty((Nz, Ny), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny), np.complex64)

    t = time.time()

    p.run_kernel("psf_cylindrical", u_g.shape[::-1], None,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(-dy * (Ny - 1) / 2.), np.float32(dy * (Ny - 1) / 2.),
                 np.float32(-dz * (Nz - 1) / 2.), np.float32(dz * (Nz - 1) / 2.),
                 np.float32(lam / n0),
                 np.float32(alpha))

    # the kernel output does not vary along x, so replicate the plane Nx times
    u = np.array(np.repeat(u_g.get()[..., np.newaxis], Nx, axis=-1))
    ex = np.array(np.repeat(ex_g.get()[..., np.newaxis], Nx, axis=-1))
    ey = np.array(np.repeat(ey_g.get()[..., np.newaxis], Nx, axis=-1))
    ez = np.array(np.repeat(ez_g.get()[..., np.newaxis], Nx, axis=-1))

    # single-argument print call: same output as the former Python 2 print
    # statement, and valid on both Python 2 and Python 3
    print("time in secs: %s" % (time.time() - t))

    return u, ex, ey, ez
def focus_field_cylindrical_plane(shape=(128, 128),
                                  units=(.1, .1),
                                  z=0.,
                                  lam=.5, NA=.6, n0=1.,
                                  ex_g=None,
                                  n_integration_steps=200):
    """Evaluate the x field component of a cylindrical lens in one z plane.

    Calculates the x component of the electric field at the axial position
    ``z`` for a perfect, aberration free optical system via the vectorial
    Debye diffraction integral for a cylindrical lens, see
    Colin J. R. Sheppard, "Cylindrical lenses - focusing and imaging: a
    review", Appl. Opt. 52, 538-545 (2013).

    When ``ex_g`` is a valid OCLArray (reversed shape equal to ``shape``)
    it is filled and ``None`` is returned; when it is ``None`` the field is
    returned as a complex64 numpy array of shape ``(Ny, Nx)``.
    """
    options = str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps))
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"), build_options=options)

    Nx, Ny = shape
    dx, dy = units
    alpha = np.arcsin(NA / n0)

    caller_buffer = ex_g is not None
    if caller_buffer:
        assert ex_g.shape[::-1] == shape
    else:
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)

    kernel_args = (
        ex_g.data,
        np.float32(-dy * (Ny - 1) / 2.),
        np.float32(dy * (Ny - 1) / 2.),
        np.float32(z),
        np.float32(lam / n0),
        np.float32(alpha),
    )
    p.run_kernel("psf_cylindrical_plane", (Nx, Ny), None, *kernel_args)

    if not caller_buffer:
        return ex_g.get()
def focus_field_cylindrical(shape, units, lam=.5, NA=.3, n0=1., n_integration_steps=100):
    """Compute the focus field of a cylindrical lens with the given NA.

    See Colin J. R. Sheppard, "Cylindrical lenses - focusing and imaging: a
    review", Appl. Opt. 52, 538-545 (2013).

    ``shape`` is ``(Nx, Ny, Nz)`` and ``units`` is ``(dx, dy, dz)``. The
    kernel is evaluated on a ``(Nz, Ny)`` plane (only ``dy``/``dz`` are used)
    and the plane is then repeated ``Nx`` times along the last axis.
    ``n_integration_steps`` is compiled into the kernel as ``INT_STEPS``.

    Returns ``u, ex, ey, ez`` (each of shape ``(Nz, Ny, Nx)``) with u being
    the intensity.
    """
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options=str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps)))

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA / n0)

    u_g = OCLArray.empty((Nz, Ny), np.float32)
    ex_g = OCLArray.empty((Nz, Ny), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny), np.complex64)

    t = time.time()

    p.run_kernel("psf_cylindrical", u_g.shape[::-1], None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(-dy * (Ny - 1) / 2.), np.float32(dy * (Ny - 1) / 2.),
                 np.float32(-dz * (Nz - 1) / 2.), np.float32(dz * (Nz - 1) / 2.),
                 np.float32(lam / n0),
                 np.float32(alpha))

    # replicate the (Nz, Ny) plane along a new trailing x axis
    u = np.array(np.repeat(u_g.get()[..., np.newaxis], Nx, axis=-1))
    ex = np.array(np.repeat(ex_g.get()[..., np.newaxis], Nx, axis=-1))
    ey = np.array(np.repeat(ey_g.get()[..., np.newaxis], Nx, axis=-1))
    ez = np.array(np.repeat(ez_g.get()[..., np.newaxis], Nx, axis=-1))

    # print as a function call with one formatted argument: identical output
    # to the old Python 2 statement, but also valid Python 3
    print("time in secs: %s" % (time.time() - t))

    return u, ex, ey, ez
def tv2(data, weight, Niter=50):
    """Chambolle's TV regularized denoising.

    ``weight`` should be around 2+1.5*noise_sigma. The loop alternates the
    ``div_step`` and ``grad_step`` kernels ``Niter`` times and returns the
    content of the output image as float32.

    NOTE(review): this function looks only partially ported to the
    OCLProgram/OCLImage API. ``dev``, ``inImg`` and ``proc`` are not defined
    in this block, while ``prog`` and ``data_im`` are created but never used.
    Presumably ``inImg`` should be ``data_im`` and ``proc`` should be
    ``prog`` -- confirm against the rest of the module before relying on it.
    """
    prog = OCLProgram(abspath("kernels/tv2.cl"))

    # fixed: was ``data.astype(np, float32, copy=False)``, which passed the
    # numpy module itself as the dtype argument
    data_im = OCLImage.from_array(data.astype(np.float32, copy=False))

    # two RGBA images used alternately as input/output of grad_step
    pImgs = [
        dev.createImage(data.shape[::-1],
                        mem_flags=cl.mem_flags.READ_WRITE,
                        dtype=np.float32,
                        channel_order=cl.channel_order.RGBA)
        for i in range(2)
    ]

    outImg = dev.createImage(data.shape[::-1],
                             dtype=np.float32,
                             mem_flags=cl.mem_flags.READ_WRITE)

    dev.writeImage(inImg, data.astype(np.float32))
    dev.writeImage(pImgs[0], np.zeros((4, ) + data.shape, dtype=np.float32))
    dev.writeImage(pImgs[1], np.zeros((4, ) + data.shape, dtype=np.float32))

    for i in range(Niter):
        proc.runKernel("div_step", inImg.shape, None,
                       inImg, pImgs[i % 2], outImg)
        proc.runKernel("grad_step", inImg.shape, None,
                       outImg, pImgs[i % 2], pImgs[1 - i % 2],
                       np.float32(weight))

    return dev.readImage(outImg, dtype=np.float32)
def on_compile_timer(self):
    """Drop the kernel caches, recompile the volume kernel and refresh.

    Every existing directory in ``CACHEDIRS`` is removed. The freshly built
    program replaces ``self.glWidget.renderer.proc``; the widget is refreshed
    and the min/max of its output are printed. Any exception raised while
    compiling or refreshing is printed instead of propagated.
    """
    for cache_dir in CACHEDIRS:
        if not os.path.exists(cache_dir):
            continue
        print("removing cache: ", cache_dir)
        shutil.rmtree(cache_dir)

    print("compiling...")
    try:
        dirname = os.path.dirname(spimagine.volumerender.__file__)
        kernel_file = os.path.join(dirname, "kernels/volume_kernel.cl")
        build_options = [
            "-cl-fast-relaxed-math",
            "-cl-unsafe-math-optimizations",
            "-cl-mad-enable",
            "-I %s" % os.path.join(dirname, "kernels/"),
            "-D maxSteps=%s" % spimagine.config.__DEFAULTMAXSTEPS__,
        ]
        proc = OCLProgram(kernel_file, build_options=build_options)

        widget = self.glWidget
        widget.renderer.proc = proc
        widget.refresh()
        print(np.amin(self.glWidget.output), np.amax(self.glWidget.output))
    except Exception as e:
        # UI callback: report the failure but keep the application alive
        print(e)
def _perlin3_single(size, units=(1.,)*3, repeat=(10.,)*3, offz=0, Nz0=None):
    """Fill one 3d float32 volume with the ``perlin3d`` kernel.

    ``size`` is reversed to obtain the array shape; ``offz`` is passed to
    the kernel as int32 and ``units``/``repeat`` as float32 triples.
    ``Nz0`` defaults to ``size[-1]`` but is not used any further here.
    Returns the volume as a host ndarray.
    """
    if Nz0 is None:
        Nz0 = size[-1]

    wx, wy, wz = repeat
    dx, dy, dz = units

    prog = OCLProgram(abspath("perlin.cl"))
    vol_g = OCLArray.empty(size[::-1], np.float32)

    float_args = [np.float32(v) for v in (dx, dy, dz, wx, wy, wz)]
    prog.run_kernel("perlin3d", vol_g.shape[::-1], None,
                    vol_g.data, np.int32(offz), *float_args)
    return vol_g.get()
def _convolve_sep2_gpu(data_g, hx_g, hy_g, res_g=None):
    """Separable 2d convolution of device buffers: x pass, then y pass.

    All inputs must be float32 buffers. ``hx_g`` and ``hy_g`` are the 1d
    kernels; their lengths are passed to the OpenCL kernels as int32. The
    result is written to ``res_g`` (allocated if not given) and returned.
    """
    assert_bufs_type(np.float32, data_g, hx_g, hy_g)

    prog = OCLProgram(abspath("kernels/convolve_sep.cl"))

    n_taps_y = hy_g.shape[0]
    n_taps_x = hx_g.shape[0]

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    global_size = data_g.shape[::-1]
    # data -> tmp along x, then tmp -> res along y
    prog.run_kernel("conv_sep2_x", global_size, None,
                    data_g.data, hx_g.data, tmp_g.data, np.int32(n_taps_x))
    prog.run_kernel("conv_sep2_y", global_size, None,
                    tmp_g.data, hy_g.data, res_g.data, np.int32(n_taps_y))
    return res_g
def _setup_gpu(self):
    """Create the device buffers, the FFT plan and the kernel handles.

    Reads ``self.simul_xy``, ``self.shape``, ``self.fftplan_kwargs`` and
    ``self.n0``; stores the queue/context of the current device, the plane
    buffers, the intensity accumulators and one attribute per kernel of
    ``kernels/bpm_3d_kernels.cl``, then calls ``self._fill_propagator``.
    """
    dev = get_device()
    self._queue = dev.queue
    self._ctx = dev.context

    prog = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    # the buffers/ images
    Nx, Ny = self.simul_xy
    Nx0, Ny0 = self.shape[:2]  # unused below; kept from the original
    plane = (Ny, Nx)

    self._plan = fft_plan(plane, **self.fftplan_kwargs)
    self._buf_plane = OCLArray.empty(plane, np.complex64)
    self._buf_H = OCLArray.empty(plane, np.complex64)
    self._img_xy = OCLImage.empty(plane, dtype=np.float32, num_channels=2)

    # buffer for the weighted dn average
    real_type = Bpm3d._real_type
    self.intens_g = OCLArray.empty((1, Ny, Nx), dtype=real_type)
    self.intens_dn_g = OCLArray.empty((1, Ny, Nx), dtype=real_type)
    self.intens_sum_g = OCLArray.zeros((), dtype=real_type)
    self.intens_dn_sum_g = OCLArray.zeros((), dtype=real_type)

    # the kernels that need explicit scalar argument types
    propagator = prog.compute_propagator
    self._kernel_compute_propagator = propagator
    propagator.set_scalar_arg_dtypes((None, ) + (np.float32, ) * 5)

    propagator_buf = prog.compute_propagator_buf
    self._kernel_compute_propagator_buf = propagator_buf
    propagator_buf.set_scalar_arg_dtypes(
        (None, ) + (np.float32, ) * 5 + (None, ) * 2)

    # the remaining kernels: (attribute on self, kernel name in the program)
    kernel_table = (
        ("_kernel_mult_complex", "mult"),
        ("_kernel_im_to_buf_field", "img_to_buf_field"),
        ("_kernel_im_to_buf_intensity", "img_to_buf_intensity"),
        ("_kernel_im_to_im_intensity", "img_to_img_intensity"),
        ("_kernel_buf_to_buf_field", "buf_to_buf_field"),
        ("_kernel_buf_to_buf_intensity", "buf_to_buf_intensity"),
        ("_kernel_mult_dn_img_float", "mult_dn_image"),
        ("_kernel_mult_dn_buf_float", "mult_dn"),
        ("_kernel_mult_dn_img_complex", "mult_dn_image_complex"),
        ("_kernel_mult_dn_buf_complex", "mult_dn_complex"),
        ("_kernel_mult_dn_img_float_local", "mult_dn_image_local"),
        ("_kernel_mult_dn_buf_float_local", "mult_dn_local"),
        ("_kernel_mult_dn_img_complex_local", "mult_dn_image_complex_local"),
        ("_kernel_mult_dn_buf_complex_local", "mult_dn_complex_local"),
    )
    for attr_name, kernel_name in kernel_table:
        setattr(self, attr_name, getattr(prog, kernel_name))

    self._kernel_reduction = OCLMultiReductionKernel(
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_exprs=["a[i]", "b[i]"],
        arguments="__global float *a, __global float *b")

    self._fill_propagator(self.n0)
def focus_field_debye_at(x, y, z, lam, NA, n0=1., n_integration_steps=200):
    """Evaluate the ``debye_wolf_at`` kernel at arbitrary coordinates.

    The same as focus_field_debye but for the coordinates given in x, y, z
    (arrays of same shape); slower than focus_field_debye as it doesn't
    assume the coordinates to be on a grid.

    Parameters
    ----------
    x, y, z : ndarray
        Coordinates, all of the same shape.
    lam : float
        Wavelength; ``lam / n0`` is passed to the kernel.
    NA : float or sequence of floats
        A scalar is expanded to ``[0., NA]``. The values are converted to
        angles ``arcsin(NA / n0)``; their number must be even.
    n0 : float
        Refractive index used in the conversions above.
    n_integration_steps : int
        Compiled into the kernel as ``INT_STEPS``.

    Returns
    -------
    u, ex, ey, ez : ndarray
        Kernel outputs reshaped to the shape of ``x``.
    """
    # single-argument print call replaces the Python 2 print statement
    # (same output, valid on Python 2 and 3)
    print(absPath("kernels/psf_debye.cl"))

    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps)))

    if np.isscalar(NA):
        NA = [0., NA]

    alphas = np.arcsin(np.array(NA) / n0)
    assert len(alphas) % 2 == 0

    assert x.shape == y.shape == z.shape
    dshape = x.shape
    N = np.prod(dshape)

    x_g = OCLArray.from_array(x.flatten().astype(np.float32))
    y_g = OCLArray.from_array(y.flatten().astype(np.float32))
    z_g = OCLArray.from_array(z.flatten().astype(np.float32))

    u_g = OCLArray.empty(N, np.float32)
    ex_g = OCLArray.empty(N, np.complex64)
    ey_g = OCLArray.empty(N, np.complex64)
    ez_g = OCLArray.empty(N, np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    p.run_kernel("debye_wolf_at", (N,), None,
                 x_g.data, y_g.data, z_g.data,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(lam / n0),
                 alpha_g.data, np.int32(len(alphas)))

    u = u_g.get().reshape(dshape)
    ex = ex_g.get().reshape(dshape)
    ey = ey_g.get().reshape(dshape)
    ez = ez_g.get().reshape(dshape)

    return u, ex, ey, ez
def gpu_structure(data):
    """Filter a 2d image with the ``structure`` OpenCL kernel.

    ``data`` is uploaded as float32. The result is a float32 array of shape
    ``(data.shape[0], data.shape[1], 2)``, i.e. two output values per pixel.
    """
    data_g = OCLArray.from_array(data.astype(float32))

    out_shape = (data.shape[0], data.shape[1], 2)
    res_g = OCLArray.empty(out_shape, float32)

    # NOTE: the kernel path is relative to the current working directory
    prog = OCLProgram("./OpenCL/gpu_kernels/gpu_structure.cl")

    global_size = data_g.shape[::-1]  # one work item per pixel
    local_size = None                 # let the runtime choose
    prog.run_kernel("structure", global_size, local_size,
                    data_g.data, res_g.data)
    return res_g.get()
def _convolve_buf(data_g, h_g, res_g=None):
    """Convolve the device buffer ``data_g`` with ``h_g`` (buffer variant).

    Both inputs must be float32. The kernel ``convolve<ndim>d_buf`` is
    selected by the number of dimensions of ``data_g`` and receives the
    extents of ``h_g`` as trailing int32 arguments. The result is written to
    ``res_g`` (allocated as float32 if not given) and returned.
    """
    assert_bufs_type(np.float32, data_g, h_g)

    prog = OCLProgram(abspath("kernels/convolve.cl"))

    if res_g is None:
        res_g = OCLArray.empty(data_g.shape, dtype=np.float32)

    kernel_extents = [np.int32(extent) for extent in h_g.shape]
    ndim = len(data_g.shape)
    kernel_name = "convolve%sd_buf" % ndim

    prog.run_kernel(kernel_name, data_g.shape[::-1], None,
                    data_g.data, h_g.data, res_g.data,
                    *kernel_extents)
    return res_g
def _perlin3_single(size, units=(1., ) * 3, repeat=(10., ) * 3, offz=0, Nz0=None):
    """Run the ``perlin3d`` kernel once and return the resulting volume.

    Parameters
    ----------
    size : sequence of 3 ints
        Reversed to obtain the shape of the output array.
    units : (dx, dy, dz)
        Passed to the kernel as float32.
    repeat : (wx, wy, wz)
        Passed to the kernel as float32.
    offz : int
        Passed to the kernel as int32.
    Nz0 : int, optional
        Defaults to ``size[-1]``; not used any further in this function.

    Returns
    -------
    ndarray
        float32 array of shape ``size[::-1]``.
    """
    if Nz0 is None:
        Nz0 = size[-1]

    dx, dy, dz = units
    wx, wy, wz = repeat

    prog = OCLProgram(abspath("perlin.cl"))
    out_g = OCLArray.empty(size[::-1], np.float32)

    kernel_args = (
        out_g.data,
        np.int32(offz),
        np.float32(dx), np.float32(dy), np.float32(dz),
        np.float32(wx), np.float32(wy), np.float32(wz),
    )
    prog.run_kernel("perlin3d", out_g.shape[::-1], None, *kernel_args)

    return out_g.get()
def focus_field_debye_at(x, y, z, lam, NA, n0=1., n_integration_steps=200):
    """Evaluate the ``debye_wolf_at`` kernel at the coordinates x, y, z.

    The same as focus_field_debye but for the coordinates given in x, y, z
    (arrays of same shape); slower than focus_field_debye as it doesn't
    assume the coordinates to be on a grid.

    A scalar ``NA`` is expanded to ``[0., NA]``; the values are converted to
    angles ``arcsin(NA / n0)`` and their number must be even.
    ``lam / n0`` is passed as wavelength and ``n_integration_steps`` is
    compiled into the kernel as ``INT_STEPS``.

    Returns ``u, ex, ey, ez``, each reshaped to the shape of ``x``.
    """
    # print as a function call with a single argument: same output as the
    # former Python 2 statement, and valid on Python 3 as well
    print(absPath("kernels/psf_debye.cl"))

    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=str("-I %s -D INT_STEPS=%s" % (absPath("."), n_integration_steps)))

    if np.isscalar(NA):
        NA = [0., NA]

    alphas = np.arcsin(np.array(NA) / n0)
    assert len(alphas) % 2 == 0

    assert x.shape == y.shape == z.shape
    dshape = x.shape
    N = np.prod(dshape)

    # the kernel works on flat float32 coordinate buffers
    x_g = OCLArray.from_array(x.flatten().astype(np.float32))
    y_g = OCLArray.from_array(y.flatten().astype(np.float32))
    z_g = OCLArray.from_array(z.flatten().astype(np.float32))

    u_g = OCLArray.empty(N, np.float32)
    ex_g = OCLArray.empty(N, np.complex64)
    ey_g = OCLArray.empty(N, np.complex64)
    ez_g = OCLArray.empty(N, np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    p.run_kernel("debye_wolf_at", (N, ), None,
                 x_g.data, y_g.data, z_g.data,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(lam / n0),
                 alpha_g.data, np.int32(len(alphas)))

    u = u_g.get().reshape(dshape)
    ex = ex_g.get().reshape(dshape)
    ey = ey_g.get().reshape(dshape)
    ez = ez_g.get().reshape(dshape)

    return u, ex, ey, ez
def gpu_mean(data, Nx=10, Ny=10):
    """Filter an image with the ``mean`` OpenCL kernel.

    ``data`` is uploaded as float32; ``Nx`` and ``Ny`` are passed to the
    kernel as int32. Returns a float32 array with the shape of ``data``.
    """
    data_g = OCLArray.from_array(data.astype(float32))
    res_g = OCLArray.empty(data.shape, float32)

    # NOTE: the kernel path is relative to the current working directory
    prog = OCLProgram("./OpenCL/gpu_kernels/gpu_mean.cl")

    global_size = data_g.shape[::-1]  # one work item per pixel
    local_size = None                 # let the runtime choose
    window = (int32(Nx), int32(Ny))
    prog.run_kernel("mean", global_size, local_size,
                    data_g.data, res_g.data, *window)
    return res_g.get()
def _integral3_buf(x_g, res_g = None, tmp_g = None):
    """Run the blocked ``scan3d``/``add_sums3d`` kernels along all three axes of ``x_g``.

    Presumably this yields the 3d integral image (summed-area table) of
    ``x_g`` -- inferred from the function and kernel file names, confirm
    against kernels/integral_image.cl.

    Parameters
    ----------
    x_g : OCLArray
        3d device buffer whose dtype must be a key of ``_output_type_dict``;
        it is cast to the mapped output dtype if that differs.
    res_g : OCLArray, optional
        Output buffer of the output dtype; allocated if not given.
    tmp_g : OCLArray, optional
        Scratch buffer of the output dtype; allocated if not given.

    Returns
    -------
    OCLArray
        ``res_g``.

    Raises
    ------
    ValueError
        If the dtype of ``x_g`` is not a key of ``_output_type_dict``.
    """
    if not x_g.dtype.type in _output_type_dict:
        raise ValueError("dtype %s currently not supported! (%s)" % (x_g.dtype.type, str(_output_type_dict.keys())))

    dtype_out = _output_type_dict[x_g.dtype.type]
    # NOTE(review): cl_dtype_in is not used anywhere below
    cl_dtype_in = cl_buffer_datatype_dict[x_g.dtype.type]
    cl_dtype_out = cl_buffer_datatype_dict[dtype_out]

    dtype_itemsize = np.dtype(dtype_out).itemsize

    max_local_size = get_device().get_info("MAX_WORK_GROUP_SIZE")

    # the kernels are compiled for the output element type
    prog = OCLProgram(abspath("kernels/integral_image.cl"), build_options=["-D", "DTYPE=%s" % cl_dtype_out])

    if x_g.dtype.type != dtype_out:
        x_g = x_g.astype(dtype_out)

    if tmp_g is None:
        tmp_g = OCLArray.empty(x_g.shape, dtype_out)
    if res_g is None:
        res_g = OCLArray.empty(x_g.shape, dtype_out)

    assert_bufs_type(dtype_out, tmp_g, res_g)

    nz, ny, nx = x_g.shape

    def _scan_single(src, dst, ns, strides):
        # Scan ``src`` into ``dst`` along the axis given first in ``ns``;
        # ``strides`` are the element strides of the three axes in the same
        # order as ``ns``.
        nx, ny, nz = ns
        stride_x, stride_y, stride_z = strides
        # each work group uses ``loc`` work items and covers 2 * loc elements
        loc = min(next_power_of_2(nx // 2), max_local_size // 2)
        nx_block = 2 * loc
        # scan length rounded up to a whole number of blocks
        nx_pad = math.ceil(nx / nx_block) * nx_block

        nblocks = math.ceil(nx_pad // 2 / loc)
        # one entry per block and line, filled by the scan3d kernel
        sum_blocks = OCLArray.empty((nz, ny, nblocks), dst.dtype)
        # local memory for 2 * loc elements of the output dtype
        shared = cl.LocalMemory(2 * dtype_itemsize * loc)
        for b in range(nblocks):
            offset = b * loc
            prog.run_kernel("scan3d", (loc, ny, nz), (loc, 1, 1), src.data, dst.data, sum_blocks.data, shared, np.int32(nx_block), np.int32(stride_x), np.int32(stride_y), np.int32(stride_z), np.int32(offset), np.int32(b), np.int32(nblocks), np.int32(ny), np.int32(nx))
        if nblocks > 1:
            # scan the per-block entries in place (recursively), then let
            # add_sums3d combine them with dst
            _scan_single(sum_blocks, sum_blocks, (nblocks, ny, nz), (1, nblocks, nblocks * ny))
            prog.run_kernel("add_sums3d", (nx_pad, ny, nz), (nx_block, 1, 1), sum_blocks.data, dst.data, np.int32(stride_x), np.int32(stride_y), np.int32(stride_z), np.int32(nblocks), np.int32(ny), np.int32(nx))

    # one pass per axis (x, then y, then z), ping-ponging between the
    # buffers so that the final result ends up in res_g
    _scan_single(x_g, res_g, (nx, ny, nz), (1, nx, nx * ny))
    _scan_single(res_g, tmp_g, (ny, nx, nz), (nx, 1, nx * ny))
    _scan_single(tmp_g, res_g, (nz, nx, ny), (ny * nx, 1, nx))

    return res_g
def perlin2(size, units=None, repeat=(10.,)*2, scale=None, shift=(0, 0)):
    """Compute 2d perlin noise with the ``perlin2d`` kernel.

    Either ``scale`` (e.g. ``(10., 10.)``) or ``units`` (e.g. ``(5., 5.)``)
    has to be given.

    Parameters
    ----------
    size : sequence of 2 ints
        Reversed to obtain the shape of the output array.
    units : (dx, dy), optional
        Passed to the kernel as float32. Ignored when ``scale`` is given.
    repeat : (wx, wy)
        Passed to the kernel as float32. Ignored when ``scale`` is given.
    scale : float or (float, float), optional
        The characteristic length in pixels. When given (and truthy) it
        replaces ``repeat`` and ``units`` is set to ``(1., 1.)``.
    shift : (offset_x, offset_y)
        Passed to the kernel as float32.

    Returns
    -------
    ndarray
        float32 array of shape ``size[::-1]``.

    Raises
    ------
    ValueError
        If neither ``scale`` nor ``units`` is given.
    """
    if scale:
        if np.isscalar(scale):
            scale = (scale,)*2
        repeat = scale
        units = (1.,)*2

    if units is None:
        # previously this surfaced as an opaque TypeError when unpacking None
        raise ValueError("either scale or units have to be given")

    wx, wy = repeat
    dx, dy = units
    offset_x, offset_y = shift

    prog = OCLProgram(abspath("kernels/perlin.cl"))

    d = OCLArray.empty(size[::-1], np.float32)
    prog.run_kernel("perlin2d", d.shape[::-1], None,
                    d.data,
                    np.float32(dx), np.float32(dy),
                    np.float32(wx), np.float32(wy),
                    np.float32(offset_x), np.float32(offset_y),
                    )

    return d.get()
def _ocl_star_dist3D(lbl, rays, grid=(1, 1, 1)):
    """Run the ``stardist3d`` kernel on the label volume ``lbl``.

    ``lbl`` is uploaded as a uint16 image and ``rays.vertices`` as a float32
    buffer. ``grid`` is normalized with ``_normalize_grid(grid, 3)``; the
    result shape is ``lbl.shape`` floor-divided by the grid, plus a trailing
    axis of length ``len(rays)``. Returns a float32 host array.
    """
    from gputools import OCLProgram, OCLArray, OCLImage

    grid = _normalize_grid(grid, 3)

    res_shape = tuple(extent // step for extent, step in zip(lbl.shape, grid))
    n_rays = len(rays)

    lbl_g = OCLImage.from_array(lbl.astype(np.uint16, copy=False))
    dist_g = OCLArray.empty(res_shape + (n_rays, ), dtype=np.float32)
    rays_g = OCLArray.from_array(rays.vertices.astype(np.float32, copy=False))

    # the ray count is a compile-time constant of the kernel
    program = OCLProgram(path_absolute("kernels/stardist3d.cl"),
                         build_options=['-D', 'N_RAYS=%d' % len(rays)])

    grid_args = [np.int32(grid[axis]) for axis in range(3)]
    program.run_kernel('stardist3d', res_shape[::-1], None,
                       lbl_g, rays_g.data, dist_g.data, *grid_args)
    return dist_g.get()
def _filt(data_g, size=(3, 3, 3), cval = 0, res_g=None):
    """Run the templated ``median_3`` kernel on the device buffer ``data_g``.

    Parameters
    ----------
    data_g : OCLArray
        Input buffer; its dtype must be a key of ``cl_buffer_datatype_dict``.
    size : sequence of 3 ints
        Filter size as ``(z, y, x)``, rendered into the kernel source.
    cval : scalar
        Rendered into the kernel source as ``CVAL``.
    res_g : OCLArray, optional
        Output buffer; allocated like ``data_g`` if not given.

    Returns
    -------
    OCLArray
        ``res_g``.

    Raises
    ------
    ValueError
        If the dtype of ``data_g`` is not supported.
    """
    if not data_g.dtype.type in cl_buffer_datatype_dict:
        raise ValueError("dtype %s not supported" % data_g.dtype.type)

    DTYPE = cl_buffer_datatype_dict[data_g.dtype.type]

    with open(abspath("kernels/median_filter.cl"), "r") as f:
        tpl = Template(f.read())

    rendered = tpl.render(DTYPE = DTYPE, FSIZE_X=size[2], FSIZE_Y=size[1], FSIZE_Z=size[0], CVAL = cval)

    prog = OCLProgram(src_str=rendered)

    # A temporary buffer the size of data_g used to be allocated here but was
    # never used (the filter is a single pass); the allocation was dropped.
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    prog.run_kernel("median_3", data_g.shape[::-1], None, data_g.data, res_g.data)
    return res_g
def _convolve_buf(data_g, h_g, res_g=None):
    """Convolve the device buffer ``data_g`` with ``h_g`` (buffer variant).

    Both inputs must be float32. The kernel ``convolve<ndim>d_buf`` is tried
    first; if launching it raises a ``LogicError`` with code -52 the
    ``..._global`` variant of the kernel is launched instead. Any other
    ``LogicError`` is re-raised. The result is written to ``res_g``
    (allocated as float32 if not given) and returned.
    """
    assert_bufs_type(np.float32, data_g, h_g)

    prog = OCLProgram(abspath("kernels/convolve.cl"))

    if res_g is None:
        res_g = OCLArray.empty(data_g.shape, dtype=np.float32)

    Nhs = [np.int32(n) for n in h_g.shape]
    ndim = len(data_g.shape)

    def _launch(name):
        prog.run_kernel(name, data_g.shape[::-1], None,
                        data_g.data, h_g.data, res_g.data,
                        *Nhs)

    try:
        _launch("convolve%sd_buf" % ndim)
    except cl.cffi_cl.LogicError as e:
        # this catches the logicerror if the kernel is to big for constant memory
        if e.code != -52:
            raise e
        _launch("convolve%sd_buf_global" % ndim)

    return res_g
def _filt(data_g, size=(3, 3, 3), res_g=None):
    """Apply the templated separable filter along x, y and z in three passes.

    The dtype of ``data_g`` must be a key of ``cl_buffer_datatype_dict``
    (otherwise ``ValueError`` is raised). The kernel source is rendered from
    ``kernels/generic_separable_filter.cl`` with the filter sizes taken from
    the last three entries of ``size`` and with the enclosing scope's
    ``FUNC`` and ``DEFAULT``. The result is written to ``res_g`` (allocated
    if not given), which is returned.
    """
    elem_type = data_g.dtype.type
    if elem_type not in cl_buffer_datatype_dict:
        raise ValueError("dtype %s not supported" % data_g.dtype.type)
    DTYPE = cl_buffer_datatype_dict[elem_type]

    with open(abspath("kernels/generic_separable_filter.cl"), "r") as f:
        tpl = Template(f.read())

    template_vars = dict(
        FSIZE_X=size[-1], FSIZE_Y=size[-2], FSIZE_Z=size[-3],
        FUNC=FUNC, DEFAULT=DEFAULT, DTYPE=DTYPE,
    )
    rendered = tpl.render(**template_vars)

    prog = OCLProgram(src_str=rendered,
                      build_options=["-cl-unsafe-math-optimizations"])

    tmp_g = OCLArray.empty_like(data_g)
    if res_g is None:
        res_g = OCLArray.empty_like(data_g)

    # ping-pong between the buffers so the last pass ends in res_g
    global_size = data_g.shape[::-1]
    for kernel_name, src_g, dst_g in (("filter_3_x", data_g, res_g),
                                      ("filter_3_y", res_g, tmp_g),
                                      ("filter_3_z", tmp_g, res_g)):
        prog.run_kernel(kernel_name, global_size, None, src_g.data, dst_g.data)
    return res_g
def affine(data, mat = np.identity(4), mode ="linear"):
    """Affine transform ``data`` with the matrix ``mat``.

    The inverse of ``mat`` is uploaded as float32 and handed to the
    ``affine`` kernel. ``mode`` selects the interpolation (``"linear"`` or
    ``"nearest"``); an unknown mode raises ``KeyError``. Returns a float32
    array with the shape of ``data``.
    """
    bop = {"linear": "", "nearest": "-D USENEAREST"}
    if mode not in bop:
        raise KeyError("mode = '%s' not defined ,valid: %s"%(mode, bop.keys()))

    d_im = OCLImage.from_array(data)
    res_g = OCLArray.empty(data.shape, np.float32)

    inverse = np.linalg.inv(mat)
    mat_g = OCLArray.from_array(inverse.astype(np.float32, copy=False))

    prog = OCLProgram(abspath("kernels/transformations.cl"),
                      build_options=[bop[mode]])
    prog.run_kernel("affine", data.shape[::-1], None,
                    d_im, res_g.data, mat_g.data)
    return res_g.get()
def _setup_impl(self):
    """Set up the GPU buffers and kernels.

    Reads ``self.size``, ``self._H``, ``self.dn``, ``self.n_volumes``,
    ``self.scatter_weights`` and ``self.gfactor_weights`` and creates the
    corresponding device arrays, the FFT plan and the OpenCL program.
    """
    self.bpm_program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    Nx, Ny, Nz = self.size

    self._plan = fft_plan((Ny, Nx))
    self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

    # dn is only uploaded as a whole when a single volume is used
    if self.dn is not None and self.n_volumes == 1:
        self.dn_g = OCLArray.from_array(self.dn)

    scatter_weights = self.scatter_weights.astype(np.float32)
    self.scatter_weights_g = OCLArray.from_array(scatter_weights)
    gfactor_weights = self.gfactor_weights.astype(np.float32)
    self.gfactor_weights_g = OCLArray.from_array(gfactor_weights)

    # one accumulator entry per z slice
    self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
    self.gfactor_g = OCLArray.zeros(Nz, "float32")
def bilateral2(data, fSize, sigma_p, sigma_x=10.):
    """Apply the 2d bilateral filter kernel to ``data`` and return the result.

    float32 and uint16 inputs have dedicated kernels; any other dtype is cast
    to float32 and an info message is logged. ``fSize`` is forwarded as
    int32, ``sigma_x`` and ``sigma_p`` as float32. Returns a host ndarray.
    """
    dtypes_kernels = {
        np.float32: "bilat2_float",
        np.uint16: "bilat2_short",
    }

    if data.dtype.type not in dtypes_kernels:
        logger.info("data type %s not supported yet (%s), casting to float:" %
                    (data.dtype.type, dtypes_kernels.keys()))
        data = data.astype(np.float32)
    kernel_name = dtypes_kernels[data.dtype.type]

    img = OCLImage.from_array(data)
    res = OCLArray.empty_like(data)
    prog = OCLProgram(abspath("kernels/bilateral2.cl"))

    scalar_args = (
        np.int32(img.shape[0]),
        np.int32(img.shape[1]),
        np.int32(fSize),
        np.float32(sigma_x),
        np.float32(sigma_p),
    )
    prog.run_kernel(kernel_name, img.shape, None, img, res.data, *scalar_args)
    return res.get()
def affine(data, mat=np.identity(4), interp="linear"):
    """Affine transform of data with the matrix mat.

    The inverse of mat (as float32) is handed to the "affine" kernel of
    ``kernels/transformations.cl``.

    :param data: input array; it is uploaded as an OCLImage and the kernel
        is launched over data.shape[::-1]
    :param mat: transformation matrix, has to be invertible
        (default: 4x4 identity)
    :param interp: "linear" or "nearest", selects the build option of the
        kernel
    :return: the float32 result (same shape as data) fetched from the device
    :raises KeyError: if interp is not one of the valid choices
    """
    build_flags = {"linear": "", "nearest": "-D USENEAREST"}

    if interp not in build_flags:
        # list(...) keeps the message readable (no "dict_keys(...)" wrapper)
        raise KeyError("interp = '%s' not defined ,valid: %s"
                       % (interp, list(build_flags.keys())))

    d_im = OCLImage.from_array(data)
    res_g = OCLArray.empty(data.shape, np.float32)
    mat_g = OCLArray.from_array(
        np.linalg.inv(mat).astype(np.float32, copy=False))

    prog = OCLProgram(abspath("kernels/transformations.cl"),
                      build_options=[build_flags[interp]])

    prog.run_kernel("affine",
                    data.shape[::-1], None,
                    d_im, res_g.data, mat_g.data)

    return res_g.get()
def scale(data, scale=(1., 1., 1.), interp="linear"):
    """Return an interpolated, scaled version of data.

    scale = (scale_z,scale_y,scale_x)
    or
    scale = scale_all

    interp = "linear" | "nearest"

    The output shape is data.shape multiplied elementwise by scale and
    truncated to integers; the result is returned as float32.

    :raises KeyError: if interp is not a valid choice
    :raises ValueError: if scale is a sequence whose length is not 3
    """
    build_flags = {"linear": [], "nearest": ["-D", "USENEAREST"]}

    if interp not in build_flags:
        raise KeyError("interp = '%s' not defined ,valid: %s"
                       % (interp, list(build_flags.keys())))

    if not isinstance(scale, (tuple, list, np.ndarray)):
        scale = (scale,) * 3

    if len(scale) != 3:
        # wrap in a 1-tuple: a bare tuple on the right of % is unpacked as
        # the format arguments and raises TypeError instead of this error
        raise ValueError("scale = %s misformed" % (scale,))

    d_im = OCLImage.from_array(data)

    # the np.int alias no longer exists in NumPy, the builtin int is the
    # type it referred to
    nshape = np.array(data.shape) * np.array(scale)
    nshape = tuple(nshape.astype(int))

    res_g = OCLArray.empty(nshape, np.float32)

    prog = OCLProgram(abspath("kernels/scale.cl"),
                      build_options=build_flags[interp])

    prog.run_kernel("scale",
                    res_g.shape[::-1], None,
                    d_im, res_g.data)

    return res_g.get()
def _fftshift_single(d_g, res_g, ax=0):
    """Basic fftshift of an OCLArray along the single axis ax.

    shape(d_g) = [N_0, N_1, ..., N, ..., N_{k-1}, N_k]
               = [N1, N, N2]

    then we can address each element in the flat buffer by

        index = i + N2*j + N2*N*k

    where   i = 1 .. N2
            j = 1 .. N
            k = 1 .. N1

    and the swap of elements is performed on the index j

    :param d_g: input buffer of dtype float32 or complex64
    :param res_g: output buffer the kernel writes to
    :param ax: the axis to shift
    :return: res_g
    :raises KeyError: if the dtype of d_g has no kernel
    """
    dtype_kernel_name = {np.float32: "fftshift_1_f",
                         np.complex64: "fftshift_1_c"}

    N = d_g.shape[ax]
    # product of the extents before / after the shifted axis
    N1 = 1 if ax == 0 else int(np.prod(d_g.shape[:ax]))
    N2 = 1 if ax == len(d_g.shape) - 1 else int(np.prod(d_g.shape[ax + 1:]))

    dtype = d_g.dtype.type

    prog = OCLProgram(abspath("kernels/fftshift.cl"))
    # the global size has to consist of integers: use floor division so that
    # N // 2 stays an int under true division as well
    prog.run_kernel(dtype_kernel_name[dtype], (N2, N // 2, N1), None,
                    d_g.data, res_g.data,
                    np.int32(N),
                    np.int32(N2))

    return res_g
def _convolve3_old(data,h, dev = None): """convolves 3d data with kernel h on the GPU Device dev boundary conditions are clamping to edge. h is converted to float32 if dev == None the default one is used """ if dev is None: dev = get_device() if dev is None: raise ValueError("no OpenCLDevice found...") dtype = data.dtype.type dtypes_options = {np.float32:"", np.uint16:"-D SHORTTYPE"} if not dtype in dtypes_options.keys(): raise TypeError("data type %s not supported yet, please convert to:"%dtype,dtypes_options.keys()) prog = OCLProgram(abspath("kernels/convolve3.cl"), build_options = dtypes_options[dtype]) hbuf = OCLArray.from_array(h.astype(np.float32)) img = OCLImage.from_array(data) res = OCLArray.empty(data.shape,dtype=np.float32) Ns = [np.int32(n) for n in data.shape+h.shape] prog.run_kernel("convolve3d",img.shape,None, img,hbuf.data,res.data, *Ns) return res.get()
def _setup_impl(self):
    """Set up the OpenCL program, the FFT plan and all GPU buffers.

    Uses self.size, self._H, self.dn, self.n_volumes, self.scatter_weights
    and self.gfactor_weights and stores the results as attributes of self.
    """
    self.bpm_program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    Nx, Ny, Nz = self.size

    self._plan = fft_plan((Ny, Nx))

    propagator = self._H.astype(np.complex64)
    self._H_g = OCLArray.from_array(propagator)

    # upload dn only for the single volume case
    has_dn = self.dn is not None
    if has_dn and self.n_volumes == 1:
        self.dn_g = OCLArray.from_array(self.dn)

    scatter_weights = self.scatter_weights.astype(np.float32)
    self.scatter_weights_g = OCLArray.from_array(scatter_weights)

    gfactor_weights = self.gfactor_weights.astype(np.float32)
    self.gfactor_weights_g = OCLArray.from_array(gfactor_weights)

    # float32 buffers of length Nz, initialised with zeros
    self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
    self.gfactor_g = OCLArray.zeros(Nz, "float32")
def _convolve_spatial2(im, hs, mode = "constant", grid_dim = None, pad_factor = 2, plan = None, return_plan = False): """ spatial varying convolution of an 2d image with a 2d grid of psfs shape(im_ = (Ny,Nx) shape(hs) = (Gy,Gx, Hy,Hx) the input image im is subdivided into (Gy,Gx) blocks hs[j,i] is the psf at the center of each block (i,j) as of now each image dimension has to be divisible by the grid dim, i.e. Nx % Gx == 0 Ny % Gy == 0 mode can be: "constant" - assumed values to be zero "wrap" - periodic boundary condition """ if grid_dim: Gs = tuple(grid_dim) else: Gs = hs.shape[:2] mode_str = {"constant":"CLK_ADDRESS_CLAMP", "wrap":"CLK_ADDRESS_REPEAT"} Ny, Nx = im.shape Gy, Gx = Gs # the size of each block within the grid Nblock_y, Nblock_x = Ny/Gy, Nx/Gx # the size of the overlapping patches with safety padding Npatch_x, Npatch_y = _next_power_of_2(pad_factor*Nblock_x), _next_power_of_2(pad_factor*Nblock_y) prog = OCLProgram(abspath("kernels/conv_spatial2.cl"), build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]]) if plan is None: plan = fft_plan((Npatch_y,Npatch_x)) x0s = Nblock_x*np.arange(Gx) y0s = Nblock_y*np.arange(Gy) patches_g = OCLArray.empty((Gy,Gx,Npatch_y,Npatch_x),np.complex64) #prepare psfs if grid_dim: h_g = OCLArray.zeros((Gy,Gx,Npatch_y,Npatch_x),np.complex64) tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False)) for i,_x0 in enumerate(x0s): for j,_y0 in enumerate(y0s): prog.run_kernel("fill_psf_grid2", (Nblock_x,Nblock_y),None, tmp_g.data, np.int32(Nx), np.int32(i*Nblock_x), np.int32(j*Nblock_y), h_g.data, np.int32(Npatch_x), np.int32(Npatch_y), np.int32(-Nblock_x/2+Npatch_x/2), np.int32(-Nblock_y/2+Npatch_y/2), np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y) ) else: hs = np.fft.fftshift(pad_to_shape(hs,(Gy,Gx,Npatch_y,Npatch_x)),axes=(2,3)) h_g = OCLArray.from_array(hs.astype(np.complex64)) #prepare image im_g = OCLImage.from_array(im.astype(np.float32,copy=False)) for i,_x0 in enumerate(x0s): for j,_y0 in enumerate(y0s): 
prog.run_kernel("fill_patch2",(Npatch_x,Npatch_y),None, im_g, np.int32(_x0+Nblock_x/2-Npatch_x/2), np.int32(_y0+Nblock_y/2-Npatch_y/2), patches_g.data, np.int32(i*Npatch_x*Npatch_y+j*Gx*Npatch_x*Npatch_y)) #return np.abs(patches_g.get()) # convolution fft(patches_g,inplace=True, batch = Gx*Gy, plan = plan) fft(h_g,inplace=True, batch = Gx*Gy, plan = plan) prog.run_kernel("mult_inplace",(Npatch_x*Npatch_y*Gx*Gy,),None, patches_g.data, h_g.data) fft(patches_g,inplace=True, inverse = True, batch = Gx*Gy, plan = plan) print Nblock_x, Npatch_x #return np.abs(patches_g.get()) #accumulate res_g = OCLArray.empty(im.shape,np.float32) for j in xrange(Gy+1): for i in xrange(Gx+1): prog.run_kernel("interpolate2",(Nblock_x,Nblock_y),None, patches_g.data,res_g.data, np.int32(i),np.int32(j), np.int32(Gx),np.int32(Gy), np.int32(Npatch_x),np.int32(Npatch_y)) res = res_g.get() if return_plan: return res, plan else: return res
def nlm2(data, sigma, size_filter=2, size_search=3):
    """for noise level of sigma_0, choose sigma = 1.5*sigma_0

    Runs the kernels of ``kernels/nlm2.cl``, which is built with
    FS = size_filter and BS = size_search.  The offsets (dx, dy) cover
    dx = 0 .. size_search and dy = -size_search .. size_search; for every
    offset the "computePlus" kernel is run and, except for the zero offset,
    also the "computeMinus" kernel.

    :param data: input array (NOTE(review): presumably a 2d image, only the
        first two entries of the image shape are passed on - confirm)
    :param sigma: passed to the compute kernels as float32
    :param size_filter: value of the FS build define
    :param size_search: value of the BS build define, also the search range
    :return: the accumulated values divided by the accumulated weights
    """
    prog = OCLProgram(abspath("kernels/nlm2.cl"),
                      build_options="-D FS=%i -D BS=%i" % (size_filter, size_search))

    img = OCLImage.from_array(data)

    # scratch images (distImg used to be allocated twice)
    distImg = OCLImage.empty_like(data)
    tmpImg = OCLImage.empty_like(data)
    tmpImg2 = OCLImage.empty_like(data)

    accBuf = OCLArray.zeros(data.shape, np.float32)
    weightBuf = OCLArray.zeros(data.shape, np.float32)

    for dx in range(size_search + 1):
        for dy in range(-size_search, size_search + 1):
            prog.run_kernel("dist", img.shape, None,
                            img, tmpImg, np.int32(dx), np.int32(dy))

            # two "convolve" passes: tmpImg -> tmpImg2 -> distImg
            prog.run_kernel("convolve", img.shape, None,
                            tmpImg, tmpImg2, np.int32(1))
            prog.run_kernel("convolve", img.shape, None,
                            tmpImg2, distImg, np.int32(2))

            prog.run_kernel("computePlus", img.shape, None,
                            img, distImg, accBuf.data, weightBuf.data,
                            np.int32(img.shape[0]), np.int32(img.shape[1]),
                            np.int32(dx), np.int32(dy), np.float32(sigma))

            # the zero offset is only accumulated once
            if any([dx, dy]):
                prog.run_kernel("computeMinus", img.shape, None,
                                img, distImg, accBuf.data, weightBuf.data,
                                np.int32(img.shape[0]), np.int32(img.shape[1]),
                                np.int32(dx), np.int32(dy), np.float32(sigma))

    acc = accBuf.get()
    weights = weightBuf.get()

    return acc / weights
class _Bpm3d_OCL(_Bpm3d_Base):
    """Beam propagation implemented with OpenCL kernels."""

    def _setup_impl(self):
        """Create the OpenCL program, the FFT plan and the GPU buffers."""
        self.bpm_program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

        Nx, Ny, Nz = self.size

        self._plan = fft_plan((Ny, Nx))
        self._H_g = OCLArray.from_array(self._H.astype(np.complex64))

        # dn is only uploaded if it is given and there is a single volume
        if self.dn is not None and self.n_volumes == 1:
            self.dn_g = OCLArray.from_array(self.dn)

        scatter_weights = self.scatter_weights.astype(np.float32)
        self.scatter_weights_g = OCLArray.from_array(scatter_weights)

        gfactor_weights = self.gfactor_weights.astype(np.float32)
        self.gfactor_weights_g = OCLArray.from_array(gfactor_weights)

        self.scatter_cross_sec_g = OCLArray.zeros(Nz, "float32")
        self.gfactor_g = OCLArray.zeros(Nz, "float32")

        # self.reduce_kernel = OCLReductionKernel(
        # np.float32, neutral="0",
        #     reduce_expr="a+b",
        #     map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
        #     arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

    def _propagate_single(self, u0=None,
                          return_full=True,
                          return_intensity=False,
                          absorbing_width=0, **kwargs):
        """Propagate a field plane by plane through the volume.

        :param u0: initial complex field distribution, if None, plane wave
            is assumed
        :param return_full: if True, every plane is stored and the whole
            (Nz,Ny,Nx) stack is returned, otherwise only the last plane
        :param return_intensity: if True (and return_full is True) the
            stack is float32 and filled by the "fill_with_energy" kernel
            instead of holding the complex field
        :param absorbing_width: passed to the dn kernel as int32
        :param kwargs: not used here
        :return: the stack or the last plane, fetched from the device
        """
        # plane wave if none
        if u0 is None:
            u0 = np.ones(self.size2d[::-1], np.complex64)

        Nx, Ny, Nz = self.size
        _, _, dz = self.units
        n_plane = Nx * Ny

        plane_g = OCLArray.from_array(u0.astype(np.complex64, copy=False))

        keep_intensity = return_full and return_intensity
        keep_field = return_full and not return_intensity

        if keep_intensity:
            u_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.float32)
            self.bpm_program.run_kernel("fill_with_energy", (n_plane,), None,
                                        u_g.data, plane_g.data,
                                        np.int32(0))
        elif keep_field:
            u_g = OCLArray.empty((Nz, Ny, Nx), dtype=np.complex64)
            u_g[0] = plane_g

        for step in range(1, Nz):
            # multiply with the propagator H between a forward and an
            # inverse transform
            fft(plane_g, inplace=True, plan=self._plan)
            self.bpm_program.run_kernel("mult", (n_plane,), None,
                                        plane_g.data, self._H_g.data)
            fft(plane_g, inplace=True, inverse=True, plan=self._plan)

            if self.dn is not None:
                kernel_str = ("mult_dn_complex" if self._is_complex_dn
                              else "mult_dn")
                # n_plane*step is the flat offset of the current plane
                self.bpm_program.run_kernel(kernel_str, (Nx, Ny,), None,
                                            plane_g.data, self.dn_g.data,
                                            np.float32(self.k0 * dz),
                                            np.int32(n_plane * step),
                                            np.int32(absorbing_width))

            if keep_intensity:
                self.bpm_program.run_kernel("fill_with_energy", (n_plane,),
                                            None,
                                            u_g.data, plane_g.data,
                                            np.int32(step * n_plane))
            elif keep_field:
                u_g[step] = plane_g

        if return_full:
            return u_g.get()
        return plane_g.get()

    def __repr__(self):
        description = "Bpm3d class with size %s and units %s"
        return description % (self.size, self.units)
def focus_field_debye(shape, units, lam, NA, n0=1., n_integration_steps=200):
    """
    calculates the focus_field for a perfect, aberration free optical system
    via the vectorial debye diffraction integral

    see
    Matthew R. Foreman, Peter Toeroek,
    Computational methods in vectorial imaging,
    Journal of Modern Optics, 2011, 58, 5-6, 339

    returns u,ex,ey,ez

    with u being the intensity and (ex,ey,ez) the complex field components

    NA can be either a single number or an even length list of NAs (for
    bessel beams), e.g.
    NA = [.1,.2,.5,.6] lets light through the annulus .1<.2 and .5<.6

    shape = (Nx,Ny,Nz) has to be even in every dimension, the returned
    arrays have the shape (Nz,Ny,Nx)

    :raises NotImplementedError: if any entry of shape is odd
    """
    if np.isscalar(NA):
        NA = [0., NA]

    Nx0, Ny0, Nz0 = shape
    dx, dy, dz = units

    # validate before the (costly) kernel build
    # FIXME: the loop below does not yet work for odd inputs
    if not Nx0 % 2 + Ny0 % 2 + Nz0 % 2 == 0:
        raise NotImplementedError("odd shapes not supported yet")

    alphas = np.arcsin(np.array(NA) / n0)
    assert len(alphas) % 2 == 0

    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s" % n_integration_steps])

    # as we assume the psf to be symmetric, we just have to calculate each
    # octant (floor division keeps the sizes integers in py2 and py3)
    Nx = Nx0 // 2 + 1
    Ny = Ny0 // 2 + 1
    Nz = Nz0 // 2 + 1

    u_g = OCLArray.empty((Nz, Ny, Nx), np.float32)
    ex_g = OCLArray.empty(u_g.shape, np.complex64)
    ey_g = OCLArray.empty(u_g.shape, np.complex64)
    ez_g = OCLArray.empty(u_g.shape, np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    p.run_kernel("debye_wolf", u_g.shape[::-1], None,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(0.), np.float32(dx * (Nx - 1.)),
                 np.float32(0.), np.float32(dy * (Ny - 1.)),
                 np.float32(0.), np.float32(dz * (Nz - 1.)),
                 np.float32(1. * lam / n0),
                 alpha_g.data, np.int32(len(alphas)))

    u = u_g.get()
    ex = ex_g.get()
    ey = ey_g.get()
    ez = ez_g.get()

    u_all = np.empty((Nz0, Ny0, Nx0), np.float32)
    ex_all = np.empty((Nz0, Ny0, Nx0), np.complex64)
    ey_all = np.empty((Nz0, Ny0, Nx0), np.complex64)
    ez_all = np.empty((Nz0, Ny0, Nx0), np.complex64)

    sx = [slice(0, Nx), slice(Nx, Nx0)]
    sy = [slice(0, Ny), slice(Ny, Ny0)]
    sz = [slice(0, Nz), slice(Nz, Nz0)]

    # spreading the calculated octant to the full volume
    for i, j, k in itertools.product([0, 1], [0, 1], [0, 1]):
        # i, j, k = 0 indicates the + octant
        dst = (sz[1 - i], sy[1 - j], sx[1 - k])
        # an index of 0 drops the first and last sample of that axis, an
        # index of 1 takes the full axis and reverses it
        src = (slice(1 - i, Nz - 1 + i),
               slice(1 - j, Ny - 1 + j),
               slice(1 - k, Nx - 1 + k))
        flip = (slice(None, None, (-1) ** i),
                slice(None, None, (-1) ** j),
                slice(None, None, (-1) ** k))

        u_all[dst] = u[src][flip]
        if i == 0:
            ex_all[dst] = ex[src][flip]
            ey_all[dst] = ey[src][flip]
            ez_all[dst] = ez[src][flip]
        else:
            # the half mirrored along z gets the complex conjugate field
            ex_all[dst] = np.conjugate(ex[src][flip])
            ey_all[dst] = np.conjugate(ey[src][flip])
            ez_all[dst] = np.conjugate(ez[src][flip])

    return u_all, ex_all, ey_all, ez_all
def focus_field_debye_gauss(shape, units, lam, NAs, sig=1./np.sqrt(2),
                            n_integration_steps=200):
    """
    calculates the detection psf for a perfect, aberration free optical
    system via the vectorial debye diffraction integral illuminated with a
    gaussian envelope

    returns u,ex,ey,ez

    with u being the intensity and (ex,ey,ez) the complex field components

    the envelope intensity is exp(-r**2/2/sig**2) where r==1 corresponds to
    the aperture's edge, e.g. with sig = 1/sqrt(2) the energy drops to 1/e
    at the rim

    NAs is an increasing list of NAs
    NAs = [.1,.2,.5,.6] lets light through the annulus .1<.2 and .5<.6

    shape = (Nx,Ny,Nz), the returned arrays have the shape (Nz,Ny,Nx);
    sig has to be positive
    """
    # validate before the (costly) kernel build
    assert (sig > 0)

    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s" % n_integration_steps])

    Nx0, Ny0, Nz0 = shape
    dx, dy, dz = units

    alphas = np.arcsin(np.array(NAs))

    # only one octant is computed (floor division keeps the sizes integers
    # in py2 and py3)
    Nx = (Nx0 + 1) // 2
    Ny = (Ny0 + 1) // 2
    Nz = (Nz0 + 1) // 2

    u_g = OCLArray.empty((Nz, Ny, Nx), np.float32)
    ex_g = OCLArray.empty(u_g.shape, np.complex64)
    ey_g = OCLArray.empty(u_g.shape, np.complex64)
    ez_g = OCLArray.empty(u_g.shape, np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    t = time.time()

    p.run_kernel("debye_wolf_gauss", u_g.shape[::-1], None,
                 ex_g.data, ey_g.data, ez_g.data, u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(0), np.float32(dx * Nx),
                 np.float32(0), np.float32(dy * Ny),
                 np.float32(0), np.float32(dz * Nz),
                 np.float32(lam),
                 np.float32(sig),
                 alpha_g.data, np.int32(len(alphas)))

    u = u_g.get()
    ex = ex_g.get()
    ey = ey_g.get()
    ez = ez_g.get()

    # a single formatted argument prints the same text under py2 and py3
    print("time in secs: %s" % (time.time() - t))

    u_all = np.empty((Nz0, Ny0, Nx0), np.float32)
    ex_all = np.empty((Nz0, Ny0, Nx0), np.complex64)
    ey_all = np.empty((Nz0, Ny0, Nx0), np.complex64)
    ez_all = np.empty((Nz0, Ny0, Nx0), np.complex64)

    # both halves have the size of the computed octant
    sx = [slice(0, Nx), slice(Nx0 - Nx, Nx0)]
    sy = [slice(0, Ny), slice(Ny0 - Ny, Ny0)]
    sz = [slice(0, Nz), slice(Nz0 - Nz, Nz0)]

    # spreading the calculated octant to the full volume, an index of 1
    # writes the reversed octant to the lower half of that axis
    for i, j, k in itertools.product([0, 1], [0, 1], [0, 1]):
        dst = (sz[1 - i], sy[1 - j], sx[1 - k])
        flip = (slice(None, None, (-1) ** i),
                slice(None, None, (-1) ** j),
                slice(None, None, (-1) ** k))
        u_all[dst] = u[flip]
        ex_all[dst] = ex[flip]
        ey_all[dst] = ey[flip]
        ez_all[dst] = ez[flip]

    return u_all, ex_all, ey_all, ez_all
def focus_field_lattice2(shape=(128, 128, 128),
                         units=(0.1, 0.1, 0.1),
                         lam=.5, NA1=.4, NA2=.5,
                         sigma=.1,
                         kpoints=6,
                         n0=1.,
                         n_integration_steps=100):
    """Focus field of a lattice beam, computed for one half of the z range
    and mirrored to the other half.

    kpoints can be

    - either a (2,N) dimensional array such that
      kpoints[:,i] are the coordinates of the ith lattice point in back
      pupil coordinates

    - a single number, e.g. kpoints = 6, where the points are taken from
      _poly_points(kpoints) scaled with arcsin(.5*(NA1+NA2)/n0)

    shape = (Nx,Ny,Nz), units = (dx,dy,dz)

    NOTE(review): the mirroring presumably assumes an even Nz - confirm.

    :return: u, ex, ey, ez, each of shape (Nz,Ny,Nx); u is float32, the
        field components are complex64
    """
    alpha1 = np.arcsin(NA1/n0)
    alpha2 = np.arcsin(NA2/n0)

    if np.isscalar(kpoints):
        kxs, kys = np.arcsin(.5*(NA1+NA2)/n0)*_poly_points(kpoints)
    else:
        kxs, kys = kpoints

    p = OCLProgram(absPath("kernels/psf_lattice.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s"%n_integration_steps])

    kxs = np.array(kxs)
    kys = np.array(kys)

    Nx, Ny, Nz0 = shape
    dx, dy, dz = units

    # the psf is symmetric in z, we just have to calculate one half plane
    Nz = Nz0//2+1
    half_shape = (Nz, Ny, Nx)
    full_shape = (Nz0, Ny, Nx)

    u_g = OCLArray.empty(half_shape, np.float32)
    ex_g = OCLArray.empty(half_shape, np.complex64)
    ey_g = OCLArray.empty(half_shape, np.complex64)
    ez_g = OCLArray.empty(half_shape, np.complex64)

    kxs_g = OCLArray.from_array(kxs.astype(np.float32))
    kys_g = OCLArray.from_array(kys.astype(np.float32))

    t = time.time()

    p.run_kernel("debye_wolf_lattice", (Nx, Ny, Nz),
                 None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(dx*(-Nx//2)), np.float32(dx*(Nx//2-1)),
                 np.float32(dy*(-Ny//2)), np.float32(dy*(Ny//2-1)),
                 np.float32(0.), np.float32(dz*(Nz-1.)),
                 np.float32(1.*lam/n0),
                 np.float32(alpha1),
                 np.float32(alpha2),
                 kxs_g.data, kys_g.data,
                 np.int32(len(kxs)),
                 np.float32(sigma)
                 )

    u = u_g.get()
    ex = ex_g.get()
    ey = ey_g.get()
    ez = ez_g.get()

    u_all = np.empty(full_shape, np.float32)
    ex_all = np.empty(full_shape, np.complex64)
    ey_all = np.empty(full_shape, np.complex64)
    ez_all = np.empty(full_shape, np.complex64)

    lower = slice(0, Nz)
    upper = slice(Nz, Nz0)
    # the computed half without its first and last plane
    interior = slice(1, Nz-1)

    # spreading the calculated half plane to the full volume:
    # the upper part is a plain copy of the interior planes ...
    u_all[upper] = u[interior]
    for full, half in ((ex_all, ex), (ey_all, ey), (ez_all, ez)):
        full[upper] = half[interior]

    # ... the lower part is the z-reversed half, with conjugated fields
    u_all[lower] = u[0:Nz][::-1]
    for full, half in ((ex_all, ex), (ey_all, ey), (ez_all, ez)):
        full[lower] = np.conjugate(half[0:Nz][::-1])

    print("time in secs:", time.time()-t)

    return u_all, ex_all, ey_all, ez_all
def focus_field_lattice_plane(shape=(256, 256),
                              units=(.1, .1),
                              z=0.,
                              lam=.5,
                              NA1=.4, NA2=.5,
                              sigma=.1,
                              kpoints=6,
                              n0=1.,
                              apodization_bound=10,
                              ex_g=None,
                              n_integration_steps=100):
    """calculates the complex 2d input field at position -z for a bessel
    lattice beam.

    Parameters
    ----------
    shape: Nx,Ny
        the shape of the geometry
    units: dx,dy
        the pixel sizes in microns
    z: float
        defocus position in microns, such that the beam would focus at z
        e.g. an input field with z = 10. would have its focal spot after
        10 microns
    lam: float
        the wavelength of light used in microns
    NA1: float/list
        the numerical aperture of the inner ring
    NA2: float/list
        the numerical aperture of the outer ring
    sigma: float
        the standard deviation of the gaussian smear function applied to
        each point on the aperture (the bigger sigma, the tighter the sheet
        in y)
    kpoints: int/ (2,N) array
        defines the set of points on the aperture that create the lattice,
        can be
            - a (2,N) ndarray, such that kpoints[:,i] are the coordinates of
              the ith point
            - a single int, defining points on a regular polygon (e.g. 4 for
              a square lattice, 6 for a hex lattice)
        :math:`k_i = \\arcsin\\frac{NA_1+NA_2}{2 n_0} \\begin{pmatrix} \\cos \\phi_i \\\\ \\sin \\phi_i \\end{pmatrix}\\quad, \\phi_i = \\frac{\\pi}{2}+\\frac{2i}{N}`
    n0: float
        the refractive index of the medium
    apodization_bound: int
        width of the region where the input field is tapered to zero (with
        a hamming window) on the +/- x borders
    ex_g: OCLArray or None
        optional buffer of shape (Ny,Nx) that receives the field; if it is
        given, nothing is copied back to the host
    n_integration_steps: int
        number of integration steps to perform

    Returns
    -------
    u: ndarray
        the 2d complex field, or None if ex_g was given (the result is then
        only available in ex_g)

    Example
    -------

    >>> u = focus_field_lattice_plane((128,128), (0.1,0.1), z = 2., lam=.5, NA1 = .44, NA2 = .55, kpoints = 6)

    See also
    --------
    biobeam.focus_field_lattice: the corresponding 3d function

    """
    p = OCLProgram(absPath("kernels/psf_lattice.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s"%n_integration_steps])

    Nx, Ny = shape
    dx, dy = units

    alpha1 = np.arcsin(1.*NA1/n0)
    alpha2 = np.arcsin(1.*NA2/n0)

    if np.isscalar(kpoints):
        kxs, kys = np.arcsin(.5*(NA1+NA2)/n0)*_poly_points(kpoints)
    else:
        # asarray lets nested lists/tuples work as well
        kxs, kys = 1.*np.asarray(kpoints)/n0

    kxs = np.asarray(kxs)
    kys = np.asarray(kys)

    if ex_g is None:
        use_buffer = False
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)
    else:
        use_buffer = True

    # compare as tuples so that a list valued shape is accepted, too
    assert tuple(ex_g.shape[::-1]) == tuple(shape)

    kxs_g = OCLArray.from_array(kxs.astype(np.float32))
    kys_g = OCLArray.from_array(kys.astype(np.float32))

    t = time.time()

    # NOTE(review): "// 2." floors the extents to whole numbers, e.g.
    # dx=.1, Nx=256 gives -13.0 .. 12.0 and not +/-12.75; this looks
    # unintended (the 3d function uses dx*(-Nx//2) .. dx*(Nx//2-1)) but is
    # left unchanged here - confirm before touching the numerics
    p.run_kernel("debye_wolf_lattice_plane", (Nx, Ny),
                 None,
                 ex_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(-dx*(Nx-1)//2.), np.float32(dx*(Nx-1)//2.),
                 np.float32(-dy*(Ny-1)//2.), np.float32(dy*(Ny-1)//2.),
                 np.float32(-z),
                 np.float32(1.*lam/n0),
                 np.float32(alpha1),
                 np.float32(alpha2),
                 kxs_g.data, kys_g.data,
                 np.int32(len(kxs)),
                 np.float32(sigma),
                 np.int32(apodization_bound),
                 )

    # with a user supplied buffer there is nothing to fetch; this used to
    # end in an UnboundLocalError at the return statement
    res = None if use_buffer else ex_g.get()

    print("time in secs:", time.time()-t)
    return res
def focus_field_cylindrical(shape=(128, 128, 128),
                            units=(0.1, 0.1, 0.1),
                            lam=.5,
                            NA=.3,
                            n0=1.,
                            return_all_fields=False,
                            n_integration_steps=100):
    """calculates the focus field for a perfect, aberration free cylindrical
    lens after x polarized illumination via the vectorial debye diffraction
    integral (see [2]_).
    The pupil function is given by the numerical aperture NA

    The kernel only computes a (Nz,Ny) plane, which is then repeated Nx
    times along the last axis.

    Parameters
    ----------
    shape: Nx,Ny,Nz
        the shape of the geometry
    units: dx,dy,dz
        the pixel sizes in microns
    lam: float
        the wavelength of light used in microns
    NA: float
        the numerical aperture of the lens
    n0: float
        the refractive index of the medium
    return_all_fields: boolean
        if True, returns u,ex,ey,ez where ex/ey/ez are the complex field
        components
    n_integration_steps: int
        number of integration steps to perform

    Returns
    -------
    u: ndarray
        the intensity of the focus field
    (u,ex,ey,ez): list(ndarray)
        the intensity of the focus field and the complex field components
        (if return_all_fields is True)

    Example
    -------

    >>> u, ex, ey, ez = focus_field_cylindrical((128,128,128), (0.1,0.1,.1), lam=.5, NA = .4, return_all_fields=True)

    References
    ----------

    .. [2] Colin J. R. Sheppard: Cylindrical lenses - focusing and imaging: a review, Appl. Opt. 52, 538-545 (2013)

    """
    p = OCLProgram(absPath("kernels/psf_cylindrical.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s"%n_integration_steps])

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    alpha = np.arcsin(NA/n0)

    u_g = OCLArray.empty((Nz, Ny), np.float32)
    ex_g = OCLArray.empty((Nz, Ny), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny), np.complex64)

    t = time.time()

    p.run_kernel("psf_cylindrical", u_g.shape[::-1], None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(-dy*(Ny//2)), np.float32((Ny-1-Ny//2)*dy),
                 np.float32(-dz*(Nz//2)), np.float32((Nz-1-Nz//2)*dz),
                 np.float32(lam/n0),
                 np.float32(alpha))

    def _extrude(arr_g):
        # fetch the (Nz,Ny) plane and repeat it Nx times along a new last
        # axis; np.repeat already returns a fresh array
        return np.repeat(arr_g.get()[..., np.newaxis], Nx, axis=-1)

    u = _extrude(u_g)

    # the field components are only transferred and expanded when they are
    # actually returned
    if return_all_fields:
        ex = _extrude(ex_g)
        ey = _extrude(ey_g)
        ez = _extrude(ez_g)

    print("time in secs:", time.time()-t)

    if return_all_fields:
        return u, ex, ey, ez
    else:
        return u
def focus_field_lattice(shape=(128, 128, 128),
                        units=(0.1, 0.1, 0.1),
                        lam=.5, NA1=.4, NA2=.5,
                        sigma=.1,
                        kpoints=6,
                        return_all_fields=False,
                        n0=1.,
                        n_integration_steps=100):
    """Calculates the focus field for a bessel lattice.

    The pupil function consists out of discrete points (kpoints)
    superimposed on an annulus (NA1<NA2) which are smeared out by a 1d
    gaussian of given sigma creating an array of bessel beams in the focal
    plane (see [3]_ ).

    Parameters
    ----------
    shape: Nx,Ny,Nz
        the shape of the geometry
    units: dx,dy,dz
        the pixel sizes in microns
    lam: float
        the wavelength of light used in microns
    NA1: float/list
        the numerical aperture of the inner ring
    NA2: float/list
        the numerical aperture of the outer ring
    sigma: float
        the standard deviation of the gaussian smear function applied to
        each point on the aperture (the bigger sigma, the tighter the sheet
        in y)
    kpoints: int/ (2,N) array
        defines the set of points on the aperture that create the lattice,
        can be
            - a (2,N) array (or nested sequence), such that kpoints[:,i] are
              the coordinates of the ith point
            - a single int, defining points on a regular polygon (e.g. 4 for
              a square lattice, 6 for a hex lattice)
        :math:`k_i = \\arcsin\\frac{NA_1+NA_2}{2 n_0} \\begin{pmatrix} \\cos \\phi_i \\\\ \\sin \\phi_i \\end{pmatrix}\\quad, \\phi_i = \\frac{\\pi}{2}+\\frac{2i}{N}`
    n0: float
        the refractive index of the medium
    n_integration_steps: int
        number of integration steps to perform
    return_all_fields: boolean
        if True, returns u,ex,ey,ez where ex/ey/ez are the complex vector
        field components

    Returns
    -------
    u: ndarray
        the intensity of the focus field
    (u,ex,ey,ez): list(ndarray)
        the intensity of the focus field and the complex field components
        (if return_all_fields is True)

    Example
    -------

    >>> u = focus_field_lattice((128,128,128), (0.1,0.1,.1), lam=.5, NA1 = .44, NA2 = .55, kpoints = 6)

    References
    ----------

    .. [3] Chen et al. Lattice light-sheet microscopy: imaging molecules to embryos at high spatiotemporal resolution. Science 346, (2014).

    """
    alpha1 = np.arcsin(1.*NA1/n0)
    alpha2 = np.arcsin(1.*NA2/n0)

    if np.isscalar(kpoints):
        kxs, kys = np.arcsin(.5*(NA1+NA2)/n0)*_poly_points(kpoints)
    else:
        # asarray lets nested lists/tuples work as well ("1.*list" fails)
        kxs, kys = 1.*np.asarray(kpoints)/n0

    p = OCLProgram(absPath("kernels/psf_lattice.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s"%n_integration_steps])

    kxs = np.array(kxs)
    kys = np.array(kys)

    Nx, Ny, Nz = shape
    dx, dy, dz = units

    u_g = OCLArray.empty((Nz, Ny, Nx), np.float32)
    ex_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ey_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)
    ez_g = OCLArray.empty((Nz, Ny, Nx), np.complex64)

    kxs_g = OCLArray.from_array(kxs.astype(np.float32))
    kys_g = OCLArray.from_array(kys.astype(np.float32))

    # every axis runs from d*(-N//2) to d*(N//2-1)
    p.run_kernel("debye_wolf_lattice", (Nx, Ny, Nz),
                 None,
                 ex_g.data,
                 ey_g.data,
                 ez_g.data,
                 u_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(dx*(-Nx//2)), np.float32(dx*(Nx//2-1)),
                 np.float32(dy*(-Ny//2)), np.float32(dy*(Ny//2-1)),
                 np.float32(dz*(-Nz//2)), np.float32(dz*(Nz//2-1)),
                 np.float32(1.*lam/n0),
                 np.float32(alpha1),
                 np.float32(alpha2),
                 kxs_g.data, kys_g.data,
                 np.int32(len(kxs)),
                 np.float32(sigma)
                 )

    u = u_g.get()

    if return_all_fields:
        ex = ex_g.get()
        ey = ey_g.get()
        ez = ez_g.get()
        return u, ex, ey, ez
    else:
        return u
def _convolve_spatial3(im, hs, mode = "constant", grid_dim = None, plan = None, return_plan = False, pad_factor = 2): if im.ndim !=3: raise ValueError("wrong dimensions of input!") if not (hs.ndim==6 or (hs.ndim==3 and grid_dim)): raise ValueError("wrong dimensions of psf grid!") if grid_dim: if hs.shape != im.shape: raise ValueError("if grid_dim is set, then im.shape = hs.shape !") Gs = tuple(grid_dim) else: if not hs.ndim==6: raise ValueError("wrong dimensions of psf grid! (Gy,Gx,Ny,Nx)") Gs = hs.shape[:3] if not np.all([n%g==0 for n,g in zip(im.shape,Gs)]): raise NotImplementedError("shape of image has to be divisible by Gx Gy = %s shape mismatch"%(str(hs.shape[:2]))) mode_str = {"constant":"CLK_ADDRESS_CLAMP", "wrap":"CLK_ADDRESS_REPEAT"} Ns = im.shape # the size of each block within the grid Nblocks = [n/g for n,g in zip(Ns,Gs)] # the size of the overlapping patches with safety padding Npatchs = tuple([_next_power_of_2(pad_factor*nb) for nb in Nblocks]) prog = OCLProgram(abspath("kernels/conv_spatial3.cl"), build_options=["-D","ADDRESSMODE=%s"%mode_str[mode]]) if plan is None: plan = fft_plan(Npatchs) Xs = [nb*np.arange(g) for nb, g in zip(Nblocks,Gs)] patches_g = OCLArray.empty(Gs+Npatchs,np.complex64) #prepare psfs if grid_dim: h_g = OCLArray.zeros(Gs+Npatchs,np.complex64) tmp_g = OCLArray.from_array(hs.astype(np.float32, copy = False)) for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]): prog.run_kernel("fill_psf_grid3", Nblocks[::-1],None, tmp_g.data, np.int32(im.shape[2]), np.int32(im.shape[1]), np.int32(i*Nblocks[2]), np.int32(j*Nblocks[1]), np.int32(k*Nblocks[0]), h_g.data, np.int32(Npatchs[2]), np.int32(Npatchs[1]), np.int32(Npatchs[0]), np.int32(-Nblocks[2]/2+Npatchs[2]/2), np.int32(-Nblocks[1]/2+Npatchs[1]/2), np.int32(-Nblocks[0]/2+Npatchs[0]/2), np.int32(i*np.prod(Npatchs)+ j*Gs[2]*np.prod(Npatchs)+ k*Gs[2]*Gs[1]*np.prod(Npatchs))) else: hs = np.fft.fftshift(pad_to_shape(hs,Gs+Npatchs),axes=(3,4,5)) h_g = 
OCLArray.from_array(hs.astype(np.complex64)) im_g = OCLImage.from_array(im.astype(np.float32,copy=False)) # this loops over all i,j,k for (k,_z0), (j,_y0),(i,_x0) in product(*[enumerate(X) for X in Xs]): prog.run_kernel("fill_patch3",Npatchs[::-1],None, im_g, np.int32(_x0+Nblocks[2]/2-Npatchs[2]/2), np.int32(_y0+Nblocks[1]/2-Npatchs[1]/2), np.int32(_z0+Nblocks[0]/2-Npatchs[0]/2), patches_g.data, np.int32(i*np.prod(Npatchs)+ j*Gs[2]*np.prod(Npatchs)+ k*Gs[2]*Gs[1]*np.prod(Npatchs))) # convolution fft(patches_g,inplace=True, batch = np.prod(Gs), plan = plan) fft(h_g,inplace=True, batch = np.prod(Gs), plan = plan) prog.run_kernel("mult_inplace",(np.prod(Npatchs)*np.prod(Gs),),None, patches_g.data, h_g.data) fft(patches_g, inplace=True, inverse = True, batch = np.prod(Gs), plan = plan) #return patches_g.get() #accumulate res_g = OCLArray.zeros(im.shape,np.float32) for k, j, i in product(*[range(g+1) for g in Gs]): prog.run_kernel("interpolate3",Nblocks[::-1],None, patches_g.data, res_g.data, np.int32(i),np.int32(j),np.int32(k), np.int32(Gs[2]),np.int32(Gs[1]),np.int32(Gs[0]), np.int32(Npatchs[2]),np.int32(Npatchs[1]),np.int32(Npatchs[0])) res = res_g.get() if return_plan: return res, plan else: return res
def focus_field_debye_plane(shape=(128, 128),
                            units=(.1, .1),
                            z=0.,
                            lam=.5, NA=.6, n0=1.,
                            ex_g=None,
                            n_integration_steps=200):
    """
    calculates the x component of the electric field at a given z position
    z for a perfect, aberration free optical system via the vectorial debye
    diffraction integral

    see
    Matthew R. Foreman, Peter Toeroek,
    Computational methods in vectorial imaging,
    Journal of Modern Optics, 2011, 58, 5-6, 339

    NA can be either a single number or an even length list of NAs (for
    bessel beams), e.g.
    NA = [.1,.2,.5,.6] lets light through the annulus .1<.2 and .5<.6

    shape = (Nx,Ny), units = (dx,dy)

    if ex_g is a valid OCLArray (of shape (Ny,Nx)) it fills it and returns
    None, otherwise returns ex as a numpy array
    """
    if np.isscalar(NA):
        NA = [0., NA]

    Nx, Ny = shape
    dx, dy = units

    # validate before the (costly) kernel build
    alphas = np.arcsin(np.array(NA) / n0)
    assert len(alphas) % 2 == 0

    use_buffer = ex_g is not None
    if use_buffer:
        # compare as tuples so that a list valued shape is accepted, too
        assert tuple(ex_g.shape[::-1]) == tuple(shape)

    p = OCLProgram(absPath("kernels/psf_debye.cl"),
                   build_options=["-I", absPath("kernels"),
                                  "-D", "INT_STEPS=%s" % n_integration_steps])

    if not use_buffer:
        ex_g = OCLArray.empty((Ny, Nx), np.complex64)

    alpha_g = OCLArray.from_array(alphas.astype(np.float32))

    t = time.time()

    # floor division keeps N // 2 an integer number of pixels in py2 and py3
    p.run_kernel("debye_wolf_plane", (Nx, Ny), None,
                 ex_g.data,
                 np.float32(1.), np.float32(0.),
                 np.float32(-(Nx // 2) * dx), np.float32((Nx - Nx // 2) * dx),
                 np.float32(-(Ny // 2) * dy), np.float32((Ny - Ny // 2) * dy),
                 np.float32(z),
                 np.float32(lam / n0),
                 alpha_g.data, np.int32(len(alphas)))

    # a single formatted argument prints the same text under py2 and py3
    print("time in secs: %s" % (time.time() - t))

    if use_buffer:
        return None
    return ex_g.get()