def test_sphere(N = 128, **kwargs):
    """
    runs bpm_3d on a cubic N x N x N grid that contains a single sphere

    the grid spacing is 0.05 in every direction and the wavelength is 0.5;
    the sphere has radius 1, is centered at (x, y, z) = (0, 0, 3) and
    carries a refractive index difference dn = 0.05 (dn = 0 elsewhere)

    N       - number of pixels along each axis
    kwargs  - passed on unchanged to bpm_3d

    returns whatever bpm_3d returns; the elapsed time is printed
    """
    Nx, Nz = N, N
    dx, dz = .05, 0.05
    lam = .5
    units = (dx, dx, dz)

    # lateral axes are centered around 0, the z axis starts at 0
    x = dx*np.arange(-Nx/2, Nx/2)
    y = dx*np.arange(-Nx/2, Nx/2)
    z = dz*np.arange(0, Nz)

    # arrays are indexed as (z, y, x)
    Z, Y, X = np.meshgrid(z, y, x, indexing="ij")
    R = np.sqrt(X**2+Y**2+(Z-3.)**2)
    dn = .05*(R < 1.)

    c = StopWatch()
    c.tic("bpm")
    u = bpm_3d((Nx, Nx, Nz), units=units, lam=lam, dn=dn, **kwargs)
    c.toc("bpm")

    print(c)
    return u
pycuda::complex<float> tmp(cos(kdz*dnval),sin(kdz*dnval)); data[i] *= tmp; } __global__ void mult_complex(pycuda::complex<float> *data, pycuda::complex<float> *b) { int i = blockIdx.x * blockDim.x + threadIdx.x; data[i] *= b[i]; } """) func_mult_real = mod.get_function("mult_real") func_mult_comp = mod.get_function("mult_complex") clock.toc("setup") for z in xrange(Nz): fftobj(plane_g,plane_g) func_mult_comp(plane_g,H_g, grid = (Nx*Ny/256,1,1), block=(256,1,1)) fftobj(plane_g,plane_g, inverse = True) func_mult_real(plane_g, dn_g, np.float32(k0*dz), np.int32(z*Nx*Nx), grid = (Nx*Ny/256,1,1), block=(256,1,1)) u_g[z] = plane_g
pycuda::complex<float> tmp(cos(kdz*dnval),sin(kdz*dnval)); data[i] *= tmp; } __global__ void mult_complex(pycuda::complex<float> *data, pycuda::complex<float> *b) { int i = blockIdx.x * blockDim.x + threadIdx.x; data[i] *= b[i]; } """) func_mult_real = mod.get_function("mult_real") func_mult_comp = mod.get_function("mult_complex") clock.toc("setup") for z in xrange(Nz): fftobj(plane_g, plane_g) func_mult_comp(plane_g, H_g, grid=(Nx * Ny / 256, 1, 1), block=(256, 1, 1)) fftobj(plane_g, plane_g, inverse=True) func_mult_real(plane_g, dn_g, np.float32(k0 * dz), np.int32(z * Nx * Nx), grid=(Nx * Ny / 256, 1, 1), block=(256, 1, 1))
def bpm_3d_inverse(u, units, lam=.5, use_fresnel_approx=False):
    """
    propagates every z-plane of the field u by one step dz and returns
    the stack of propagated planes

    u      - the complex field distribution, indexed as (Nz, Ny, Nx)
    units  - the unit lengths (dx, dy, dz) of each dimension in microns
    lam    - the wavelength
    use_fresnel_approx - use the paraxial expansion of the propagator
                         instead of the square root expression

    returns a complex64 array of shape (Nz, Ny, Nx); plane i+1 holds
    plane i of u after one propagation step, plane 0 is zero

    NOTE: the final step that would turn the propagated planes into a
    refractive index (kernel "divide_dn_complex", which took k0*dz and the
    next plane of u) is disabled, so no refractive index is computed yet.
    """
    clock = StopWatch()
    clock.tic("setup")

    Nz, Ny, Nx = u.shape
    dx, dy, dz = units

    # setting up the propagator
    kxs = np.arange(-Nx / 2., Nx / 2.) / Nx
    kys = np.arange(-Ny / 2., Ny / 2.) / Ny

    # y is the slow axis of every (Ny, Nx) plane, so kys has to come first;
    # the previous argument order produced a transposed (Nx, Ny) propagator
    # which is only equivalent for square grids
    KY, KX = np.meshgrid(kys, kxs, indexing="ij")

    H0 = np.sqrt(0.j + (1. / lam)**2 - KX**2 / dx**2 - KY**2 / dy**2)
    if use_fresnel_approx:
        H0 = 1. / lam * (0.j + 1. - .5 * lam**2 *
                         (KX**2 / dx**2 + KY**2 / dy**2))

    outsideInds = np.isnan(H0)
    H = np.exp(2.j * np.pi * dz * H0)
    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    # move the zero frequency to index 0 to match the fft layout
    H = np.fft.fftshift(H).astype(np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny, Nx))
    plane_g = OCLArray.empty((Ny, Nx), np.complex64)
    h_g = OCLArray.from_array(H.astype(np.complex64))
    u_g = OCLArray.from_array(u.astype(np.complex64))

    # zero initialised, as plane 0 is never written by the loop below
    dn_g = OCLArray.zeros((Nz, Ny, Nx), np.complex64)

    clock.toc("setup")
    clock.tic("run")

    for i in range(Nz - 1):
        # fetch plane i of the input field
        program.run_kernel("copy_complex", (Nx * Ny, ), None,
                           u_g.data, plane_g.data,
                           np.int32(i * Nx * Ny))

        # calculate the propagated plane
        ocl_fft(plane_g, inplace=True, plan=plan)
        program.run_kernel("mult", (Nx * Ny, ), None,
                           plane_g.data, h_g.data)
        ocl_fft(plane_g, inplace=True, inverse=True, plan=plan)

        dn_g[i + 1, ...] = plane_g

    clock.toc("run")

    print(clock)
    return dn_g.get()
def bpm_3d_inverse(u,units, lam = .5, use_fresnel_approx = False):
    """
    propagates every z-plane of the field u by one step dz and returns
    the stack of propagated planes

    u      - the complex field distribution, indexed as (Nz, Ny, Nx)
    units  - the unit lengths (dx, dy, dz) of each dimension in microns
    lam    - the wavelength
    use_fresnel_approx - use the paraxial expansion of the propagator
                         instead of the square root expression

    returns a complex64 array of shape (Nz, Ny, Nx); plane i+1 holds
    plane i of u after one propagation step, plane 0 is zero

    NOTE: the final step that would turn the propagated planes into a
    refractive index (kernel "divide_dn_complex", which took k0*dz and the
    next plane of u) is disabled, so no refractive index is computed yet.
    """
    clock = StopWatch()
    clock.tic("setup")

    Nz, Ny, Nx = u.shape
    dx, dy, dz = units

    # setting up the propagator
    kxs = np.arange(-Nx/2.,Nx/2.)/Nx
    kys = np.arange(-Ny/2.,Ny/2.)/Ny

    # y is the slow axis of every (Ny, Nx) plane, so kys has to come first;
    # the previous argument order produced a transposed (Nx, Ny) propagator
    # which is only equivalent for square grids
    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    H0 = np.sqrt(0.j+(1./lam)**2-KX**2/dx**2-KY**2/dy**2)
    if use_fresnel_approx:
        H0 = 1./lam*(0.j+1.-.5*lam**2*(KX**2/dx**2+KY**2/dy**2))

    outsideInds = np.isnan(H0)
    H = np.exp(2.j*np.pi*dz*H0)
    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    # move the zero frequency to index 0 to match the fft layout
    H = np.fft.fftshift(H).astype(np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny,Nx))
    plane_g = OCLArray.empty((Ny,Nx),np.complex64)
    h_g = OCLArray.from_array(H.astype(np.complex64))
    u_g = OCLArray.from_array(u.astype(np.complex64))

    # zero initialised, as plane 0 is never written by the loop below
    dn_g = OCLArray.zeros((Nz,Ny,Nx),np.complex64)

    clock.toc("setup")
    clock.tic("run")

    for i in range(Nz-1):
        # fetch plane i of the input field
        program.run_kernel("copy_complex",(Nx*Ny,),None,
                           u_g.data,plane_g.data,
                           np.int32(i*Nx*Ny))

        # calculate the propagated plane
        ocl_fft(plane_g,inplace = True, plan = plan)
        program.run_kernel("mult",(Nx*Ny,),None,
                           plane_g.data,h_g.data)
        ocl_fft(plane_g,inplace = True, inverse = True, plan = plan)

        dn_g[i+1,...] = plane_g

    clock.toc("run")

    print(clock)
    return dn_g.get()
def bpm_3d_free(size, units, dz, lam = .5, u0 = None,
                n0 = 1.,
                use_fresnel_approx = False):
    """
    propagates the field u0 to distance dz

    size   - the lateral dimension of the field in pixels (Nx, Ny)
    units  - the lateral unit lengths (dx, dy)
    dz     - the propagation distance
    lam    - the wavelength
    u0     - the initial field of shape (Ny, Nx),
             if u0 = None a constant field of ones is used
    n0     - accepted for interface compatibility
             NOTE(review): n0 does not enter the propagator below, so the
             result is currently independent of it - confirm whether the
             propagator should be scaled by n0
    use_fresnel_approx - use the paraxial expansion of the propagator
                         instead of the square root expression

    returns the propagated complex64 field of shape (Ny, Nx)
    """
    clock = StopWatch()
    clock.tic("setup")

    Nx, Ny = size
    dx, dy = units

    # setting up the propagator
    kxs = np.arange(-Nx/2.,Nx/2.)/Nx
    kys = np.arange(-Ny/2.,Ny/2.)/Ny

    # y is the slow axis of the (Ny, Nx) plane, so kys has to come first;
    # the previous argument order produced a transposed (Nx, Ny) propagator
    # which is only equivalent for square grids
    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    H0 = np.sqrt(0.j+(1./lam)**2-KX**2/dx**2-KY**2/dy**2)
    if use_fresnel_approx:
        H0 = 1./lam*(0.j+1.-.5*lam**2*(KX**2/dx**2+KY**2/dy**2))

    outsideInds = np.isnan(H0)
    H = np.exp(2.j*np.pi*dz*H0)
    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    # move the zero frequency to index 0 to match the fft layout
    H = np.fft.fftshift(H).astype(np.complex64)

    if u0 is None:
        u0 = np.ones((Ny,Nx),np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = ocl_fft_plan((Ny,Nx))
    plane_g = OCLArray.from_array(u0.astype(np.complex64))
    h_g = OCLArray.from_array(H.astype(np.complex64))

    clock.toc("setup")
    clock.tic("run")

    # one propagation step: transform, multiply with H, transform back
    fft(plane_g,inplace = True, plan = plan)
    program.run_kernel("mult",(Nx*Ny,),None,
                       plane_g.data,h_g.data)
    fft(plane_g,inplace = True, inverse = True, plan = plan)

    clock.toc("run")

    return plane_g.get()
def _bpm_3d_image(size, units,
                  lam = .5,
                  u0 = None, dn = None,
                  subsample = 1,
                  n0 = 1.,
                  return_scattering = False,
                  return_g = False,
                  return_full_last = False,
                  use_fresnel_approx = False,
                  ):
    """
    simulates the propagation of a monochromatic wave of wavelength lam with
    initial conditions u0 along z in a medium filled with dn

    the refractive index is accessed as an OCLImage and the lateral grid
    and the z step are refined internally by the factor subsample

    size     - the dimension of the image to be calculated in pixels (Nx,Ny,Nz)
    units    - the unit lengths of each dimension in microns
    lam      - the wavelength
    u0       - the initial field distribution, if u0 = None an incident
               plane wave is assumed
    dn       - the refractive index of the medium (can be complex), either
               a numpy array or an OCLImage
    subsample - integer refinement factor
    n0       - the refractive index of the surrounding medium
    return_scattering - additionally return the per-plane scattering values p
    return_g - additionally return the per-plane values g,
               only evaluated if return_scattering is set
    return_full_last - additionally return the last plane at full
                       (subsampled) resolution
    use_fresnel_approx - use the paraxial expansion of the propagator

    returns the tuple (u, dn[, p][, g][, last_plane]) where u is the
    complex field of shape (Nz, Ny, Nx) and dn is the content of the
    refractive index buffer that was used
    """
    clock = StopWatch()
    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # subsampling
    Nx2, Ny2 = subsample*Nx, subsample*Ny
    dx2, dy2, dz2 = (1.*d/subsample for d in units)

    # setting up the propagator
    k0 = 2.*np.pi/lam

    kxs = 2.*np.pi*np.fft.fftfreq(Nx2,dx2)
    kys = 2.*np.pi*np.fft.fftfreq(Ny2,dy2)

    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    # real square root: evanescent components become nan and are zeroed below
    H0 = np.sqrt(n0**2*k0**2-KX**2-KY**2)

    if use_fresnel_approx:
        # first order expansion of sqrt(n0^2 k0^2 - KX^2 - KY^2);
        # the previous expression n0**2*k0-.5*(KX**2+KY**2) mixed terms of
        # different dimension
        H0 = 0.j+n0*k0-.5*(KX**2+KY**2)/n0/k0

    outsideInds = np.isnan(H0)

    H = np.exp(-1.j*dz2*H0)

    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    if u0 is None:
        u0 = np.ones((Ny2,Nx2),np.complex64)
    elif subsample > 1:
        # real and imaginary part are interpolated separately
        u0 = zoom(np.real(u0),subsample) + 1.j*zoom(np.imag(u0),subsample)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = fft_plan((Ny2,Nx2))
    plane_g = OCLArray.from_array(u0.astype(np.complex64))

    h_g = OCLArray.from_array(H.astype(np.complex64))

    if dn is not None:
        isComplexDn = dn.dtype.type in (np.complex64,np.complex128)

        if isinstance(dn,OCLImage):
            dn_g = dn
        elif isComplexDn:
            # a complex dn is stored as a two channel float image
            dn_complex = np.zeros(dn.shape+(2,),np.float32)
            dn_complex[...,0] = np.real(dn)
            dn_complex[...,1] = np.imag(dn)
            dn_g = OCLImage.from_array(dn_complex)
        else:
            dn_g = OCLImage.from_array(dn.astype(np.float32))

        if isComplexDn:
            dn_kernel = "mult_dn_complex_image"
        else:
            dn_kernel = "mult_dn_image"
    else:
        # dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float16)

    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # = cos(theta)
        scatter_weights = cos_theta
        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2
        gfactor_weights_g = OCLArray.from_array(gfactor_weights.astype(np.float32))

        scatter_cross_sec_g = OCLArray.zeros(Nz,"float32")
        gfactor_g = OCLArray.zeros(Nz,"float32")

        # zero frequency component of the unperturbed plane wave per plane
        plain_wave_dct = Nx2*Ny2*np.exp(-1.j*k0*n0*np.arange(Nz)*dz).astype(np.complex64)

        # weighted sum of |field|^2 with the plane wave removed at index 0
        reduce_kernel = OCLReductionKernel(
            np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

    u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.complex64)

    # store the initial plane
    program.run_kernel("copy_subsampled_buffer",(Nx,Ny),None,
                       u_g.data,plane_g.data,
                       np.int32(subsample),
                       np.int32(0))

    clock.toc("setup")

    clock.tic("run")

    for i in range(Nz-1):
        for substep in range(subsample):
            fft(plane_g,inplace = True, plan = plan)

            program.run_kernel("mult",(Nx2*Ny2,),None,
                               plane_g.data,h_g.data)

            # the field is still in fourier space here; only the last
            # substep belongs to an output plane
            if return_scattering and substep == (subsample-1):
                scatter_cross_sec_g[i+1] = reduce_kernel(plane_g,
                                                         scatter_weights_g,
                                                         plain_wave_dct[i+1])
                gfactor_g[i+1] = reduce_kernel(plane_g,
                                               gfactor_weights_g,
                                               plain_wave_dct[i+1])

            fft(plane_g,inplace = True, inverse = True, plan = plan)

            if dn is not None:
                program.run_kernel(dn_kernel,(Nx2,Ny2),None,
                                   plane_g.data,dn_g,
                                   np.float32(k0*dz2),
                                   np.float32(n0),
                                   np.int32(subsample*(i+1.)+substep),
                                   np.int32(subsample))

        program.run_kernel("copy_subsampled_buffer",(Nx,Ny),None,
                           u_g.data,plane_g.data,
                           np.int32(subsample),
                           np.int32((i+1)*Nx*Ny))

    clock.toc("run")

    print(clock)

    result = (u_g.get(), dn_g.get(),)

    if return_scattering:
        # normalizing prefactor
        prefac = 1./Nx2/Ny2*dx2*dy2
        p = prefac*scatter_cross_sec_g.get()
        result += (p,)

        # g is defined relative to p and needs the buffers created for
        # return_scattering, so it is only available together with it
        if return_g:
            g = prefac*gfactor_g.get()/p
            result += (g,)

    if return_full_last:
        result += (plane_g.get(),)

    return result
def _bpm_3d2(size, units,
             lam = .5,
             u0 = None, dn = None,
             subsample = 1,
             n0 = 1.,
             return_scattering = False,
             return_g = False,
             return_full = True,
             return_field = True,
             use_fresnel_approx = False,
             absorbing_width = 0,
             scattering_plane_ind = 0,
             return_last_plane = False,
             store_dn_as_half = False):
    """
    simulates the propagation of a monochromatic wave of wavelength lam with
    initial conditions u0 along z in a medium filled with dn

    size     - the dimension of the image to be calculated in pixels (Nx,Ny,Nz)
    units    - the unit lengths of each dimension in microns
    lam      - the wavelength
    u0       - the initial field distribution, if u0 = None an incident
               plane wave is assumed
    dn       - the refractive index of the medium (can be complex), either
               a numpy array or an OCLArray that is used as is
    subsample - has to be 1
    n0       - the refractive index of the surrounding medium
    return_scattering - additionally return the per-plane scattering values p
    return_g - additionally return the per-plane values g,
               only evaluated if return_scattering is set
    return_full  - return all Nz planes instead of only the last one
    return_field - return the complex field instead of its intensity
    use_fresnel_approx - use the paraxial expansion of the propagator
    absorbing_width - passed on to the refractive index kernel
    scattering_plane_ind - plane offset used for the phase of the
                           reference plane wave
    return_last_plane - additionally return the last complex plane
    store_dn_as_half - upload a real valued numpy dn as float16

    returns u, (u, p) or (u, p, g), with the last plane appended as an
    additional element if return_last_plane is set

    raises NotImplementedError if subsample != 1
    """
    if subsample != 1:
        raise NotImplementedError("subsample still has to be 1")

    clock = StopWatch()
    clock.tic("setup")

    Nx, Ny, Nz = size
    dx, dy, dz = units

    # setting up the propagator
    k0 = 2.*np.pi/lam

    kxs = 2.*np.pi*np.fft.fftfreq(Nx,dx)
    kys = 2.*np.pi*np.fft.fftfreq(Ny,dy)

    KY, KX = np.meshgrid(kys,kxs, indexing= "ij")

    # real square root: evanescent components become nan and are zeroed below
    H0 = np.sqrt(n0**2*k0**2-KX**2-KY**2)

    if use_fresnel_approx:
        H0 = 0.j+n0*k0-.5*(KX**2+KY**2)/n0/k0

    outsideInds = np.isnan(H0)

    H = np.exp(-1.j*dz*H0)

    H[outsideInds] = 0.
    H0[outsideInds] = 0.

    if u0 is None:
        u0 = np.ones((Ny,Nx),np.complex64)

    # setting up the gpu buffers and kernels
    program = OCLProgram(absPath("kernels/bpm_3d_kernels.cl"))

    plan = fft_plan((Ny,Nx))
    plane_g = OCLArray.from_array(u0.astype(np.complex64, copy = False))

    h_g = OCLArray.from_array(H.astype(np.complex64))

    if dn is not None:
        # derived from dn itself so that it is also defined when dn is
        # already a device array
        isComplexDn = dn.dtype.type in (np.complex64,np.complex128)

        if isinstance(dn,OCLArray):
            dn_g = dn
        elif isComplexDn:
            dn_g = OCLArray.from_array(dn.astype(np.complex64,copy= False))
        elif store_dn_as_half:
            dn_g = OCLArray.from_array(dn.astype(np.float16,copy= False))
        else:
            dn_g = OCLArray.from_array(dn.astype(np.float32,copy= False))

        # the kernel only depends on the dtype of dn, so pick it once
        if isComplexDn:
            kernel_str = "mult_dn_complex"
        elif dn_g.dtype.type == np.float16:
            kernel_str = "mult_dn_half"
        else:
            kernel_str = "mult_dn"
    else:
        # dummy dn
        dn_g = OCLArray.empty((1,)*3,np.float32)

    if return_scattering:
        cos_theta = np.real(H0)/n0/k0

        # = cos(theta)
        scatter_weights = cos_theta
        scatter_weights_g = OCLArray.from_array(scatter_weights.astype(np.float32))

        # = cos(theta)^2
        gfactor_weights = cos_theta**2
        gfactor_weights_g = OCLArray.from_array(gfactor_weights.astype(np.float32))

        scatter_cross_sec_g = OCLArray.zeros(Nz,"float32")
        gfactor_g = OCLArray.zeros(Nz,"float32")

        # zero frequency component of the unperturbed plane wave per plane
        plain_wave_dct = Nx*Ny*np.exp(-1.j*k0*n0*(scattering_plane_ind+np.arange(Nz))*dz).astype(np.complex64)

        # weighted sum of |field|^2 with the plane wave removed at index 0
        reduce_kernel = OCLReductionKernel(
            np.float32, neutral="0",
            reduce_expr="a+b",
            map_expr="weights[i]*cfloat_abs(field[i]-(i==0)*plain)*cfloat_abs(field[i]-(i==0)*plain)",
            arguments="__global cfloat_t *field, __global float * weights,cfloat_t plain")

    if return_full:
        if return_field:
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.complex64)
            u_g[0] = plane_g
        else:
            # only the intensity of every plane is stored
            u_g = OCLArray.empty((Nz,Ny,Nx),dtype=np.float32)
            program.run_kernel("copy_intens",(Nx*Ny,),None,
                               plane_g.data,u_g.data,
                               np.int32(0))

    clock.toc("setup")

    clock.tic("run")

    for i in range(Nz-1):
        fft(plane_g,inplace = True, plan = plan)

        program.run_kernel("mult",(Nx*Ny,),None,
                           plane_g.data,h_g.data)

        # the field is still in fourier space here
        if return_scattering:
            scatter_cross_sec_g[i+1] = reduce_kernel(plane_g,
                                                     scatter_weights_g,
                                                     plain_wave_dct[i+1])
            gfactor_g[i+1] = reduce_kernel(plane_g,
                                           gfactor_weights_g,
                                           plain_wave_dct[i+1])

        fft(plane_g,inplace = True, inverse = True, plan = plan)

        if dn is not None:
            program.run_kernel(kernel_str,(Nx,Ny,),None,
                               plane_g.data,dn_g.data,
                               np.float32(k0*dz),
                               np.int32(Nx*Ny*(i+1)),
                               np.int32(absorbing_width))

        if return_full:
            if return_field:
                u_g[i+1] = plane_g
            else:
                program.run_kernel("copy_intens",(Nx*Ny,),None,
                                   plane_g.data,u_g.data,
                                   np.int32(Nx*Ny*(i+1)))

    clock.toc("run")

    print(clock)

    if return_full:
        u = u_g.get()
    else:
        u = plane_g.get()
        if not return_field:
            u = np.abs(u)**2

    if return_scattering:
        # normalizing prefactor
        prefac = 1./Nx/Ny*dx*dy
        p = prefac*scatter_cross_sec_g.get()

        # g is defined relative to p and needs the buffers created for
        # return_scattering, so it is only available together with it
        if return_g:
            g = prefac*gfactor_g.get()/p
            result = u, p, g
        else:
            result = u, p
    else:
        result = u

    if return_last_plane:
        if isinstance(result,tuple):
            result = result + (plane_g.get(),)
        else:
            result = (result, plane_g.get())

    return result