def test_2d_fp_surfaces(self):
    """Round-trip a random 2D array through a layered CUDA surface.

    For each dtype in (int16, float32, float64, complex64, complex128) a
    32x32 host array is uploaded to the device.  The ``copy_texture`` kernel
    is launched twice on that buffer: with ``rw == 0`` it writes every
    element into the surface at ``(row, col)``; with ``rw == 1`` it reads
    the surface at ``(col, row)`` back into the same buffer.  The buffer
    must therefore end up equal to the transpose of the uploaded array.
    """
    orden = "C"
    npoints = 32

    # ``cuPres`` is a placeholder replaced by the C type name of each dtype.
    kernel_template = '''
    #include <pycuda-helpers.hpp>

    surface<void, cudaSurfaceType2DLayered> mtx_tex;

    __global__ void copy_texture(cuPres *dest, int rw)
    {
      int row   = blockIdx.x*blockDim.x + threadIdx.x;
      int col   = blockIdx.y*blockDim.y + threadIdx.y;
      int layer = 1;
      int tid = row + col*blockDim.x*gridDim.x ;
      if (rw==0){
      cuPres aux = dest[tid];
      fp_surf2DLayeredwrite(aux, mtx_tex, row, col, layer,cudaBoundaryModeClamp);}
      else {
      cuPres aux = 0;
      fp_surf2DLayeredread(&aux, mtx_tex, col, row, layer, cudaBoundaryModeClamp);
      dest[tid] = aux;
      }
    }
    '''

    # Launch geometry does not depend on the dtype, so compute it once.
    # Square blocks of edge 8 (clamped to npoints) and enough blocks per
    # side to cover npoints (ceiling division).
    block_edge = min(8, npoints)
    cuBlock = (block_edge, block_edge, 1)
    blocks_per_side = (npoints + block_edge - 1) // block_edge
    cuGrid = (blocks_per_side, blocks_per_side, 1)

    for prec in [np.int16, np.float32, np.float64, np.complex64, np.complex128]:
        prec_str = dtype_to_ctype(prec)

        modW = SourceModule(kernel_template.replace('cuPres', prec_str))
        copy_texture = modW.get_function("copy_texture")
        mtx_tex = modW.get_surfref("mtx_tex")
        copy_texture.prepare('Pi')

        # Random host data, cast to the dtype under test on assignment.
        # NOTE(review): rand() yields values in [0, 1), so the int16 case
        # presumably only ever exercises zeros -- confirm whether intended.
        A_cpu = np.zeros([npoints, npoints], order=orden, dtype=prec)
        A_cpu[:] = np.random.rand(npoints, npoints)[:]
        A_gpu = gpuarray.to_gpu(A_cpu)

        try:
            # Back the surface with a zero-filled array of matching shape/dtype.
            A_gpu2 = gpuarray.zeros_like(A_gpu)
            cudaArray = drv.gpuarray_to_array(A_gpu2, orden, allowSurfaceBind=True)
            A_cpu = A_gpu.get()  # Host copy of the original device contents
            mtx_tex.set_array(cudaArray)

            # Write the random array into the surface ...
            copy_texture.prepared_call(cuGrid, cuBlock, A_gpu.gpudata, np.int32(0))
            # ... then read it back with the coordinates swapped (transposed).
            copy_texture.prepared_call(cuGrid, cuBlock, A_gpu.gpudata, np.int32(1))

            assert np.sum(np.abs(A_gpu.get() - np.transpose(A_cpu))) == np.array(0, dtype=prec)
        finally:
            # Release the device buffer even when the check above fails.
            A_gpu.gpudata.free()
if (x < Nx && y < Ny && z < Nz) { float value = tex3D(tex_in, (float) x, (float) y, float (z)); surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero); } } ''' mod = SourceModule(src_module, cache_dir=False, keep=False) kernel = mod.get_function("test_3d_surf") arg_types = (np.int32, np.int32, np.int32) tex_in = mod.get_texref('tex_in') surf_out = mod.get_surfref('surf_out') # random shape shape_x = np.random.randint(1, 255) shape_y = np.random.randint(1, 255) shape_z = np.random.randint(1, 255) dtype = np.float32 # should match src_module's datatype numpy_array_in = np.random.randn(shape_z, shape_y, shape_x).astype(dtype).copy() cuda_array_in = numpy3d_to_array(numpy_array_in) tex_in.set_array(cuda_array_in) zeros = np.zeros_like(numpy_array_in) cuda_array_out = numpy3d_to_array(zeros, allow_surface_bind=True)
getAlphas = cudaCode.get_function( "getAlphas_kernel" ) getFFTderivatives = cudaCode.get_function( "getFFTderivatives_kernel" ) #V_FFT getPartialsXY = cudaCode.get_function( "getPartialsXY_kernel" ) setBoundryConditionsKernel = cudaCode.get_function( 'setBoundryConditions_kernel' ) implicitStep1 = cudaCode.get_function( "implicitStep1_kernel" ) implicitStep2 = cudaCode.get_function( "implicitStep2_kernel" ) findActivityKernel = cudaCode.get_function( "findActivity_kernel" ) getActivityKernel = cudaCode.get_function( "getActivity_kernel" ) getVelocityKernel = cudaCode.get_function( "getVelocity_kernel" ) eulerStepKernel = cudaCode.get_function( "eulerStep_kernel" ) eulerStep_FFTKernel = cudaCode.get_function( "eulerStep_fft_kernel" ) ##V_FFT #TEXTURE version eulerStep_textKernel = cudaCode.get_function( "eulerStep_texture_kernel" ) tex_psiReal = cudaCode.get_texref("tex_psiReal") tex_psiImag = cudaCode.get_texref("tex_psiImag") surf_psiReal = cudaCode.get_surfref("surf_psiReal") surf_psiImag = cudaCode.get_surfref("surf_psiImag") if showKernelMemInfo: kernelMemoryInfo(eulerStepKernel, 'eulerStepKernel') print "" kernelMemoryInfo(eulerStep_textKernel, 'eulerStepKernel_texture') print "" ######################################################################## from pycuda.elementwise import ElementwiseKernel ######################################################################## multiplyByScalarReal = ElementwiseKernel(arguments="cudaP a, cudaP *realArray".replace("cudaP", cudaP), operation = "realArray[i] = a*realArray[i] ", name = "multiplyByScalarReal_kernel") ######################################################################## multiplyByScalarComplex = ElementwiseKernel(arguments="cudaP a, pycuda::complex<cudaP> *psi".replace("cudaP", cudaP),
block3D = (block_size_x, block_size_y, block_size_z)
grid3D = (gridx, gridy, gridz)
# Same launch grid as grid3D, but with half as many blocks along x.
grid3D_ising = (gridx // 2, gridy, gridz)

# Initialize the pyCUDA context
cudaDevice = setCudaDevice(devN=useDevice, usingAnimation=True)

# Read and compile CUDA code
print("\nCompiling CUDA code")
# A context manager closes the source file as soon as it has been read.
with open("CUDAising3D.cu", "r") as cudaSourceFile:
    cudaCodeString_raw = cudaSourceFile.read()
# The source is compiled verbatim: no %-template substitution is applied.
cudaCodeString = cudaCodeString_raw
cudaCode = SourceModule(cudaCodeString)
tex_spins = cudaCode.get_texref('tex_spinsIn')
surf_spins = cudaCode.get_surfref('surf_spinsOut')
isingKernel = cudaCode.get_function('ising_kernel')
########################################################################
from pycuda.elementwise import ElementwiseKernel
########################################################################
# Element-wise affine map from an int array to a float array:
# output[i] = a*input[i] + b
changeIntToFloat = ElementwiseKernel(arguments="float a, float b, int *input, float *output",
                                     operation = "output[i] = a*input[i] + b;",
                                     name = "intToFloat_kernel")
########################################################################
# Element-wise conversion of floats to bytes via -255*(x - 1),
# so an input of 1 maps to 0 and an input of 0 maps to 255.
floatToUchar = ElementwiseKernel(arguments="float *input, unsigned char *output",
                                 operation = "output[i] = (unsigned char) ( -255*(input[i]-1));",
                                 name = "floatToUchar_kernel")
########################################################################
def sendToScreen( plotData ):
    """Convert the float plot buffer to unsigned chars, then call copyToScreenArray().

    The ``plotData`` argument is not used by this body: the conversion reads
    the module-level ``plotDataFloat_d`` and writes ``plotData_d``.

    NOTE(review): the source was whitespace-collapsed; both statements are
    assumed to belong to this function -- confirm against the original file.
    """
    floatToUchar( plotDataFloat_d, plotData_d )
    copyToScreenArray()
if (x < Nx && y < Ny && z < Nz) { float value = tex3D(tex_in, (float) x, (float) y, float (z)); surf3Dwrite((float) value, surf_out, sizeof(float) * x, y, z, cudaBoundaryModeZero); } } ''' mod=SourceModule(src_module, cache_dir=False, keep=False) kernel=mod.get_function("test_3d_surf") arg_types = (np.int32, np.int32, np.int32) tex_in=mod.get_texref('tex_in') surf_out=mod.get_surfref('surf_out') # random shape shape_x = np.random.randint(1,255) shape_y = np.random.randint(1,255) shape_z = np.random.randint(1,255) dtype=np.float32 # should match src_module's datatype numpy_array_in=np.random.randn(shape_z, shape_y, shape_x).astype(dtype).copy() cuda_array_in = numpy3d_to_array(numpy_array_in) tex_in.set_array(cuda_array_in) zeros=np.zeros_like(numpy_array_in) cuda_array_out = numpy3d_to_array(zeros,allow_surface_bind=True) surf_out.set_array(cuda_array_out)
block3D = (block_size_x, block_size_y, block_size_z) #Initialize openGL volumeRender.initGL() #initialize pyCUDA context cudaDevice = setCudaDevice(devN=useDevice, usingAnimation=True ) #Read and compile CUDA code print "Compiling CUDA code" cudaCodeString_raw = open("CUDAheat3D.cu", "r").read() cudaCodeString = cudaCodeString_raw % { "BLOCK_WIDTH":block3D[0], "BLOCK_HEIGHT":block3D[1], "BLOCK_DEPTH":block3D[2], } cudaCode = SourceModule(cudaCodeString) tex_tempIn = cudaCode.get_texref("tex_tempIn") surf_tempOut = cudaCode.get_surfref("surf_tempOut") eulerKernel_tex = cudaCode.get_function("euler_kernel_texture" ) eulerKernel_shrd = cudaCode.get_function("euler_kernel_shared" ) ######################################################################## from pycuda.elementwise import ElementwiseKernel ######################################################################## copyDtoD_float = ElementwiseKernel(arguments="float *input, float *output", operation = "output[i] = input[i];") ######################################################################## floatToUchar = ElementwiseKernel(arguments="float *input, unsigned char *output", operation = "output[i] = (unsigned char) ( -255*(input[i]-1));") ######################################################################## multiplyByFloat = ElementwiseKernel(arguments="float a, float *input", operation = "input[i] = a*input[i];") ######################################################################## def sendToScreen( plotData ):
surface<void, 2> surf; __global__ void kernel(int width, int height) { // Calculate surface coordinates unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; if (x < 400 && y < 400) { float data = x / 400.f; // Write to output surface surf2Dwrite(data, surf, x*4, y); } } """) kernel_function = mod.get_function('kernel') surface_ref = mod.get_surfref('surf') # surface_ref.set_array(Density.ping_array,0) surface_ref.set_array(Density.ping_array) def Program(fragment): program = gloo.Program("vertex_passthrough.vert", fragment, count=4) program['Position'] = [(-1,-1), (-1,+1), (+1,-1), (+1,+1)] return program Density = Slab(GridWidth, GridHeight, 1, gl.GL_LINEAR) prog_visualize = Program("visualize.frag") def ClearSurface(surface, v):
#'blockDim.x': block3D[0], 'blockDim.y': block3D[1], 'blockDim.z': block3D[2], #'gridDim.x': grid3D[0], 'gridDim.y': grid3D[1], 'gridDim.z': grid3D[2] } cudaCode = SourceModule(cudaCodeString) #setFlux_kernel = cudaCode.get_function('setFlux') setInterFlux_hll_kernel = cudaCode.get_function('setInterFlux_hll') getInterFlux_hll_kernel = cudaCode.get_function('getInterFlux_hll') iterPoissonStep_kernel = cudaCode.get_function('iterPoissonStep') getGravityForce_kernel = cudaCode.get_function('getGravityForce') getBounderyPotential_kernel = cudaCode.get_function('getBounderyPotential') reduceDensity_kernel = cudaCode.get_function('reduceDensity' ) tex_1 = cudaCode.get_texref("tex_1") tex_2 = cudaCode.get_texref("tex_2") tex_3 = cudaCode.get_texref("tex_3") tex_4 = cudaCode.get_texref("tex_4") tex_5 = cudaCode.get_texref("tex_5") surf_1 = cudaCode.get_surfref("surf_1") surf_2 = cudaCode.get_surfref("surf_2") surf_3 = cudaCode.get_surfref("surf_3") surf_4 = cudaCode.get_surfref("surf_4") surf_5 = cudaCode.get_surfref("surf_5") ######################################################################## convertToUCHAR = ElementwiseKernel(arguments="cudaP normaliztion, cudaP *values, unsigned char *psiUCHAR".replace("cudaP", cudaP), operation = "psiUCHAR[i] = (unsigned char) ( -255*( values[i]*normaliztion -1 ) );", name = "sendModuloToUCHAR_kernel") ######################################################################## getTimeMin_kernel = ReductionKernel( np.dtype( cudaPre ), neutral = "1e6", arguments=" float delta, cudaP* cnsv_rho, cudaP* cnsv_vel, float* soundVel".replace("cudaP", cudaP), map_expr = " delta / ( abs( cnsv_vel[i]/ cnsv_rho[i] ) + soundVel[i] ) ", reduce_expr = "min(a,b)", name = "getTimeMin_kernel")