import numpy as np
import pycuda.autoinit  # creates a CUDA context (if not handled elsewhere)
from pycuda import gpuarray
from scipy.signal import convolve2d

import cudaconv2


def test_convolution():
  imgSize = 32
  filterSize = 5
  padding = 2
  color = 1
  imgNum = 1
  filterNum = 64
  stride = 1
  modulesX = 1 + int(((2 * padding + imgSize - filterSize) / float(stride)))
  print 'Modules X', modulesX

  img = gpuarray.to_gpu(np.ones((imgSize * imgSize * color, imgNum)).astype(np.float32))
  filter = gpuarray.to_gpu(np.ones((filterSize * filterSize * color, filterNum)).astype(np.float32))
  target = gpuarray.to_gpu(np.ones((modulesX * modulesX * filterNum, imgNum)).astype(np.float32))

  print 'standard output for convolution'
  # NB: this scipy reference is the unpadded 'valid' convolution (28x28 here), while the
  # convFilterActs call below uses padding=2 and produces a 32x32 map per filter.
  print convolve2d(np.ones((imgSize, imgSize)).astype(np.float32),
                   np.ones((filterSize, filterSize)).astype(np.float32), 'valid')

  cudaconv2.convFilterActs(img, filter, target, imgSize, modulesX, modulesX, -padding,
                           stride, color, 1, 0.0, 1.0)

  print 'pycuda output for convolution'
  atarget = target.get()
  print atarget
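# A minimal CPU reference for the padded convolution above, assuming convFilterActs
# zero-pads the image by `padding` on each side, slides the filter with the given
# stride, and does not flip the filter (i.e. cross-correlation). The helper name
# reference_filter_acts is ours, not part of cudaconv2.
import numpy as np
from scipy.signal import correlate2d

def reference_filter_acts(img2d, filt2d, padding, stride):
  padded = np.pad(img2d, padding, mode='constant')        # zero-pad the image
  full = correlate2d(padded, filt2d, mode='valid')        # correlation, no filter flip
  return full[::stride, ::stride]                         # subsample by stride

ref = reference_filter_acts(np.ones((32, 32), dtype=np.float32),
                            np.ones((5, 5), dtype=np.float32), padding=2, stride=1)
print ref.shape        # (32, 32), matching modulesX x modulesX above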
def fprop(self, input, output):
  cudaconv2.convFilterActs(input, self.filter, output, self.imgSize, self.outputSize,
                           self.outputSize, -self.padding, self.stride, self.numColor, 1)
  # add the bias: stage the output in a (numFilter, pixels * batch) buffer so that
  # add_vec_to_rows adds one bias value per filter row
  self.tmp = gpuarray.empty((self.numFilter,
                             self.get_single_img_size() * self.batchSize / self.numFilter),
                            dtype=np.float32)
  gpu_copy_to(output, self.tmp)
  add_vec_to_rows(self.tmp, self.bias)
  gpu_copy_to(self.tmp, output)
def fprop(self, input, output, train=TRAIN):
  cudaconv2.convFilterActs(input, self.weight, output, self.imgSize, self.outputSize,
                           self.outputSize, -self.padding, self.stride, self.numColor, 1)
  self.tmp = gpuarray.empty((self.numFilter,
                             self.get_single_img_size() * self.batchSize / self.numFilter),
                            dtype=np.float32)
  gpu_copy_to(output, self.tmp)
  add_vec_to_rows(self.tmp, self.bias)
  gpu_copy_to(self.tmp, output)

  if PFout:
    print_matrix(output, self.name)
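# The fprop variants above assume `output` was preallocated by the caller with one
# row per (filter, output pixel) and one column per image. A minimal sketch of that
# allocation, assuming the same outputSize/modulesX formula used in the test snippets
# in this listing; the helper name alloc_conv_output is hypothetical, not part of
# these layers.
import numpy as np
import pycuda.autoinit
from pycuda import gpuarray

def alloc_conv_output(imgSize, filterSize, padding, stride, numFilter, batchSize):
  outputSize = 1 + (2 * padding + imgSize - filterSize) // stride
  return outputSize, gpuarray.zeros((numFilter * outputSize * outputSize, batchSize),
                                    dtype=np.float32)

outputSize, output = alloc_conv_output(32, 5, 2, 1, numFilter=64, batchSize=128)
print outputSize, output.shape   # 32 (65536, 128)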
def convFilterActs(input, weight, output, bias, padding, stride):
  from distbase import cuda_base
  image_y = input.shape[ConvDataLayout.HEIGHT]
  output_y = output.shape[ConvDataLayout.HEIGHT]
  output_x = output.shape[ConvDataLayout.WIDTH]
  color = input.shape[ConvDataLayout.CHANNEL]

  cudaconv2.convFilterActs(input, weight, output, image_y, output_y, output_x, padding,
                           stride, color, 1)

  batch_size = output.shape[ConvDataLayout.BATCH]
  channel = output.shape[ConvDataLayout.CHANNEL]
  # bias term
  cuda_base.add_vec_to_rows(output.reshape((channel, output_y * output_x * batch_size)), bias)
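# Hedged NumPy sketch of the bias step above: viewing the output as one row per
# channel/filter lets a single row-wise add broadcast one bias value across every
# pixel and image of that filter. The (channel, height, width, batch) axis order is
# what the reshape above requires of ConvDataLayout; this is only an illustration,
# not the distbase implementation.
import numpy as np

channel, output_y, output_x, batch_size = 64, 32, 32, 128
out = np.zeros((channel, output_y, output_x, batch_size), dtype=np.float32)
bias = np.arange(channel, dtype=np.float32)

rows = out.reshape((channel, output_y * output_x * batch_size))   # view, no copy
rows += bias[:, np.newaxis]   # same effect as the add_vec_to_rows call (one bias per row)
print out[:5, 0, 0, 0]        # first five filters picked up bias 0..4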
def fprop(self, input, output, train=TRAIN):
  #np.save('input.arr', input.get())
  #np.save('weight.arr', self.weight.wt.get())
  cudaconv2.convFilterActs(input, self.weight.wt, output, self.img_size, self.outputSize,
                           self.outputSize, -self.padding, self.stride, self.numColor, 1)
  #util.log_info('%s', output.get().mean())
  self.tmp = gpuarray.empty((self.numFilter,
                             self.get_single_img_size() * self.batch_size / self.numFilter),
                            dtype=np.float32)
  gpu_copy_to(output, self.tmp)
  add_vec_to_rows(self.tmp, self.bias.wt)
  gpu_copy_to(self.tmp, output)

  if PFout:
    print_matrix(output, self.name)
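# The commented-out np.save calls above hint at a simple offline debugging loop:
# dump the GPU buffers to .npy files, then reload them on the CPU and inspect the
# statistics that util.log_info would have printed. A hedged sketch; the helper and
# file names follow the comments above and are not part of the layer itself.
import numpy as np

def dump_gpu_array(path, garr):
  np.save(path, garr.get())      # device -> host copy, then write <path>.npy

# inside fprop, when debugging:
#   dump_gpu_array('input.arr', input)
#   dump_gpu_array('weight.arr', self.weight.wt)
# later, offline:
#   print np.load('input.arr.npy').mean(), np.load('weight.arr.npy').mean()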
padding = 2
color = 1
imgNum = 1
filterNum = 64
stride = 1
modulesX = 1 + int(((2 * padding + imgSize - filterSize) / float(stride)))

img = gpuarray.to_gpu(np.ones((imgSize * imgSize * color, imgNum)).astype(np.float32))
filter = gpuarray.to_gpu(np.ones((filterSize * filterSize * color, filterNum)).astype(np.float32))
target = gpuarray.to_gpu(np.ones((modulesX * modulesX * filterNum, imgNum)).astype(np.float32))

print 'standard output for convolution'
print convolve2d(np.ones((imgSize, imgSize)).astype(np.float32),
                 np.ones((filterSize, filterSize)).astype(np.float32), 'valid')

cudaconv2.convFilterActs(img, filter, target, imgSize, modulesX, modulesX, -padding, stride,
                         color, 1, 0.0, 1.0)

print 'pycuda output for convolution'
print target.get()

#from pycuda.compiler import *
#mod = SourceModule(open('foo.cu').read(), no_extern_c=True, include_dirs=['/home/justin/guppy/include'])
#kernel = mod.get_function('kernel')
#def i(x): return np.int32(x)
#
#grid = (1, 32 * 32 * 64 / (4 * 8), 1)
#blocks = (32, 4, 1)
#kernel(img, filter, target, i(1), i(64), i(32), i(32), i(5), i(-2), i(1), i(32), i(32), i(1),
#       np.float32(0.0), np.float32(1.0), np.int32(True), block=blocks, grid=grid)
imgNum = 1
filterNum = 64
stride = 1
modulesX = 1 + int(((2 * padding + imgSize - filterSize) / float(stride)))
print 'Modules X', modulesX

img = gpuarray.to_gpu(np.ones((imgSize * imgSize * color, imgNum)).astype(np.float32))
filter = gpuarray.to_gpu(np.ones((filterSize * filterSize * color, filterNum)).astype(np.float32))
target = gpuarray.to_gpu(np.ones((modulesX * modulesX * filterNum, imgNum)).astype(np.float32))

print 'standard output for convolution'
print convolve2d(np.ones((imgSize, imgSize)).astype(np.float32),
                 np.ones((filterSize, filterSize)).astype(np.float32), 'valid')

cudaconv2.convFilterActs(img, filter, target, imgSize, modulesX, modulesX, -padding, stride,
                         color, 1, 0.0, 1.0)

print 'pycuda output for convolution'
atarget = target.get()
print atarget
#for i in range(atarget.shape[0]):
#  print atarget[i, 0]

#from pycuda.compiler import *
#mod = SourceModule(open('foo.cu').read(), no_extern_c=True, include_dirs=['/home/justin/guppy/include'])
#kernel = mod.get_function('kernel')
#def i(x): return np.int32(x)
#
#grid = (1, 32 * 32 * 64 / (4 * 8), 1)
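# Hedged continuation of the test above: with an all-ones image and an all-ones 5x5
# filter, each output value should equal the number of filter taps that fall inside
# the zero-padded image -- 25 in the interior, down to 3*3 = 9 at the corners.
# Assuming the target rows are ordered filter-major (which is what the fprop code
# above implies when it views the output as (numFilter, pixels * batch)):
per_filter = atarget.reshape((filterNum, modulesX, modulesX, imgNum))
print per_filter[0, 0, 0, 0], per_filter[0, 16, 16, 0]   # expect 9.0 and 25.0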