def three():
    """Time a 3x3 convolution forward pass on the GPU and spot-check it.

    Scenario: 32x32 images, batch of 32, 512 input and 512 output filters.
    Random float32 weights and inputs (seeded with 123) are wrapped with
    ``MyTensor.from_np``, a ``Convolution`` layer (stride 1, padding 1) is
    configured, and ``fprop`` is run once untimed followed by three timed
    runs. A handful of output positions are then passed to ``check``.

    Takes no arguments and returns nothing; results are printed.
    """
    side = 32
    n_batch = 32
    c_in = 512
    c_out = 512

    # Fixed seed: weights are drawn first, inputs second -- keep that order so
    # the generated data stays the same from run to run.
    np.random.seed(123)

    with make_backend(batch_size=n_batch, datatype=np.float32,
                      device_id=0) as be:
        # Weights have shape (input_filters, 3, 3, output_filters).
        weights = np.random.randn(c_in, 3, 3, c_out).astype(np.float32)
        weights_dev = MyTensor.from_np(weights)
        print('type(W_cuda)', type(weights_dev))

        # Inputs have shape (input_filters, image_size, image_size, batch_size).
        inputs = np.random.randn(c_in, side, side, n_batch).astype(np.float32)
        inputs_dev = MyTensor.from_np(inputs)
        print('type(inputs_cuda)', type(inputs_dev))

        layer = Convolution((3, 3, c_out), strides=1, padding=1, be=be)
        print('created conv')

        # NOTE(review): the weights are assigned both before and after
        # configure(); presumably a guard against configure() touching W --
        # confirm before collapsing this into a single assignment.
        layer.W = weights_dev
        layer.configure((c_in, side, side))
        layer.W = weights_dev
        print('configure done')

        # Host buffer for the flattened result, one column per batch item.
        outputs = np.zeros((side * side * c_out, n_batch), dtype=np.float32)
        outputs_dev = MyTensor.from_np(outputs)
        layer.outputs = outputs_dev

        # One untimed pass before the measured runs.
        layer.fprop(inputs_dev)
        cuda.Context.synchronize()

        for _ in range(3):
            began = time.time()
            layer.fprop(inputs_dev)
            # Synchronize before reading the clock so the measured interval
            # is not cut short while the GPU is still working.
            cuda.Context.synchronize()
            print('time=', time.time() - began)

        # NOTE(review): presumably to_host() copies the device result back
        # into the `outputs` array wrapped above -- confirm in MyTensor.
        outputs_dev.to_host()
        print(outputs[1:3, 1:3])
        print('outputs.shape', outputs.shape)
        printDims(W=weights, I=inputs)

        # (c, h, w, n) positions handed to check(), in the original order.
        spot_checks = (
            (0, 0, 0, 0),
            (0, 0, 0, 1),
            (0, 0, 1, 0),
            (0, 1, 0, 0),
            (1, 0, 0, 0),
            (3, 2, 1, 27),
            (17, 25, 7, 27),
        )
        for c, h, w, n in spot_checks:
            check(W=weights, I=inputs, O=outputs, c=c, h=h, w=w, n=n, eps=1e-3)
# Script: build a neon Convolution layer (3x3 filters, stride 1, padding 1)
# on GPU device 0, upload random float32 weights and inputs with pycuda's
# gpuarray, and configure the layer for the chosen input shape.
from neon.layers.layer import Convolution
from neon.backends.make_backend import make_backend
import numpy as np
import pycuda.driver as cuda
# NOTE(review): pycuda.autoinit is left un-imported (the import was commented
# out); presumably make_backend sets up the CUDA context -- confirm.
import pycuda.gpuarray as gpuarray
import time

# Problem dimensions.
image_size = 64
batch_size = 128
input_filters = 32
output_filters = 32

# Fixed seed: weights are drawn first, inputs second -- keep that order so
# the generated data stays the same from run to run.
np.random.seed(123)

with make_backend(batch_size=batch_size, datatype=np.float32,
                  device_id=0) as be:
    conv = Convolution(
        (3, 3, output_filters),
        strides=1,
        padding=1,
        be=be,
    )
    print('created conv')

    # Weights: (input_filters, 3, 3, output_filters), uploaded to the GPU.
    weight_shape = (input_filters, 3, 3, output_filters)
    W = np.random.randn(*weight_shape).astype(np.float32)
    W_cuda = gpuarray.to_gpu(W)
    conv.W = W_cuda
    print('type(W_cuda)', type(W_cuda))

    # Inputs: (input_filters, image_size, image_size, batch_size).
    input_shape = (input_filters, image_size, image_size, batch_size)
    inputs = np.random.randn(*input_shape).astype(np.float32)
    inputs_cuda = gpuarray.to_gpu(inputs)
    print('type(inputs_cuda)', type(inputs_cuda))

    conv.configure((input_filters, image_size, image_size))