"""Create the process-wide cudaconv2 CUDA context.

CONTEXT is created once at import time; other modules import this module
to share the same device context.
"""
import cudaconv2

# NOTE(review): an earlier PyCUDA-based device-selection loop (pick the
# device with the most free memory, push/pop each candidate context) was
# left here commented out; cudaconv2.init() is now the single entry point.
# Presumably init() selects a device itself -- confirm before relying on
# multi-GPU placement.
CONTEXT = cudaconv2.init()
import cudaconv2 cudaconv2.init() import pycuda.driver as cuda import sys from pycuda import gpuarray, driver import cudaconv2 import numpy as np from scipy.signal import convolve2d imgSize = 32 filterSize = 5 padding = 2 color = 1 imgNum = 1 filterNum = 64 stride = 1 modulesX = 1 + int(((2 * padding + imgSize - filterSize) / float(stride))) print 'Modules X', modulesX img = gpuarray.to_gpu(np.ones((imgSize * imgSize * color, imgNum)).astype(np.float32)) filter = gpuarray.to_gpu(np.ones((filterSize * filterSize * color, filterNum)).astype(np.float32)) target = gpuarray.to_gpu(np.ones((modulesX * modulesX * filterNum, imgNum)).astype(np.float32)) print 'standard output for convolution' print convolve2d(np.ones((imgSize, imgSize)).astype(np.float32), np.ones((filterSize, filterSize)).astype(np.float32),'valid') cudaconv2.convFilterActs(img, filter, target, imgSize, modulesX, modulesX, -padding, stride, color, 1, 0.0, 1.0)
#!/usr/bin/env python '''A relatively simple distributed network implementation, using async SGD.''' from fastnet import net, layer, data, parser, weights from fastnet.util import EZTimer from mpi4py import MPI import ctypes import cudaconv2 import numpy as np import os WORLD = MPI.COMM_WORLD cudaconv2.init(WORLD.Get_rank()) print 'CUDA', os.environ.get('MV2_USE_CUDA') MASTER = 0 WORKERS = range(1, WORLD.Get_size()) batch_size = 128 data_dir = '/ssd/nn-data/imagenet/' data_provider = 'imagenet' checkpoint_dir = './checkpoint' param_file = 'config/imagenet.cfg' train_range = range(101, 1301) test_range = range(1, 101)
#!/usr/bin/env python '''A relatively simple distributed network implementation, using async SGD.''' from fastnet import net, layer, data, parser, weights from fastnet.util import EZTimer from mpi4py import MPI import ctypes import cudaconv2 import numpy as np import os WORLD = MPI.COMM_WORLD cudaconv2.init(WORLD.Get_rank()) print 'CUDA', os.environ.get('MV2_USE_CUDA') MASTER = 0 WORKERS = range(1, WORLD.Get_size()) batch_size = 128 data_dir = '/ssd/nn-data/imagenet/' data_provider = 'imagenet' checkpoint_dir = './checkpoint' param_file = 'config/imagenet.cfg' train_range = range(101, 1301) test_range = range(1, 101) data_provider = 'imagenet'