Example #1
import cudaconv2

# Commented-out pycuda device selection (pick the GPU with the most free
# memory); cudaconv2.init() below takes care of device setup instead.
# driver.init()
# device_info = (0, 0)
# for i in range(driver.Device.count()):
#  dev = driver.Device(i)
#  ctx = dev.make_context()
#  ctx.push()
#  free, total = driver.mem_get_info()
#  print('Free Memory for Device', i, 'is', free / 1000000, 'MB')
#
#  if device_info[1] < free:
#    device_info = (i, free)
#
#  ctx.pop()
#  ctx.detach()

# print('Choose Device', device_info[0])
# dev = driver.Device(device_info[0])

CONTEXT = cudaconv2.init()
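
For reference, here is a self-contained sketch of the device-selection loop the comments describe, using only documented pycuda driver calls (make_context() already makes the new context current, so the extra push() from the commented code is dropped); treat it as illustrative rather than as cudaconv2's own logic:

import pycuda.driver as driver

def pick_device_with_most_free_memory():
    driver.init()
    best_device, best_free = 0, 0
    for i in range(driver.Device.count()):
        ctx = driver.Device(i).make_context()  # becomes the current context
        free, total = driver.mem_get_info()
        print('Free Memory for Device', i, 'is', free // 1000000, 'MB')
        if free > best_free:
            best_device, best_free = i, free
        ctx.pop()      # deactivate before moving on
        ctx.detach()   # release the context entirely
    return best_device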
Example #2
import cudaconv2
cudaconv2.init()

import numpy as np
from pycuda import gpuarray
from scipy.signal import convolve2d

imgSize = 32     # input width/height
filterSize = 5   # filter width/height
padding = 2      # zero-padding on each side
color = 1        # number of image channels
imgNum = 1       # images per batch
filterNum = 64   # number of filters

stride = 1
# One output position ("module") per stride step across the padded image.
modulesX = 1 + int((2 * padding + imgSize - filterSize) / float(stride))

print('Modules X', modulesX)


# cuda-convnet layout: column-major matrices with one column per image
# (pixels * channels rows) and one column per filter.
img = gpuarray.to_gpu(np.ones((imgSize * imgSize * color, imgNum)).astype(np.float32))
filters = gpuarray.to_gpu(np.ones((filterSize * filterSize * color, filterNum)).astype(np.float32))
target = gpuarray.to_gpu(np.ones((modulesX * modulesX * filterNum, imgNum)).astype(np.float32))

print('standard output for convolution')
print(convolve2d(np.ones((imgSize, imgSize)).astype(np.float32),
                 np.ones((filterSize, filterSize)).astype(np.float32), 'valid'))
cudaconv2.convFilterActs(img, filters, target, imgSize, modulesX, modulesX,
                         -padding, stride, color, 1, 0.0, 1.0)
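
convFilterActs writes its output into `target` on the GPU, so the result can be checked on the host. A minimal sketch, assuming a (filterNum, modulesX, modulesX) layout per output column; since the filters are all ones, convolution and cross-correlation coincide, so a zero-padded SciPy reference is directly comparable:

result = target.get()[:, 0].reshape(filterNum, modulesX, modulesX)  # layout assumed
padded = np.pad(np.ones((imgSize, imgSize), dtype=np.float32),
                padding, mode='constant')
reference = convolve2d(padded,
                       np.ones((filterSize, filterSize), dtype=np.float32),
                       'valid')
print('max abs difference for filter 0:', np.abs(result[0] - reference).max())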
Example #3
#!/usr/bin/env python

'''A relatively simple distributed network implementation, using async SGD.'''

from fastnet import net, layer, data, parser, weights
from fastnet.util import EZTimer
from mpi4py import MPI
import ctypes
import cudaconv2
import numpy as np
import os


WORLD = MPI.COMM_WORLD

cudaconv2.init(WORLD.Get_rank())  # select a GPU per MPI rank

print('CUDA', os.environ.get('MV2_USE_CUDA'))

MASTER = 0  # rank 0 coordinates parameter updates
WORKERS = list(range(1, WORLD.Get_size()))

batch_size = 128

data_dir = '/ssd/nn-data/imagenet/'
data_provider = 'imagenet'
checkpoint_dir = './checkpoint'
param_file = 'config/imagenet.cfg'

train_range = range(101, 1301)  # data batch indices used for training
test_range = range(1, 101)      # data batch indices used for evaluation
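
This excerpt only sets up ranks and configuration; the async-SGD exchange itself happens elsewhere in fastnet. A minimal sketch of the message flow such a setup implies, written with plain mpi4py buffer sends/receives; the tags and the bare SGD update rule are illustrative assumptions, not fastnet's actual protocol:

def worker_step(comm, grad):
    # Push the local gradient to the master, then pull fresh weights back.
    comm.Send(grad, dest=MASTER, tag=1)
    fresh = np.empty_like(grad)
    comm.Recv(fresh, source=MASTER, tag=2)
    return fresh

def master_step(comm, weights, lr=0.01):
    # Apply whichever worker's gradient arrives first (the asynchronous
    # part), then reply only to that worker with the updated weights.
    grad = np.empty_like(weights)
    status = MPI.Status()
    comm.Recv(grad, source=MPI.ANY_SOURCE, tag=1, status=status)
    weights -= lr * grad
    comm.Send(weights, dest=status.Get_source(), tag=2)
    return weights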