Example #1
0
File: cuda.py  Project: ALEXGUOQ/chainer
def init(device=None):
    """Set up Chainer's per-process CUDA global state.

    Chainer keeps one CUDA context, CUBLAS handle table, random number
    generator table and memory pool table per process. The first call in a
    process creates all of them; subsequent calls in the same process are
    no-ops. It also registers :func:`shutdown` with :mod:`atexit` and seeds
    the random number generator (``CHAINER_SEED`` environment variable may
    supply a fixed seed).

    .. warning::

       PyCUDA and scikits.cuda are initialized here and do not survive a
       subsequent :mod:`multiprocessing` fork — call this only after any
       forking is done.

    Args:
        device (``int`` or :class:`~pycuda.driver.Device` or ``None``): Device
            ID to initialize on. ``None`` selects the default device.

    """
    global _contexts, _cublas_handles, _generators, _pid, _pools

    if not available:
        global _import_error
        raise RuntimeError(
            'CUDA environment is not correctly set up. '
            'The original import error said: ' + str(_import_error))

    current_pid = os.getpid()
    if current_pid == _pid:
        # This process already ran init(); nothing to do.
        return

    drv.init()

    if device is None:
        # Let PyCUDA choose and activate the default device.
        ctx = cutools.make_default_context()
        device = Context.get_device()
    else:
        device = Device(device)
        ctx = device.make_context()

    _contexts = {device: ctx}
    _generators = {}
    _pools = {}
    _cublas_handles = {}
    cumisc.init(mem_alloc)

    seed(os.environ.get('CHAINER_SEED'))

    _pid = current_pid  # mark this process as initialized
    atexit.register(shutdown)
Example #2
0
import time
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
import scikits.cuda.misc as scm
import pycuda.autoinit
import utils
import numpy as np

# Problem dimensions: K classes per row, B batch rows, N index vectors.
K = 87930
B = 1000

N = 1000

scm.init()

# Device-resident inputs: widely spread random scores and probabilities.
# NOTE: the np.random call order below is part of the script's behavior
# (it fixes the RNG stream) and is kept unchanged.
scores = gpuarray.to_gpu((5 * np.random.randn(B, K)).astype(np.float32))
probs = gpuarray.to_gpu(np.random.rand(B, K).astype(np.float32))

# Preallocated per-row output buffers on the device.
deltas = gpuarray.empty_like(scores)
maxscores = gpuarray.empty((B,), dtype=np.float32)
maxscoreids = gpuarray.empty((B,), dtype=np.uint32)
sumdeltas = gpuarray.empty((B,), dtype=np.float32)

# Host scratch buffer plus N batches of random column indices.
cpu_probs = np.empty((B, K), dtype=np.float32)
indices = np.random.randint(0, K, size=(N, B)).astype(np.uint32)
gpu_ind = gpuarray.empty((B,), dtype=np.uint32)
selected_probs = gpuarray.empty((B,), dtype=np.float32)

# Gather one probability per row for each of the first ten index batches.
for step in range(10):
    gpu_ind.set(indices[step])
    gpuarray.take(probs, gpu_ind, out=selected_probs)
Example #3
0
import time
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
import scikits.cuda.misc as scm
import pycuda.autoinit
import utils
import numpy as np

# Sizes: K columns, B rows, N precomputed index sets.
K = 87930
B = 1000

N = 1000

scm.init()

# Upload random inputs to the GPU. The order of np.random calls in this
# script is deliberate (it determines the RNG stream) and is preserved.
scores = gpuarray.to_gpu((5 * np.random.randn(B, K)).astype(np.float32))
probs = gpuarray.to_gpu(np.random.rand(B, K).astype(np.float32))

# Device output buffers, one value per batch row where applicable.
deltas = gpuarray.empty_like(scores)
maxscores = gpuarray.empty((B,), dtype=np.float32)
maxscoreids = gpuarray.empty((B,), dtype=np.uint32)
sumdeltas = gpuarray.empty((B,), dtype=np.float32)

# Host-side scratch and the index batches to gather with.
cpu_probs = np.empty((B, K), dtype=np.float32)
indices = np.random.randint(0, K, size=(N, B)).astype(np.uint32)
gpu_ind = gpuarray.empty((B,), dtype=np.uint32)
selected_probs = gpuarray.empty((B,), dtype=np.float32)

# For the first ten index batches, pick one probability per row.
for k in range(10):
    gpu_ind.set(indices[k])
    gpuarray.take(probs, gpu_ind, out=selected_probs)
Example #4
0
 def setUp(self):
     """Reset test fixtures: seed NumPy's RNG and initialize CUDA helpers.

     Seeding with a fixed value makes every test's random data
     reproducible. ``misc`` is presumably ``scikits.cuda.misc`` (its
     import is outside this fragment) — confirm against the file header.
     """
     np.random.seed(0)
     misc.init()