def blob_to_CudaNdArray(b, diff=False):
    """Wrap the GPU data and diff buffers of a blob as CudaNdarrays.

    No device memory is copied: both results are built with
    ``cuda.from_gpu_pointer`` directly from the blob's raw pointers, and
    ``b`` is passed as the base object of each result.

    :param b: object exposing ``gpu_data_ptr``, ``gpu_diff_ptr`` and
        ``shape`` (presumably a Caffe blob -- TODO confirm against callers).
    :param diff: currently unused; both views are always returned. Kept so
        existing callers that pass it keep working.
    :return: a ``(data, diff)`` tuple of CudaNdarrays sharing ``b.shape``
        and the same strides.
    """
    from theano.sandbox import cuda

    # `long` only exists on Python 2. Keep using it there (so the pointer
    # type handed to from_gpu_pointer is unchanged) and fall back to `int`
    # on Python 3, where the bare name would raise NameError.
    try:
        to_address = long  # noqa: F821
    except NameError:
        to_address = int

    data_ptr = to_address(b.gpu_data_ptr)
    diff_ptr = to_address(b.gpu_diff_ptr)

    # Row-major strides with an innermost stride of 1; a 0-d blob gets an
    # empty strides tuple.
    strides = tuple()
    if len(b.shape) > 0:
        strides = [1]
        for extent in b.shape[::-1][:-1]:
            strides.append(strides[-1] * extent)
        strides = tuple(strides[::-1])

    return (
        cuda.from_gpu_pointer(data_ptr, b.shape, strides, b),
        cuda.from_gpu_pointer(diff_ptr, b.shape, strides, b),
    )
def garray_to_cudandarray(x):
    """
    take a gnumpy.garray and make a CudaNdarray that point to its memory

    The result is created with ``cuda.from_gpu_pointer`` from the device
    pointer of ``x._base.mat``; ``x._base`` is passed as the base object.

    :param x: the ``gnumpy.garray`` to expose.
    :return: the CudaNdarray returned by ``cuda.from_gpu_pointer``.
    :raises ValueError: if ``x`` is not a ``gnumpy.garray``.
    """
    if not isinstance(x, gnumpy.garray):
        raise ValueError(
            "We can transfer only gnumpy.garray to CudaNdarray")
    # No dtype check here: per the original author's note, cudamat (the
    # backing store) is always float32.

    import ctypes

    # `long` only exists on Python 2. Keep using it there (so the pointer
    # type handed to from_gpu_pointer is unchanged) and fall back to `int`
    # on Python 3, where the bare name would raise NameError.
    try:
        to_address = long  # noqa: F821
    except NameError:
        to_address = int

    # Row-major strides with an innermost stride of 1.
    strides = [1]
    for extent in x.shape[::-1][:-1]:
        strides.append(strides[-1] * extent)
    strides = strides[::-1]

    # Dimensions of length 1 are given a stride of 0.
    strides = tuple(
        0 if x.shape[axis] == 1 else stride
        for axis, stride in enumerate(strides)
    )

    ptr_long = to_address(
        ctypes.cast(x._base.mat.data_device, ctypes.c_void_p).value)

    return cuda.from_gpu_pointer(ptr_long, x.shape, strides, x._base)
def test_pycuda_memory_to_theano():
    """Check that a CudaNdarray can be built on top of pycuda GPUArray memory.

    The test creates a zero-filled pycuda array ``y``, wraps its device
    pointer with ``cuda_ndarray.from_gpu_pointer`` (passing ``y`` as the
    base), and then asserts that:

    * the reference count of ``y`` goes up by one per CudaNdarray/view that
      uses it as base and returns to its initial value once they are deleted;
    * in-place additions on the wrapper produce the expected values;
    * the hand-computed strides match those of a regular CudaNdarray of the
      same shape.

    The refcount assertions depend on exactly which names reference ``y``,
    so avoid introducing extra references when editing this test.
    """
    # Test that we can use the GpuArray memory space in pycuda in a CudaNdarray
    y = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    print(sys.getrefcount(y))
    # This increases the ref count with newer pycuda. Does pycuda also
    # cache ndarray?
    # print y.get()
    # Baseline against which every later refcount assertion is expressed.
    initial_refcount = sys.getrefcount(y)
    print("gpuarray ref count before creating a CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
    # Reference CudaNdarray of the same shape, used to compare strides and
    # as an operand for the in-place addition below.
    rand = np.random.randn(*y.shape).astype(np.float32)
    cuda_rand = cuda_ndarray.CudaNdarray(rand)

    # Row-major strides with an innermost stride of 1, computed by hand.
    strides = [1]
    for i in y.shape[::-1][:-1]:
        strides.append(strides[-1] * i)
    strides = tuple(strides[::-1])
    print('strides', strides)
    assert cuda_rand._strides == strides, (cuda_rand._strides, strides)

    # in pycuda trunk, y.ptr also works, which is a little cleaner
    y_ptr = int(y.gpudata)
    z = cuda_ndarray.from_gpu_pointer(y_ptr, y.shape, strides, y)
    print("gpuarray ref count after creating a CudaNdarray", sys.getrefcount(y))
    # The wrapper is expected to hold exactly one reference to y.
    assert sys.getrefcount(y) == initial_refcount + 1
    assert (np.asarray(z) == 0).all()
    assert z.base is y

    # Test that we can take a view from this cuda view on pycuda memory
    zz = z.view()
    # The view's base is y itself (not z), hence one more reference to y.
    assert sys.getrefcount(y) == initial_refcount + 2
    assert zz.base is y
    del zz
    assert sys.getrefcount(y) == initial_refcount + 1

    # Add a (1, 1, 1)-shaped array of ones in place to the zero-filled wrapper.
    cuda_ones = cuda_ndarray.CudaNdarray(np.asarray([[[1]]], dtype='float32'))
    z += cuda_ones
    assert (np.asarray(z) == np.ones(y.shape)).all()
    assert (np.asarray(z) == 1).all()

    assert cuda_rand.shape == z.shape
    assert cuda_rand._strides == z._strides, (cuda_rand._strides, z._strides)
    assert (np.asarray(cuda_rand) == rand).all()
    z += cuda_rand
    assert (np.asarray(z) == (rand + 1)).all()

    # Check that the ref count to the gpuarray is right.
    del z
    print("gpuarray ref count after deleting the CudaNdarray", end=' ')
    print(sys.getrefcount(y))
    assert sys.getrefcount(y) == initial_refcount
def to_cudandarray(x):
    """
    Expose a pycuda.gpuarray.GPUArray as a CudaNdarray backed by the same
    device memory.

    :param x: the ``pycuda.gpuarray.GPUArray`` to wrap; it is passed to
        ``cuda.from_gpu_pointer`` as the base object.
    :return: the CudaNdarray returned by ``cuda.from_gpu_pointer``.
    :raises ValueError: if ``x`` is not a GPUArray or is not float32.

    :note: CudaNdarray support only float32, so only float32 GPUArray are
        accepted
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    if x.dtype != "float32":
        raise ValueError("CudaNdarray support only float32")

    # Row-major strides: start at 1 for the last axis and multiply by each
    # extent while walking towards the first axis.
    step = 1
    steps = [step]
    for extent in reversed(x.shape[1:]):
        step = step * extent
        steps.append(step)
    steps.reverse()

    # in pycuda trunk, x.ptr also works, which is a little cleaner
    address = int(x.gpudata)
    return cuda.from_gpu_pointer(address, x.shape, tuple(steps), x)
def to_complex_cudandarray(x):
    """
    Variant of theano.misc.pycuda_utils.to_cudandarray for complex64 input.

    The complex64 GPUArray is exposed as a float32 CudaNdarray whose shape
    is ``x.shape`` plus an extra trailing dimension of length 2 holding the
    real/imaginary parts.

    :param x: the complex64 ``pycuda.gpuarray.GPUArray`` to wrap; it is
        passed to ``cuda.from_gpu_pointer`` as the base object.
    :return: the CudaNdarray returned by ``cuda.from_gpu_pointer``.
    :raises ValueError: if ``x`` is not a GPUArray or is not complex64.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError("We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    if x.dtype != "complex64":
        raise ValueError("Only conversion from complex64 arrays is supported")

    # The added trailing axis has stride 1 and the last original axis has
    # stride 2; every earlier axis multiplies the previous stride by the
    # extent of the axis after it.
    steps = [1, 2]
    for extent in reversed(x.shape[1:]):
        steps.append(steps[-1] * extent)
    steps.reverse()

    float_shape = tuple(x.shape) + (2,)

    # in pycuda trunk, x.ptr also works, which is a little cleaner
    address = int(x.gpudata)
    return cuda.from_gpu_pointer(address, float_shape, tuple(steps), x)
def cudamat_to_cudandarray(x):
    """
    Expose a cudamat.CUDAMatrix as a CudaNdarray backed by the same
    device memory.

    :param x: the ``cudamat.CUDAMatrix`` to wrap; it is passed to
        ``cuda.from_gpu_pointer`` as the base object.
    :return: the CudaNdarray returned by ``cuda.from_gpu_pointer``.
    :raises ValueError: if ``x`` is not a ``cudamat.CUDAMatrix``.
    """
    if not isinstance(x, cudamat.CUDAMatrix):
        raise ValueError(
            "We can transfer only cudamat.CUDAMatrix to CudaNdarray")
    # No dtype check here: per the original author's note, cudamat is
    # always float32.

    # Row-major strides, built outermost-first: start with the last axis
    # (stride 1) and prepend one stride per remaining axis.
    strides = (1,)
    for extent in reversed(x.shape[1:]):
        strides = (strides[0] * extent,) + strides

    import ctypes

    # Reinterpret the device pointer as void* to read its integer address.
    device_ptr = ctypes.cast(x.mat.data_device, ctypes.c_void_p)
    address = int(device_ptr.value)

    return cuda.from_gpu_pointer(address, x.shape, strides, x)
def to_complex_cudandarray(x):
    """
    Complex64 counterpart of theano.misc.pycuda_utils.to_cudandarray.

    Takes a complex64 GPUArray and returns a float32 CudaNdarray over the
    same memory, with one extra trailing dimension of length 2 for the
    real/imaginary parts.

    :param x: the complex64 ``pycuda.gpuarray.GPUArray`` to wrap; it is
        passed to ``cuda.from_gpu_pointer`` as the base object.
    :return: the CudaNdarray returned by ``cuda.from_gpu_pointer``.
    :raises ValueError: if ``x`` is not a GPUArray or is not complex64.
    """
    if not isinstance(x, pycuda.gpuarray.GPUArray):
        raise ValueError(
            "We can transfer only pycuda.gpuarray.GPUArray to CudaNdarray")
    if x.dtype != "complex64":
        raise ValueError("Only conversion from complex64 arrays is supported")

    # Built outermost-first: (2, 1) covers the last original axis and the
    # added real/imaginary axis; each earlier axis prepends the current
    # leading stride times the extent of the axis after it.
    strides = (2, 1)
    for extent in reversed(x.shape[1:]):
        strides = (strides[0] * extent,) + strides

    shape = tuple(x.shape) + (2,)

    # in pycuda trunk, x.ptr also works, which is a little cleaner
    ptr = int(x.gpudata)
    return cuda.from_gpu_pointer(ptr, shape, strides, x)