def train_rfn_gpu(X, n_hidden, n_iter, learnrateW, learnratePsi, dropout_rate, input_droput_rate, minPsi=0.1, seed=32):
    """Iteratively fit the weight matrix ``W`` and the vector ``P`` on the GPU.

    (Presumably a rectified factor network trainer, judging by the name --
    TODO confirm.)

    Parameters
    ----------
    X : host array with a two-element ``shape`` ``(n, m)``; uploaded to the GPU.
    n_hidden : number of rows ``k`` of ``W`` (``W`` is created as ``(k, m)``).
    n_iter : number of update iterations to run.
    learnrateW : step size applied to the ``W`` update ``dW``.
    learnratePsi : step size applied to the ``P`` update ``dP``.
    dropout_rate : when > 0, ``dropout(H, dropout_rate)`` is applied to the
        hidden activations in every iteration.
    input_droput_rate : (sic -- the misspelling is part of the public
        signature) when > 0, a copy of ``X`` is perturbed with
        ``saltpepper_noise`` in every iteration.
    minPsi : lower bound enforced on every entry of ``P`` after each update.
    seed : value passed to ``init_rng`` before the loop starts.

    Returns
    -------
    tuple ``(W, P)`` copied back to the host via ``.get()``.
    """
    # The parameter name carries a typo that callers may already pass by
    # keyword, so it is kept; the body previously read the correctly spelled
    # (and therefore undefined) name, which raised NameError inside the loop.
    input_dropout_rate = input_droput_rate

    k = n_hidden
    n, m = X.shape
    W = np.random.normal(scale=0.01, size=(k, m)).astype(np.float32)
    P = np.array([0.1] * m, dtype=np.float32)
    XXdiag = np.diag(np.dot(X.T, X) / n).copy()  # explicit copy to avoid numpy 1.8 warning

    # Move all state to the device, allocating from the shared memory pool.
    W = gpu.to_gpu(W, allocator=_mempool.allocate)
    P = gpu.to_gpu(P, allocator=_mempool.allocate)
    X = gpu.to_gpu(X, allocator=_mempool.allocate)
    XXdiag = gpu.to_gpu(XXdiag, allocator=_mempool.allocate)
    I = la.eye(k, dtype=np.float32)

    init_rng(seed)
    t0 = time.time()
    for cur_iter in range(n_iter):
        H, tmp = calculate_H_gpu(X, W, P)
        if dropout_rate > 0:
            dropout(H, dropout_rate)

        # Only pay for a copy of X when input noise is actually requested.
        Xtmp = X
        if input_dropout_rate > 0:
            Xtmp = X.copy()
            saltpepper_noise(Xtmp, input_dropout_rate)

        U = la.dot(Xtmp, H, "t", "n") / n
        S = la.dot(H, H, "t", "n") / n
        S += I
        S -= la.dot(tmp, W, "n", "t")

        # Cii is computed before the inversion because inv(..., overwrite=True)
        # is asked to overwrite its input (presumably reusing S's buffer).
        Cii = la.dot(la.dot(W, S, "t") - 2 * U, W)
        Sinv = la.inv(S, overwrite=True)

        dW = la.dot(Sinv, U, "n", "t") - W
        dP = XXdiag + la.diag(Cii) - P
        W += learnrateW * dW
        P += learnratePsi * dP
        P = gpu.maximum(P, minPsi)  # clamp P from below at minPsi

        if cur_iter % 25 == 0:
            # Single pre-formatted argument: prints the same text whether
            # ``print`` is the statement or the function.
            print("iter %3d (elapsed time: %5.2fs)" % (cur_iter, time.time() - t0))
    return W.get(), P.get()
def train_rfn_gpu(X, n_hidden, n_iter, learnrateW, learnratePsi, dropout_rate, input_droput_rate, minPsi=0.1, seed=32):
    """Iteratively fit the weight matrix ``W`` and the vector ``P`` on the GPU.

    (Presumably a rectified factor network trainer, judging by the name --
    TODO confirm.)

    Parameters
    ----------
    X : host array with a two-element ``shape`` ``(n, m)``; uploaded to the GPU.
    n_hidden : number of rows ``k`` of ``W`` (``W`` is created as ``(k, m)``).
    n_iter : number of update iterations to run.
    learnrateW : step size applied to the ``W`` update ``dW``.
    learnratePsi : step size applied to the ``P`` update ``dP``.
    dropout_rate : when > 0, ``dropout(H, dropout_rate)`` is applied to the
        hidden activations in every iteration.
    input_droput_rate : (sic -- the misspelling is part of the public
        signature) when > 0, a copy of ``X`` is perturbed with
        ``saltpepper_noise`` in every iteration.
    minPsi : lower bound enforced on every entry of ``P`` after each update.
    seed : value passed to ``init_rng`` before the loop starts.

    Returns
    -------
    tuple ``(W, P)`` copied back to the host via ``.get()``.
    """
    # The parameter name carries a typo that callers may already pass by
    # keyword, so it is kept; the body previously read the correctly spelled
    # (and therefore undefined) name, which raised NameError inside the loop.
    input_dropout_rate = input_droput_rate

    k = n_hidden
    n, m = X.shape
    W = np.random.normal(scale=0.01, size=(k, m)).astype(np.float32)
    P = np.array([0.1] * m, dtype=np.float32)
    XXdiag = np.diag(np.dot(X.T, X) / n).copy()  # explicit copy to avoid numpy 1.8 warning

    # Move all state to the device, allocating from the shared memory pool.
    W = gpu.to_gpu(W, allocator=_mempool.allocate)
    P = gpu.to_gpu(P, allocator=_mempool.allocate)
    X = gpu.to_gpu(X, allocator=_mempool.allocate)
    XXdiag = gpu.to_gpu(XXdiag, allocator=_mempool.allocate)
    I = la.eye(k, dtype=np.float32)

    init_rng(seed)
    t0 = time.time()
    for cur_iter in range(n_iter):
        H, tmp = calculate_H_gpu(X, W, P)
        if dropout_rate > 0:
            dropout(H, dropout_rate)

        # Only pay for a copy of X when input noise is actually requested.
        Xtmp = X
        if input_dropout_rate > 0:
            Xtmp = X.copy()
            saltpepper_noise(Xtmp, input_dropout_rate)

        U = la.dot(Xtmp, H, "t", "n") / n
        S = la.dot(H, H, "t", "n") / n
        S += I
        S -= la.dot(tmp, W, "n", "t")

        # Cii is computed before the inversion because inv(..., overwrite=True)
        # is asked to overwrite its input (presumably reusing S's buffer).
        Cii = la.dot(la.dot(W, S, "t") - 2 * U, W)
        Sinv = la.inv(S, overwrite=True)

        dW = la.dot(Sinv, U, "n", "t") - W
        dP = XXdiag + la.diag(Cii) - P
        W += learnrateW * dW
        P += learnratePsi * dP
        P = gpu.maximum(P, minPsi)  # clamp P from below at minPsi

        if cur_iter % 25 == 0:
            # Single pre-formatted argument: prints the same text whether
            # ``print`` is the statement or the function.
            print("iter %3d (elapsed time: %5.2fs)" % (cur_iter, time.time() - t0))
    return W.get(), P.get()
#!/usr/bin/env python

"""
Demonstrate diagonal matrix creation on the GPU.
"""

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import scikits.cuda.linalg as culinalg
import scikits.cuda.misc as cumisc
culinalg.init()

import string

# Double precision is only supported by devices with compute
# capability >= 1.3:
supports_double = cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3
demo_types = [np.float32, np.complex64]
if supports_double:
    demo_types.extend([np.float64, np.complex128])

for dtype in demo_types:
    # NOTE(review): the message says "real" even for the complex dtypes.
    print('Testing real diagonal matrix creation for type ' + str(np.dtype(dtype)))

    # Build the diagonal matrix on the device and compare it with NumPy's.
    host_vec = np.array([1, 2, 3, 4, 5, 6], dtype)
    dev_vec = gpuarray.to_gpu(host_vec)
    dev_diag = culinalg.diag(dev_vec)
    matches = np.all(dev_diag.get() == np.diag(host_vec))

    # One pre-formatted argument reproduces the two-item print statement's
    # output (label, separating space, value) under either print flavour.
    print('%s %s' % ('Success status: ', matches))
def test_diag_2d_tall_complex128(self):
    # Tall (64 x 32) purely imaginary complex128 matrix: the GPU diag()
    # result must equal np.diag() of the same host data element for element.
    host = (np.random.rand(64, 32) * 1j).astype(np.complex128)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_1d_complex128(self):
    # 1-D complex128 vector: the GPU diag() result must equal the matrix
    # np.diag() builds from the same host data.
    host = np.array([1j, 2j, 3j, 4j, 5j, 6j], dtype=np.complex128)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_2d_wide_complex64(self):
    # Wide (32 x 64) purely imaginary complex64 matrix: the GPU diag()
    # result must equal np.diag() of the same host data element for element.
    host = (np.random.rand(32, 64) * 1j).astype(np.complex64)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_2d_tall_float64(self):
    # Tall (64 x 32) float64 matrix: the GPU diag() result must equal
    # np.diag() of the same host data element for element.
    host = np.random.rand(64, 32).astype(np.float64)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_1d_float64(self):
    # 1-D float64 vector: the GPU diag() result must equal the matrix
    # np.diag() builds from the same host data.
    host = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_2d_wide_float32(self):
    # Wide (32 x 64) float32 matrix: the GPU diag() result must equal
    # np.diag() of the same host data element for element.
    host = np.random.rand(32, 64).astype(np.float32)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_complex128(self):
    # 1-D complex128 vector: the GPU diag() result must equal the matrix
    # np.diag() builds from the same host data.
    host = np.array([1j, 2j, 3j, 4j, 5j, 6j], dtype=np.complex128)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_float64(self):
    # 1-D float64 vector: the GPU diag() result must equal the matrix
    # np.diag() builds from the same host data.
    host = np.array([1, 2, 3, 4, 5, 6], dtype=np.float64)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
def test_diag_float32_large(self):
    # Square (64 x 64) float32 matrix: the GPU diag() result must equal
    # np.diag() of the same host data element for element.
    host = np.random.rand(64, 64).astype(np.float32)
    expected = np.diag(host)
    on_device = gpuarray.to_gpu(host)
    result = linalg.diag(on_device).get()
    assert np.all(expected == result)
#!/usr/bin/env python

"""
Demonstrate diagonal matrix creation on the GPU.
"""

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import scikits.cuda.linalg as culinalg
import scikits.cuda.misc as cumisc
culinalg.init()

import string

# Double precision is only supported by devices with compute
# capability >= 1.3:
supports_double = cumisc.get_compute_capability(pycuda.autoinit.device) >= 1.3
demo_types = [np.float32, np.complex64]
if supports_double:
    demo_types.extend([np.float64, np.complex128])

for dtype in demo_types:
    # NOTE(review): the message says "real" even for the complex dtypes.
    print('Testing real diagonal matrix creation for type ' + str(np.dtype(dtype)))

    # Build the diagonal matrix on the device and compare it with NumPy's.
    host_vec = np.array([1, 2, 3, 4, 5, 6], dtype)
    dev_vec = gpuarray.to_gpu(host_vec)
    dev_diag = culinalg.diag(dev_vec)
    matches = np.all(dev_diag.get() == np.diag(host_vec))

    # One pre-formatted argument reproduces the two-item print statement's
    # output (label, separating space, value) under either print flavour.
    print('%s %s' % ('Success status: ', matches))
#!/usr/bin/env python

"""
Demonstrate diagonal matrix creation on the GPU.
"""

import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy as np

import scikits.cuda.linalg as linalg
linalg.init()

# One (values, dtype) pair per demonstration: a real float32 vector
# followed by a purely imaginary complex64 vector.
demo_inputs = (
    ([1, 2, 3, 4, 5, 6], np.float32),
    ([1j, 2j, 3j, 4j, 5j, 6j], np.complex64),
)

for values, dtype in demo_inputs:
    v = np.array(values, dtype)
    v_gpu = gpuarray.to_gpu(v)
    # This version of diag() is called with the device as a second argument.
    d_gpu = linalg.diag(v_gpu, pycuda.autoinit.device)
    matches = np.all(d_gpu.get() == np.diag(v))

    # One pre-formatted argument reproduces the two-item print statement's
    # output (label, separating space, value) under either print flavour.
    print('%s %s' % ('Success status: ', matches))