def ikj_lu_decomposer_opt_gpu(M):
    m = M.shape[0]
    n = M.shape[1]
    import numpy as np
    import pycuda.autoinit
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDscal, cublasDestroy
    import skcuda.misc as misc

    # Copy the input matrix to the GPU and create a cuBLAS context
    N_gpu = gpuarray.to_gpu(M)
    h = cublasCreate()
    for i in range(0, n):
        for k in range(0, i):
            # N[i,k] = N[i,k] / N[k,k]
            cublasDscal(h, N_gpu[i, k].size, 1.0 / np.float64(N_gpu[k, k].get()),
                        N_gpu[i, k].gpudata, 1)
            # N[i,k+1:] -= N[i,k] * N[k,k+1:]
            cublasDaxpy(h, N_gpu[k, k+1:].size, -np.float64(N_gpu[i, k].get()),
                        N_gpu[k, k+1:].gpudata, 1, N_gpu[i, k+1:].gpudata, 1)
    # Move the result from GPU back to the CPU
    N = N_gpu.get()
    cublasDestroy(h)
    return N
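A minimal sanity check for the routine (a sketch, not part of the original code): the factors are stored in-place, with the multipliers of the unit lower-triangular L below the diagonal and U on and above it, so for a diagonally dominant test matrix (no pivoting needed, an assumption of this check) L times U should reproduce the input.

import numpy as np

A = np.random.rand(8, 8) + 8.0 * np.eye(8)   # diagonally dominant, so no pivoting is needed
N = ikj_lu_decomposer_opt_gpu(A)
L = np.tril(N, -1) + np.eye(8)               # multipliers below the diagonal, unit diagonal
U = np.triu(N)                               # upper-triangular factor
assert np.allclose(L.dot(U), A)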
def test_cublasDscal(self):
    x = np.random.rand(5).astype(np.float64)
    x_gpu = gpuarray.to_gpu(x)
    alpha = np.float64(np.random.rand())
    cublas.cublasDscal(self.cublas_handle, x_gpu.size, alpha,
                       x_gpu.gpudata, 1)
    assert np.allclose(x_gpu.get(), alpha*x)
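The decomposers also lean on cublasDaxpy (signature in skcuda.cublas: handle, n, alpha, x, incx, y, incy) for the elimination updates. A companion check in the same spirit as the Dscal test above, written here as a standalone sketch rather than a test method, could look like this:

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDestroy

x = np.random.rand(5).astype(np.float64)
y = np.random.rand(5).astype(np.float64)
alpha = np.float64(np.random.rand())
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(y)
h = cublasCreate()
# y <- alpha*x + y, the same update both decomposers issue on sub-rows/columns
cublasDaxpy(h, x_gpu.size, alpha, x_gpu.gpudata, 1, y_gpu.gpudata, 1)
cublasDestroy(h)
assert np.allclose(y_gpu.get(), alpha * x + y)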
def jki_lu_decomposer_opt_gpu(M):
    m = M.shape[0]
    n = M.shape[1]
    import numpy as np
    import pycuda.autoinit
    import pycuda.gpuarray as gpuarray
    from skcuda.cublas import cublasCreate, cublasDaxpy, cublasDscal, cublasDestroy
    import skcuda.misc as misc

    # Copy the input matrix to the GPU and create a cuBLAS context
    N_gpu = gpuarray.to_gpu(M)
    h = cublasCreate()
    for j in range(0, n):
        for k in range(0, j):
            # N[k+1:,j] = N[k+1:,j] - N[k+1:,k] * N[k,j]
            cublasDaxpy(h, N_gpu[k+1:, k].size, -np.float64(N_gpu[k, j].get()),
                        N_gpu[k+1:, k].gpudata, n, N_gpu[k+1:, j].gpudata, n)
        # N[j+1:,j] /= N[j,j]
        cublasDscal(h, N_gpu[j+1:, j].size, 1.0 / np.float64(N_gpu[j, j].get()),
                    N_gpu[j+1:, j].gpudata, n)
    # Move the result from GPU back to the CPU
    N = N_gpu.get()
    cublasDestroy(h)
    return N
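Both loop orderings compute the same Doolittle factorization without pivoting, so a quick cross-check (a sketch, again assuming a diagonally dominant input so that no pivoting is required) is to compare their in-place results elementwise:

import numpy as np

A = np.random.rand(8, 8) + 8.0 * np.eye(8)   # diagonally dominant test matrix
N_ikj = ikj_lu_decomposer_opt_gpu(A)
N_jki = jki_lu_decomposer_opt_gpu(A)
assert np.allclose(N_ikj, N_jki)             # same L\U stored in-place for both orderings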