import numpy as np
import pycuda.gpuarray as gpuarray
import skcuda.cublas as cublas  # older installs expose this as scikits.cuda.cublas

def computeAx(cublasHandle, x_gpu, Xprime_gpu, X_gpu, XX_gpu, Yprime_gpu, Y_gpu, YY_gpu,
              Zprime_gpu, zzero, freq, FREQ_gpu, c, Deltaxprime, Deltayprime, Deltazprime,
              sizePartition, sizeOut):
    # Apply the operator built by computeA to x without forming the full matrix:
    # generate one column block at a time and accumulate its contribution into y_gpu.
    numPartitions = np.int32(np.ceil(Xprime_gpu.size / sizePartition))
    y_gpu = gpuarray.zeros((sizeOut, 1), dtype=np.complex64)
    p1 = 0
    p2 = sizePartition
    for k in range(numPartitions):
        currentA_gpu = computeA(Xprime_gpu[p1:p2], X_gpu, XX_gpu, Yprime_gpu[p1:p2],
                                Y_gpu, YY_gpu, Zprime_gpu[p1:p2], zzero, freq, FREQ_gpu,
                                c, Deltaxprime, Deltayprime, Deltazprime)
        #y_gpu = y_gpu + culinalg.dot(currentA_gpu, x_gpu[p1:p2], 'N', 'N')
        m, n = currentA_gpu.shape
        # currentA_gpu is C-ordered (row-major) while cuBLAS expects column-major, so
        # pass op 't' with swapped dimensions and lda = n to compute A*x; beta = 1
        # accumulates each block's partial product into y_gpu.
        cublas.cublasCgemv(cublasHandle, 't', n, m, np.complex64(1),
                           currentA_gpu.gpudata, n, x_gpu[p1:p2].gpudata, 1,
                           np.complex64(1), y_gpu.gpudata, 1)
        p1 = p2
        if k == (numPartitions - 2):
            p2 = Xprime_gpu.size  # last partition takes the remainder
        else:
            p2 = p2 + sizePartition
    return y_gpu
def computeAdy(cublasHandle, y_gpu, Xprime_gpu, XX_gpu, Yprime_gpu, YY_gpu, Zprime_gpu,
               zzero, FREQ_gpu, c, Deltaprime, sizePartition, sizeOut):
    # Apply the operator built by computeAd to y, block by block, accumulating into x_gpu.
    numPartitions = np.int32(np.ceil(XX_gpu.size / sizePartition))
    x_gpu = gpuarray.zeros((sizeOut, 1), dtype=np.complex64)
    p1 = 0
    p2 = sizePartition
    for k in range(numPartitions):
        currentAd_gpu = computeAd(Xprime_gpu, XX_gpu[p1:p2], Yprime_gpu, YY_gpu[p1:p2],
                                  Zprime_gpu, zzero, FREQ_gpu[p1:p2], c, Deltaprime,
                                  sizePartition)
        m, n = currentAd_gpu.shape
        # Same row-major convention as computeAx: op 't', swapped dimensions, lda = n,
        # and beta = 1 so each block's contribution is summed into x_gpu.
        cublas.cublasCgemv(cublasHandle, 't', n, m, np.complex64(1),
                           currentAd_gpu.gpudata, n, y_gpu[p1:p2].gpudata, 1,
                           np.complex64(1), x_gpu.gpudata, 1)
        #x_gpu = x_gpu + culinalg.dot(currentAd_gpu, y_gpu[p1:p2], 'N', 'N', cublasHandle)
        p1 = p2
        if k == (numPartitions - 2):
            p2 = XX_gpu.size  # last partition takes the remainder
        else:
            p2 = p2 + sizePartition
    return x_gpu
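# A minimal, self-contained sketch of the gemv convention used in computeAx and
# computeAdy above: for a C-ordered (row-major) m x n gpuarray, calling cublasCgemv
# with op 't', the dimensions swapped to (n, m) and lda = n yields y = A.dot(x),
# because cuBLAS sees the row-major buffer as the column-major matrix A^T.
# The sizes and the skcuda import path here are illustrative assumptions only.
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.cublas as cublas

handle = cublas.cublasCreate()

m, n = 4, 6
a = (np.random.rand(m, n) + 1j*np.random.rand(m, n)).astype(np.complex64)
x = (np.random.rand(n, 1) + 1j*np.random.rand(n, 1)).astype(np.complex64)

a_gpu = gpuarray.to_gpu(a)                    # row-major copy, no transpose needed
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.zeros((m, 1), np.complex64)

cublas.cublasCgemv(handle, 't', n, m,
                   np.complex64(1), a_gpu.gpudata, n,
                   x_gpu.gpudata, 1,
                   np.complex64(0), y_gpu.gpudata, 1)

assert np.allclose(y_gpu.get(), a.dot(x), atol=1e-5)
cublas.cublasDestroy(handle)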
def test_cublasCgemv(self):
    a = (np.random.rand(2, 3) + 1j*np.random.rand(2, 3)).astype(np.complex64)
    x = (np.random.rand(3, 1) + 1j*np.random.rand(3, 1)).astype(np.complex64)
    # Transpose-copy a so the GPU buffer is column-major, the layout cuBLAS expects;
    # the gemv call can then use op 'n' with lda = 2 directly.
    a_gpu = gpuarray.to_gpu(a.T.copy())
    x_gpu = gpuarray.to_gpu(x)
    y_gpu = gpuarray.empty((2, 1), np.complex64)
    alpha = np.complex64(1.0)
    beta = np.complex64(0.0)
    cublas.cublasCgemv(self.cublas_handle, 'n', 2, 3, alpha,
                       a_gpu.gpudata, 2, x_gpu.gpudata, 1,
                       beta, y_gpu.gpudata, 1)
    assert np.allclose(y_gpu.get(), np.dot(a, x))
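# A sketch (with assumed sizes) of the partition-and-accumulate pattern behind
# computeAx/computeAdy: the matrix is generated one column block at a time and the
# partial products are summed into y by passing beta = 1 to cublasCgemv. The test
# above instead copies a.T to get a column-major buffer and calls gemv with op 'n';
# both conventions give the same result.
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import skcuda.cublas as cublas

handle = cublas.cublasCreate()

m, n, blk = 5, 12, 4                              # assumed problem and partition sizes
a = (np.random.rand(m, n) + 1j*np.random.rand(m, n)).astype(np.complex64)
x = (np.random.rand(n, 1) + 1j*np.random.rand(n, 1)).astype(np.complex64)
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.zeros((m, 1), np.complex64)      # accumulator, starts at zero

for p1 in range(0, n, blk):
    p2 = min(p1 + blk, n)
    block_gpu = gpuarray.to_gpu(a[:, p1:p2].copy())   # row-major column block of a
    rows, cols = block_gpu.shape
    # op 't' + swapped dims handles the row-major layout; beta = 1 accumulates.
    cublas.cublasCgemv(handle, 't', cols, rows,
                       np.complex64(1), block_gpu.gpudata, cols,
                       x_gpu[p1:p2].gpudata, 1,
                       np.complex64(1), y_gpu.gpudata, 1)

assert np.allclose(y_gpu.get(), a.dot(x), atol=1e-5)
cublas.cublasDestroy(handle)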