Exemplo n.º 1
0
def computeAx(x_gpu, Xprime_gpu, X_gpu, XX_gpu, Yprime_gpu, Y_gpu, YY_gpu,
              Zprime_gpu, zzero, freq, FREQ_gpu, c, Deltaxprime, Deltayprime,
              Deltazprime, sizePartition, sizeOut):
    """Accumulate the matrix-vector product A·x on the GPU, slice by slice.

    The primed coordinate arrays and ``x_gpu`` are cut into consecutive
    slices of at most ``sizePartition`` entries.  For each slice the
    corresponding piece of the operator is built with ``computeA`` and its
    product with the matching slice of ``x_gpu`` is added to the result.

    Args:
        x_gpu: Device vector to multiply; sliced with the same bounds as the
            primed coordinate arrays.
        Xprime_gpu, Yprime_gpu, Zprime_gpu: Device arrays that are sliced
            per partition; ``Xprime_gpu.size`` defines the total length.
        X_gpu, XX_gpu, Y_gpu, YY_gpu, zzero, freq, FREQ_gpu, c,
        Deltaxprime, Deltayprime, Deltazprime: Forwarded unchanged to
            ``computeA``.
        sizePartition: Maximum number of entries handled per iteration.
        sizeOut: Number of rows of the returned vector.

    Returns:
        A ``(sizeOut, 1)`` complex64 device array holding the accumulated sum.

    Note:
        ``cublasHandle`` is not a parameter of this function; it is looked
        up in the enclosing (module) scope at call time.
    """
    total = int(Xprime_gpu.size)
    step = int(sizePartition)
    # Integer ceiling division: independent of whether ``/`` is true or
    # floor division, so a trailing partial slice is always counted.
    numPartitions = -(-total // step)

    y_gpu = gpuarray.zeros((sizeOut, 1), dtype=np.complex64)
    one = np.complex64(1)

    for k in range(numPartitions):
        p1 = k * step
        # Clamp so the last slice stops at the end of the data and no slice
        # ever exceeds ``sizePartition`` entries.
        p2 = min(p1 + step, total)

        currentA_gpu = computeA(Xprime_gpu[p1:p2], X_gpu, XX_gpu,
                                Yprime_gpu[p1:p2], Y_gpu, YY_gpu,
                                Zprime_gpu[p1:p2], zzero, freq, FREQ_gpu, c,
                                Deltaxprime, Deltayprime, Deltazprime)
        m, n = currentA_gpu.shape
        # y += currentA · x[p1:p2] (alpha = beta = 1).  cuBLAS is
        # column-major, so the buffer is described as an n-by-m matrix and
        # multiplied transposed.
        # NOTE(review): assumes currentA_gpu is a C-contiguous complex64
        # (m, n) array with m == sizeOut and n == p2 - p1 -- confirm
        # against computeA.
        cublas.cublasCgemv(cublasHandle, 't', n, m, one,
                           currentA_gpu.gpudata, n, x_gpu[p1:p2].gpudata, 1,
                           one, y_gpu.gpudata, 1)

    return y_gpu
Exemplo n.º 2
0
def computeAdy(cublasHandle, y_gpu, Xprime_gpu, XX_gpu, Yprime_gpu, YY_gpu,
               Zprime_gpu, zzero, FREQ_gpu, c, Deltaprime, sizePartition,
               sizeOut):
    """Accumulate the matrix-vector product Ad·y on the GPU, slice by slice.

    ``XX_gpu``, ``YY_gpu``, ``FREQ_gpu`` and ``y_gpu`` are cut into
    consecutive slices of at most ``sizePartition`` entries.  For each slice
    the corresponding piece of the operator is built with ``computeAd`` and
    its product with the matching slice of ``y_gpu`` is added to the result.

    Args:
        cublasHandle: cuBLAS handle passed to ``cublasCgemv``.
        y_gpu: Device vector to multiply; sliced per partition.
        XX_gpu, YY_gpu, FREQ_gpu: Device arrays sliced per partition;
            ``XX_gpu.size`` defines the total length.
        Xprime_gpu, Yprime_gpu, Zprime_gpu, zzero, c, Deltaprime: Forwarded
            unchanged to ``computeAd``.
        sizePartition: Maximum number of entries handled per iteration; also
            forwarded unchanged to ``computeAd``.
        sizeOut: Number of rows of the returned vector.

    Returns:
        A ``(sizeOut, 1)`` complex64 device array holding the accumulated sum.
    """
    total = int(XX_gpu.size)
    step = int(sizePartition)
    # Integer ceiling division: independent of whether ``/`` is true or
    # floor division, so a trailing partial slice is always counted.
    numPartitions = -(-total // step)

    x_gpu = gpuarray.zeros((sizeOut, 1), dtype=np.complex64)
    one = np.complex64(1)

    for k in range(numPartitions):
        p1 = k * step
        # Clamp so the last slice stops at the end of the data and no slice
        # ever exceeds the ``sizePartition`` that computeAd is told about.
        p2 = min(p1 + step, total)

        currentAd_gpu = computeAd(Xprime_gpu, XX_gpu[p1:p2], Yprime_gpu,
                                  YY_gpu[p1:p2], Zprime_gpu, zzero,
                                  FREQ_gpu[p1:p2], c, Deltaprime,
                                  sizePartition)
        m, n = currentAd_gpu.shape
        # x += currentAd · y[p1:p2] (alpha = beta = 1).  cuBLAS is
        # column-major, so the buffer is described as an n-by-m matrix and
        # multiplied transposed.
        # NOTE(review): assumes currentAd_gpu is a C-contiguous complex64
        # (m, n) array with m == sizeOut and n == p2 - p1 -- confirm
        # against computeAd.
        cublas.cublasCgemv(cublasHandle, 't', n, m, one,
                           currentAd_gpu.gpudata, n, y_gpu[p1:p2].gpudata, 1,
                           one, x_gpu.gpudata, 1)

    return x_gpu
Exemplo n.º 3
0
 def test_cublasCgemv(self):
     """Check cublasCgemv (no transpose) against np.dot on a random 2x3 case."""
     rows, cols = 2, 3
     one, zero = np.complex64(1.0), np.complex64(0.0)

     a = np.random.rand(rows, cols) + 1j*np.random.rand(rows, cols)
     a = a.astype(np.complex64)
     x = np.random.rand(cols, 1) + 1j*np.random.rand(cols, 1)
     x = x.astype(np.complex64)
     expected = np.dot(a, x)

     # Upload a contiguous copy of the transpose: its buffer is `a` laid
     # out column by column, which is the layout cuBLAS expects.
     a_gpu = gpuarray.to_gpu(a.T.copy())
     x_gpu = gpuarray.to_gpu(x)
     y_gpu = gpuarray.empty((rows, 1), np.complex64)

     # beta == 0, so the uninitialised contents of y_gpu are not used.
     cublas.cublasCgemv(self.cublas_handle, 'n', rows, cols, one,
                        a_gpu.gpudata, rows, x_gpu.gpudata,
                        1, zero, y_gpu.gpudata, 1)

     assert np.allclose(y_gpu.get(), expected)