Example #1
def dot(A, B, opa='n', opb='n',
        C=None, Cstart=None,
        scale=1.0, Cscale=0.0, handle=None):
    """
    Multiplication of two matrices A and B in PitchArray format.
    If C is specified, the result is stored in the memory of C.
    The specified C must have the same leading dimension as the result,
    and its other dimension must be greater than or equal to that of
    the result.
    
    Parameters:
    -----------
    A: parray.PitchArray
    B: parray.PitchArray
    opa: str
         operation on A
         'n' or 'N': use A itself
         't' or 'T': use transpose of A
         'c' or 'C': use conjugate transpose of A
    opb: str
         operation on B
         'n' or 'N': use B itself
         't' or 'T': use transpose of B
         'c' or 'C': use conjugate transpose of B
    C: parray.PitchArray
       if specified, the result will be stored in C
    Cstart: int
            the offset start of C array
    scale: float
            scaling factor for A*B
            see Cscale
    Cscale: float
            scaling factor for C
            result will be C = C*Cscale + scale*A*B
    
    Note:
    -----
    Works only for CUDA versions > 4.0, where the cuBLAS handle
    was introduced.

    Will NOT work in the complex case when A and B share overlapping
    memory, but should work if A == B.
    """
    
    if A.dtype != B.dtype:
        raise TypeError("matrix multiplication must have same dtype")

    if (len(A.shape) != 2) or (len(B.shape) != 2):
        raise TypeError("A, B must both be matrices")

    if opa in ['n', 'N']:
        m,n = A.shape
    elif opa in ['t','T', 'c','C']:
        n,m = A.shape
    else:
        raise ValueError("unknown value assigned to opa")

    if opb in ['n', 'N']:
        k,l = B.shape
    elif opb in ['t','T', 'c','C']:
        l,k = B.shape
    else:
        raise ValueError("unknown value assigned to opa")

    if (k != n) or (0 in [m, n, l]):
        raise ValueError("matrix dimension mismatch, "
                         "(%d,%d) with (%d,%d)" % (m, n, k, l))

    dtype = A.dtype
    if dtype in [np.float32, np.float64]:
        if opb in ['c', 'C']:
            opb = 't'

        if opa in ['c', 'C']:
            opa = 't'
        
    scale = dtype.type(scale)
    Cscale = dtype.type(Cscale)
    
    if dtype == np.float64:
        tp = 'cublas.cublasD'
        complex_type = False
    elif dtype == np.complex128:
        tp = 'cublas.cublasZ'
        complex_type = True
    elif dtype == np.float32:
        tp = 'cublas.cublasS'
        complex_type = False
    elif dtype == np.complex64:
        tp = 'cublas.cublasC'
        complex_type = True

    if C is None:
        C = parray.empty((m,l), dtype)
        Cstart = 0
        Cempty = True
        Cscale = dtype.type(0)
    else:
        Cempty = False
        if Cstart is None:
            Cstart = 0
        if C.shape[1] != l:
            raise AttributeError("shape of the provided result array "
                                 + C.shape.__str__()
                                 + " does not match intended result " 
                                 + (m,l).__str__())
        if C.shape[0] < m + Cstart:
            raise AttributeError("the provided result array "
                                 + C.shape.__str__()
                                 + " is too small to hold the result "
                                 + (m, l).__str__()
                                 + " at row offset %d" % Cstart)
        if C.dtype != dtype:
            raise TypeError("Result array C provided must have "
                            "the same dtype as inputs")
    
    conjA = False
    conjB = False
    conjC = False
    
    sameflag = (A == B)
    
    itemsize = C.dtype.itemsize
    handlestr = "handle.handle"
    if m == 1:
        if n == 1:
            alpha = A.get()[0,0]
            if opa in ['c','C']:
                alpha = np.conj(alpha)
            C *= Cscale
            if opb in ['c','C']:
                func = (tp+"axpy(handle.handle, l, alpha*scale, "
                        + "parray.conj(B).gpudata, 1,"
                        + "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                func = (tp+"axpy(handle.handle, l, alpha*scale, "
                        + "B.gpudata, 1, "
                        + "int(C.gpudata)+Cstart*itemsize, 1)")
        else:
            if l > 1:
                alpha = scale
                beta = Cscale
                if opa in ['c','C']:
                    A.conj()
                    conjA = True
                func = (tp+"gemv(handle.handle, '"+opb+"',B.shape[1], "
                        + "B.shape[0], alpha, B.gpudata, B.ld, A.gpudata, "
                        + "1, beta, int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
            else:
                if opa in ['c','C']:
                    if opb in ['c', 'C']:
                        func = ("C.set(np.array(scale*" + tp
                                + "dotu(handle.handle, n, A.gpudata, "
                                + "1, B.gpudata, 1)"
                                +").conj()+C.get()*Cscale)")
                    else:
                        func = ("C.set(np.array(scale*" + tp
                                + "dotc(handle.handle, n, A.gpudata, "
                                + "1, B.gpudata, 1)) + C.get()*Cscale)")
                elif opb in ['c', 'C']:
                    func = ("C.set(np.array(scale*" + tp
                            + "dotc(handle.handle, n, B.gpudata, 1, "
                            + "A.gpudata, 1)) + C.get()*Cscale)")
                else:
                    if complex_type:
                        func = ("C.set(np.array(scale*" + tp
                                + "dotu(handle.handle, n, A.gpudata, 1, "
                                + "B.gpudata, 1)) + C.get()*Cscale)")
                    else:
                        func = ("C.set(np.array(scale*" + tp
                                + "dot(handle.handle, n, A.gpudata, 1, "
                                + "B.gpudata, 1)) + C.get()*Cscale)")
    else:  # m != 1
        if n == 1:
            if l == 1:
                alpha = B.get()[0,0]
                if opb in ['c','C']:
                    alpha = np.conj(alpha)
                C *= Cscale
                if opa in ['c','C']:
                    func = (tp+"axpy(handle.handle, m, alpha*scale, "
                            + "parray.conj(A).gpudata, 1, "
                            + "int(C.gpudata)+Cstart*itemsize, 1)")
                else:
                    func = (tp+"axpy(handle.handle, m, alpha*scale, "
                            + "A.gpudata, 1, "
                            + "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                if Cempty:
                    C.fill(0)
                else:
                    C*=Cscale
                if opa in ['c','C']:
                    if opb in ['c', 'C']:
                        B.conj()
                        conjB = True
                        func = (tp + "gerc(handle.handle, l, m, scale, "
                                + "B.gpudata, 1, A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, "
                                + "C.ld)")
                    else:
                        func = (tp + "gerc(handle.handle, l, m, scale, "
                                + "B.gpudata, 1, A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, "
                                + "C.ld)")
                elif opb in ['c', 'C']:
                    if sameflag:
                        B.conj()
                        conjB = True
                        func = (tp + "gerc(handle.handle, l, m, scale, "
                                + "B.gpudata, 1, A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                    
                    else:
                        B.conj()
                        conjB = True
                        func = (tp + "geru(handle.handle, l, m, scale, "
                                + "B.gpudata, 1, A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                else:
                    if complex_type:
                        func = (tp + "geru(handle.handle, l, m, scale, "
                                + "B.gpudata, 1,  A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                    else:
                        func = (tp + "ger(handle.handle, l, m, scale, "
                                + "B.gpudata, 1, A.gpudata, 1, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
        else:
            if l == 1:
                if opb in ['c', 'C']:
                    if opa in ['c', 'C']:
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], "
                                + "A.shape[0], scale, A.gpudata, A.ld, "
                                + "B.gpudata, 1, Cscale, int(C.gpudata) + "
                                + "Cstart * itemsize * C.ld, 1)")
                    else:
                        B.conj()
                        conjB = True
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't'
                        
                        func = (tp + "gemv(handle.handle, '" + opa + "', "
                                + "A.shape[1], A.shape[0], scale, A.gpudata, "
                                + "A.ld, B.gpudata, 1, Cscale, "
                                + "int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
                else:
                    if opa in ['c', 'C']:
                        B.conj()
                        conjB = True
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], "
                                + "A.shape[0], scale, A.gpudata, A.ld, "
                                + "B.gpudata, 1, Cscale, int(C.gpudata) + "
                                + "Cstart * itemsize * C.ld, 1)")
                    else:
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't' 
                        func = (tp + "gemv(handle.handle, '" + opa + "', "
                                + "A.shape[1],  A.shape[0], scale, A.gpudata, "
                                + "A.ld, B.gpudata, 1, Cscale, int(C.gpudata) "
                                + "+ Cstart * itemsize * C.ld, 1)")
            else:
                func = (tp+"gemm(handle.handle, '" + opb + "','" + opa + "', "
                        + "l, m, k, scale, B.gpudata, B.ld, A.gpudata, A.ld, "
                        + "Cscale, int(C.gpudata) + "
                        + "Cstart * itemsize * C.ld, C.ld)")

    if handle is None:
        handle = cublashandle()
    eval(func)
    
    if conjC:
        C.conj()

    if conjA:
        A.conj()

    if conjB:
        B.conj()
    return C
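
A minimal usage sketch of dot (hypothetical shapes; it assumes the module's
parray and cublashandle, used above, are in scope):

import numpy as np

h = cublashandle()                       # create once, reuse across calls
A = parray.to_gpu(np.random.rand(4, 3))  # float64 -> cublasD* path
B = parray.to_gpu(np.random.rand(3, 5))
C = dot(A, B, handle=h)                  # C = A*B, shape (4, 5)
# accumulate into an existing array: C <- 0.5*C + 2.0*A*B
C = dot(A, B, C=C, scale=2.0, Cscale=0.5, handle=h)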
Example #2
def eig_sym(G, compute_z=True, uplo='U'):
    """
    Compute the eigenvalue decomposition of a symmetric or
    Hermitian matrix G:
    G = V D V^{*}

    Parameters
    -------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_z: bool
               whether to return eigenvectors
    uplo: str
          'U' or 'u': the entries of G are stored in the
          upper triangular part; the strictly lower
          triangular part is not referenced
          'L' or 'l': the entries of G are stored in the
          lower triangular part; the strictly upper
          triangular part is not referenced

    Returns
    -------------------------------------
    D:  PitchArray
        a row vector containing all eigenvalues in ascending order
    V:  PitchArray
        if compute_z, the jth column of V contains the orthonormal
        eigenvector associated with the jth eigenvalue

    Examples
    -------------------------------------
    D = eig_sym(G, compute_z = False)
    D,V = eig_sym(G, compute_z = True)
    """
    if cula._libcula_toolkit != 'premium':
        raise ValueError("eigenvalue decomposition is only supported "
                         "in premium version of CULA")

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    if len(A.shape) != 2:
        raise TypeError("eig only works on 2D matrix")

    if A.shape[0] != A.shape[1]:
        raise ValueError("G must be square matrix")

    if uplo in ['u', 'U']:
        uplo = 'L'
    elif uplo in ['l', 'L']:
        uplo = 'U'
    else:
        raise ValueError("uplo must be 'U' or 'L'")

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        eig_func = cula.culaDeviceCheev
    elif A.dtype == np.float32:
        eig_func = cula.culaDeviceSsyev
    else:
        if A.dtype == np.complex128:
            eig_func = cula.culaDeviceZheev
        elif A.dtype == np.float64:
            eig_func = cula.culaDeviceDsyev
        else:
            raise ValueError('unsupported type')
        real_dtype = np.dtype(np.float64)

    D = parray.empty(A.shape[0], real_dtype)

    cula.culaInitialize()
    handle = cublashandle()
    if compute_z:
        jobz = 'V'
    else:
        jobz = 'N'
    eig_func(handle.handle, jobz, uplo, A.shape[0], A.gpudata, A.ld, D.gpudata)
    #cula.culaShutdown()
    if compute_z:
        return D, A.conj().T()
    else:
        return D
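
A short usage sketch of eig_sym (hypothetical input; it assumes a premium
CULA toolkit, which the function checks for):

import numpy as np

G = np.random.rand(5, 5).astype(np.float32)
G = G + G.T                          # symmetric input
D = eig_sym(G, compute_z=False)      # eigenvalues only, ascending order
D, V = eig_sym(G)                    # passing an ndarray leaves host G intact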
Example #3
def svd(G, compute_u=True, compute_v=True, econ=False):
    """
    Compute the singular value decomposition of G:
    G = U*(diag(S))*V

    Parameters
    ----------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray of shape (m,n)
        if G is GPUArray or PitchArray, its gpudata will be 
        destroyed after calling the function
    compute_u: bool
               whether to return the U matrix
    compute_v: bool
               whether to return the V matrix
    econ: bool
          whether to return the economy-size factorization

    Returns
    ----------------------------------------
    U:  parray.PitchArray matrix
        as U in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) columns of U
    S:  parray.PitchArray vector
        a row vector containing all singular values
        in descending order
    V:  parray.PitchArray matrix
        as V in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) rows of V

    Order of outputs:
    always follows the order U, S, V
    e.g.
    S = svd(G, compute_u = False, compute_v = False)
    U,S = svd(G, compute_u = True, compute_v = False)
    S,V = svd(G, compute_u = False, compute_v = True)
    U,S,V = svd(G, compute_u = True, compute_v = True)
    """

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        svd_func = cula.culaDeviceCgesvd
    elif A.dtype == np.float32:
        svd_func = cula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if A.dtype == np.complex128:
                svd_func = cula.culaDeviceZgesvd
            elif A.dtype == np.float64:
                svd_func = cula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_dtype = np.dtype(np.float64)
        else:
            raise TypeError('double precision svd is not supported '
                            'in this CULA toolkit')

    if len(A.shape) != 2:
        raise TypeError("svd only works on 2D matrix")

    S = parray.empty(min(A.shape), real_dtype)
    cula.culaInitialize()

    if compute_u:
        if compute_v:
            if econ:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    #cula.culaShutdown()
                    return A, S, V
                else:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    return U, S, A
            else:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    A.shape = (A.shape[1], A.shape[1])
                    return U, S, A
                else:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    A.shape = (A.shape[0], A.shape[0])
                    #cula.culaShutdown()
                    return A, S, V
        else:
            if econ or (A.shape[1] >= A.shape[0]):
                jobu = 'N'
                jobvt = 'O'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] > A.shape[0]):
                    A.shape = (A.shape[0], A.shape[0])
                #cula.culaShutdown()
                return A, S
            else:
                jobu = 'N'
                jobvt = 'A'
                U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, U.gpudata, U.ld)
                #cula.culaShutdown()
                return U, S
    else:
        if compute_v:
            if econ or (A.shape[1] <= A.shape[0]):
                jobu = 'O'
                jobvt = 'N'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] < A.shape[0]):
                    A.shape = (A.shape[1], A.shape[1])
                #cula.culaShutdown()
                return S, A
            else:
                jobu = 'A'
                jobvt = 'N'
                V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, V.gpudata, V.ld, 1, 1)
                #cula.culaShutdown()
                return S, V
        else:
            jobu = 'N'
            jobvt = 'N'
            svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                     S.gpudata, 1, 1, 1, 1)
            #cula.culaShutdown()
            return S
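
A short usage sketch of the output-order convention (hypothetical input;
an ndarray is passed because a PitchArray or GPUArray argument would have
its gpudata destroyed):

import numpy as np

G = np.random.rand(6, 4).astype(np.float32)
U, S, V = svd(G)                               # full: G = U*diag(S)*V
S2 = svd(G, compute_u=False, compute_v=False)  # singular values only
U3, S3, V3 = svd(G, econ=True)                 # economy size: U3 is (6, 4), V3 is (4, 4)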
Example #4
def dot(A,
        B,
        opa='n',
        opb='n',
        C=None,
        Cstart=None,
        scale=1.0,
        Cscale=0.0,
        handle=None):
    """
    Multiplication of two matrices A and B in PitchArray format.
    If C is specified, the result is stored in the memory of C.
    The specified C must have the same leading dimension as the result,
    and its other dimension must be greater than or equal to that of
    the result.
    
    Parameters
    ------------------------------------
    A: parray.PitchArray
    B: parray.PitchArray
    opa: str
         operation on A
         'n' or 'N': use A itself
         't' or 'T': use transpose of A
         'c' or 'C': use conjugate transpose of A
    opb: str
         operation on B
         'n' or 'N': use B itself
         't' or 'T': use transpose of B
         'c' or 'C': use conjugate transpose of B
    C: parray.PitchArray
       if specified, the result will be stored in C
    Cstart: int
            the offset start of C array
    scale: float
            scaling factor for A*B
            see Cscale
    Cscale: float
            scaling factor for C
            result will be C = C*Cscale + scale*A*B
    
    Note:
    Works only for CUDA versions > 4.0, where the cuBLAS handle
    was introduced.
    """

    if A.dtype != B.dtype:
        raise TypeError("matrix multiplication must have same dtype")

    if (len(A.shape) != 2) or (len(B.shape) != 2):
        raise TypeError("A, B must both be matrices")

    if opa in ['n', 'N']:
        m, n = A.shape
    elif opa in ['t', 'T', 'c', 'C']:
        n, m = A.shape
    else:
        raise ValueError("unknown value assigned to opa")

    if opb in ['n', 'N']:
        k, l = B.shape
    elif opb in ['t', 'T', 'c', 'C']:
        l, k = B.shape
    else:
        raise ValueError("unknown value assigned to opa")

    if (k != n) or (0 in [m, n, l]):
        raise ValueError("matrix dimension mismatch, "
                         "(%d,%d) with (%d,%d)" % (m, n, k, l))

    dtype = A.dtype
    if dtype in [np.float32, np.float64]:
        if opb in ['c', 'C']:
            opb = 't'

        if opa in ['c', 'C']:
            opa = 't'

    scale = dtype.type(scale)
    Cscale = dtype.type(Cscale)

    if dtype == np.float64:
        tp = 'cublas.cublasD'
        complex_type = False
    elif dtype == np.complex128:
        tp = 'cublas.cublasZ'
        complex_type = True
    elif dtype == np.float32:
        tp = 'cublas.cublasS'
        complex_type = False
    elif dtype == np.complex64:
        tp = 'cublas.cublasC'
        complex_type = True

    if C is None:
        C = parray.empty((m, l), dtype)
        Cstart = 0
        Cempty = True
        Cscale = dtype.type(0)
    else:
        Cempty = False
        if Cstart is None:
            Cstart = 0
        if C.shape[1] != l:
            raise AttributeError("shape of the provided result array " +
                                 C.shape.__str__() +
                                 " does not match intended result " +
                                 (m, l).__str__())
        if C.shape[0] < m + Cstart:
            raise AttributeError("the provided result array " +
                                 C.shape.__str__() +
                                 " is too small to hold the result " +
                                 (m, l).__str__() +
                                 " at row offset %d" % Cstart)
        if C.dtype != dtype:
            raise TypeError("Result array C provided must have "
                            "the same dtype as inputs")

    conjA = False
    conjB = False
    conjC = False

    itemsize = C.dtype.itemsize
    handlestr = "handle.handle"
    if m == 1:
        if n == 1:
            alpha = A.get()[0, 0]
            if opa in ['c', 'C']:
                alpha = np.conj(alpha)
            C *= Cscale
            if opb in ['c', 'C']:
                func = (tp + "axpy(handle.handle, l, alpha*scale, " +
                        "parray.conj(B).gpudata, 1," +
                        "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                func = (tp + "axpy(handle.handle, l, alpha*scale, " +
                        "B.gpudata, 1, " +
                        "int(C.gpudata)+Cstart*itemsize, 1)")
        else:
            if l > 1:
                alpha = scale
                beta = Cscale
                if opa in ['c', 'C']:
                    A.conj()
                    conjA = True
                func = (tp + "gemv(handle.handle, '" + opb + "',B.shape[1], " +
                        "B.shape[0], alpha, B.gpudata, B.ld, A.gpudata, " +
                        "1, beta, int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
            else:
                if opa in ['c', 'C']:
                    if opb in ['c', 'C']:
                        #func = ("C.set(np.array(" + tp
                        #        + "dotu(n, A.gpudata, 1, B.gpudata, 1)"
                        #        +").conj())")
                        func = ("C.set(np.array(scale*" + tp +
                                "dotu(handle.handle, n, A.gpudata, " +
                                "1, B.gpudata, 1)" +
                                ").conj()+C.get()*Cscale)")
                    else:
                        #func = ("C.set(np.array(" + tp
                        #        + "dotc(n, A.gpudata, 1, B.gpudata, 1)"
                        #        +"))")
                        func = ("C.set(np.array(scale*" + tp +
                                "dotc(handle.handle, n, A.gpudata, " +
                                "1, B.gpudata, 1)) + C.get()*Cscale)")
                elif opb in ['c', 'C']:
                    #func = ("C.set(np.array(" + tp
                    #        + "dotc(n, B.gpudata, 1, A.gpudata, 1)" +"))")
                    func = ("C.set(np.array(scale*" + tp +
                            "dotc(handle.handle, n, B.gpudata, 1, " +
                            "A.gpudata, 1)) + C.get()*Cscale)")
                else:
                    if complex_type:
                        #func = ("C.set(np.array(" + tp
                        #        + "dotu(n, A.gpudata, 1, B.gpudata, 1)"
                        #        +"))")
                        func = ("C.set(np.array(scale*" + tp +
                                "dotu(handle.handle, n, A.gpudata, 1, " +
                                "B.gpudata, 1)) + C.get()*Cscale)")
                    else:
                        #func = ("C.set(np.array(" + tp
                        #        + "dot(n, A.gpudata, 1, B.gpudata, 1)"
                        #        +"))")
                        func = ("C.set(np.array(scale*" + tp +
                                "dot(handle.handle, n, A.gpudata, 1, " +
                                "B.gpudata, 1)) + C.get()*Cscale)")
    else:  # m != 1
        if n == 1:
            if l == 1:
                alpha = B.get()[0, 0]
                if opb in ['c', 'C']:
                    alpha = np.conj(alpha)
                C *= Cscale
                if opa in ['c', 'C']:
                    func = (tp + "axpy(handle.handle, m, alpha*scale, " +
                            "parray.conj(A).gpudata, 1, " +
                            "int(C.gpudata)+Cstart*itemsize, 1)")
                else:
                    func = (tp + "axpy(handle.handle, m, alpha*scale, " +
                            "A.gpudata, 1, " +
                            "int(C.gpudata)+Cstart*itemsize, 1)")
            else:
                C *= Cscale
                if opa in ['c', 'C']:
                    if opb in ['c', 'C']:
                        B.conj()
                        conjB = True
                        func = (tp + "gerc(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, " +
                                "C.ld)")
                    else:
                        func = (tp + "gerc(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, " +
                                "C.ld)")
                elif opb in ['c', 'C']:
                    B.conj()
                    conjB = True
                    func = (tp + "geru(handle.handle, l, m, scale, " +
                            "B.gpudata, 1, A.gpudata, 1, " +
                            "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                else:
                    if complex_type:
                        func = (tp + "geru(handle.handle, l, m, scale, " +
                                "B.gpudata, 1,  A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
                    else:
                        func = (tp + "ger(handle.handle, l, m, scale, " +
                                "B.gpudata, 1, A.gpudata, 1, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, C.ld)")
        else:
            if l == 1:
                if opb in ['c', 'C']:
                    if opa in ['c', 'C']:
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], " +
                                "A.shape[0], scale, A.gpudata, A.ld, " +
                                "B.gpudata, 1, Cscale, int(C.gpudata) + " +
                                "Cstart * itemsize * C.ld, 1)")
                    else:
                        B.conj()
                        conjB = True
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't'

                        func = (tp + "gemv(handle.handle, '" + opa + "', " +
                                "A.shape[1], A.shape[0], scale, A.gpudata, " +
                                "A.ld, B.gpudata, 1, Cscale, " +
                                "int(C.gpudata)+Cstart*itemsize*C.ld, 1)")
                else:
                    if opa in ['c', 'C']:
                        B.conj()
                        conjB = True
                        conjC = True
                        if not Cempty:
                            C.conj()
                            Cscale = Cscale.conj()
                        func = (tp + "gemv(handle.handle, 'n', A.shape[1], " +
                                "A.shape[0], scale, A.gpudata, A.ld, " +
                                "B.gpudata, 1, Cscale, int(C.gpudata) + " +
                                "Cstart * itemsize * C.ld, 1)")
                    else:
                        if opa in ['t', 'T']:
                            opa = 'n'
                        else:
                            opa = 't'
                        func = (tp + "gemv(handle.handle, '" + opa + "', " +
                                "A.shape[1],  A.shape[0], scale, A.gpudata, " +
                                "A.ld, B.gpudata, 1, Cscale, int(C.gpudata) " +
                                "+ Cstart * itemsize * C.ld, 1)")
            else:
                func = (tp + "gemm(handle.handle, '" + opb + "','" + opa +
                        "', " +
                        "l, m, k, scale, B.gpudata, B.ld, A.gpudata, A.ld, " +
                        "Cscale, int(C.gpudata) + " +
                        "Cstart * itemsize * C.ld, C.ld)")
    if handle is None:
        handle = cublashandle()
    eval(func)

    if conjC:
        C.conj()

    if conjA:
        A.conj()

    if conjB:
        B.conj()
    return C
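
The contract C = Cscale*C + scale*A*B can be sanity-checked against NumPy
on the host (a sketch with hypothetical test sizes):

import numpy as np

a = np.random.rand(4, 3)
b = np.random.rand(3, 5)
c = np.random.rand(4, 5)
expected = 0.5 * c + 2.0 * a.dot(b)
result = dot(parray.to_gpu(a), parray.to_gpu(b),
             C=parray.to_gpu(c), scale=2.0, Cscale=0.5).get()
assert np.allclose(result, expected)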
Example #5
def eig_sym(G, compute_z=True, uplo='U'):
    """
    Compute the eigenvalue decomposition of a symmetric or
    Hermitian matrix G:
    G = V D V^{*}

    Parameters
    -------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_z: bool
               whether to return eigenvectors
    uplo: str
          'U' or 'u': the entries of G are stored in the
          upper triangular part; the strictly lower
          triangular part is not referenced
          'L' or 'l': the entries of G are stored in the
          lower triangular part; the strictly upper
          triangular part is not referenced

    Returns
    -------------------------------------
    D:  PitchArray
        a row vector containing all eigenvalues in ascending order
    V:  PitchArray
        if compute_z, the jth column of V contains the orthonormal
        eigenvector associated with the jth eigenvalue

    Examples
    -------------------------------------
    D = eig_sym(G, compute_z = False)
    D,V = eig_sym(G, compute_z = True)
    """
    if cula._libcula_toolkit != 'premium':
        raise ValueError("eigenvalue decomposition is only supported "
                         "in premium version of CULA")

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G
    
    if len(A.shape) != 2:
        raise TypeError("eig only works on 2D matrix")
    
    if A.shape[0] != A.shape[1]:
        raise ValueError("G must be square matrix")

    if uplo in ['u', 'U']:
        uplo = 'L'
    elif uplo in ['l', 'L']:
        uplo = 'U'
    else:
        raise ValueError("uplo must be 'U' or 'L'")
    
    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        eig_func = cula.culaDeviceCheev        
    elif A.dtype == np.float32:
        eig_func = cula.culaDeviceSsyev
    else:
        if A.dtype == np.complex128:
            eig_func = cula.culaDeviceZheev
        elif A.dtype == np.float64:
            eig_func = cula.culaDeviceDsyev
        else:
            raise ValueError('unsupported type')
        real_dtype = np.dtype(np.float64)
    
    D = parray.empty(A.shape[0], real_dtype)
    
    cula.culaInitialize()
    handle = cublashandle()
    if compute_z:
        jobz = 'V'
    else:
        jobz = 'N'
    eig_func(handle.handle, jobz, uplo, A.shape[0], A.gpudata, A.ld, D.gpudata)
    #cula.culaShutdown()
    if compute_z:
        return D, A.conj().T()
    else:
        return D
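
The output can be compared against numpy.linalg.eigh, which also returns
eigenvalues in ascending order (a sketch, assuming a premium CULA toolkit):

import numpy as np

G = np.random.rand(4, 4)
G = G + G.T                      # symmetric test matrix
w = np.linalg.eigh(G)[0]         # ascending eigenvalues
D, V = eig_sym(G)
assert np.allclose(D.get().ravel(), w, atol=1e-6)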
Example #6
def svd(G, compute_u=True, compute_v=True, econ=False):
    """
    Compute the singular value decomposition of G:
    G = U*(diag(S))*V

    Parameters
    ----------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray of shape (m,n)
        if G is GPUArray or PitchArray, its gpudata will be 
        destroyed after calling the function
    compute_u: bool
               whether to return the U matrix
    compute_v: bool
               whether to return the V matrix
    econ: bool
          whether to return the economy-size factorization

    Returns
    ----------------------------------------
    U:  parray.PitchArray matrix
        as U in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) columns of U
    S:  parray.PitchArray vector
        a row vector containing all singular values
        in descending order
    V:  parray.PitchArray matrix
        as V in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) rows of V

    Order of outputs:
    always follows the order U, S, V
    e.g.
    S = svd(G, compute_u = False, compute_v = False)
    U,S = svd(G, compute_u = True, compute_v = False)
    S,V = svd(G, compute_u = False, compute_v = True)
    U,S,V = svd(G, compute_u = True, compute_v = True)
    """
    
    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G
    
    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        svd_func = cula.culaDeviceCgesvd        
    elif A.dtype == np.float32:
        svd_func = cula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if A.dtype == np.complex128:
                svd_func = cula.culaDeviceZgesvd
            elif A.dtype == np.float64:
                svd_func = cula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_dtype = np.dtype(np.float64)
        else:
            raise TypeError('double precision svd is not supported '
                            'in this CULA toolkit')
    
    if len(A.shape) != 2:
        raise TypeError("svd only works on 2D matrix")
    
    S = parray.empty(min(A.shape), real_dtype)
    cula.culaInitialize()
    
    if compute_u:
        if compute_v:
            if econ:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, V.gpudata,
                             V.ld, 1, 1)
                    #cula.culaShutdown()
                    return A, S, V
                else:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, 1, 1,
                             U.gpudata, U.ld)
                    #cula.culaShutdown()
                    return U, S, A
            else:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, 1, 1,
                             U.gpudata, U.ld)
                    #cula.culaShutdown()
                    A.shape = (A.shape[1], A.shape[1])
                    return U, S, A
                else:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, V.gpudata,
                             V.ld, 1, 1)
                    A.shape = (A.shape[0], A.shape[0])
                    #cula.culaShutdown()
                    return A,S,V
        else:
            if econ or (A.shape[1] >= A.shape[0]):
                jobu = 'N'
                jobvt = 'O'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] > A.shape[0]):
                    A.shape = (A.shape[0], A.shape[0])
                #cula.culaShutdown()
                return A, S
            else:
                jobu = 'N'
                jobvt = 'A'
                U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                #cula.culaShutdown()
                return U, S
    else:
        if compute_v:
            if econ or (A.shape[1] <= A.shape[0]):
                jobu = 'O'
                jobvt = 'N'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] < A.shape[0]):
                    A.shape = (A.shape[1], A.shape[1])
                #cula.culaShutdown()
                return S, A
            else:
                jobu = 'A'
                jobvt = 'N'
                V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                #cula.culaShutdown()
                return S, V
        else:
            jobu = 'N'
            jobvt = 'N'
            svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                     A.ld, S.gpudata, 1, 1, 1, 1)
            #cula.culaShutdown()
            return S
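
Note that V here plays the role of NumPy's vt, since the factorization is
written G = U*(diag(S))*V rather than U*(diag(S))*V^H. A quick host-side
comparison (a sketch with hypothetical sizes):

import numpy as np

G = np.random.rand(5, 3).astype(np.float32)
U, S, V = svd(G)
u, s, vt = np.linalg.svd(G, full_matrices=True)
assert np.allclose(S.get().ravel(), s, atol=1e-4)   # same descending order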