Exemplo n.º 1
0
def cuda_qr():
    cula.culaInitialize()

    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float32)

    n = a.shape[0]
    m = a.shape[1]
    ida = a.shape[1]

    tau = np.empty(n, dtype=np.int32)

    tau_gpu = gpuarray.to_gpu(tau)
    a_gpu = gpuarray.to_gpu(a)

    # culaDeviceDgeqrf
    output = cula.culaDeviceSgeqrf(m, n, a_gpu.gpudata, ida, tau_gpu.gpudata)

    print a_gpu.get()
    print tau_gpu.get()

    cula.culaShutdown()
Exemplo n.º 2
0
    def make_thunk(self, node, storage_map, _, no_recycling=[]):

        # Initialize CULA the first time it is needed
        global cula_initialized

        if not cula_available:
            raise RuntimeError('Cula is not available and '
                               'GpuSolve Op can not be constructed.')

        if not cula_initialized:
            cula.culaInitialize()
            cula_initialized = True

        inputs = [storage_map[v] for v in node.inputs]
        outputs = [storage_map[v] for v in node.outputs]

        def thunk():
            # size of the matrices to invert
            z = outputs[0]

            # Matrix
            A = inputs[0][0]

            # Solution vectors
            b = inputs[1][0]

            # A is not explicitly converted between C and F order, instead we
            # switch the "transpose" flag
            if self.trans in ('T', 'C'):
                trans = 'N'
            else:
                trans = 'T'

            # Convert b to F-order from c-order.
            b_cpy = dimshuffle(b, (1, 0)).reshape((b.shape[0], b.shape[1]))

            # This copy forces allocation of a new C-contiguous buffer
            # and returns it.
            A_cpy = A.copy()
            b_cpy = b_cpy.copy()

            def cula_gpu_solve(A_, b_, trans='T'):

                A_shape = A_.shape
                b_shape = b_.shape

                assert (len(A_shape) == 2)
                assert (len(b_shape) == 2)

                if trans in ['T', 'C']:
                    l, n = A_shape
                    k, m = b_shape
                    if n != k:
                        raise ValueError('A and b must be aligned.')
                elif trans in ['N']:
                    n, l = A_shape
                    k, m = b_shape
                    if l != m:
                        raise ValueError('A and b must be aligned.')
                else:
                    raise ValueError('Invalid value for trans')

                lda = max(1, n)
                ldb = max(1, n, l)

                # construct pointer arrays needed for culaDeviceSgels
                # Cula requires you to pass a pointer for A and b.
                A_ptr = A_.gpudata
                b_ptr = b_.gpudata

                cula.culaDeviceSgels(trans, n, l, m, A_ptr, lda, b_ptr, ldb)
                return A_, b_

            A_pycuda, b_pycuda = cula_gpu_solve(A_cpy, b_cpy, trans)

            # Convert b to F-order from c-order and assign it to output:
            b_cpy = b_cpy.reshape(b.shape[::-1])
            b_cpy = dimshuffle(b_cpy, (1, 0))
            z[0] = b_cpy

        thunk.inputs = inputs
        thunk.outputs = outputs
        thunk.lazy = False

        return thunk
Exemplo n.º 3
0
def eig_sym(G, compute_z=True, uplo='U'):
    """
    compute Eigenvalue Decompositon of a symmetric or Hermitian matrix G
    G = V D V^{*}

    Parameters
    -------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_z: bool
               whether return eigenvectors
    uplo: str
          'U' or 'u' assumes the entries of G are stored
          in upper triangular part,
          lower off diagonal triangular part is not referenced
          'L' or 'l' assumes the entries of G are stored
          in lower triangular part,
          upper off diagonal triangular part is not referenced

    Returns
    -------------------------------------
    D:  PitchArray
        a row vector containing all eigenvalues with ascending order
    V:  PitchArray
        if compute_z, jth column of V contains orthonormal
        eigenvector associated with jth eigenvalue

    Examples
    D = eig_sym(G, compute_z = False)
    D,V = eig_sym(G, compute_z = True)
    """
    if cula._libcula_toolkit != 'premium':
        raise ValueError("eigenvalue decomposition is only supported "
                         "in premium version of CULA")

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    if len(A.shape) != 2:
        raise TypeError("eig only works on 2D matrix")

    if A.shape[0] != A.shape[1]:
        raise ValueError("G must be square matrix")

    if uplo in ['u', 'U']:
        uplo = 'L'
    elif uplo in ['l', 'L']:
        uplo = 'U'
    else:
        raise ValueError("uplo must be 'U' or 'L'")

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        eig_func = cula.culaDeviceCheev
    elif A.dtype == np.float32:
        eig_func = cula.culaDeviceSsyev
    else:
        if A.dtype == np.complex128:
            eig_func = cula.culaDeviceZheev
        elif A.dtype == np.float64:
            eig_func = cula.culaDeviceDsyev
        else:
            raise ValueError('unsupported type')
        real_dtype = np.dtype(np.float64)

    D = parray.empty(A.shape[0], real_dtype)

    cula.culaInitialize()
    handle = cublashandle()
    if compute_z:
        jobz = 'V'
    else:
        jobz = 'N'
    eig_func(handle.handle, jobz, uplo, A.shape[0], A.gpudata, A.ld, D.gpudata)
    #cula.culaShutdown()
    if compute_z:
        return D, A.conj().T()
    else:
        return D
Exemplo n.º 4
0
def svd(G, compute_u=True, compute_v=True, econ=False):
    """
    compute Singular Value Decompositon of G
    G = U*(diag(S))*V

    Parameters
    ----------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray of shape (m,n)
        if G is GPUArray or PitchArray, its gpudata will be 
        destroyed after calling the function
    compute_u: bool
               whether return U matrix or not
    compute_v: bool
               whether return V matrix or not
    econ: bool
          return economical matrix

    Returns:
    U:  parray.PitchArray matrix
        as U in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) columns of U
    S:  parray.PitchArray vector
        a row vector containing all singular values
        with descending order
    V:  parray.PitchArray matrix
        as V in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) rows of V

    order of output:
    always obeys the order U,S,V
    e.g.
    S = svd(G, compute_u = False, compute_v = False)
    U,S = svd(G, compute_u = True, compute_v = False)
    S,V = svd(G, compute_u = False, compute_v = True)
    U,S,V = svd(G, compute_u = True, compute_v = True)
    """

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A = parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G

    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        svd_func = cula.culaDeviceCgesvd
    elif A.dtype == np.float32:
        svd_func = cula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if A.dtype == np.complex128:
                svd_func = cula.culaDeviceZgesvd
            elif A.dtype == np.float64:
                svd_func = cula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_dtype = np.dtype(np.float64)
        else:
            raise TypeError('does not support premium double precision svd')

    if len(A.shape) != 2:
        raise TypeError("svd only works on 2D matrix")

    S = parray.empty(min(A.shape), real_dtype)
    cula.culaInitialize()

    if compute_u:
        if compute_v:
            if econ:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    #cula.culaShutdown()
                    return A, S, V
                else:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    return U, S, A
            else:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                    #cula.culaShutdown()
                    A.shape = (A.shape[1], A.shape[1])
                    return U, S, A
                else:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                             A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                    A.shape = (A.shape[0], A.shape[0])
                    #cula.culaShutdown()
                    return A, S, V
        else:
            if econ | (A.shape[1] >= A.shape[0]):
                jobu = 'N'
                jobvt = 'O'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] > A.shape[0]):
                    A.shape = (A.shape[0], A.shape[0])
                #cula.culaShutdown()
                return A, S
            else:
                jobu = 'N'
                jobvt = 'A'
                U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, U.gpudata, U.ld)
                #cula.culaShutdown()
                return U, S
    else:
        if compute_v:
            if econ | (A.shape[1] <= A.shape[0]):
                jobu = 'O'
                jobvt = 'N'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] < A.shape[0]):
                    A.shape = (A.shape[1], A.shape[1])
                #cula.culaShutdown()
                return S, A
            else:
                jobu = 'A'
                jobvt = 'N'
                V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                         S.gpudata, V.gpudata, V.ld, 1, 1)
                #cula.culaShutdown()
                return S, V
        else:
            jobu = 'N'
            jobvt = 'N'
            svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata, A.ld,
                     S.gpudata, 1, 1, 1, 1)
            #cula.culaShutdown()
            return S
Exemplo n.º 5
0
    def make_thunk(self,
                   node,
                   storage_map, _,
                   no_recycling=[]):

        # Initialize CULA the first time it is needed
        global cula_initialized

        if not cula_available:
            raise RuntimeError('Cula is not available and '
                               'GpuSolve Op can not be constructed.')

        if not cula_initialized:
            cula.culaInitialize()
            cula_initialized = True

        inputs = [storage_map[v] for v in node.inputs]
        outputs = [storage_map[v] for v in node.outputs]

        def thunk():
            # size of the matrices to invert
            z = outputs[0]

            # Matrix
            A = inputs[0][0]

            # Solution vectors
            b = inputs[1][0]

            # A is not explicitly converted between C and F order, instead we
            # switch the "transpose" flag
            if self.trans in ('T', 'C'):
                trans = 'N'
            else:
                trans = 'T'

            # Convert b to F-order from c-order.
            b_cpy = dimshuffle(b, (1, 0)).reshape((b.shape[0], b.shape[1]))

            # This copy forces allocation of a new C-contiguous buffer
            # and returns it.
            A_cpy = A.copy()
            b_cpy = b_cpy.copy()

            def cula_gpu_solve(A_, b_, trans='T'):

                A_shape = A_.shape
                b_shape = b_.shape

                assert(len(A_shape) == 2)
                assert(len(b_shape) == 2)

                if trans in ['T', 'C']:
                    l, n = A_shape
                    k, m = b_shape
                    if n != k:
                        raise ValueError('A and b must be aligned.')
                elif trans in ['N']:
                    n, l = A_shape
                    k, m = b_shape
                    if l != m:
                        raise ValueError('A and b must be aligned.')
                else:
                    raise ValueError('Invalid value for trans')

                lda = max(1, n)
                ldb = max(1, n, l)

                # construct pointer arrays needed for culaDeviceSgels
                # Cula requires you to pass a pointer for A and b.
                A_ptr = A_.gpudata
                b_ptr = b_.gpudata

                cula.culaDeviceSgels(trans, n, l, m, A_ptr, lda, b_ptr, ldb)
                return A_, b_

            A_pycuda, b_pycuda = cula_gpu_solve(A_cpy, b_cpy, trans)

            # Convert b to F-order from c-order and assign it to output:
            b_cpy = b_cpy.reshape(b.shape[::-1])
            b_cpy = dimshuffle(b_cpy, (1, 0))
            z[0] = b_cpy

        thunk.inputs = inputs
        thunk.outputs = outputs
        thunk.lazy = False

        return thunk
Exemplo n.º 6
0
def eig_sym(G, compute_z = True, uplo = 'U'):
    """
    compute Eigenvalue Decompositon of a symmetric or Hermitian matrix G
    G = V D V^{*}

    Parameters
    -------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray
        if G is GPUArray or PitchArray, its gpudata will be destroyed
        after calling the function
    compute_z: bool
               whether return eigenvectors
    uplo: str
          'U' or 'u' assumes the entries of G are stored
          in upper triangular part,
          lower off diagonal triangular part is not referenced
          'L' or 'l' assumes the entries of G are stored
          in lower triangular part,
          upper off diagonal triangular part is not referenced

    Returns
    -------------------------------------
    D:  PitchArray
        a row vector containing all eigenvalues with ascending order
    V:  PitchArray
        if compute_z, jth column of V contains orthonormal
        eigenvector associated with jth eigenvalue

    Examples
    D = eig_sym(G, compute_z = False)
    D,V = eig_sym(G, compute_z = True)
    """
    if cula._libcula_toolkit != 'premium':
        raise ValueError("eigenvalue decomposition is only supported "
                         "in premium version of CULA")

    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A= parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G
    
    if len(A.shape) != 2:
        raise TypeError("eig only works on 2D matrix")
    
    if A.shape[0] != A.shape[1]:
        raise ValueError("G must be square matrix")

    if uplo in ['u', 'U']:
        uplo = 'L'
    elif uplo in ['l', 'L']:
        uplo = 'U'
    else:
        raise ValueError("uplo must be 'U' or 'L'")
    
    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        eig_func = cula.culaDeviceCheev        
    elif A.dtype == np.float32:
        eig_func = cula.culaDeviceSsyev
    else:
        if A.dtype == np.complex128:
            eig_func = cula.culaDeviceZheev
        elif A.dtype == np.float64:
            eig_func = cula.culaDeviceDsyev
        else:
            raise ValueError('unsupported type')
        real_dtype = np.dtype(np.float64)
    
    D = parray.empty(A.shape[0], real_dtype)
    
    cula.culaInitialize()
    handle = cublashandle()
    if compute_z:
        jobz = 'V'
    else:
        jobz = 'N'
    eig_func(handle.handle, jobz, uplo, A.shape[0], A.gpudata, A.ld, D.gpudata)
    #cula.culaShutdown()
    if compute_z:
        return D, A.conj().T()
    else:
        return D
Exemplo n.º 7
0
def svd(G, compute_u = True, compute_v = True, econ = False):
    """
    compute Singular Value Decompositon of G
    G = U*(diag(S))*V

    Parameters
    ----------------------------------------
    G:  PitchArray, GPUArray or numpy.ndarray of shape (m,n)
        if G is GPUArray or PitchArray, its gpudata will be 
        destroyed after calling the function
    compute_u: bool
               whether return U matrix or not
    compute_v: bool
               whether return V matrix or not
    econ: bool
          return economical matrix

    Returns:
    U:  parray.PitchArray matrix
        as U in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) columns of U
    S:  parray.PitchArray vector
        a row vector containing all singular values
        with descending order
    V:  parray.PitchArray matrix
        as V in G = U*(diag(S))*V,
        if econ, returns the first min(m,n) rows of V

    order of output:
    always obeys the order U,S,V
    e.g.
    S = svd(G, compute_u = False, compute_v = False)
    U,S = svd(G, compute_u = True, compute_v = False)
    S,V = svd(G, compute_u = False, compute_v = True)
    U,S,V = svd(G, compute_u = True, compute_v = True)
    """
    
    if G.__class__ is not parray.PitchArray:
        if G.__class__ is garray.GPUArray:
            h_G = G.get()
            del G.gpudata
            A= parray.to_gpu(h_G)
        elif G.__class__ is np.ndarray:
            A = parray.to_gpu(G)
        else:
            raise TypeError("G must be either parray, or GPUArray or ndarray")
    else:
        A = G
    
    real_dtype = np.dtype(np.float32)
    if A.dtype == np.complex64:
        svd_func = cula.culaDeviceCgesvd        
    elif A.dtype == np.float32:
        svd_func = cula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if A.dtype == np.complex128:
                svd_func = cula.culaDeviceZgesvd
            elif A.dtype == np.float64:
                svd_func = cula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_dtype = np.dtype(np.float64)
        else:
            raise TypeError('does not support premium double precision svd')
    
    if len(A.shape) != 2:
        raise TypeError("svd only works on 2D matrix")
    
    S = parray.empty(min(A.shape), real_dtype)
    cula.culaInitialize()
    
    if compute_u:
        if compute_v:
            if econ:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, V.gpudata,
                            V.ld, 1, 1)
                    #cula.culaShutdown()
                    return A,S,V
                else:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, 1, 1,
                             U.gpudata, U.ld)
                    #cula.culaShutdown()
                    return U,S,A
            else:
                if A.shape[1] <= A.shape[0]:
                    jobu = 'O'
                    jobvt = 'A'
                    U = parray.empty((A.shape[0], A.shape[0]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, 1, 1,
                             U.gpudata, U.ld)
                    #cula.culaShutdown()
                    A.shape = (A.shape[1],A.shape[1])
                    return U,S,A
                else:
                    jobu = 'A'
                    jobvt = 'O'
                    V = parray.empty((A.shape[1], A.shape[1]), A.dtype)
                    svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                             A.gpudata, A.ld, S.gpudata, V.gpudata,
                             V.ld, 1, 1)
                    A.shape = (A.shape[0], A.shape[0])
                    #cula.culaShutdown()
                    return A,S,V
        else:
            if econ | (A.shape[1] >= A.shape[0]):
                jobu = 'N'
                jobvt = 'O'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] > A.shape[0]):
                    A.shape = (A.shape[0], A.shape[0])
                #cula.culaShutdown()
                return A,S
            else:
                jobu = 'N'
                jobvt = 'A'
                U = parray.empty((A.shape[0],A.shape[0]),A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, U.gpudata, U.ld)
                #cula.culaShutdown()
                return U,S
    else:
        if compute_v:
            if econ | (A.shape[1] <= A.shape[0]):
                jobu = 'O'
                jobvt = 'N'
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, 1, 1, 1, 1)
                if (A.shape[1] < A.shape[0]):
                    A.shape = (A.shape[1], A.shape[1])
                #cula.culaShutdown()
                return S,A
            else:
                jobu = 'A'
                jobvt = 'N'
                V = parray.empty((A.shape[1],A.shape[1]),A.dtype)
                svd_func(jobu, jobvt, A.shape[1], A.shape[0],
                         A.gpudata, A.ld, S.gpudata, V.gpudata, V.ld, 1, 1)
                #cula.culaShutdown()
                return S,V
        else:
            jobu = 'N'
            jobvt = 'N'
            svd_func(jobu, jobvt, A.shape[1], A.shape[0], A.gpudata,
                     A.ld, S.gpudata, 1, 1, 1, 1)
            #cula.culaShutdown()
            return S