Example #1
def svd(a_gpu, jobu='A', jobvt='A'):
    """
    Singular Value Decomposition.

    Factors the matrix `a` into two unitary matrices, `u` and `vh`,
    and a 1-dimensional array of real, non-negative singular values,
    `s`, such that `a == dot(u, dot(diag(s), vh))`.

    Parameters
    ----------
    a : pycuda.gpuarray.GPUArray
        Input matrix of shape `(m, n)` to decompose.
    jobu : {'A', 'S', 'O', 'N'}
        If 'A', return the full `u` matrix with shape `(m, m)`.
        If 'S', return the `u` matrix with shape `(m, k)`.
        If 'O', return the `u` matrix with shape `(m, k)` without
        allocating a new matrix.
        If 'N', don't return `u`.
    jobvt : {'A', 'S', 'O', 'N'}
        If 'A', return the full `vh` matrix with shape `(n, n)`.
        If 'S', return the `vh` matrix with shape `(k, n)`.
        If 'O', return the `vh` matrix with shape `(k, n)` without
        allocating a new matrix.
        If 'N', don't return `vh`.

    Returns
    -------
    u : pycuda.gpuarray.GPUArray
        Unitary matrix of shape `(m, m)` or `(m, k)` depending on
        value of `jobu`.
    s : pycuda.gpuarray.GPUArray
        Array containing the singular values, sorted such that `s[i] >= s[i+1]`.
        `s` is of length `min(m, n)`.
    vh : pycuda.gpuarray.GPUArray
        Unitary matrix of shape `(n, n)` or `(k, n)`, depending
        on `jobvt`.

    Notes
    -----
    Double precision is only supported if the standard version of the
    CULA Dense toolkit is installed.

    This function destroys the contents of the input matrix regardless
    of the values of `jobu` and `jobvt`.

    Only one of `jobu` or `jobvt` may be set to `O`, and then only for
    a square matrix.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import linalg
    >>> linalg.init()
    >>> a = np.random.randn(9, 6) + 1j*np.random.randn(9, 6)
    >>> a = np.asarray(a, np.complex64)
    >>> a_gpu = gpuarray.to_gpu(a)
    >>> u_gpu, s_gpu, vh_gpu = linalg.svd(a_gpu, 'S', 'S')
    >>> np.allclose(a, np.dot(u_gpu.get(), np.dot(np.diag(s_gpu.get()), vh_gpu.get())), 1e-4)
    True

    """

    if not _has_cula:
        raise NotImplementedError('CULA not installed')

    # The free version of CULA only supports single precision floating
    # point numbers:
    data_type = a_gpu.dtype.type
    real_type = np.float32
    if data_type == np.complex64:
        cula_func = cula._libcula.culaDeviceCgesvd
    elif data_type == np.float32:
        cula_func = cula._libcula.culaDeviceSgesvd
    else:
        if cula._libcula_toolkit == 'standard':
            if data_type == np.complex128:
                cula_func = cula._libcula.culaDeviceZgesvd
            elif data_type == np.float64:
                cula_func = cula._libcula.culaDeviceDgesvd
            else:
                raise ValueError('unsupported type')
            real_type = np.float64
        else:
            raise ValueError('double precision not supported')

    # Since CULA assumes that arrays are stored in column-major format
    # while the input array is row-major, the input matrix is effectively
    # transposed:
    n, m = a_gpu.shape
    square = (n == m)

    # Since the input matrix is transposed, jobu and jobvt must also
    # be switched because the computed matrices will be returned in
    # reversed order:
    jobvt, jobu = jobu, jobvt

    # Set the leading dimension of the input matrix:
    lda = max(1, m)

    # Allocate the array of singular values:
    s_gpu = gpuarray.empty(min(m, n), real_type)

    # Normalize the job codes:
    jobu = jobu.upper()
    jobvt = jobvt.upper()

    # Set the leading dimension and allocate u:
    ldu = m
    if jobu == 'A':
        u_gpu = gpuarray.empty((ldu, m), data_type)
    elif jobu == 'S':
        u_gpu = gpuarray.empty((min(m, n), ldu), data_type)
    elif jobu == 'O':
        if not square:
            raise ValueError('in-place computation of singular vectors '+
                             'of non-square matrix not allowed')
        ldu = 1
        u_gpu = a_gpu
    else:
        ldu = 1
        u_gpu = gpuarray.empty((), data_type)

    # Set the leading dimension and allocate vh:
    if jobvt == 'A':
        ldvt = n
        vh_gpu = gpuarray.empty((n, n), data_type)
    elif jobvt == 'S':
        ldvt = min(m, n)
        vh_gpu = gpuarray.empty((n, ldvt), data_type)
    elif jobvt == 'O':
        if jobu == 'O':
            raise ValueError('jobu and jobvt cannot both be O')
        if not square:
            raise ValueError('in-place computation of singular vectors '+
                             'of non-square matrix not allowed')
        ldvt = 1
        vh_gpu = a_gpu
    else:
        ldvt = 1
        vh_gpu = gpuarray.empty((), data_type)

    # Compute SVD and check error status:

    status = cula_func(jobu, jobvt, m, n, int(a_gpu.gpudata),
                       lda, int(s_gpu.gpudata), int(u_gpu.gpudata),
                       ldu, int(vh_gpu.gpudata), ldvt)

    cula.culaCheckStatus(status)

    # Free internal CULA memory:
    cula.culaFreeBuffers()

    # Since the input is assumed to be transposed, it is necessary to
    # return the computed matrices in reverse order:
    if jobu in ['A', 'S', 'O'] and jobvt in ['A', 'S', 'O']:
        return vh_gpu, s_gpu, u_gpu
    elif jobu == 'N' and jobvt != 'N':
        return vh_gpu, s_gpu
    elif jobu != 'N' and jobvt == 'N':
        return s_gpu, u_gpu
    else:
        return s_gpu
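Why the function swaps `jobu`/`jobvt` and returns the factors in reverse order: the column-major interpretation of a row-major buffer is the transpose, and if `a.T == u_t @ diag(s) @ vh_t`, then `a == vh_t.T @ diag(s) @ u_t.T`. A minimal host-side sketch of that identity, using `numpy.linalg.svd` in place of CULA:

import numpy as np

# A row-major (9, 6) matrix; a column-major routine reading the same
# buffer sees the (6, 9) transpose.
a = np.random.randn(9, 6).astype(np.float32)

# SVD of the transpose: a.T == u_t @ diag(s) @ vh_t
u_t, s, vh_t = np.linalg.svd(a.T, full_matrices=False)

# Transposing and reversing the factors recovers the SVD of the
# original matrix, which is why svd() above swaps jobu/jobvt and
# hands back (vh, s, u):
assert np.allclose(a, vh_t.T @ np.diag(s) @ u_t.T, atol=1e-4)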
Example #2
def cho_factor(a_gpu, uplo='L'):
    """
    Cholesky factorisation

    Performs an in-place Cholesky factorisation of the matrix `a`
    such that `a = x*x.T` or `a = x.T*x`, depending on whether the
    lower (`uplo='L'`) or upper (`uplo='U'`) triangle of `a` is used.

    Parameters
    ----------
    a : pycuda.gpuarray.GPUArray
        Input matrix of shape `(m, m)` to decompose.
    uplo : {'U', 'L'}
        Use the upper ('U') or lower ('L', default) triangle of `a`.

    Returns
    -------
    None
        The chosen triangle of `a` is overwritten in place with its
        Cholesky factor.

    Notes
    -----
    Double precision is only supported if the standard version of the
    CULA Dense toolkit is installed.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import linalg
    >>> linalg.init()
    >>> a = np.array([[3.0,0.0],[0.0,7.0]])
    >>> a = np.asarray(a, np.float64)
    >>> a_gpu = gpuarray.to_gpu(a)
    >>> linalg.cho_factor(a_gpu)

    """

    if not _has_cula:
        raise NotImplementedError('CULA not installed')

    data_type = a_gpu.dtype.type
    if cula._libcula_toolkit == 'standard':
        if data_type == np.complex64:
            cula_func = cula._libcula.culaDeviceCpotrf
        elif data_type == np.float32:
            cula_func = cula._libcula.culaDeviceSpotrf
        elif data_type == np.complex128:
            cula_func = cula._libcula.culaDeviceZpotrf
        elif data_type == np.float64:
            cula_func = cula._libcula.culaDeviceDpotrf
        else:
            raise ValueError('unsupported type')
    else:
        raise ValueError('Cholesky factorisation not included in '
                         'CULA Dense Free version')

    # Since CULA assumes that arrays are stored in column-major format
    # while the input array is row-major, the input matrix is effectively
    # transposed:
    n, m = a_gpu.shape

    if n != m:
        raise ValueError('Matrix must be square')

    # Set the leading dimension of the input matrix:
    lda = max(1, m)

    status = cula_func(uplo, n, int(a_gpu.gpudata), lda)

    cula.culaCheckStatus(status)

    # Free internal CULA memory:
    cula.culaFreeBuffers()
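A quick host-side sanity check for `cho_factor`; a sketch, assuming the module is importable as `linalg` (as in the docstring examples) and that the standard CULA toolkit is installed. A diagonal matrix is used so that the choice of triangle and the row-/column-major question do not matter:

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import linalg

linalg.init()

# Diagonal SPD matrix; its Cholesky factor is the elementwise sqrt:
a = np.array([[3.0, 0.0], [0.0, 7.0]], np.float64)
a_gpu = gpuarray.to_gpu(a)
linalg.cho_factor(a_gpu, 'L')  # factorises in place

assert np.allclose(np.diag(a_gpu.get()), np.sqrt([3.0, 7.0]))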
Example #3
def cho_solve(a_gpu, b_gpu, uplo='L'):
    """
    Cholesky solver

    Solves the system of equations `a*x = b` via Cholesky
    factorisation. Overwrites `b` with the solution `inv(a)*b` and
    overwrites the chosen triangle of `a` with its Cholesky factor.

    Parameters
    ----------
    a : pycuda.gpuarray.GPUArray
        Input matrix of shape `(m, m)` to decompose.
    b : pycuda.gpuarray.GPUArray
        Right-hand side vector of length `m`; overwritten with the
        solution.
    uplo : {'U', 'L'}
        Use the upper ('U') or lower ('L', default) triangle of `a`.

    Returns
    -------
    None
        `b` is overwritten in place with the solution and the chosen
        triangle of `a` with its Cholesky factor.

    Notes
    -----
    Double precision is only supported if the standard version of the
    CULA Dense toolkit is installed.

    Examples
    --------
    >>> import pycuda.gpuarray as gpuarray
    >>> import pycuda.autoinit
    >>> import numpy as np
    >>> import linalg
    >>> linalg.init()
    >>> a = np.array([[3.0,0.0],[0.0,7.0]])
    >>> a = np.asarray(a, np.float64)
    >>> a_gpu = gpuarray.to_gpu(a)
    >>> b = np.array([11.,19.])
    >>> b = np.asarray(b, np.float64)
    >>> b_gpu  = gpuarray.to_gpu(b)
    >>> linalg.cho_solve(a_gpu, b_gpu)

    """

    if not _has_cula:
        raise NotImplementedError('CULA not installed')

    # Use the culaDevice?posv solver routines, whose signature
    # (uplo, n, nrhs, a, lda, b, ldb) matches the call further down:
    data_type = a_gpu.dtype.type
    if cula._libcula_toolkit == 'standard':
        if data_type == np.complex64:
            cula_func = cula._libcula.culaDeviceCposv
        elif data_type == np.float32:
            cula_func = cula._libcula.culaDeviceSposv
        elif data_type == np.complex128:
            cula_func = cula._libcula.culaDeviceZposv
        elif data_type == np.float64:
            cula_func = cula._libcula.culaDeviceDposv
        else:
            raise ValueError('unsupported type')
    else:
        raise ValueError('Cholesky solver not included in '
                         'CULA Dense Free version')

    # Since CULA assumes that arrays are stored in column-major format
    # while the input array is row-major, the input matrix is effectively
    # transposed:
    na, ma = a_gpu.shape

    if na != ma:
        raise ValueError('Matrix must be square')

    # Set the leading dimension of the input matrix:
    lda = max(1, ma)
    ldb = lda

    # Assuming we are only solving for a vector. Hence, nrhs = 1
    status = cula_func(uplo, na, 1, int(a_gpu.gpudata), lda, int(b_gpu.gpudata), ldb)

    cula.culaCheckStatus(status)

    # Free internal CULA memory:
    cula.culaFreeBuffers()
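The solution written into `b` can be verified against a host-side solve; again a sketch under the same assumptions (module importable as `linalg`, standard CULA toolkit), reusing the system from the docstring example:

import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import linalg

linalg.init()

a = np.array([[3.0, 0.0], [0.0, 7.0]], np.float64)
b = np.array([11.0, 19.0], np.float64)
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)

# Overwrites b_gpu with x such that a @ x == b:
linalg.cho_solve(a_gpu, b_gpu)

assert np.allclose(b_gpu.get(), np.linalg.solve(a, b))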