Пример #1
0
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        potrf = cusolver.spotrf
        potrf_bufferSize = cusolver.spotrf_bufferSize
    elif dtype == 'd':
        potrf = cusolver.dpotrf
        potrf_bufferSize = cusolver.dpotrf_bufferSize
    elif dtype == 'F':
        potrf = cusolver.cpotrf
        potrf_bufferSize = cusolver.cpotrf_bufferSize
    else:  # dtype == 'D':
        potrf = cusolver.zpotrf
        potrf_bufferSize = cusolver.zpotrf_bufferSize

    buffersize = potrf_bufferSize(handle, cublas.CUBLAS_FILL_MODE_UPPER, n,
                                  x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    potrf(handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        potrf, dev_info)

    util._tril(x, k=0)
    return x
Пример #2
0
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # to prevent `a` to be overwritten
    a = a.copy()

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=dtype)

    ipiv = cupy.empty((a.shape[0], 1), dtype=dtype)

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    else:  # dtype == 'd'
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr, b.data.ptr,
          m, dev_info.data.ptr)

    return b
Пример #3
0
def cholesky(a):
    """Cholesky decomposition.

    Decompose a given two-dimensional square matrix into ``L * L.T``,
    where ``L`` is a lower-triangular matrix and ``.T`` is a conjugate
    transpose operator. Note that in the current implementation ``a`` must be
    a real matrix, and only float32 and float64 are supported.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(N, N)``

    Returns:
        cupy.ndarray: The lower-triangular matrix.

    .. seealso:: :func:`numpy.linalg.cholesky`
    """
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    x = a.astype(dtype, order='C', copy=True)
    n = len(a)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    if dtype == 'f':
        buffersize = cusolver.spotrf_bufferSize(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.spotrf(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dpotrf_bufferSize(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dpotrf(
            handle, cublas.CUBLAS_FILL_MODE_UPPER, n, x.data.ptr, n,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'The leading minor of order {} '
            'is not positive definite'.format(status))
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')
    util._tril(x, k=0)
    return x
Пример #4
0
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    #<-- MODIFIED
    elif dtype == 'd':
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf
    elif dtype == 'F':
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrf = cusolver.cgetrf
    else:
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrf = cusolver.zgetrf
    #<-- MODIFIED

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr, ipiv.data.ptr,
          info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        #ORIGINAL
        # logdet = cupy.array(float('-inf'), dtype)

        #<-- MODIFIED
        if dtype in ['f', 'd']:
            logdet = cupy.array(float('-inf'), dtype)
        elif dtype == 'F':
            logdet = cupy.array(float('-inf'), cupy.float32)
        else:
            logdet = cupy.array(float('-inf'), cupy.float64)
        #<-- MODIFIED

    return sign, logdet
Пример #5
0
def _lu_factor(a, overwrite_a=False, check_finite=True):
    a = cupy.asarray(a)
    util._assert_rank2(a)

    dtype = a.dtype

    if dtype.char == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
    elif dtype.char == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
    elif dtype.char == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
    elif dtype.char == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
    else:
        msg = 'Only float32, float64, complex64 and complex128 are supported.'
        raise NotImplementedError(msg)

    a = a.astype(dtype, order='F', copy=(not overwrite_a))

    if check_finite:
        if a.dtype.kind == 'f' and not cupy.isfinite(a).all():
            raise ValueError('array must not contain infs or NaNs')

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m, n = a.shape

    ipiv = cupy.empty((min(m, n), ), dtype=numpy.intc)

    buffersize = getrf_bufferSize(cusolver_handle, m, n, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, n, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    if dev_info[0] < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrf (lu_factor)' % -dev_info[0])
    elif dev_info[0] > 0:
        warn('Diagonal number %d is exactly zero. Singular matrix.' %
             dev_info[0],
             RuntimeWarning,
             stacklevel=2)

    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv -= 1

    return (a, ipiv)
Пример #6
0
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty((), dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr, ipiv.data.ptr,
          dev_info.data.ptr)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diag(a_copy)
    # ipiv is 1-origin
    non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                cupy.count_nonzero(diag < 0))

    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    logdet = cupy.log(abs(diag)).sum()

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign),
        cupy.where(singular, dtype.type('-inf'), logdet),
    )
Пример #7
0
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, dtype=numpy.int32)
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    try:
        cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
            getrf, dev_info)

        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    except linalg.LinAlgError:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
Пример #8
0
def inv(a):
    '''Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. seealso:: :func:`numpy.linalg.inv`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    b = cupy.eye(len(a), dtype=a.dtype)
    return solve(a, b)
Пример #9
0
def invh(a):
    """Compute the inverse of a Hermitian matrix.

    This function computes a inverse of a real symmetric or complex hermitian
    positive-definite matrix using Cholesky factorization. If matrix ``a`` is
    not positive definite, Cholesky factorization fails and it raises an error.

    Args:
        a (cupy.ndarray): Real symmetric or complex hermitian maxtix.

    Returns:
        cupy.ndarray: The inverse of matrix ``a``.
    """

    # to prevent `a` from being overwritten
    a = a.copy()

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        potrf = cusolver.spotrf
        potrf_bufferSize = cusolver.spotrf_bufferSize
        potrs = cusolver.spotrs
    elif dtype == 'd':
        potrf = cusolver.dpotrf
        potrf_bufferSize = cusolver.dpotrf_bufferSize
        potrs = cusolver.dpotrs
    elif dtype == 'F':
        potrf = cusolver.cpotrf
        potrf_bufferSize = cusolver.cpotrf_bufferSize
        potrs = cusolver.cpotrs
    elif dtype == 'D':
        potrf = cusolver.zpotrf
        potrf_bufferSize = cusolver.zpotrf_bufferSize
        potrs = cusolver.zpotrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    m = a.shape[0]
    uplo = cublas.CUBLAS_FILL_MODE_LOWER

    worksize = potrf_bufferSize(cusolver_handle, uplo, m, a.data.ptr, m)
    workspace = cupy.empty(worksize, dtype=dtype)

    # Cholesky factorization
    potrf(cusolver_handle, uplo, m, a.data.ptr, m, workspace.data.ptr,
          worksize, dev_info.data.ptr)

    info = dev_info[0]
    if info != 0:
        if info < 0:
            msg = '\tThe {}-th parameter is wrong'.format(-info)
        else:
            msg = ('\tThe leading minor of order {} is not positive definite'
                   .format(info))
        raise RuntimeError('matrix inversion failed at potrf.\n' + msg)

    b = cupy.eye(m, dtype=dtype)

    # Solve: A * X = B
    potrs(cusolver_handle, uplo, m, m, a.data.ptr, m, b.data.ptr, m,
          dev_info.data.ptr)

    info = dev_info[0]
    if info > 0:
        assert False, ('Unexpected output returned by potrs (actual: {})'
                       .format(info))
    elif info < 0:
        raise RuntimeError('matrix inversion failed at potrs.\n'
                           '\tThe {}-th parameter is wrong'.format(-info))

    return b
Пример #10
0
def solve(a, b):
    '''Solves a linear matrix equation.

    It computes the exact solution of ``x`` in ``ax = b``,
    where ``a`` is a square and full rank matrix.

    Args:
        a (cupy.ndarray): The matrix with dimension ``(M, M)``
        b (cupy.ndarray): The vector with ``M`` elements, or
            the matrix with dimension ``(M, K)``

    Returns:
        cupy.ndarray:
            The vector with ``M`` elements, or the matrix with dimension
            ``(M, K)``.

    .. seealso:: :func:`numpy.linalg.solve`
    '''
    # NOTE: Since cusolver in CUDA 8.0 does not support gesv,
    #       we manually solve a linear system with QR decomposition.
    #       For details, please see the following:
    #       https://docs.nvidia.com/cuda/cusolver/index.html#qr_examples
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    if 2 < b.ndim:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be '
                                 'one or two-dimensional'.format(b.ndim))
    if len(a) != len(b):
        raise linalg.LinAlgError('The number of rows of array a must be '
                                 'the same as that of array b')

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, k = (b.size, 1) if b.ndim == 1 else b.shape
    a = a.transpose().astype(dtype, order='C', copy=True)
    b = b.transpose().astype(dtype, order='C', copy=True)
    cusolver_handle = device.get_cusolver_handle()
    cublas_handle = device.get_cublas_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf = cusolver.sgeqrf
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        ormqr = cusolver.sormqr
        trsm = cublas.strsm
    else:  # dtype == 'd'
        geqrf = cusolver.dgeqrf
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        ormqr = cusolver.dormqr
        trsm = cublas.dtrsm

    # 1. QR decomposition (A = Q * R)
    buffersize = geqrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(m, dtype=dtype)
    geqrf(cusolver_handle, m, m, a.data.ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 2. ormqr (Q^T * B)
    ormqr(cusolver_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_OP_T, m, k,
          m, a.data.ptr, m, tau.data.ptr, b.data.ptr, m, workspace.data.ptr,
          buffersize, dev_info.data.ptr)
    _check_status(dev_info)
    # 3. trsm (X = R^{-1} * (Q^T * B))
    trsm(cublas_handle, cublas.CUBLAS_SIDE_LEFT, cublas.CUBLAS_FILL_MODE_UPPER,
         cublas.CUBLAS_OP_N, cublas.CUBLAS_DIAG_NON_UNIT, m, k, 1, a.data.ptr,
         m, b.data.ptr, m)
    return b.transpose()
Пример #11
0
Файл: solve.py Проект: y1r/cupy
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    # to prevent `a` to be overwritten
    a = a.copy()

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    elif dtype == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs
    elif dtype == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrs = cusolver.cgetrs
    elif dtype == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrs = cusolver.zgetrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr, b.data.ptr,
          m, dev_info.data.ptr)

    return b
Пример #12
0
def lstsq(a, b, rcond=1e-15):
    """Return the least-squares solution to a linear matrix equation.

    Solves the equation `a x = b` by computing a vector `x` that
    minimizes the Euclidean 2-norm `|| b - a x ||^2`.  The equation may
    be under-, well-, or over- determined (i.e., the number of
    linearly independent rows of `a` can be less than, equal to, or
    greater than its number of linearly independent columns).  If `a`
    is square and of full rank, then `x` (but for round-off error) is
    the "exact" solution of the equation.

    Args:
        a (cupy.ndarray): "Coefficient" matrix with dimension ``(M, N)``
        b (cupy.ndarray): "Dependent variable" values with dimension ``(M,)``
            or ``(M, K)``
        rcond (float): Cutoff parameter for small singular values.
            For stability it computes the largest singular value denoted by
            ``s``, and sets all singular values smaller than ``s`` to zero.

    Returns:
        tuple:
            A tuple of ``(x, residuals, rank, s)``. Note ``x`` is the
            least-squares solution with shape ``(N,)`` or ``(N, K)`` depending
            if ``b`` was two-dimensional. The sums of ``residuals`` is the
            squared Euclidean 2-norm for each column in b - a*x. The
            ``residuals`` is an empty array if the rank of a is < N or M <= N,
            but  iff b is 1-dimensional, this is a (1,) shape array, Otherwise
            the shape is (K,). The ``rank`` of matrix ``a`` is an integer. The
            singular values of ``a`` are ``s``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.lstsq`
    """
    util._assert_cupy_array(a, b)
    util._assert_rank2(a)
    if b.ndim > 2:
        raise linalg.LinAlgError('{}-dimensional array given. Array must be at'
                                 ' most two-dimensional'.format(b.ndim))
    m, n = a.shape[-2:]
    m2 = b.shape[0]
    if m != m2:
        raise linalg.LinAlgError('Incompatible dimensions')

    u, s, vt = cupy.linalg.svd(a, full_matrices=False)
    # number of singular values and matrix rank
    cutoff = rcond * s.max()
    s1 = 1 / s
    sing_vals = s <= cutoff
    s1[sing_vals] = 0
    rank = s.size - sing_vals.sum()

    if b.ndim == 2:
        s1 = cupy.repeat(s1.reshape(-1, 1), b.shape[1], axis=1)
    # Solve the least-squares solution
    z = core.dot(u.transpose(), b) * s1
    x = core.dot(vt.transpose(), z)
    # Calculate squared Euclidean 2-norm for each column in b - a*x
    if rank != n or m <= n:
        resids = cupy.array([], dtype=a.dtype)
    elif b.ndim == 2:
        e = b - core.dot(a, x)
        resids = cupy.sum(cupy.square(e), axis=0)
    else:
        e = b - cupy.dot(a, x)
        resids = cupy.dot(e.T, e).reshape(-1)
    return x, resids, rank, s
Пример #13
0
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64
    a_dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D':
        a_dtype = 'D'
        s_dtype = 'd'

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    n, m = a.shape

    # `a` must be copied because xgesvd destroys the matrix
    if m >= n:
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = a.shape
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            vt = cupy.empty((n, n), dtype=a_dtype)
        else:
            u = cupy.empty((mn, m), dtype=a_dtype)
            vt = cupy.empty((mn, n), dtype=a_dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
    s = cupy.empty(mn, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')
    if a_dtype == 'f':
        buffersize = cusolver.sgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.sgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    elif a_dtype == 'd':
        buffersize = cusolver.dgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.dgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    elif a_dtype == 'F':
        buffersize = cusolver.cgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.cgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    else:  # a_dtype == 'D':
        buffersize = cusolver.zgesvd_bufferSize(handle, m, n)
        workspace = cupy.empty(buffersize, dtype=a_dtype)
        cusolver.zgesvd(
            handle, job, job, m, n, x.data.ptr, m,
            s.data.ptr, u_ptr, m, vt_ptr, n,
            workspace.data.ptr, buffersize, 0, dev_info.data.ptr)

    status = int(dev_info[0])
    if status > 0:
        raise linalg.LinAlgError(
            'SVD computation does not converge')
    elif status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    # Note that the returned array may need to be transporsed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
Пример #14
0
def lu_factor(a, overwrite_a=False, check_finite=True):
    """LU decomposition.

    Decompose a given two-dimensional square matrix into ``P * L * U``,
    where ``P`` is a permutation matrix,  ``L`` lower-triangular with
    unit diagonal elements, and ``U`` upper-triangular matrix.
    Note that in the current implementation ``a`` must be
    a real matrix, and only :class:`numpy.float32` and :class:`numpy.float64`
    are supported.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(lu, piv)`` where ``lu`` is a :class:`cupy.ndarray`
            storing ``U`` in its upper triangle, and ``L`` without
            unit diagonal elements in its lower triangle, and ``piv`` is
            a :class:`cupy.ndarray` storing pivot indices representing
            permutation matrix ``P``. For ``0 <= i < min(M,N)``, row
            ``i`` of the matrix was interchanged with row ``piv[i]``

    .. seealso:: :func:`scipy.linalg.lu_factor`

    .. note::

        Current implementation returns result different from SciPy when the
        matrix singular. SciPy returns an array containing ``0.`` while the
        current implementation returns an array containing ``nan``.

        >>> import numpy as np
        >>> import scipy.linalg
        >>> scipy.linalg.lu_factor(np.array([[0, 1], [0, 0]], \
dtype=np.float32))
        (array([[0., 1.],
               [0., 0.]], dtype=float32), array([0, 1], dtype=int32))

        >>> import cupy as cp
        >>> import cupyx.scipy.linalg
        >>> cupyx.scipy.linalg.lu_factor(cp.array([[0, 1], [0, 0]], \
dtype=cp.float32))
        (array([[ 0.,  1.],
               [nan, nan]], dtype=float32), array([0, 1], dtype=int32))
    """

    a = cupy.asarray(a)
    util._assert_rank2(a)

    dtype = a.dtype

    if dtype.char == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
    elif dtype.char == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
    else:
        raise NotImplementedError('Only float32 and float64 are supported.')

    a = a.astype(dtype, order='F', copy=(not overwrite_a))

    if check_finite:
        if a.dtype.kind == 'f' and not cupy.isfinite(a).all():
            raise ValueError('array must not contain infs or NaNs')

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    m, n = a.shape

    ipiv = cupy.empty((min(m, n), ), dtype=numpy.intc)

    buffersize = getrf_bufferSize(cusolver_handle, m, n, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, n, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)

    if dev_info[0] < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrf (lu_factor)' % -dev_info[0])
    elif dev_info[0] > 0:
        warn('Diagonal number %d is exactly zero. Singular matrix.' %
             dev_info[0],
             RuntimeWarning,
             stacklevel=2)

    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv -= 1

    return (a, ipiv)
Пример #15
0
def qr(a, mode='reduced'):
    '''QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', and decompose a matrix ``A = (M, N)`` into ``Q``,
            ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    .. seealso:: :func:`numpy.linalg.qr`
    '''
    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # Cast to float32 or float64
    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, n = a.shape
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    # compute working space of geqrf and ormqr, and solve R
    if dtype == 'f':
        buffersize = cusolver.sgeqrf_bufferSize(handle, m, n, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        tau = cupy.empty(mn, dtype=numpy.float32)
        cusolver.sgeqrf(
            handle, m, n, x.data.ptr, m,
            tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:  # dtype == 'd'
        buffersize = cusolver.dgeqrf_bufferSize(handle, n, m, x.data.ptr, n)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        tau = cupy.empty(mn, dtype=numpy.float64)
        cusolver.dgeqrf(
            handle, m, n, x.data.ptr, m,
            tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)
    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    if dtype == 'f':
        buffersize = cusolver.sorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float32)
        cusolver.sorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)
    else:
        buffersize = cusolver.dorgqr_bufferSize(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr)
        workspace = cupy.empty(buffersize, dtype=numpy.float64)
        cusolver.dorgqr(
            handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
            workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
Пример #16
0
def lu_solve(lu_and_piv, b, trans=0, overwrite_b=False, check_finite=True):
    """Solve an equation system, ``a * x = b``, given the LU factorization of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    """

    (lu, ipiv) = lu_and_piv

    util._assert_cupy_array(lu)
    util._assert_rank2(lu)
    util._assert_nd_squareness(lu)

    m = lu.shape[0]
    if m != b.shape[0]:
        raise ValueError('incompatible dimensions.')

    dtype = lu.dtype
    if dtype.char == 'f':
        getrs = cusolver.sgetrs
    elif dtype.char == 'd':
        getrs = cusolver.dgetrs
    else:
        raise NotImplementedError('Only float32 and float64 are supported.')

    if trans == 0:
        trans = cublas.CUBLAS_OP_N
    elif trans == 1:
        trans = cublas.CUBLAS_OP_T
    elif trans == 2:
        trans = cublas.CUBLAS_OP_C
    else:
        raise ValueError('unknown trans')

    lu = lu.astype(dtype, order='F', copy=False)
    ipiv = ipiv.astype(ipiv.dtype, order='F', copy=True)
    # cuSolver uses 1-origin while SciPy uses 0-origin
    ipiv += 1
    b = b.astype(dtype, order='F', copy=(not overwrite_b))

    if check_finite:
        if lu.dtype.kind == 'f' and not cupy.isfinite(lu).all():
            raise ValueError(
                'array must not contain infs or NaNs.\n'
                'Note that when a singular matrix is given, unlike '
                'scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor '
                'returns an array containing NaN.')
        if b.dtype.kind == 'f' and not cupy.isfinite(b).all():
            raise ValueError('array must not contain infs or NaNs')

    n = 1 if b.ndim == 1 else b.shape[1]
    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    # solve for the inverse
    getrs(cusolver_handle, trans, m, n, lu.data.ptr, m, ipiv.data.ptr,
          b.data.ptr, m, dev_info.data.ptr)

    if dev_info[0] < 0:
        raise ValueError('illegal value in %d-th argument of '
                         'internal getrs (lu_solve)' % -dev_info[0])

    return b
Пример #17
0
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.promote_types(a.dtype.char, 'f').char

    m, n = a.shape
    mn = min(m, n)
    if mn == 0:
        if mode == 'reduced':
            return cupy.empty((m, 0), dtype), cupy.empty((0, n), dtype)
        elif mode == 'complete':
            return cupy.identity(m, dtype), cupy.empty((m, n), dtype)
        elif mode == 'r':
            return cupy.empty((0, n), dtype)
        else:  # mode == 'raw'
            # compatibility with numpy.linalg.qr
            dtype = numpy.promote_types(dtype, 'd')
            return cupy.empty((n, m), dtype), cupy.empty((0, ), dtype)

    x = a.transpose().astype(dtype, order='C', copy=True)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    # compute working space of geqrf and solve R
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m, tau.data.ptr, workspace.data.ptr,
          buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        geqrf, dev_info)

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        elif a.dtype.char == 'F':
            # The same applies to complex64
            return x.astype(numpy.complex128), tau.astype(numpy.complex128)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # compute working space of orgqr and solve Q
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr

    buffersize = orgqr_bufferSize(handle, m, mc, mn, q.data.ptr, m,
                                  tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(handle, m, mc, mn, q.data.ptr, m, tau.data.ptr, workspace.data.ptr,
          buffersize, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        orgqr, dev_info)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
Пример #18
0
def qr(a, mode='reduced'):
    """QR decomposition.

    Decompose a given two-dimensional matrix into ``Q * R``, where ``Q``
    is an orthonormal and ``R`` is an upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix.
        mode (str): The mode of decomposition. Currently 'reduced',
            'complete', 'r', and 'raw' modes are supported. The default mode
            is 'reduced', in which matrix ``A = (M, N)`` is decomposed into
            ``Q``, ``R`` with dimensions ``(M, K)``, ``(K, N)``, where
            ``K = min(M, N)``.

    Returns:
        cupy.ndarray, or tuple of ndarray:
            Although the type of returned object depends on the mode,
            it returns a tuple of ``(Q, R)`` by default.
            For details, please see the document of :func:`numpy.linalg.qr`.

    .. seealso:: :func:`numpy.linalg.qr`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    if mode not in ('reduced', 'complete', 'r', 'raw'):
        if mode in ('f', 'full', 'e', 'economic'):
            msg = 'The deprecated mode \'{}\' is not supported'.format(mode)
            raise ValueError(msg)
        else:
            raise ValueError('Unrecognized mode \'{}\''.format(mode))

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    m, n = a.shape
    x = a.transpose().astype(dtype, order='C', copy=True)
    mn = min(m, n)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)
    # compute working space of geqrf and orgqr, and solve R
    if dtype == 'f':
        geqrf_bufferSize = cusolver.sgeqrf_bufferSize
        geqrf = cusolver.sgeqrf
    elif dtype == 'd':
        geqrf_bufferSize = cusolver.dgeqrf_bufferSize
        geqrf = cusolver.dgeqrf
    elif dtype == 'F':
        geqrf_bufferSize = cusolver.cgeqrf_bufferSize
        geqrf = cusolver.cgeqrf
    elif dtype == 'D':
        geqrf_bufferSize = cusolver.zgeqrf_bufferSize
        geqrf = cusolver.zgeqrf
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)
    buffersize = geqrf_bufferSize(handle, m, n, x.data.ptr, n)
    workspace = cupy.empty(buffersize, dtype=dtype)
    tau = cupy.empty(mn, dtype=dtype)
    geqrf(handle, m, n, x.data.ptr, m,
          tau.data.ptr, workspace.data.ptr, buffersize, dev_info.data.ptr)

    status = int(dev_info[0])
    if status < 0:
        raise linalg.LinAlgError(
            'Parameter error (maybe caused by a bug in cupy.linalg?)')

    if mode == 'r':
        r = x[:, :mn].transpose()
        return util._triu(r)

    if mode == 'raw':
        if a.dtype.char == 'f':
            # The original numpy.linalg.qr returns float64 in raw mode,
            # whereas the cusolver returns float32. We agree that the
            # following code would be inappropriate, however, in this time
            # we explicitly convert them to float64 for compatibility.
            return x.astype(numpy.float64), tau.astype(numpy.float64)
        elif a.dtype.char == 'F':
            # The same applies to complex64
            return x.astype(numpy.complex128), tau.astype(numpy.complex128)
        return x, tau

    if mode == 'complete' and m > n:
        mc = m
        q = cupy.empty((m, m), dtype)
    else:
        mc = mn
        q = cupy.empty((n, m), dtype)
    q[:n] = x

    # solve Q
    if dtype == 'f':
        orgqr_bufferSize = cusolver.sorgqr_bufferSize
        orgqr = cusolver.sorgqr
    elif dtype == 'd':
        orgqr_bufferSize = cusolver.dorgqr_bufferSize
        orgqr = cusolver.dorgqr
    elif dtype == 'F':
        orgqr_bufferSize = cusolver.cungqr_bufferSize
        orgqr = cusolver.cungqr
    elif dtype == 'D':
        orgqr_bufferSize = cusolver.zungqr_bufferSize
        orgqr = cusolver.zungqr
    buffersize = orgqr_bufferSize(handle, m, mc, mn, q.data.ptr, m,
                                  tau.data.ptr)
    workspace = cupy.empty(buffersize, dtype=dtype)
    orgqr(handle, m, mc, mn, q.data.ptr, m, tau.data.ptr,
          workspace.data.ptr, buffersize, dev_info.data.ptr)

    q = q[:mc].transpose()
    r = x[:, :mc].transpose()
    return q, util._triu(r)
Пример #19
0
def svd(a, full_matrices=True, compute_uv=True):
    """Singular Value Decomposition.

    Factorizes the matrix ``a`` as ``u * np.diag(s) * v``, where ``u`` and
    ``v`` are unitary and ``s`` is an one-dimensional array of ``a``'s
    singular values.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        full_matrices (bool): If True, it returns u and v with dimensions
            ``(M, M)`` and ``(N, N)``. Otherwise, the dimensions of u and v
            are respectively ``(M, K)`` and ``(K, N)``, where
            ``K = min(M, N)``.
        compute_uv (bool): If ``False``, it only returns singular values.

    Returns:
        tuple of :class:`cupy.ndarray`:
            A tuple of ``(u, s, v)`` such that ``a = u * np.diag(s) * v``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.svd`
    """
    # TODO(Saito): Current implementation only accepts two-dimensional arrays
    util._assert_cupy_array(a)
    util._assert_rank2(a)

    # Cast to float32 or float64
    a_dtype = numpy.promote_types(a.dtype.char, 'f').char
    if a_dtype == 'f':
        s_dtype = 'f'
    elif a_dtype == 'd':
        s_dtype = 'd'
    elif a_dtype == 'F':
        s_dtype = 'f'
    else:  # a_dtype == 'D':
        a_dtype = 'D'
        s_dtype = 'd'

    # Remark 1: gesvd only supports m >= n (WHAT?)
    # Remark 2: gesvd only supports jobu = 'A' and jobvt = 'A'
    # Remark 3: gesvd returns matrix U and V^H
    # Remark 4: Remark 2 is removed since cuda 8.0 (new!)
    n, m = a.shape

    # `a` must be copied because xgesvd destroys the matrix
    if m >= n:
        x = a.astype(a_dtype, order='C', copy=True)
        trans_flag = False
    else:
        m, n = a.shape
        x = a.transpose().astype(a_dtype, order='C', copy=True)
        trans_flag = True
    mn = min(m, n)

    if compute_uv:
        if full_matrices:
            u = cupy.empty((m, m), dtype=a_dtype)
            vt = cupy.empty((n, n), dtype=a_dtype)
        else:
            u = cupy.empty((mn, m), dtype=a_dtype)
            vt = cupy.empty((mn, n), dtype=a_dtype)
        u_ptr, vt_ptr = u.data.ptr, vt.data.ptr
    else:
        u_ptr, vt_ptr = 0, 0  # Use nullptr
    s = cupy.empty(mn, dtype=s_dtype)
    handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    if compute_uv:
        job = ord('A') if full_matrices else ord('S')
    else:
        job = ord('N')

    if a_dtype == 'f':
        gesvd = cusolver.sgesvd
        gesvd_bufferSize = cusolver.sgesvd_bufferSize
    elif a_dtype == 'd':
        gesvd = cusolver.dgesvd
        gesvd_bufferSize = cusolver.dgesvd_bufferSize
    elif a_dtype == 'F':
        gesvd = cusolver.cgesvd
        gesvd_bufferSize = cusolver.cgesvd_bufferSize
    else:  # a_dtype == 'D':
        gesvd = cusolver.zgesvd
        gesvd_bufferSize = cusolver.zgesvd_bufferSize

    buffersize = gesvd_bufferSize(handle, m, n)
    workspace = cupy.empty(buffersize, dtype=a_dtype)
    gesvd(handle, job, job, m, n, x.data.ptr, m, s.data.ptr, u_ptr, m, vt_ptr,
          n, workspace.data.ptr, buffersize, 0, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        gesvd, dev_info)

    # Note that the returned array may need to be transporsed
    # depending on the structure of an input
    if compute_uv:
        if trans_flag:
            return u.transpose(), s, vt.transpose()
        else:
            return vt, s, u
    else:
        return s
Пример #20
0
def inv(a):
    """Computes the inverse of a matrix.

    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.

    Args:
        a (cupy.ndarray): The regular matrix

    Returns:
        cupy.ndarray: The inverse of a matrix.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. seealso:: :func:`numpy.linalg.inv`
    """
    if a.ndim >= 3:
        return _batched_inv(a)

    # to prevent `a` to be overwritten
    a = a.copy()

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    # support float32, float64, complex64, and complex128
    if a.dtype.char in 'fdFD':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=numpy.int32)

    ipiv = cupy.empty((a.shape[0], 1), dtype=numpy.intc)

    if dtype == 'f':
        getrf = cusolver.sgetrf
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrs = cusolver.sgetrs
    elif dtype == 'd':
        getrf = cusolver.dgetrf
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrs = cusolver.dgetrs
    elif dtype == 'F':
        getrf = cusolver.cgetrf
        getrf_bufferSize = cusolver.cgetrf_bufferSize
        getrs = cusolver.cgetrs
    elif dtype == 'D':
        getrf = cusolver.zgetrf
        getrf_bufferSize = cusolver.zgetrf_bufferSize
        getrs = cusolver.zgetrs
    else:
        msg = ('dtype must be float32, float64, complex64 or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    m = a.shape[0]

    buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)

    # LU factorization
    getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrf, dev_info)

    b = cupy.eye(m, dtype=dtype)

    # solve for the inverse
    getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr, b.data.ptr,
          m, dev_info.data.ptr)
    cupy.linalg.util._check_cusolver_dev_info_if_synchronization_allowed(
        getrs, dev_info)

    return b
Пример #21
0
def inv_core(a, cholesky=False):
    """Computes the inverse of a matrix.
    This function computes matrix ``a_inv`` from n-dimensional regular matrix
    ``a`` such that ``dot(a, a_inv) == eye(n)``.
    Args:
        a (cupy.ndarray): The regular matrix
        b (Boolean): Use cholesky decomposition
    Returns:
        cupy.ndarray: The inverse of a matrix.
    .. seealso:: :func:`numpy.linalg.inv`
    """

    xp = cupy.get_array_module(a)
    if xp == numpy:
        if cholesky:
            warnings.warn(
                "Current fast-inv using cholesky doesn't support numpy.ndarray."
            )
        return numpy.linalg.inv(a)

    if not cuda.cusolver_enabled:
        raise RuntimeError('Current cupy only supports cusolver in CUDA 8.0')

    # to prevent `a` to be overwritten
    a = a.copy()

    util._assert_cupy_array(a)
    util._assert_rank2(a)
    util._assert_nd_squareness(a)

    if a.dtype.char == 'f' or a.dtype.char == 'd':
        dtype = a.dtype.char
    else:
        dtype = numpy.find_common_type((a.dtype.char, 'f'), ()).char

    cusolver_handle = device.get_cusolver_handle()
    dev_info = cupy.empty(1, dtype=cupy.int)
    m = a.shape[0]

    b = cupy.eye(m, dtype=dtype)

    if not cholesky:
        if dtype == 'f':
            getrf = cusolver.sgetrf
            getrf_bufferSize = cusolver.sgetrf_bufferSize
            getrs = cusolver.sgetrs
        else:  # dtype == 'd'
            getrf = cusolver.dgetrf
            getrf_bufferSize = cusolver.dgetrf_bufferSize
            getrs = cusolver.dgetrs

        buffersize = getrf_bufferSize(cusolver_handle, m, m, a.data.ptr, m)

        # TODO(y1r): cache buffer to avoid malloc
        workspace = cupy.empty(buffersize, dtype=dtype)
        ipiv = cupy.empty((a.shape[0], 1), dtype=dtype)

        # LU Decomposition
        getrf(cusolver_handle, m, m, a.data.ptr, m, workspace.data.ptr,
              ipiv.data.ptr, dev_info.data.ptr)

        # TODO(y1r): check dev_info status

        # solve for the inverse
        getrs(cusolver_handle, 0, m, m, a.data.ptr, m, ipiv.data.ptr,
              b.data.ptr, m, dev_info.data.ptr)

        # TODO(y1r): check dev_info status
    else:
        if dtype == 'f':
            potrf = cusolver.spotrf
            potrf_bufferSize = cusolver.spotrf_bufferSize
            potrs = cusolver.spotrs
        else:  # dtype == 'd'
            potrf = cusolver.dpotrf
            potrf_bufferSize = cusolver.dpotrf_bufferSize
            potrs = cusolver.dpotrs

        buffersize = potrf_bufferSize(cusolver_handle,
                                      cublas.CUBLAS_FILL_MODE_UPPER, m,
                                      a.data.ptr, m)

        # TODO(y1r): cache buffer to avoid malloc
        workspace = cupy.empty(buffersize, dtype=dtype)

        # Cholesky Decomposition
        potrf(cusolver_handle, cublas.CUBLAS_FILL_MODE_UPPER, m, a.data.ptr, m,
              workspace.data.ptr, buffersize, dev_info.data.ptr)

        # TODO(y1r): check dev_info status

        # solve for the inverse
        potrs(cusolver_handle, cublas.CUBLAS_FILL_MODE_UPPER, m, m, a.data.ptr,
              m, b.data.ptr, m, dev_info.data.ptr)

        # TODO(y1r): check dev_info status

    return b