def test_inv_assumptions(app_inst: ArrayApplication):
    """Check the numerical assumptions behind the Cholesky/``dtrtri`` inverse.

    The test verifies, in order, that

    * the application-level ``inv`` agrees with ``np.linalg.inv``;
    * ``L^-T L^-1`` built from the Cholesky factor equals the inverse;
    * LAPACK ``dtrtri`` reproduces the inverse of the Cholesky factor;
    * with ``overwrite_c=1`` a Fortran-ordered input ends up holding the
      result, while a C-ordered input does not;
    * SciPy's Cholesky can be fed straight into ``dtrtri``;
    * on a 1500 x 1500 matrix the Cholesky route is timed faster than
      ``np.linalg.inv`` (a wall-clock comparison).
    """
    # pylint: disable=no-member, unused-variable
    dense = sample_sym_pd_mat(shape=(10, 10))

    # Application inverse versus NumPy's general-purpose inverse.
    block_array = app_inst.array(dense, dense.shape)
    app_inverse = app_inst.inv(block_array).get()
    expected_inverse = np.linalg.inv(dense)
    assert np.allclose(expected_inverse, app_inverse)

    # Cholesky route: Z^-1 = L^-T @ L^-1.
    chol = np.linalg.cholesky(dense)
    chol_inverse = np.linalg.inv(chol)
    assert np.allclose(chol_inverse.T @ chol_inverse, expected_inverse)

    # Triangular inversion through LAPACK.
    assert chol.dtype == np.float64
    lapack_inverse, _ = lapack.dtrtri(chol, lower=1, unitdiag=0, overwrite_c=0)
    assert np.allclose(chol_inverse, lapack_inverse)

    # Fortran-ordered input with overwrite_c=1: the input buffer is expected
    # to hold the inverse afterwards.
    fortran_buf = chol.copy(order="F")
    fortran_res, _ = lapack.dtrtri(
        fortran_buf, lower=1, unitdiag=0, overwrite_c=1
    )
    assert np.allclose(fortran_res, fortran_buf)
    assert np.allclose(chol_inverse, fortran_buf)

    # C-ordered input: a copy is expected, so the input stays untouched.
    c_buf = chol.copy(order="C")
    c_res, _ = lapack.dtrtri(c_buf, lower=1, unitdiag=0, overwrite_c=1)
    assert not np.allclose(c_res, c_buf)

    # SciPy Cholesky chained directly into dtrtri.
    factor = scipy.linalg.cholesky(
        np.asfortranarray(dense),
        lower=True,
        overwrite_a=True,
        check_finite=False,
    )
    scipy_inverse, _ = lapack.dtrtri(factor, lower=1, unitdiag=0, overwrite_c=1)
    assert np.allclose(scipy_inverse, chol_inverse)

    # Timing comparison on a larger matrix.
    dense = sample_sym_pd_mat((1500, 1500))

    started = time.time()
    factor = scipy.linalg.cholesky(
        np.asfortranarray(dense),
        lower=True,
        overwrite_a=True,
        check_finite=False,
    )
    scipy_inverse, _ = lapack.dtrtri(factor, lower=1, unitdiag=0, overwrite_c=1)
    _ = scipy_inverse.T @ scipy_inverse
    scipy_elapsed = time.time() - started

    started = time.time()
    _ = np.linalg.inv(dense)
    numpy_elapsed = time.time() - started

    assert scipy_elapsed < numpy_elapsed
def log_mvnpdf_low_rank(
    y: np.ndarray,
    mu: np.ndarray,
    M: np.ndarray,
    d: np.ndarray,
    scipy_lapack: bool = True,
) -> float:
    """
    Evaluate log N(y; mu, MM' + diag(d)) without forming the full covariance.

    The Woodbury identity reduces the inverse to a (k, k) problem, and the
    log-determinant is assembled from ``d`` and the Cholesky factor of that
    (k, k) matrix.

    :param y: observed vector, (n_points, )
    :param mu: mean vector, (n_points, )
    :param M: low rank factor of the covariance matrix, (n_points, k)
    :param d: diagonal noise term, (n_points, )
    :param scipy_lapack: if True, invert the Cholesky factor explicitly with
        LAPACK ``dtrtri``; otherwise use ``scipy.linalg.solve_triangular``
    :return: the log density
    """
    log_2pi = 1.83787706640934534
    n_points, rank = M.shape

    residual = y[:, None] - mu[:, None]  # (n_points, 1)
    inv_d = 1 / d[:, None]  # (n_points, 1)
    scaled_residual = inv_d * residual  # D^-1 (y - mu), (n_points, 1)
    scaled_M = inv_d * M  # D^-1 M, (n_points, k)

    # Woodbury: with B = I + M' D^-1 M,
    #   K^-1 = D^-1 - D^-1 M B^-1 M' D^-1
    B = np.matmul(M.T, scaled_M)  # (k, k)
    np.fill_diagonal(B, np.diag(B) + 1)  # B += I

    # numpy returns the lower triangular factor, B = L L'
    L = np.linalg.cholesky(B)

    # C = B^-1 M' D^-1 = L'^-1 (L^-1 (D^-1 M)')
    scaled_M_t = scaled_M.T
    if scipy_lapack:
        L_inv = lapack.dtrtri(np.asfortranarray(L), lower=1)[0]
        Lt_inv = lapack.dtrtri(np.asfortranarray(L.T), lower=0)[0]
        C = np.matmul(Lt_inv, np.matmul(L_inv, scaled_M_t))
    else:
        half_solved = scipy.linalg.solve_triangular(
            L, scaled_M_t, lower=True
        )  # (k, n_points)
        C = scipy.linalg.solve_triangular(
            L.T, half_solved, lower=False
        )  # (k, n_points)

    correction = np.matmul(scaled_M, np.matmul(C, residual))
    K_inv_y = scaled_residual - correction  # (n_points, 1)

    # log|K| = log|D| + log|B|, with log|B| = 2 * sum(log(diag(L)))
    log_det_K = np.sum(np.log(d)) + 2 * np.sum(np.log(np.diag(L)))
    quad_form = np.matmul(residual.T, K_inv_y).sum()

    return -0.5 * (quad_form + log_det_K + n_points * log_2pi)
def qrSolve(X, y):
    '''
    Uses QR Decomposition to solve X * theta = y.

    Steps, as implemented:
        1. XTX = cov(X)
        2. Q, R = qr(XTX) in economic mode
        3. if R is square, invert it with LAPACK dtrtri
        4. if R is not square, or dtrtri reports anything other than
           success (info != 0), fall back to pinv(R)
        5. theta = R^-1 @ (Q.T @ (X.T @ y))

    When use_gpu is set, R is converted with R.numpy() before inversion.

    Args:
        X: feature matrix
        y: targets

    Returns:
        The solution theta.
    '''
    XTX = cov(X)
    Q, R = scipy_qr(XTX, mode = 'economic', check_finite = False, overwrite_a = True)

    n_rows, n_cols = R.shape
    if use_gpu:
        R = R.numpy()

    # info stays non-zero unless LAPACK confirms a successful inversion.
    info = 1
    if n_rows == n_cols:
        _R, info = dtrtri(R)

    # LAPACK dtrtri: info == 0 is success, info = i > 0 means R[i, i] is
    # exactly zero (singular), info < 0 flags an illegal argument. Testing
    # only for info == 1 would let a matrix that is singular at any other
    # pivot through with an invalid "inverse".
    if info != 0:
        _R = pinv(R)

    return _R @ (Q.T @ (X.T @ y))
def wishart_pdf(X, S, v, d, chol=False, log_form = False):
    '''Wishart density of X, optionally working from Cholesky factors.

    Returns the same output as scipy.stats.wishart(df=v, scale=S).pdf(X).

    The density (Wikipedia or Kevin P. Murphy, 2007) is:
        {|X|**[0.5(v-d-1)] exp[-0.5tr(inv(S)X)]}
        / {2**[0.5vd] |S|**[0.5v] [multivariate_gamma_function(0.5v, d)]}

    Thomas Minka (1998) writes it differently, but both forms agree for
    the same inputs:
        {1}/{[multivariate_gamma_function(0.5v, d)] |X|**(0.5(d+1))}
        {|0.5X inv(S)|**(0.5v)} {exp[-0.5tr(inv(S)X)]}

    Parameters
    ----------
    X: array-like.
        Positive definite dxd matrix at which the density is evaluated.
        If chol, this must be L instead, the lower triangular factor
        with X = LL'.
    S: array-like
        Positive definite dxd scale matrix.
        If chol, this must be L2 instead, the lower triangular factor
        with S = L2L2'.
    v: int or float.
        Degrees of freedom of the distribution. v must be > d.
    d: int
        Dimension of each row or column of X.
    chol: bool
        Whether X and S are passed as lower Cholesky factors.
    log_form: bool
        Whether to return the log density.

    Outputs
    --------
    The logpdf of X when log_form is set, otherwise the pdf of X.
    '''
    if not chol:
        log_det_X = np.linalg.slogdet(X)[1]
        log_det_S = np.linalg.slogdet(S)[1]
        trace_term = np.trace(np.linalg.inv(S).dot(X))
    else:
        log_det_X = chol_log_determinant(X)
        log_det_S = chol_log_determinant(S)
        # inv(S) = inv(L2)' inv(L2) and X = LL', so the trace of their
        # product is taken without a general matrix inverse.
        factor_inv = lpack.dtrtri(S, lower=1)[0]
        S_inv = factor_inv.T.dot(factor_inv)
        X_full = X.dot(X.T)
        trace_term = np.einsum('ij,ji', S_inv, X_full)

    det_X_part = 0.5*(v-d-1)*log_det_X
    exp_part = -0.5*trace_term
    two_part = -0.5*(v*d)*math.log(2)
    det_S_part = -0.5*(v)*log_det_S
    gamma_part = -spe.multigammaln(0.5*v,d)

    log_density = det_X_part+exp_part+two_part+det_S_part+gamma_part
    return log_density if log_form else math.exp(log_density)
def chol_inv(L):
    """
    Compute the inverse of a lower triangular (Cholesky) factor via
    LAPACK ``dtrtri``. The LAPACK status code is discarded.

    :param L: lower triangular matrix
    :rtype: inverse of L
    """
    inverse, _info = lapack.dtrtri(L, lower=True)
    return inverse
def inv_lower(L):
    '''
    Compute the inverse of a lower triangular matrix with ``dtrtri`` and
    wrap the result in ``np.matrix``.

    Args:
        L: A lower triangular matrix

    Returns:
        The inverse of L as an ``np.matrix``.
    '''
    inverse = dtrtri(L, lower=1)[0]
    return np.matrix(inverse)
def invwishart_pdf(X, S, v, d, chol=False, log_form = False):
    '''Inverse Wishart density of X, optionally working from Cholesky
    factors of S and X.

    The output is comparable to scipy.stats.invwishart(df=v, scale=S).pdf(X).

    The density (Wikipedia or Kevin P. Murphy, 2007) is:
        {|S|**[0.5v] |X|**[-0.5(v+d+1)] exp[-0.5tr(S inv(X))]}
        / {2**[0.5vd] [multivariate_gamma_function(0.5v, d)]}

    Parameters
    ----------
    X: array-like.
        Positive definite dxd matrix at which the density is evaluated.
        If chol, this must be L instead, the lower triangular factor
        with X = LL'.
    S: array-like
        Positive definite dxd scale matrix.
        If chol, this must be L2 instead, the lower triangular factor
        with S = L2L2'.
    v: int or float.
        Degrees of freedom of the distribution. v must be > d.
    d: int
        Dimension of each row or column of X.
    chol: bool
        Whether X and S are passed as lower Cholesky factors.
    log_form: bool
        Whether to return the log density.

    Outputs
    --------
    The logpdf of X when log_form is set, otherwise the pdf of X.
    '''
    if not chol:
        log_det_X = np.linalg.slogdet(X)[1]
        log_det_S = np.linalg.slogdet(S)[1]
        trace_term = np.trace(S.dot(np.linalg.inv(X)))
    else:
        log_det_X = chol_log_determinant(X)
        log_det_S = chol_log_determinant(S)
        # S = L2L2' and inv(X) = inv(L)' inv(L), so the trace of their
        # product is taken without a general matrix inverse.
        factor_inv = lpack.dtrtri(X, lower=1)[0]
        S_full = S.dot(S.T)
        X_inv = factor_inv.T.dot(factor_inv)
        trace_term = np.einsum('ij,ji', S_full, X_inv)

    two_part = -0.5*(v*d)*math.log(2)
    gamma_part = -spe.multigammaln(0.5*v,d)
    det_S_part = 0.5*(v)*log_det_S
    det_X_part = -0.5*(v+d+1)*log_det_X
    exp_part = -0.5*trace_term

    log_density = two_part+gamma_part+det_S_part+det_X_part+exp_part
    return log_density if log_form else math.exp(log_density)
def linear_regression(X, y, algo=None):
    """
    Solves the linear regression problem,
    find :math:`\\beta` which minimizes
    :math:`\\norme{y - X\\beta}`,
    based on the algorithm
    :ref:`Decision tree optimized for linear regressions
    <algo_decision_tree_mselin>`.

    @param      X       features
    @param      y       targets
    @param      algo    None to use the standard algorithm
                        :math:`\\beta = (X'X)^{-1} X'y`,
                        `'gram'`, `'qr'`
    @return             beta
    @raises             ValueError if *algo* is not one of the values above

    .. runpython::
        :showcode:

        import numpy
        from mlstatpy.ml.matrices import linear_regression

        X = numpy.array([[1., 2., 3., 4.],
                         [5., 6., 6., 6.],
                         [5., 6., 7., 8.]]).T
        y = numpy.array([0.1, 0.2, 0.19, 0.29])
        beta = linear_regression(X, y, algo="gram")
        print(beta)

    ``algo=None`` computes :math:`\\beta = (X'X)^{-1} X'y`.
    ``algo='qr'`` uses a `QR <https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html>`_
    decomposition and calls function
    `dtrtri <https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lapack.dtrtri.html>`_
    to invert an upper triangular matrix.
    ``algo='gram'`` uses :func:`gram_schmidt <mlstatpy.ml.matrices.gram_schmidt>`
    and then computes the solution of the linear regression
    (see above for a link to the algorithm).

    A warning is issued when *y* is not one-dimensional because that case
    is not tested.
    """
    if len(y.shape) != 1:
        warnings.warn(
            "This function is not tested for a multidimensional linear regression."
        )

    if algo is None:
        inv = numpy.linalg.inv(X.T @ X)
        return inv @ (X.T @ y)

    if algo == "gram":
        T, P = gram_schmidt(X.T, change=True)
        # T = P X
        return (y.T @ T.T @ P).ravel()

    if algo == "qr":
        # "reduced" is the supported spelling of the deprecated "full" mode
        # (deprecated since NumPy 1.8); the factorisation is the same but no
        # DeprecationWarning is emitted.
        Q, R = numpy.linalg.qr(X, "reduced")
        # R is upper triangular; dtrtri defaults to the upper triangle.
        Ri = dtrtri(R)[0]
        gamma = (y.T @ Q).ravel()
        # beta = R^-1 Q' y, written with row vectors.
        return (gamma @ Ri.T).ravel()

    raise ValueError("Unknwown algo='{}'.".format(algo))
def dtrtri(L):
    """
    Compute the inverse of a lower triangular (Cholesky) factor via
    LAPACK ``dtrtri``. The LAPACK status code is discarded.

    The input is first passed through ``force_F_ordered`` (presumably to
    obtain a Fortran-ordered array -- confirm against that helper).

    :param L: lower triangular matrix
    :rtype: inverse of L
    """
    fortran_L = force_F_ordered(L)
    inverse, _info = lapack.dtrtri(fortran_L, lower=1)
    return inverse
def compute(self, x):
    '''Pre-compute and cache the properties of the gp.

    x is the list of sample points in the time dimension.

    The cached values are reused as long as the very same ``x`` object is
    passed again (identity check, not equality). On a recompute the
    following attributes are set: ``covariance``, ``L``, ``K_inv``, ``x``,
    ``detK``, ``log_detK`` and ``computed``.
    '''
    # A different x object invalidates whatever was cached before.
    if self.x is not x:
        self.computed = False
    if self.computed:
        return

    self.covariance = self._make_covariance_matrix(x)
    # NOTE(review): the lower=1 below assumes `cholesky` returns the lower
    # factor -- confirm which cholesky is imported.
    self.L = cholesky(self.covariance)

    # K^-1 = L^-T L^-1, built from the inverted triangular factor.
    factor_inv = np.matrix(dtrtri(self.L, lower=1)[0])
    self.K_inv = factor_inv.T * factor_inv
    self.x = x

    # Determinant and log-determinant follow from the factor's diagonal.
    factor_diag = np.diag(self.L)
    self.detK = np.prod(factor_diag)**2
    self.log_detK = 2 * np.sum(np.log(factor_diag))
    self.computed = True
def dtrtri(L):
    """
    Compute the inverse of a lower triangular matrix via LAPACK ``dtrtri``.
    The LAPACK status code is discarded.

    The input is first passed through ``force_F_ordered`` (presumably to
    obtain a Fortran-ordered array -- confirm against that helper).

    :param L: lower triangular matrix
    :rtype: inverse of L
    """
    fortran_L = force_F_ordered(L)
    inverse, _info = lapack.dtrtri(fortran_L, lower=1)
    return inverse
def lapack_dtrtri(self, arr, lower=0, unitdiag=0, overwrite_c=0):
    """
    Invert a triangular matrix through LAPACK ``dtrtri``.

    The arguments are forwarded positionally to ``lapack.dtrtri``; only the
    inverse is returned and the LAPACK status code is dropped.

    :param arr: triangular matrix to invert
    :param lower: forwarded to ``lapack.dtrtri`` (default 0)
    :param unitdiag: forwarded to ``lapack.dtrtri`` (default 0)
    :param overwrite_c: forwarded to ``lapack.dtrtri`` (default 0)
    :return: the inverse computed by LAPACK
    """
    result = lapack.dtrtri(arr, lower, unitdiag, overwrite_c)
    inverse, _status = result
    return inverse
def gsvd(A, B, full_matrices=False, extras='uv', X1=False):
    '''Compute the generalized singular value decomposition of
    a pair of matrices ``A`` of shape ``(m, n)`` and ``B`` of
    shape ``(p, n)``

    The GSVD is defined as a joint decomposition, as follows.

        A = U*C*X.T     C = U.T*A*inv(X.T)
        B = V*S*X.T     S = V.T*B*inv(X.T)

    or letting X1 = inv(X.T)

        A = U*C*inv(X1)     C = U.T*A*X1
        B = V*S*inv(X1)     S = V.T*B*X1

    where

        C.T*C + S.T*S = I

    where ``U`` and ``V`` are unitary matrices.

    Parameters
    ----------
    A, B : ndarray
        Input matrices on which to perform the decomposition. Must
        be no more than 2D (and will be promoted if only 1D). The
        matrices must also have the same number of columns. Both are
        copied, so the caller's arrays are not modified.
    full_matrices : bool, optional
        If ``False`` (the default), ``X``, ``U`` and ``V`` are limited to
        at most ``r`` columns and ``C`` and ``S`` to length ``r``, where
        ``r = k + l`` is the effective rank returned by the LAPACK
        wrapper. If ``True``, no such truncation is applied.
    extras : str, optional
        A string indicating which of the orthogonal transformation
        matrices should be computed. The string may contain either 'u'
        or 'v' to indicate that the corresponding matrix is to be
        computed. The default ``'uv'`` computes and returns both; pass
        ``''`` to obtain only ``C``, ``S`` and ``X``.
    X1 : bool, optional
        If ``True``, X inverse transpose is returned in place of the
        default X matrix. This may be convenient for regularization
        routines. This matrix satisfies U.T@A@X = C, V.T@B@X = S.

    Returns
    -------
    C : ndarray
        The generalized singular values of ``A``. These are returned
        in decreasing order.
    S : ndarray
        The generalized singular values of ``B``. These are returned
        in increasing order.
    X : ndarray
        The right generalized singular vectors of ``A`` and ``B``.
    U : ndarray
        The left generalized singular vectors of ``A``, allocated with
        shape ``(m, m)`` and truncated to ``r`` columns when
        ``full_matrices`` is False and ``m > r``. This is only returned
        if ``'u' in extras`` is True.
    V : ndarray
        The left generalized singular vectors of ``B``, allocated with
        shape ``(p, p)`` and truncated to ``r`` columns when
        ``full_matrices`` is False and ``p > r``. This is only returned
        if ``'v' in extras`` is True.

    Raises
    ------
    A ValueError is raised if ``A`` and ``B`` do not have the same
    number of columns, or if they are not both 2D (1D input
    arrays will be promoted).

    A RuntimeError is raised if the underlying LAPACK routine fails
    (presumably from inside ``_gsvd.gsvd``; this function does not raise
    it itself).

    Notes
    -----
    This routine is intended to be as similar as possible to the
    decomposition provided by Matlab and Octave. Note that this is slightly
    different from the decomposition as put forth in Golub and Van Loan [1],
    and that this routine is thus not directly a wrapper for the underlying
    LAPACK routine.

    One important difference between this routine and that provided by
    Matlab is that this routine returns the singular values in decreasing
    order, for consistency with NumPy's ``svd`` routine.

    References
    ----------
    [1] Golub, G., and C.F. Van Loan, 2013, Matrix Computations, 4th Ed.
    '''
    # The LAPACK routine stores R inside A and/or B, so we copy to
    # avoid modifying the caller's arrays.
    # Work in complex128 as soon as either input is complex, else in double.
    dtype = np.complex128 if any(map(np.iscomplexobj, (A, B))) else np.double
    Ac = np.array(A, copy=True, dtype=dtype, order='C', ndmin=2)
    Bc = np.array(B, copy=True, dtype=dtype, order='C', ndmin=2)
    m, n = Ac.shape
    p = Bc.shape[0]
    if (n != Bc.shape[1]):
        raise ValueError('A and B must have the same number of columns')

    # Allocate input arrays to LAPACK routine
    # compute_uv is a pair of booleans: (compute U?, compute V?).
    compute_uv = tuple(each in extras for each in 'uv')
    sizes = (m, p)
    # A (1, 1) placeholder is allocated for a matrix that is not requested.
    U, V = (np.zeros((size, size), dtype=dtype) if compute else np.zeros(
        (1, 1), dtype=dtype) for size, compute in zip(sizes, compute_uv))
    Q = np.zeros((n, n), dtype=dtype)
    C = np.zeros((n, ), dtype=np.double)
    S = np.zeros((n, ), dtype=np.double)
    iwork = np.zeros((n, ), dtype=np.int32)

    # Compute GSVD via LAPACK wrapper, returning the effective rank
    k, l = _gsvd.gsvd(Ac, Bc, U, V, Q, C, S, iwork, compute_uv[0],
                      compute_uv[1])

    # r is the rank of the matrix (A.T | B.T).T denoted A|B
    # l is the rank of B
    r = k + l
    R = _extract_R(Ac, Bc, k, l)
    # tmp starts as the identity; only its trailing (r, r) block is replaced.
    tmp = np.eye(n, dtype=R.dtype)
    if X1:
        # Compute X so that U'AX = C and V'BX = S
        # invert R by back substitution
        tmp[n-r:, n-r:] = ztrtri(R, overwrite_c=1)[0] \
            if R.dtype == np.complex128 else dtrtri(R, overwrite_c=1)[0]
    else:
        # Compute X so that A = UCX' and B = VSX'
        tmp[n-r:, n-r:] = R.conj().T \
            if R.dtype == np.complex128 else R.T
    X = Q.dot(tmp)

    # Sort columns of X, U and V to achieve the correct ordering of
    # the singular values.
    if m - r >= 0:
        ix = np.argsort(C[k:r])[::-1]  # sort l values
        # NOTE(review): if l == 0, `-l:` selects every column rather than
        # none -- confirm callers never pass a rank-0 B.
        X[:, -l:] = X[:, -l:][:, ix]
        if compute_uv[0]:
            U[:, k:k + l] = U[:, k:k + l][:, ix]
        if compute_uv[1]:
            V[:, :l] = V[:, :l][:, ix]
        C[k:r] = C[k:r][ix]
        S[k:r] = S[k:r][ix]
    else:  # m - r < 0
        ix = np.argsort(C[k:m])[::-1]  # sort m-k values
        X[:, n - l:n + m - r] = X[:, n - l:n + m - r][:, ix]
        if compute_uv[0]:
            U[:, k:] = U[:, k:][:, ix]
        if compute_uv[1]:
            V[:, :m - k] = V[:, :m - k][:, ix]
        C[k:m] = C[k:m][ix]
        S[k:m] = S[k:m][ix]

    # For convenience in reconstructing A and B from their decompositions,
    # try to move SV's to the diagonal in cases when rank(A|B) < n.
    # This is not possible if rank(A|B) > rank(B) and
    # the number of rows of B is less than rank(A|B).
    if n - r > 0:
        X = np.roll(X, r - n, axis=1)
    if k > 0 and p >= r:
        V = np.roll(V, k, axis=1)

    # If full matrices are not required, limit X, U, and V to at most r
    # columns.
    if not full_matrices:
        X = X[:, :r]
        if compute_uv[0] and m > r:
            U = U[:, :r]
        if compute_uv[1] and p > r:
            V = V[:, :r]
        C = C[:r]
        S = S[:r]

    # Always (C, S, X); U and/or V are appended only when requested.
    outputs = (C, S, X) + tuple(
        arr for arr, compute in zip((U, V), compute_uv) if compute)
    return outputs