def compute_schur(
    self,
    n_components: int = 10,
    initial_distribution: Optional[np.ndarray] = None,
    method: str = "krylov",
    which: str = "LR",
    alpha: float = 1,
):
    """
    Compute the Schur decomposition of the transition matrix.

    Parameters
    ----------
    n_components
        Number of vectors to compute. Must be at least 2.
    initial_distribution
        Input probability distribution over all cells. If `None`, uniform is chosen.
    method
        Method for calculating the Schur vectors. Valid options are: `'krylov'` or `'brandts'`.
        For benefits of each method, see :class:`msmtools.analysis.dense.gpcca.GPCCA`. The former is
        an iterative procedure that computes a partial, sorted Schur decomposition for large, sparse
        matrices whereas the latter computes a full sorted Schur decomposition of a dense matrix.
    %(eigen)s

    Returns
    -------
    None
        Nothing, but updates the following fields:

            - :paramref:`{schur_vectors}`
            - :paramref:`{schur_matrix}`
            - :paramref:`{eigendec}`

    Raises
    ------
    ValueError
        If ``n_components`` is smaller than 2.
    """
    if n_components < 2:
        raise ValueError(
            f"Number of components must be `>=2`, found `{n_components}`."
        )

    self._gpcca = _GPCCA(
        self.transition_matrix,
        eta=initial_distribution,
        z=which,
        method=method,
    )
    gpcca = self._gpcca
    start = logg.info("Computing Schur decomposition")

    try:
        gpcca._do_schur_helper(n_components)
    except ValueError:
        # The helper's `ValueError` is interpreted as "this many components would
        # split a complex-conjugate block", so retry once with one more component.
        bumped = n_components + 1
        logg.warning(
            f"Using `{n_components}` components would split a block of complex conjugates. "
            f"Increasing `n_components` to `{bumped}`"
        )
        gpcca._do_schur_helper(bumped)

    # expose the Schur vectors and the Schur matrix (used by plotting)
    setattr(self, A.SCHUR.s, gpcca.X)
    setattr(self, A.SCHUR_MAT.s, gpcca.R)

    eigenvalues = gpcca.eigenvalues

    # Collect every prefix length for which `_check_conj_split` flags the leading
    # eigenvalues; these are reported below as numbers of states to avoid.
    flagged = []
    for n_states in range(2, len(eigenvalues)):
        if _check_conj_split(eigenvalues[:n_states]):
            flagged.append(n_states)
    self._invalid_n_states = np.array(flagged)

    if len(self._invalid_n_states) > 0:
        logg.info(
            f"When computing macrostates, choose a number of states NOT in `{list(self._invalid_n_states)}`"
        )

    eig_summary = {
        "D": eigenvalues,
        "eigengap": _eigengap(eigenvalues, alpha),
        "params": {
            "which": which,
            "k": len(eigenvalues),
            "alpha": alpha,
        },
    }
    self._write_eig_to_adata(
        eig_summary,
        start=start,
        extra_msg=f"\n `.{P.SCHUR}`\n `.{P.SCHUR_MAT}`\n Finish",
    )
def compute_eigendecomposition(
    self,
    k: int = 20,
    which: str = "LR",
    alpha: float = 1,
    only_evals: bool = False,
    ncv: Optional[int] = None,
) -> None:
    """
    Compute eigendecomposition of transition matrix.

    Uses a sparse implementation, if possible, and only computes the top :math:`k` eigenvectors
    to speed up the computation. Computes both left and right eigenvectors.

    Parameters
    ----------
    k
        Number of eigenvalues/vectors to compute.
    %(eigen)s
    only_evals
        Compute only eigenvalues. In this case the top ``k`` eigenvalues, sorted by decreasing
        real part, are stored and the eigengap is computed from those same values.
    ncv
        Number of Lanczos vectors generated.

    Returns
    -------
    None
        Nothing, but updates the following field:

            - :paramref:`{prop}`
    """
    start = logg.info("Computing eigendecomposition of the transition matrix")
    params = {"which": which, "k": k, "alpha": alpha}

    # Select the solver once; both the left and the right problem use the same one.
    if self.issparse:
        logg.debug(f"Computing top `{k}` eigenvalues for sparse matrix")

        def eig_solver(matrix):
            return eigs(matrix, k=k, which=which, ncv=ncv)

    else:
        logg.warning(
            "This transition matrix is not sparse, computing full eigendecomposition"
        )
        eig_solver = np.linalg.eig

    # Left eigenvectors are obtained as right eigenvectors of the transpose.
    D, V_l = eig_solver(self.transition_matrix.T)

    if only_evals:
        # Neither solver guarantees an ordering of the eigenvalues, so sort by
        # decreasing real part before truncating. The eigengap is computed from the
        # very same array that is stored under "D" (previously, the dense branch
        # used the unsorted, untruncated eigenvalues for the eigengap).
        top_k = D[np.flip(np.argsort(D.real))][:k]
        self._write_eig_to_adata(
            {
                "D": top_k,
                "eigengap": _eigengap(top_k.real, alpha),
                "params": params,
            },
            start=start,
        )
        return

    _, V_r = eig_solver(self.transition_matrix)

    # Sort the eigenvalues and eigenvectors by decreasing real part.
    # NOTE(review): `V_r` is reordered with the permutation derived from the
    # eigenvalues of the transposed problem; this assumes both solver calls return
    # their eigenvalues in the same order - confirm.
    logg.debug("Sorting eigenvalues by their real part")
    p = np.flip(np.argsort(D.real))
    D, V_l, V_r = D[p], V_l[:, p], V_r[:, p]
    e_gap = _eigengap(D.real, alpha)

    # Leading left eigenvector, made non-negative and normalised to sum to one.
    pi = np.abs(V_l[:, 0].real)
    pi /= np.sum(pi)

    self._write_eig_to_adata(
        {
            "D": D,
            "stationary_dist": pi,
            "V_l": V_l,
            "V_r": V_r,
            "eigengap": e_gap,
            "params": params,
        },
        start=start,
    )
def compute_terminal_states(
    self,
    method: str = "stability",
    n_cells: int = 30,
    alpha: Optional[float] = 1,
    stability_threshold: float = 0.96,
    n_states: Optional[int] = None,
) -> None:
    """
    Automatically select terminal states from macrostates.

    Parameters
    ----------
    method
        One of following:

            - `'eigengap'` - select the number of states based on the `eigengap` of the transition matrix.
            - `'eigengap_coarse'` - select the number of states based on the `eigengap` of the diagonal
              of the coarse-grained transition matrix.
            - `'top_n'` - select top ``n_states`` based on the probability of the diagonal
              of the coarse-grained transition matrix.
            - `'stability'` - select states which have a stability index >= ``stability_threshold``.
              The stability index is given by the diagonal elements of the coarse-grained transition matrix.
    %(n_cells)s
    alpha
        Weight given to the deviation of an eigenvalue from one. Used when ``method='eigengap'``
        or ``method='eigengap_coarse'``.
    stability_threshold
        Threshold used when ``method='stability'``.
    n_states
        Number of states used when ``method='top_n'``.

    Returns
    -------
    None
        Nothing, just updates the following fields:

            - :attr:`{fsp}`
            - :attr:`{fs}`

    Raises
    ------
    RuntimeError
        If the macrostates, the coarse-grained transition matrix or (for ``method='eigengap'``)
        the eigendecomposition have not been computed yet.
    ValueError
        If ``method`` is not one of the valid options or if the argument required
        by the chosen method is missing or invalid.
    """
    no_macrostates_msg = "Compute macrostates first as `.compute_macrostates()`."

    macrostates = self._get(P.MACRO)
    if macrostates is None:
        raise RuntimeError(no_macrostates_msg)

    if len(macrostates.cat.categories) == 1:
        logg.warning("Found only one macrostate. Making it the single main state")
        self.set_terminal_states_from_macrostates(None, n_cells=n_cells)
        return

    coarse_T = self._get(P.COARSE_T)

    if method == "eigengap":
        eig = self._get(P.EIG)
        if eig is None:
            raise RuntimeError(
                "Compute eigendecomposition first as `.compute_eigendecomposition()`."
            )
        n_states = _eigengap(eig["D"], alpha=alpha) + 1
    elif method == "eigengap_coarse":
        if coarse_T is None:
            raise RuntimeError(no_macrostates_msg)
        # Sort first, then reverse, so the self-transition probabilities are in
        # decreasing order like the eigenvalues handed to `_eigengap` elsewhere
        # (reversing before sorting was a no-op). As in the `'eigengap'` branch,
        # the returned position is turned into a count by adding one; without it a
        # result of 0 would make the `[-n_states:]` slice below select everything.
        self_probs_desc = np.sort(np.diag(coarse_T))[::-1]
        n_states = _eigengap(self_probs_desc, alpha=alpha) + 1
    elif method == "top_n":
        if n_states is None:
            raise ValueError(
                "Argument `n_states` must be != `None` for `method='top_n'`."
            )
        if n_states <= 0:
            raise ValueError(
                f"Expected `n_states` to be positive, found `{n_states}`."
            )
    elif method == "stability":
        if stability_threshold is None:
            raise ValueError(
                "Argument `stability_threshold` must be != `None` for `method='stability'`."
            )
        if coarse_T is None:
            raise RuntimeError(no_macrostates_msg)
        # Keep every macrostate whose self-transition probability reaches the threshold.
        self_probs = pd.Series(np.diag(coarse_T), index=coarse_T.columns)
        names = self_probs[self_probs.values >= stability_threshold].index
        self.set_terminal_states_from_macrostates(names, n_cells=n_cells)
        return
    else:
        raise ValueError(
            f"Invalid method `{method!r}`. Valid options are `'eigengap', 'eigengap_coarse', "
            f"'top_n' and 'stability'`."
        )

    if coarse_T is None:
        raise RuntimeError(no_macrostates_msg)

    # Take the `n_states` macrostates with the largest self-transition probability.
    names = coarse_T.columns[np.argsort(np.diag(coarse_T))][-n_states:]
    self.set_terminal_states_from_macrostates(names, n_cells=n_cells)