Exemplo n.º 1
0
    def compute_schur(
        self,
        n_components: int = 10,
        initial_distribution: Optional[np.ndarray] = None,
        method: str = "krylov",
        which: str = "LR",
        alpha: float = 1,
    ):
        """
        Compute the Schur decomposition of the transition matrix.

        Parameters
        ----------
        n_components
            Number of Schur vectors to compute, must be at least 2. If the underlying solver rejects
            this number with a `ValueError`, the computation is retried once with one more component.
        initial_distribution
            Input probability distribution over all cells. If `None`, uniform is chosen.
        method
            Method for calculating the Schur vectors. Valid options are: `'krylov'` or `'brandts'`.
            For benefits of each method, see :class:`msmtools.analysis.dense.gpcca.GPCCA`. The former is
            an iterative procedure that computes a partial, sorted Schur decomposition for large, sparse
            matrices whereas the latter computes a full sorted Schur decomposition of a dense matrix.
        %(eigen)s

        Returns
        -------
        None
            Nothing, but updates the following fields:

                - :paramref:`{schur_vectors}`
                - :paramref:`{schur_matrix}`
                - :paramref:`{eigendec}`
        """

        if n_components < 2:
            raise ValueError(
                f"Number of components must be `>=2`, found `{n_components}`.")

        self._gpcca = _GPCCA(
            self.transition_matrix,
            eta=initial_distribution,
            z=which,
            method=method,
        )
        gpcca = self._gpcca
        start = logg.info("Computing Schur decomposition")

        try:
            gpcca._do_schur_helper(n_components)
        except ValueError:
            # The requested size was rejected; retry once with one extra component.
            logg.warning(
                f"Using `{n_components}` components would split a block of complex conjugates. "
                f"Increasing `n_components` to `{n_components + 1}`")
            gpcca._do_schur_helper(n_components + 1)

        # Expose the Schur vectors and the Schur matrix so that plotting can reach them.
        setattr(self, A.SCHUR.s, gpcca.X)
        setattr(self, A.SCHUR_MAT.s, gpcca.R)

        eigenvalues = gpcca.eigenvalues

        # Collect every prefix length for which `_check_conj_split` reports a problem.
        invalid = []
        for n_states in range(2, len(eigenvalues)):
            if _check_conj_split(eigenvalues[:n_states]):
                invalid.append(n_states)
        self._invalid_n_states = np.array(invalid)

        if invalid:
            logg.info(
                f"When computing macrostates, choose a number of states NOT in `{list(self._invalid_n_states)}`"
            )

        eig_params = {
            "which": which,
            "k": len(eigenvalues),
            "alpha": alpha,
        }
        eig_data = {
            "D": eigenvalues,
            "eigengap": _eigengap(eigenvalues, alpha),
            "params": eig_params,
        }
        self._write_eig_to_adata(
            eig_data,
            start=start,
            extra_msg=
            f"\n       `.{P.SCHUR}`\n       `.{P.SCHUR_MAT}`\n    Finish",
        )
Exemplo n.º 2
0
    def compute_eigendecomposition(
        self,
        k: int = 20,
        which: str = "LR",
        alpha: float = 1,
        only_evals: bool = False,
        ncv: Optional[int] = None,
    ) -> None:
        """
        Compute eigendecomposition of transition matrix.

        Uses a sparse implementation, if possible, and only computes the top :math:`k` eigenvectors
        to speed up the computation. Computes both left and right eigenvectors. If the transition
        matrix is not sparse, the full eigendecomposition is computed instead.

        Parameters
        ----------
        k
            Number of eigenvalues/vectors to compute.
        %(eigen)s
        only_evals
            Compute only eigenvalues. In this case, the top ``k`` eigenvalues sorted by their real
            part in descending order are saved, and the eigengap is computed from these values.
        ncv
            Number of Lanczos vectors generated. Only used when the transition matrix is sparse.

        Returns
        -------
        None
            Nothing, but updates the following field:

                - :paramref:`{prop}`
        """
        start = logg.info(
            "Computing eigendecomposition of the transition matrix")

        is_sparse = self.issparse
        params = {"which": which, "k": k, "alpha": alpha}

        # Left eigenvectors are obtained as eigenvectors of the transposed matrix.
        if is_sparse:
            logg.debug(f"Computing top `{k}` eigenvalues for sparse matrix")
            D, V_l = eigs(self.transition_matrix.T, k=k, which=which, ncv=ncv)
        else:
            logg.warning(
                "This transition matrix is not sparse, computing full eigendecomposition"
            )
            D, V_l = np.linalg.eig(self.transition_matrix.T)

        if only_evals:
            # Sort by real part (descending) and truncate before computing the eigengap, so the
            # dense path no longer feeds raw, unordered solver output to `_eigengap` and the
            # eigengap always refers to the eigenvalues that are actually stored under "D".
            top_k = D[np.flip(np.argsort(D.real))][:k]
            self._write_eig_to_adata({
                "D": top_k,
                "eigengap": _eigengap(top_k.real, alpha),
                "params": params,
            })
            return

        if is_sparse:
            _, V_r = eigs(self.transition_matrix, k=k, which=which, ncv=ncv)
        else:
            _, V_r = np.linalg.eig(self.transition_matrix)

        # Sort the eigenvalues and eigenvectors and take the real part
        logg.debug("Sorting eigenvalues by their real part")
        p = np.flip(np.argsort(D.real))
        # NOTE(review): `V_r` comes from a separate solver call, reordering it with the permutation
        # derived from `D` assumes both calls return the eigenvalues in the same order - confirm.
        D, V_l, V_r = D[p], V_l[:, p], V_r[:, p]
        e_gap = _eigengap(D.real, alpha)

        # Stationary distribution: the leading left eigenvector, made non-negative and normalized.
        pi = np.abs(V_l[:, 0].real)
        pi /= np.sum(pi)

        self._write_eig_to_adata(
            {
                "D": D,
                "stationary_dist": pi,
                "V_l": V_l,
                "V_r": V_r,
                "eigengap": e_gap,
                "params": params,
            },
            start=start,
        )
Exemplo n.º 3
0
    def compute_terminal_states(
        self,
        method: str = "stability",
        n_cells: int = 30,
        alpha: Optional[float] = 1,
        stability_threshold: float = 0.96,
        n_states: Optional[int] = None,
    ):
        """
        Automatically select terminal states from macrostates.

        If there is only one macrostate, it is used as the single terminal state regardless
        of ``method``.

        Parameters
        ----------
        method
            One of following:

                - `'eigengap'` - select the number of states based on the `eigengap` of the transition matrix.
                - `'eigengap_coarse'` - select the number of states based on the `eigengap` of the diagonal
                  of the coarse-grained transition matrix.
                - `'top_n'` - select top ``n_states`` based on the probability of the diagonal
                  of the coarse-grained transition matrix.
                - `'stability'` - select states which have a stability index >= ``stability_threshold``. The stability
                  index is given by the diagonal elements of the coarse-grained transition matrix.
        %(n_cells)s
        alpha
            Weight given to the deviation of an eigenvalue from one. Used when ``method='eigengap'``
            or ``method='eigengap_coarse'``.
        stability_threshold
            Threshold used when ``method='stability'``.
        n_states
            Number of states used when ``method='top_n'``.

        Returns
        -------
        None
            Nothing, just updates the following fields:

                - :attr:`{fsp}`
                - :attr:`{fs}`

        Raises
        ------
        RuntimeError
            If the eigendecomposition (``method='eigengap'``) or the coarse-grained transition
            matrix has not been computed yet.
        ValueError
            If ``method`` is invalid or if the argument required by the chosen method is invalid.
        """

        def require_coarse_T() -> None:
            # Fail with an actionable message instead of an error on `None`.
            if coarse_T is None:
                raise RuntimeError(
                    "Compute macrostates first as `.compute_macrostates()`.")

        if len(self._get(P.MACRO).cat.categories) == 1:
            logg.warning(
                "Found only one macrostate. Making it the single main state")
            self.set_terminal_states_from_macrostates(None, n_cells=n_cells)
            return

        coarse_T = self._get(P.COARSE_T)

        if method == "eigengap":
            if self._get(P.EIG) is None:
                raise RuntimeError(
                    "Compute eigendecomposition first as `.compute_eigendecomposition()`."
                )
            n_states = _eigengap(self._get(P.EIG)["D"], alpha=alpha) + 1
        elif method == "eigengap_coarse":
            require_coarse_T()
            # Reverse AFTER sorting so that the self-transition probabilities are in descending
            # order; reversing before sorting (as done previously) had no effect.
            # NOTE(review): unlike the `'eigengap'` branch, no `+ 1` is applied here, so a result of 0
            # makes the `[-n_states:]` slice below select every macrostate - confirm this is intended.
            n_states = _eigengap(np.sort(np.diag(coarse_T))[::-1], alpha=alpha)
        elif method == "top_n":
            if n_states is None:
                raise ValueError(
                    "Argument `n_states` must be != `None` for `method='top_n'`."
                )
            elif n_states <= 0:
                raise ValueError(
                    f"Expected `n_states` to be positive, found `{n_states}`.")
        elif method == "stability":
            if stability_threshold is None:
                raise ValueError(
                    "Argument `stability_threshold` must be != `None` for `method='stability'`."
                )
            require_coarse_T()
            # Keep every macrostate whose self-transition probability reaches the threshold.
            self_probs = pd.Series(np.diag(coarse_T), index=coarse_T.columns)
            names = self_probs[self_probs.values >= stability_threshold].index
            self.set_terminal_states_from_macrostates(names, n_cells=n_cells)
            return
        else:
            raise ValueError(
                f"Invalid method `{method!r}`. Valid options are `'eigengap', 'eigengap_coarse', "
                f"'top_n' and 'stability'`.")

        require_coarse_T()
        # Take the `n_states` macrostates with the highest self-transition probability.
        names = coarse_T.columns[np.argsort(np.diag(coarse_T))][-n_states:]
        self.set_terminal_states_from_macrostates(names, n_cells=n_cells)