Пример #1
0
    def test_empty_keys(self):
        """With an empty list of keys the result keeps the input shape and contains only nulls."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

        processed = _process_series(series, [])

        assert processed.shape == series.shape
        assert pd.isnull(processed).all()
Пример #2
0
    def test_repeat_key(self):
        """A key string that repeats `a` three times keeps only the `a` entry; the rest become NaN."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
        wanted = pd.Series(["a", np.nan, np.nan, np.nan, np.nan]).astype("category")

        processed = _process_series(series, keys=["a, a, a"])

        assert_array_nan_equal(processed, wanted)
Пример #3
0
    def test_normal_run(self):
        """Selecting the single key `a` keeps the `a` entry and turns everything else into NaN."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
        wanted = pd.Series(["a", np.nan, np.nan, np.nan, np.nan]).astype("category")

        processed = _process_series(series, keys=["a"])

        assert_array_nan_equal(wanted, processed)
Пример #4
0
    def test_no_keys_colors(self):
        """With `keys=None`, the very same series and colors objects are handed back."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
        palette = ["foo"]

        out_series, out_palette = _process_series(series, keys=None, colors=palette)

        # identity, not equality: nothing may be copied or processed
        assert series is out_series
        assert palette is out_palette
Пример #5
0
    def test_reoder_keys(self):
        """Keys given as `b, a, d` are merged under the label `a or b or d`; the unlisted `c` becomes NaN."""
        series = pd.Series(["b", "c", "a", "d", "a"]).astype("category")
        merged = "a or b or d"
        wanted = pd.Series([merged, np.nan, merged, merged, merged]).astype("category")

        processed = _process_series(series, keys=["b, a, d"])

        assert_array_nan_equal(processed, wanted)
Пример #6
0
    def test_return_colors(self):
        """Merging categories with colors supplied returns a categorical series and a list of mean colors."""
        series = pd.Series(["b", "c", "a", "d", "a"]).astype("category")
        merged_labels = ["a or b", "c or d", "a or b", "c or d", "a or b"]
        wanted = pd.Series(merged_labels).astype("category")
        palette = ["red", "green", "blue", "white"]

        res, colors = _process_series(series, keys=["b, a", "d, c"], colors=palette)

        # check the returned types first
        assert isinstance(res, pd.Series)
        assert is_categorical_dtype(res)
        assert isinstance(colors, list)

        # then the merged labels and the (order-insensitive) merged colors
        np.testing.assert_array_equal(res.values, wanted.values)
        wanted_colors = {
            _compute_mean_color(group)
            for group in (["red", "green"], ["blue", "white"])
        }
        assert set(colors) == wanted_colors
Пример #7
0
    def test_no_keys(self):
        """With `keys=None` and no colors, the very same series object is handed back."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

        processed = _process_series(series, keys=None)

        # identity, not equality: the input must not be copied
        assert series is processed
Пример #8
0
    def test_keys_overlap(self):
        """Keys that share a category (`a` appears in both) must be rejected with a `ValueError`."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
        overlapping_keys = ["a", "b, a"]

        with pytest.raises(ValueError):
            _process_series(series, overlapping_keys)
Пример #9
0
    def test_keys_are_not_proper_categories(self):
        """A key (`foo`) that is not among the series' categories must be rejected with a `ValueError`."""
        series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
        unknown_keys = ["foo"]

        with pytest.raises(ValueError):
            _process_series(series, unknown_keys)
Пример #10
0
    def test_colors_not_colorlike(self):
        """Strings that are not colors must be rejected with a `ValueError`.

        The key is a valid category and there is one entry in `colors` per category,
        so the invalid color strings are the only thing wrong with the input.
        An invalid key would already raise `ValueError` on its own and hide whether
        the colors are validated at all.
        """
        x = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

        with pytest.raises(ValueError):
            _ = _process_series(x, ["a"], colors=["bar", "baz"])
Пример #11
0
    def test_colors_wrong_number_of_colors(self):
        """A color list of the wrong length must be rejected with a `ValueError`.

        The key is a valid category and every color is a valid color name, so the
        length mismatch is the only thing wrong with the input: three colors match
        neither the two categories nor the single key. An invalid key would already
        raise `ValueError` on its own and hide whether the length is checked at all.
        """
        x = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

        with pytest.raises(ValueError):
            _ = _process_series(x, ["a"], colors=["red", "green", "blue"])
Пример #12
0
    def test_not_categorical(self):
        """A series that was never cast to `category` must be rejected with a `TypeError`."""
        plain_series = pd.Series(["a", "b", np.nan, "b", np.nan])

        with pytest.raises(TypeError):
            _process_series(plain_series, ["foo"])
Пример #13
0
    def compute_absorption_probabilities(
        self,
        keys: Optional[Sequence[str]] = None,
        check_irreducibility: bool = False,
        solver: str = "gmres",
        use_petsc: bool = True,
        time_to_absorption: Optional[
            Union[
                str,
                Sequence[Union[str, Sequence[str]]],
                Dict[Union[str, Sequence[str]], str],
            ]
        ] = None,
        n_jobs: Optional[int] = None,
        backend: str = "loky",
        show_progress_bar: bool = True,
        tol: float = 1e-6,
        preconditioner: Optional[str] = None,
    ) -> None:
        """
        Compute absorption probabilities of a Markov chain.

        For each cell, this computes the probability of it reaching any of the approximate recurrent classes defined
        by :paramref:`{fs}`.

        Parameters
        ----------
        keys
            Keys defining the recurrent classes.
        check_irreducibility
            Check whether the transition matrix is irreducible.
        solver
            Solver to use for the linear problem. Options are `'direct', 'gmres', 'lgmres', 'bicgstab' or 'gcrotmk'`
            when ``use_petsc=False`` or one of :class:`petsc4py.PETSc.KSP.Type` otherwise.

            Information on the :mod:`scipy` iterative solvers can be found in :func:`scipy.sparse.linalg` or for
            :mod:`petsc4py` solver `here <https://www.mcs.anl.gov/petsc/documentation/linearsolvertable.html>`__.
        use_petsc
            Whether to use solvers from :mod:`petsc4py` or :mod:`scipy`. Recommended for large problems.
            If no installation is found, defaults to :func:`scipy.sparse.linalg.gmres`.
        time_to_absorption
            Whether to compute mean time to absorption and its variance to specific absorbing states.

            If a :class:`dict`, can be specified as ``{{'Alpha': 'var', ...}}`` to also compute variance.
            In case when states are a :class:`tuple`, time to absorption will be computed to the subset of these states,
            such as ``[('Alpha', 'Beta'), ...]`` or ``{{('Alpha', 'Beta'): 'mean', ...}}``.
            Can be specified as ``'all'`` to compute it to any absorbing state in ``keys``, which is more efficient
            than listing all absorbing states.

            The same subset of states given more than once (in any order) is computed only once, under the
            order in which it was first given; the variance is included if any of the duplicates requests it.

            It might be beneficial to disable the progress bar as ``show_progress_bar=False``, because many linear
            systems are being solved.
        n_jobs
            Number of parallel jobs to use when using an iterative solver. When ``use_petsc=True`` or for
            quickly-solvable problems, we recommend higher number (>=8) of jobs in order to fully saturate the cores.
        backend
            Which backend to use for multiprocessing. See :class:`joblib.Parallel` for valid options.
        show_progress_bar
            Whether to show progress bar when the solver isn't a direct one.
        tol
            Convergence tolerance for the iterative solver. The default is fine for most cases, only consider
            decreasing this for severely ill-conditioned matrices.
        preconditioner
            Preconditioner to use, only available when ``use_petsc=True``. For available values, see
            `here <https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/PC/PCType.html#PCType>`__ or the values
            of `petsc4py.PETSc.PC.Type`.
            We recommend `'ilu'` preconditioner for badly conditioned problems.

        Returns
        -------
        None
            Nothing, but updates the following fields:

                - :paramref:`{abs_prob}` - probabilities of being absorbed into the terminal states.
                - :paramref:`{lat}` - mean times until absorption to subset absorbing states and optionally
                  their variances saved as ``'{{lineage}} mean'`` and ``'{{lineage}} var'``, respectively,
                  for each subset of absorbing states specified in ``time_to_absorption``.

        Raises
        ------
        RuntimeError
            If the terminal states are not set, or if no transient cells remain after selecting ``keys``.
        ValueError
            If a moment in ``time_to_absorption`` is neither ``'mean'`` nor ``'var'``, or if it names a state
            that is not among the selected terminal states.
        """

        if self._get(P.TERM) is None:
            raise RuntimeError(_COMP_TERM_STATES_MSG)
        if keys is not None:
            # drop duplicates and fix the order of the requested keys
            keys = sorted(set(keys))

        start = logg.info("Computing absorption probabilities")

        # get the transition matrix
        t = self.transition_matrix
        if not self.issparse:
            logg.warning(
                "Attempting to solve a potentially large linear system with dense transition matrix"
            )

        # process the current annotations according to `keys`
        terminal_states_, colors_ = _process_series(
            series=self._get(P.TERM), keys=keys, colors=self._get(A.TERM_COLORS)
        )
        # from here on, `keys` are the categories that actually remain after processing
        keys = list(terminal_states_.cat.categories)
        # warn in case only one state is left
        if len(keys) == 1:
            logg.warning(
                "There is only 1 recurrent class, all cells will have probability 1 of going there"
            )

        # normalize `time_to_absorption` into a mapping `tuple of states -> moment`
        lin_abs_times = {}
        if time_to_absorption is not None:
            if isinstance(time_to_absorption, (str, tuple)):
                time_to_absorption = [time_to_absorption]
            if not isinstance(time_to_absorption, dict):
                time_to_absorption = {ln: "mean" for ln in time_to_absorption}

            # order-insensitive form of a subset -> key under which it was stored in `lin_abs_times`;
            # this must live outside of the loop, otherwise duplicates are never detected
            seen = {}
            for ln, moment in time_to_absorption.items():
                if moment not in ("mean", "var"):
                    raise ValueError(
                        f"Moment must be either `'mean'` or `'var'`, found `{moment!r}` for `{ln!r}`."
                    )

                if isinstance(ln, str):
                    ln = tuple(keys) if ln == "all" else (ln,)
                # sorted only to recognize the same subset given in a different order,
                # the user's order is what gets stored
                sorted_ln = tuple(sorted(ln))

                if sorted_ln in seen:
                    # `'var'` is documented as computing the variance in addition to the mean,
                    # so a duplicate asking for it must not be silently lost
                    if moment == "var":
                        lin_abs_times[seen[sorted_ln]] = moment
                    continue

                for lin in ln:
                    if lin not in keys:
                        raise ValueError(
                            f"Invalid absorbing state `{lin!r}` in `{ln}`. "
                            f"Valid options are `{list(terminal_states_.cat.categories)}`."
                        )
                seen[sorted_ln] = tuple(ln)
                lin_abs_times[tuple(ln)] = moment

        # define the dimensions of this problem
        n_cells = t.shape[0]
        n_macrostates = len(terminal_states_.cat.categories)

        # get indices corresponding to recurrent and transient states
        rec_indices, trans_indices, lookup_dict = _get_cat_and_null_indices(
            terminal_states_
        )
        if not len(trans_indices):
            raise RuntimeError("Cannot proceed - Markov chain is irreducible.")

        # create Q (restriction transient-transient), S (restriction transient-recurrent)
        q = t[trans_indices, :][:, trans_indices]
        s = t[trans_indices, :][:, rec_indices]

        # check for irreducibility
        if check_irreducibility:
            if self.is_irreducible is None:
                # NOTE(review): nothing is logged here when the result is freshly computed - presumably
                # `_irreducible` reports the outcome itself; confirm
                self._is_irreducible = _irreducible(self.transition_matrix)
            else:
                if not self.is_irreducible:
                    logg.warning("Transition matrix is not irreducible")
                else:
                    logg.debug("Transition matrix is irreducible")

        logg.debug(f"Found `{n_cells}` cells and `{s.shape[1]}` absorbing states")

        # solve the linear system of equations
        mat_x = _solve_lin_system(
            q,
            s,
            solver=solver,
            use_petsc=use_petsc,
            n_jobs=n_jobs,
            backend=backend,
            tol=tol,
            use_eye=True,
            show_progress_bar=show_progress_bar,
            preconditioner=preconditioner,
        )

        if time_to_absorption is not None:
            abs_time_means = _calculate_lineage_absorption_time_means(
                q,
                # NOTE(review): same slice as `s`, recomputed; `s` could be reused here once it is
                # confirmed that `_solve_lin_system` does not modify its inputs
                t[trans_indices, :][:, rec_indices],
                trans_indices,
                n=n_cells,
                ixs=lookup_dict,
                lineages=lin_abs_times,
                solver=solver,
                use_petsc=use_petsc,
                n_jobs=n_jobs,
                backend=backend,
                tol=tol,
                show_progress_bar=show_progress_bar,
                preconditioner=preconditioner,
            )
            abs_time_means.index = self.adata.obs_names
        else:
            abs_time_means = None

        # take individual solutions and piece them together to get absorption probabilities towards the classes;
        # the cumulative sum gives the column boundaries of each class within `mat_x`
        macro_ix_helper = np.cumsum(
            [0] + [len(indices) for indices in lookup_dict.values()]
        )
        _abs_classes = np.concatenate(
            [
                mat_x[:, np.arange(a, b)].sum(1)[:, None]
                for a, b in _pairwise(macro_ix_helper)
            ],
            axis=1,
        )

        # for recurrent states, set their self-absorption probability to one
        abs_classes = np.zeros((len(self), n_macrostates))
        rec_classes_full = {
            cl: np.where(terminal_states_ == cl)[0]
            for cl in terminal_states_.cat.categories
        }
        for col, cl_indices in enumerate(rec_classes_full.values()):
            abs_classes[trans_indices, col] = _abs_classes[:, col]
            abs_classes[cl_indices, col] = 1

        self._set(
            A.ABS_PROBS,
            Lineage(
                abs_classes,
                names=terminal_states_.cat.categories,
                colors=colors_,
            ),
        )

        extra_msg = ""
        if abs_time_means is not None:
            self._set(A.LIN_ABS_TIMES, abs_time_means)
            extra_msg = f"       `.{P.LIN_ABS_TIMES}`\n"

        self._write_absorption_probabilities(time=start, extra_msg=extra_msg)