def test_empty_keys(self):
    """An empty list of keys must give an all-null result with the input's shape."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

    processed = _process_series(series, [])

    # nothing was selected, so every entry is expected to be missing
    null_mask = pd.isnull(processed)
    assert processed.shape == series.shape
    assert np.all(null_mask)
def test_repeat_key(self):
    """A key that lists the same category several times keeps only that category."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    wanted = pd.Series(["a", np.nan, np.nan, np.nan, np.nan]).astype("category")

    processed = _process_series(series, keys=["a, a, a"])

    assert_array_nan_equal(processed, wanted)
def test_normal_run(self):
    """Selecting a single existing category nulls out every other entry."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    wanted = pd.Series(["a", np.nan, np.nan, np.nan, np.nan]).astype("category")

    processed = _process_series(series, keys=["a"])

    assert_array_nan_equal(wanted, processed)
def test_no_keys_colors(self):
    """With ``keys=None`` the very same series and colors objects are handed back."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    palette = ["foo"]

    out_series, out_palette = _process_series(series, keys=None, colors=palette)

    # identity, not equality: the inputs must be passed through untouched
    assert series is out_series
    assert palette is out_palette
def test_reoder_keys(self):
    """Categories listed out of order in one key are merged under a sorted label."""
    series = pd.Series(["b", "c", "a", "d", "a"]).astype("category")
    merged = "a or b or d"
    # only "c" (second entry) is not part of the merged group
    wanted = pd.Series([merged, np.nan, merged, merged, merged]).astype("category")

    processed = _process_series(series, keys=["b, a, d"])

    assert_array_nan_equal(processed, wanted)
def test_return_colors(self):
    """Merging categories returns a categorical series plus a list of averaged colors."""
    series = pd.Series(["b", "c", "a", "d", "a"]).astype("category")
    first, second = "a or b", "c or d"
    wanted = pd.Series([first, second, first, second, first]).astype("category")

    processed, palette = _process_series(
        series,
        keys=["b, a", "d, c"],
        colors=["red", "green", "blue", "white"],
    )

    # container types of both return values
    assert isinstance(processed, pd.Series)
    assert is_categorical_dtype(processed)
    assert isinstance(palette, list)

    # contents: merged labels and one mean color per merged group
    np.testing.assert_array_equal(processed.values, wanted.values)
    wanted_palette = {
        _compute_mean_color(["red", "green"]),
        _compute_mean_color(["blue", "white"]),
    }
    assert set(palette) == wanted_palette
def test_no_keys(self):
    """With ``keys=None`` the input series object itself is returned."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")

    processed = _process_series(series, keys=None)

    # identity check: no copy is expected
    assert series is processed
def test_keys_overlap(self):
    """Keys sharing a category ("a" appears in both) must raise ``ValueError``."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    overlapping = ["a", "b, a"]

    with pytest.raises(ValueError):
        _process_series(series, overlapping)
def test_keys_are_not_proper_categories(self):
    """A key that is not among the series' categories must raise ``ValueError``."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    unknown = ["foo"]

    with pytest.raises(ValueError):
        _process_series(series, unknown)
def test_colors_not_colorlike(self):
    """Passing a string that is not a color in ``colors`` must raise ``ValueError``."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    bad_palette = ["bar"]

    # NOTE(review): the key "foo" is not a category of the series either, so the
    # ValueError may come from key validation rather than the color check - confirm.
    with pytest.raises(ValueError):
        _process_series(series, ["foo"], colors=bad_palette)
def test_colors_wrong_number_of_colors(self):
    """Supplying one color for a series with two categories must raise ``ValueError``."""
    series = pd.Series(["a", "b", np.nan, "b", np.nan]).astype("category")
    too_few = ["red"]

    # NOTE(review): the key "foo" is not a category of the series either, so the
    # ValueError may come from key validation rather than the color count - confirm.
    with pytest.raises(ValueError):
        _process_series(series, ["foo"], colors=too_few)
def test_not_categorical(self):
    """A series that was not cast to categorical dtype must raise ``TypeError``."""
    # deliberately left without `.astype("category")`
    plain_series = pd.Series(["a", "b", np.nan, "b", np.nan])

    with pytest.raises(TypeError):
        _process_series(plain_series, ["foo"])
def compute_absorption_probabilities(
    self,
    keys: Optional[Sequence[str]] = None,
    check_irreducibility: bool = False,
    solver: str = "gmres",
    use_petsc: bool = True,
    time_to_absorption: Optional[
        Union[
            str,
            Sequence[Union[str, Sequence[str]]],
            Dict[Union[str, Sequence[str]], str],
        ]
    ] = None,
    n_jobs: Optional[int] = None,
    backend: str = "loky",
    show_progress_bar: bool = True,
    tol: float = 1e-6,
    preconditioner: Optional[str] = None,
) -> None:
    """
    Compute absorption probabilities of a Markov chain.

    For each cell, this computes the probability of it reaching any of the approximate recurrent classes defined
    by :paramref:`{fs}`.

    Parameters
    ----------
    keys
        Keys defining the recurrent classes.
    check_irreducibility
        Check whether the transition matrix is irreducible.
    solver
        Solver to use for the linear problem. Options are `'direct', 'gmres', 'lgmres', 'bicgstab' or 'gcrotmk'`
        when ``use_petsc=False`` or one of :class:`petsc4py.PETSc.KSP.Type` otherwise.

        Information on the :mod:`scipy` iterative solvers can be found in :func:`scipy.sparse.linalg` or for
        :mod:`petsc4py` solver `here <https://www.mcs.anl.gov/petsc/documentation/linearsolvertable.html>`__.
    use_petsc
        Whether to use solvers from :mod:`petsc4py` or :mod:`scipy`. Recommended for large problems.
        If no installation is found, defaults to :func:`scipy.sparse.linalg.gmres`.
    time_to_absorption
        Whether to compute mean time to absorption and its variance to specific absorbing states.

        If a :class:`dict`, can be specified as ``{{'Alpha': 'var', ...}}`` to also compute variance.
        In case when states are a :class:`tuple`, time to absorption will be computed to the subset of these states,
        such as ``[('Alpha', 'Beta'), ...]`` or ``{{('Alpha', 'Beta'): 'mean', ...}}``.
        Can be specified as ``'all'`` to compute it to any absorbing state in ``keys``, which is more efficient
        than listing all absorbing states.

        Subsets containing the same states, in any order, are only computed once. If any of the duplicates
        requests ``'var'``, the variance is computed for that subset.

        It might be beneficial to disable the progress bar as ``show_progress_bar=False``, because many linear
        systems are being solved.
    n_jobs
        Number of parallel jobs to use when using an iterative solver.
        When ``use_petsc=True`` or for quickly-solvable problems, we recommend higher number (>=8) of jobs
        in order to fully saturate the cores.
    backend
        Which backend to use for multiprocessing. See :class:`joblib.Parallel` for valid options.
    show_progress_bar
        Whether to show progress bar when the solver isn't a direct one.
    tol
        Convergence tolerance for the iterative solver. The default is fine for most cases, only consider
        decreasing this for severely ill-conditioned matrices.
    preconditioner
        Preconditioner to use, only available when ``use_petsc=True``. For available values, see
        `here <https://www.mcs.anl.gov/petsc/petsc-current/docs/manualpages/PC/PCType.html#PCType>`__ or the values
        of `petsc4py.PETSc.PC.Type`.
        We recommend `'ilu'` preconditioner for badly conditioned problems.

    Returns
    -------
    None
        Nothing, but updates the following fields:

            - :paramref:`{abs_prob}` - probabilities of being absorbed into the terminal states.
            - :paramref:`{lat}` - mean times until absorption to subset absorbing states and optionally
              their variances saved as ``'{{lineage}} mean'`` and ``'{{lineage}} var'``, respectively,
              for each subset of absorbing states specified in ``time_to_absorption``.

    Raises
    ------
    RuntimeError
        If the terminal states have not been computed yet or if there are no transient states.
    ValueError
        If a moment in ``time_to_absorption`` is neither ``'mean'`` nor ``'var'`` or if it references
        an absorbing state which is not among the processed terminal states.
    """
    if self._get(P.TERM) is None:
        raise RuntimeError(_COMP_TERM_STATES_MSG)
    if keys is not None:
        keys = sorted(set(keys))

    start = logg.info("Computing absorption probabilities")

    # get the transition matrix
    t = self.transition_matrix
    if not self.issparse:
        logg.warning(
            "Attempting to solve a potentially large linear system with dense transition matrix"
        )

    # process the current annotations according to `keys`
    terminal_states_, colors_ = _process_series(
        series=self._get(P.TERM), keys=keys, colors=self._get(A.TERM_COLORS)
    )
    # from here on, `keys` holds the categories which are left after the processing
    keys = list(terminal_states_.cat.categories)
    # warn in case only one state is left
    if len(keys) == 1:
        logg.warning(
            "There is only 1 recurrent class, all cells will have probability 1 of going there"
        )

    # maps a tuple of absorbing states (in the user's order) to the requested moment
    lin_abs_times = {}
    if time_to_absorption is not None:
        if isinstance(time_to_absorption, (str, tuple)):
            time_to_absorption = [time_to_absorption]
        if not isinstance(time_to_absorption, dict):
            # lists are not hashable, so non-string subsets are converted to tuples first
            time_to_absorption = {
                (ln if isinstance(ln, str) else tuple(ln)): "mean"
                for ln in time_to_absorption
            }

        # order-insensitive subset -> key under which it is stored in `lin_abs_times`;
        # this has to outlive a single iteration, otherwise duplicates are never detected
        registered = {}
        for ln, moment in time_to_absorption.items():
            if moment not in ("mean", "var"):
                raise ValueError(
                    f"Moment must be either `'mean'` or `'var'`, found `{moment!r}` for `{ln!r}`."
                )

            if isinstance(ln, str):
                ln = tuple(keys) if ln == "all" else (ln,)
            # sorting is used only to detect duplicates, the user's order is preserved in the stored key
            subset_id = tuple(sorted(ln))

            first_key = registered.get(subset_id)
            if first_key is not None:
                # same subset requested again: solve it only once, but never drop a requested variance
                if moment == "var":
                    lin_abs_times[first_key] = "var"
                continue

            for lin in ln:
                if lin not in keys:
                    raise ValueError(
                        f"Invalid absorbing state `{lin!r}` in `{ln}`. "
                        f"Valid options are `{list(terminal_states_.cat.categories)}`."
                    )

            stored_key = tuple(ln)
            registered[subset_id] = stored_key
            lin_abs_times[stored_key] = moment

    # define the dimensions of this problem
    n_cells = t.shape[0]
    n_macrostates = len(terminal_states_.cat.categories)

    # get indices corresponding to recurrent and transient states
    rec_indices, trans_indices, lookup_dict = _get_cat_and_null_indices(
        terminal_states_
    )
    if not len(trans_indices):
        raise RuntimeError("Cannot proceed - Markov chain is irreducible.")

    # create Q (restriction transient-transient), S (restriction transient-recurrent)
    q = t[trans_indices, :][:, trans_indices]
    s = t[trans_indices, :][:, rec_indices]

    # check for irreducibility
    if check_irreducibility:
        if self.is_irreducible is None:
            # not cached yet, compute and store the result
            self._is_irreducible = _irreducible(self.transition_matrix)
        else:
            if not self.is_irreducible:
                logg.warning("Transition matrix is not irreducible")
            else:
                logg.debug("Transition matrix is irreducible")

    logg.debug(f"Found `{n_cells}` cells and `{s.shape[1]}` absorbing states")

    # solve the linear system of equations
    mat_x = _solve_lin_system(
        q,
        s,
        solver=solver,
        use_petsc=use_petsc,
        n_jobs=n_jobs,
        backend=backend,
        tol=tol,
        use_eye=True,
        show_progress_bar=show_progress_bar,
        preconditioner=preconditioner,
    )

    if time_to_absorption is not None:
        abs_time_means = _calculate_lineage_absorption_time_means(
            q,
            t[trans_indices, :][:, rec_indices],
            trans_indices,
            n=t.shape[0],
            ixs=lookup_dict,
            lineages=lin_abs_times,
            solver=solver,
            use_petsc=use_petsc,
            n_jobs=n_jobs,
            backend=backend,
            tol=tol,
            show_progress_bar=show_progress_bar,
            preconditioner=preconditioner,
        )
        abs_time_means.index = self.adata.obs_names
    else:
        abs_time_means = None

    # take individual solutions and piece them together to get absorption probabilities towards the classes
    macro_ix_helper = np.cumsum(
        [0] + [len(indices) for indices in lookup_dict.values()]
    )
    # sum the columns of the solution which belong to the same class
    _abs_classes = np.concatenate(
        [
            mat_x[:, np.arange(a, b)].sum(1)[:, None]
            for a, b in _pairwise(macro_ix_helper)
        ],
        axis=1,
    )

    # for recurrent states, set their self-absorption probability to one
    abs_classes = np.zeros((len(self), n_macrostates))
    rec_classes_full = {
        cl: np.where(terminal_states_ == cl)[0]
        for cl in terminal_states_.cat.categories
    }
    for col, cl_indices in enumerate(rec_classes_full.values()):
        abs_classes[trans_indices, col] = _abs_classes[:, col]
        abs_classes[cl_indices, col] = 1

    self._set(
        A.ABS_PROBS,
        Lineage(
            abs_classes,
            names=terminal_states_.cat.categories,
            colors=colors_,
        ),
    )

    extra_msg = ""
    if abs_time_means is not None:
        self._set(A.LIN_ABS_TIMES, abs_time_means)
        extra_msg = f"       `.{P.LIN_ABS_TIMES}`\n"

    self._write_absorption_probabilities(time=start, extra_msg=extra_msg)