Example #1
def _knn_smooth(diff_kernel, velo_graph, trans_graph, weight_diffusion):
    # utility function for combining KNN kernel and velocity kernel
    assert weight_diffusion >= 0, "Weight diffusion must be non-negative."
    assert weight_diffusion <= 1, "Weight diffusion must be <= 1."

    # work on a copy so the original graph is not normalized here (it is needed later for density correction)
    G_sim = trans_graph.copy()

    if diff_kernel == "mult":
        logg.debug("DEBUG: Using a multiplicative diffusion kernel")
        # element wise multiplication
        velo_graph = velo_graph.multiply(G_sim)
    elif diff_kernel == "sum":
        logg.debug("DEBUG: Using an additive diffusion kernel")
        velo_graph, trans_graph = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (
            1 - weight_diffusion) * velo_graph + weight_diffusion * trans_graph
    elif diff_kernel == "both":
        logg.debug(
            "DEBUG: Using first a multiplicative and then an additive diffusion kernel"
        )
        G_sim = G_sim.multiply(velo_graph > 0)
        velo_graph = velo_graph.multiply(G_sim)
        velo_graph, G_sim = _normalize(velo_graph), _normalize(G_sim)
        velo_graph = (
            1 - weight_diffusion) * velo_graph + weight_diffusion * G_sim
    else:
        raise ValueError(
            f"Invalid kernel type `{diff_kernel}`. Valid options are: `'mult', 'sum', 'both'`."
        )

    return velo_graph
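
A minimal, self-contained sketch of the `'sum'` branch above, assuming `_normalize` performs row (L1) normalization; `sklearn.preprocessing.normalize` is used here as a stand-in for that helper:

import numpy as np
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize  # stand-in for `_normalize`, assuming row/L1 normalization

velo_graph = csr_matrix(np.array([[0.0, 2.0], [1.0, 1.0]]))
trans_graph = csr_matrix(np.array([[1.0, 1.0], [0.0, 3.0]]))
weight_diffusion = 0.2

# row-normalize both graphs, then take their convex combination
velo_n = normalize(velo_graph, norm="l1")
trans_n = normalize(trans_graph, norm="l1")
combined = (1 - weight_diffusion) * velo_n + weight_diffusion * trans_n

print(combined.toarray())  # each row sums to 1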
Example #2
    def test_backward_manual_dense_norm(self, adata):
        backward = True
        vk = VelocityKernel(adata,
                            backward=backward).compute_transition_matrix(
                                density_normalize=False)
        ck = ConnectivityKernel(adata,
                                backward=backward).compute_transition_matrix(
                                    density_normalize=False)

        # combine the kernels
        comb = 0.8 * vk + 0.2 * ck
        T_1 = comb.transition_matrix
        conn = _get_neighs(adata, "connectivities")
        T_1 = density_normalization(T_1, conn)
        T_1 = _normalize(T_1)

        transition_matrix(
            adata,
            diff_kernel="sum",
            weight_diffusion=0.2,
            density_normalize=True,
            backward=backward,
        )
        T_2 = adata.uns[_transition(Direction.BACKWARD)]["T"]

        np.testing.assert_allclose(T_1.A, T_2.A, rtol=_rtol)
Example #3
    def transition_matrix(self, value: Union[np.ndarray, spmatrix]) -> None:
        """
        Set a new value of the transition matrix.

        Params
        ------
        value
            The new transition matrix.

        Returns
        -------
        None
        """

        if self._parent is None:
            self._transition_matrix = _normalize(value)
        else:
            self._transition_matrix = _normalize(value) if self._normalize else value
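
`_normalize` itself is not shown in this section. A plausible minimal sketch, assuming it performs simple row normalization for both dense arrays and sparse matrices (the name `_normalize_sketch` is hypothetical):

from typing import Union

import numpy as np
from scipy.sparse import csr_matrix, spmatrix


def _normalize_sketch(x: Union[np.ndarray, spmatrix]) -> Union[np.ndarray, csr_matrix]:
    # row-normalize, leaving all-zero rows untouched
    if isinstance(x, spmatrix):
        row_sums = np.asarray(x.sum(axis=1)).ravel()
        row_sums[row_sums == 0] = 1.0  # avoid division by zero
        return csr_matrix(x.multiply(1.0 / row_sums[:, None]))
    row_sums = x.sum(axis=1, keepdims=True)
    row_sums[row_sums == 0] = 1.0
    return x / row_sums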
Example #4
    def test_addition_adaptive(self, adata):
        adata.uns["velocity_variances"] = vv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        adata.uns["connectivity_variances"] = cv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        vk, ck = create_kernels(adata)

        k = vk ^ ck
        expected = _normalize(0.5 * vv * vk.transition_matrix +
                              0.5 * cv * ck.transition_matrix)

        np.testing.assert_allclose(k.transition_matrix.A, expected)
Example #5
    def test_addition_adaptive_wrong_variances(self, adata):
        a, b = np.random.uniform(0, 10, 2)
        s = a + b
        adata.uns["velocity_variances"] = np.random.random(size=(adata.n_obs,
                                                                 adata.n_obs))
        adata.uns["connectivity_variances"] = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        vk, ck = create_kernels(adata)

        k = a * vk ^ b * ck
        expected = _normalize(a / s * vk.transition_matrix +
                              b / s * ck.transition_matrix)

        assert not np.allclose(k.transition_matrix.A, expected.A)
Example #6
    def test_addition_adaptive_constants(self, adata):
        a, b = np.random.uniform(0, 10, 2)
        s = a + b
        adata.uns["velocity_variances"] = vv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        adata.uns["connectivity_variances"] = cv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        vk, ck = create_kernels(adata)  # diagonal + upper diag

        k = a * vk ^ b * ck
        expected = _normalize(a / s * vv * vk.transition_matrix +
                              b / s * cv * ck.transition_matrix)

        np.testing.assert_allclose(k.transition_matrix.A, expected)
Example #7
    def test_palantir(self, adata):
        conn = _get_neighs(adata, "connectivities")
        n_neighbors = _get_neighs_params(adata)["n_neighbors"]
        pseudotime = adata.obs["latent_time"]

        conn_biased = bias_knn(conn, pseudotime, n_neighbors)
        T_1 = _normalize(conn_biased)

        pk = PalantirKernel(adata,
                            time_key="latent_time").compute_transition_matrix(
                                density_normalize=False)
        T_2 = pk.transition_matrix

        np.testing.assert_allclose(T_1.A, T_2.A, rtol=_rtol)
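
`bias_knn` is not shown here either. As a rough, hypothetical sketch of the Palantir-style biasing it presumably performs (dropping KNN edges that point backward in pseudotime):

import numpy as np
from scipy.sparse import csr_matrix


def bias_knn_sketch(conn: csr_matrix, pseudotime: np.ndarray) -> csr_matrix:
    # drop edges whose target lies earlier in pseudotime than the source
    biased = conn.tolil(copy=True)
    rows, cols = conn.nonzero()
    for i, j in zip(rows, cols):
        if pseudotime[j] < pseudotime[i]:
            biased[i, j] = 0.0
    return biased.tocsr()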
Example #8
    def test_addition_adaptive_4_kernels(self, adata):
        a, b, c, d = np.random.uniform(0, 10, 4)
        s = a + b + c + d
        adata.uns["velocity_variances"] = vv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        adata.uns["connectivity_variances"] = cv = np.random.random(
            size=(adata.n_obs, adata.n_obs))
        vk, ck = create_kernels(adata)
        vk1, ck1 = create_kernels(adata)

        k = a * vk ^ b * ck ^ c * vk1 ^ d * ck1
        expected = _normalize(a / s * vv * vk.transition_matrix +
                              b / s * cv * ck.transition_matrix +
                              c / s * vv * vk1.transition_matrix +
                              d / s * cv * ck1.transition_matrix)

        np.testing.assert_allclose(k.transition_matrix.A, expected)
Example #9
def transition_matrix(
    adata: AnnData,
    vkey: str = "velocity",
    backward: bool = False,
    self_transitions: Optional[str] = None,
    sigma_corr: Optional[float] = None,
    diff_kernel: Optional[str] = None,
    weight_diffusion: float = 0.2,
    density_normalize: bool = True,
    backward_mode: str = "transpose",
    inplace: bool = True,
) -> csr_matrix:
    """
    Computes transition probabilities from velocity graph.

    THIS FUNCTION HAS BEEN DEPRECATED.
    Interact with kernels via the Kernel class or via cellrank.tools_transition_matrix.transition_matrix

    Employs ideas of both scvelo as well as velocyto.

    Parameters
    --------
    adata : :class:`anndata.AnnData`
        Annotated Data Matrix
    vkey
        Name of the velocity estimates to be used
    backward
        Whether to use the transition matrix to push forward (`False`) or to pull backward (`True`)
    self_transitions
        How to fill the diagonal. Can be either 'velocyto' or 'scvelo'. Two diffent
        heuristics are used. Can prevent dividing by zero in unlucky sitatuations for the
        reverse process
    sigma_corr
        Kernel width for exp kernel to be used to compute transition probabilities
        from the velocity graph. If None, the median cosine correlation of all
        potisive cosine correlations will be used.
    diff_kernel
        Whether to multiply the velocity connectivities with transcriptomic distances to make them more robust.
        Options are ('sum', 'mult', 'both')
    weight_diffusion
        Relative weight given to the diffusion kernel. Must be in [0, 1]. Only matters when using 'sum' or 'both'
        for the diffusion kernel.
    density_normalize
        Whether to use the transcriptomic KNN graph for density normalization as performed in scanpy when
        computing diffusion maps
    backward_mode
        Options are ['transpose', 'negate'].
    inplace
        If True, adds to adata. Otherwise returns.

    Returns
    --------
    T: :class:`scipy.sparse.csr_matrix`
        Transition matrix
    """
    logg.info("Computing transition probabilities from velocity graph")

    # get the direction of the process
    direction = Direction.BACKWARD if backward else Direction.FORWARD

    # get the velocity correlations
    if (vkey + "_graph" not in adata.uns.keys()) or (vkey + "_graph_neg"
                                                     not in adata.uns.keys()):
        raise ValueError(
            "You need to run `tl.velocity_graph` first to compute cosine correlations"
        )
    velo_corr, velo_corr_neg = (
        csr_matrix(adata.uns[vkey + "_graph"]).copy(),
        csr_matrix(adata.uns[vkey + "_graph_neg"]).copy(),
    )
    velo_corr_comb_ = (velo_corr + velo_corr_neg).astype(np.float64)
    if backward:
        if backward_mode == "negate":
            velo_corr_comb = velo_corr_comb_.multiply(-1)
        elif backward_mode == "transpose":
            velo_corr_comb = velo_corr_comb_.T
        else:
            raise ValueError(f"Unknown backward_mode `{backward_mode}`.")
    else:
        velo_corr_comb = velo_corr_comb_
    med_corr = np.median(np.abs(velo_corr_comb.data))

    # compute the raw transition matrix. At the moment, this is just an exponential kernel
    logg.debug("DEBUG: Computing the raw transition matrix")
    if sigma_corr is None:
        sigma_corr = 1 / med_corr
    velo_graph = velo_corr_comb.copy()
    velo_graph.data = np.exp(velo_graph.data * sigma_corr)

    # fetch the transcriptomic KNN graph if it is needed for smoothing or density normalization
    if diff_kernel is not None or density_normalize:
        params = _get_neighs_params(adata)
        logg.debug(
            f'DEBUG: Using KNN graph computed in basis {params.get("use_rep", "Unknown")!r} '
            f'with {params["n_neighbors"]} neighbors')
        trans_graph = _get_neighs(adata, "connectivities")
        dev = norm((trans_graph - trans_graph.T), ord="fro")
        if dev > 1e-4:
            logg.warning(f"KNN base graph not symmetric, `dev={dev}`")

    # KNN smoothing
    if diff_kernel is not None:
        logg.debug("DEBUG: Smoothing KNN graph with diffusion kernel")
        velo_graph = _knn_smooth(diff_kernel, velo_graph, trans_graph,
                                 weight_diffusion)

    # set the diagonal elements. This is important especially for the backwards direction
    logg.debug("DEBUG: Setting diagonal elements")
    velo_graph = _self_loops(self_transitions, velo_graph)

    # density normalisation - taken from scanpy
    if density_normalize:
        logg.debug("DEBUG: Density correcting the velocity graph")
        velo_graph = density_normalization(velo_graph, trans_graph)

    # normalize
    T = _normalize(velo_graph)

    if not inplace:
        logg.info("Computed transition matrix")
        return T

    if _transition(direction) in adata.uns.keys():
        logg.warning(
            f"`.uns` already contains a field `{_transition(direction)!r}`. Overwriting"
        )

    params = {
        "backward": backward,
        "self_transitions": self_transitions,
        "sigma_corr": np.round(sigma_corr, 3),
        "diff_kernel": diff_kernel,
        "weight_diffusion": weight_diffusion,
        "density_normalize": density_normalize,
    }

    adata.uns[_transition(direction)] = {"T": T, "params": params}
    logg.info(
        f"Computed transition matrix and added the key `{_transition(direction)!r}` to `adata.uns`"
    )
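
Since the function above is deprecated, the equivalent computation via the kernel classes from Example #2 looks roughly like this (the import path is an assumption; the 0.8/0.2 split mirrors weight_diffusion=0.2 with the 'sum' diffusion kernel):

# import path is an assumption; adjust to wherever the kernel classes live
from cellrank.tools.kernels import ConnectivityKernel, VelocityKernel

vk = VelocityKernel(adata, backward=False).compute_transition_matrix()
ck = ConnectivityKernel(adata).compute_transition_matrix()

# convex combination of the two kernels, analogous to diff_kernel="sum"
T = (0.8 * vk + 0.2 * ck).transition_matrix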
Example #10
    def compute_lin_probs(
        self,
        keys: Optional[Sequence[str]] = None,
        check_irred: bool = False,
        norm_by_frequ: bool = False,
    ) -> None:
        """
        Compute absorption probabilities for a Markov chain.

        For each cell, this computes the probability of it reaching any of the approximate recurrent classes.
        This also computes the entropy over absorption probabilities, which is a measure of cell plasticity, see
        [Setty19]_.

        Params
        ------
        keys
            Sequence of keys defining the recurrent classes.
        check_irred
            Check whether the matrix restricted to the given transient states is irreducible.
        norm_by_frequ
            Divide absorption probabilities for `rc_i` by `|rc_i|`.

        Returns
        -------
        None
            Nothing, but updates the following fields: :paramref:`lineage_probabilities`, :paramref:`diff_potential`.
        """

        if self._meta_states is None:
            raise RuntimeError(
                "Compute approximate recurrent classes first as `.compute_metastable_states()`"
            )
        if keys is not None:
            keys = sorted(set(keys))

        # Note: there are three relevant data structures here
        # - self.metastable_states: pd.Series containing annotations for the approximate recurrent classes.
        #   Associated colors are in self.metastable_states_colors
        # - self.lin_probs: Lineage object containing the lineage probabilities with associated names and colors
        # - metastable_states_: pd.Series, temporary copy of self.metastable_states used in the context of this
        #   function. In this copy, some metastable states may be removed or combined with others
        start = logg.info("Computing absorption probabilities")

        # we don't expect the absorption probabilities to be sparse, so make T dense (see scipy docs on sparse linear solvers)
        t = self._T.A if self._is_sparse else self._T

        # colors are created in `compute_metastable_states`, this is just in case
        self._check_and_create_colors()

        # process the current annotations according to `keys`
        metastable_states_, colors_ = _process_series(
            series=self._meta_states, keys=keys, colors=self._meta_states_colors
        )

        #  create empty lineage object
        if self._lin_probs is not None:
            logg.debug("DEBUG: Overwriting `.lin_probs`")
        self._lin_probs = Lineage(
            np.empty((1, len(colors_))),
            names=metastable_states_.cat.categories,
            colors=colors_,
        )

        # warn in case only one state is left
        keys = list(metastable_states_.cat.categories)
        if len(keys) == 1:
            logg.warning(
                "There is only one recurrent class, all cells will have probability 1 of going there"
            )

        # create arrays of all recurrent and transient indices
        mask = np.repeat(False, len(metastable_states_))
        for cat in metastable_states_.cat.categories:
            mask = np.logical_or(mask, metastable_states_ == cat)
        rec_indices, trans_indices = np.where(mask)[0], np.where(~mask)[0]

        # create Q (restriction transient-transient), S (restriction transient-recurrent) and I (Q-sized identity)
        q = t[trans_indices, :][:, trans_indices]
        s = t[trans_indices, :][:, rec_indices]
        eye = np.eye(len(trans_indices))

        if check_irred:
            if self._is_irreducible is None:
                self.compute_partition()
            if not self._is_irreducible:
                logg.warning("Restriction Q is not irreducible")

        # compute abs probs. Since we don't expect sparse solution, dense computation is faster.
        logg.debug("DEBUG: Solving the linear system to find absorption probabilities")
        abs_states = solve(eye - q, s)

        # aggregate to class level by summing over columns belonging to the same metastable_states
        approx_rc_red = metastable_states_[mask]
        rec_classes_red = {
            key: np.where(approx_rc_red == key)[0]
            for key in approx_rc_red.cat.categories
        }
        _abs_classes = np.concatenate(
            [
                np.sum(abs_states[:, rec_classes_red[key]], axis=1)[:, None]
                for key in approx_rc_red.cat.categories
            ],
            axis=1,
        )

        if norm_by_frequ:
            logg.debug("DEBUG: Normalizing by frequency")
            _abs_classes /= [len(value) for value in rec_classes_red.values()]
        _abs_classes = _normalize(_abs_classes)

        # for recurrent states, set their self-absorption probability to one
        abs_classes = np.zeros((self._n_states, len(rec_classes_red)))
        rec_classes_full = {
            cl: np.where(metastable_states_ == cl)
            for cl in metastable_states_.cat.categories
        }
        for col, cl_indices in enumerate(rec_classes_full.values()):
            abs_classes[trans_indices, col] = _abs_classes[:, col]
            abs_classes[cl_indices, col] = 1

        self._dp = entropy(abs_classes.T)
        self._lin_probs = Lineage(
            abs_classes,
            names=list(self._lin_probs.names),
            colors=list(self._lin_probs.colors),
        )

        self._adata.obsm[self._lin_key] = self._lin_probs
        self._adata.obs[_dp(self._lin_key)] = self._dp
        self._adata.uns[_lin_names(self._lin_key)] = self._lin_probs.names
        self._adata.uns[_colors(self._lin_key)] = self._lin_probs.colors

        logg.info("    Finish", time=start)
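
To see the linear-algebra core of the method in isolation: for a toy chain with one transient and two recurrent states, solving (I - Q) X = S as above yields the absorption probabilities directly (a self-contained sketch, not part of the class):

import numpy as np
from scipy.linalg import solve

T = np.array([
    [0.5, 0.3, 0.2],  # transient state 0
    [0.0, 1.0, 0.0],  # recurrent state 1 (absorbing)
    [0.0, 0.0, 1.0],  # recurrent state 2 (absorbing)
])
trans_idx, rec_idx = [0], [1, 2]

q = T[np.ix_(trans_idx, trans_idx)]  # transient -> transient block
s = T[np.ix_(trans_idx, rec_idx)]    # transient -> recurrent block
abs_probs = solve(np.eye(len(trans_idx)) - q, s)

print(abs_probs)  # [[0.6 0.4]]; rows sum to 1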