def score_robustness(
    data, adata_subset=None, fraction=0.5, vkey="velocity", copy=False
):
    """Score how well velocities are reproduced on a subset of the data.

    For every cell selected by the subset, the velocity vector stored in
    ``data`` is compared with the one stored in ``adata_subset`` via their
    normalized product sum (a cosine similarity, assuming ``prod_sum`` is a
    row-wise dot product). Cells outside the subset are scored ``NaN``.

    Arguments
    ---------
    data
        Annotated data object holding the velocity layer ``vkey``.
    adata_subset
        Object holding velocities for a subset of the cells in ``data``. If
        `None`, a copy of the data is passed to ``random_subsample`` and
        neighbors, moments and velocities are recomputed on it with logging
        switched off.
    fraction: `float` (default: 0.5)
        Forwarded to ``random_subsample``; only used if ``adata_subset`` is
        `None`.
    vkey: `str` (default: `'velocity'`)
        Name of the velocity layer.
    copy: `bool` (default: `False`)
        Work on a copy of ``data``.

    Returns
    -------
    ``adata_subset`` if ``copy`` is true, else `None`. The scores are written
    to ``.obs['{vkey}_score_robustness']`` of the object worked on.
    """
    adata = data.copy() if copy else data

    if adata_subset is None:
        # Imported lazily, as in the original implementation.
        from scvelo.preprocessing.moments import moments
        from scvelo.preprocessing.neighbors import neighbors

        from .velocity import velocity

        logg.switch_verbosity("off")
        try:
            adata_subset = adata.copy()
            subset = random_subsample(
                adata_subset, fraction=fraction, return_subset=True
            )
            neighbors(adata_subset)
            moments(adata_subset)
            velocity(adata_subset, vkey=vkey)
        finally:
            # Re-enable logging even if the recomputation fails.
            logg.switch_verbosity("on")
    else:
        subset = adata.obs_names.isin(adata_subset.obs_names)

    V = adata[subset].layers[vkey]
    V_subset = adata_subset.layers[vkey]

    # One score per cell, NaN unless the cell is part of the subset.
    # (`np.nan * (subset is False)` produced a scalar, which cannot be
    # indexed by `subset`.)
    score = np.full(adata.n_obs, np.nan)
    score[subset] = prod_sum(V, V_subset, axis=1) / (
        l2_norm(V, axis=1) * l2_norm(V_subset, axis=1)
    )
    adata.obs[f"{vkey}_score_robustness"] = score

    # NOTE(review): with `copy=True` the scores live on the internal copy of
    # `data`, while `adata_subset` is what gets returned — kept as is for
    # backward compatibility; confirm this is intended.
    return adata_subset if copy else None
def cosine_correlation(dX, Vi):
    """Correlate each mean-centered row of `dX` with the vector `Vi`.

    Every row of `dX` has its own mean (over the last axis) subtracted; `Vi`
    is used as given. Numerical warnings raised during the computation are
    suppressed.

    Arguments
    ---------
    dX
        2-dimensional array with one difference vector per row.
    Vi
        1-dimensional vector the rows are compared against.

    Returns
    -------
    Zeros, one per row of `dX`, if the norm of `Vi` is zero. Otherwise the
    row-wise products with `Vi` divided by the product of the norms; note
    that the denominator carries an extra leading axis in this case.
    """
    centered = dX - dX.mean(-1)[:, None]
    velocity_norm = l2_norm(Vi, axis=0)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        if velocity_norm == 0:
            return np.zeros(centered.shape[0])

        projections = np.einsum("ij, j", centered, Vi)
        denominators = l2_norm(centered, axis=1) * velocity_norm
        return projections / denominators[None, :]
def velocity_confidence_transition(data, vkey="velocity", scale=10, copy=False):
    """Computes confidences of velocity transitions.

    For each cell, the velocity vector is compared with the displacement
    ``T.dot(X) - X`` obtained from the transition matrix ``T`` and the
    ``'Ms'`` layer ``X``. Both are mean-centered per cell, and their product
    sum is divided by the product of their norms (zero norms are replaced by
    one to avoid dividing by zero).

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    vkey: `str` (default: `'velocity'`)
        Name of velocity estimates to be used.
    scale: `float` (default: 10)
        Scale parameter of gaussian kernel.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to adata.

    Returns
    -------
    velocity_confidence_transition: `.obs`
        Confidence of transition for each cell
    """
    adata = data.copy() if copy else data
    if vkey not in adata.layers.keys():
        raise ValueError("You need to run `tl.velocity` first.")

    expression = np.array(adata.layers["Ms"])
    velocities = np.array(adata.layers[vkey])

    # Restrict to genes without NaN velocities that are, where annotated,
    # flagged as velocity genes and have a spearmans score above 0.1.
    keep_genes = ~np.isnan(np.sum(velocities, axis=0))
    genes_key = f"{vkey}_genes"
    if genes_key in adata.var.keys():
        keep_genes = keep_genes & np.array(adata.var[genes_key], dtype=bool)
    if "spearmans_score" in adata.var.keys():
        keep_genes = keep_genes & (adata.var["spearmans_score"].values > 0.1)

    velocities = velocities[:, keep_genes]
    expression = expression[:, keep_genes]

    transitions = transition_matrix(adata, vkey=vkey, scale=scale)
    displacement = transitions.dot(expression) - expression

    # Center both quantities per cell before comparing them.
    displacement -= displacement.mean(1)[:, None]
    velocities -= velocities.mean(1)[:, None]

    norms = l2_norm(displacement, axis=1) * l2_norm(velocities, axis=1)
    norms += norms == 0  # turn zero norms into one

    obs_key = f"{vkey}_confidence_transition"
    adata.obs[obs_key] = prod_sum(displacement, velocities, axis=1) / norms

    logg.hint(f"added '{obs_key}' (adata.obs)")

    if copy:
        return adata
    return None
def norm(A):
    """Deprecated wrapper returning ``l2_norm(A, axis=1)``.

    Computes the L2-norm along axis 1 (e.g. genes or embedding dimensions),
    equivalent to ``np.linalg.norm(A, axis=1)``. Emits a
    `DeprecationWarning` attributed to the caller on every call.
    """
    deprecation_message = (
        "`norm` is deprecated since scVelo v0.2.4 and will be removed in a future "
        "version. Please use `l2_norm(A, axis=1)` from `scvelo/core/` instead."
    )
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    return l2_norm(A, axis=1)
def _compute_cosines(self, obs_idx, queue):
    """Compute cosine correlations between velocities and neighbor differences.

    For each observation in `obs_idx` whose velocity vector is not entirely
    zero, the differences in `self.X` to its neighbors are correlated with
    the velocity `self.V[obs_id]`. If `self.t0` is set, observations whose
    `t0` equals the `t1` of the current observation are added to the
    neighbors (randomly subsampled to at most the current number of
    neighbors).

    Arguments
    ---------
    obs_idx
        Iterable with the indices of the observations to process.
    queue
        Optional queue for progress reporting: `1` is put after every
        processed observation and `None` once all are done.

    Returns
    -------
    Tuple `(uncertainties, vals, rows, cols)` of lists; `uncertainties` is
    only filled if `self.compute_uncertainties` is set.
    """
    uncertainties, vals, rows, cols = [], [], [], []
    report_progress = queue is not None

    if self.compute_uncertainties:
        moments = get_moments(self.adata, np.sign(self.V_raw), second_order=True)

    for obs_id in obs_idx:
        velocity = self.V[obs_id]

        if velocity.max() != 0 or velocity.min() != 0:
            neighs_idx = get_iterative_indices(
                self.indices, obs_id, self.n_recurse_neighbors, self.max_neighs
            )

            if self.t0 is not None:
                t0, t1 = self.t0[obs_id], self.t1[obs_id]
                if t0 >= 0 and t1 > 0:
                    t1_idx = np.where(self.t0 == t1)[0]
                    n_neighs = len(neighs_idx)
                    if len(t1_idx) > n_neighs:
                        t1_idx = np.random.choice(t1_idx, n_neighs, replace=False)
                    if len(t1_idx) > 0:
                        merged = np.concatenate([neighs_idx, t1_idx])
                        neighs_idx = np.unique(merged)

            dX = self.X[neighs_idx] - self.X[obs_id, None]  # 60% of runtime
            if self.sqrt_transform:
                # Signed square root of the differences.
                dX = np.sqrt(np.abs(dX)) * np.sign(dX)
            val = cosine_correlation(dX, velocity)  # 40% of runtime

            if self.compute_uncertainties:
                dX /= l2_norm(dX)[:, None]
                weighted = dX**2 * moments[obs_id][None, :]
                uncertainties.extend(np.nansum(weighted, 1))

            vals.extend(val)
            rows.extend(np.ones(len(neighs_idx)) * obs_id)
            cols.extend(neighs_idx)

        if report_progress:
            queue.put(1)

    if report_progress:
        queue.put(None)

    return uncertainties, vals, rows, cols
def velocity_confidence(data, vkey="velocity", copy=False):
    """Computes confidences of velocities.

    The confidence of a cell is the mean correlation of its (per-cell
    centered) velocity vector with the velocity vectors of its neighbors,
    clipped below at zero. The velocity length is the norm of the centered
    velocity vector, rounded to two decimals. If the transition confidence
    is not yet present in `.obs`, it is computed as well.

    .. code:: python

        scv.tl.velocity_confidence(adata)
        scv.pl.scatter(adata, color='velocity_confidence', perc=[2,98])

    .. image:: https://user-images.githubusercontent.com/31883718/69626334-b6df5200-1048-11ea-9171-495845c5bc7a.png
       :width: 600px

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    vkey: `str` (default: `'velocity'`)
        Name of velocity estimates to be used.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to adata.

    Returns
    -------
    velocity_length: `.obs`
        Length of the velocity vectors for each individual cell
    velocity_confidence: `.obs`
        Confidence for each cell
    """  # noqa E501
    adata = data.copy() if copy else data
    if vkey not in adata.layers.keys():
        raise ValueError("You need to run `tl.velocity` first.")

    velocities = np.array(adata.layers[vkey])

    # Restrict to genes without NaN velocities that are, where annotated,
    # flagged as velocity genes and have a spearmans score above 0.1.
    keep_genes = ~np.isnan(np.sum(velocities, axis=0))
    genes_key = f"{vkey}_genes"
    if genes_key in adata.var.keys():
        keep_genes = keep_genes & np.array(adata.var[genes_key], dtype=bool)
    if "spearmans_score" in adata.var.keys():
        keep_genes = keep_genes & (adata.var["spearmans_score"].values > 0.1)

    velocities = velocities[:, keep_genes]
    velocities -= velocities.mean(1)[:, None]
    lengths = l2_norm(velocities, axis=1)

    confidence = np.zeros(adata.n_obs)
    neighbor_indices = get_indices(dist=get_neighs(adata, "distances"))[0]

    for cell in range(adata.n_obs):
        neighbor_velocities = velocities[neighbor_indices[cell]]
        neighbor_velocities -= neighbor_velocities.mean(1)[:, None]

        projections = np.einsum("ij, j", neighbor_velocities, velocities[cell])
        denominators = l2_norm(neighbor_velocities, axis=1) * lengths[cell]
        confidence[cell] = np.mean(projections / denominators[None, :])

    length_key = f"{vkey}_length"
    confidence_key = f"{vkey}_confidence"
    adata.obs[length_key] = lengths.round(2)
    adata.obs[confidence_key] = np.clip(confidence, 0, None)

    logg.hint(f"added '{length_key}' (adata.obs)")
    logg.hint(f"added '{confidence_key}' (adata.obs)")

    if f"{vkey}_confidence_transition" not in adata.obs.keys():
        velocity_confidence_transition(adata, vkey)

    if copy:
        return adata
    return None
def velocity_embedding(
    data,
    basis=None,
    vkey="velocity",
    scale=10,
    self_transitions=True,
    use_negative_cosines=True,
    direct_pca_projection=None,
    retain_scale=False,
    autoscale=True,
    all_comps=True,
    T=None,
    copy=False,
):
    """Projects the single cell velocities into any embedding.

    Given normalized difference of the embedding positions
    :math:`\\tilde \\delta_{ij} = \\frac{x_j-x_i}{\\left\\lVert x_j-x_i
    \\right\\rVert}`, the projections are obtained as expected displacements
    with respect to the transition matrix :math:`\\tilde \\pi_{ij}` as

    .. math::
        \\tilde \\nu_i = E_{\\tilde \\pi_{i\\cdot}} [\\tilde \\delta_{i \\cdot}]
        = \\sum_{j \\neq i} \\left( \\tilde \\pi_{ij} - \\frac1n \\right)
        \\tilde \\delta_{ij}.

    Arguments
    ---------
    data: :class:`~anndata.AnnData`
        Annotated data matrix.
    basis: `str` (default: `None`)
        Which embedding to use. If `None`, `'pca'` is used when
        `direct_pca_projection` is set, otherwise the last one of `'pca'`,
        `'tsne'`, `'umap'` that is available in `.obsm`.
    vkey: `str` (default: `'velocity'`)
        Name of velocity estimates to be used.
    scale: `int` (default: 10)
        Scale parameter of gaussian kernel for transition matrix.
    self_transitions: `bool` (default: `True`)
        Whether to allow self transitions, based on the confidences of
        transitioning to neighboring cells.
    use_negative_cosines: `bool` (default: `True`)
        Whether to project cell-to-cell transitions with negative cosines into
        negative/opposite direction.
    direct_pca_projection: `bool` (default: `None`)
        Whether to directly project the velocities into PCA space,
        thus skipping the velocity graph.
    retain_scale: `bool` (default: `False`)
        Whether to retain scale from high dimensional space in embedding.
        Only applied on the graph-based path, since it needs the transition
        matrix.
    autoscale: `bool` (default: `True`)
        Whether to scale the embedded velocities by a scalar multiplier,
        which simply ensures that the arrows in the embedding are properly scaled.
    all_comps: `bool` (default: `True`)
        Whether to compute the velocities on all embedding components.
    T: `csr_matrix` (default: `None`)
        Allows the user to directly pass a transition matrix. The passed
        matrix is left untouched; a copy is used internally.
    copy: `bool` (default: `False`)
        Return a copy instead of writing to `adata`.

    Returns
    -------
    {vkey}_{basis}: `.obsm`
        coordinates of velocity projection on embedding
        (e.g., `velocity_umap` for basis='umap')

    Raises
    ------
    ValueError
        If no basis is given and none can be inferred, or if the embedding
        `X_{basis}` has not been computed.
    """
    adata = data.copy() if copy else data

    if basis is None:
        keys = [
            key for key in ["pca", "tsne", "umap"] if f"X_{key}" in adata.obsm.keys()
        ]
        if not keys:
            raise ValueError("No basis specified")
        basis = "pca" if direct_pca_projection else keys[-1]

    if f"X_{basis}" not in adata.obsm_keys():
        raise ValueError("You need to compute the embedding first.")

    if direct_pca_projection and "pca" in basis:
        logg.warn(
            "Directly projecting velocities into PCA space is for exploratory analysis "
            "on principal components.\n"
            " It does not reflect the actual velocity field from high "
            "dimensional gene expression space.\n"
            " To visualize velocities, consider applying "
            "`direct_pca_projection=False`.\n"
        )

    logg.info("computing velocity embedding", r=True)

    # Restrict to velocity genes (where annotated) without NaN velocities.
    V = np.array(adata.layers[vkey])
    vgenes = np.ones(adata.n_vars, dtype=bool)
    if f"{vkey}_genes" in adata.var.keys():
        vgenes &= np.array(adata.var[f"{vkey}_genes"], dtype=bool)
    vgenes &= ~np.isnan(V.sum(0))
    V = V[:, vgenes]

    if direct_pca_projection and "pca" in basis:
        PCs = adata.varm["PCs"] if all_comps else adata.varm["PCs"][:, :2]
        PCs = PCs[vgenes]

        X_emb = adata.obsm[f"X_{basis}"]
        V_emb = (V - V.mean(0)).dot(PCs)

    else:
        X_emb = (
            adata.obsm[f"X_{basis}"]
            if all_comps
            else adata.obsm[f"X_{basis}"][:, :2]
        )
        V_emb = np.zeros(X_emb.shape)

        if T is None:
            T = transition_matrix(
                adata,
                vkey=vkey,
                scale=scale,
                self_transitions=self_transitions,
                use_negative_cosines=use_negative_cosines,
            )
        else:
            # The diagonal is removed below; work on a copy so that the
            # caller's matrix is not modified.
            T = T.copy()
        T.setdiag(0)
        T.eliminate_zeros()

        # For moderately sized data a dense copy is used to look up the
        # transition probabilities.
        densify = adata.n_obs < 1e4
        # `toarray()` instead of the deprecated `.A` attribute.
        TA = T.toarray() if densify else None

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for i in range(adata.n_obs):
                row = T[i]  # slice the row once, reuse indices and data
                indices = row.indices
                dX = X_emb[indices] - X_emb[i, None]  # shape (n_neighbors, 2)
                if not retain_scale:
                    dX /= l2_norm(dX)[:, None]
                dX[np.isnan(dX)] = 0  # zero diff in a steady-state
                probs = TA[i, indices] if densify else row.data
                V_emb[i] = probs.dot(dX) - probs.mean() * dX.sum(0)

        if retain_scale:
            X = (
                adata.layers["Ms"]
                if "Ms" in adata.layers.keys()
                else adata.layers["spliced"]
            )
            delta = T.dot(X[:, vgenes]) - X[:, vgenes]
            if issparse(delta):
                delta = delta.toarray()
            # Scale by how well the velocity agrees with the displacement
            # under the transition matrix, clipped to [0, 1].
            cos_proj = (V * delta).sum(1) / l2_norm(delta)
            V_emb *= np.clip(cos_proj[:, None] * 10, 0, 1)

    if autoscale:
        V_emb /= 3 * quiver_autoscale(X_emb, V_emb)

    # Keep track of the embeddings velocities have been projected onto.
    if f"{vkey}_params" in adata.uns.keys():
        adata.uns[f"{vkey}_params"]["embeddings"] = (
            []
            if "embeddings" not in adata.uns[f"{vkey}_params"]
            else list(adata.uns[f"{vkey}_params"]["embeddings"])
        )
        adata.uns[f"{vkey}_params"]["embeddings"].extend([basis])

    vkey += f"_{basis}"
    adata.obsm[vkey] = V_emb

    logg.info(" finished", time=True, end=" " if settings.verbosity > 2 else "\n")
    logg.hint("added\n" f" '{vkey}', embedded velocity vectors (adata.obsm)")

    return adata if copy else None
def test_l2_norm(a: ndarray, axis: int):
    """Check that `l2_norm` agrees with `np.linalg.norm`.

    For 1-dimensional input the reference is the norm of the whole vector,
    otherwise the norm along `axis`. The comparison is asserted; previously
    the result of `np.allclose` was discarded, so the test could never fail.
    """
    if a.ndim == 1:
        expected = np.linalg.norm(a)
    else:
        expected = np.linalg.norm(a, axis=axis)

    assert np.allclose(expected, l2_norm(a, axis=axis))