def downsample_counts(adata, target_counts=20000, random_state=0, replace=True, copy=False):
    """Downsample counts so that each cell has no more than `target_counts`.

    Cells with fewer counts than `target_counts` are unaffected by this. This
    has been implemented by M. D. Luecken.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        Annotated data matrix.
    target_counts : `int` (default: 20,000)
        Target number of counts for downsampling. Cells with more counts than
        'target_counts' will be downsampled to have 'target_counts' counts.
    random_state : `int` or `None`, optional (default: 0)
        Random seed to change subsampling. It is passed unchanged to every
        per-cell `downsample_cell` call.
    replace : `bool`, optional (default: `True`)
        Whether to sample the counts with replacement.
    copy : `bool`, optional (default: `False`)
        If `True`, work on and return a copy of `adata`; otherwise `adata.X`
        is updated and `None` is returned.

    Returns
    -------
    AnnData, None
        Depending on `copy` returns or updates an `adata` with downsampled `.X`.
    """
    if copy:
        adata = adata.copy()
    # Numba doesn't want floats. `np.int_` is the concrete dtype that the
    # abstract `np.integer` used to resolve to; passing the abstract class as
    # a dtype is deprecated in NumPy.
    adata.X = adata.X.astype(np.int_)
    if issparse(adata.X):
        X = adata.X
        original_type = type(X)
        was_csr = isspmatrix_csr(X)
        if not was_csr:
            X = csr_matrix(X)
        totals = np.ravel(X.sum(axis=1))
        over_target = np.nonzero(totals > target_counts)[0]
        # In CSR layout, splitting `data` at the interior row pointers yields
        # one writable view per row, so the in-place downsampling below
        # modifies X itself.
        rows = np.split(X.data.view(), X.indptr[1:-1])
        for rowidx in over_target:
            downsample_cell(rows[rowidx], target_counts,
                            random_state=random_state, replace=replace,
                            inplace=True)
        # Downsampling can reduce stored entries to 0; drop them so the
        # sparsity structure matches the actual non-zero counts.
        X.eliminate_zeros()
        if not was_csr:  # Put it back
            adata.X = original_type(X)
    else:
        totals = np.ravel(adata.X.sum(axis=1))
        over_target = np.nonzero(totals > target_counts)[0]
        # np.apply_along_axis raises ValueError on an empty selection, so skip
        # the update entirely when no cell exceeds the target.
        if over_target.size:
            adata.X[over_target, :] = np.apply_along_axis(
                downsample_cell, 1, adata.X[over_target, :], target_counts,
                random_state=random_state, replace=replace)
    return adata if copy else None
def filter_genes_fano_deprecated(X, Ecutoff, Vcutoff):
    """Select genes whose mean and Fano factor both exceed a cutoff.

    Deprecated; see `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Parameters
    ----------
    X
        Dense data matrix. Statistics are taken along axis 0, i.e. one value
        per column (presumably columns are genes -- confirm with callers).
    Ecutoff
        Strict lower bound on the per-column mean.
    Vcutoff
        Strict lower bound on the per-column Fano factor, computed as
        ``variance / (mean + 0.0001)``.

    Returns
    -------
    Indices of the columns that pass both filters.

    Raises
    ------
    ValueError
        If `X` is a sparse matrix.
    """
    if issparse(X):
        raise ValueError('Not defined for sparse input. See `filter_genes_dispersion`.')
    gene_means = np.mean(X, axis=0)
    # The small offset keeps the ratio finite for columns with zero mean.
    fano_factor = np.var(X, axis=0) / (gene_means + .0001)
    passes_both = np.all([gene_means > Ecutoff, fano_factor > Vcutoff], axis=0)
    return np.nonzero(passes_both)[0]
def filter_genes_cv_deprecated(X, Ecutoff, cvFilter):
    """Select genes whose mean and coefficient of variation both exceed a cutoff.

    Deprecated; see `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Parameters
    ----------
    X
        Dense data matrix. Statistics are taken along axis 0, i.e. one value
        per column (presumably columns are genes -- confirm with callers).
    Ecutoff
        Strict lower bound on the per-column mean.
    cvFilter
        Strict lower bound on the per-column coefficient of variation,
        computed as ``std / (mean + 0.0001)``.

    Returns
    -------
    Indices of the columns that pass both filters.

    Raises
    ------
    ValueError
        If `X` is a sparse matrix.
    """
    if issparse(X):
        raise ValueError('Not defined for sparse input. See `filter_genes_dispersion`.')
    gene_means = np.mean(X, axis=0)
    # The small offset keeps the ratio finite for columns with zero mean.
    coeff_of_variation = np.std(X, axis=0) / (gene_means + .0001)
    passes_both = np.all([gene_means > Ecutoff, coeff_of_variation > cvFilter], axis=0)
    return np.nonzero(passes_both)[0]