Пример #1
0
def downsample_counts(adata,
                      target_counts=20000,
                      random_state=0,
                      replace=True,
                      copy=False):
    """Downsample counts so that each cell has no more than `target_counts`.

    Cells (rows of `.X`) whose total count does not exceed `target_counts`
    are left untouched. If no cell exceeds the target, only the integer cast
    of `.X` is applied. This has been implemented by M. D. Luecken.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        Annotated data matrix. `.X` may be dense or a scipy sparse matrix.
    target_counts : `int` (default: 20,000)
        Target number of counts for downsampling. Cells with more counts than
        'target_counts' will be downsampled to have 'target_counts' counts.
    random_state : `int` or `None`, optional (default: 0)
        Random seed to change subsampling.
    replace : `bool`, optional (default: `True`)
        Whether to sample the counts with replacement.
    copy : `bool`, optional (default: `False`)
        If an :class:`~anndata.AnnData` is passed, determines whether a copy
        is returned.

    Returns
    -------
    AnnData, None
        Depending on `copy` returns or updates an `adata` with downsampled `.X`.

    Notes
    -----
    `.X` is cast to an integer dtype before sampling, so fractional values
    are truncated. The same `random_state` is forwarded to every call of
    `downsample_cell`.
    NOTE(review): whether that helper reseeds on each call (which would
    correlate the draws across cells) is not visible here -- confirm.
    """
    if copy:
        adata = adata.copy()
    # Numba doesn't want floats. Use a concrete integer dtype: passing the
    # abstract `np.integer` class as a dtype is deprecated by NumPy.
    X = adata.X.astype(np.int_)
    if issparse(X):
        was_csr = isspmatrix_csr(X)
        original_type = type(X)
        if not was_csr:
            X = csr_matrix(X)
        totals = np.ravel(X.sum(axis=1))
        over_target = np.nonzero(totals > target_counts)[0]
        # One slice of X.data per row; the slices are views, so the in-place
        # downsampling below writes straight into the matrix.
        rows = np.split(X.data, X.indptr[1:-1])
        for row_idx in over_target:
            downsample_cell(rows[row_idx],
                            target_counts,
                            random_state=random_state,
                            replace=replace,
                            inplace=True)
        # Downsampling may have set stored entries to zero; drop them so the
        # sparse structure does not carry explicit zeros.
        X.eliminate_zeros()
        if not was_csr:  # Put it back into the caller's sparse format
            X = original_type(X)
    else:
        totals = np.ravel(X.sum(axis=1))
        over_target = np.nonzero(totals > target_counts)[0]
        # `np.apply_along_axis` raises ValueError on an empty selection, so
        # skip it entirely when no cell exceeds the target.
        if over_target.size:
            X[over_target, :] = np.apply_along_axis(
                downsample_cell, 1, X[over_target, :],
                target_counts, random_state=random_state, replace=replace)
    adata.X = X
    if copy:
        return adata
Пример #2
0
def filter_genes_fano_deprecated(X, Ecutoff, Vcutoff):
    """Select genes whose mean and Fano factor both exceed their cutoffs.

    Statistics are taken along axis 0 of `X`, i.e. one value per column.
    A column is kept when its mean is strictly greater than `Ecutoff` and
    its variance divided by (mean + 0.0001) is strictly greater than
    `Vcutoff`.

    See `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Returns the integer indices of the columns that pass both tests.
    Raises `ValueError` for sparse input.
    """
    if issparse(X):
        raise ValueError('Not defined for sparse input. See `filter_genes_dispersion`.')
    gene_means = np.mean(X, axis=0)
    # The small offset avoids dividing by an exactly-zero mean.
    fano_factors = np.var(X, axis=0) / (gene_means + .0001)
    expressed = gene_means > Ecutoff
    dispersed = fano_factors > Vcutoff
    passes_both = np.all([expressed, dispersed], axis=0)
    return np.nonzero(passes_both)[0]
Пример #3
0
def filter_genes_cv_deprecated(X, Ecutoff, cvFilter):
    """Select genes whose mean and coefficient of variation exceed cutoffs.

    Statistics are taken along axis 0 of `X`, i.e. one value per column.
    A column is kept when its mean is strictly greater than `Ecutoff` and
    its standard deviation divided by (mean + 0.0001) is strictly greater
    than `cvFilter`.

    See `filter_genes_dispersion`.

    Reference: Weinreb et al. (2017).

    Returns the integer indices of the columns that pass both tests.
    Raises `ValueError` for sparse input.
    """
    if issparse(X):
        raise ValueError('Not defined for sparse input. See `filter_genes_dispersion`.')
    gene_means = np.mean(X, axis=0)
    # The small offset avoids dividing by an exactly-zero mean.
    variation = np.std(X, axis=0) / (gene_means + .0001)
    expressed = gene_means > Ecutoff
    variable = variation > cvFilter
    passes_both = np.all([expressed, variable], axis=0)
    return np.nonzero(passes_both)[0]