Пример #1
0
def normalize(adata: scanpy.AnnData,
              filter_min_counts=None,
              size_factors=False,
              scale_input=False,
              logtrans_input=False):
    """Build a ``GeneExpressionDataset`` from ``adata``, optionally preprocessed.

    ``adata`` is modified in place by every enabled step; the returned
    dataset ends up holding the final ``adata.X`` as its ``data``.

    Parameters
    ----------
    adata
        Annotated expression matrix to convert.
    filter_min_counts
        When truthy, passed as ``min_counts`` to both
        ``scanpy.pp.filter_genes`` and ``scanpy.pp.filter_cells``.
    size_factors
        When truthy, ``adata`` is normalized per cell and a ``"size_factor"``
        cell attribute (cell counts divided by their median, as a column
        vector) is registered on the dataset.
    scale_input
        When truthy, ``scanpy.pp.scale`` is applied to ``adata``.
    logtrans_input
        When truthy, ``scanpy.pp.log1p`` is applied to ``adata``.

    Returns
    -------
    GeneExpressionDataset
        Dataset populated from ``adata`` with one cell attribute per column
        of ``adata.obs``.
    """
    if filter_min_counts:
        # Same threshold for genes first, then cells.
        for prune in (scanpy.pp.filter_genes, scanpy.pp.filter_cells):
            prune(adata, min_counts=filter_min_counts)

    # Store adata as its own .raw before any of the transformations below.
    adata.raw = adata
    dataset = GeneExpressionDataset()
    dataset.from_data(adata.X, raw=adata.raw.X)

    if size_factors:
        scanpy.pp.normalize_per_cell(adata)
        # The size factor is derived from the dataset's own per-cell counts,
        # not from the freshly normalized adata.
        cell_counts = dataset.nb_cell_counts
        relative_depth = cell_counts / np.median(cell_counts)
        dataset.initialize_cell_attribute("size_factor",
                                          relative_depth.reshape((-1, 1)))

    # Log transform is applied before scaling when both are requested.
    if logtrans_input:
        scanpy.pp.log1p(adata)
    if scale_input:
        scanpy.pp.scale(adata)

    dataset.data = adata.X

    # Mirror every per-cell annotation column onto the dataset; columns are
    # read positionally so duplicate column labels cannot collide.
    for position, column_name in enumerate(adata.obs.columns):
        column_values = adata.obs.iloc[:, position].values
        dataset.initialize_cell_attribute(column_name, column_values)

    return dataset
Пример #2
0
def postprocess_mnnpy(adata, bdata):
    """Combine batch-corrected values with the original raw data.

    According to the original author's notes, running mnnpy's ``mnn_correct``
    yields an AnnData object (``bdata``) whose ``.raw`` has been truncated to
    the highly variable genes. This function assembles a new AnnData object
    that takes ``.X``, ``.obs`` and ``.var`` from ``bdata`` but ``.raw`` from
    ``adata``, which still carries all genes.

    Both matrices are sorted by cell barcode (the observation names) before
    the new object is built, so that rows and labels line up.

    Parameters
    ----------
    adata:
        the uncorrected AnnData object; its ``.raw`` supplies the raw values
    bdata:
        the batch corrected AnnData object

    Returns
    -------
    AnnData
        AnnData object whose ``.X`` contains the corrected values and whose
        ``.raw`` contains the original values.

    Notes
    -----
    As a side effect, the index of the input ``adata`` is replaced by its
    ``obs.CELL`` column.
    """
    # Corrected expression values, rows ordered by cell barcode.
    corrected = DataFrame(
        data=bdata.X,
        index=bdata.obs_names.tolist(),
        columns=bdata.var_names.tolist(),
    ).sort_index()

    result = AnnData(corrected.values)
    result.obs = bdata.obs.sort_index()
    result.var_names = bdata.var_names
    result.obs_names = bdata.obs_names.sort_values()
    result.var = bdata.var

    # The raw values must follow the same barcode order as the corrected
    # matrix; sparse input is densified first so it fits in a DataFrame.
    if scipy.sparse.issparse(adata.raw.X):
        raw_values = adata.raw.X.todense()
    else:
        raw_values = adata.raw.X
    uncorrected = DataFrame(
        data=raw_values,
        index=adata.obs_names.tolist(),
        columns=adata.raw.var_names.tolist(),
    ).sort_index()

    # Rebuild a standalone AnnData to serve as the new .raw.
    raw_layer = AnnData(uncorrected.values)
    raw_layer.var_names = adata.raw.var_names
    raw_layer.obs_names = adata.obs_names.sort_values()
    raw_layer.var = adata.raw.var

    result.raw = raw_layer

    # Ensure that indices are preserved.
    # NOTE(review): this re-indexes the *input* adata (not the returned
    # object) and requires an obs column named CELL -- confirm it is intended.
    adata.obs_names = adata.obs.CELL
    adata.obs.index = adata.obs.CELL

    return result