def recipe_li(adata, n_top_genes=1000):
    """Run the filter / normalize / HVG / scale preprocessing recipe on `adata`.

    Steps, in order:
      1. Drop cells with fewer than 200 genes and genes seen in fewer than
         3 cells.
      2. Annotate every cell with `percent_mito` (fraction of counts coming
         from genes whose name starts with 'MT-') and `n_counts` (row sum
         of `X`).
      3. Keep cells with `n_genes < 2500` and `percent_mito < 0.05`.
      4. `desc.normalize_per_cell(counts_per_cell_after=1e4)`, `desc.log1p`,
         then snapshot the result into `adata.raw`.
      5. Keep the `n_top_genes` highly variable genes and call
         `desc.scale(zero_center=True, max_value=3)`.

    Parameters
    ----------
    adata
        AnnData-style object (uses `.X`, `.obs`, `.var`, `.var_names`,
        `.raw`). It is modified along the way (filtering and the added obs
        columns), so callers should continue with the returned object.
    n_top_genes
        Number of highly variable genes to keep.

    Returns
    -------
    The filtered, normalized and scaled object.
    """
    # Imported here rather than at module level, as in the original code.
    import desc

    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)

    # Per-cell totals. np.asarray(...).ravel() gives a flat ndarray whether
    # X.sum() returns an np.matrix (scipy sparse matrix) or a plain ndarray
    # (dense X / sparse arrays); `.A1` only exists on np.matrix.
    mito_genes = adata.var_names.str.startswith('MT-')
    total_counts = np.asarray(adata.X.sum(axis=1)).ravel()
    mito_counts = np.asarray(adata[:, mito_genes].X.sum(axis=1)).ravel()
    adata.obs['percent_mito'] = mito_counts / total_counts
    adata.obs['n_counts'] = total_counts
    # sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],
    #              jitter=0.4, multi_panel=True, show=False)

    # NOTE(review): 'n_genes' is expected to have been added to .obs by
    # sc.pp.filter_cells(min_genes=...) above.
    keep_cells = (adata.obs['n_genes'] < 2500) & (adata.obs['percent_mito'] < 0.05)
    # Materialize the subset: the following steps modify the data in place,
    # which should not be done on a view.
    adata = adata[keep_cells, :].copy()

    desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
    desc.log1p(adata)
    adata.raw = adata

    sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes, subset=True)
    adata = adata[:, adata.var['highly_variable']].copy()
    desc.scale(adata, zero_center=True, max_value=3)
    return adata
adata.var_names_make_unique()

# --- Filter ---
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.filter_cells(adata, min_genes=200)

# --- Remove cells with a high proportion of mitochondrial gene expression ---
mito_genes = adata.var_names.str.startswith('MT-')
# np.asarray(...).ravel() yields a flat ndarray whether X.sum() returns an
# np.matrix (scipy sparse matrix) or a plain ndarray (dense X / sparse
# arrays); `.A1` only exists on np.matrix.
adata.obs['percent_mito'] = (
    np.asarray(adata[:, mito_genes].X.sum(axis=1)).ravel()
    / np.asarray(adata.X.sum(axis=1)).ravel()
)
# Add the total counts per cell as observations-annotation to adata.
adata.obs['n_counts'] = np.asarray(adata.X.sum(axis=1)).ravel()

# Alternative cell filter, currently disabled:
# adata = adata[adata.obs['n_genes'] < 2500, :]
# Both thresholds are applied in one step and the subset is materialized
# with .copy(), because the steps below modify the data in place and that
# should not be done on a view.
adata = adata[
    (adata.obs['n_counts'] < 1500) & (adata.obs['percent_mito'] < 0.5), :
].copy()

# --- Normalization ---
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
adata.raw = adata

# --- Selection of highly variable genes ---
sc.pp.highly_variable_genes(
    adata, min_mean=0.0125, max_mean=3, min_disp=0.5, subset=True
)
adata = adata[:, adata.var['highly_variable']].copy()

# If the dataset has two or more batches you can use
# `adata=desc.scale(adata,groupby="BatchID")` instead.
desc.scale(adata, zero_center=True, max_value=3)

save_dir = "h5_result"