def KS(adata):
    """Rank genes by KS statistic between cells without and with DENV reads.

    Cells are partitioned on the ``DENV_reads`` column of ``adata.obs``:
    the first group has exactly zero reads, the second group is every
    other cell. The two groups are handed to ``anndataks.compare`` with
    ``log1p=2`` (presumably meaning the matrix is already log2(x + 1)
    transformed -- confirm against the anndataks documentation).

    Returns the comparison table sorted by ``statistic``, largest first.
    No p-value filtering is applied.
    """
    denv_reads = adata.obs['DENV_reads']
    without_virus = adata[denv_reads == 0, :]
    with_virus = adata[denv_reads != 0, :]

    comparison = anndataks.compare(without_virus, with_virus, log1p=2)
    return comparison.sort_values('statistic', ascending=False)
def test_compare_sparse():
    """Check ``anndataks.compare`` on sparse (CSC) inputs under both KS backends.

    Two small two-gene datasets are compared twice: once with
    ``anndataks.rc['use_experimental_ks_2samp']`` switched on (the expected
    table then carries an extra ``value`` column and a signed statistic)
    and once with it switched off. Results must match the reference tables
    in shape and to within an absolute tolerance of 1e-3.

    The ``rc`` flag is global state, so whatever value it had on entry is
    restored on exit -- even if an assertion fails -- to keep this test from
    changing the backend used by tests that run after it.
    """
    import scipy.sparse

    rc_key = 'use_experimental_ks_2samp'

    def assert_matches(actual, expected):
        # Same shape, and every entry within 1e-3 of the reference.
        assert actual.shape == expected.shape
        assert (np.abs(actual.values - expected.values) < 1e-3).all()

    X1 = np.array([
        [0, 1],
        [1, 2],
        [2, 3],
        [3, 4],
        [3, 5],
    ])
    X2 = np.array([
        [0, 1],
        [6, 2],
        [6, 3],
        [6, 5],
    ])

    # Make sparse
    X1 = scipy.sparse.csc_matrix(X1)
    X2 = scipy.sparse.csc_matrix(X2)

    adata1 = anndata.AnnData(X=X1)
    adata2 = anndata.AnnData(X=X2)
    adata1.var_names = ['Gene1', 'Gene2']
    adata2.var_names = ['Gene1', 'Gene2']

    previous_setting = anndataks.rc[rc_key]
    try:
        # Experimental backend: extra 'value' column, signed statistic.
        anndataks.rc[rc_key] = True
        ress = anndataks.compare(adata1, adata2, log1p=False)
        ress_exp = pd.DataFrame(
            [[0.75, 1.5, 0.142857, 1.485427, 2.459432, 0.974005],
             [-0.15, 3, 1.000000, 2.000000, 1.906891, -0.093109]],
            columns=['statistic', 'value', 'pvalue', 'avg1', 'avg2',
                     'log2_fold_change'],
            index=adata1.var_names,
        )
        assert_matches(ress, ress_exp)

        # Standard backend: no 'value' column, unsigned statistic.
        anndataks.rc[rc_key] = False
        ress = anndataks.compare(adata1, adata2, log1p=False)
        ress_exp = pd.DataFrame(
            [[0.75, 0.142857, 1.485427, 2.459432, 0.974005],
             [0.15, 1.000000, 2.000000, 1.906891, -0.093109]],
            columns=['statistic', 'pvalue', 'avg1', 'avg2',
                     'log2_fold_change'],
            index=adata1.var_names,
        )
        assert_matches(ress, ress_exp)
    finally:
        # Do not leak the backend switch into other tests.
        anndataks.rc[rc_key] = previous_setting
import anndataks

# Per-cell-type KS comparison of 'dengue' vs 'S_dengue' cells in the child
# dataset; the per-type tables are stacked into one table and written to disk.
cell_types = [
    'B_cells',
    'Monocytes',
    'NK_cells',
    'Plasmablasts',
    'T_cells',
    'cDCs',
    'pDCs',
]
conditions = ['S_dengue', 'dengue']

# NOTE(review): a `sc.pp.log1p(adatag)` call was disabled here and compare()
# below runs with its default `log1p` argument (the original carried a
# `log1p=False` remark) -- confirm whether adatag is already log-transformed.
adata_kids = adatag[adatag.obs['dataset'] == 'child']

results = {}
for cell_type in cell_types:
    adata_ct = adata_kids[adata_kids.obs['cell_type'] == cell_type]
    if cell_type == 'cDCs':
        # Two sample IDs are excluded for cDCs only.
        adata_ct = adata_ct[~adata_ct.obs['ID'].isin(['1_140_01', '5_193_01'])]

    adata_SD = adata_ct[adata_ct.obs['Condition'] == 'S_dengue']
    adata_D = adata_ct[adata_ct.obs['Condition'] == 'dengue']

    # Argument order is (dengue, S_dengue); per the original author's note the
    # reported log2_fold_change is S_dengue relative to dengue.
    results[cell_type] = anndataks.compare(adata_D, adata_SD)

# Tag every table with its cell type (kept in `results` as before), then
# stack them with a single concat instead of growing a frame inside the loop.
for cell_type in cell_types:
    results[cell_type]['cell_type'] = cell_type
res = pd.concat([results[cell_type] for cell_type in cell_types])

# NOTE(review): the file is named .tsv but to_csv is called with its default
# separator (comma); confirm what downstream readers expect before changing.
res.to_csv(
    '/home/yike/phd/dengue/data/excels/log2_fc/S_dengue_vs_dengue/inters/inter_genes_pvalue.tsv'
)
# Per-subtype KS comparison of 'dengue' vs 'S_dengue' cells; the statistic and
# p-value are then joined onto `ct_pair`.
conditions = ['S_dengue', 'dengue']

results = {}
for subct in subcts:
    # A label may live at either annotation level: cell type or cell subtype.
    if subct in adata_kid.obs['cell_type_new'].astype(
            'category').cat.categories:
        adata_ct = adata_kid[adata_kid.obs['cell_type_new'] == subct]
    elif subct in adata_kid.obs['cell_subtype_new'].astype(
            'category').cat.categories:
        adata_ct = adata_kid[adata_kid.obs['cell_subtype_new'] == subct]
    else:
        # Without this branch an unknown label would silently reuse the
        # previous iteration's subset (or hit a NameError on the first one).
        raise ValueError(
            "{!r} is not a category of 'cell_type_new' or "
            "'cell_subtype_new'".format(subct))

    adata_SD = adata_ct[adata_ct.obs['Condition'] == 'S_dengue']
    adata_D = adata_ct[adata_ct.obs['Condition'] == 'dengue']

    # The expression matrix is already log1p-transformed when the KS test is
    # computed, hence log1p=2.
    # Argument order is (dengue, S_dengue); per the original author's note the
    # reported log2_fold_change is S_dengue relative to dengue.
    results[subct] = anndataks.compare(
        adata_D, adata_SD, log1p=2, mode='asymp')

# Tag every table with its subtype (kept in `results` as before), then stack
# them with a single concat instead of growing a frame inside the loop.
for subct in subcts:
    results[subct]['cell_subtype'] = subct
ks_res = pd.concat([results[subct] for subct in subcts])

# Index rows by (cell_subtype, original row label) and persist the table.
ks_res.set_index(['cell_subtype', ks_res.index], inplace=True)
ks_res.to_csv(save_tabels + 'ks_pvalue.tsv', sep='\t')

# Reload from disk; the second index level has no name in the file, so it is
# addressed by the 'Unnamed: 1' header pandas assigns on read.
ks_res = pd.read_csv(save_tabels + 'ks_pvalue.tsv',
                     sep='\t',
                     index_col=['cell_subtype', 'Unnamed: 1'])

# Append the KS statistic and p-value for the rows listed in ct_pair.
# NOTE(review): assumes ct_pair is indexed by the same (cell_subtype, label)
# pairs as ks_res -- confirm against where ct_pair is built.
ct_pair = pd.concat(
    [ct_pair, ks_res.loc[ct_pair.index][['statistic', 'pvalue']]], axis=1)