示例#1
0
def KS(adata):
    """Compare cells with zero vs. non-zero ``DENV_reads`` gene by gene.

    The cells in ``adata`` are split on ``adata.obs['DENV_reads']``: cells
    with exactly zero reads form the first group and all remaining cells the
    second. Both groups are handed to ``anndataks.compare`` with ``log1p=2``.

    Args:
        adata: Annotated data object whose ``obs`` table contains a
            ``DENV_reads`` column.

    Returns:
        The table produced by ``anndataks.compare``, ordered by its
        ``statistic`` column from largest to smallest. No p-value filter
        is applied.
    """
    denv_reads = adata.obs['DENV_reads']
    without_reads = adata[denv_reads == 0]
    with_reads = adata[denv_reads != 0]

    # NOTE(review): log1p=2 presumably declares the data as already
    # log2(x + 1)-transformed -- confirm against the anndataks docs.
    comparison = anndataks.compare(without_reads, with_reads, log1p=2)
    return comparison.sort_values(by='statistic', ascending=False)
示例#2
0
def test_compare_sparse():
    """Check ``anndataks.compare`` on sparse (CSC) inputs in both rc modes.

    Two small two-gene datasets are converted to ``scipy.sparse.csc_matrix``
    and compared twice: first with ``rc['use_experimental_ks_2samp']`` set to
    ``True`` (the expected table has an extra ``value`` column), then with it
    set to ``False``. Each result must match the expected table in shape and
    element-wise within an absolute tolerance of 1e-3.
    """
    import scipy.sparse

    dense_1 = np.array([
        [0, 1],
        [1, 2],
        [2, 3],
        [3, 4],
        [3, 5],
        ])
    dense_2 = np.array([
        [0, 1],
        [6, 2],
        [6, 3],
        [6, 5],
        ])

    # Wrap the sparse versions of both matrices in AnnData objects.
    adata1 = anndata.AnnData(X=scipy.sparse.csc_matrix(dense_1))
    adata2 = anndata.AnnData(X=scipy.sparse.csc_matrix(dense_2))
    for dataset in (adata1, adata2):
        dataset.var_names = ['Gene1', 'Gene2']

    # (rc flag, expected columns, expected rows); run in this order so the
    # flag ends up False, as it did before.
    cases = [
        (
            True,
            ['statistic', 'value', 'pvalue', 'avg1', 'avg2', 'log2_fold_change'],
            [[0.75, 1.5, 0.142857, 1.485427, 2.459432, 0.974005],
             [-0.15, 3, 1.000000, 2.000000, 1.906891, -0.093109]],
        ),
        (
            False,
            ['statistic', 'pvalue', 'avg1', 'avg2', 'log2_fold_change'],
            [[0.75, 0.142857, 1.485427, 2.459432, 0.974005],
             [0.15, 1.000000, 2.000000, 1.906891, -0.093109]],
        ),
    ]

    for use_experimental, expected_columns, expected_rows in cases:
        anndataks.rc['use_experimental_ks_2samp'] = use_experimental
        ress = anndataks.compare(adata1, adata2, log1p=False)
        ress_exp = pd.DataFrame(
            expected_rows,
            columns=expected_columns,
            index=adata1.var_names,
            )
        assert ress.shape == ress_exp.shape
        deviation = np.abs(ress.values - ress_exp.values)
        assert (deviation < 1e-3).all()
import anndataks

cell_types = [
    'B_cells', 'Monocytes', 'NK_cells', 'Plasmablasts', 'T_cells', 'cDCs',
    'pDCs'
]
conditions = ['S_dengue', 'dengue']

# NOTE: the sc.pp.log1p(adatag) call is disabled here, so compare() below
# runs with its default log1p setting.

# Only cells from the 'child' dataset take part in the comparison.
adata_kids = adatag[adatag.obs['dataset'] == 'child']

# Per-cell-type comparison of 'dengue' against 'S_dengue' cells.
results = {}
for cell_type in cell_types:
    adata_ct = adata_kids[adata_kids.obs['cell_type'] == cell_type]
    if cell_type == 'cDCs':
        # For cDCs only, cells from these two IDs are left out.
        adata_ct = adata_ct[~adata_ct.obs['ID'].isin(['1_140_01', '5_193_01'])]

    adata_SD = adata_ct[adata_ct.obs['Condition'] == 'S_dengue']
    adata_D = adata_ct[adata_ct.obs['Condition'] == 'dengue']
    # dengue is the first dataset and S_dengue the second, so
    # log2_fold_change is reported as S_dengue vs dengue.
    results[cell_type] = anndataks.compare(adata_D, adata_SD)

# Label every per-cell-type table, then stack them with a single concat.
# Concatenating once avoids re-copying the growing table on every iteration
# and avoids concatenating onto an empty DataFrame.
for cell_type in cell_types:
    results[cell_type]['cell_type'] = cell_type
res = pd.concat([results[cell_type] for cell_type in cell_types])

# NOTE(review): the target is named .tsv but to_csv() is called with its
# default comma separator; pass sep='\t' if a real TSV is wanted, and update
# any reader of this file accordingly.
res.to_csv(
    '/home/yike/phd/dengue/data/excels/log2_fc/S_dengue_vs_dengue/inters/inter_genes_pvalue.tsv'
)
示例#4
0
conditions = ['S_dengue', 'dengue']

# Per-label comparison of 'dengue' against 'S_dengue' cells.
results = {}
for subct in subcts:
    # A label may name either a cell type or a cell subtype; the cell-type
    # column is checked first.
    if subct in adata_kid.obs['cell_type_new'].astype(
            'category').cat.categories:
        adata_ct = adata_kid[adata_kid.obs['cell_type_new'] == subct]
    elif subct in adata_kid.obs['cell_subtype_new'].astype(
            'category').cat.categories:
        adata_ct = adata_kid[adata_kid.obs['cell_subtype_new'] == subct]
    else:
        # Without this branch an unknown label would silently reuse the
        # subset left over from the previous iteration (or raise NameError
        # on the first one) and store results under the wrong label.
        raise ValueError(
            '{!r} is neither a cell_type_new nor a cell_subtype_new '
            'category'.format(subct))

    adata_SD = adata_ct[adata_ct.obs['Condition'] == 'S_dengue']
    adata_D = adata_ct[adata_ct.obs['Condition'] == 'dengue']
    # The data in adata_kid are already log1p-transformed, hence log1p=2.
    # NOTE(review): confirm that base 2 matches how the data were logged.
    # dengue is the first dataset and S_dengue the second, so
    # log2_fold_change is reported as S_dengue vs dengue.
    results[subct] = anndataks.compare(
        adata_D, adata_SD, log1p=2,
        mode='asymp')

# Label every per-label table, then stack them with a single concat.
# Concatenating once avoids re-copying the growing table on every iteration
# and avoids concatenating onto an empty DataFrame.
for subct in subcts:
    results[subct]['cell_subtype'] = subct
ks_res = pd.concat([results[subct] for subct in subcts])

# Index rows by (cell_subtype, original row label) and save as TSV.
ks_res.set_index(['cell_subtype', ks_res.index], inplace=True)
ks_res.to_csv(save_tabels + 'ks_pvalue.tsv', sep='\t')

# Reload the table from disk. The second index level was written without a
# name, so it is addressed by the 'Unnamed: 1' header on read.
ks_res = pd.read_csv(save_tabels + 'ks_pvalue.tsv',
                     sep='\t',
                     index_col=['cell_subtype', 'Unnamed: 1'])

# Append the KS statistic and p-value for the rows listed in ct_pair.
ks_matched = ks_res.loc[ct_pair.index]
ct_pair = pd.concat(
    [ct_pair, ks_matched[['statistic', 'pvalue']]], axis=1)