Exemplo n.º 1
0
def RunClusterAcc(dataset1, dataset2, gene_dataset, plotname):
    """Score seven harmonization methods and save the results as a CSV.

    For each method, ``run_model`` produces a joint latent space and batch
    indices, which are handed to ``computeARI`` together with two
    per-dataset reference latents and two KMeans instances.

    * 'readSeurat', 'MNN' and 'PCA' are scored against reference latents
      read from the Seurat CCA text files, using the datasets as passed in.
    * 'vae', 'vae_nb', 'scanvi1' and 'scanvi2' are scored after
      ``SubsetGenes`` has replaced the three datasets, against reference
      latents obtained by running 'vae' on each subsetted dataset alone.

    Args:
        dataset1: first dataset; must expose ``cell_types`` (its length
            sets the cluster count of the first KMeans).
        dataset2: second dataset; must expose ``cell_types``.
        gene_dataset: combined dataset forwarded to ``run_model``.
        plotname: string used to build the CCA input paths, passed as
            ``filename`` to ``run_model``, and used as the output prefix.

    Returns:
        None. The scores are written to ``"<plotname>.clusterScore.csv"``
        with rows ordered vae, vae_nb, scanvi1, scanvi2, readSeurat, MNN,
        PCA.
    """
    kmeans_a = KMeans(len(dataset1.cell_types))
    kmeans_b = KMeans(len(dataset2.cell_types))

    # Reference latents for the methods that run on the full gene set.
    ref_a = np.genfromtxt('../harmonization/Seurat_data/' + plotname +
                          '.1.CCA.txt')
    ref_b = np.genfromtxt('../harmonization/Seurat_data/' + plotname +
                          '.2.CCA.txt')

    scores = {}
    for method in ('readSeurat', 'MNN', 'PCA'):
        joint, batches, _, _, _ = run_model(method,
                                            gene_dataset,
                                            dataset1,
                                            dataset2,
                                            filename=plotname)
        scores[method] = computeARI(ref_a, ref_b, joint, kmeans_a, kmeans_b,
                                    batches)

    # From here on every call works on the gene-subsetted datasets.
    dataset1, dataset2, gene_dataset = SubsetGenes(dataset1, dataset2,
                                                   gene_dataset, plotname)

    # Per-dataset 'vae' runs replace the CCA files as reference latents.
    ref_a, _, _, _, _ = run_model('vae', dataset1, 0, 0,
                                  filename=plotname, rep='vae1')
    ref_b, _, _, _, _ = run_model('vae', dataset2, 0, 0,
                                  filename=plotname, rep='vae2')

    for method in ('vae', 'vae_nb', 'scanvi1', 'scanvi2'):
        joint, batches, _, _, _ = run_model(method,
                                            gene_dataset,
                                            dataset1,
                                            dataset2,
                                            filename=plotname)
        scores[method] = computeARI(ref_a, ref_b, joint, kmeans_a, kmeans_b,
                                    batches)

    # Output rows list the scVI-family methods first, then the baselines,
    # which differs from the order in which they were computed.
    row_order = ('vae', 'vae_nb', 'scanvi1', 'scanvi2', 'readSeurat', 'MNN',
                 'PCA')
    table = np.asarray([scores[method] for method in row_order])
    np.savetxt("%s.clusterScore.csv" % (plotname), table, "%.4f", ',')
Exemplo n.º 2
0
from scvi.dataset.dataset import GeneExpressionDataset
from scvi.harmonization.utils_chenling import SubsetGenes

import pickle as pkl
# Load the pickled (all_dataset, dataset1, dataset2) triple. The context
# manager closes the file even if unpickling raises.
# NOTE(review): pickle can execute arbitrary code while loading; only point
# this at trusted files.
with open('../%s/gene_dataset.pkl'%plotname, 'rb') as f:
    all_dataset, dataset1, dataset2 = pkl.load(f)
# The pickled all_dataset is not used: it is rebuilt from the two loaded
# datasets before the gene subsetting step.
all_dataset = GeneExpressionDataset.concat_datasets(dataset1,dataset2)
dataset1, dataset2, gene_dataset = SubsetGenes(dataset1, dataset2, all_dataset, plotname)



import time
from scvi.harmonization.utils_chenling import run_model
# Time a single run_model call for the 'scmap' method and print the elapsed
# wall-clock time in seconds.
start = time.time()
latent, batch_indices, labels, keys, stats = run_model('scmap', gene_dataset, dataset1, dataset2,filename=plotname)
end = time.time()
print( end - start)



# Build per-batch, log-transformed expression matrices from gene_dataset.
# Flatten the batch and label annotations to 1-D. This rebinds `labels`,
# replacing the value unpacked from run_model earlier in the script.
batch = gene_dataset.batch_indices.ravel()
labels = gene_dataset.labels.ravel()
# Mean of each row of X (presumably one row per cell -- TODO confirm).
scaling_factor = gene_dataset.X.mean(axis=1)
# Divide every row by its own mean; the reshape to a column makes the
# division apply row-wise.
# NOTE(review): a row whose mean is 0 would divide by zero here -- confirm
# such rows are filtered out upstream.
norm_X = gene_dataset.X / scaling_factor.reshape(len(scaling_factor), 1)
# Row positions belonging to batch 0 and batch 1.
index_0 = np.where(batch == 0)[0]
index_1 = np.where(batch == 1)[0]
# log(1 + x) of the normalized rows, split by batch.
X1 = np.log(1 + norm_X[index_0])
X2 = np.log(1 + norm_X[index_1])

# CORAL is not defined or imported in the visible part of the script;
# presumably a domain-adaptation estimator applied to X1/X2 later -- verify.
coral = CORAL()
Exemplo n.º 3
0
 pbmc2.filter_cell_types(newCellType)
 gene_dataset = GeneExpressionDataset.concat_datasets(pbmc, pbmc2)
 # _,_,_,_,_ = run_model('writedata', gene_dataset, pbmc, pbmc2,filename=plotname+'.'
 #                                                                       +celltype1.replace(' ','')+'.'
 #                                                                       +celltype2.replace(' ',''))
 rmCellTypes = '.' + celltype1.replace(
     ' ', '') + '.' + celltype2.replace(' ', '')
 latent1 = np.genfromtxt('../harmonization/Seurat_data/' +
                         plotname + rmCellTypes.replace(' ', '') +
                         '.1.CCA.txt')
 latent2 = np.genfromtxt('../harmonization/Seurat_data/' +
                         plotname + rmCellTypes.replace(' ', '') +
                         '.2.CCA.txt')
 latent, batch_indices, labels, keys, stats = run_model(
     'readSeurat',
     gene_dataset,
     pbmc,
     pbmc2,
     filename=plotname + rmCellTypes.replace(' ', ''))
 acc, cell_type = KNNpurity(latent1, latent2, latent,
                            batch_indices.ravel(), labels, keys)
 f.write('Seurat' + '\t' + rmCellTypes +
         ("\t%.4f" * 8 + "\t%s" * 8 + "\n") %
         tuple(list(acc) + list(cell_type)))
 be, temp1 = BEbyType(keys, latent, labels, batch_indices,
                      celltype1)
 g.write('Seurat' + '\t' + rmCellTypes +
         ("\t%.4f" * 8 + "\t%s" * 8 + "\n") %
         tuple(be + list(temp1)))
 plotUMAP(latent, plotname, 'Seurat', gene_dataset.cell_types,
          rmCellTypes, gene_dataset.batch_indices.ravel())
 pbmc, pbmc2, gene_dataset = SubsetGenes(