Пример #1
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE on ``dataset`` and run the 'test' evaluation calls.

    Args:
        dataset: Object exposing ``nb_genes`` and ``n_batches``; it is also
            passed unchanged to ``VariationalInference``.
        n_epochs: Number of epochs forwarded to ``train``.
        use_cuda: Forwarded to ``VariationalInference``.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    inference = VariationalInference(model, dataset, use_cuda=use_cuda)
    inference.train(n_epochs=n_epochs)

    # The return values of the evaluation calls are not used here.
    inference.ll('test')
    inference.marginal_ll('test')
    # Original author's note: the imputation result should be around 2.1.
    inference.imputation('test', rate=0.1)
    return inference
Пример #2
0
def base_benchmark(gene_dataset):
    """Build a VAE for ``gene_dataset`` and train it for a single epoch.

    ``use_cuda`` is not a parameter; it is read from the enclosing scope.

    Args:
        gene_dataset: Object exposing ``nb_genes``, ``n_batches`` and
            ``n_labels``; it is also passed to ``VariationalInference``.

    Returns:
        The ``VariationalInference`` instance after one training epoch.
    """
    model = VAE(
        gene_dataset.nb_genes,
        gene_dataset.n_batches,
        gene_dataset.n_labels,
    )
    inference = VariationalInference(
        model, gene_dataset, train_size=0.5, use_cuda=use_cuda
    )
    inference.train(n_epochs=1)
    return inference
Пример #3
0
         count.append(nsamples)
         cells.append(sample)
     correlation = (np.corrcoef(count[0], count[1])[0, 1])
 # Report the summed entries of the first two `count` items and the
 # `correlation` value; both are computed by the preceding loop.
 print("dataset 1 has %d cells" % (np.sum(count[0])))
 print("dataset 2 has %d cells" % (np.sum(count[1])))
 print(
     "correlation between the cell-type composition of the subsampled dataset is %.3f"
     % correlation)
 # Work on a deep copy so that `update_cells` is applied to `sub_dataset`
 # rather than to `gene_dataset`.
 sub_dataset = deepcopy(gene_dataset)
 sub_dataset.update_cells(np.concatenate(cells))
 # Build the VAE from the sub-dataset's gene, batch and label counts.
 vae = VAE(sub_dataset.nb_genes,
           n_batch=sub_dataset.n_batches,
           n_labels=sub_dataset.n_labels,
           n_hidden=128,
           dispersion='gene')
 infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda)
 infer.train(n_epochs=250)
 # NOTE(review): 'sequential' presumably selects an unshuffled pass over the
 # data - confirm against `get_latent`.
 latent, batch_indices, labels = infer.get_latent('sequential')
 keys = sub_dataset.cell_types
 batch_entropy = entropy_batch_mixing(latent, batch_indices)
 print("Entropy batch mixing :", batch_entropy)
 # Evaluate KNN purity on the indices returned by `select_indices_evenly`
 # (called with 1000 and the labels), not on every row of `latent`.
 sample = select_indices_evenly(1000, labels)
 res = knn_purity_avg(latent[sample, :],
                      labels[sample].astype('int'),
                      keys=keys,
                      acc=True)
 print('average classification accuracy per cluster')
 for x in res:
     print(x)
 # Element 1 of each `res` entry is averaged as the accuracy value.
 knn_acc = np.mean([x[1] for x in res])
 print("average KNN accuracy:", knn_acc)
Пример #4
0
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Train a VAE on ``CortexDataset`` and run the 'test' evaluation calls.

    Args:
        n_epochs: Number of epochs forwarded to ``train``.
        use_cuda: Forwarded to ``VariationalInference``.
        unit_test: When truthy, ``show_t_sne`` is called with 10 samples
            instead of 1000.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    inference = VariationalInference(model, dataset, use_cuda=use_cuda)
    inference.train(n_epochs=n_epochs)

    # Original author's note: this value should be around 1200.
    inference.ll('test')
    inference.differential_expression('test')
    # Original author's note: this value should be around 2.3.
    inference.imputation('test', rate=0.1)

    if unit_test:
        tsne_samples = 10
    else:
        tsne_samples = 1000
    inference.show_t_sne('test', n_samples=tsne_samples)
    return inference
Пример #5
0
def test_cortex():
    """Smoke-test four model/inference pairings on ``CortexDataset``.

    Each pairing is trained for one epoch and then has a few of its
    evaluation methods called; no return values are checked. ``use_cuda`` is
    read from the enclosing scope.
    """
    dataset = CortexDataset()

    # 1) Plain VAE with VariationalInference.
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_inference = VariationalInference(
        vae_model, dataset, train_size=0.1, use_cuda=use_cuda
    )
    vae_inference.train(n_epochs=1)
    vae_inference.ll('train')
    vae_inference.differential_expression_stats('train')
    vae_inference.differential_expression('test')
    vae_inference.imputation_errors('test', rate=0.5)

    # 2) SVAEC with joint semi-supervised inference.
    joint_model = SVAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_inference = JointSemiSupervisedVariationalInference(
        joint_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    joint_inference.train(n_epochs=1)
    joint_inference.accuracy('labelled')
    joint_inference.ll('all')

    # 3) SVAEC built with logreg_classifier=True, alternate inference.
    alternate_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        logreg_classifier=True,
    )
    alternate_inference = AlternateSemiSupervisedVariationalInference(
        alternate_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    alternate_inference.train(n_epochs=1, lr=1e-2)
    alternate_inference.accuracy('unlabelled')
    alternate_inference.svc_rf(unit_test=True)

    # 4) Stand-alone classifier; use_cuda is not passed to this inference.
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_inference = ClassifierInference(classifier, dataset)
    classifier_inference.train(n_epochs=1)
    classifier_inference.accuracy('train')
Пример #6
0
# Print how many entries of `sub_dataset1.labels` equal the index of the
# first cell type in `sub_dataset1.cell_types` that matches `subpop`.
print('total number of cells =' + str([
    np.sum(sub_dataset1.labels == i)
    for i, k in enumerate(sub_dataset1.cell_types) if k == subpop
][0]))
# Merge the three datasets, then call `subsample_genes(5000)` on the result.
gene_dataset = GeneExpressionDataset.concat_datasets(sub_dataset1, dataset2,
                                                     dataset3)
gene_dataset.subsample_genes(5000)

# Build the VAE from the merged dataset's gene, batch and label counts.
vae = VAE(gene_dataset.nb_genes,
          n_batch=gene_dataset.n_batches,
          n_labels=gene_dataset.n_labels,
          n_hidden=128,
          n_latent=10,
          n_layers=1,
          dispersion='gene')
infer_vae = VariationalInference(vae, gene_dataset, use_cuda=use_cuda)
infer_vae.fit(n_epochs=100)

# Write labels, batch indices and the count matrix to the parent directory,
# using `plotname` as the file-name prefix.
np.save("../" + plotname + '.label.npy', gene_dataset.labels)
np.save("../" + plotname + '.batch.npy', gene_dataset.batch_indices)
mmwrite("../" + plotname + '.count.mtx', gene_dataset.X)

# Iterate over the dataset in its stored order (shuffle=False) in batches of
# 128, using the dataset's own collate function.
data_loader = DataLoader(gene_dataset,
                         batch_size=128,
                         pin_memory=use_cuda,
                         shuffle=False,
                         collate_fn=gene_dataset.collate_fn)
latent, batch_indices, labels = get_latent(infer_vae.model, data_loader)
keys = gene_dataset.cell_types
# NOTE(review): presumably `get_latent` returns `batch_indices` as a sequence
# of arrays that is joined into one array here - confirm.
batch_indices = np.concatenate(batch_indices)