Exemplo n.º 1
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE on ``dataset`` and run the 'test' evaluation calls.

    Args:
        dataset: object exposing ``nb_genes`` and ``n_batches``; it is also
            handed to ``VariationalInference`` unchanged.
        n_epochs: forwarded to ``train`` as ``n_epochs``.
        use_cuda: forwarded to ``VariationalInference`` as ``use_cuda``.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # The return values of the evaluation calls are not used here; they are
    # invoked only for whatever reporting/side effects they perform.
    trainer.ll('test')
    trainer.imputation('test', rate=0.1)  # assert ~ 2.1
    return trainer
Exemplo n.º 2
0
def base_benchmark(gene_dataset):
    """Build a VAE for ``gene_dataset`` and train it for a single epoch.

    Args:
        gene_dataset: object exposing ``nb_genes``, ``n_batches`` and
            ``n_labels``; it is also passed to ``VariationalInference``.

    Returns:
        The ``VariationalInference`` instance after one training epoch.

    Note:
        ``use_cuda`` is not a parameter of this function; it is resolved at
        call time from the surrounding module scope.
    """
    model = VAE(
        gene_dataset.nb_genes,
        gene_dataset.n_batches,
        gene_dataset.n_labels,
    )
    trainer = VariationalInference(
        model, gene_dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer.train(n_epochs=1)
    return trainer
Exemplo n.º 3
0
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Train a VAE on a fresh ``CortexDataset`` and run the 'test' evaluations.

    Args:
        n_epochs: forwarded to ``train`` as ``n_epochs``.
        use_cuda: forwarded to ``VariationalInference`` as ``use_cuda``.
        unit_test: when truthy, ``show_t_sne`` is called with 10 samples
            instead of 1000.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # Evaluation calls on the 'test' split; their return values are unused.
    trainer.ll('test')  # assert ~ 1200
    trainer.differential_expression('test')
    trainer.imputation('test', rate=0.1)  # assert ~ 2.3

    # Use a much smaller sample count when running as a unit test.
    tsne_samples = 10 if unit_test else 1000
    trainer.show_t_sne('test', n_samples=tsne_samples)
    return trainer
Exemplo n.º 4
0
def test_cortex():
    """Smoke-test several model/inference pairs on ``CortexDataset``.

    Each section builds a model, trains it for one epoch and calls a handful
    of evaluation methods. Nothing is asserted; the test passes as long as no
    call raises. ``use_cuda`` is read from the surrounding module scope.
    """
    dataset = CortexDataset()

    # --- VAE + VariationalInference ---------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = VariationalInference(
        vae_model, dataset, train_size=0.1, use_cuda=use_cuda
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.ll('train')
    vae_trainer.differential_expression_stats('train')
    vae_trainer.differential_expression('test')
    vae_trainer.imputation('train', corruption='uniform')
    vae_trainer.imputation('test', n_samples=2, corruption='binomial')

    # --- SVAEC + joint semi-supervised inference --------------------------
    semi_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
    )
    semi_trainer = JointSemiSupervisedVariationalInference(
        semi_model, dataset, n_labelled_samples_per_class=50, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1)
    semi_trainer.accuracy('labelled')
    semi_trainer.ll('all')

    # --- SVAEC (logreg classifier) + alternate semi-supervised inference ---
    # The same local names are deliberately rebound, as in the joint section.
    semi_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        logreg_classifier=True,
    )
    semi_trainer = AlternateSemiSupervisedVariationalInference(
        semi_model, dataset, n_labelled_samples_per_class=50, use_cuda=use_cuda
    )
    semi_trainer.train(n_epochs=1, lr=1e-2)
    semi_trainer.accuracy('unlabelled')
    semi_trainer.svc_rf(unit_test=True)

    # --- Plain classifier --------------------------------------------------
    # Note: no ``use_cuda`` argument is passed to ClassifierInference here.
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierInference(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.accuracy('train')
Exemplo n.º 5
0
         cells.append(sample)
     correlation = (np.corrcoef(count[0], count[1])[0, 1])
 print("dataset 1 has %d cells" % (np.sum(count[0])))
 print("dataset 2 has %d cells" % (np.sum(count[1])))
 print(
     "correlation between the cell-type composition of the subsampled dataset is %.3f"
     % correlation)
 sub_dataset = deepcopy(gene_dataset)
 sub_dataset.update_cells(np.concatenate(cells))
 vae = VAE(sub_dataset.nb_genes,
           n_batch=sub_dataset.n_batches,
           n_labels=sub_dataset.n_labels,
           n_hidden=128,
           dispersion='gene')
 infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda)
 infer.train(n_epochs=250)
 latent, batch_indices, labels = infer.get_latent('sequential')
 keys = sub_dataset.cell_types
 batch_entropy = entropy_batch_mixing(latent, batch_indices)
 print("Entropy batch mixing :", batch_entropy)
 sample = select_indices_evenly(1000, labels)
 res = knn_purity_avg(latent[sample, :],
                      labels[sample].astype('int'),
                      keys=keys,
                      acc=True)
 print('average classification accuracy per cluster')
 for x in res:
     print(x)
 knn_acc = np.mean([x[1] for x in res])
 print("average KNN accuracy:", knn_acc)
 res = clustering_scores(