示例#1
0
def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Fit a VAE on ``dataset`` and run the ``'test'`` evaluations.

    A ``VAE`` is built from ``dataset.nb_genes`` and ``dataset.n_batches``,
    wrapped in a ``VariationalInference`` object and fitted for ``n_epochs``
    epochs. Afterwards ``ll`` and ``imputation`` are called with ``'test'``;
    their return values are discarded.

    Args:
        dataset: object exposing ``nb_genes`` and ``n_batches``.
        n_epochs: forwarded to ``VariationalInference.fit``.
        use_cuda: forwarded to the ``VariationalInference`` constructor.

    Returns:
        The fitted ``VariationalInference`` instance.
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.fit(n_epochs=n_epochs)

    # Evaluation calls; results are not captured here.
    trainer.ll('test')
    trainer.imputation('test', rate=0.1)  # original note: assert ~ 2.1

    return trainer
示例#2
0
def base_benchmark(gene_dataset):
    """Fit a VAE on ``gene_dataset`` for a single epoch and return the trainer.

    The ``VAE`` receives ``nb_genes``, ``n_batches`` and ``n_labels`` from the
    dataset positionally. ``VariationalInference`` is created with
    ``train_size=0.5``.

    NOTE(review): ``use_cuda`` is not a parameter here; it is read from the
    enclosing (module) scope and must be defined there.

    Returns:
        The fitted ``VariationalInference`` instance.
    """
    model = VAE(
        gene_dataset.nb_genes,
        gene_dataset.n_batches,
        gene_dataset.n_labels,
    )
    trainer = VariationalInference(
        model, gene_dataset, train_size=0.5, use_cuda=use_cuda
    )
    trainer.fit(n_epochs=1)
    return trainer
示例#3
0
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Fit a VAE on ``CortexDataset`` and run the ``'test'`` evaluations.

    After fitting for ``n_epochs`` epochs, the function calls ``ll``,
    ``differential_expression``, ``imputation`` and ``show_t_sne`` with
    ``'test'``. Their return values are discarded.

    Args:
        n_epochs: forwarded to ``VariationalInference.fit``.
        use_cuda: forwarded to the ``VariationalInference`` constructor.
        unit_test: when truthy, ``show_t_sne`` gets ``n_samples=10`` instead
            of ``1000``.

    Returns:
        The fitted ``VariationalInference`` instance.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.fit(n_epochs=n_epochs)

    trainer.ll('test')  # original note: assert ~ 1200
    trainer.differential_expression('test')
    trainer.imputation('test', rate=0.1)  # original note: assert ~ 2.3

    # Use a much smaller sample count when running as a unit test.
    if unit_test:
        tsne_samples = 10
    else:
        tsne_samples = 1000
    trainer.show_t_sne('test', n_samples=tsne_samples)

    return trainer
示例#4
0
def test_cortex():
    """Smoke-test four inference classes on ``CortexDataset``, one epoch each.

    Stages, in order:
      1. ``VAE`` with ``VariationalInference``.
      2. ``SVAEC`` with ``JointSemiSupervisedVariationalInference``.
      3. ``SVAEC(logreg_classifier=True)`` with
         ``AlternateSemiSupervisedVariationalInference``.
      4. ``Classifier`` with ``ClassifierInference``.

    No return values are checked; the test only exercises the calls.

    NOTE(review): ``use_cuda`` is read from the enclosing (module) scope, and
    ``ClassifierInference`` is the only trainer not given it explicitly.
    """
    dataset = CortexDataset()

    # Stage 1: unsupervised VAE.
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = VariationalInference(
        vae_model, dataset, train_size=0.1, use_cuda=use_cuda
    )
    vae_trainer.fit(n_epochs=1)
    vae_trainer.ll('train')
    vae_trainer.differential_expression_stats('train')
    vae_trainer.differential_expression('test')
    vae_trainer.imputation_errors('test', rate=0.5)

    # Stage 2: semi-supervised model, joint training.
    semi_model = SVAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = JointSemiSupervisedVariationalInference(
        semi_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    semi_trainer.fit(n_epochs=1)
    semi_trainer.accuracy('labelled')
    semi_trainer.ll('all')

    # Stage 3: semi-supervised model with logreg classifier, alternate
    # training. The same two names are rebound so the stage-2 objects are
    # released here.
    semi_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        logreg_classifier=True,
    )
    semi_trainer = AlternateSemiSupervisedVariationalInference(
        semi_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    semi_trainer.fit(n_epochs=1, lr=1e-2)
    semi_trainer.accuracy('unlabelled')
    semi_trainer.svc_rf(unit_test=True)

    # Stage 4: plain classifier.
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierInference(classifier, dataset)
    classifier_trainer.fit(n_epochs=1)
    classifier_trainer.accuracy('train')
示例#5
0
    np.sum(sub_dataset1.labels == i)
    for i, k in enumerate(sub_dataset1.cell_types) if k == subpop
][0]))
# Combine the three datasets into one GeneExpressionDataset, then call
# subsample_genes(5000) on it (presumably keeps 5000 genes — confirm against
# GeneExpressionDataset.subsample_genes).
gene_dataset = GeneExpressionDataset.concat_datasets(sub_dataset1, dataset2,
                                                     dataset3)
gene_dataset.subsample_genes(5000)

# Build the VAE from the merged dataset's dimensions and fit it for 100
# epochs. `use_cuda` comes from earlier in the script (outside this view).
vae = VAE(gene_dataset.nb_genes,
          n_batch=gene_dataset.n_batches,
          n_labels=gene_dataset.n_labels,
          n_hidden=128,
          n_latent=10,
          n_layers=1,
          dispersion='gene')
infer_vae = VariationalInference(vae, gene_dataset, use_cuda=use_cuda)
infer_vae.fit(n_epochs=100)

# Write the dataset's labels, batch indices and X to the parent directory,
# using `plotname` (defined outside this view) as the file-name prefix.
# NOTE(review): `mmwrite` is presumably scipy.io.mmwrite — confirm the import.
np.save("../" + plotname + '.label.npy', gene_dataset.labels)
np.save("../" + plotname + '.batch.npy', gene_dataset.batch_indices)
mmwrite("../" + plotname + '.count.mtx', gene_dataset.X)

# Iterate the full dataset unshuffled in batches of 128 and pass the trained
# model to get_latent (defined outside this view).
data_loader = DataLoader(gene_dataset,
                         batch_size=128,
                         pin_memory=use_cuda,
                         shuffle=False,
                         collate_fn=gene_dataset.collate_fn)
latent, batch_indices, labels = get_latent(infer_vae.model, data_loader)
keys = gene_dataset.cell_types
# get_latent's batch_indices is joined with np.concatenate, so it is
# presumably a sequence of per-batch arrays — TODO confirm.
batch_indices = np.concatenate(batch_indices)

# Presumably the number of cells to plot; it is used beyond this view.
n_plotcells = 6000