def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Fit a VAE on ``dataset`` and run the standard evaluation calls.

    A ``VAE`` is built from ``dataset.nb_genes`` and ``dataset.n_batches``,
    wrapped in a ``VariationalInference`` object, fitted for ``n_epochs``
    epochs, and then ``ll`` and ``imputation`` are invoked with ``'test'``.

    :param dataset: object exposing ``nb_genes`` and ``n_batches``.
    :param n_epochs: number of epochs forwarded to ``fit``.
    :param use_cuda: forwarded to ``VariationalInference``.
    :return: the fitted ``VariationalInference`` object.
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.fit(n_epochs=n_epochs)

    # The return values of the evaluation calls are not used here.
    trainer.ll('test')
    trainer.imputation('test', rate=0.1)  # original note: assert ~ 2.1
    return trainer
def base_benchmark(gene_dataset):
    """Fit a VAE on ``gene_dataset`` for a single epoch and return the trainer.

    NOTE: ``use_cuda`` is not a parameter of this function; it is looked up
    in the enclosing scope (presumably a module-level flag -- confirm it is
    defined wherever this function lives).

    :param gene_dataset: object exposing ``nb_genes``, ``n_batches`` and
        ``n_labels``.
    :return: the ``VariationalInference`` object after one epoch of ``fit``.
    """
    model = VAE(
        gene_dataset.nb_genes,
        gene_dataset.n_batches,
        gene_dataset.n_labels,
    )
    trainer = VariationalInference(
        model,
        gene_dataset,
        train_size=0.5,
        use_cuda=use_cuda,
    )
    trainer.fit(n_epochs=1)
    return trainer
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Fit a VAE on ``CortexDataset`` and run the evaluation/plotting calls.

    After fitting, ``ll``, ``differential_expression``, ``imputation`` and
    ``show_t_sne`` are each invoked with ``'test'``.

    :param n_epochs: number of epochs forwarded to ``fit``.
    :param use_cuda: forwarded to ``VariationalInference``.
    :param unit_test: when truthy, ``show_t_sne`` receives ``n_samples=10``
        instead of ``n_samples=1000``.
    :return: the fitted ``VariationalInference`` object.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.fit(n_epochs=n_epochs)

    trainer.ll('test')  # original note: assert ~ 1200
    trainer.differential_expression('test')
    trainer.imputation('test', rate=0.1)  # original note: assert ~ 2.3

    # Use a much smaller sample count when running as a unit test.
    n_samples = 10 if unit_test else 1000
    trainer.show_t_sne('test', n_samples=n_samples)
    return trainer
def test_cortex():
    """Smoke-test several model/inference pairs on ``CortexDataset``.

    Each stage builds a model, fits it for one epoch and calls a handful of
    evaluation methods. The body contains no assertions, so the test passes
    as long as none of the calls raises.

    NOTE: ``use_cuda`` is not defined in this function; it is looked up in
    the enclosing scope (presumably a module-level flag -- confirm).
    """
    dataset = CortexDataset()

    # --- Unsupervised VAE -------------------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = VariationalInference(
        vae_model, dataset, train_size=0.1, use_cuda=use_cuda
    )
    vae_trainer.fit(n_epochs=1)
    vae_trainer.ll('train')
    vae_trainer.differential_expression_stats('train')
    vae_trainer.differential_expression('test')
    vae_trainer.imputation_errors('test', rate=0.5)

    # --- SVAEC with joint semi-supervised inference -----------------------
    semi_model = SVAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    semi_trainer = JointSemiSupervisedVariationalInference(
        semi_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    semi_trainer.fit(n_epochs=1)
    semi_trainer.accuracy('labelled')
    semi_trainer.ll('all')

    # --- SVAEC (logreg classifier) with alternate inference ---------------
    # The same two names are rebound, as in the original, so the previous
    # model and trainer are released at this point.
    semi_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        logreg_classifier=True,
    )
    semi_trainer = AlternateSemiSupervisedVariationalInference(
        semi_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    semi_trainer.fit(n_epochs=1, lr=1e-2)
    semi_trainer.accuracy('unlabelled')
    semi_trainer.svc_rf(unit_test=True)

    # --- Stand-alone classifier -------------------------------------------
    # ClassifierInference is constructed without use_cuda, as in the original.
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierInference(classifier, dataset)
    classifier_trainer.fit(n_epochs=1)
    classifier_trainer.accuracy('train')
np.sum(sub_dataset1.labels == i) for i, k in enumerate(sub_dataset1.cell_types) if k == subpop ][0])) gene_dataset = GeneExpressionDataset.concat_datasets(sub_dataset1, dataset2, dataset3) gene_dataset.subsample_genes(5000) vae = VAE(gene_dataset.nb_genes, n_batch=gene_dataset.n_batches, n_labels=gene_dataset.n_labels, n_hidden=128, n_latent=10, n_layers=1, dispersion='gene') infer_vae = VariationalInference(vae, gene_dataset, use_cuda=use_cuda) infer_vae.fit(n_epochs=100) np.save("../" + plotname + '.label.npy', gene_dataset.labels) np.save("../" + plotname + '.batch.npy', gene_dataset.batch_indices) mmwrite("../" + plotname + '.count.mtx', gene_dataset.X) data_loader = DataLoader(gene_dataset, batch_size=128, pin_memory=use_cuda, shuffle=False, collate_fn=gene_dataset.collate_fn) latent, batch_indices, labels = get_latent(infer_vae.model, data_loader) keys = gene_dataset.cell_types batch_indices = np.concatenate(batch_indices) n_plotcells = 6000