def benchmark(dataset, n_epochs=250, use_cuda=True):
    """Train a VAE on ``dataset`` and run the test-split evaluations.

    Args:
        dataset: Dataset object exposing ``nb_genes`` and ``n_batches``.
        n_epochs: Number of epochs forwarded to ``train``.
        use_cuda: Forwarded to ``VariationalInference``.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    model = VAE(dataset.nb_genes, n_batch=dataset.n_batches)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # The metric results are not captured here; the calls are made for
    # whatever reporting / side effects the inference object performs.
    trainer.ll('test')
    # Original author's note: imputation score expected to be roughly 2.1.
    trainer.imputation('test', rate=0.1)
    return trainer
def base_benchmark(gene_dataset):
    """Build a VAE for ``gene_dataset`` and train it for a single epoch.

    ``use_cuda`` is not a parameter: it is read from the enclosing (module)
    scope, which is not visible in this block.

    Args:
        gene_dataset: Dataset object exposing ``nb_genes``, ``n_batches``
            and ``n_labels``.

    Returns:
        The ``VariationalInference`` instance after one training epoch.
    """
    model = VAE(
        gene_dataset.nb_genes,
        gene_dataset.n_batches,
        gene_dataset.n_labels,
    )
    trainer = VariationalInference(
        model,
        gene_dataset,
        train_size=0.5,
        use_cuda=use_cuda,
    )
    trainer.train(n_epochs=1)
    return trainer
def cortex_benchmark(n_epochs=250, use_cuda=True, unit_test=False):
    """Train a VAE on ``CortexDataset`` and run the test-split evaluations.

    Args:
        n_epochs: Number of epochs forwarded to ``train``.
        use_cuda: Forwarded to ``VariationalInference``.
        unit_test: When truthy, the t-SNE step uses 10 samples instead of
            1000.

    Returns:
        The ``VariationalInference`` instance after training and evaluation.
    """
    dataset = CortexDataset()
    model = VAE(dataset.nb_genes)
    trainer = VariationalInference(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # Original author's note: expected value is roughly 1200.
    trainer.ll('test')
    trainer.differential_expression('test')
    # Original author's note: expected value is roughly 2.3.
    trainer.imputation('test', rate=0.1)

    # Use a much smaller sample count when running as a unit test.
    n_samples = 10 if unit_test else 1000
    trainer.show_t_sne('test', n_samples=n_samples)
    return trainer
def test_cortex():
    """Smoke-test the inference classes on ``CortexDataset``.

    Each model is trained for one epoch and a handful of evaluation methods
    are invoked; nothing is asserted, so the test only checks that the calls
    complete without raising. ``use_cuda`` is read from the enclosing
    (module) scope, which is not visible in this block.
    """
    dataset = CortexDataset()

    # --- Unsupervised VAE -------------------------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = VariationalInference(
        vae_model,
        dataset,
        train_size=0.1,
        use_cuda=use_cuda,
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.ll('train')
    vae_trainer.differential_expression_stats('train')
    vae_trainer.differential_expression('test')
    vae_trainer.imputation('train', corruption='uniform')
    vae_trainer.imputation('test', n_samples=2, corruption='binomial')

    # --- SVAEC with joint semi-supervised inference -----------------------
    svaec_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
    )
    svaec_trainer = JointSemiSupervisedVariationalInference(
        svaec_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    svaec_trainer.train(n_epochs=1)
    svaec_trainer.accuracy('labelled')
    svaec_trainer.ll('all')

    # --- SVAEC with alternate semi-supervised inference -------------------
    # The same local names are deliberately rebound, as in the original, so
    # the previous model/trainer are released at the same point.
    svaec_model = SVAEC(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        logreg_classifier=True,
    )
    svaec_trainer = AlternateSemiSupervisedVariationalInference(
        svaec_model,
        dataset,
        n_labelled_samples_per_class=50,
        use_cuda=use_cuda,
    )
    svaec_trainer.train(n_epochs=1, lr=1e-2)
    svaec_trainer.accuracy('unlabelled')
    svaec_trainer.svc_rf(unit_test=True)

    # --- Plain classifier -------------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierInference(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.accuracy('train')
cells.append(sample) correlation = (np.corrcoef(count[0], count[1])[0, 1]) print("dataset 1 has %d cells" % (np.sum(count[0]))) print("dataset 2 has %d cells" % (np.sum(count[1]))) print( "correlation between the cell-type composition of the subsampled dataset is %.3f" % correlation) sub_dataset = deepcopy(gene_dataset) sub_dataset.update_cells(np.concatenate(cells)) vae = VAE(sub_dataset.nb_genes, n_batch=sub_dataset.n_batches, n_labels=sub_dataset.n_labels, n_hidden=128, dispersion='gene') infer = VariationalInference(vae, sub_dataset, use_cuda=use_cuda) infer.train(n_epochs=250) latent, batch_indices, labels = infer.get_latent('sequential') keys = sub_dataset.cell_types batch_entropy = entropy_batch_mixing(latent, batch_indices) print("Entropy batch mixing :", batch_entropy) sample = select_indices_evenly(1000, labels) res = knn_purity_avg(latent[sample, :], labels[sample].astype('int'), keys=keys, acc=True) print('average classification accuracy per cluster') for x in res: print(x) knn_acc = np.mean([x[1] for x in res]) print("average KNN accuracy:", knn_acc) res = clustering_scores(