def test_filter_and_concat_datasets():
    """Exercise filtering, subsampling, concatenation and cell-type remapping.

    Three scenarios are covered:

    * two ``CortexDataset`` instances, restricted to different gene ranges
      and cell types, are concatenated and the merged gene count is checked;
    * two ``SyntheticDataset`` instances are concatenated with and without
      ``shared_labels`` and the merged batch/label counts are checked;
    * a synthetic dataset is filtered, subsampled, and a dataset with
      string cell types is passed through ``map_cell_types``.
    """
    # --- Cortex datasets restricted to different gene ranges ----------------
    cortex_dataset_1 = CortexDataset(save_path="tests/data/")
    cortex_dataset_1.subsample_genes(subset_genes=np.arange(0, 3))
    cortex_dataset_1.filter_cell_types(["microglia", "oligodendrocytes"])

    cortex_dataset_2 = CortexDataset(save_path="tests/data/")
    cortex_dataset_2.subsample_genes(subset_genes=np.arange(1, 4))
    cortex_dataset_2.filter_cell_types(
        ["endothelial-mural", "interneurons", "microglia", "oligodendrocytes"]
    )
    # Second filter is given integers rather than names
    # (presumably cell-type indices -- confirm against filter_cell_types).
    cortex_dataset_2.filter_cell_types([2, 0])

    cortex_dataset_merged = GeneExpressionDataset.concat_datasets(
        cortex_dataset_1, cortex_dataset_2
    )
    # The gene subsets arange(0, 3) and arange(1, 4) overlap in two positions.
    assert cortex_dataset_merged.nb_genes == 2

    # --- Synthetic datasets: shared vs. non-shared labels -------------------
    synthetic_dataset_1 = SyntheticDataset(n_batches=2, n_labels=5)
    synthetic_dataset_2 = SyntheticDataset(n_batches=3, n_labels=3)

    synthetic_merged_1 = GeneExpressionDataset.concat_datasets(
        synthetic_dataset_1, synthetic_dataset_2
    )
    assert synthetic_merged_1.n_batches == 5
    assert synthetic_merged_1.n_labels == 5

    synthetic_merged_2 = GeneExpressionDataset.concat_datasets(
        synthetic_dataset_1, synthetic_dataset_2, shared_labels=False
    )
    assert synthetic_merged_2.n_batches == 5
    assert synthetic_merged_2.n_labels == 8

    # --- Filtering and subsampling a single synthetic dataset ---------------
    synthetic_dataset_1.filter_cell_types([0, 1, 2, 3])
    assert synthetic_dataset_1.n_labels == 4

    synthetic_dataset_1.subsample_cells(50)
    assert len(synthetic_dataset_1) == 50

    # --- Remapping string cell types ----------------------------------------
    synthetic_dataset_3 = SyntheticDataset(n_labels=6)
    # Use the builtin ``str``: the ``np.str`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    synthetic_dataset_3.cell_types = np.arange(6).astype(str)
    # No assertion follows; this only checks that the call does not raise
    # when the mapping mixes a single-name key and a tuple key.
    synthetic_dataset_3.map_cell_types({"2": "9", ("4", "3"): "8"})
def test_synthetic_1():
    """Smoke-test SCANVI with a joint semi-supervised trainer on synthetic data.

    The model is trained for a single epoch, after which a series of
    evaluation, plotting and differential-expression methods are invoked on
    the trainer's ``labelled_set``, ``unlabelled_set`` and ``full_dataset``.
    There are no assertions: the test passes if none of the calls raise.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(['A', 'B', 'C'])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity(verbose=True)

    # t-SNE plots with the default and the two explicit colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by='labels')
    trainer.labelled_set.show_t_sne(n_samples=5, color_by='batches and labels')

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm='gmm')
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression; both calls share the same sampling settings.
    sampling = {'M_sampling': 2, 'M_permutation': 10}
    trainer.unlabelled_set.differential_expression_score(
        'B', 'C', genes=['2', '4'], **sampling
    )
    trainer.unlabelled_set.differential_expression_table(**sampling)
def test_synthetic_1():
    """Smoke-test SCANVI with a joint semi-supervised trainer on synthetic data.

    After one training epoch the test calls evaluation, plotting and
    differential-expression methods on the trainer's ``labelled_set``,
    ``unlabelled_set`` and ``full_dataset``. Differential expression is
    driven by boolean masks built from the dataset labels. There are no
    assertions: the test passes if none of the calls raise.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity()

    # t-SNE plots with the default and the two explicit colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2.
    flat_labels = dataset.labels.ravel()
    mask_label_1 = flat_labels == 1
    mask_label_2 = flat_labels == 2
    de_settings = {"n_samples": 2, "M_permutation": 10}
    trainer.unlabelled_set.differential_expression_score(
        mask_label_1, mask_label_2, **de_settings
    )
    trainer.unlabelled_set.one_vs_all_degenes(**de_settings)
def test_synthetic_1():
    """Smoke-test SCANVI training plus a posterior save/load round trip.

    After one training epoch the labelled posterior is saved into a
    temporary directory and reloaded against a freshly constructed SCANVI
    model; the reloaded indices, expression matrix and labels are asserted
    to equal the originals. The remaining evaluation, plotting and
    differential-expression calls carry no assertions and only need to run
    without raising.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    trainer.labelled_set.entropy_batch_mixing()

    # Save the labelled posterior, reload it with a new model, and compare.
    with tempfile.TemporaryDirectory() as scratch_dir:
        save_location = os.path.join(scratch_dir, "posterior_data")
        saved_posterior = trainer.labelled_set.sequential()
        saved_posterior.save_posterior(save_location)

        fresh_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
        loaded_posterior = load_posterior(
            save_location, model=fresh_model, use_cuda=False
        )

        assert np.array_equal(loaded_posterior.indices, saved_posterior.indices)
        assert np.array_equal(
            loaded_posterior.gene_dataset.X, saved_posterior.gene_dataset.X
        )
        assert np.array_equal(
            loaded_posterior.gene_dataset.labels,
            saved_posterior.gene_dataset.labels,
        )

    trainer.full_dataset.knn_purity()

    # t-SNE plots with the default and the two explicit colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2.
    flat_labels = dataset.labels.ravel()
    mask_label_1 = flat_labels == 1
    mask_label_2 = flat_labels == 2
    de_settings = {"n_samples": 2, "M_permutation": 10}
    trainer.unlabelled_set.differential_expression_score(
        mask_label_1, mask_label_2, **de_settings
    )
    trainer.unlabelled_set.one_vs_all_degenes(**de_settings)