def test_synthetic_1():
    """Smoke-test SCANVI + JointSemiSupervisedTrainer on SyntheticDataset.

    Trains for one epoch and then calls a series of evaluation, plotting and
    differential-expression methods on the trainer's ``labelled_set``,
    ``unlabelled_set`` and ``full_dataset`` attributes. Nothing is asserted:
    the test passes as long as no call raises.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity()

    # t-SNE plots with the different colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2.
    cells_label_1 = dataset.labels.ravel() == 1
    cells_label_2 = dataset.labels.ravel() == 2
    trainer.unlabelled_set.differential_expression_score(
        cells_label_1,
        cells_label_2,
        n_samples=2,
        M_permutation=10,
    )
    trainer.unlabelled_set.one_vs_all_degenes(n_samples=2, M_permutation=10)
def test_synthetic_1():
    """Smoke-test SCANVI + JointSemiSupervisedTrainer on SyntheticDataset.

    Trains for one epoch and then calls evaluation, plotting and
    differential-expression methods on the trainer's ``labelled_set``,
    ``unlabelled_set`` and ``full_dataset`` attributes. Nothing is asserted:
    the test passes as long as no call raises.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(['A', 'B', 'C'])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)

    # Mixing / purity metrics.
    trainer.labelled_set.entropy_batch_mixing()
    trainer.full_dataset.knn_purity(verbose=True)

    # t-SNE plots with the different colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by='labels')
    trainer.labelled_set.show_t_sne(n_samples=5, color_by='batches and labels')

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm='gmm')
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression: 'B' versus 'C', restricted to two genes.
    selected_genes = ['2', '4']
    trainer.unlabelled_set.differential_expression_score(
        'B',
        'C',
        genes=selected_genes,
        M_sampling=2,
        M_permutation=10,
    )
    trainer.unlabelled_set.differential_expression_table(
        M_sampling=2,
        M_permutation=10,
    )
def test_synthetic_2():
    """Smoke-test VAEC training with early stopping on SyntheticDataset.

    Builds a JointSemiSupervisedTrainer with ``frequency=1`` and early-stopping
    settings keyed on the ``'ll'`` metric of ``'labelled_set'``, then trains
    for two epochs. Nothing is asserted: the test passes if training runs.
    """
    dataset = SyntheticDataset()
    model = VAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    stopping_config = {
        'early_stopping_metric': 'll',
        'on': 'labelled_set',
        'save_best_state_metric': 'll',
    }
    trainer = JointSemiSupervisedTrainer(
        model,
        dataset,
        use_cuda=use_cuda,
        frequency=1,
        early_stopping_kwargs=stopping_config,
    )
    trainer.train(n_epochs=2)
def test_nb_not_zinb():
    """Smoke-test SCANVI configured with ``reconstruction_loss="nb"``.

    The model is also given ``labels_groups=[0, 0, 1]``. It is trained for one
    epoch with JointSemiSupervisedTrainer on SyntheticDataset; nothing is
    asserted beyond training completing without an exception.
    """
    dataset = SyntheticDataset()
    model = SCANVI(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        labels_groups=[0, 0, 1],
        reconstruction_loss="nb",
    )
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
def test_synthetic_1():
    """Smoke-test SCANVI on SyntheticDataset, including a posterior round trip.

    After one epoch of training, the ``labelled_set`` posterior (taken through
    ``.sequential()``) is saved to a temporary directory and reloaded into a
    freshly constructed SCANVI model. The reloaded posterior must have the same
    ``indices``, ``gene_dataset.X`` and ``gene_dataset.labels`` as the original.
    The remaining calls exercise evaluation, plotting and
    differential-expression methods without asserting on their results.
    """
    dataset = SyntheticDataset()
    dataset.cell_types = np.array(["A", "B", "C"])

    model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
    trainer.labelled_set.entropy_batch_mixing()

    # Save / load round trip for the labelled posterior.
    with tempfile.TemporaryDirectory() as scratch_dir:
        save_location = os.path.join(scratch_dir, "posterior_data")
        saved_posterior = trainer.labelled_set.sequential()
        saved_posterior.save_posterior(save_location)

        fresh_model = SCANVI(
            dataset.nb_genes,
            dataset.n_batches,
            dataset.n_labels,
        )
        reloaded_posterior = load_posterior(
            save_location,
            model=fresh_model,
            use_cuda=False,
        )
        assert np.array_equal(reloaded_posterior.indices, saved_posterior.indices)
        assert np.array_equal(
            reloaded_posterior.gene_dataset.X,
            saved_posterior.gene_dataset.X,
        )
        assert np.array_equal(
            reloaded_posterior.gene_dataset.labels,
            saved_posterior.gene_dataset.labels,
        )

    trainer.full_dataset.knn_purity()

    # t-SNE plots with the different colouring modes.
    trainer.labelled_set.show_t_sne(n_samples=5)
    trainer.unlabelled_set.show_t_sne(n_samples=5, color_by="labels")
    trainer.labelled_set.show_t_sne(n_samples=5, color_by="batches and labels")

    # Clustering and classification scores.
    trainer.labelled_set.clustering_scores()
    trainer.labelled_set.clustering_scores(prediction_algorithm="gmm")
    trainer.unlabelled_set.unsupervised_classification_accuracy()

    # Differential expression between the cells labelled 1 and those labelled 2.
    cells_label_1 = dataset.labels.ravel() == 1
    cells_label_2 = dataset.labels.ravel() == 2
    trainer.unlabelled_set.differential_expression_score(
        cells_label_1,
        cells_label_2,
        n_samples=2,
        M_permutation=10,
    )
    trainer.unlabelled_set.one_vs_all_degenes(n_samples=2, M_permutation=10)
def test_hierarchy():
    """Smoke-test SCANVI built with ``use_ontology=True``.

    The model receives a two-element ``ontology`` list of integer matrices,
    ``reconstruction_loss="zinb"`` and ``n_layers=3``, and is trained for one
    epoch with JointSemiSupervisedTrainer on SyntheticDataset. Nothing is
    asserted beyond training completing without an exception.
    """
    dataset = SyntheticDataset()

    ontology_matrices = [
        np.array([[1, 1, 0], [0, 0, 1]]),
        np.array([[1, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 1]]),
    ]
    model = SCANVI(
        dataset.nb_genes,
        dataset.n_batches,
        dataset.n_labels,
        ontology=ontology_matrices,
        use_ontology=True,
        reconstruction_loss="zinb",
        n_layers=3,
    )

    trainer = JointSemiSupervisedTrainer(model, dataset, use_cuda=use_cuda)
    trainer.train(n_epochs=1)
def test_synthetic_2():
    """Smoke-test VAEC training with early stopping on SyntheticDataset.

    Builds a JointSemiSupervisedTrainer with ``frequency=1`` and early-stopping
    settings keyed on the ``"reconstruction_error"`` metric of
    ``"labelled_set"``, then trains for two epochs. Nothing is asserted: the
    test passes if training runs.
    """
    dataset = SyntheticDataset()
    model = VAEC(dataset.nb_genes, dataset.n_batches, dataset.n_labels)

    stopping_config = {
        "early_stopping_metric": "reconstruction_error",
        "on": "labelled_set",
        "save_best_state_metric": "reconstruction_error",
    }
    trainer = JointSemiSupervisedTrainer(
        model,
        dataset,
        use_cuda=use_cuda,
        frequency=1,
        early_stopping_kwargs=stopping_config,
    )
    trainer.train(n_epochs=2)
def test_cortex(save_path):
    """Smoke-test several models and trainers on CortexDataset.

    Runs, in order: a VAE with UnsupervisedTrainer (including a
    corrupt / retrain / uncorrupt cycle and an imputation benchmark), SCANVI
    with JointSemiSupervisedTrainer, SCANVI with AlternateSemiSupervisedTrainer,
    the SVC and random-forest accuracy helpers on the raw labelled / unlabelled
    data, and finally a Classifier with ClassifierTrainer. Nothing is asserted:
    the test passes as long as no call raises.

    Args:
        save_path: Passed to ``CortexDataset`` and to ``imputation_benchmark``
            as ``save_path``.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- VAE with the unsupervised trainer -------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae_model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.ll()
    vae_trainer.train_set.differential_expression_stats()

    # Corrupt twice (explicit 'binomial', then the default), retrain, restore.
    vae_trainer.corrupt_posteriors(corruption='binomial')
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot='imputation',
        save_path=save_path,
    )

    # --- SCANVI with the joint semi-supervised trainer -------------------
    joint_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_trainer = JointSemiSupervisedTrainer(
        joint_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.ll()

    # --- SCANVI with the alternate semi-supervised trainer ---------------
    alternate_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    alternate_trainer = AlternateSemiSupervisedTrainer(
        alternate_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    alternate_trainer.train(n_epochs=1, lr=1e-2)
    alternate_trainer.unlabelled_set.accuracy()

    # --- Baseline classifiers on the raw data ----------------------------
    data_train, labels_train = alternate_trainer.labelled_set.raw_data()
    data_test, labels_test = alternate_trainer.unlabelled_set.raw_data()

    svc_grid = [{'C': [1], 'kernel': ['linear']}]
    compute_accuracy_svc(
        data_train,
        labels_train,
        data_test,
        labels_test,
        param_grid=svc_grid,
    )

    rf_grid = [{'max_depth': [3], 'n_estimators': [10]}]
    compute_accuracy_rf(
        data_train,
        labels_train,
        data_test,
        labels_test,
        param_grid=rf_grid,
    )

    # --- Stand-alone classifier -------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()
def test_cortex(save_path):
    """Smoke-test several models and trainers on CortexDataset.

    Runs, in order: a VAE with UnsupervisedTrainer (evaluation calls, a
    corrupt / retrain / uncorrupt cycle, imputation benchmarks, and shape
    checks on ``generate_parameters`` and ``generate``), SCANVI with
    JointSemiSupervisedTrainer, SCANVI with AlternateSemiSupervisedTrainer,
    the SVC and random-forest accuracy helpers on the raw labelled / unlabelled
    data, and finally a Classifier with ClassifierTrainer. Only the array
    shapes are asserted; every other call just has to run without raising.

    Args:
        save_path: Passed to ``CortexDataset`` and to ``imputation_benchmark``
            as ``save_path``.
    """
    dataset = CortexDataset(save_path=save_path)

    # --- VAE with the unsupervised trainer -------------------------------
    vae_model = VAE(dataset.nb_genes, dataset.n_batches)
    vae_trainer = UnsupervisedTrainer(
        vae_model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
    )
    vae_trainer.train(n_epochs=1)
    vae_trainer.train_set.reconstruction_error()
    vae_trainer.train_set.differential_expression_stats()
    vae_trainer.train_set.generate_feature_correlation_matrix(
        n_samples=2,
        correlation_type="pearson",
    )
    vae_trainer.train_set.generate_feature_correlation_matrix(
        n_samples=2,
        correlation_type="spearman",
    )
    vae_trainer.train_set.imputation(n_samples=1)
    vae_trainer.test_set.imputation(n_samples=5)

    # Corrupt twice (explicit "binomial", then the default), retrain, restore.
    vae_trainer.corrupt_posteriors(corruption="binomial")
    vae_trainer.corrupt_posteriors()
    vae_trainer.train(n_epochs=1)
    vae_trainer.uncorrupt_posteriors()

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot="imputation",
        save_path=save_path,
    )

    # --- generate_parameters shape checks --------------------------------
    vae_trainer.train_set.generate_parameters()  # result intentionally unused
    n_cells = len(vae_trainer.train_set.indices)
    n_genes = dataset.nb_genes
    n_samples = 3
    per_cell_shape = (n_cells, n_genes)
    per_sample_shape = (n_samples, n_cells, n_genes)

    # Default call: one value per (cell, gene).
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters()
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape
    assert dispersions.shape == per_cell_shape

    # With n_samples: a leading axis of length n_samples is expected.
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples,
    )
    assert dropout.shape == per_sample_shape
    assert means.shape == per_sample_shape

    # With give_mean=True the per-(cell, gene) shape is expected again.
    dropout, means, dispersions = vae_trainer.train_set.generate_parameters(
        n_samples=n_samples,
        give_mean=True,
    )
    assert dropout.shape == per_cell_shape
    assert means.shape == per_cell_shape

    # --- generate() on a posterior covering every cell -------------------
    all_indices = np.arange(len(dataset))
    full = vae_trainer.create_posterior(vae_model, dataset, indices=all_indices)
    x_new, x_old = full.generate(n_samples=10)
    assert x_new.shape == (dataset.nb_cells, dataset.nb_genes, 10)
    assert x_old.shape == (dataset.nb_cells, dataset.nb_genes)

    vae_trainer.train_set.imputation_benchmark(
        n_samples=1,
        show_plot=False,
        title_plot="imputation",
        save_path=save_path,
    )

    # --- SCANVI with the joint semi-supervised trainer -------------------
    joint_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    joint_trainer = JointSemiSupervisedTrainer(
        joint_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    joint_trainer.train(n_epochs=1)
    joint_trainer.labelled_set.accuracy()
    joint_trainer.full_dataset.reconstruction_error()

    # --- SCANVI with the alternate semi-supervised trainer ---------------
    alternate_model = SCANVI(dataset.nb_genes, dataset.n_batches, dataset.n_labels)
    alternate_trainer = AlternateSemiSupervisedTrainer(
        alternate_model,
        dataset,
        n_labelled_samples_per_class=3,
        use_cuda=use_cuda,
    )
    alternate_trainer.train(n_epochs=1, lr=1e-2)
    alternate_trainer.unlabelled_set.accuracy()

    # --- Baseline classifiers on the raw data ----------------------------
    data_train, labels_train = alternate_trainer.labelled_set.raw_data()
    data_test, labels_test = alternate_trainer.unlabelled_set.raw_data()

    svc_grid = [{"C": [1], "kernel": ["linear"]}]
    compute_accuracy_svc(
        data_train,
        labels_train,
        data_test,
        labels_test,
        param_grid=svc_grid,
    )

    rf_grid = [{"max_depth": [3], "n_estimators": [10]}]
    compute_accuracy_rf(
        data_train,
        labels_train,
        data_test,
        labels_test,
        param_grid=rf_grid,
    )

    # --- Stand-alone classifier -------------------------------------------
    classifier = Classifier(dataset.nb_genes, n_labels=dataset.n_labels)
    classifier_trainer = ClassifierTrainer(classifier, dataset)
    classifier_trainer.train(n_epochs=1)
    classifier_trainer.train_set.accuracy()