def totalvi_benchmark(dataset, n_epochs, use_cuda=True):
    """Train a TOTALVI model on ``dataset`` and exercise its test-set posterior.

    A model is built from the dataset's gene count, number of protein names
    and batch count, trained through a ``TotalTrainer`` created with
    ``train_size=0.5`` and ``early_stopping_kwargs=None``, and then a fixed
    sequence of methods is invoked on ``trainer.test_set``. The return values
    of those calls are discarded.

    Args:
        dataset: Object exposing ``nb_genes``, ``protein_names`` and
            ``n_batches``.
        n_epochs: Number of epochs forwarded to ``trainer.train``.
        use_cuda: Forwarded unchanged to ``TotalTrainer``.

    Returns:
        The ``TotalTrainer`` instance after training and after all posterior
        calls have run.
    """
    model = TOTALVI(
        dataset.nb_genes,
        len(dataset.protein_names),
        n_batch=dataset.n_batches,
    )
    trainer = TotalTrainer(
        model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
    )
    trainer.train(n_epochs=n_epochs)

    # (method name, keyword arguments) pairs, executed strictly in this order.
    posterior_calls = (
        ("reconstruction_error", {}),
        ("marginal_ll", {}),
        ("get_protein_background_mean", {}),
        ("get_latent", {}),
        ("generate", {}),
        ("get_sample_dropout", {}),
        # NOTE(review): this call appears twice back to back; kept as-is
        # because it may be deliberate (repeated invocation) -- confirm.
        ("get_normalized_denoised_expression", {"transform_batch": 0}),
        ("get_normalized_denoised_expression", {"transform_batch": 0}),
        ("imputation", {}),
        ("get_protein_mean", {}),
        ("one_vs_all_degenes", {"n_samples": 2, "M_permutation": 10}),
        ("generate_feature_correlation_matrix", {"n_samples": 2}),
        (
            "generate_feature_correlation_matrix",
            {"n_samples": 2, "transform_batch": 0},
        ),
    )
    for method_name, kwargs in posterior_calls:
        # ``trainer.test_set`` is looked up afresh for every call.
        getattr(trainer.test_set, method_name)(**kwargs)

    return trainer
def totalvi_benchmark(dataset, n_epochs, use_cuda=True):
    """Train a TOTALVI model on ``dataset`` and call test-set posterior methods.

    The model is constructed from ``dataset.nb_genes``, the number of entries
    in ``dataset.protein_names`` and ``dataset.n_batches``. It is trained via
    a ``TotalTrainer`` created with ``train_size=0.5``. Afterwards a fixed
    list of argument-less methods is invoked on ``trainer.test_set``; their
    results are not inspected.

    Args:
        dataset: Object exposing ``nb_genes``, ``protein_names`` and
            ``n_batches``.
        n_epochs: Number of epochs forwarded to ``trainer.train``.
        use_cuda: Forwarded unchanged to ``TotalTrainer``.

    Returns:
        The ``TotalTrainer`` instance used for training.
    """
    model = TOTALVI(
        dataset.nb_genes,
        len(dataset.protein_names),
        n_batch=dataset.n_batches,
    )
    trainer = TotalTrainer(model, dataset, train_size=0.5, use_cuda=use_cuda)
    trainer.train(n_epochs=n_epochs)

    # Every method below is called without arguments, in this exact order.
    zero_arg_methods = (
        "reconstruction_error",
        "marginal_ll",
        "get_protein_background_mean",
        "get_latent",
        "generate",
        "get_sample_dropout",
        "get_normalized_denoised_expression",
        "imputation",
    )
    for method_name in zero_arg_methods:
        # ``trainer.test_set`` is looked up afresh for every call.
        getattr(trainer.test_set, method_name)()

    return trainer
def test_totalvi(save_path):
    """Run the TOTALVI benchmark, adversarial training and a save/load round-trip.

    Steps, in order:
      1. ``totalvi_benchmark`` on a one-batch and then a two-batch
         ``SyntheticDataset``, one epoch each.
      2. One epoch of ``TotalTrainer`` training with
         ``use_adversarial_loss=True`` on the two-batch dataset.
      3. A posterior over all dataset indices is saved into a temporary
         directory and reloaded with a freshly constructed model; the
         reloaded posterior's type name and protein expression are asserted.

    ``use_cuda`` is read from the enclosing module scope, not from a
    parameter.

    Args:
        save_path: Not used in the body (presumably a pytest fixture --
            confirm against the test configuration).
    """

    def make_model(data):
        # Build an untrained TOTALVI sized for ``data``.
        return TOTALVI(
            data.nb_genes,
            len(data.protein_names),
            n_batch=data.n_batches,
        )

    totalvi_benchmark(SyntheticDataset(n_batches=1), n_epochs=1, use_cuda=use_cuda)

    dataset = SyntheticDataset(n_batches=2)
    totalvi_benchmark(dataset, n_epochs=1, use_cuda=use_cuda)

    # adversarial testing
    adversarial_model = make_model(dataset)
    adversarial_trainer = TotalTrainer(
        adversarial_model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
        use_adversarial_loss=True,
    )
    adversarial_trainer.train(n_epochs=1)

    # The temporary directory is removed when the ``with`` block exits.
    with tempfile.TemporaryDirectory() as scratch_dir:
        target_dir = os.path.join(scratch_dir, "posterior_data")

        every_index = np.arange(len(dataset))
        saved_posterior = adversarial_trainer.create_posterior(
            adversarial_model,
            dataset,
            indices=every_index,
            type_class=TotalPosterior,
        )
        saved_posterior.save_posterior(target_dir)

        # Reload into a brand-new model instance rather than the trained one.
        fresh_model = make_model(dataset)
        reloaded_posterior = load_posterior(
            target_dir, model=fresh_model, use_cuda=False
        )

        assert reloaded_posterior.posterior_type == "TotalPosterior"
        assert np.array_equal(
            reloaded_posterior.gene_dataset.protein_expression,
            dataset.protein_expression,
        )
def test_totalvi(save_path):
    """Run the TOTALVI benchmark on synthetic data, then train adversarially.

    ``totalvi_benchmark`` is executed for one epoch on a one-batch and then a
    two-batch ``SyntheticDataset``. Afterwards a new TOTALVI model is trained
    for one epoch on the two-batch dataset through a ``TotalTrainer`` created
    with ``use_adversarial_loss=True``.

    ``use_cuda`` is read from the enclosing module scope, not from a
    parameter.

    Args:
        save_path: Not used in the body (presumably a pytest fixture --
            confirm against the test configuration).
    """
    totalvi_benchmark(SyntheticDataset(n_batches=1), n_epochs=1, use_cuda=use_cuda)

    dataset = SyntheticDataset(n_batches=2)
    totalvi_benchmark(dataset, n_epochs=1, use_cuda=use_cuda)

    # adversarial testing
    adversarial_model = TOTALVI(
        dataset.nb_genes,
        len(dataset.protein_names),
        n_batch=dataset.n_batches,
    )
    adversarial_trainer = TotalTrainer(
        adversarial_model,
        dataset,
        train_size=0.5,
        use_cuda=use_cuda,
        early_stopping_kwargs=None,
        use_adversarial_loss=True,
    )
    adversarial_trainer.train(n_epochs=1)
# Train ``model`` on ``dataset`` with ELBO-based early stopping, build a
# posterior over every cell, and write the trained weights to disk.
# ``model``, ``dataset``, ``use_cuda``, ``lr`` and ``n`` are defined outside
# this fragment.
early_stopping_kwargs = dict(
    early_stopping_metric="elbo",
    save_best_state_metric="elbo",
    patience=45,
    threshold=0,
    reduce_lr_on_plateau=True,
    lr_patience=30,
    lr_factor=0.6,
    posterior_class=TotalPosterior,
)

trainer = TotalTrainer(
    model,
    dataset,
    train_size=0.9,
    test_size=0.1,
    use_cuda=use_cuda,
    frequency=1,
    data_loader_kwargs=dict(batch_size=256, pin_memory=False),
    early_stopping_kwargs=early_stopping_kwargs,
)
trainer.train(lr=lr, n_epochs=500)

# create posterior on full data
full_posterior = trainer.create_posterior(
    model,
    dataset,
    indices=np.arange(len(dataset)),
    type_class=TotalPosterior,
)

# The file name is derived from ``n``; the concatenation requires ``n`` to be
# a string.
torch.save(
    trainer.model.state_dict(),
    "differential_expression/saved_models/" + n + ".pt",
)