def test_multiple_covariates_scvi(save_path):
    """Smoke-test SCVI, SCANVI and TOTALVI with extra continuous and
    categorical covariates registered alongside batch/labels."""
    adata = synthetic_iid()
    # Two continuous and two categorical covariates (5 categories each).
    adata.obs["cont1"] = np.random.normal(size=(adata.shape[0],))
    adata.obs["cont2"] = np.random.normal(size=(adata.shape[0],))
    adata.obs["cat1"] = np.random.randint(0, 5, size=(adata.shape[0],))
    adata.obs["cat2"] = np.random.randint(0, 5, size=(adata.shape[0],))

    SCVI.setup_anndata(
        adata,
        batch_key="batch",
        labels_key="labels",
        continuous_covariate_keys=["cont1", "cont2"],
        categorical_covariate_keys=["cat1", "cat2"],
    )
    model = SCVI(adata)
    model.train(1)

    # SCANVI reuses the SCVI registration above.
    model = SCANVI(adata, unlabeled_category="Unknown")
    model.train(1)

    TOTALVI.setup_anndata(
        adata,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
        continuous_covariate_keys=["cont1", "cont2"],
        categorical_covariate_keys=["cat1", "cat2"],
    )
    model = TOTALVI(adata)
    model.train(1)
def test_totalvi_model_library_size(save_path):
    """When the library size is modeled (not observed), TOTALVI must register
    per-batch library-size prior parameters and still train/evaluate."""
    adata = synthetic_iid()
    TOTALVI.setup_anndata(
        adata,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
    )
    n_latent = 10

    model = TOTALVI(adata, n_latent=n_latent, use_observed_lib_size=False)
    # Library priors exist only when the library size is being modeled.
    assert hasattr(model.module, "library_log_means")
    assert hasattr(model.module, "library_log_vars")

    model.train(1, train_size=0.5)
    assert model.is_trained is True
    model.get_elbo()
    model.get_marginal_ll(n_mc_samples=3)
    model.get_latent_library_size()
def test_totalvi_online_update(save_path):
    """Exercise TOTALVI query-data loading (scArches-style online update),
    both from a saved model on disk and from a model held in memory."""
    n_latent = 5

    # --- basic case: load query data from a model saved to disk ---
    adata1 = synthetic_iid()
    TOTALVI.setup_anndata(
        adata1,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
    )
    model = TOTALVI(adata1, n_latent=n_latent, use_batch_norm="decoder")
    model.train(1, check_val_every_n_epoch=1)
    dir_path = os.path.join(save_path, "saved_model/")
    model.save(dir_path, overwrite=True)

    adata2 = synthetic_iid()
    # Give the query data batch categories unseen by the reference model.
    adata2.obs["batch"] = adata2.obs.batch.cat.rename_categories(
        ["batch_2", "batch_3"]
    )

    model2 = TOTALVI.load_query_data(adata2, dir_path)
    assert model2.module.background_pro_alpha.requires_grad is True
    model2.train(max_epochs=1)
    model2.get_latent_representation()

    # --- batch 3 has no proteins ---
    adata2 = synthetic_iid()
    adata2.obs["batch"] = adata2.obs.batch.cat.rename_categories(
        ["batch_2", "batch_3"]
    )
    adata2.obsm["protein_expression"][adata2.obs.batch == "batch_3"] = 0

    # Load query data from the model instance in memory this time.
    model3 = TOTALVI.load_query_data(adata2, model)
    # Key lookups raise if the per-batch protein masks were not created.
    _ = model3.module.protein_batch_mask[2]
    _ = model3.module.protein_batch_mask[3]
    model3.train(max_epochs=1)
    model3.get_latent_representation()
def test_totalvi_size_factor():
    """A registered size_factor_key must take precedence over
    use_observed_lib_size, so no library priors are created either way."""
    adata = synthetic_iid()
    adata.obs["size_factor"] = np.random.randint(1, 5, size=(adata.shape[0],))
    TOTALVI.setup_anndata(
        adata,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
        size_factor_key="size_factor",
    )
    n_latent = 10

    # Test size_factor_key overrides use_observed_lib_size.
    for observed in (False, True):
        model = TOTALVI(adata, n_latent=n_latent, use_observed_lib_size=observed)
        # Library priors must be absent whenever a size factor is supplied.
        assert not hasattr(model.module, "library_log_means")
        assert not hasattr(model.module, "library_log_vars")
        assert model.module.use_size_factor_key
        model.train(1, train_size=0.5)
def test_totalvi(save_path):
    """End-to-end exercise of the TOTALVI public API: training, latent
    queries, sampling, DE, setup transfer, and error paths."""
    adata = synthetic_iid(run_setup_anndata=False)
    TOTALVI.setup_anndata(
        adata,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
    )
    n_obs = adata.n_obs
    n_vars = adata.n_vars
    n_proteins = adata.obsm["protein_expression"].shape[1]
    n_latent = 10

    model = TOTALVI(adata, n_latent=n_latent)
    model.train(1, train_size=0.5)
    assert model.is_trained is True
    z = model.get_latent_representation()
    assert z.shape == (n_obs, n_latent)

    # Scalar metrics and normalized/foreground queries, with and without
    # batch transformation.
    model.get_elbo()
    model.get_marginal_ll(n_mc_samples=3)
    model.get_reconstruction_error()
    model.get_normalized_expression()
    model.get_normalized_expression(transform_batch=["batch_0", "batch_1"])
    model.get_latent_library_size()
    model.get_protein_foreground_probability()
    model.get_protein_foreground_probability(
        transform_batch=["batch_0", "batch_1"]
    )

    # Posterior predictive samples cover genes + proteins jointly.
    post_pred = model.posterior_predictive_sample(n_samples=2)
    assert post_pred.shape == (n_obs, n_vars + n_proteins, 2)
    post_pred = model.posterior_predictive_sample(n_samples=1)
    assert post_pred.shape == (n_obs, n_vars + n_proteins)

    feature_correlation_matrix1 = model.get_feature_correlation_matrix(
        correlation_type="spearman"
    )
    feature_correlation_matrix1 = model.get_feature_correlation_matrix(
        correlation_type="spearman", transform_batch=["batch_0", "batch_1"]
    )
    feature_correlation_matrix2 = model.get_feature_correlation_matrix(
        correlation_type="pearson"
    )
    assert feature_correlation_matrix1.shape == (
        n_vars + n_proteins,
        n_vars + n_proteins,
    )
    assert feature_correlation_matrix2.shape == (
        n_vars + n_proteins,
        n_vars + n_proteins,
    )

    # Metrics restricted to the validation split.
    model.get_elbo(indices=model.validation_indices)
    model.get_marginal_ll(indices=model.validation_indices, n_mc_samples=3)
    model.get_reconstruction_error(indices=model.validation_indices)

    # Queries against a second, separately set-up AnnData.
    adata2 = synthetic_iid(run_setup_anndata=False)
    TOTALVI.setup_anndata(
        adata2,
        batch_key="batch",
        protein_expression_obsm_key="protein_expression",
        protein_names_uns_key="protein_names",
    )
    norm_exp = model.get_normalized_expression(adata2, indices=[1, 2, 3])
    assert norm_exp[0].shape == (3, adata2.n_vars)
    assert norm_exp[1].shape == (3, adata2.obsm["protein_expression"].shape[1])

    latent_lib_size = model.get_latent_library_size(adata2, indices=[1, 2, 3])
    assert latent_lib_size.shape == (3, 1)

    pro_foreground_prob = model.get_protein_foreground_probability(
        adata2, indices=[1, 2, 3], protein_list=["1", "2"]
    )
    assert pro_foreground_prob.shape == (3, 2)
    model.posterior_predictive_sample(adata2)
    model.get_feature_correlation_matrix(adata2)

    # test transfer_anndata_setup + view
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    model.get_elbo(adata2[:10])

    # test automatic transfer_anndata_setup
    adata = synthetic_iid()
    model = TOTALVI(adata)
    adata2 = synthetic_iid(run_setup_anndata=False)
    model.get_elbo(adata2)

    # test that we catch incorrect mappings
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"][
        "mapping"
    ] = np.array(["label_1", "label_0", "label_8"])
    with pytest.raises(ValueError):
        model.get_elbo(adata2)

    # test that same mapping different order is okay
    adata = synthetic_iid()
    adata2 = synthetic_iid(run_setup_anndata=False)
    transfer_anndata_setup(adata, adata2)
    adata2.uns["_scvi"]["categorical_mappings"]["_scvi_labels"][
        "mapping"
    ] = np.array(["label_1", "label_0", "label_2"])
    model.get_elbo(adata2)  # should automatically transfer setup

    # test that we catch missing proteins
    adata2 = synthetic_iid(run_setup_anndata=False)
    del adata2.obsm["protein_expression"]
    with pytest.raises(KeyError):
        model.get_elbo(adata2)

    # Differential expression across all supported grouping styles.
    model.differential_expression(groupby="labels", group1="label_1")
    model.differential_expression(
        groupby="labels", group1="label_1", group2="label_2"
    )
    model.differential_expression(idx1=[0, 1, 2], idx2=[3, 4, 5])
    model.differential_expression(idx1=[0, 1, 2])
    model.differential_expression(groupby="labels")

    # test with missing proteins
    adata = scvi.data.pbmcs_10x_cite_seq(
        save_path=save_path, protein_join="outer"
    )
    model = TOTALVI(adata)
    assert model.module.protein_batch_mask is not None
    model.train(1, train_size=0.5)
    model = TOTALVI(adata, override_missing_proteins=True)
    assert model.module.protein_batch_mask is None
    model.train(1, train_size=0.5)