Example #1
File: _destvi.py Project: saketkc/scVI
    def __init__(
        self,
        st_adata: AnnData,
        cell_type_mapping: np.ndarray,
        decoder_state_dict: OrderedDict,
        px_decoder_state_dict: OrderedDict,
        px_r: np.ndarray,
        n_hidden: int,
        n_latent: int,
        n_layers: int,
        **module_kwargs,
    ):
        st_adata.obs["_indices"] = np.arange(st_adata.n_obs)
        register_tensor_from_anndata(st_adata, "ind_x", "obs", "_indices")
        super().__init__(st_adata)
        self.module = MRDeconv(
            n_spots=st_adata.n_obs,
            n_labels=cell_type_mapping.shape[0],
            decoder_state_dict=decoder_state_dict,
            px_decoder_state_dict=px_decoder_state_dict,
            px_r=px_r,
            n_genes=st_adata.n_vars,
            n_latent=n_latent,
            n_layers=n_layers,
            n_hidden=n_hidden,
            **module_kwargs,
        )
        self.cell_type_mapping = cell_type_mapping
        self._model_summary_string = "DestVI Model"
        self.init_params_ = self._get_init_params(locals())
Example #2
    def __init__(
        self,
        adata: AnnData,
        per_cell_weight=False,
    ):
        # in case any other model was created before that shares the same parameter names.
        clear_param_store()

        # add index for each cell (provided to pyro plate for correct minibatching)
        adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
        register_tensor_from_anndata(
            adata,
            registry_key="ind_x",
            adata_attr_name="obs",
            adata_key_name="_indices",
        )

        super().__init__(adata)

        self.module = BayesianRegressionModule(
            in_features=adata.shape[1],
            out_features=1,
            per_cell_weight=per_cell_weight,
        )
        self._model_summary_string = "BayesianRegressionModel"
        self.init_params_ = self._get_init_params(locals())
Example #3
def test_register_tensor_from_anndata():
    adata = synthetic_iid()
    adata.obs["cont1"] = np.random.normal(size=(adata.shape[0], ))
    register_tensor_from_anndata(adata,
                                 registry_key="test",
                                 adata_attr_name="obs",
                                 adata_key_name="cont1")
    assert "test" in adata.uns["_scvi"]["data_registry"]
    assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
        attr_name="obs", attr_key="cont1")
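
Registration only records a pointer in adata.uns["_scvi"]["data_registry"]; the values can then be pulled back with scvi.data.get_from_registry (as Example #4 does for the X tensor) or served with every minibatch by AnnDataLoader (as Examples #5 and #9 do). A minimal sketch of both access paths, assuming the scvi-tools 0.x import locations used throughout these examples:

import numpy as np
from scvi.data import synthetic_iid, register_tensor_from_anndata, get_from_registry
from scvi.dataloaders import AnnDataLoader

adata = synthetic_iid()  # returns an AnnData that is already set up for scvi
adata.obs["cont1"] = np.random.normal(size=adata.shape[0])
register_tensor_from_anndata(adata,
                             registry_key="test",
                             adata_attr_name="obs",
                             adata_key_name="cont1")

# direct lookup through the data registry
values = get_from_registry(adata, "test")
assert values.shape[0] == adata.n_obs

# the registered key is also delivered in every minibatch
for batch in AnnDataLoader(adata, batch_size=128, shuffle=False):
    assert "test" in batch
    break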
Example #4
    def __init__(
        self,
        adata: AnnData,
        cell_type_markers: pd.DataFrame,
        size_factor_key: str,
        **model_kwargs,
    ):
        try:
            cell_type_markers = cell_type_markers.loc[adata.var_names]
        except KeyError:
            raise KeyError(
                "Anndata and cell type markers do not contain the same genes."
            )
        super().__init__(adata)

        register_tensor_from_anndata(adata, "_size_factor", "obs", size_factor_key)

        self.n_genes = self.summary_stats["n_vars"]
        self.cell_type_markers = cell_type_markers
        rho = torch.Tensor(cell_type_markers.to_numpy())
        n_cats_per_cov = (
            self.scvi_setup_dict_["extra_categoricals"]["n_cats_per_key"]
            if "extra_categoricals" in self.scvi_setup_dict_
            else None
        )

        x = scvi.data.get_from_registry(adata, _CONSTANTS.X_KEY)
        col_means = np.asarray(np.mean(x, 0)).ravel()  # (g)
        col_means_mu, col_means_std = np.mean(col_means), np.std(col_means)
        col_means_normalized = torch.Tensor((col_means - col_means_mu) / col_means_std)

        # compute basis means for phi - shape (B,); B is a module-level
        # constant in the scvi-tools source (B = 10)
        basis_means = np.linspace(np.min(x), np.max(x), B)  # (B,)

        self.module = CellAssignModule(
            n_genes=self.n_genes,
            rho=rho,
            basis_means=basis_means,
            b_g_0=col_means_normalized,
            n_batch=self.summary_stats["n_batch"],
            n_cats_per_cov=n_cats_per_cov,
            n_continuous_cov=self.summary_stats["n_continuous_covs"],
            **model_kwargs,
        )
        self._model_summary_string = (
            "CellAssign Model with params: \nn_genes: {}, n_labels: {}"
        ).format(
            self.n_genes,
            rho.shape[1],
        )
        self.init_params_ = self._get_init_params(locals())
Example #5
def test_pyro_bayesian_regression_jit():
    use_gpu = int(torch.cuda.is_available())
    adata = synthetic_iid()
    # add index for each cell (provided to pyro plate for correct minibatching)
    adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
    register_tensor_from_anndata(
        adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    train_dl = AnnDataLoader(adata, shuffle=True, batch_size=128)
    pyro.clear_param_store()
    model = BayesianRegressionModule(in_features=adata.shape[1],
                                     out_features=1)
    plan = PyroTrainingPlan(model, loss_fn=pyro.infer.JitTrace_ELBO())
    plan.n_obs_training = len(train_dl.indices)
    trainer = Trainer(gpus=use_gpu,
                      max_epochs=2,
                      callbacks=[PyroJitGuideWarmup(train_dl)])
    trainer.fit(plan, train_dl)

    # 100 features
    assert list(model.guide.state_dict()
                ["locs.linear.weight_unconstrained"].shape) == [
                    1,
                    100,
                ]
    # 1 bias
    assert list(
        model.guide.state_dict()["locs.linear.bias_unconstrained"].shape) == [
            1,
        ]

    if use_gpu == 1:
        model.cuda()

    # test Predictive
    num_samples = 5
    predictive = model.create_predictive(num_samples=num_samples)
    for tensor_dict in train_dl:
        args, kwargs = model._get_fn_args_from_batch(tensor_dict)
        _ = {
            k: v.detach().cpu().numpy()
            for k, v in predictive(*args, **kwargs).items() if k != "obs"
        }
Example #6
    def setup_anndata(
        adata: AnnData,
        size_factor_key: str,
        batch_key: Optional[str] = None,
        layer: Optional[str] = None,
        categorical_covariate_keys: Optional[List[str]] = None,
        continuous_covariate_keys: Optional[List[str]] = None,
        copy: bool = False,
    ) -> Optional[AnnData]:
        """
        %(summary)s.

        Parameters
        ----------
        %(param_adata)s
        size_factor_key
            key in `adata.obs` with continuous-valued size factors.
        %(param_batch_key)s
        %(param_layer)s
        %(param_cat_cov_keys)s
        %(param_cont_cov_keys)s
        %(param_copy)s

        Returns
        -------
        %(returns)s
        """
        setup_data = _setup_anndata(
            adata,
            batch_key=batch_key,
            layer=layer,
            categorical_covariate_keys=categorical_covariate_keys,
            continuous_covariate_keys=continuous_covariate_keys,
            copy=copy,
        )
        register_tensor_from_anndata(
            adata if setup_data is None else setup_data,
            "_size_factor",
            "obs",
            size_factor_key,
        )
        return setup_data
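
For context, a hedged usage sketch of the pattern above, substituting the public scvi.data.setup_anndata for the private _setup_anndata helper; the size-factor column name is hypothetical:

import numpy as np
import scvi
from scvi.data import synthetic_iid, register_tensor_from_anndata

adata = synthetic_iid(run_setup_anndata=False)
# hypothetical size factor: per-cell library size
adata.obs["my_size_factor"] = np.asarray(adata.X.sum(axis=1)).ravel()

scvi.data.setup_anndata(adata, batch_key="batch")
register_tensor_from_anndata(adata, "_size_factor", "obs", "my_size_factor")
# "_size_factor" now resolves through adata.uns["_scvi"]["data_registry"]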
Example #7
    def __init__(
        self,
        st_adata: AnnData,
        params: Tuple[np.ndarray],
        use_gpu: bool = True,
        prior_weight: Literal["n_obs", "minibatch"] = "n_obs",
        **model_kwargs,
    ):
        st_adata.obs["_indices"] = np.arange(st_adata.n_obs)
        register_tensor_from_anndata(st_adata, "ind_x", "obs", "_indices")
        super().__init__(st_adata, use_gpu=use_gpu)

        self.model = SpatialDeconv(
            n_spots=st_adata.n_obs,
            params=params,
            prior_weight=prior_weight,
            **model_kwargs,
        )
        self._model_summary_string = "RNADeconv Model with params: \nn_spots: {}".format(
            st_adata.n_obs
        )
        self.init_params_ = self._get_init_params(locals())
Example #8
def test_saving_and_loading(save_path):
    def legacy_save(
        model,
        dir_path,
        prefix=None,
        overwrite=False,
        save_anndata=False,
        **anndata_write_kwargs,
    ):
        if not os.path.exists(dir_path) or overwrite:
            os.makedirs(dir_path, exist_ok=overwrite)
        else:
            raise ValueError(
                "{} already exists. Please provide a nonexistent directory for saving."
                .format(dir_path))

        file_name_prefix = prefix or ""

        if save_anndata:
            model.adata.write(
                os.path.join(dir_path, f"{file_name_prefix}adata.h5ad"),
                **anndata_write_kwargs,
            )

        model_save_path = os.path.join(dir_path,
                                       f"{file_name_prefix}model_params.pt")
        attr_save_path = os.path.join(dir_path, f"{file_name_prefix}attr.pkl")
        varnames_save_path = os.path.join(dir_path,
                                          f"{file_name_prefix}var_names.csv")

        torch.save(model.module.state_dict(), model_save_path)

        var_names = model.adata.var_names.astype(str)
        var_names = var_names.to_numpy()
        np.savetxt(varnames_save_path, var_names, fmt="%s")

        # get all the user attributes
        user_attributes = model._get_user_attributes()
        # only save the public attributes that end with an underscore
        user_attributes = {
            a[0]: a[1]
            for a in user_attributes if a[0][-1] == "_"
        }

        with open(attr_save_path, "wb") as f:
            pickle.dump(user_attributes, f)

    def test_save_load_model(cls, adata, save_path, prefix=None, legacy=False):
        model = cls(adata, latent_distribution="normal")
        model.train(1, train_size=0.2)
        z1 = model.get_latent_representation(adata)
        test_idx1 = model.validation_indices
        if legacy:
            legacy_save(model,
                        save_path,
                        overwrite=True,
                        save_anndata=True,
                        prefix=prefix)
        else:
            model.save(save_path,
                       overwrite=True,
                       save_anndata=True,
                       prefix=prefix)
        model = cls.load(save_path, prefix=prefix)
        model.get_latent_representation()
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            cls.load(save_path, adata=tmp_adata, prefix=prefix)
        model = cls.load(save_path, adata=adata, prefix=prefix)
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1")

        z2 = model.get_latent_representation()
        test_idx2 = model.validation_indices
        np.testing.assert_array_equal(z1, z2)
        np.testing.assert_array_equal(test_idx1, test_idx2)
        assert model.is_trained is True

    save_path = os.path.join(save_path, "tmp")
    adata = synthetic_iid()
    # Test custom tensors are loaded properly.
    adata.obs["cont1"] = np.random.normal(size=(adata.shape[0], ))
    register_tensor_from_anndata(adata,
                                 registry_key="test",
                                 adata_attr_name="obs",
                                 adata_key_name="cont1")

    for cls in [SCVI, LinearSCVI, TOTALVI, PEAKVI]:
        print(cls)
        test_save_load_model(cls,
                             adata,
                             save_path,
                             prefix=f"{cls.__name__}_",
                             legacy=True)
        test_save_load_model(cls, adata, save_path, prefix=f"{cls.__name__}_")
        # Loading prioritizes the newer save format, so it mismatches the legacy save.
        with pytest.raises(AssertionError):
            test_save_load_model(cls,
                                 adata,
                                 save_path,
                                 prefix=f"{cls.__name__}_",
                                 legacy=True)

    # AUTOZI
    def test_save_load_autozi(legacy=False):
        prefix = "AUTOZI_"
        model = AUTOZI(adata, latent_distribution="normal")
        model.train(1, train_size=0.5)
        ab1 = model.get_alphas_betas()
        if legacy:
            legacy_save(model,
                        save_path,
                        overwrite=True,
                        save_anndata=True,
                        prefix=prefix)
        else:
            model.save(save_path,
                       overwrite=True,
                       save_anndata=True,
                       prefix=prefix)
        model = AUTOZI.load(save_path, prefix=prefix)
        model.get_latent_representation()
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            AUTOZI.load(save_path, adata=tmp_adata, prefix=prefix)
        model = AUTOZI.load(save_path, adata=adata, prefix=prefix)
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1")

        ab2 = model.get_alphas_betas()
        np.testing.assert_array_equal(ab1["alpha_posterior"],
                                      ab2["alpha_posterior"])
        np.testing.assert_array_equal(ab1["beta_posterior"],
                                      ab2["beta_posterior"])
        assert model.is_trained is True

    test_save_load_autozi(legacy=True)
    test_save_load_autozi()
    # Loading prioritizes the newer save format, so it mismatches the legacy save.
    with pytest.raises(AssertionError):
        test_save_load_autozi(legacy=True)

    # SCANVI
    def test_save_load_scanvi(legacy=False):
        prefix = "SCANVI_"
        model = SCANVI(adata, "label_0")
        model.train(max_epochs=1, train_size=0.5)
        p1 = model.predict()
        if legacy:
            legacy_save(model,
                        save_path,
                        overwrite=True,
                        save_anndata=True,
                        prefix=prefix)
        else:
            model.save(save_path,
                       overwrite=True,
                       save_anndata=True,
                       prefix=prefix)
        model = SCANVI.load(save_path, prefix=prefix)
        model.get_latent_representation()
        tmp_adata = scvi.data.synthetic_iid(n_genes=200)
        with pytest.raises(ValueError):
            SCANVI.load(save_path, adata=tmp_adata, prefix=prefix)
        model = SCANVI.load(save_path, adata=adata, prefix=prefix)
        assert "test" in adata.uns["_scvi"]["data_registry"]
        assert adata.uns["_scvi"]["data_registry"]["test"] == dict(
            attr_name="obs", attr_key="cont1")

        p2 = model.predict()
        np.testing.assert_array_equal(p1, p2)
        assert model.is_trained is True

    test_save_load_scanvi(legacy=True)
    test_save_load_scanvi()
    # Loading prioritizes the newer save format, so it mismatches the legacy save.
    with pytest.raises(AssertionError):
        test_save_load_scanvi(legacy=True)
Example #9
def test_pyro_bayesian_regression(save_path):
    use_gpu = int(torch.cuda.is_available())
    adata = synthetic_iid()
    # add index for each cell (provided to pyro plate for correct minibatching)
    adata.obs["_indices"] = np.arange(adata.n_obs).astype("int64")
    register_tensor_from_anndata(
        adata,
        registry_key="ind_x",
        adata_attr_name="obs",
        adata_key_name="_indices",
    )
    train_dl = AnnDataLoader(adata, shuffle=True, batch_size=128)
    pyro.clear_param_store()
    model = BayesianRegressionModule(in_features=adata.shape[1],
                                     out_features=1)
    plan = PyroTrainingPlan(model)
    plan.n_obs_training = len(train_dl.indices)
    trainer = Trainer(
        gpus=use_gpu,
        max_epochs=2,
    )
    trainer.fit(plan, train_dl)
    if use_gpu == 1:
        model.cuda()

    # test Predictive
    num_samples = 5
    predictive = model.create_predictive(num_samples=num_samples)
    for tensor_dict in train_dl:
        args, kwargs = model._get_fn_args_from_batch(tensor_dict)
        _ = {
            k: v.detach().cpu().numpy()
            for k, v in predictive(*args, **kwargs).items() if k != "obs"
        }
    # test save and load
    # cpu/gpu has minor difference
    model.cpu()
    quants = model.guide.quantiles([0.5])
    sigma_median = quants["sigma"][0].detach().cpu().numpy()
    linear_median = quants["linear.weight"][0].detach().cpu().numpy()

    model_save_path = os.path.join(save_path, "model_params.pt")
    torch.save(model.state_dict(), model_save_path)

    pyro.clear_param_store()
    new_model = BayesianRegressionModule(in_features=adata.shape[1],
                                         out_features=1)
    # loading may fail because the autoguide parameters are created lazily;
    # in that case run the model for one step to initialize them, then load
    try:
        new_model.load_state_dict(torch.load(model_save_path))
    except RuntimeError as err:
        if isinstance(new_model, PyroBaseModuleClass):
            plan = PyroTrainingPlan(new_model)
            plan.n_obs_training = len(train_dl.indices)
            trainer = Trainer(
                gpus=use_gpu,
                max_steps=1,
            )
            trainer.fit(plan, train_dl)
            new_model.load_state_dict(torch.load(model_save_path))
        else:
            raise err

    quants = new_model.guide.quantiles([0.5])
    sigma_median_new = quants["sigma"][0].detach().cpu().numpy()
    linear_median_new = quants["linear.weight"][0].detach().cpu().numpy()

    np.testing.assert_array_equal(sigma_median_new, sigma_median)
    np.testing.assert_array_equal(linear_median_new, linear_median)