def test_ovewrite_model_coords_dims(self):
    """Check coords and dims from model object can be partially overwritten."""
    dim1 = ["a", "b"]
    new_dim1 = ["c", "d"]
    coords = {"dim1": dim1, "dim2": ["c1", "c2"]}
    x_data = np.arange(4).reshape((2, 2))
    y = x_data + np.random.normal(size=(2, 2))
    with pm.Model(coords=coords):
        x = pm.Data("x", x_data, dims=("dim1", "dim2"))
        beta = pm.Normal("beta", 0, 1, dims="dim1")
        _ = pm.Normal("obs", x * beta, 1, observed=y, dims=("dim1", "dim2"))
        trace = pm.sample(100, tune=100, return_inferencedata=False)
        # idata1 uses the model's coords/dims as-is; idata2 overrides
        # the "dim1" coordinate values and beta's dims at conversion time.
        idata1 = to_inference_data(trace)
        idata2 = to_inference_data(trace, coords={"dim1": new_dim1}, dims={"beta": ["dim2"]})
    test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
    fails1 = check_multiple_attrs(test_dict, idata1)
    assert not fails1
    fails2 = check_multiple_attrs(test_dict, idata2)
    assert not fails2
    # dims: default mapping kept in idata1, overridden mapping used in idata2
    assert "dim1" in list(idata1.posterior.beta.dims)
    assert "dim2" in list(idata2.posterior.beta.dims)
    # coords: idata2 picks up new_dim1 for "dim1" but keeps "dim2" from the model
    assert np.all(idata1.constant_data.x.dim1.values == np.array(dim1))
    assert np.all(idata1.constant_data.x.dim2.values == np.array(["c1", "c2"]))
    assert np.all(idata2.constant_data.x.dim1.values == np.array(new_dim1))
    assert np.all(idata2.constant_data.x.dim2.values == np.array(["c1", "c2"]))
def test_predictions_to_idata(self, data, eight_schools_params):
    """Test that we can add predictions to a previously-existing InferenceData.

    Exercised both non-destructively (``inplace=False``) and in place; the
    same verification runs for both, so it is factored into a local helper.
    """
    test_dict = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "sample_stats": ["diverging", "lp"],
        "log_likelihood": ["obs"],
        "predictions": ["obs"],
        "prior": ["mu", "tau", "eta", "theta"],
        "observed_data": ["obs"],
    }

    def _check(inference_data, posterior_predictive):
        # All expected groups/variables are present, and the stored
        # predictions match the raw posterior-predictive samples.
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        for key, values in posterior_predictive.items():
            ivalues = inference_data.predictions[key]
            assert ivalues.shape[0] == 1  # one chain in predictions
            assert np.all(np.isclose(ivalues[0], values))

    # check adding non-destructively
    inference_data, posterior_predictive = self.get_predictions_inference_data(
        data, eight_schools_params, False
    )
    _check(inference_data, posterior_predictive)

    # check adding in place
    inference_data, posterior_predictive = self.get_predictions_inference_data(
        data, eight_schools_params, True
    )
    _check(inference_data, posterior_predictive)
def test_predictions_constant_data(self):
    """Out-of-sample predictions get their own predictions_constant_data group."""
    # Fit a model on one dataset ...
    with pm.Model():
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
        trace = pm.sample(100, tune=100, return_inferencedata=False)
        inference_data = to_inference_data(trace)
    test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    # ... then predict on a second model with different-sized data.
    with pm.Model():
        x = pm.Data("x", [1.0, 2.0])
        y = pm.Data("y", [1.0, 2.0])
        beta = pm.Normal("beta", 0, 1)
        obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
        predictive_trace = pm.sample_posterior_predictive(inference_data)
        assert set(predictive_trace.keys()) == {"obs"}
        # this should be four chains of 100 samples
        # assert predictive_trace["obs"].shape == (400, 2)
        # but the shape seems to vary between pymc3 versions
        inference_data = predictions_to_inference_data(predictive_trace, posterior_trace=trace)
    test_dict = {"posterior": ["beta"], "~observed_data": ""}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails, "Posterior data not copied over as expected."
    test_dict = {"predictions": ["obs"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails, "Predictions not instantiated as expected."
    test_dict = {"predictions_constant_data": ["x"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails, "Predictions constant data not instantiated as expected."
def test_predictions_to_idata(self, data, eight_schools_params):
    """Test that we can add predictions to a previously-existing InferenceData.

    Checks that every prediction variable keeps the same number of chains as
    the posterior, both non-destructively and in place.
    """
    test_dict = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "sample_stats": ["diverging", "lp"],
        "log_likelihood": ["obs"],
        "predictions": ["obs"],
        "prior": ["mu", "tau", "eta", "theta"],
        "observed_data": ["obs"],
    }

    def _check(inference_data):
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
        n_chains = inference_data.posterior.dims["chain"]
        # Only the values are needed here, so iterate .values(), not .items().
        for ivalues in inference_data.predictions.values():
            assert len(ivalues["chain"]) == n_chains  # same chains as in posterior

    # check adding non-destructively
    inference_data, _ = self.get_predictions_inference_data(data, eight_schools_params, False)
    _check(inference_data)

    # check adding in place
    inference_data, _ = self.get_predictions_inference_data(data, eight_schools_params, True)
    _check(inference_data)
def test_no_trace(self):
    """Conversion works with prior and/or posterior predictive but no trace."""
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
        idata = pm.sample(100, tune=100)
        prior = pm.sample_prior_predictive()
        posterior_predictive = pm.sample_posterior_predictive(idata)

    # Only prior
    inference_data = to_inference_data(prior=prior, model=model)
    test_dict = {"prior": ["beta"], "prior_predictive": ["obs"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    # Only posterior_predictive
    inference_data = to_inference_data(posterior_predictive=posterior_predictive, model=model)
    test_dict = {"posterior_predictive": ["obs"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    # Prior and posterior_predictive but no trace
    inference_data = to_inference_data(
        prior=prior, posterior_predictive=posterior_predictive, model=model
    )
    test_dict = {
        "prior": ["beta"],
        "prior_predictive": ["obs"],
        "posterior_predictive": ["obs"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_mv_missing_data_model(self):
    """Multivariate model with masked (missing) observations converts cleanly."""
    # -1 entries are masked out, i.e. treated as missing data.
    data = ma.masked_values([[1, 2], [2, 2], [-1, 4], [2, -1], [-1, -1]], value=-1)
    model = pm.Model()
    with model:
        mu = pm.Normal("mu", 0, 1, size=2)
        sd_dist = pm.HalfNormal.dist(1.0)
        chol, *_ = pm.LKJCholeskyCov("chol_cov", n=2, eta=1, sd_dist=sd_dist, compute_corr=True)
        y = pm.MvNormal("y", mu=mu, chol=chol, observed=data)
        inference_data = pm.sample(100, chains=2, return_inferencedata=True)
    # make sure that data is really missing
    assert isinstance(y.owner.op, (AdvancedIncSubtensor, AdvancedIncSubtensor1))
    test_dict = {
        "posterior": ["mu", "chol_cov"],
        "observed_data": ["y"],
        "log_likelihood": ["y"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_multivariate_observations(self):
    """Multivariate observed variables: the event dimension ("direction")
    appears in observed_data but is reduced out of the log_likelihood."""
    coords = {"direction": ["x", "y", "z"], "experiment": np.arange(20)}
    data = np.random.multinomial(20, [0.2, 0.3, 0.5], size=20)
    with pm.Model(coords=coords):
        p = pm.Beta("p", 1, 1, size=(3,))
        pm.Multinomial("y", 20, p, dims=("experiment", "direction"), observed=data)
        idata = pm.sample(draws=50, chains=2, tune=100, return_inferencedata=True)
    expected_groups = {
        "posterior": ["p"],
        "sample_stats": ["lp"],
        "log_likelihood": ["y"],
        "observed_data": ["y"],
    }
    missing = check_multiple_attrs(expected_groups, idata)
    assert not missing
    # log_likelihood sums over the multinomial event dimension ...
    assert "direction" not in idata.log_likelihood.dims
    # ... while the raw observations keep it.
    assert "direction" in idata.observed_data.dims
    assert idata.log_likelihood["y"].shape == (2, 50, 20)
def test_predictions_to_idata_new(self, data, eight_schools_params):
    """Creating a brand-new InferenceData from predictions (no observed_data)."""
    # check creating new
    inference_data, posterior_predictive = self.make_predictions_inference_data(
        data, eight_schools_params
    )
    test_dict = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "predictions": ["obs"],
        "~observed_data": "",
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    for key, values in posterior_predictive.items():
        ivalues = inference_data.predictions[key]
        # could the following better be done by simply flattening both the ivalues
        # and the values?
        # Flatten the (chain, draw, ...) leading axes so the stored
        # predictions can be compared against the raw flat sample array.
        if len(ivalues.shape) == 3:
            ivalues_arr = np.reshape(
                ivalues.values, (ivalues.shape[0] * ivalues.shape[1], ivalues.shape[2])
            )
        elif len(ivalues.shape) == 2:
            ivalues_arr = np.reshape(ivalues.values, (ivalues.shape[0] * ivalues.shape[1]))
        else:
            raise ValueError(f"Unexpected values shape for variable {key}")
        # 2 chains of 500 draws each -> 1000 flat samples
        assert (ivalues.shape[0] == 2) and (ivalues.shape[1] == 500)
        assert values.shape[0] == 1000
        assert np.all(np.isclose(ivalues_arr, values))
def test_inference_data_attrs(self, posterior, prior, save_warmup, warmup_iterations: int):
    """Check which InferenceData groups from_pyjags creates for each input combo.

    A group prefixed with ``~`` in the test dict is asserted to be absent;
    warmup groups only exist when ``save_warmup`` is on, warmup draws were
    requested, and the corresponding (posterior/prior) input was given.
    """
    idata = from_pyjags(
        posterior=posterior,
        prior=prior,
        log_likelihood={"y": "log_like"},
        save_warmup=save_warmup,
        warmup_iterations=warmup_iterations,
    )
    posterior_warmup_prefix = (
        "" if save_warmup and warmup_iterations > 0 and posterior is not None else "~"
    )
    prior_warmup_prefix = (
        "" if save_warmup and warmup_iterations > 0 and prior is not None else "~"
    )
    test_dict = {
        f'{"~" if posterior is None else ""}posterior': ["b", "int"],
        f'{"~" if prior is None else ""}prior': ["b", "int"],
        f'{"~" if posterior is None else ""}log_likelihood': ["y"],
        f"{posterior_warmup_prefix}warmup_posterior": ["b", "int"],
        f"{prior_warmup_prefix}warmup_prior": ["b", "int"],
        f"{posterior_warmup_prefix}warmup_log_likelihood": ["y"],
    }
    fails = check_multiple_attrs(test_dict, idata)
    assert not fails
def test_posterior_predictive_thinned(self, data):
    """Posterior predictive sampled from a thinned posterior keeps the
    thinned draw coordinate while the posterior keeps the full one."""
    with data.model:
        draws = 20
        thin_by = 4
        idata = pm.sample(tune=5, draws=draws, chains=2, return_inferencedata=True)
        # Keep every thin_by-th draw before sampling the predictive.
        thinned_idata = idata.sel(draw=slice(None, None, thin_by))
        idata.extend(pm.sample_posterior_predictive(thinned_idata))
    test_dict = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "sample_stats": ["diverging", "lp", "~log_likelihood"],
        "log_likelihood": ["obs"],
        "posterior_predictive": ["obs"],
        "observed_data": ["obs"],
    }
    fails = check_multiple_attrs(test_dict, idata)
    assert not fails
    assert idata.posterior.dims["chain"] == 2
    assert idata.posterior.dims["draw"] == draws
    assert idata.posterior_predictive.dims["chain"] == 2
    # The predictive group only has the thinned number of draws ...
    assert idata.posterior_predictive.dims["draw"] == draws / thin_by
    # ... and its draw coordinate reflects the thinning stride.
    assert np.allclose(idata.posterior["draw"], np.arange(draws))
    assert np.allclose(idata.posterior_predictive["draw"], np.arange(draws, step=thin_by))
def test_save_warmup(self, save_warmup, chains, tune, draws):
    """warmup_* groups exist iff save_warmup is on and tuning draws exist;
    posterior groups exist iff draws > 0."""
    with pm.Model():
        pm.Uniform("u1")
        pm.Normal("n1")
        idata = pm.sample(
            tune=tune,
            draws=draws,
            chains=chains,
            cores=1,
            step=pm.Metropolis(),
            discard_tuned_samples=False,
            return_inferencedata=True,
            idata_kwargs={"save_warmup": save_warmup},
        )
    # "~" prefix marks a group that must be absent.
    warmup_prefix = "" if save_warmup and (tune > 0) else "~"
    post_prefix = "" if draws > 0 else "~"
    test_dict = {
        f"{post_prefix}posterior": ["u1", "n1"],
        f"{post_prefix}sample_stats": ["~tune", "accept"],
        f"{warmup_prefix}warmup_posterior": ["u1", "n1"],
        f"{warmup_prefix}warmup_sample_stats": ["~tune"],
        "~warmup_log_likelihood": [],
        "~log_likelihood": [],
    }
    fails = check_multiple_attrs(test_dict, idata)
    assert not fails
    if hasattr(idata, "posterior"):
        assert idata.posterior.dims["chain"] == chains
        assert idata.posterior.dims["draw"] == draws
    if hasattr(idata, "warmup_posterior"):
        assert idata.warmup_posterior.dims["chain"] == chains
        assert idata.warmup_posterior.dims["draw"] == tune
def test_multiobservedrv_to_observed_data(self, multiobs):
    """DensityDist with dict observed: observed_data stored iff density_dist_obs."""
    # fake regression data, with weights (W)
    np.random.seed(2019)
    N = 100
    X = np.random.uniform(size=N)
    W = 1 + np.random.poisson(size=N)
    a, b = 5, 17
    Y = a + np.random.normal(b * X)
    with pm.Model():
        a = pm.Normal("a", 0, 10)
        b = pm.Normal("b", 0, 10)
        mu = a + b * X
        sigma = pm.HalfNormal("sigma", 1)

        # Weighted likelihood: each point's logp is scaled by its weight w.
        def weighted_normal(y, w):
            return w * logpt(pm.Normal.dist(mu=mu, sd=sigma), y)

        y_logp = pm.DensityDist(  # pylint: disable=unused-variable
            "y_logp", weighted_normal, observed={"y": Y, "w": W}
        )
        idata = pm.sample(
            20, tune=20, return_inferencedata=True, idata_kwargs={"density_dist_obs": multiobs}
        )
    # observed_data must be present only when density_dist_obs was requested.
    multiobs_str = "" if multiobs else "~"
    test_dict = {
        "posterior": ["a", "b", "sigma"],
        "sample_stats": ["lp"],
        "log_likelihood": ["y_logp"],
        f"{multiobs_str}observed_data": ["y", "w"],
    }
    fails = check_multiple_attrs(test_dict, idata)
    assert not fails
    if multiobs:
        assert idata.observed_data.y.dtype.kind == "f"
def test_multiple_observed_rv(self, log_likelihood):
    """log_likelihood group contents follow the log_likelihood idata kwarg
    (True -> all observed RVs, False -> absent, list -> only listed RVs)."""
    y1_data = np.random.randn(10)
    y2_data = np.random.randn(100)
    with pm.Model():
        x = pm.Normal("x", 1, 1)
        pm.Normal("y1", x, 1, observed=y1_data)
        pm.Normal("y2", x, 1, observed=y2_data)
        inference_data = pm.sample(
            100,
            chains=2,
            return_inferencedata=True,
            idata_kwargs={"log_likelihood": log_likelihood},
        )
    test_dict = {
        "posterior": ["x"],
        "observed_data": ["y1", "y2"],
        "log_likelihood": ["y1", "y2"],
        "sample_stats": ["diverging", "lp", "~log_likelihood"],
    }
    if not log_likelihood:
        # log_likelihood=False: the group must not exist at all.
        test_dict.pop("log_likelihood")
        test_dict["~log_likelihood"] = []
    if isinstance(log_likelihood, list):
        # Only the listed variable(s) get a log_likelihood entry.
        test_dict["log_likelihood"] = ["y1", "~y2"]
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_missing_data_model(self):
    """Masked observations split into y_missing (posterior) and y_observed."""
    # source pymc/pymc/tests/test_missing.py
    data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
    model = pm.Model()
    with model:
        x = pm.Normal("x", 1, 1)
        y = pm.Normal("y", x, 1, observed=data)
        inference_data = pm.sample(100, chains=2, return_inferencedata=True)
    # make sure that data is really missing
    assert "y_missing" in model.named_vars
    test_dict = {
        "posterior": ["x", "y_missing"],
        "observed_data": ["y_observed"],
        "log_likelihood": ["y_observed"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    # The missing part of partial observed RVs is not included in log_likelihood
    # See https://github.com/pymc-devs/pymc/issues/5255
    assert inference_data.log_likelihood["y_observed"].shape == (2, 100, 3)
def test_save_warmup_issue_1208_after_3_9(self):
    """MultiTrace with kept tuning draws splits into warmup_* groups;
    a manually sliced trace cannot, and warns instead."""
    with pm.Model():
        pm.Uniform("u1")
        pm.Normal("n1")
        trace = pm.sample(
            tune=100,
            draws=200,
            chains=2,
            cores=1,
            step=pm.Metropolis(),
            discard_tuned_samples=False,
            return_inferencedata=False,
        )
        assert isinstance(trace, pm.backends.base.MultiTrace)
        assert len(trace) == 300

        # from original trace, warmup draws should be separated out
        idata = to_inference_data(trace, save_warmup=True)
        test_dict = {
            "posterior": ["u1", "n1"],
            "sample_stats": ["~tune", "accept"],
            "warmup_posterior": ["u1", "n1"],
            "warmup_sample_stats": ["~tune", "accept"],
        }
        fails = check_multiple_attrs(test_dict, idata)
        assert not fails
        assert idata.posterior.dims["chain"] == 2
        assert idata.posterior.dims["draw"] == 200

        # manually sliced trace triggers the same warning as <=3.8
        with pytest.warns(UserWarning, match="Warmup samples"):
            idata = to_inference_data(trace[-30:], save_warmup=True)
        test_dict = {
            "posterior": ["u1", "n1"],
            "sample_stats": ["~tune", "accept"],
            "~warmup_posterior": [],
            "~warmup_sample_stats": [],
        }
        fails = check_multiple_attrs(test_dict, idata)
        assert not fails
        assert idata.posterior.dims["chain"] == 2
        assert idata.posterior.dims["draw"] == 30
def test_predictions_to_idata_new(self, data, eight_schools_params):
    """A freshly created predictions-only InferenceData has no observed_data
    and stores 2 chains x 500 draws per prediction variable."""
    idata, pred_samples = self.make_predictions_inference_data(data, eight_schools_params)
    expected_groups = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "predictions": ["obs"],
        "~observed_data": "",
    }
    assert not check_multiple_attrs(expected_groups, idata)
    for var_name in pred_samples:
        stored = idata.predictions[var_name]
        assert len(stored["chain"]) == 2
        assert len(stored["draw"]) == 500
def test_constant_data(self, use_context):
    """Test constant_data group behaviour.

    The model is provided either implicitly through the context stack
    (``use_context=True``) or explicitly via the ``model=`` argument;
    both must yield the same groups.
    """
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
        trace = pm.sample(100, tune=100, return_inferencedata=False)

    # Single if/else instead of the original's two complementary ifs.
    if use_context:
        with model:
            inference_data = to_inference_data(trace=trace)
    else:
        inference_data = to_inference_data(trace=trace, model=model)
    test_dict = {"posterior": ["beta"], "observed_data": ["obs"], "constant_data": ["x"]}
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_multiple_observed_rv_without_observations(self):
    """DensityDist observed dict values end up in observed_data under their keys."""
    with pm.Model():
        mu = pm.Normal("mu")
        x = pm.DensityDist(  # pylint: disable=unused-variable
            "x", logpt(pm.Normal.dist(mu, 1.0)), observed={"value": 0.1}
        )
        inference_data = pm.sample(100, chains=2, return_inferencedata=True)
    test_dict = {
        "posterior": ["mu"],
        "sample_stats": ["lp"],
        "log_likelihood": ["x"],
        # observed_data holds the dict key "value", not the RV name "x"
        "observed_data": ["value", "~x"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    assert inference_data.observed_data.value.dtype.kind == "f"
def test_to_idata(self, data, eight_schools_params, chains, draws):
    """All standard groups exist and log_likelihood has shape
    (chain, draw) + observation shape."""
    inference_data = self.get_inference_data(data, eight_schools_params)
    test_dict = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "sample_stats": ["diverging", "lp", "~log_likelihood"],
        "log_likelihood": ["obs"],
        "posterior_predictive": ["obs"],
        "prior": ["mu", "tau", "eta", "theta"],
        "prior_predictive": ["obs"],
        "observed_data": ["obs"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
    # Read the actual sizes from the converted posterior; use fresh names
    # rather than shadowing the `chains`/`draws` fixture parameters.
    n_chains = inference_data.posterior.dims["chain"]
    n_draws = inference_data.posterior.dims["draw"]
    obs = inference_data.observed_data["obs"]
    assert inference_data.log_likelihood["obs"].shape == (n_chains, n_draws) + obs.shape
def test_missing_data_model(self):
    """Masked observations split into y_missing (posterior) and y_observed."""
    # source pymc3/pymc3/tests/test_missing.py
    data = ma.masked_values([1, 2, -1, 4, -1], value=-1)
    model = pm.Model()
    with model:
        x = pm.Normal("x", 1, 1)
        y = pm.Normal("y", x, 1, observed=data)
        inference_data = pm.sample(100, chains=2, return_inferencedata=True)
    # make sure that data is really missing
    assert "y_missing" in model.named_vars
    test_dict = {
        "posterior": ["x", "y_missing"],
        "observed_data": ["y_observed"],
        "log_likelihood": ["y_observed"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def test_to_idata(self, data, eight_schools_params, chains, draws):
    """Standard groups exist and stored posterior predictive samples match
    the raw dict, chain by chain."""
    idata, pred_dict = self.get_inference_data(data, eight_schools_params)
    expected_groups = {
        "posterior": ["mu", "tau", "eta", "theta"],
        "sample_stats": ["diverging", "lp", "~log_likelihood"],
        "log_likelihood": ["obs"],
        "posterior_predictive": ["obs"],
        "prior": ["mu", "tau", "eta", "theta"],
        "prior_predictive": ["obs"],
        "observed_data": ["obs"],
    }
    assert not check_multiple_attrs(expected_groups, idata)
    for var_name, flat_values in pred_dict.items():
        stored = idata.posterior_predictive[var_name]
        for c in range(chains):
            # The raw dict is flat; slice out this chain's draws.
            chunk = flat_values[c * draws : (c + 1) * draws]
            assert np.all(np.isclose(stored[c], chunk))
def test_priors_separation(self, use_context):
    """Test model is enough to get prior, prior predictive and observed_data."""
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        obs = pm.Normal("obs", x * beta, 1, observed=y)  # pylint: disable=unused-variable
        prior = pm.sample_prior_predictive()
    test_dict = {
        # "beta" in prior, "obs" split into prior_predictive/observed_data
        "prior": ["beta", "~obs"],
        "observed_data": ["obs"],
        "prior_predictive": ["obs"],
    }
    # Model supplied via context stack or via the model= argument.
    if use_context:
        with model:
            inference_data = to_inference_data(prior=prior)
    else:
        inference_data = to_inference_data(prior=prior, model=model)
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails