def test_sample(self):
    """Check prior/posterior predictive shapes before and after resizing shared data."""
    x = np.random.normal(size=100)
    y = x + np.random.normal(scale=1e-2, size=100)
    x_pred = np.linspace(-3, 3, 200, dtype="float32")

    with pm.Model():
        x_shared = pm.MutableData("x_shared", x)
        slope = pm.Normal("b", 0.0, 10.0)
        pm.Normal("obs", slope * x_shared, np.sqrt(1e-2), observed=y)

        # Sample with the original 100-point predictor.
        prior_before = pm.sample_prior_predictive(1000)
        idata = pm.sample(1000, tune=1000, chains=1)
        pp_before = pm.sample_posterior_predictive(idata)

        # Swap in the 200-point predictor and sample again.
        x_shared.set_value(x_pred)
        prior_after = pm.sample_prior_predictive(1000)
        pp_after = pm.sample_posterior_predictive(idata)

    assert prior_before.prior["b"].shape == (1, 1000)
    assert prior_before.prior_predictive["obs"].shape == (1, 1000, 100)
    assert prior_after.prior_predictive["obs"].shape == (1, 1000, 200)

    obs_before = pp_before.posterior_predictive["obs"]
    assert obs_before.shape == (1, 1000, 100)
    np.testing.assert_allclose(x, obs_before.mean(("chain", "draw")), atol=1e-1)

    obs_after = pp_after.posterior_predictive["obs"]
    assert obs_after.shape == (1, 1000, 200)
    np.testing.assert_allclose(x_pred, obs_after.mean(("chain", "draw")), atol=1e-1)
def test_potentials_warning(self):
    """Prior predictive sampling of a model with a Potential must emit a UserWarning."""
    with pm.Model() as m:
        a = pm.Normal("a", 0, 1)
        pm.Potential("p", a + 1)

    expected_message = "The effect of Potentials on other parameters is ignored during"
    with m:
        with pytest.warns(UserWarning, match=expected_message):
            pm.sample_prior_predictive(samples=5)
def test_conversion_from_variables_subset(self):
    """This is a regression test for issue #5337."""
    with pm.Model():
        x = pm.Normal("x")
        pm.Normal("y", x, observed=5)
        idata = pm.sample(
            tune=10,
            draws=20,
            chains=1,
            step=pm.Metropolis(),
            compute_convergence_checks=False,
        )
        # Both predictive samplers are asked for only a subset of the variables.
        subset = ["x"]
        pm.sample_posterior_predictive(idata, var_names=subset)
        pm.sample_prior_predictive(var_names=subset)
def test_model_not_drawable_prior(self):
    """Prior sampling must raise for a HalfFlat prior; posterior predictive still works."""
    data = np.random.poisson(lam=10, size=200)
    model = pm.Model()
    with model:
        rate = pm.HalfFlat("sigma")
        pm.Poisson("foo", mu=rate, observed=data)
        idata = pm.sample(tune=1000)

    with model:
        with pytest.raises(NotImplementedError) as excinfo:
            pm.sample_prior_predictive(50)
        error_text = str(excinfo.value)
        assert "Cannot sample" in error_text

        samples = pm.sample_posterior_predictive(idata, 40, return_inferencedata=False)
        assert samples["foo"].shape == (40, 200)
def test_respects_shape(self):
    """Prior predictive draws of ``goals`` must have shape ``(draws,) + size``."""
    n_draws = 10
    for shape in (2, (2,), (10, 2), (10, 10)):
        with pm.Model():
            mu = pm.Gamma("mu", 3, 1, size=1)
            pm.Poisson("goals", mu, size=shape)
            # The first call deliberately lists "mu" twice in var_names.
            trace1 = pm.sample_prior_predictive(
                n_draws, return_inferencedata=False, var_names=["mu", "mu", "goals"]
            )
            trace2 = pm.sample_prior_predictive(
                n_draws, return_inferencedata=False, var_names=["mu", "goals"]
            )

        # want to test shape as an int: normalise it to a tuple for the comparison
        expected = (n_draws,) + ((2,) if shape == 2 else shape)
        assert trace1["goals"].shape == expected
        assert trace2["goals"].shape == expected
def test_return_inferencedata(self):
    """Exercise ``pm.sample`` return types with and without tuning samples kept."""
    with self.model:
        sample_kwargs = {
            "draws": 100,
            "tune": 50,
            "cores": 1,
            "chains": 2,
            "step": pm.Metropolis(),
        }

        # MultiTrace output, tuning samples retained (a warning is expected)
        with pytest.warns(UserWarning, match="will be included"):
            result = pm.sample(
                **sample_kwargs, return_inferencedata=False, discard_tuned_samples=False
            )
        assert isinstance(result, pm.backends.base.MultiTrace)
        assert len(result) == 150

        # InferenceData output, tuning samples retained
        result = pm.sample(
            **sample_kwargs, return_inferencedata=True, discard_tuned_samples=False
        )
        assert isinstance(result, InferenceData)
        assert result.posterior.sizes["draw"] == 100
        assert result.posterior.sizes["chain"] == 2
        assert len(result._groups_warmup) > 0

        # InferenceData output, tuning discarded, prior passed through idata_kwargs
        prior = pm.sample_prior_predictive(return_inferencedata=False)
        result = pm.sample(
            **sample_kwargs,
            return_inferencedata=True,
            discard_tuned_samples=True,
            idata_kwargs={"prior": prior},
            random_seed=-1,
        )
        assert "prior" in result
        assert isinstance(result, InferenceData)
        assert result.posterior.sizes["draw"] == 100
        assert result.posterior.sizes["chain"] == 2
        assert len(result._groups_warmup) == 0
def test_sample_generate_values(fixture_model, fixture_sizes):
    """Each sampled RV must have shape ``to_tuple(fixture_sizes) + distribution shape``."""
    model, rvs = fixture_model
    leading_dims = to_tuple(fixture_sizes)
    with model:
        prior = pm.sample_prior_predictive(samples=fixture_sizes)
        for rv in rvs:
            expected = leading_dims + tuple(rv.distribution.shape)
            assert prior[rv.name].shape == expected
def test_with_mvnormal(self):
    """Check shapes and logp of a MixtureSameFamily over 3-variate MvNormal components."""
    n_comps = self.mixture_comps
    # One 3-variate Gaussian per mixture component, sharing a Cholesky factor
    mu = np.random.randn(n_comps, 3)
    mat = np.random.randn(3, 3)
    chol = np.linalg.cholesky(mat @ mat.T)
    w = np.ones(n_comps) / n_comps

    with pm.Model() as model:
        comp_dists = pm.MvNormal.dist(mu=mu, chol=chol, shape=(n_comps, 3))
        mixture = pm.MixtureSameFamily(
            "mixture", w=w, comp_dists=comp_dists, mixture_axis=0, shape=(3,)
        )
        prior = pm.sample_prior_predictive(samples=self.n_samples)

    assert prior["mixture"].shape == (self.n_samples, 3)
    assert mixture.random(size=self.size).shape == (self.size, 3)

    # Looser tolerance when running in single precision
    rtol = 1e-4 if aesara.config.floatX == "float32" else 1e-7

    initial_point = model.recompute_initial_point()
    comp_logp = comp_dists.logp(initial_point["mixture"].reshape(1, 3))
    weighted_logp = comp_logp.eval() + np.log(w)[..., None]
    log_sum_exp = logsumexp(weighted_logp, axis=0, keepdims=True).sum()
    assert_allclose(model.logp(initial_point), log_sum_exp, rtol)
def test_with_multinomial(self, batch_shape):
    """Check shapes and logp of a MixtureSameFamily over Multinomial components."""
    n_comps = self.mixture_comps
    p = np.random.uniform(size=(*batch_shape, n_comps, 3))
    n = 100 * np.ones((*batch_shape, 1))
    w = np.ones(n_comps) / n_comps
    # The mixture axis sits directly after the batch dimensions
    mixture_axis = len(batch_shape)

    with pm.Model() as model:
        comp_dists = pm.Multinomial.dist(p=p, n=n, shape=(*batch_shape, n_comps, 3))
        mixture = pm.MixtureSameFamily(
            "mixture",
            w=w,
            comp_dists=comp_dists,
            mixture_axis=mixture_axis,
            shape=(*batch_shape, 3),
        )
        prior = pm.sample_prior_predictive(samples=self.n_samples)

    assert prior["mixture"].shape == (self.n_samples, *batch_shape, 3)
    assert mixture.random(size=self.size).shape == (self.size, *batch_shape, 3)

    # Looser tolerance when running in single precision
    rtol = 1e-4 if aesara.config.floatX == "float32" else 1e-7

    initial_point = model.recompute_initial_point()
    comp_logp = comp_dists.logp(initial_point["mixture"].reshape(*batch_shape, 1, 3))
    weighted_logp = comp_logp.eval() + np.log(w)[..., None]
    log_sum_exp = logsumexp(weighted_logp, axis=mixture_axis, keepdims=True).sum()
    assert_allclose(model.logp(initial_point), log_sum_exp, rtol)
def test_shared_data_as_index(self):
    """
    Allow pm.Data to be used for index variables, i.e with integers as well as floats.
    See https://github.com/pymc-devs/pymc/issues/3813
    """
    n_draws = 1000
    with pm.Model() as model:
        index = pm.MutableData("index", [2, 0, 1, 0, 2])
        y = pm.MutableData("y", [1.0, 2.0, 3.0, 2.0, 1.0])
        alpha = pm.Normal("alpha", 0, 1.5, size=3)
        pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

        prior_trace = pm.sample_prior_predictive(n_draws)
        idata = pm.sample(
            n_draws,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )

    # Replace both data containers, then predict on the new data
    replacement = {"index": np.array([0, 1, 2]), "y": [5.0, 6.0, 9.0]}
    with model:
        pm.set_data(new_data=replacement)
        pp_trace = pm.sample_posterior_predictive(idata, var_names=["alpha", "obs"])

    expected = (1, n_draws, 3)
    assert prior_trace.prior["alpha"].shape == expected
    assert idata.posterior["alpha"].shape == expected
    assert pp_trace.posterior_predictive["alpha"].shape == expected
    assert pp_trace.posterior_predictive["obs"].shape == expected
def test_no_trace(self):
    """Build InferenceData from prior and/or posterior predictive dicts, without a trace."""
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * beta, 1, observed=y)
        idata = pm.sample(100, tune=100)
        prior = pm.sample_prior_predictive(return_inferencedata=False)
        posterior_predictive = pm.sample_posterior_predictive(
            idata, return_inferencedata=False
        )

    # Each case pairs the groups passed in with the attributes expected back.
    cases = (
        # Only prior
        (
            {"prior": prior},
            {"prior": ["beta"], "prior_predictive": ["obs"]},
        ),
        # Only posterior_predictive
        (
            {"posterior_predictive": posterior_predictive},
            {"posterior_predictive": ["obs"]},
        ),
        # Prior and posterior_predictive but no trace
        (
            {"prior": prior, "posterior_predictive": posterior_predictive},
            {
                "prior": ["beta"],
                "prior_predictive": ["obs"],
                "posterior_predictive": ["obs"],
            },
        ),
    )
    for groups, test_dict in cases:
        inference_data = to_inference_data(model=model, **groups)
        fails = check_multiple_attrs(test_dict, inference_data)
        assert not fails
def test_shape_edgecase(self):
    """A size-5 ``mu`` feeding a size-5 ``x`` must keep shape (1, 10, 5) in the prior."""
    n_draws = 10
    with pm.Model():
        mu = pm.Normal("mu", size=5)
        sd = pm.Uniform("sd", lower=2, upper=3)
        pm.Normal("x", mu=mu, sigma=sd, size=5)
        prior = pm.sample_prior_predictive(n_draws)

    assert prior.prior["mu"].shape == (1, n_draws, 5)
def test_zeroinflatedpoisson(self):
    """Check prior predictive shapes for a ZeroInflatedPoisson with random parameters."""
    n_draws = 5000
    with pm.Model():
        theta = pm.Beta("theta", alpha=1, beta=1)
        # `sigma` is the supported keyword; `sd` is a deprecated alias.
        # NOTE(review): psi is fed a HalfNormal, whose draws can exceed 1 --
        # confirm the psi/theta roles are as intended.
        psi = pm.HalfNormal("psi", sigma=1)
        pm.ZeroInflatedPoisson("suppliers", psi=psi, theta=theta, size=20)
        gen_data = pm.sample_prior_predictive(samples=n_draws)

        assert gen_data.prior["theta"].shape == (1, n_draws)
        assert gen_data.prior["psi"].shape == (1, n_draws)
        assert gen_data.prior["suppliers"].shape == (1, n_draws, 20)
def test_broadcasting_in_shape(self):
    """A 2-component Poisson mixture requested with shape (1000,) must sample that shape."""
    equal_weights = np.ones(2) / 2
    with pm.Model():
        mu = pm.Gamma("mu", 1.0, 1.0, shape=2)
        comp_dists = pm.Poisson.dist(mu, shape=2)
        pm.MixtureSameFamily(
            "mix", w=equal_weights, comp_dists=comp_dists, shape=(1000,)
        )
        prior = pm.sample_prior_predictive(samples=self.n_samples)

    assert prior["mix"].shape == (self.n_samples, 1000)
def test_transformed_vars(self):
    """Prior predictive must return transformed values when they are named in ``var_names``."""

    def ub_interval_forward(x, ub):
        # Interval transform assuming lower bound is zero
        return np.log(x - 0) - np.log(ub - x)

    with pm.Model(rng_seeder=123):
        ub = pm.HalfNormal("ub", 10)
        pm.Uniform("x", 0, ub)
        prior = pm.sample_prior_predictive(
            var_names=["ub", "ub_log__", "x", "x_interval__"],
            samples=10,
        )

    # Transformed values must match the forward transforms of the raw draws
    full = prior.prior
    assert np.allclose(full["ub_log__"].data, np.log(full["ub"].data))
    assert np.allclose(
        full["x_interval__"].data,
        ub_interval_forward(full["x"].data, full["ub"].data),
    )

    # Same seed and model, but only the transformed names are requested
    with pm.Model(rng_seeder=123):
        ub = pm.HalfNormal("ub", 10)
        pm.Uniform("x", 0, ub)
        prior_transformed_only = pm.sample_prior_predictive(
            var_names=["ub_log__", "x_interval__"],
            samples=10,
        )

    only = prior_transformed_only.prior
    assert "ub" not in only.data_vars and "x" not in only.data_vars
    assert np.allclose(full["ub_log__"].data, only["ub_log__"].data)
    assert np.allclose(full["x_interval__"], only["x_interval__"].data)
def test_shared(self):
    """Prior predictive shapes must follow a shared observed array when it is resized."""
    n1 = 10
    obs = shared(np.random.rand(n1) < 0.5)
    draws = 50

    with pm.Model() as m:
        p = pm.Beta("p", 1.0, 1.0)
        pm.Bernoulli("y", p, observed=obs)
        pm.Deterministic("o", obs)
        gen1 = pm.sample_prior_predictive(draws)

    # "y" is observed, so it is read from prior_predictive rather than prior,
    # matching how the other tests in this file read observed variables.
    assert gen1.prior_predictive["y"].shape == (1, draws, n1)
    assert gen1.prior["o"].shape == (1, draws, n1)

    # Resize the shared data in place and sample again from the same model.
    n2 = 20
    obs.set_value(np.random.rand(n2) < 0.5)
    with m:
        gen2 = pm.sample_prior_predictive(draws)

    assert gen2.prior_predictive["y"].shape == (1, draws, n2)
    assert gen2.prior["o"].shape == (1, draws, n2)
def test_issue_4490(self):
    """Prior samples must not depend on the order of ``var_names``."""
    # More fundamentally, they must not depend on the set order used inside
    # `sample_prior_predictive`.
    seed = 4490
    names = ("a", "b", "c", "d")

    with pm.Model(rng_seeder=seed):
        for name in names:
            pm.Normal(name)
        prior1 = pm.sample_prior_predictive(samples=1, var_names=["a", "b", "c", "d"])

    with pm.Model(rng_seeder=seed):
        for name in names:
            pm.Normal(name)
        prior2 = pm.sample_prior_predictive(samples=1, var_names=["b", "a", "d", "c"])

    for name in names:
        assert prior1.prior[name] == prior2.prior[name]
def test_sample_prior_and_posterior(self):
    """Check prior and posterior predictive shapes for a 3-component MvNormal mixture."""

    def build_toy_dataset(N, K):
        """Draw N 3-d points from a fixed 3-component mixture; return (points, labels)."""
        pi = np.array([0.2, 0.5, 0.3])
        mus = [[1, 1, 1], [-1, -1, -1], [2, -2, 0]]
        stds = [[0.1, 0.1, 0.1], [0.1, 0.2, 0.2], [0.2, 0.3, 0.3]]
        x = np.zeros((N, 3), dtype=np.float32)
        # Builtin `int` replaces the `np.int` alias, which NumPy >= 1.24 no longer has.
        y = np.zeros((N,), dtype=int)
        for n in range(N):
            k = np.argmax(np.random.multinomial(1, pi))
            x[n, :] = np.random.multivariate_normal(mus[k], np.diag(stds[k]))
            y[n] = k
        return x, y

    N = 100  # number of data points
    K = 3  # number of mixture components
    D = 3  # dimensionality of the data

    X, y = build_toy_dataset(N, K)

    with pm.Model() as model:
        pi = pm.Dirichlet("pi", np.ones(K), shape=(K,))

        comp_dist = []
        for i in range(K):
            mu_i = pm.Normal(f"mu{i}", 0, 10, shape=D)
            packed_chol_i = pm.LKJCholeskyCov(
                f"chol_cov_{i}", eta=2, n=D, sd_dist=pm.HalfNormal.dist(2.5)
            )
            chol_i = pm.expand_packed_triangular(D, packed_chol_i, lower=True)
            comp_dist.append(pm.MvNormal.dist(mu=mu_i, chol=chol_i, shape=D))

        pm.Mixture("x_obs", pi, comp_dist, observed=X)

    with model:
        idata = pm.sample(30, tune=10, chains=1)

    n_samples = 20
    with model:
        ppc = pm.sample_posterior_predictive(idata, n_samples)
        prior = pm.sample_prior_predictive(samples=n_samples)

    assert ppc["x_obs"].shape == (n_samples,) + X.shape
    assert prior["x_obs"].shape == (n_samples,) + X.shape
    assert prior["mu0"].shape == (n_samples, D)
    # A packed lower-triangular D x D factor has D * (D + 1) / 2 entries.
    assert prior["chol_cov_0"].shape == (n_samples, D * (D + 1) // 2)
def test_sample_from_xarray_prior(self, point_list_arg_bug_fixture):
    """Posterior predictive sampling must accept the ``prior`` group of an InferenceData."""
    pmodel, trace = point_list_arg_bug_fixture

    with pmodel:
        prior_draws = pm.sample_prior_predictive(samples=20, return_inferencedata=False)
        idat = pm.to_inference_data(trace, prior=prior_draws)

    # The test passes as long as this call does not raise.
    with pmodel:
        pm.sample_posterior_predictive(
            idat.prior, return_inferencedata=False, var_names=["d"]
        )
def get_inference_data(self, data, eight_schools_params):
    """Return InferenceData built from ``data.obj`` plus prior and posterior predictive draws."""
    with data.model:
        prior = pm.sample_prior_predictive(return_inferencedata=False)
        posterior_predictive = pm.sample_posterior_predictive(
            data.obj, return_inferencedata=False
        )

    school_coords = {"school": np.arange(eight_schools_params["J"])}
    school_dims = {"theta": ["school"], "eta": ["school"]}
    return to_inference_data(
        trace=data.obj,
        prior=prior,
        posterior_predictive=posterior_predictive,
        coords=school_coords,
        dims=school_dims,
        model=data.model,
    )
def test_density_dist(self):
    """Prior draws of a DensityDist must come from its user-supplied ``random`` callable."""
    obs = np.random.normal(-1, 0.1, size=10)

    def draw_normal(mu, sd, rng=None, size=None):
        # Random function handed to DensityDist
        return rng.normal(loc=mu, scale=sd, size=size)

    with pm.Model():
        mu = pm.Normal("mu", 0, 1)
        sd = pm.HalfNormal("sd", 1e-6)
        pm.DensityDist("a", mu, sd, random=draw_normal, observed=obs)
        prior = pm.sample_prior_predictive(return_inferencedata=False)

    # With such a small sd prior, draws of "a" should sit on top of "mu"
    mean_offset = (prior["a"] - prior["mu"][..., None]).mean()
    npt.assert_almost_equal(mean_offset, 0, decimal=3)
def test_ignores_observed(self):
    """Prior predictive draws must follow the prior, not the observed values."""
    observed = np.random.normal(10, 1, size=200)
    with pm.Model():
        # The prior is centred far from the observations, so any influence
        # of the observed values on the draws would be obvious.
        observed_data = pm.Data("observed_data", observed)
        mu = pm.Normal("mu", mu=-100, sigma=1)
        positive_mu = pm.Deterministic("positive_mu", np.abs(mu))
        pm.Normal("x_obs", mu=-1 - positive_mu, sigma=1, observed=observed_data)
        prior = pm.sample_prior_predictive(return_inferencedata=False)

    assert "observed_data" not in prior
    assert (prior["mu"] < -90).all()
    assert (prior["positive_mu"] > 90).all()

    x_obs_draws = prior["x_obs"]
    assert (x_obs_draws < -90).all()
    assert x_obs_draws.shape == (500, 200)
    npt.assert_array_almost_equal(prior["positive_mu"], np.abs(prior["mu"]), decimal=4)
def test_multivariate2(self):
    """Regression test for issue #3271: Dirichlet-Multinomial predictive shapes."""
    n_draws = 20
    mn_data = np.random.multinomial(n=100, pvals=[1 / 6.0] * 6, size=10)
    with pm.Model() as dm_model:
        probs = pm.Dirichlet("probs", a=np.ones(6))
        pm.Multinomial("obs", n=100, p=probs, observed=mn_data)
        burned_trace = pm.sample(
            20,
            tune=10,
            cores=1,
            return_inferencedata=False,
            compute_convergence_checks=False,
        )

    # Both samplers run outside the context, with the model passed explicitly
    sim_priors = pm.sample_prior_predictive(
        return_inferencedata=False, samples=n_draws, model=dm_model
    )
    sim_ppc = pm.sample_posterior_predictive(
        burned_trace, return_inferencedata=False, samples=n_draws, model=dm_model
    )

    obs_shape = (20,) + mn_data.shape
    assert sim_priors["probs"].shape == (20, 6)
    assert sim_priors["obs"].shape == obs_shape
    assert sim_ppc["obs"].shape == obs_shape
def test_one_gaussian(self):
    """Run SMC on ``self.SMABC_test`` and check the trace and predictive samples."""
    assert self.count_rvs(self.SMABC_test.logpt) == 1

    with self.SMABC_test:
        trace = pm.sample_smc(draws=1000, return_inferencedata=False)
        pr_p = pm.sample_prior_predictive(1000, return_inferencedata=False)
        po_p = pm.sample_posterior_predictive(trace, 1000, return_inferencedata=False)

    data_mean = self.data.mean()
    data_std = self.data.std()

    # Posterior means of "a" and "b" against the data's mean and std
    assert abs(data_mean - trace["a"].mean()) < 0.05
    assert abs(data_std - trace["b"].mean()) < 0.05

    # Prior predictive draws of "s"
    prior_s = pr_p["s"]
    assert prior_s.shape == (1000, 1000)
    assert abs(0 - prior_s.mean()) < 0.10
    assert abs(1.4 - prior_s.std()) < 0.10

    # Posterior predictive draws of "s"
    posterior_s = po_p["s"]
    assert posterior_s.shape == (1000, 1000)
    assert abs(data_mean - posterior_s.mean()) < 0.10
    assert abs(data_std - posterior_s.std()) < 0.10
def test_transformed(self):
    """Prior predictive of a Beta-Binomial model must expose ``phi``, ``thetas`` and ``y``."""
    n = 18
    at_bats = 45 * np.ones(n, dtype=int)
    hits = np.random.randint(1, 40, size=n, dtype=int)
    draws = 50

    with pm.Model():
        phi = pm.Beta("phi", alpha=1.0, beta=1.0)
        kappa_log = pm.Exponential("logkappa", lam=5.0)
        kappa = pm.Deterministic("kappa", at.exp(kappa_log))
        thetas = pm.Beta(
            "thetas", alpha=phi * kappa, beta=(1.0 - phi) * kappa, size=n
        )
        pm.Binomial("y", n=at_bats, p=thetas, observed=hits)
        gen = pm.sample_prior_predictive(draws)

    assert gen.prior["phi"].shape == (1, draws)
    assert gen.prior_predictive["y"].shape == (1, draws, n)
    assert "thetas" in gen.prior.data_vars
def test_priors_separation(self, use_context):
    """Test model is enough to get prior, prior predictive and observed_data."""
    with pm.Model() as model:
        x = pm.Data("x", [1.0, 2.0, 3.0])
        y = pm.Data("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * beta, 1, observed=y)
        prior = pm.sample_prior_predictive(return_inferencedata=False)

    # The model is supplied explicitly, or picked up from the active context.
    if not use_context:
        inference_data = to_inference_data(prior=prior, model=model)
    else:
        with model:
            inference_data = to_inference_data(prior=prior)

    test_dict = {
        "prior": ["beta", "~obs"],
        "observed_data": ["obs"],
        "prior_predictive": ["obs"],
    }
    fails = check_multiple_attrs(test_dict, inference_data)
    assert not fails
def get_predictions_inference_data(
    self, data, eight_schools_params, inplace
) -> Tuple[InferenceData, Dict[str, np.ndarray]]:
    """Return InferenceData extended with predictions, plus the raw predictive dict.

    Asserts that the extended object is the original one exactly when
    ``inplace`` is true.
    """
    with data.model:
        prior = pm.sample_prior_predictive(return_inferencedata=False)
        posterior_predictive = pm.sample_posterior_predictive(
            data.obj, keep_size=True, return_inferencedata=False
        )

        school_coords = {"school": np.arange(eight_schools_params["J"])}
        school_dims = {"theta": ["school"], "eta": ["school"]}
        idata = to_inference_data(
            trace=data.obj,
            prior=prior,
            coords=school_coords,
            dims=school_dims,
        )
    assert isinstance(idata, InferenceData)

    extended = predictions_to_inference_data(
        posterior_predictive, idata_orig=idata, inplace=inplace
    )
    assert isinstance(extended, InferenceData)
    # Same object if and only if the extension was requested in place
    assert (idata is extended) == inplace
    return extended, posterior_predictive
def test_multivariate(self):
    """Prior draws of a 4-category Multinomial must have shape (1, 10, 4)."""
    n_draws = 10
    uniform_p = np.array([0.25] * 4)
    with pm.Model():
        pm.Multinomial("m", n=5, p=uniform_p)
        trace = pm.sample_prior_predictive(n_draws)

    assert trace.prior["m"].shape == (1, n_draws, 4)