def test_predictions_constant_data(self):
    """Exercise ``predictions_to_inference_data`` together with constant data.

    A first model is sampled and converted; the result must expose the
    ``posterior``, ``observed_data`` and ``constant_data`` groups.  A second
    model with shorter data is then used to draw posterior predictive
    samples, which are converted into a predictions object that must carry
    the posterior, the predictions and the predictions constant data, but
    no observed data.
    """
    # Fit the first model and convert its trace.
    with pm.Model():
        x_fit = pm.ConstantData("x", [1.0, 2.0, 3.0])
        y_fit = pm.MutableData("y", [1.0, 2.0, 3.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x_fit * slope, 1, observed=y_fit)
        trace = pm.sample(100, tune=100, return_inferencedata=False)
        inference_data = to_inference_data(trace)

    fitted_groups = {
        "posterior": ["beta"],
        "observed_data": ["obs"],
        "constant_data": ["x"],
    }
    assert not check_multiple_attrs(fitted_groups, inference_data)

    # Predict with a second model that holds only two data points.
    with pm.Model():
        x_new = pm.MutableData("x", [1.0, 2.0])
        y_new = pm.ConstantData("y", [1.0, 2.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x_new * slope, 1, observed=y_new)
        predictive_trace = pm.sample_posterior_predictive(
            inference_data, return_inferencedata=False
        )
        assert set(predictive_trace.keys()) == {"obs"}
        # The draws are expected to be four chains of 100 samples, i.e. a
        # shape of (400, 2), but the shape seems to vary between pymc
        # versions, so it is deliberately not asserted here.
        inference_data = predictions_to_inference_data(
            predictive_trace, posterior_trace=trace
        )

    group_checks = [
        (
            {"posterior": ["beta"], "~observed_data": ""},
            "Posterior data not copied over as expected.",
        ),
        (
            {"predictions": ["obs"]},
            "Predictions not instantiated as expected.",
        ),
        (
            {"predictions_constant_data": ["x"]},
            "Predictions constant data not instantiated as expected.",
        ),
    ]
    for expected_groups, message in group_checks:
        assert not check_multiple_attrs(expected_groups, inference_data), message
def test_simultaneous_shape_and_dims(self, ellipsis_in):
    """Check ``shape`` and ``dims`` given together, with or without Ellipsis.

    ``ellipsis_in`` selects which of the two arguments ends in ``...``:
    ``"none"``, ``"shape"``, ``"dims"`` or ``"both"``.
    """
    # ellipsis_in -> (shape, dims, dims expected in pmodel.RV_dims)
    cases = {
        # The shape and dims tuples correspond to each other.
        # Note: No checks are performed that implied shape (x), shape and
        # dims actually match.
        "none": ((2, 3), ("dshape", "ddata"), ("dshape", "ddata")),
        "shape": ((2, ...), ("dshape", "ddata"), ("dshape", "ddata")),
        "dims": ((2, 3), ("dshape", ...), ("dshape", None)),
        "both": ((2, ...), ("dshape", ...), ("dshape", None)),
    }
    with pm.Model() as pmodel:
        x = pm.ConstantData("x", [1, 2, 3], dims="ddata")

        selected = cases.get(ellipsis_in)
        if selected is not None:
            shape, dims, expected_dims = selected
            y = pm.Normal("y", mu=x, shape=shape, dims=dims)
            assert pmodel.RV_dims["y"] == expected_dims

        assert "dshape" in pmodel.dim_lengths
        assert y.eval().shape == (2, 3)
def test_no_trace(self):
    """Convert prior and posterior predictive samples without passing a trace.

    Three combinations are converted with an explicit ``model``: prior only,
    posterior predictive only, and both together.
    """
    with pm.Model() as model:
        x = pm.ConstantData("x", [1.0, 2.0, 3.0])
        y = pm.MutableData("y", [1.0, 2.0, 3.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * slope, 1, observed=y)
        idata = pm.sample(100, tune=100)
        prior = pm.sample_prior_predictive(return_inferencedata=False)
        posterior_predictive = pm.sample_posterior_predictive(
            idata, return_inferencedata=False
        )

    scenarios = [
        # Only prior
        (
            {"prior": prior},
            {"prior": ["beta"], "prior_predictive": ["obs"]},
        ),
        # Only posterior_predictive
        (
            {"posterior_predictive": posterior_predictive},
            {"posterior_predictive": ["obs"]},
        ),
        # Prior and posterior_predictive but no trace
        (
            {"prior": prior, "posterior_predictive": posterior_predictive},
            {
                "prior": ["beta"],
                "prior_predictive": ["obs"],
                "posterior_predictive": ["obs"],
            },
        ),
    ]
    for sources, expected_groups in scenarios:
        inference_data = to_inference_data(model=model, **sources)
        assert not check_multiple_attrs(expected_groups, inference_data)
def test_ovewrite_model_coords_dims(self):
    """Check coords and dims from model object can be partially overwritten."""
    original_dim1 = ["a", "b"]
    replacement_dim1 = ["c", "d"]
    dim2_labels = ["c1", "c2"]
    x_data = np.arange(4).reshape((2, 2))
    y_data = x_data + np.random.normal(size=(2, 2))

    with pm.Model(coords={"dim1": original_dim1, "dim2": ["c1", "c2"]}):
        x = pm.ConstantData("x", x_data, dims=("dim1", "dim2"))
        beta = pm.Normal("beta", 0, 1, dims="dim1")
        pm.Normal("obs", x * beta, 1, observed=y_data, dims=("dim1", "dim2"))
        trace = pm.sample(100, tune=100, return_inferencedata=False)
        # First conversion keeps the model's coords and dims as they are.
        idata_model = to_inference_data(trace)
        # Second conversion overrides the "dim1" coord and the dims of "beta".
        idata_override = to_inference_data(
            trace, coords={"dim1": replacement_dim1}, dims={"beta": ["dim2"]}
        )

    expected_groups = {
        "posterior": ["beta"],
        "observed_data": ["obs"],
        "constant_data": ["x"],
    }
    for idata in (idata_model, idata_override):
        assert not check_multiple_attrs(expected_groups, idata)

    assert "dim1" in list(idata_model.posterior.beta.dims)
    assert "dim2" in list(idata_override.posterior.beta.dims)

    model_x = idata_model.constant_data.x
    override_x = idata_override.constant_data.x
    assert np.all(model_x.dim1.values == np.array(original_dim1))
    assert np.all(model_x.dim2.values == np.array(dim2_labels))
    assert np.all(override_x.dim1.values == np.array(replacement_dim1))
    assert np.all(override_x.dim2.values == np.array(dim2_labels))
def model_with_dims():
    """Build a model that uses named dims, plus its expected graph layout.

    Returns:
        A tuple ``(pmodel, compute_graph, plates)``: the model, a dict
        mapping each variable name to a set of variable names, and a dict
        mapping each plate label to the set of variable names in it.
    """
    cities = ["Aachen", "Maastricht", "London", "Bergheim"]
    with pm.Model(coords={"city": cities}) as pmodel:
        economics = pm.Uniform("economics", lower=-1, upper=1, shape=(1,))
        population = pm.HalfNormal("population", sigma=5, dims=("city"))
        time = pm.ConstantData("time", [2014, 2015, 2016], dims="year")

        revenue = pm.Deterministic(
            "tax revenue",
            economics * population[None, :] * time[:, None],
            dims=("year", "city"),
        )

        observed = pm.MutableData("observed", np.ones((3, 4)))
        pm.Normal("L", revenue, observed=observed)

    compute_graph = {
        "economics": set(),
        "population": set(),
        "time": set(),
        "tax revenue": {"economics", "population", "time"},
        "L": {"tax revenue"},
        "observed": {"L"},
    }
    plates = {
        "1": {"economics"},
        "city (4)": {"population"},
        "year (3)": {"time"},
        "year (3) x city (4)": {"tax revenue"},
        "3 x 4": {"L", "observed"},
    }
    return pmodel, compute_graph, plates
def test_data_defined_size_dimension_can_register_dimname(self):
    """Check that a dimension introduced through ``size`` gets registered.

    ``x`` registers the "first" and "second" dimensions.  ``y`` is created
    with ``size=2`` and three dim names, so a "third" dimension must be
    registered and the evaluated variable must have shape ``(2, 1, 4)``.
    """
    with pm.Model() as pmodel:
        x = pm.ConstantData("x", [[1, 2, 3, 4]], dims=("first", "second"))
        assert "first" in pmodel.dim_lengths
        assert "second" in pmodel.dim_lengths
        # two dimensions are implied; a "third" dimension is created
        y = pm.Normal("y", mu=x, size=2, dims=("third", "first", "second"))
        assert "third" in pmodel.dim_lengths
        # ``shape`` is an attribute holding a tuple; calling it would raise
        # TypeError instead of checking the shape.
        assert y.eval().shape == (2, 1, 4)
def test_data_naming():
    """
    Regression test for issue #3793 -- `Data` objects in named models were
    not given model-relative names.
    """
    with pm.Model("named_model") as model:
        data_var = pm.ConstantData("x", [1.0, 2.0, 3.0])
        random_var = pm.Normal("y")

    # Both the random variable and the data container carry the model prefix.
    assert random_var.name == "named_model::y"
    assert data_var.name == "named_model::x"
def test_coords_and_constantdata_create_immutable_dims():
    """
    Dimensions that come from `pm.Model(coords=...)` or `pm.ConstantData`
    must be stored with `TensorConstant` lengths, which is what this test
    treats as immutable.
    """
    with pm.Model(coords={"group": ["A", "B"]}) as m:
        x = pm.ConstantData("x", [0], dims="feature")
        y = pm.Normal("y", x, 1, dims=("group", "feature"))

    # "feature" is created by the data container, "group" by the coords.
    for dim_name in ("feature", "group"):
        assert isinstance(m._dim_lengths[dim_name], TensorConstant)
    assert x.eval().shape == (1,)
    assert y.eval().shape == (2, 1)
def test_autodetect_coords_from_model(self, use_context):
    """Check that coords given to the model reach the converted data.

    With ``use_context`` the conversion runs inside the model context
    without a ``model`` argument; otherwise it runs after the context has
    been left and the model is passed explicitly.
    """
    pd = pytest.importorskip("pandas")
    df_data = pd.DataFrame(columns=["date"]).set_index("date")
    dates = pd.date_range(start="2020-05-01", end="2020-05-20")
    city_means = {"Berlin": 15, "San Marino": 18, "Paris": 16}
    for city, mu in city_means.items():
        df_data[city] = np.random.normal(loc=mu, size=len(dates))
    df_data.index = dates
    df_data.index.name = "date"

    coords = {"date": df_data.index, "city": df_data.columns}
    with pm.Model(coords=coords) as model:
        europe_mean = pm.Normal("europe_mean_temp", mu=15.0, sigma=3.0)
        city_offset = pm.Normal("city_offset", mu=0.0, sigma=3.0, dims="city")
        city_temperature = pm.Deterministic(
            "city_temperature", europe_mean + city_offset, dims="city"
        )

        data_dims = ("date", "city")
        data = pm.ConstantData("data", df_data, dims=data_dims)
        pm.Normal(
            "likelihood",
            mu=city_temperature,
            sigma=0.5,
            observed=data,
            dims=data_dims,
        )

        sampler_options = {
            "return_inferencedata": False,
            "compute_convergence_checks": False,
            "cores": 1,
            "chains": 1,
            "tune": 20,
            "draws": 30,
            "step": pm.Metropolis(),
        }
        trace = pm.sample(**sampler_options)
        if use_context:
            idata = to_inference_data(trace=trace)
    if not use_context:
        idata = to_inference_data(trace=trace, model=model)

    assert "city" in list(idata.posterior.dims)
    observed_dims = list(idata.observed_data.dims)
    assert "city" in observed_dims
    assert "date" in observed_dims

    np.testing.assert_array_equal(idata.posterior.coords["city"], coords["city"])
    for dim_name in ("date", "city"):
        np.testing.assert_array_equal(
            idata.observed_data.coords[dim_name], coords[dim_name]
        )
def test_set_coords_through_pmdata(self):
    """Check that coords passed to the data containers end up on the model."""
    city_names = ["Tinyvil", "Minitown"]
    season_names = ["winter", "spring", "summer", "fall"]
    temperatures = [[15, 20, 22, 17], [18, 22, 21, 12]]

    with pm.Model() as pmodel:
        pm.ConstantData(
            "population", [100, 200], dims="city", coords={"city": city_names}
        )
        pm.MutableData(
            "temperature",
            temperatures,
            dims=("city", "season"),
            coords={"season": season_names},
        )

    assert "city" in pmodel.coords
    assert "season" in pmodel.coords
    # The coords were given as lists; the model is expected to hold tuples.
    assert pmodel.coords["city"] == ("Tinyvil", "Minitown")
    assert pmodel.coords["season"] == ("winter", "spring", "summer", "fall")
def test_simultaneous_size_and_dims(self, with_dims_ellipsis):
    """Check ``size`` and ``dims`` given together on a multivariate variable.

    ``with_dims_ellipsis`` selects whether ``dims`` ends in ``...`` or names
    every dimension explicitly.
    """
    with pm.Model() as pmodel:
        pm.ConstantData("x", [1, 2, 3], dims="ddata")
        assert "ddata" in pmodel.dim_lengths

        # Size does not include support dims, so this test must use a dist
        # with support dims.
        mvnormal_args = {
            "name": "y",
            "size": 2,
            "mu": at.ones((3, 4)),
            "cov": at.eye(4),
        }
        if with_dims_ellipsis:
            dims = ("dsize", ...)
            expected_dims = ("dsize", None, None)
        else:
            dims = ("dsize", "ddata", "dsupport")
            expected_dims = ("dsize", "ddata", "dsupport")

        y = pm.MvNormal(**mvnormal_args, dims=dims)
        assert pmodel.RV_dims["y"] == expected_dims

        assert "dsize" in pmodel.dim_lengths
        assert y.eval().shape == (2, 3, 4)
def test_constant_data(self, use_context):
    """Test constant_data group behaviour.

    With ``use_context`` the conversion runs inside the model context
    without a ``model`` argument; otherwise it runs after the context has
    been left and the model is passed explicitly.
    """
    with pm.Model() as model:
        x = pm.ConstantData("x", [1.0, 2.0, 3.0])
        y = pm.MutableData("y", [1.0, 2.0, 3.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * slope, 1, observed=y)
        trace = pm.sample(100, chains=2, tune=100, return_inferencedata=False)
        if use_context:
            inference_data = to_inference_data(trace=trace)

    if not use_context:
        inference_data = to_inference_data(trace=trace, model=model)

    expected_groups = {
        "posterior": ["beta"],
        "observed_data": ["obs"],
        "constant_data": ["x"],
    }
    assert not check_multiple_attrs(expected_groups, inference_data)
    # 2 chains, 100 draws, 3 observations.
    assert inference_data.log_likelihood["obs"].shape == (2, 100, 3)
def model_with_different_descendants():
    """
    Model proposed by Michael to test variable selection functionality
    From here: https://github.com/pymc-devs/pymc/pull/5634#pullrequestreview-916297509

    Returns the model itself.
    """
    with pm.Model() as pmodel2:
        a = pm.Normal("a")
        b = pm.Normal("b")
        # "c" depends on a and b but nothing else depends on it.
        pm.Normal("c", a * b)

        # Chain of deterministics leading to the observed variable "L".
        intermediate = pm.Deterministic("intermediate", a + b)
        pred = pm.Deterministic("pred", intermediate * 3)

        obs = pm.ConstantData("obs", 1.75)
        pm.Normal("L", mu=1 + 0.5 * pred, observed=obs)

    return pmodel2
def test_implicit_coords_series(self):
    """Check that coords are taken from the index of a pandas Series."""
    n_days = 22
    ser_sales = pd.Series(
        data=np.random.randint(low=0, high=30, size=n_days),
        index=pd.date_range(
            start="2020-05-01", periods=n_days, freq="24H", name="date"
        ),
        name="sales",
    )

    # The index is exported as the coordinate of the "date" dimension.
    with pm.Model() as pmodel:
        pm.ConstantData(
            "sales", ser_sales, dims="date", export_index_as_coords=True
        )

    assert "date" in pmodel.coords
    assert len(pmodel.coords["date"]) == 22
    assert pmodel.RV_dims == {"sales": ("date",)}
def test_implicit_coords_dataframe(self):
    """Check that coords are taken from a DataFrame's index and columns."""
    N_rows = 5
    N_cols = 7

    df_data = pd.DataFrame()
    for number in range(1, N_cols + 1):
        df_data[f"Column {number}"] = np.random.normal(size=(N_rows,))
    df_data.index.name = "rows"
    df_data.columns.name = "columns"

    # infer coordinates from index and columns of the DataFrame
    with pm.Model() as pmodel:
        pm.ConstantData(
            "observations",
            df_data,
            dims=("rows", "columns"),
            export_index_as_coords=True,
        )

    for dim_name in ("rows", "columns"):
        assert dim_name in pmodel.coords
    assert pmodel.RV_dims == {"observations": ("rows", "columns")}
def test_priors_separation(self, use_context):
    """Test model is enough to get prior, prior predictive and observed_data.

    With ``use_context`` the model is supplied by re-entering its context;
    otherwise it is passed through the ``model`` argument.
    """
    with pm.Model() as model:
        x = pm.MutableData("x", [1.0, 2.0, 3.0])
        y = pm.ConstantData("y", [1.0, 2.0, 3.0])
        slope = pm.Normal("beta", 0, 1)
        pm.Normal("obs", x * slope, 1, observed=y)
        prior = pm.sample_prior_predictive(return_inferencedata=False)

    if use_context:
        with model:
            inference_data = to_inference_data(prior=prior)
    else:
        inference_data = to_inference_data(prior=prior, model=model)

    expected_groups = {
        "prior": ["beta", "~obs"],
        "observed_data": ["obs"],
        "prior_predictive": ["obs"],
    }
    assert not check_multiple_attrs(expected_groups, inference_data)
def test_model_to_graphviz_for_model_with_data_container(self):
    """Check the graphviz source produced for a model with data containers.

    The "latex" and "latex_with_params" formattings must be rejected with a
    ``ValueError``.  For "plain" and "plain_with_params" every expected node
    line must appear in the generated source.
    """
    with pm.Model() as model:
        x = pm.ConstantData("x", [1.0, 2.0, 3.0])
        y = pm.MutableData("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        obs_sigma = floatX(np.sqrt(1e-2))
        pm.Normal("obs", beta * x, obs_sigma, observed=y)
        pm.sample(
            1000,
            init=None,
            tune=1000,
            chains=1,
            compute_convergence_checks=False,
        )

    for formatting in {"latex", "latex_with_params"}:
        with pytest.raises(ValueError, match="Unsupported formatting"):
            pm.model_to_graphviz(model, formatting=formatting)

    # Every node is labelled with its own variable name, so the "y" node
    # carries the label "y" (it previously expected the label "x").
    exp_without = [
        'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
        'y [label="y\n~\nMutableData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal"]',
        'obs [label="obs\n~\nNormal" style=filled]',
    ]
    exp_with = [
        'x [label="x\n~\nConstantData" shape=box style="rounded, filled"]',
        'y [label="y\n~\nMutableData" shape=box style="rounded, filled"]',
        'beta [label="beta\n~\nNormal(mu=0.0, sigma=10.0)"]',
        f'obs [label="obs\n~\nNormal(mu=f(f(beta), x), sigma={obs_sigma})" style=filled]',
    ]
    for formatting, expected_substrings in [
        ("plain", exp_without),
        ("plain_with_params", exp_with),
    ]:
        g = pm.model_to_graphviz(model, formatting=formatting)
        # check formatting of RV nodes
        for expected in expected_substrings:
            assert expected in g.source
def test_sample_posterior_predictive_after_set_data(self):
    """Check posterior predictive sampling after replacing the data in ``x``."""
    with pm.Model() as model:
        x = pm.MutableData("x", [1.0, 2.0, 3.0])
        y = pm.ConstantData("y", [1.0, 2.0, 3.0])
        beta = pm.Normal("beta", 0, 10.0)
        pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
        trace = pm.sample(
            1000,
            tune=1000,
            chains=1,
            return_inferencedata=False,
            compute_convergence_checks=False,
        )

    # Predict on new data.
    x_test = [5, 6, 9]
    with model:
        pm.set_data(new_data={"x": x_test})
        y_test = pm.sample_posterior_predictive(trace)

    predicted_obs = y_test.posterior_predictive["obs"]
    # 1 chain, 1000 draws, 3 new data points.
    assert predicted_obs.shape == (1, 1000, 3)
    # Averaged over chains and draws, the predictions must be close to x_test.
    predicted_mean = predicted_obs.mean(("chain", "draw"))
    np.testing.assert_allclose(x_test, predicted_mean, atol=1e-1)
def test_creation_of_data_outside_model_context(self):
    """Check that creating data with no model on the context stack fails."""
    expected_errors = (IndexError, TypeError)
    with pytest.raises(expected_errors, match="No model on context stack"):
        pm.ConstantData("data", [1.1, 2.2, 3.3])