def test_logistic_regression_good_numeric():
    """Fit a Bernoulli model whose response is coded numerically as 0/1.

    The test carries no explicit assertions; it passes as long as fitting
    does not raise.
    """
    response = np.random.choice([1, 0], 50)
    predictor = np.random.normal(size=50)
    frame = pd.DataFrame({"y": response, "x": predictor})
    Model(frame).fit("y ~ x", family="bernoulli")
def test_logistic_regression_empty_index():
    """Fit a Bernoulli model whose response holds the string labels "a" and "b".

    The test carries no explicit assertions; it passes as long as fitting
    does not raise.
    """
    response = np.random.choice(["a", "b"], 50)
    predictor = np.random.normal(size=50)
    frame = pd.DataFrame({"y": response, "x": predictor})
    Model(frame).fit("y ~ x", family="bernoulli")
def test_categorical_term():
    """Check the row labels of the ArviZ summary for a model with categorical terms."""
    columns = {
        "y": np.random.normal(size=6),
        "x1": np.random.normal(size=6),
        "x2": [1, 1, 0, 0, 1, 1],
        "g1": ["a"] * 3 + ["b"] * 3,
        "g2": ["x", "x", "z", "z", "y", "y"],
    }
    data = pd.DataFrame(columns)

    model = Model("y ~ x1 + x2 + g1 + (g1|g2) + (x2|g2)", data)
    summary = az.summary(model.fit(draws=10))

    # Expected labels, grouped by the term they belong to.
    common = ["Intercept", "x1", "x2", "g1[b]"]
    group_intercept = ["1|g2_sigma", "1|g2[x]", "1|g2[y]", "1|g2[z]"]
    group_g1 = ["g1|g2_sigma[b]", "g1|g2[b, x]", "g1|g2[b, y]", "g1|g2[b, z]"]
    group_x2 = ["x2|g2_sigma", "x2|g2[x]", "x2|g2[y]", "x2|g2[z]"]
    expected_index = common + group_intercept + group_g1 + group_x2 + ["y_sigma"]

    assert list(summary.index) == expected_index
def test_response_prior():
    """Check that priors for auxiliary response parameters reach the response term.

    Three cases are covered: ``sigma`` with the default family, ``alpha``
    with the negative binomial family and ``alpha`` with the gamma family.
    The gamma case was previously repeated verbatim; the redundant copy has
    been removed.
    """
    data = pd.DataFrame(
        {
            "y": np.random.randint(3, 10, size=50),
            "x": np.random.normal(size=50),
        }
    )

    # Default family: prior on "sigma".
    priors = {"sigma": Prior("Uniform", lower=0, upper=50)}
    model = Model("y ~ x", data, priors=priors)
    assert model.response.prior.args["sigma"] == priors["sigma"]

    # Negative binomial family: prior on "alpha".
    priors = {"alpha": Prior("Uniform", lower=1, upper=20)}
    model = Model("y ~ x", data, family="negativebinomial", priors=priors)
    assert model.response.prior.args["alpha"] == priors["alpha"]

    # Gamma family: prior on "alpha", compared against an equal but distinct Prior.
    priors = {"alpha": Prior("Uniform", lower=0, upper=50)}
    model = Model("y ~ x", data, family="gamma", priors=priors)
    assert model.response.prior.args["alpha"] == Prior("Uniform", lower=0, upper=50)
def test_bad_links():
    """Reject link names that are not suitable for the requested family."""
    data = pd.DataFrame(
        {
            "g": np.random.choice([0, 1], size=100),
            "y": np.random.randint(3, 10, size=100),
            "x": np.random.randint(3, 10, size=100),
        }
    )
    unsupported_links = {
        "bernoulli": ["inverse", "inverse_squared", "log"],
        "beta": ["inverse", "inverse_squared", "log"],
        "gamma": ["logit", "probit", "cloglog"],
        "gaussian": ["logit", "probit", "cloglog"],
        "negativebinomial": ["logit", "probit", "inverse", "inverse_squared"],
        "poisson": ["logit", "probit", "cloglog", "inverse", "inverse_squared"],
        "wald": ["logit", "probit", "cloglog"],
    }

    for family, links in unsupported_links.items():
        # The Bernoulli family is paired with the 0/1 column "g"; all others use "y".
        formula = "g ~ x" if family == "bernoulli" else "y ~ x"
        for link in links:
            with pytest.raises(ValueError):
                Model(formula, data, family=family, link=link)
def test_prior_shape():
    """Check the shapes of the ``mu`` and ``sigma`` prior arguments per term."""

    def check_shapes(model, expected):
        # For every term, "mu" and "sigma" must both have the expected shape.
        for term, shape in expected.items():
            prior_args = model.terms[term].prior.args
            assert prior_args["mu"].shape == shape
            assert prior_args["sigma"].shape == shape

    data = pd.DataFrame(
        {
            "score": np.random.normal(size=100),
            "q": np.random.choice(["1", "2", "3", "4", "5"], size=100),
            "s": np.random.choice(["a", "b", "c"], size=100),
            "g": np.random.choice(["A", "B", "C"], size=100),
        }
    )

    check_shapes(Model("score ~ 0 + q", data), {"q": (5,)})
    check_shapes(Model("score ~ q", data), {"q": (4,)})
    check_shapes(Model("score ~ 0 + q:s", data), {"q:s": (15,)})

    # "s" is automatically added to ensure full rank matrix
    check_shapes(
        Model("score ~ q:s", data),
        {"Intercept": (), "s": (2,), "q:s": (12,)},
    )
def test_auto_scale(diabetes_data):
    """Check term priors with automatic scaling switched on and off."""
    # NOTE(review): another `test_auto_scale` is defined later in this file; if both
    # live at module level the later one replaces this one -- confirm which to keep.
    formula = "BMI ~ S1 + S2 + BP"
    term_names = ("S1", "S2", "BP")

    # By default, everything is scaled except custom Prior() objects.
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    scaled = Model(formula, diabetes_data, priors=priors)
    s1, s2, bp = [scaled.terms[name].prior for name in term_names]
    assert s1.name == s2.name == "Normal"
    assert 0 < s1.args["sigma"] < 1
    assert s2.args["sigma"] > s1.args["sigma"]
    assert bp.name == "Cauchy"
    assert bp.args["beta"] == 17.5

    # With auto_scale off, custom Prior() objects are honoured but custom scaling is
    # not: the 0.3 given for S1 has no effect, while the Cauchy prior for BP is kept.
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}
    unscaled = Model(formula, diabetes_data, priors=priors, auto_scale=False)
    s1_off, s2_off, bp_off = [unscaled.terms[name].prior for name in term_names]
    assert s1_off.name == "Normal"
    assert s2_off.name == "Flat"
    assert s1_off.args["sigma"] == 1
    assert "sigma" not in s2_off.args
    assert bp_off.name == "Cauchy"
def test_model_term_names_property_interaction(crossed_data):
    """Check ``term_names`` for a formula with a full ``a*b`` interaction."""
    # Ten consecutive copies of each level, and that 40-item pattern three times over.
    pattern = [level for level in ["a", "b", "c", "d"] for _ in range(10)]
    crossed_data["fourcats"] = pattern * 3

    model = Model("Y ~ threecats*fourcats", crossed_data)
    model.build()

    expected = ["Intercept", "threecats", "fourcats", "threecats:fourcats"]
    assert model.term_names == expected
def test_logistic_regression_bad_numeric():
    """Expect a ValueError for a Bernoulli model whose response is coded as 1/2."""
    response = np.random.choice([1, 2], 50)
    predictor = np.random.normal(size=50)
    frame = pd.DataFrame({"y": response, "x": predictor})

    # Construction and fitting both stay inside the block, as in the original test.
    with pytest.raises(ValueError):
        Model(frame).fit("y ~ x", family="bernoulli")
def test_complete_separation():
    """Check behaviour when the predictor perfectly separates a binary response."""
    outcome = [0] * 5 + [1] * 5
    group = ["a"] * 5 + ["b"] * 5
    data = pd.DataFrame({"y": outcome, "g": group})

    # Priors derived through "mle" are expected to fail on this data.
    with pytest.raises(PerfectSeparationError):
        Model("y ~ g", data, family="bernoulli", automatic_priors="mle")

    # No error is raised when an explicit prior is supplied.
    explicit_priors = {"common": Prior("Normal", mu=0, sigma=10)}
    Model("y ~ g", data, family="bernoulli", priors=explicit_priors)
def test_laplace():
    """Compare the Laplace fit of an intercept-only model with sample statistics.

    The intercept's first and second result entries, rounded to two decimals,
    are compared with the sample mean and the standard error of the mean.
    """
    data = pd.DataFrame(np.repeat((0, 1), (30, 60)), columns=["w"])
    priors = {"Intercept": Prior("Uniform", lower=0, upper=1)}
    model = Model("w ~ 1", data=data, family="bernoulli", priors=priors, link="identity")
    results = model.fit(method="laplace")

    # Numerical estimates from the fit.
    numeric_mode = np.round(results["Intercept"][0], 2)
    numeric_std = np.round(results["Intercept"][1][0], 2)

    # Reference values computed directly from the data.
    analytic_mode = data.mean()
    analytic_std = data.std() / len(data) ** 0.5

    np.testing.assert_array_almost_equal(
        (numeric_mode, numeric_std),
        (analytic_mode.item(), analytic_std.item()),
        decimal=2,
    )
def test_omit_offsets_true():
    """Check that no posterior dimension mentions "offset" when offsets are omitted."""
    # NOTE(review): another `test_omit_offsets_true` is defined later in this file; if
    # both live at module level the later one replaces this one -- confirm which to keep.
    columns = {
        "y": np.random.normal(size=100),
        "x1": np.random.normal(size=100),
        "g1": ["a"] * 50 + ["b"] * 50,
    }
    fitted = Model(pd.DataFrame(columns)).fit("y ~ x1 + (x1|g1)", omit_offsets=True)

    assert not any("offset" in dim for dim in fitted.posterior.dims)
def test_omit_offsets_false():
    """Check that the offset dimensions are kept when ``omit_offsets=False``."""
    columns = {
        "y": np.random.normal(size=100),
        "x1": np.random.normal(size=100),
        "g1": ["a"] * 50 + ["b"] * 50,
    }
    frame = pd.DataFrame(columns)

    fitted = Model("y ~ x1 + (x1|g1)", frame).fit(omit_offsets=False)

    offset_dims = [dim for dim in fitted.posterior.dims if "offset" in dim]
    assert offset_dims == ["1|g1_offset_dim_0", "x1|g1_offset_dim_0"]
def test_beta_regression():
    """Smoke-test fitting a beta-family model on the bundled gasoline data.

    The CSV is read from the ``data`` directory next to this test file. The
    test carries no explicit assertions; it passes as long as fitting does not
    raise, so the fit result is not bound to a name.
    """
    from os.path import dirname, join

    data = pd.read_csv(join(dirname(__file__), "data", "gasoline.csv"))
    model = Model("yield ~ temp + batch", data, family="beta", categorical="batch")
    model.fit(target_accept=0.9)
def test_model_init_and_intercept(diabetes_data):
    """Check a freshly constructed model with and without ``intercept=True``."""
    # With an intercept requested, it is the only term and there is no response yet.
    with_intercept = Model(diabetes_data, intercept=True)
    assert hasattr(with_intercept, "data")
    assert "Intercept" in with_intercept.terms
    assert len(with_intercept.terms) == 1
    assert with_intercept.y is None
    assert hasattr(with_intercept, "backend")

    # By default the model starts with no terms at all.
    bare = Model(diabetes_data)
    assert "Intercept" not in bare.terms
    assert not bare.terms
def test_family_bad_type():
    """Expect a ValueError when ``family`` is an int, a set or a dict."""
    data = pd.DataFrame({"x": [1], "y": [1]})
    invalid_families = (0, set("gaussian"), {"family": "gaussian"})

    for family in invalid_families:
        with pytest.raises(ValueError):
            Model("y ~ x", data, family=family)
def test_set_prior_unexisting_term():
    """Expect a ValueError when a prior names a term ("z") the model lacks."""
    frame = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
        }
    )
    uniform = Prior("Uniform", lower=0, upper=50)
    model = Model("y ~ x", frame)

    with pytest.raises(ValueError):
        model.set_priors(priors={("x", "z"): uniform})
def test_posterior_predictive(crossed_data):
    """Check posterior predictive sampling in both returned and in-place modes."""
    # Shift "Y" so its minimum is zero and round it to build the "count" column.
    shifted = crossed_data["Y"] - crossed_data["Y"].min()
    crossed_data["count"] = shifted.round()

    model = Model("count ~ threecats + continuous + dummy", crossed_data, family="poisson")
    fitted = model.fit(tune=0, draws=2)
    expected_shape = (1, 500, 120)

    # inplace=False: the samples come back on the returned object.
    returned = model.posterior_predictive(fitted, draws=500, inplace=False)
    assert returned.posterior_predictive["count"].shape == expected_shape

    # inplace=True: nothing is returned and `fitted` itself gains the samples.
    nothing = model.posterior_predictive(fitted, draws=500, inplace=True)
    assert nothing is None
    assert fitted.posterior_predictive["count"].shape == expected_shape
def test_omit_offsets_true():
    """Check that no posterior variable ends in "_offset" when offsets are omitted."""
    # NOTE(review): an earlier `test_omit_offsets_true` exists in this file; if both
    # live at module level this definition replaces it -- confirm which to keep.
    data = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x1": np.random.normal(size=100),
            "g1": ["a"] * 50 + ["b"] * 50,
        }
    )
    model = Model("y ~ x1 + (x1|g1)", data)
    fitted = model.fit(omit_offsets=True)

    # Iterate the variable names directly. The previous `posterior.var()` call ran a
    # variance reduction over the posterior only to iterate the names of its result.
    # Assumes `fitted.posterior` is an xarray Dataset -- TODO confirm.
    offsets = [name for name in fitted.posterior.data_vars if name.endswith("_offset")]
    assert not offsets
def test_model_terms_cleaned_levels_interaction(crossed_data):
    """Check ``cleaned_levels`` of the interaction term in ``threecats*fourcats``."""
    # Ten consecutive copies of each level, and that 40-item pattern three times over.
    pattern = [level for level in ["a", "b", "c", "d"] for _ in range(10)]
    crossed_data["fourcats"] = pattern * 3

    model = Model(crossed_data)
    model.fit("Y ~ threecats*fourcats", run=False)

    expected_levels = [
        "threecats[b]:fourcats[b]",
        "threecats[b]:fourcats[c]",
        "threecats[b]:fourcats[d]",
        "threecats[c]:fourcats[b]",
        "threecats[c]:fourcats[c]",
        "threecats[c]:fourcats[d]",
    ]
    interaction = model.terms["threecats:fourcats"]
    assert interaction.cleaned_levels == expected_levels
def test_model_term_names_property(diabetes_data):
    """Check ``term_names`` after adding terms one call at a time."""
    # NOTE(review): another `test_model_term_names_property` is defined later in this
    # file; if both live at module level the later one replaces this one -- confirm.
    model = Model(diabetes_data)
    for piece in ("BMI ~ age_grp", "BP", "S1"):
        model.add(piece)
    model.build(backend="pymc")

    expected = ["Intercept", "age_grp", "BP", "S1"]
    assert model.term_names == expected
def test_auto_scale(diabetes_data):
    """Check term priors after building with automatic scaling on and off."""
    # NOTE(review): an earlier `test_auto_scale` exists in this file; if both live at
    # module level this definition replaces it -- confirm which to keep.
    formula = "BMI ~ S1 + S2 + BP"
    term_names = ("S1", "S2", "BP")
    priors = {"S1": 0.3, "BP": Prior("Cauchy", alpha=1, beta=17.5)}

    # By default, everything is scaled except custom Prior() objects.
    scaled = Model(formula, diabetes_data, priors=priors)
    scaled.build(backend="pymc3")
    s1, s2, bp = [scaled.terms[name].prior for name in term_names]
    assert s1.name == s2.name == "Normal"
    assert 0 < s1.args["sigma"] < 1
    assert s2.args["sigma"] > s1.args["sigma"]
    assert bp.name == "Cauchy"
    assert bp.args["beta"] == 17.5

    # With auto_scale off, everything should be flat unless explicitly named in priors.
    unscaled = Model(formula, diabetes_data, priors=priors, auto_scale=False)
    unscaled.build(backend="pymc3")
    s1_off, s2_off, bp_off = [unscaled.terms[name].prior for name in term_names]
    assert s1_off.name == "Normal"
    assert s2_off.name == "Flat"
    assert 0 < s1_off.args["sigma"] < 1
    assert "sigma" not in s2_off.args
    assert bp_off.name == "Cauchy"
    assert bp_off.args["beta"] == 17.5
def test_set_prior_with_tuple():
    """Check that a tuple key in ``set_priors`` assigns the prior to each named term."""
    frame = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.normal(size=100),
            "z": np.random.normal(size=100),
        }
    )
    shared_prior = Prior("Uniform", lower=0, upper=50)

    model = Model("y ~ x + z", frame)
    model.set_priors(priors={("x", "z"): shared_prior})

    for term in ("x", "z"):
        assert model.terms[term].prior == shared_prior
def test_model_term_names_property(diabetes_data):
    """Check that ``term_names`` lists the intercept and the added terms in order."""
    # NOTE(review): an earlier `test_model_term_names_property` exists in this file; if
    # both live at module level this definition replaces it -- confirm which to keep.
    additions = ["BMI ~ age_grp", "BP", "S1"]

    model = Model(diabetes_data)
    for addition in additions:
        model.add(addition)
    model.build(backend="pymc")

    assert model.term_names == ["Intercept", "age_grp", "BP", "S1"]
def test_model_graph(crossed_data):
    """Check that ``graph()`` raises before the model is built and runs afterwards."""
    unbuilt = Model("Y ~ 0 + threecats", crossed_data)

    # Graph cannot be plotted until model is built.
    with pytest.raises(ValueError):
        unbuilt.graph()

    unbuilt.build()
    unbuilt.graph()
def test_plot_priors(crossed_data):
    """Check that ``plot_priors()`` raises before the model is built and runs afterwards."""
    unbuilt = Model("Y ~ 0 + threecats", crossed_data)

    # Priors cannot be plotted until model is built.
    with pytest.raises(ValueError):
        unbuilt.plot_priors()

    unbuilt.build()
    unbuilt.plot_priors()
def test_distribute_group_specific_effect_over(diabetes_data):
    """Check the terms and data shapes produced by a ``(C(age_grp)|BMI)`` effect.

    Covers the formula with and without the group-specific intercept.
    """
    # 163 unique levels of BMI in diabetes_data
    # With intercept
    model = Model("BP ~ (C(age_grp)|BMI)", diabetes_data)
    model.build()

    # Treatment encoding because of the intercept
    lvls = sorted(diabetes_data["age_grp"].unique())[1:]

    assert "C(age_grp)|BMI" in model.terms
    assert "1|BMI" in model.terms
    assert model.terms["C(age_grp)|BMI"].pymc_coords["C(age_grp)_coord_group_expr"] == lvls

    # This is equal to the sub-matrix of Z that corresponds to this term.
    # 442 is the number of observations. 163 the number of groups.
    # 2 is the number of levels of the categorical variable 'C(age_grp)' after removing
    # the reference level. Then the number of columns is 326 = 163 * 2.
    assert model.terms["C(age_grp)|BMI"].data.shape == (442, 326)

    # Without intercept. Reference level is not removed.
    model = Model("BP ~ (0 + C(age_grp)|BMI)", diabetes_data)
    model.build()

    assert "C(age_grp)|BMI" in model.terms
    assert "1|BMI" not in model.terms
    assert model.terms["C(age_grp)|BMI"].data.shape == (442, 489)
def test_constant_terms():
    """Expect a ValueError for a constant numeric or constant categorical predictor."""
    frame = pd.DataFrame(
        {
            "y": np.random.normal(size=10),
            "x": np.random.choice([1], size=10),
            "z": np.random.choice(["A"], size=10),
        }
    )

    # "x" holds only the value 1 and "z" only the label "A".
    for formula in ("y ~ 0 + x", "y ~ 0 + z"):
        with pytest.raises(ValueError):
            Model(formula, frame)
def test_hyperprior_on_common_effect():
    """Expect a ValueError when a prior with a nested Prior targets a common effect."""
    frame = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x1": np.random.normal(size=100),
            "g1": ["a"] * 50 + ["b"] * 50,
        }
    )
    # The "sd" argument is itself a Prior, i.e. a hyperprior.
    slope = Prior("Normal", mu=0, sd=Prior("HalfCauchy", beta=2))

    # Once keyed by the term name, once by the "common" key.
    for key in ("x1", "common"):
        with pytest.raises(ValueError):
            Model("y ~ x1 + (x1|g1)", frame, priors={key: slope})
def test_model_categorical_argument():
    """Check that the ``categorical`` argument marks integer columns as categorical."""
    frame = pd.DataFrame(
        {
            "y": np.random.normal(size=100),
            "x": np.random.randint(2, size=100),
            "z": np.random.randint(2, size=100),
        }
    )

    # A single column name given as a string.
    single = Model("y ~ 0 + x", frame, categorical="x")
    assert single.terms["x"].categorical

    # Several column names given as a list; the interaction term is flagged too.
    several = Model("y ~ 0 + x*z", frame, categorical=["x", "z"])
    for term in ("x", "z", "x:z"):
        assert several.terms[term].categorical