def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
    """Gaussian model over four populations with a grid-searched kernel.

    The prior on ``x`` is ``N(0, sigma_x)`` and the model draws
    ``y ~ N(x, sigma_y)``.  The weighted sample obtained from the run is
    compared against the closed-form Gaussian posterior computed below:
    largest CDF deviation, weighted mean and weighted standard deviation
    must all stay within fixed tolerances.

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance forwarded to ``ABCSMC``.
    """
    prior_std = 1
    noise_std = 0.5
    observation = 2
    n_generations = 4

    # The inner function keeps its original name in case the model wrapper
    # derives a label from it.
    def model(args):
        return {"y": st.norm(args['x'], noise_std).rvs()}

    wrapped_models = [FunctionModel(model)]
    pop_size = ConstantPopulationSize(600)
    priors = [Distribution(x=st.norm(0, prior_std))]

    # Transition wrapped in a grid search over five log-spaced "scaling"
    # values between 10**-1 and 10**1.5.
    scaling_grid = {"scaling": np.logspace(-1, 1.5, 5)}
    kernels = [GridSearchCV(MultivariateNormalTransition(), scaling_grid)]

    abc = ABCSMC(
        wrapped_models,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        pop_size,
        transitions=kernels,
        eps=MedianEpsilon(0.2),
        sampler=sampler,
    )
    abc.new(db_path, {"y": observation})
    abc.do_not_stop_when_only_single_model_alive()

    # Negative epsilon threshold: presumably never reached, so the run length
    # is governed by max_nr_populations (checked through max_t below).
    eps_floor = -1
    history = abc.run(eps_floor, max_nr_populations=n_generations)

    # NOTE(review): arguments (0, None) presumably select model 0 at the
    # latest population -- confirm against the History API.
    frame, weights = history.get_distribution(0, None)
    samples = frame["x"].values

    # Weighted empirical CDF, padded at +/-200 with values 0 and 1 so the
    # interpolant is defined on the whole evaluation grid.
    order = np.argsort(samples)
    cdf_support = np.hstack((-200, samples[order], 200))
    cdf_values = np.hstack((0, np.cumsum(weights[order]), 1))
    empirical_cdf = sp.interpolate.interp1d(cdf_support, cdf_values)

    # Closed-form posterior for a zero-mean Gaussian prior and Gaussian noise.
    post_std = 1 / np.sqrt(1 / prior_std**2 + 1 / noise_std**2)
    post_mean = post_std**2 * observation / noise_std**2
    analytic_posterior = st.norm(post_mean, post_std)

    grid = np.linspace(-8, 8)
    cdf_gap = np.absolute(
        empirical_cdf(grid) - analytic_posterior.cdf(grid))
    worst_gap = cdf_gap.max()
    assert worst_gap < 0.052
    assert history.max_t == n_generations - 1

    emp_mean, emp_std = mean_and_std(samples, weights)
    assert abs(emp_mean - post_mean) < 0.07
    assert abs(emp_std - post_std) < 0.12
def test_gaussian_single_population(db_path, sampler):
    """Gaussian model run for a single population.

    The prior on ``x`` is ``RV("norm", 0, sigma_prior)`` and the model draws
    ``y ~ N(x, sigma_ground_truth)``.  After one population the weighted
    sample is compared against the closed-form Gaussian posterior computed
    below (largest CDF deviation, weighted mean, weighted standard
    deviation).

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance forwarded to ``ABCSMC``.
    """
    prior_std = 1
    truth_std = 1
    observation = 1
    n_generations = 1

    # The inner function keeps its original name in case the model wrapper
    # derives a label from it.
    def model(args):
        return {"y": st.norm(args['x'], truth_std).rvs()}

    wrapped_models = [FunctionModel(model)]
    pop_size = ConstantPopulationSize(600)
    priors = [Distribution(x=RV("norm", 0, prior_std))]

    abc = ABCSMC(
        wrapped_models,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        pop_size,
        eps=MedianEpsilon(0.1),
        sampler=sampler,
    )
    abc.new(db_path, {"y": observation})
    abc.do_not_stop_when_only_single_model_alive()

    # Negative epsilon threshold: presumably never reached, so the run length
    # is governed by max_nr_populations (checked through max_t below).
    eps_floor = -1
    history = abc.run(eps_floor, max_nr_populations=n_generations)

    # NOTE(review): arguments (0, None) presumably select model 0 at the
    # latest population -- confirm against the History API.
    frame, weights = history.get_distribution(0, None)
    samples = frame["x"].values

    # Weighted empirical CDF, padded at +/-200 with values 0 and 1 so the
    # interpolant is defined on the whole evaluation grid.
    order = np.argsort(samples)
    cdf_support = np.hstack((-200, samples[order], 200))
    cdf_values = np.hstack((0, np.cumsum(weights[order]), 1))
    empirical_cdf = sp.interpolate.interp1d(cdf_support, cdf_values)

    # Closed-form posterior for a zero-mean Gaussian prior and Gaussian noise.
    post_std = 1 / np.sqrt(1 / prior_std**2 + 1 / truth_std**2)
    post_mean = post_std**2 * observation / truth_std**2
    analytic_posterior = st.norm(post_mean, post_std)

    grid = np.linspace(-8, 8)
    cdf_gap = np.absolute(
        empirical_cdf(grid) - analytic_posterior.cdf(grid))
    worst_gap = cdf_gap.max()
    assert worst_gap < 0.12
    assert history.max_t == n_generations - 1

    emp_mean, emp_std = mean_and_std(samples, weights)
    assert abs(emp_mean - post_mean) < 0.07
    assert abs(emp_std - post_std) < 0.1
def test_gaussian_multiple_populations_adpative_population_size(
        db_path, sampler):
    """Gaussian model over four populations with an adaptive population size.

    The prior on ``x`` is ``N(0, sigma_x)`` and the model draws
    ``y ~ N(x, sigma_y)``.  The weighted sample obtained from the run is
    compared against the closed-form Gaussian posterior computed below:
    largest CDF deviation, weighted mean and weighted standard deviation
    must all stay within fixed tolerances.

    Array handling goes through NumPy and ``Series.values`` rather than the
    deprecated NumPy aliases in the ``scipy`` namespace and the removed
    pandas ``as_matrix()`` accessor.

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance forwarded to ``ABCSMC``.
    """
    # Function-scope import keeps this test self-contained with respect to
    # the NumPy alias.
    import numpy as np

    sigma_x = 1
    sigma_y = .5
    y_observed = 2
    nr_populations = 4

    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    models = [SimpleModel(model)]
    # NOTE(review): 600 is presumably the initial population size -- confirm
    # against AdaptivePopulationSize.
    population_size = AdaptivePopulationSize(600)
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(0, sigma_x))
    ]

    abc = ABCSMC(
        models,
        parameter_given_model_prior_distribution,
        MinMaxDistanceFunction(measures_to_use=["y"]),
        population_size,
        eps=MedianEpsilon(.2),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})

    # Negative epsilon threshold: presumably never reached, so the run length
    # is governed by max_nr_populations (checked through max_t below).
    minimum_epsilon = -1
    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # NOTE(review): arguments (0, None) presumably select model 0 at the
    # latest population -- confirm against the History API.
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["x"].values

    # Weighted empirical CDF, padded at +/-200 with values 0 and 1 so the
    # interpolant is defined on the whole evaluation grid.
    sort_indices = np.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        np.hstack((-200, posterior_x[sort_indices], 200)),
        np.hstack((0, np.cumsum(posterior_weight[sort_indices]), 1)),
    )

    # Closed-form posterior for a zero-mean Gaussian prior and Gaussian noise.
    sigma_x_given_y = 1 / np.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
    mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)

    x = np.linspace(-8, 8)
    max_distribution_difference = np.absolute(
        f_empirical(x) - expected_posterior_x.cdf(x)).max()
    assert max_distribution_difference < 0.15
    assert history.max_t == nr_populations - 1

    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < .07
    assert abs(std_emp - sigma_x_given_y) < .12
def test_two_competing_gaussians_single_population(db_path, sampler,
                                                   transition):
    """Model selection between two Gaussian models after one population.

    Both candidates share the same simulator ``y ~ N(x, sigma_y)`` and differ
    only in the prior mean of ``x`` (``mu_x_1`` versus ``mu_x_2``).  The
    model probabilities reported by the run are compared against the
    normalised values of ``N(mu, sqrt(sigma_x**2 + sigma_y**2)).pdf(y)``
    evaluated at each prior mean.

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance forwarded to ``ABCSMC``.
    :param transition: zero-argument callable; called once per model to
        build the entries of ``transitions``.
    """
    sigma_x = 0.5
    sigma_y = 0.5
    y_observed = 1
    mu_x_1, mu_x_2 = 0, 1
    nr_populations = 1

    # The inner function keeps its original name in case the model wrapper
    # derives a label from it.
    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    candidates = [FunctionModel(fn) for fn in (model, model)]
    pop_size = ConstantPopulationSize(500)
    priors = [
        Distribution(x=st.norm(mu_x_1, sigma_x)),
        Distribution(x=st.norm(mu_x_2, sigma_x)),
    ]

    abc = ABCSMC(
        candidates,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        pop_size,
        transitions=[transition(), transition()],
        eps=MedianEpsilon(0.02),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})
    abc.do_not_stop_when_only_single_model_alive()

    # Negative epsilon threshold: presumably never reached, so the run length
    # is governed by max_nr_populations (checked through max_t below).
    eps_floor = -1
    history = abc.run(eps_floor, max_nr_populations=nr_populations)
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        # Density of the observation under N(mu, sqrt(sigma_y^2 + sigma_x^2)).
        marginal_std = np.sqrt(sigma_y**2 + sigma_x**2)
        return st.norm(mu_x_model, marginal_std).pdf(y_observed)

    evidence_1 = p_y_given_model(mu_x_1)
    evidence_2 = p_y_given_model(mu_x_2)
    evidence_total = evidence_1 + evidence_2
    p1_expected = evidence_1 / evidence_total
    p2_expected = evidence_2 / evidence_total

    assert history.max_t == nr_populations - 1
    total_error = abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected)
    assert total_error < 0.07
def test_continuous_non_gaussian(db_path, sampler):
    """Non-Gaussian continuous model: uniform draw scaled by ``u``.

    The prior on ``u`` is ``RV("uniform", 0, 1)`` and the model returns
    ``rand() * u``.  After two populations the weighted empirical CDF of
    ``u`` is compared on a grid over ``[0.1, 1]`` against the reference CDF
    ``(log(u) - log(d)) / (-log(d))`` for ``u > d`` (zero otherwise), where
    ``d`` is the observed value.

    Array handling goes through NumPy rather than the deprecated NumPy
    aliases in the ``scipy`` namespace.

    :param db_path: database location handed to ``abc.new``.
    :param sampler: sampler instance forwarded to ``ABCSMC``.
    """
    # Function-scope import keeps this test self-contained with respect to
    # the NumPy alias.
    import numpy as np

    d_observed = .5

    def model(args):
        return {"result": np.random.rand() * args['u']}

    models = [SimpleModel(model)]
    population_size = ConstantPopulationSize(250)
    parameter_given_model_prior_distribution = [
        Distribution(u=RV("uniform", 0, 1))
    ]

    abc = ABCSMC(
        models,
        parameter_given_model_prior_distribution,
        MinMaxDistanceFunction(measures_to_use=["result"]),
        population_size,
        eps=MedianEpsilon(.2),
        sampler=sampler,
    )
    abc.new(db_path, {"result": d_observed})
    abc.do_not_stop_when_only_single_model_alive()

    # Negative epsilon threshold: presumably never reached, so the run length
    # is governed by max_nr_populations.
    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=2)

    # NOTE(review): arguments (0, None) presumably select model 0 at the
    # latest population -- confirm against the History API.
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["u"].values

    # Weighted empirical CDF, padded at +/-200 with values 0 and 1 so the
    # interpolant is defined on the whole evaluation grid.
    sort_indices = np.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        np.hstack((-200, posterior_x[sort_indices], 200)),
        np.hstack((0, np.cumsum(posterior_weight[sort_indices]), 1)),
    )

    def f_expected(u):
        # Already element-wise on arrays, so no vectorize wrapper is needed;
        # the boolean factor zeroes the CDF for u <= d_observed.
        return ((np.log(u) - np.log(d_observed)) / (-np.log(d_observed))
                * (u > d_observed))

    x = np.linspace(0.1, 1)
    max_distribution_difference = np.absolute(
        f_empirical(x) - f_expected(x)).max()
    assert max_distribution_difference < 0.12