def test_single_parameter_percentile():
    dist_f = PercentileDistanceFunction(measures_to_use=["a"])
    abc = MockABC([{"a": -3}, {"a": 3}, {"a": 10}])
    dist_f.initialize(abc.sample_from_prior())
    d = dist_f({"a": 1}, {"a": 2})
    expected = (
        1 / (sp.percentile([-3, 3, 10], 80) - sp.percentile([-3, 3, 10], 20)))
    assert expected == d
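
# Illustrative sketch, not part of the test suite (the leading underscore
# keeps pytest from collecting it): the `expected` value above encodes the
# percentile-weighted distance |1 - 2| / (P80 - P20), with the percentiles
# taken over the mock calibration samples [-3, 3, 10]. numpy is used here
# in place of the scipy alias `sp` from the test.
def _sketch_percentile_distance_expected():
    import numpy as np
    samples = [-3, 3, 10]
    scale = np.percentile(samples, 80) - np.percentile(samples, 20)
    return abs(1 - 2) / scale  # same value as `expected` in the test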

def test_two_competing_gaussians_multiple_population(db_path, sampler,
                                                      transition):
    # Define a Gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their means differ.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma)),
        Distribution(x=st.norm(mu_x_2, sigma))
    ]

    # We plug the whole ABC setup together
    nr_populations = 3
    population_size = ConstantPopulationSize(400)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 transitions=[transition(), transition()],
                 sampler=sampler)

    # Finally we add meta data such as model names and
    # define where to store the results.
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with at most nr_populations populations
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        return st.norm(mu_x_model,
                       sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized \
        / (p1_expected_unnormalized + p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized \
        / (p1_expected_unnormalized + p2_expected_unnormalized)

    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .07
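
# Illustrative sketch, not part of the test suite: the reference value used
# above marginalizes the Gaussian likelihood over the Gaussian prior,
#     p(y | m) = integral N(y; x, sigma) * N(x; mu_m, sigma) dx
#              = N(y; mu_m, sqrt(sigma**2 + sigma**2)),
# which is where the standard deviation sqrt(sigma**2 + sigma**2) in
# p_y_given_model comes from.
def _sketch_analytic_model_probabilities():
    import numpy as np
    import scipy.stats as st
    sigma, y_observed = .5, 1
    evidences = [st.norm(mu, np.sqrt(2) * sigma).pdf(y_observed)
                 for mu in (0, 1)]
    # normalize the evidences to obtain the expected model probabilities
    return [e / sum(evidences) for e in evidences]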

def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    # Define a Gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their means differ.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma))
    ]

    # We plug the whole ABC setup together
    nr_populations = 2
    pop_size = ConstantPopulationSize(23, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)

    # Finally we add meta data such as model names and
    # define where to store the results.
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with at most nr_populations populations
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        res = st.norm(mu_x_model,
                      sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)
        return res

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized \
        / (p1_expected_unnormalized + p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized \
        / (p1_expected_unnormalized + p2_expected_unnormalized)

    assert history.max_t == nr_populations - 1

    # The next lines only test that we obtain correct numerical types
    try:
        mp0 = mp.p[0]
    except KeyError:
        mp0 = 0
    try:
        mp1 = mp.p[1]
    except KeyError:
        mp1 = 0
    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < sp.inf

    # Check that the sampler only performed nr_particles samples
    # in the first round
    pops = history.get_all_populations()
    # Since we had a calibration (of epsilon), check that it was saved
    pre_evals = pops[pops['t'] == History.PRE_TIME]['samples'].values
    assert pre_evals >= pop_size.nr_particles
    # Our samplers should not have overhead in calibration, except batching
    batch_size = sampler.batch_size if hasattr(sampler, 'batch_size') else 1
    max_expected = pop_size.nr_particles + batch_size - 1
    if pre_evals > max_expected:
        # Violations have been observed occasionally for the redis server
        # due to runtime conditions with the increase of the evaluations
        # counter. This could be overcome, but as it usually only happens
        # for low-runtime models, this should not be a problem. Thus, only
        # print a warning here.
        logger.warning(
            f"Had {pre_evals} simulations in the calibration iteration, "
            f"but a maximum of {max_expected} would have been sufficient for "
            f"the population size of {pop_size.nr_particles}.")
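
# Illustrative sketch, not part of the test suite: the calibration bound
# checked at the end of the function above. A batching sampler may overshoot
# the requested population size by at most one incomplete batch, so for
# example with 23 particles and a batch size of 5, up to 23 + 5 - 1 = 27
# calibration simulations are still acceptable.
def _sketch_calibration_bound(nr_particles=23, batch_size=5):
    return nr_particles + batch_size - 1  # == 27 for the example values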