Example #1
def test_single_parameter_percentile():
    dist_f = PercentileDistanceFunction(measures_to_use=["a"])
    abc = MockABC([{"a": -3}, {"a": 3}, {"a": 10}])
    dist_f.initialize(abc.sample_from_prior())
    d = dist_f({"a": 1}, {"a": 2})
    expected = (
        1 / (sp.percentile([-3, 3, 10], 80) - sp.percentile([-3, 3, 10], 20)))
    assert expected == d
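
As a standalone illustration of what the expected value encodes (plain NumPy instead of the test's `sp` alias; the names `prior_samples`, `scale`, and `distance` are illustrative, not pyABC API): the test asserts that the distance between {"a": 1} and {"a": 2} equals the inverse of the 80th-to-20th percentile range of the measure over the prior samples, i.e. |1 - 2| times that scale factor.

import numpy as np

# Recompute the `expected` value from the test above by hand.
prior_samples = [-3, 3, 10]
scale = 1 / (np.percentile(prior_samples, 80) - np.percentile(prior_samples, 20))
distance = abs(1 - 2) * scale  # single measure: the scaled absolute difference
print(distance)                # matches `expected`, since |1 - 2| == 1
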
Example #2
def test_two_competing_gaussians_multiple_population(db_path, sampler,
                                                     transition):
    # Define a Gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma)),
        Distribution(x=st.norm(mu_x_2, sigma))
    ]

    # We plug all the ABC setup together
    nr_populations = 3
    population_size = ConstantPopulationSize(400)

    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 transitions=[transition(), transition()],
                 sampler=sampler)

    # Finally we add metadata such as model names and
    # define where to store the results
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with 3 populations max
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        return st.norm(mu_x_model,
                       sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < .07
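
The expected probabilities in the final assertion follow from the analytic marginal likelihood: with x ~ N(mu_x, sigma) and y | x ~ N(x, sigma), y is marginally N(mu_x, sqrt(sigma**2 + sigma**2)), which is what p_y_given_model evaluates. A minimal self-contained check of that arithmetic (SciPy and NumPy only; variable names are illustrative):

import numpy as np
from scipy import stats

sigma, y_observed = 0.5, 1
mu_x_1, mu_x_2 = 0, 1

# Marginal density of y under each model: y = x + noise with x ~ N(mu, sigma)
# and noise ~ N(0, sigma), hence y ~ N(mu, sqrt(sigma**2 + sigma**2)).
evidence = [stats.norm(mu, np.sqrt(2) * sigma).pdf(y_observed)
            for mu in (mu_x_1, mu_x_2)]
p1_expected, p2_expected = (e / sum(evidence) for e in evidence)
print(p1_expected, p2_expected)  # the reference values for the model probabilities
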
Example #3
def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    # Define a Gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma))
    ]

    # We plug all the ABC setup together
    nr_populations = 2
    pop_size = ConstantPopulationSize(23, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)

    # Finally we add metadata such as model names and
    # define where to store the results
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with 2 populations max
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabilities
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        res = st.norm(mu_x_model, sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)
        return res

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized
                                              + p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized
                                              + p2_expected_unnormalized)
    assert history.max_t == nr_populations - 1
    # the following lines only test that we obtain correct numerical types
    try:
        mp0 = mp.p[0]
    except KeyError:
        mp0 = 0

    try:
        mp1 = mp.p[1]
    except KeyError:
        mp1 = 0

    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < sp.inf

    # check that the sampler only did nr_particles samples in the first round
    pops = history.get_all_populations()
    # since we had calibration (of epsilon), check that was saved
    pre_evals = pops[pops['t'] == History.PRE_TIME]['samples'].values
    assert pre_evals >= pop_size.nr_particles
    # our samplers should not have overhead in calibration, except batching
    batch_size = sampler.batch_size if hasattr(sampler, 'batch_size') else 1
    max_expected = pop_size.nr_particles + batch_size - 1
    if pre_evals > max_expected:
        # Violations have been observed occasionally for the Redis server
        # due to race conditions when incrementing the evaluations counter.
        # This could be overcome, but as it usually only happens for
        # low-runtime models, it should not be a problem. Thus, only
        # log a warning here.
        logger.warning(
            f"Had {pre_evals} simulations in the calibration iteration, "
            f"but a maximum of {max_expected} would have been sufficient for "
            f"the population size of {pop_size.nr_particles}.")