示例#1
0
def test_grid_search_single_sample_multivariate_normal():
    """
    Fit a grid-searched transition on a single sample with ``cv=5``.

    The number of requested CV splits is larger than the number of
    samples. The fit has to complete anyway, and afterwards the ``cv``
    attribute of the grid search must still hold the requested value.
    """
    n_splits = 5
    search = GridSearchCV(
        MultivariateNormalTransition(),
        {"scaling": np.logspace(-5, 1.5, 5)},
        cv=n_splits,
    )
    # data(1): the helper is asked for exactly one sample
    samples, weights = data(1)
    search.fit(samples, weights)
    assert search.cv == n_splits
示例#2
0
def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
    """
    Run ABCSMC on a 1-d Gaussian model with a cross-validated transition.

    The prior is ``x ~ N(0, sigma_x)`` and the model draws
    ``y ~ N(x, sigma_y)``. The perturbation kernel is a
    ``MultivariateNormalTransition`` whose ``scaling`` is selected via
    ``GridSearchCV``. The weighted posterior sample of ``x`` is compared
    against the analytic normal posterior, both via the maximum CDF
    difference and via mean and standard deviation.
    """
    sigma_x = 1
    sigma_y = 0.5
    y_observed = 2

    def model(args):
        noisy_observation = st.norm(args['x'], sigma_y)
        return {"y": noisy_observation.rvs()}

    # --- ABCSMC configuration -------------------------------------------
    models = [FunctionModel(model)]
    nr_populations = 4
    population_size = ConstantPopulationSize(600)
    priors = [Distribution(x=st.norm(0, sigma_x))]
    scaling_grid = {"scaling": np.logspace(-1, 1.5, 5)}
    transitions = [GridSearchCV(MultivariateNormalTransition(), scaling_grid)]
    abc = ABCSMC(
        models,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        population_size,
        transitions=transitions,
        eps=MedianEpsilon(0.2),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})

    # A negative minimum epsilon is passed so that the run is expected to
    # be bounded by max_nr_populations (see the max_t assertion below).
    minimum_epsilon = -1

    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # --- empirical CDF of the weighted posterior sample ------------------
    posterior_frame, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_frame["x"].values
    order = np.argsort(posterior_x)
    # Pad with far-away support points so the interpolant goes from 0 to 1
    # and covers the whole evaluation grid.
    cdf_support = np.hstack((-200, posterior_x[order], 200))
    cdf_values = np.hstack((0, np.cumsum(posterior_weight[order]), 1))
    f_empirical = sp.interpolate.interp1d(cdf_support, cdf_values)

    # --- analytic posterior for normal prior and normal likelihood -------
    sigma_x_given_y = 1 / np.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
    mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)

    grid = np.linspace(-8, 8)
    cdf_gap = np.abs(f_empirical(grid) - expected_posterior_x.cdf(grid))
    max_distribution_difference = np.max(cdf_gap)
    assert max_distribution_difference < 0.052
    assert history.max_t == nr_populations - 1

    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < 0.07
    assert abs(std_emp - sigma_x_given_y) < 0.12
示例#3
0
def test_grid_search_multivariate_normal():
    """
    Smoke test: grid search over ``scaling`` with ``n_jobs=1``.

    Fits the grid-searched ``MultivariateNormalTransition`` on the output
    of ``data(20)``; the test passes if the fit does not raise.
    """
    search = GridSearchCV(
        MultivariateNormalTransition(),
        {"scaling": np.logspace(-5, 1.5, 5)},
        n_jobs=1,
    )
    samples, weights = data(20)
    search.fit(samples, weights)