def test_bb_quantile():
    """Exercise ``make_bb_quantile_closure`` on a three-point weighted sample.

    A closure built from a single quantile is compared directly against a
    number; a closure built from a list of quantiles returns something that
    is looked up by each requested quantile.
    """
    sample = np.array([0, 1, 2])
    sample_weights = np.array([0.1, 0.4, 0.5])

    # Single-quantile form.
    median_fn = mabsbb.make_bb_quantile_closure(0.5)
    assert median_fn(sample, sample_weights) == pytest.approx(1)

    # Multi-quantile form: requested quantile -> expected value.
    expected_by_quantile = {0: 0, 0.1: 0, 0.5: 1, 1: 2}
    multi_fn = mabsbb.make_bb_quantile_closure([0, 0.1, 0.5, 1])
    res = multi_fn(sample, sample_weights)

    for quantile, expected in expected_by_quantile.items():
        assert res[quantile] == pytest.approx(expected)
# Example no. 2
# 0
def test_bayesian_bootstrap_vs_bootstrap_geometric_quantiles(
        spark_context_or_none):
    """Compare ``mabsbb`` and ``mafsb`` bootstrap quantiles on geometric data.

    Draws 20000 geometric(p=0.1) samples from a seeded ``RandomState``, runs
    ``bootstrap_one_branch`` from both modules for the quantiles 0.3, 0.5 and
    0.9, and requires every cell of the two result tables to agree within a
    relative tolerance of 5e-3.
    """
    quantiles = [0.3, 0.5, 0.9]
    num_enrollments = 20000

    # Fixed seed so the generated sample is reproducible.
    data = np.random.RandomState(42).geometric(p=0.1, size=num_enrollments)

    def calc_quantiles(x):
        # Map each requested quantile to its plain (unweighted) estimate.
        return {q: v for q, v in zip(quantiles, np.quantile(x, quantiles))}

    bb_stat_fn = mabsbb.make_bb_quantile_closure(quantiles)
    bb_res = mabsbb.bootstrap_one_branch(
        data, stat_fn=bb_stat_fn, sc=spark_context_or_none)
    pboot_res = mafsb.bootstrap_one_branch(
        data, stat_fn=calc_quantiles, sc=spark_context_or_none)

    # Cell-by-cell comparison; on failure the message carries the offending
    # row/column labels together with both full results.
    for row in bb_res.index:
        for col in bb_res.columns:
            bb_value = bb_res.loc[row, col]
            reference = pboot_res.loc[row, col]
            assert bb_value == pytest.approx(reference, rel=5e-3), (
                row, col, bb_res, pboot_res)
# Example no. 3
# 0
def test_bayesian_bootstrap_vs_bootstrap_poisson_quantiles(spark_context):
    """Compare ``mabsbb`` and ``mafsb`` bootstrap quantiles on Poisson data.

    Draws 10001 Poisson(lam=10) samples from a seeded ``RandomState``, runs
    ``bootstrap_one_branch`` from both modules for the quantiles 0.1, 0.5 and
    0.95, and requires every cell of the two result tables to agree within a
    relative tolerance of 5e-3.
    """
    quantiles = [0.1, 0.5, 0.95]
    num_enrollments = 10001

    # Fixed seed so the generated sample is reproducible.
    data = np.random.RandomState(42).poisson(lam=10, size=num_enrollments)

    def calc_quantiles(x):
        # Map each requested quantile to its plain (unweighted) estimate.
        return {q: v for q, v in zip(quantiles, np.quantile(x, quantiles))}

    bb_stat_fn = mabsbb.make_bb_quantile_closure(quantiles)
    # The Spark context is passed positionally as the first argument here.
    bb_res = mabsbb.bootstrap_one_branch(
        spark_context, data, stat_fn=bb_stat_fn)
    pboot_res = mafsb.bootstrap_one_branch(
        spark_context, data, stat_fn=calc_quantiles)

    # Cell-by-cell comparison; on failure the message carries the offending
    # row/column labels together with both full results.
    for row in bb_res.index:
        for col in bb_res.columns:
            bb_value = bb_res.loc[row, col]
            reference = pboot_res.loc[row, col]
            assert bb_value == pytest.approx(reference, rel=5e-3), (
                row, col, bb_res, pboot_res)