Пример #1
0
def test_log_pdf(xscale: Scale):
    """Compare Logistic and LogisticMixture densities with scipy on ``xscale``.

    Despite the name, this exercises ``pdf`` (not a log-density). A reference
    logistic is built with scipy in normalized coordinates; its density divided
    by ``xscale.width`` is compared with the density the ergo distributions
    report at the matching denormalized point.

    Args:
        xscale: Scale used to denormalize the test points and parameters.
    """
    normed_test_loc = 0.5
    normed_test_s = 0.1
    test_loc = xscale.denormalize_point(normed_test_loc)
    test_s = normed_test_s * xscale.width

    # The first component carries a weight of ~1.9e-29, so the mixture is
    # compared against the single Logistic that matches its second component.
    ergoLogisticMixture = LogisticMixture(
        components=[
            Logistic(
                loc=xscale.denormalize_point(0.2),
                s=0.5 * xscale.width,
                scale=xscale,
            ),
            Logistic(loc=test_loc, s=test_s, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )
    ergoLogistic = Logistic(loc=test_loc, s=test_s, scale=xscale)

    normed_scipydist = scipy.stats.logistic(normed_test_loc, normed_test_s)
    for x in np.linspace(0, 1, 10):
        denormalized_x = xscale.denormalize_point(x)
        expected_density = normed_scipydist.pdf(x) / xscale.width
        logistic_density = float(ergoLogistic.pdf(denormalized_x))
        mixture_density = float(ergoLogisticMixture.pdf(denormalized_x))

        # Two explicit comparisons instead of `a == approx(b) == approx(c)`:
        # the chained form compares two approx objects with each other, which
        # is not a documented use of pytest.approx and fails confusingly.
        assert expected_density == pytest.approx(logistic_density, rel=1e-3)
        assert logistic_density == pytest.approx(mixture_density, rel=1e-3)
Пример #2
0
def test_percentiles_from_mixture():
    """Check the median reported by ``LogisticMixture.percentiles``.

    The mixture puts equal weight on two logistics located at 1 and 2 with the
    same scale, so the value at the 0.5 percentile is expected to be about 1.5.

    Returns:
        The conditions produced by ``mixture.percentiles``.
    """
    mixture = LogisticMixture(
        components=[Logistic(loc=1, scale=0.1),
                    Logistic(loc=2, scale=0.1)],
        probs=[0.5, 0.5],
    )
    conditions = mixture.percentiles(percentiles=[0.1, 0.5, 0.9])

    # NOTE(review): assumes each returned condition stores the requested
    # percentile in `p` and the recovered value in `max`, matching the
    # IntervalCondition(p=..., max=...) usage elsewhere -- confirm. The
    # previous check selected on `max == 0.5`, which never matched, so the
    # assertion was silently skipped.
    medians = [condition for condition in conditions if condition.p == 0.5]
    assert medians, "percentiles() returned no condition for p=0.5"
    for condition in medians:
        assert condition.max == pytest.approx(1.5, rel=0.01)
    return conditions
Пример #3
0
def test_logistic_mixture_normalization():
    """Normalize/denormalize over the same bounds must reproduce the mixture.

    Also pins the concrete normalized parameters for the (-50, 50) range.
    """
    original = LogisticMixture([Logistic(-40, 1), Logistic(50, 10)], [0.5, 0.5])
    bounds_to_check = [(0, 10), (10, 100), (-10, 10), (-100, -10)]

    for lower, upper in bounds_to_check:
        round_tripped = original.normalize(lower, upper).denormalize(
            lower, upper)
        assert round_tripped == original

    expected_normalized = LogisticMixture(
        [Logistic(0.1, 0.01), Logistic(1, 0.1)],
        [0.5, 0.5],
    )
    assert original.normalize(-50, 50) == expected_normalized
Пример #4
0
def test_logistic_mixture_normalization():
    """Round-trip a mixture between two scales through its normalized form.

    A mixture on Scale(-50, 50) and one with doubled parameters on
    Scale(-100, 100) are expected to convert into each other, and the
    normalized form is pinned to explicit parameters on Scale(0, 1).
    """
    scale = Scale(-50, 50)
    scalex2 = Scale(-100, 100)
    equal_weights = [0.5, 0.5]

    mixture = LogisticMixture(
        components=[Logistic(-40, 1, scale), Logistic(50, 10, scale)],
        probs=equal_weights,
    )
    mixturex2 = LogisticMixture(
        components=[Logistic(-80, 2, scalex2), Logistic(100, 20, scalex2)],
        probs=[0.5, 0.5],
    )

    # Each mixture, moved onto the other's scale, must equal the other.
    assert mixturex2 == mixture.normalize().denormalize(scalex2)
    assert mixture == mixturex2.normalize().denormalize(scale)

    expected_normalized = LogisticMixture(
        [
            Logistic(0.1, 0.01, Scale(0, 1)),
            Logistic(1, 0.1, Scale(0, 1)),
        ],
        [0.5, 0.5],
    )
    assert mixture.normalize() == expected_normalized
Пример #5
0
def test_mixed_2(histogram):
    """Fit a mixture to histogram plus interval conditions; check hashing.

    Two condition tuples built from the same inputs must hash identically and
    be interchangeable as dictionary keys.

    Args:
        histogram: Fixture indexed by "xs" and "densities".
    """
    interval_specs = [
        (0.4, 1),
        (0.45, 1.2),
        (0.48, 1.3),
        (0.5, 2),
        (0.7, 2.2),
        (0.9, 2.3),
    ]

    def build_conditions():
        # Build a fresh, independent tuple on every call.
        histogram_condition = HistogramCondition(histogram["xs"],
                                                 histogram["densities"])
        intervals = tuple(
            IntervalCondition(p=p, max=upper) for p, upper in interval_specs)
        return (histogram_condition, ) + intervals

    conditions = build_conditions()
    dist = LogisticMixture.from_conditions(conditions,
                                           num_components=3,
                                           verbose=True)
    for far_point in (-5, 6):
        assert dist.pdf1(far_point) == pytest.approx(0, abs=0.1)

    my_cache = {conditions: 3}
    conditions_2 = build_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
Пример #6
0
def test_ppf_cdf_round_trip():
    """``ppf(cdf(x))`` should return ``x`` for a mixture fitted to samples."""
    samples = np.array([0.5, 0.4, 0.8, 0.8, 0.9, 0.95, 0.15, 0.1])
    mixture = LogisticMixture.from_samples(samples, {"num_components": 3})

    x = 0.65
    recovered_x = mixture.ppf(mixture.cdf(x))
    assert recovered_x == pytest.approx(x, rel=1e-3)
Пример #7
0
def test_mixture_from_histogram(histogram):
    """A mixture fitted to a histogram condition should track its densities.

    Args:
        histogram: Fixture indexed by "xs" and "densities".
    """
    histogram_condition = HistogramCondition(histogram["xs"],
                                             histogram["densities"])
    mixture = LogisticMixture.from_conditions([histogram_condition],
                                              num_components=3,
                                              verbose=True)

    points_and_targets = zip(histogram["xs"], histogram["densities"])
    for point, target_density in points_and_targets:
        assert mixture.pdf1(point) == pytest.approx(target_density, abs=0.2)
Пример #8
0
def test_mixture_from_percentile():
    """A single component fitted to a median condition should sit at it."""
    medians = [0.01, 0.1, 1, 3]
    for median in medians:
        median_condition = IntervalCondition(p=0.5, max=median)
        dist = LogisticMixture.from_conditions([median_condition],
                                               num_components=1,
                                               verbose=True)
        only_component = dist.components[0]
        loc = only_component.loc
        assert loc == pytest.approx(median, rel=0.1), loc
Пример #9
0
def test_fit_mixture_small():
    """Fit two components to four samples clustered near 0.15 and 0.85."""
    samples = np.array([0.1, 0.2, 0.8, 0.9])
    mixture = LogisticMixture.from_samples(samples, num_components=2)

    # Both clusters hold half of the samples, so weights should be near 0.5.
    for weight in mixture.probs:
        assert weight == pytest.approx(0.5, rel=0.1)

    ordered_locs = sorted(component.loc for component in mixture.components)
    assert ordered_locs[0] == pytest.approx(0.15, abs=0.1)
    assert ordered_locs[1] == pytest.approx(0.85, abs=0.1)
Пример #10
0
def test_mixture_from_percentile():
    """A single component fitted on Scale(0, 3) should sit at the median."""
    medians = [0.01, 0.1, 1, 3]
    for median in medians:
        median_condition = IntervalCondition(p=0.5, max=median)
        dist = LogisticMixture.from_conditions([median_condition],
                                               {"num_components": 1},
                                               verbose=True,
                                               scale=Scale(0, 3))
        only_component = dist.components[0]
        loc = only_component.base_dist.true_loc
        assert loc == pytest.approx(median, rel=0.1), loc
Пример #11
0
def test_mixture_from_histogram(histogram):
    """Fit a mixture to a histogram on a scale spanning the histogram's xs.

    Args:
        histogram: Fixture indexed by "xs" and "densities".
    """
    histogram_condition = HistogramCondition(histogram["xs"],
                                             histogram["densities"])
    fit_scale = Scale(min(histogram["xs"]), max(histogram["xs"]))

    mixture = LogisticMixture.from_conditions(
        [histogram_condition],
        {"num_components": 3},
        fit_scale,
    )

    points_and_targets = zip(histogram["xs"], histogram["densities"])
    for point, target_density in points_and_targets:
        assert mixture.pdf(point) == pytest.approx(target_density, abs=0.2)
Пример #12
0
def test_mixture_ppf_adversarial():
    """``ppf`` must stay accurate when one component has negligible weight."""
    # One component with weight ~1.9e-29 and one dominant component.
    components = [Logistic(10, 3), Logistic(5, 5)]
    weights = [1.8629593e-29, 1.0]
    mixture = LogisticMixture(components, weights)

    expected_by_quantile = [
        (0.5, 5.0),
        (0.01, -17.9755),
        (0.001, -29.5337),
        (0.99, 27.9755),
        (0.999, 39.5337),
    ]
    for quantile, expected in expected_by_quantile:
        assert mixture.ppf(quantile) == pytest.approx(expected, rel=1e-3)
Пример #13
0
def test_fit_samples(logistic_mixture):
    """Refit a mixture from its own samples and compare the parameters.

    Locations and scales are sorted independently before comparison, so the
    check does not depend on component order.

    Args:
        logistic_mixture: Fixture providing the mixture to sample from.
    """
    draws = [logistic_mixture.sample() for _ in range(1000)]
    fitted_mixture = LogisticMixture.from_samples(np.array(draws),
                                                  {"num_components": 2})

    true_locs = sorted(c.loc for c in logistic_mixture.components)
    true_scales = sorted(c.s for c in logistic_mixture.components)
    fitted_locs = sorted(c.loc for c in fitted_mixture.components)
    fitted_scales = sorted(c.s for c in fitted_mixture.components)

    for expected_loc, actual_loc in zip(true_locs, fitted_locs):
        assert actual_loc == pytest.approx(float(expected_loc), rel=0.2)
    for expected_scale, actual_scale in zip(true_scales, fitted_scales):
        assert actual_scale == pytest.approx(float(expected_scale), rel=0.2)
Пример #14
0
def test_weights_mixture():
    """The condition weighted 100 should determine the fitted location.

    That condition places the median at 2; the other three carry weight 0.01.
    """
    # (p, max, weight) for each interval condition.
    specs = [
        (0.4, 1, 0.01),
        (0.5, 2, 100),
        (0.8, 2.2, 0.01),
        (0.9, 2.3, 0.01),
    ]
    conditions = [
        IntervalCondition(p=p, max=upper, weight=weight)
        for p, upper, weight in specs
    ]
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 1},
                                           verbose=True,
                                           scale=Scale(0, 3))
    fitted_loc = dist.components[0].base_dist.true_loc
    assert fitted_loc == pytest.approx(2, rel=0.1)
Пример #15
0
def test_fit_mixture_large():
    """Fit two components to 2000 samples drawn from two known logistics."""
    # Draw order matters for the global RNG stream: loc=0.7 first, then 0.4.
    samples_near_07 = onp.random.logistic(loc=0.7, scale=0.1, size=1000)
    samples_near_04 = onp.random.logistic(loc=0.4, scale=0.2, size=1000)
    pooled = onp.concatenate([samples_near_07, samples_near_04])

    mixture = LogisticMixture.from_samples(pooled, num_components=2)

    # Locations and scales are sorted independently of each other.
    locs = sorted(component.loc for component in mixture.components)
    scales = sorted(component.scale for component in mixture.components)

    assert locs[0] == pytest.approx(0.4, abs=0.2)
    assert locs[1] == pytest.approx(0.7, abs=0.2)
    assert scales[0] == pytest.approx(0.1, abs=0.2)
    assert scales[1] == pytest.approx(0.2, abs=0.2)
Пример #16
0
def test_mixture_from_percentiles():
    """A fitted mixture's CDF should reproduce each interval condition."""
    # (p, max) for each interval condition.
    specs = [(0.1, 1), (0.5, 2), (0.6, 3)]
    conditions = [IntervalCondition(p=p, max=upper) for p, upper in specs]

    dist = LogisticMixture.from_conditions(conditions, {"num_components": 4},
                                           verbose=False,
                                           scale=Scale(0, 3))

    for condition in conditions:
        mass_below = dist.cdf(condition.max)
        assert mass_below == pytest.approx(condition.p, rel=0.1)
Пример #17
0
def test_mixture_from_percentiles():
    """A fitted 3-component mixture's CDF should match each condition."""
    # (p, max) for each interval condition.
    specs = [(0.1, 1), (0.5, 2), (0.6, 3)]
    conditions = [IntervalCondition(p=p, max=upper) for p, upper in specs]

    dist = LogisticMixture.from_conditions(conditions,
                                           num_components=3,
                                           verbose=True)

    for condition in conditions:
        mass_below = dist.cdf(condition.max)
        assert mass_below == pytest.approx(condition.p, rel=0.1)
Пример #18
0
def test_weights_mixture():
    """The condition weighted 100 should determine the fitted location.

    That condition places the median at 2; the other three carry weight 0.01.
    """
    # (p, max, weight) for each interval condition.
    specs = [
        (0.4, 1, 0.01),
        (0.5, 2, 100),
        (0.8, 2.2, 0.01),
        (0.9, 2.3, 0.01),
    ]
    conditions = [
        IntervalCondition(p=p, max=upper, weight=weight)
        for p, upper, weight in specs
    ]
    dist = LogisticMixture.from_conditions(conditions,
                                           num_components=1,
                                           verbose=True)
    fitted_loc = dist.components[0].loc
    assert fitted_loc == pytest.approx(2, rel=0.1)
Пример #19
0
def test_fit_mixture_small(fixed_params):
    """Fit a mixture on Scale(0, 1) to samples clustered near 0.15 and 0.85.

    Args:
        fixed_params: Fixture forwarded to ``LogisticMixture.from_samples``.
    """
    xscale = Scale(0, 1)
    samples = np.array([0.1, 0.2, 0.8, 0.9])
    mixture = LogisticMixture.from_samples(
        data=samples,
        fixed_params=fixed_params,
        scale=xscale,
    )

    # Both clusters hold half of the samples, so weights should be near 0.5.
    for weight in mixture.probs:
        assert weight == pytest.approx(0.5, rel=0.1)

    ordered_locs = sorted(component.base_dist.loc
                          for component in mixture.components)
    assert ordered_locs[0] == pytest.approx(0.15, abs=0.1)
    assert ordered_locs[1] == pytest.approx(0.85, abs=0.1)
Пример #20
0
def test_pdf(xscale: Scale):
    """Check that Logistic and LogisticMixture densities are well-formed.

    Two checks are made: the densities integrate to roughly 1 over the scale
    (trapezoid rule on 100 points), and, when ``xscale`` is not a LogScale,
    they agree with ``scipy.stats.logistic`` at ten points across the scale.

    Args:
        xscale: Scale used to denormalize the test points and parameters.
    """
    normed_test_loc = 0.5
    normed_test_s = 0.1
    test_loc = xscale.denormalize_point(normed_test_loc)
    test_s = normed_test_s * xscale.width

    # The first component carries a weight of ~1.9e-29, so the mixture is
    # compared against the single Logistic that matches its second component.
    ergoLogisticMixture = LogisticMixture(
        components=[
            Logistic(
                loc=xscale.denormalize_point(0.2),
                s=0.5 * xscale.width,
                scale=xscale,
            ),
            Logistic(loc=test_loc, s=test_s, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )
    ergoLogistic = Logistic(loc=test_loc, s=test_s, scale=xscale)

    # Make sure it integrates to 1.
    _xs = xscale.denormalize_points(np.linspace(0, 1, 100))
    densities_logistic = np.array([float(ergoLogistic.pdf(x)) for x in _xs])
    densities_mixture = np.array(
        [float(ergoLogisticMixture.pdf(x)) for x in _xs])
    auc_logistic = float(trapz(densities_logistic, x=_xs))
    auc_mixture = float(trapz(densities_mixture, x=_xs))

    # Explicit pairwise comparisons instead of `a == approx(b) == approx(c)`:
    # the chained form compares two approx objects with each other, which is
    # not a documented use of pytest.approx and fails confusingly.
    assert 1 == pytest.approx(auc_logistic, abs=0.03)
    assert auc_logistic == pytest.approx(auc_mixture, abs=0.03)

    if not isinstance(xscale, LogScale):
        scipydist = scipy.stats.logistic(test_loc, test_s)

        for x in np.linspace(xscale.denormalize_point(0),
                             xscale.denormalize_point(1), 10):
            logistic_density = float(ergoLogistic.pdf(x))
            mixture_density = float(ergoLogisticMixture.pdf(x))
            assert scipydist.pdf(x) == pytest.approx(logistic_density,
                                                     rel=1e-3)
            assert logistic_density == pytest.approx(mixture_density,
                                                     rel=1e-3)
0
def test_percentile_roundtrip():
    """Fit to percentile conditions, then recover them via ``percentiles``."""
    # (p, max) for each interval condition.
    specs = [
        (0.01, 0.61081324517545),
        (0.1, 0.8613634657212543),
        (0.25, 1),
        (0.5, 1.5),
        (0.75, 2),
        (0.9, 2.1386364698410034),
        (0.99, 2.3891870975494385),
    ]
    conditions = [IntervalCondition(p=p, max=upper) for p, upper in specs]

    mixture = LogisticMixture.from_conditions(conditions,
                                              num_components=3,
                                              verbose=True)

    requested = [condition.p for condition in conditions]
    recovered_conditions = mixture.percentiles(percentiles=requested)

    for original, recovered in zip(conditions, recovered_conditions):
        assert recovered.max == pytest.approx(original.max, rel=0.1)
Пример #22
0
def test_fit_mixture_large(fixed_params):
    """Fit a mixture on Scale(-2, 3) to 2000 samples from two logistics.

    Args:
        fixed_params: Fixture forwarded to ``LogisticMixture.from_samples``.
    """
    xscale = Scale(-2, 3)
    # Draw order matters for the global RNG stream: loc=0.7 first, then 0.4.
    samples_near_07 = onp.random.logistic(loc=0.7, scale=0.1, size=1000)
    samples_near_04 = onp.random.logistic(loc=0.4, scale=0.2, size=1000)
    pooled = onp.concatenate([samples_near_07, samples_near_04])

    mixture = LogisticMixture.from_samples(
        data=pooled,
        fixed_params=fixed_params,
        scale=xscale,
    )

    # FIXME: What's going on below with scales?
    # Components are ordered by (loc, s) of their base distribution.
    components = sorted((component.base_dist.loc, component.base_dist.s)
                        for component in mixture.components)

    # (component index, field index, expected value); field 0 is loc, 1 is s.
    expectations = [
        (0, 0, xscale.normalize_point(0.4)),
        (1, 0, xscale.normalize_point(0.7)),
        (0, 1, 0.2),
        (1, 1, 0.1),
    ]
    for component_index, field_index, expected in expectations:
        actual = components[component_index][field_index]
        assert actual == pytest.approx(expected, abs=0.2)
Пример #23
0
def test_truncated_ppf(xscale: Scale):
    """Check ``ppf`` of truncated logistics against independent references.

    Without bounds, the truncated distribution is compared with a scipy
    logistic built in normalized coordinates. With a floor, a ceiling, or
    both, it is compared with a one-component LogisticMixture wrapping the
    same truncation and with a numerical inversion of its own ``cdf``.

    Args:
        xscale: Scale used to denormalize the test points and parameters.
    """
    normed_test_loc = 0.5
    normed_test_s = 0.1
    test_loc = xscale.denormalize_point(normed_test_loc)
    test_s = normed_test_s * xscale.width

    normed_baseline_dist = scipy.stats.logistic(normed_test_loc, normed_test_s)
    quantiles = np.linspace(0.01, 0.99, 8)

    def ppf_through_cdf(dist, q):
        # Invert the cdf by bisection between two extreme quantiles.
        lower = dist.ppf(0.0001)
        upper = dist.ppf(0.9999)
        return scipy.optimize.bisect(lambda x: dist.cdf(x) - q,
                                     lower,
                                     upper,
                                     maxiter=1000)

    def truncated(**bounds):
        # Fresh truncated logistic; `bounds` may hold floor and/or ceiling.
        return Truncate(Logistic(loc=test_loc, s=test_s, scale=xscale),
                        **bounds)

    def single_component_mixture(**bounds):
        # Mixture whose only component is the same truncated logistic.
        return LogisticMixture(
            components=[truncated(**bounds)],  # type: ignore
            probs=[1.0],
        )

    def assert_ppf_consistent(dist, mix):
        for q in quantiles:
            assert dist.ppf(q) == pytest.approx(float(mix.ppf(q)), rel=0.001)
            assert dist.ppf(q) == pytest.approx(float(
                ppf_through_cdf(dist, q)),
                                                rel=0.001)

    # No bounds
    dist_w_no_bounds = truncated()
    for q in quantiles:
        baseline = float(xscale.denormalize_point(normed_baseline_dist.ppf(q)))
        assert dist_w_no_bounds.ppf(q) == pytest.approx(baseline, rel=0.001)

    # Floor
    assert_ppf_consistent(
        truncated(floor=xscale.denormalize_point(0.5)),
        single_component_mixture(floor=xscale.denormalize_point(0.5)),
    )

    # Ceiling
    assert_ppf_consistent(
        truncated(ceiling=xscale.denormalize_point(0.8)),
        single_component_mixture(ceiling=xscale.denormalize_point(0.8)),
    )

    # Floor and Ceiling
    assert_ppf_consistent(
        truncated(
            floor=xscale.denormalize_point(0.2),
            ceiling=xscale.denormalize_point(0.8),
        ),
        single_component_mixture(
            floor=xscale.denormalize_point(0.2),
            ceiling=xscale.denormalize_point(0.8),
        ),
    )
Пример #24
0
def test_mixture_ppf_adversarial():
    """``ppf`` must stay accurate for two numerically awkward mixtures."""

    def check_quantiles(mixture, expected_by_quantile):
        for quantile, expected in expected_by_quantile:
            assert mixture.ppf(quantile) == pytest.approx(expected, rel=1e-3)

    # One component with weight ~1.9e-29 and one dominant component.
    lopsided = LogisticMixture([Logistic(10, 3), Logistic(5, 5)],
                               [1.8629593e-29, 1.0])
    check_quantiles(
        lopsided,
        [
            (0.5, 5.0),
            (0.01, -17.9755),
            (0.001, -29.5337),
            (0.99, 27.9755),
            (0.999, 39.5337),
        ],
    )

    # Two almost identical, hugely overlapping components.
    overlapping = LogisticMixture(
        [
            Logistic(4000000.035555004, 200000.02),
            Logistic(4000000.0329152746, 200000.0),
        ],
        [0.5, 0.5],
    )
    check_quantiles(
        overlapping,
        [
            (0.5, 4000000.0342351394),
            (0.01, 3080976.018257023),
            (0.001, 2618649.009437881),
            (0.99, 4919024.050213255),
            (0.999, 5381351.059032397),
        ],
    )
Пример #25
0
def test_mixture_ppf():
    """The median of a mixture with known properties should be 10."""
    # Two parameter rows that differ only in their first entry (15 and 5).
    logistic_params = np.array([[15, 2.3658268, 0.5], [5, 2.3658268, 0.5]])
    mixture = LogisticMixture.from_params(logistic_params)

    median = mixture.ppf(0.5)
    assert median == pytest.approx(10, rel=1e-3)
Пример #26
0
def test_mixture_cdf():
    """The CDF at 15 should be 0.5 for a mixture whose median is 15."""
    # Two parameter rows that differ only in their first entry (10 and 20).
    logistic_params = np.array([[10, 3.658268, 0.5], [20, 3.658268, 0.5]])
    mixture = LogisticMixture.from_params(logistic_params)

    mass_below_median = mixture.cdf(15)
    assert mass_below_median == pytest.approx(0.5, rel=1e-3)