Пример #1
0
    def community_conditions(self,
                             crossentropy_weight=0.1,
                             interval_weight=10000.0):
        """Build the list of conditions derived from ``self.community_pairs``.

        The first condition is always a ``CrossEntropyCondition`` against a
        ``PointDensity`` built from the normalized community pairs. When
        ``self.low_open`` / ``self.high_open`` are truthy, an
        ``IntervalCondition`` for the mass below ``self.scale.low`` / above
        ``self.scale.high`` is appended, in that order.

        :param crossentropy_weight: weight given to the cross-entropy condition
        :param interval_weight: weight given to each open-bound interval
            condition
        :return: list of conditions, cross-entropy condition first
        """
        from ergo.conditions import (
            Condition,
            CrossEntropyCondition,
            IntervalCondition,
        )

        normalized_pairs = self.community_pairs(normalized=True)

        # The resulting density is normalized (sums to 1) even when the pairs
        # themselves do not.
        community_density = dist.PointDensity.from_pairs(
            normalized_pairs,
            scale=self.scale,
            normalized=True,
        )

        result: List[Condition] = [
            CrossEntropyCondition(community_density,
                                  weight=crossentropy_weight)
        ]

        if self.low_open:
            below_range = IntervalCondition(p=self.p_below,
                                            max=self.scale.low,
                                            weight=interval_weight)
            result.append(below_range)

        if self.high_open:
            above_range = IntervalCondition(p=self.p_above,
                                            min=self.scale.high,
                                            weight=interval_weight)
            result.append(above_range)

        return result
Пример #2
0
def test_weights_mixture():
    """Check that the heavily weighted condition drives the fitted location.

    Three conditions carry weight 0.01 and one (p=0.5, max=2) carries weight
    100; the single fitted component's ``true_loc`` must land within 10% of 2.
    """
    # (p, max, weight) for each interval condition, in fitting order.
    specs = [
        (0.4, 1, 0.01),
        (0.5, 2, 100),
        (0.8, 2.2, 0.01),
        (0.9, 2.3, 0.01),
    ]
    conditions = [
        IntervalCondition(p=prob, max=upper, weight=weight)
        for prob, upper, weight in specs
    ]
    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 1},
        verbose=True,
        scale=Scale(0, 3),
    )
    fitted_loc = mixture.components[0].base_dist.true_loc
    assert fitted_loc == pytest.approx(2, rel=0.1)
Пример #3
0
def test_mixture_from_percentiles():
    """Fit a 4-component mixture to three percentile conditions.

    For every condition, the fitted CDF at the condition's ``max`` must match
    the condition's ``p`` within 10%.
    """
    # (p, max) pairs describing the requested percentiles.
    targets = [(0.1, 1), (0.5, 2), (0.6, 3)]
    conditions = [
        IntervalCondition(p=prob, max=upper) for prob, upper in targets
    ]
    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 4},
        verbose=False,
        scale=Scale(0, 3),
    )
    for fitted_condition in conditions:
        observed = mixture.cdf(fitted_condition.max)
        assert observed == pytest.approx(fitted_condition.p, rel=0.1)
Пример #4
0
def test_normalization_interval_condition():
    """Check normalize/denormalize round-trips for ``IntervalCondition``.

    Each case builds a condition and a scale, normalizes then denormalizes
    the condition on that scale, and requires equality with the original.
    A final check pins one concrete normalized value.
    """
    def check_roundtrip(p, lower, upper, low, high):
        original = IntervalCondition(p=p, min=lower, max=upper)
        scale = Scale(low, high)
        assert original.normalize(scale).denormalize(scale) == original

    # Columns: p, condition min, condition max, scale low, scale high.
    roundtrip_cases = [
        (0.5, 10, 100, 10, 1000),  # straightforward scenario
        (0.5, None, 10000, 10, 1000),  # left open
        (0.5, 10, None, 10, 1000),  # right open
        (0.5, -1000, -100, -10000, -1000),  # negative values
        (1, 10, 100, 10, 1000),  # p = 1
        (1, 0, 1000, 10, 100),  # interval bigger than scale
    ]
    for p, lower, upper, low, high in roundtrip_cases:
        check_roundtrip(p, lower, upper, low, high)

    normalized = IntervalCondition(p=0.5, min=0, max=5).normalize(
        Scale(0, 10))
    assert normalized == IntervalCondition(p=0.5, min=0, max=0.5)
Пример #5
0
def test_variance_condition():
    """Check that ``VarianceCondition`` steers a fitted ``HistogramDist``.

    A baseline histogram is fitted first; its variance plus 0.01 becomes the
    target. A weight-1 variance condition must lower the histogram's peak,
    and a weight-1000 one must bring the variance within 0.001 of the target.
    """
    def get_variance(hist):
        # Variance of the bin probabilities over an evenly spaced grid
        # spanning the histogram's scale.
        grid = np.linspace(hist.scale.low, hist.scale.high, hist.ps.size)
        centered = grid - np.dot(hist.ps, grid)
        return np.dot(hist.ps, np.square(centered))

    base_conditions = [
        MaxEntropyCondition(weight=0.1),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    baseline = HistogramDist.from_conditions(base_conditions, verbose=True)
    target_variance = get_variance(baseline) + 0.01

    # Increase in variance should decrease peak
    mild = VarianceCondition(variance=target_variance, weight=1)
    mild_fit = HistogramDist.from_conditions([*base_conditions, mild],
                                             verbose=True)
    assert np.max(mild_fit.ps) < np.max(baseline.ps)

    # Highly weighted variance condition should make var very close to
    # specified var
    strong = VarianceCondition(variance=target_variance, weight=1000)
    strong_fit = HistogramDist.from_conditions([*base_conditions, strong],
                                               verbose=True)
    assert get_variance(strong_fit) == pytest.approx(float(target_variance),
                                                     abs=0.001)
Пример #6
0
def test_variance_condition():
    """Check that ``VarianceCondition`` steers a fitted ``PointDensity``.

    A baseline density is fitted on ``Scale(0, 1)``; its variance plus 0.01
    becomes the target. A weight-1 variance condition must lower the peak of
    ``normed_densities``, and a weight-100000 one must bring the variance
    within 0.001 of the target.
    """
    def fit(conditions):
        # Every fit in this test uses the same verbosity and unit scale.
        return PointDensity.from_conditions(conditions,
                                            verbose=True,
                                            scale=Scale(0, 1))

    base_conditions = [
        MaxEntropyCondition(weight=0.001),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    baseline = fit(base_conditions)
    target_variance = baseline.variance() + 0.01

    # Increase in variance should decrease peak
    mild = VarianceCondition(variance=target_variance, weight=1)
    mild_fit = fit([*base_conditions, mild])
    baseline_peak = np.max(baseline.normed_densities)
    assert np.max(mild_fit.normed_densities) < baseline_peak

    # Highly weighted variance condition should make var very close to
    # specified var
    strong = VarianceCondition(variance=target_variance, weight=100000)
    strong_fit = fit([*base_conditions, strong])
    assert strong_fit.variance() == pytest.approx(float(target_variance),
                                                  abs=0.001)
Пример #7
0
def test_mixture_from_percentile():
    """Fit a single-component mixture to one p=0.5 condition per value.

    For each upper bound tried, the component's ``true_loc`` must be within
    10% of that bound; the fitted location is reported on failure.
    """
    for upper in (0.01, 0.1, 1, 3):
        mixture = LogisticMixture.from_conditions(
            [IntervalCondition(p=0.5, max=upper)],
            {"num_components": 1},
            verbose=True,
            scale=Scale(0, 3),
        )
        fitted_loc = mixture.components[0].base_dist.true_loc
        assert fitted_loc == pytest.approx(upper, rel=0.1), fitted_loc
Пример #8
0
    def percentiles(self, percentiles=None):
        """Return this distribution's percentiles as ``IntervalCondition``s.

        :param percentiles: iterable of probabilities to evaluate with
            ``self.ppf``. Defaults to
            ``[0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]`` when ``None``.
            Any iterable is accepted, including one-shot iterators.
        :return: list with one ``IntervalCondition`` per requested
            percentile, built from the percentile (first positional
            argument) and ``max=float(self.ppf(percentile))``.
        """
        from ergo.conditions import IntervalCondition

        if percentiles is None:
            percentiles = [0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]
        else:
            # The sequence is traversed twice below (once for ppf, once for
            # pairing); materialize it so a generator or other one-shot
            # iterable is not exhausted by the first pass and silently
            # produce an empty result.
            percentiles = list(percentiles)

        values = [self.ppf(q) for q in percentiles]
        return [
            IntervalCondition(percentile, max=float(value))
            for (percentile, value) in zip(percentiles, values)
        ]
Пример #9
0
def test_interval_condition():
    """Check ``IntervalCondition.loss`` and ``describe_fit`` on a uniform dist.

    All conditions are evaluated against ``Uniform(min=-1, max=1)`` and
    compared to exact expected values.
    """
    uniform = Uniform(min=-1, max=1)

    def loss_for(**condition_kwargs):
        return IntervalCondition(**condition_kwargs).loss(uniform)

    def fit_for(**condition_kwargs):
        return IntervalCondition(**condition_kwargs).describe_fit(uniform)

    assert loss_for(p=0.5, min=0, max=1) == 0
    assert loss_for(p=0.25, min=0, max=1) == 0.25**2
    assert loss_for(p=1, max=0) == 0.5**2
    assert loss_for(p=1) == 0
    assert loss_for(p=0, min=-1, max=1) == 1
    assert loss_for(p=0, min=-1, max=1, weight=10) == 10

    assert fit_for(p=0.25, min=0, max=1)["loss"] == 0.25**2
    assert fit_for(p=0, min=-1, max=0)["p_in_interval"] == 0.5
    assert fit_for(p=1, min=-1, max=0)["p_in_interval"] == 0.5
Пример #10
0
def test_fit_hist_regression_1():
    """
    Regression test for the bug reported as
    "This custom question has a weird histogram - why?"

    Reproduction: https://elicit.ought.org/builder/gflpsSBAb

    Background on the bug:
    https://docs.google.com/document/d/1CFklTKtbKzXi6-lRaEsX4ZiY3Yzpbfdg7i2j1NvKP34/edit#heading=h.ph1huakxn33f
    """
    # (p, max) pairs for the three percentile conditions.
    percentile_targets = ((0.25, 2.0), (0.75, 4.0), (0.9, 6.0))
    conditions = [
        IntervalCondition(p=prob, max=upper)
        for prob, upper in percentile_targets
    ]
    conditions.append(MaxEntropyCondition(weight=0.1))

    fitted = HistogramDist.from_conditions(conditions,
                                           scale=Scale(low=0, high=52))

    assert fitted.cdf(2) == pytest.approx(0.25, abs=0.05)
    assert fitted.ppf(0.9) == pytest.approx(6, abs=1)
Пример #11
0
def test_interval_plus_entropy(scale: Scale):
    """Fit a ``PointDensity`` to one interval plus a max-entropy condition.

    The interval's upper bound is the normalized point 0.3 mapped onto
    ``scale``. The fitted ``normed_densities`` may hold at most three
    distinct values.
    """
    upper_bound = scale.denormalize_point(0.3)
    interval = IntervalCondition(p=0.5, max=upper_bound)
    entropy = MaxEntropyCondition(weight=0.01)

    fitted = PointDensity.from_conditions([interval, entropy], scale=scale)

    # We expect at most 3 different densities: one for inside the interval,
    # one for outside, and one between.
    distinct_densities = np.unique(fitted.normed_densities)
    assert distinct_densities.size <= 3
Пример #12
0
def test_fit_point_density_regression_p_in_range():
    """
    Regression test for a bug with two symptoms:
    1. less than 100% of p ends up inside the full range of a closed-bound
       question
    2. p is pushed against the edges of the range instead of being spread
       evenly across it

    Example: https://elicit.ought.org/builder/Mib4yBPDE

    Background on the bug:
    https://docs.google.com/document/d/1CFklTKtbKzXi6-lRaEsX4ZiY3Yzpbfdg7i2j1NvKP34/edit#heading=h.lypz52bknpyq
    """
    full_range_condition = IntervalCondition(min=0, max=1, p=0.5)
    fitted = PointDensity.from_conditions(conditions=[full_range_condition],
                                          scale=Scale(0, 1))

    assert fitted.cdf(1) == pytest.approx(1, abs=1e-4)
Пример #13
0
def test_mode_condition():
    """Check that ``ModeCondition`` raises density at the requested outcome.

    Adding a default-weight mode condition at 0.25 must raise ``pdf(0.25)``
    over the baseline fit, and a weight-1000 mode condition must reach a loss
    of 0 within 0.001 on its own fitted distribution.
    """
    outcome = 0.25
    base_conditions = [IntervalCondition(p=0.4, max=0.5)]
    baseline = HistogramDist.from_conditions(base_conditions, verbose=True)

    # Most likely condition should increase chance of specified outcome
    with_mode = [*base_conditions, ModeCondition(outcome=0.25)]
    mode_fit = HistogramDist.from_conditions(with_mode, verbose=True)
    assert mode_fit.pdf(outcome) > baseline.pdf(outcome)

    # Highly weighted most likely condition should make specified outcome
    # most likely
    strong_mode = ModeCondition(outcome=0.25, weight=1000)
    strong_fit = HistogramDist.from_conditions(
        [*base_conditions, strong_mode], verbose=True)
    assert strong_mode.loss(strong_fit) == pytest.approx(0, abs=0.001)
Пример #14
0
def test_percentile_roundtrip(fixed_params):
    """Fit a mixture to percentile conditions and read the percentiles back.

    The ``max`` of each condition returned by ``mixture.percentiles`` must be
    within 10% of the ``max`` of the condition it was fitted to.
    """
    # (p, max) pairs for the percentile conditions being fitted.
    percentile_bounds = [
        (0.01, 0.61081324517545),
        (0.1, 0.8613634657212543),
        (0.25, 1),
        (0.5, 1.5),
        (0.75, 2),
        (0.9, 2.1386364698410034),
        (0.99, 2.3891870975494385),
    ]
    conditions = [
        IntervalCondition(p=prob, max=upper)
        for prob, upper in percentile_bounds
    ]

    mixture = LogisticMixture.from_conditions(
        conditions,
        fixed_params,
        scale=Scale(0, 4),
        verbose=True,
    )
    requested = [fitted.p for fitted in conditions]
    recovered = mixture.percentiles(percentiles=requested)
    for expected, actual in zip(conditions, recovered):
        assert actual.max == pytest.approx(expected.max, rel=0.1)
Пример #15
0
def test_mixed_2(point_densities):
    """Fit a mixture to mixed conditions and check the tuple is hashable.

    A 3-component mixture is fitted to one ``PointDensityCondition`` plus six
    interval conditions, and its pdf must be near 0 at -5 and 6. A second,
    separately constructed but identical tuple must hash the same and work as
    a key into a dict populated with the first.
    """
    def build_conditions():
        # Construct a fresh tuple each call so the equality/hash checks
        # compare distinct objects.
        density_condition = PointDensityCondition(
            point_densities["xs"], point_densities["densities"])
        interval_specs = (
            (0.4, 1),
            (0.45, 1.2),
            (0.48, 1.3),
            (0.5, 2),
            (0.7, 2.2),
            (0.9, 2.3),
        )
        intervals = [
            IntervalCondition(p=prob, max=upper)
            for prob, upper in interval_specs
        ]
        return (density_condition, *intervals)

    conditions = build_conditions()
    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 3},
        verbose=True,
        scale=Scale(0, 1),
    )
    assert mixture.pdf(-5) == pytest.approx(0, abs=0.1)
    assert mixture.pdf(6) == pytest.approx(0, abs=0.1)

    cache = {conditions: 3}
    conditions_2 = build_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert cache[conditions_2] == 3
Пример #16
0
def test_mixed_2(histogram):
    """Fit a mixture to mixed conditions and check the tuple is hashable.

    A 3-component mixture is fitted to one ``HistogramCondition`` plus six
    interval conditions, and its pdf must be near 0 at -5 and 6. A second,
    separately constructed but identical tuple must hash the same and work as
    a key into a dict populated with the first.
    """
    def build_conditions():
        # Construct a fresh tuple each call so the equality/hash checks
        # compare distinct objects.
        histogram_condition = HistogramCondition(histogram["xs"],
                                                 histogram["densities"])
        interval_specs = (
            (0.4, 1),
            (0.45, 1.2),
            (0.48, 1.3),
            (0.5, 2),
            (0.7, 2.2),
            (0.9, 2.3),
        )
        intervals = [
            IntervalCondition(p=prob, max=upper)
            for prob, upper in interval_specs
        ]
        return (histogram_condition, *intervals)

    conditions = build_conditions()
    mixture = LogisticMixture.from_conditions(conditions,
                                              {"num_components": 3},
                                              verbose=True)
    assert mixture.pdf(-5) == pytest.approx(0, abs=0.1)
    assert mixture.pdf(6) == pytest.approx(0, abs=0.1)

    cache = {conditions: 3}
    conditions_2 = build_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert cache[conditions_2] == 3
Пример #17
0
def test_density_percentile():
    """Fit a ``PointDensity`` to a single p=0.5 condition per value.

    For each upper bound tried, ``ppf(0.5)`` of the fit on ``Scale(0, 1)``
    must be within 0.1 of that bound.
    """
    for upper in (0.01, 0.1, 0.5, 0.9):
        fitted = PointDensity.from_conditions(
            [IntervalCondition(p=0.5, max=upper)], scale=Scale(0, 1))
        assert fitted.ppf(0.5) == pytest.approx(upper, abs=0.1)
Пример #18
0
def test_hist_from_percentile():
    """Fit a ``HistogramDist`` to a single p=0.5 condition per value.

    For each upper bound tried, ``ppf(0.5)`` of the fit must be within 0.1 of
    that bound.
    """
    for upper in (0.01, 0.1, 0.5, 0.9):
        fitted = HistogramDist.from_conditions(
            [IntervalCondition(p=0.5, max=upper)])
        assert fitted.ppf(0.5) == pytest.approx(upper, abs=0.1)
Пример #19
0
 def normalization_interval_condition_test(p, min, max, low, high):
     """Assert that normalize followed by denormalize returns the condition.

     Builds ``IntervalCondition(p, min, max)`` and ``Scale(low, high)``, then
     requires the round-tripped condition to equal the original.
     """
     original = IntervalCondition(p=p, min=min, max=max)
     target_scale = Scale(low, high)
     normalized = original.normalize(target_scale)
     assert normalized.denormalize(target_scale) == original