def community_conditions(self, crossentropy_weight=0.1, interval_weight=10000.0):
    """Build the fitting conditions that encode the community prediction.

    Returns a cross-entropy condition against the (normalized) community
    density, plus interval conditions for any open bounds of the scale.
    """
    from ergo.conditions import (
        CrossEntropyCondition,
        IntervalCondition,
        Condition,
    )

    normalized_pairs = self.community_pairs(normalized=True)
    # Note that this histogram is normalized - it sums to 1 even if the pairs don't!
    community_density = dist.PointDensity.from_pairs(
        normalized_pairs, scale=self.scale, normalized=True
    )
    conditions: List[Condition] = [
        CrossEntropyCondition(community_density, weight=crossentropy_weight)
    ]
    if self.low_open:
        conditions.append(
            IntervalCondition(
                p=self.p_below, max=self.scale.low, weight=interval_weight
            )
        )
    if self.high_open:
        conditions.append(
            IntervalCondition(
                p=self.p_above, min=self.scale.high, weight=interval_weight
            )
        )
    return conditions
def test_weights_mixture():
    """A heavily weighted condition should dominate the fitted location."""
    conditions = [
        IntervalCondition(p=0.4, max=1, weight=0.01),
        IntervalCondition(p=0.5, max=2, weight=100),
        IntervalCondition(p=0.8, max=2.2, weight=0.01),
        IntervalCondition(p=0.9, max=2.3, weight=0.01),
    ]
    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 1},
        verbose=True,
        scale=Scale(0, 3),
    )
    # The weight-100 condition (median at 2) should win out.
    fitted_loc = mixture.components[0].base_dist.true_loc
    assert fitted_loc == pytest.approx(2, rel=0.1)
def test_mixture_from_percentiles():
    """Fitting to percentile conditions should approximately recover them."""
    conditions = [
        IntervalCondition(p=0.1, max=1),
        IntervalCondition(p=0.5, max=2),
        IntervalCondition(p=0.6, max=3),
    ]
    mixture = LogisticMixture.from_conditions(
        conditions,
        {"num_components": 4},
        verbose=False,
        scale=Scale(0, 3),
    )
    # Each target percentile should be approximately reproduced by the CDF.
    for c in conditions:
        assert mixture.cdf(c.max) == pytest.approx(c.p, rel=0.1)
def test_normalization_interval_condition():
    """normalize followed by denormalize should be the identity on IntervalCondition."""

    def assert_roundtrip(p, min, max, low, high):
        scale = Scale(low, high)
        original = IntervalCondition(p=p, min=min, max=max)
        assert original.normalize(scale).denormalize(scale) == original

    cases = [
        # straightforward scenario
        dict(p=0.5, min=10, max=100, low=10, high=1000),
        # left open
        dict(p=0.5, min=None, max=10000, low=10, high=1000),
        # right open
        dict(p=0.5, min=10, max=None, low=10, high=1000),
        # negative values
        dict(p=0.5, min=-1000, max=-100, low=-10000, high=-1000),
        # p = 1
        dict(p=1, min=10, max=100, low=10, high=1000),
        # interval bigger than scale
        dict(p=1, min=0, max=1000, low=10, high=100),
    ]
    for case in cases:
        assert_roundtrip(**case)

    # Also spot-check the concrete normalized form.
    assert IntervalCondition(p=0.5, min=0, max=5).normalize(
        Scale(0, 10)
    ) == IntervalCondition(p=0.5, min=0, max=0.5)
def test_variance_condition():
    """VarianceCondition should raise variance (flatten the peak) as weighted."""

    def histogram_variance(histogram):
        # Discrete variance over the histogram's support grid.
        grid = np.linspace(
            histogram.scale.low, histogram.scale.high, histogram.ps.size
        )
        mu = np.dot(histogram.ps, grid)
        return np.dot(histogram.ps, np.square(grid - mu))

    base_conditions = [
        MaxEntropyCondition(weight=0.1),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    baseline = HistogramDist.from_conditions(base_conditions, verbose=True)
    target_variance = histogram_variance(baseline) + 0.01

    # Increase in variance should decrease peak
    with_variance = HistogramDist.from_conditions(
        base_conditions + [VarianceCondition(variance=target_variance, weight=1)],
        verbose=True,
    )
    assert np.max(with_variance.ps) < np.max(baseline.ps)

    # Highly weighted variance condition should make var very close to specified var
    strongly_constrained = HistogramDist.from_conditions(
        base_conditions + [VarianceCondition(variance=target_variance, weight=1000)],
        verbose=True,
    )
    assert histogram_variance(strongly_constrained) == pytest.approx(
        float(target_variance), abs=0.001
    )
def test_variance_condition():
    """VarianceCondition on PointDensity should flatten the peak and, when
    strongly weighted, pin the variance near the requested value."""
    base_conditions = [
        MaxEntropyCondition(weight=0.001),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]
    unit_scale = Scale(0, 1)
    baseline = PointDensity.from_conditions(
        base_conditions, verbose=True, scale=unit_scale
    )
    target_variance = baseline.variance() + 0.01

    # Increase in variance should decrease peak
    with_variance = PointDensity.from_conditions(
        base_conditions + [VarianceCondition(variance=target_variance, weight=1)],
        verbose=True,
        scale=unit_scale,
    )
    assert np.max(with_variance.normed_densities) < np.max(
        baseline.normed_densities
    )

    # Highly weighted variance condition should make var very close to specified var
    strongly_constrained = PointDensity.from_conditions(
        base_conditions
        + [VarianceCondition(variance=target_variance, weight=100000)],
        verbose=True,
        scale=unit_scale,
    )
    assert strongly_constrained.variance() == pytest.approx(
        float(target_variance), abs=0.001
    )
def test_mixture_from_percentile():
    """A single median condition should place the fitted component at that value."""
    for target in [0.01, 0.1, 1, 3]:
        fitted = LogisticMixture.from_conditions(
            [IntervalCondition(p=0.5, max=target)],
            {"num_components": 1},
            verbose=True,
            scale=Scale(0, 3),
        )
        fitted_loc = fitted.components[0].base_dist.true_loc
        assert fitted_loc == pytest.approx(target, rel=0.1), fitted_loc
def percentiles(self, percentiles=None):
    """Express this distribution as IntervalConditions at the given percentiles.

    Defaults to a standard set of percentiles when none are provided.
    """
    from ergo.conditions import IntervalCondition

    if percentiles is None:
        percentiles = [0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99]
    # One right-open interval condition per requested percentile.
    return [
        IntervalCondition(q, max=float(self.ppf(q))) for q in percentiles
    ]
def test_interval_condition():
    """IntervalCondition loss is weight * (p_in_interval - p)^2 under a Uniform."""
    uniform = Uniform(min=-1, max=1)

    # Exact match: half the mass lies in [0, 1].
    assert IntervalCondition(p=0.5, min=0, max=1).loss(uniform) == 0
    # Off by 0.25 → squared error.
    assert IntervalCondition(p=0.25, min=0, max=1).loss(uniform) == 0.25**2
    # Open-left interval up to 0 contains half the mass.
    assert IntervalCondition(p=1, max=0).loss(uniform) == 0.5**2
    # Fully open interval always contains all mass.
    assert IntervalCondition(p=1).loss(uniform) == 0
    # Whole support with p=0 → error of 1.
    assert IntervalCondition(p=0, min=-1, max=1).loss(uniform) == 1
    # Weight scales the squared error linearly.
    assert IntervalCondition(p=0, min=-1, max=1, weight=10).loss(uniform) == 10

    # describe_fit exposes the same loss plus the measured interval mass.
    assert (
        IntervalCondition(p=0.25, min=0, max=1).describe_fit(uniform)["loss"]
        == 0.25**2
    )
    assert (
        IntervalCondition(p=0, min=-1, max=0).describe_fit(uniform)["p_in_interval"]
        == 0.5
    )
    assert (
        IntervalCondition(p=1, min=-1, max=0).describe_fit(uniform)["p_in_interval"]
        == 0.5
    )
def test_fit_hist_regression_1():
    """
    Regression test for bug:
    "This custom question has a weird histogram - why?"
    see https://elicit.ought.org/builder/gflpsSBAb

    for more on the bug, see
    https://docs.google.com/document/d/1CFklTKtbKzXi6-lRaEsX4ZiY3Yzpbfdg7i2j1NvKP34/edit#heading=h.ph1huakxn33f
    """
    fitted = HistogramDist.from_conditions(
        [
            IntervalCondition(p=0.25, max=2.0),
            IntervalCondition(p=0.75, max=4.0),
            IntervalCondition(p=0.9, max=6.0),
            MaxEntropyCondition(weight=0.1),
        ],
        scale=Scale(low=0, high=52),
    )
    # The fitted CDF/quantiles should respect the percentile conditions.
    assert fitted.cdf(2) == pytest.approx(0.25, abs=0.05)
    assert fitted.ppf(0.9) == pytest.approx(6, abs=1)
def test_interval_plus_entropy(scale: Scale):
    """An interval plus max-entropy should yield an (almost) piecewise-flat fit."""
    fitted = PointDensity.from_conditions(
        [
            IntervalCondition(p=0.5, max=scale.denormalize_point(0.3)),
            MaxEntropyCondition(weight=0.01),
        ],
        scale=scale,
    )
    # We expect at most 3 different densities: one for inside the interval, one for outside,
    # and one between.
    distinct_densities = np.unique(fitted.normed_densities)
    assert distinct_densities.size <= 3
def test_fit_point_density_regression_p_in_range():
    """
    Regression test for a bug where:

    1. < 100% of p is in the entire range,
    for a closed-bound question
    2. the p is smashed up against the edges of the range
    rather than distributed evenly over the whole range

    e.g. see https://elicit.ought.org/builder/Mib4yBPDE

    For more on the bug, see
    https://docs.google.com/document/d/1CFklTKtbKzXi6-lRaEsX4ZiY3Yzpbfdg7i2j1NvKP34/edit#heading=h.lypz52bknpyq
    """
    unit_scale = Scale(0, 1)
    fitted = PointDensity.from_conditions(
        conditions=[IntervalCondition(min=0, max=1, p=0.5)],
        scale=unit_scale,
    )
    # All probability mass must still live inside the closed range.
    assert fitted.cdf(1) == pytest.approx(1, abs=1e-4)
def test_mode_condition():
    """ModeCondition should pull density toward the specified outcome."""
    base_conditions = [IntervalCondition(p=0.4, max=0.5)]
    baseline = HistogramDist.from_conditions(base_conditions, verbose=True)

    # Most likely condition should increase chance of specified outcome
    with_mode = HistogramDist.from_conditions(
        base_conditions + [ModeCondition(outcome=0.25)], verbose=True
    )
    assert with_mode.pdf(0.25) > baseline.pdf(0.25)

    # Highly weighted most likely condition should make specified outcome most likely
    heavy_mode = ModeCondition(outcome=0.25, weight=1000)
    heavily_constrained = HistogramDist.from_conditions(
        base_conditions + [heavy_mode], verbose=True
    )
    assert heavy_mode.loss(heavily_constrained) == pytest.approx(0, abs=0.001)
def test_percentile_roundtrip(fixed_params):
    """Fitting to percentile conditions and reading them back should roundtrip."""
    conditions = [
        IntervalCondition(p=0.01, max=0.61081324517545),
        IntervalCondition(p=0.1, max=0.8613634657212543),
        IntervalCondition(p=0.25, max=1),
        IntervalCondition(p=0.5, max=1.5),
        IntervalCondition(p=0.75, max=2),
        IntervalCondition(p=0.9, max=2.1386364698410034),
        IntervalCondition(p=0.99, max=2.3891870975494385),
    ]
    fitted = LogisticMixture.from_conditions(
        conditions,
        fixed_params,
        scale=Scale(0, 4),
        verbose=True,
    )
    recovered = fitted.percentiles(percentiles=[c.p for c in conditions])
    # Each recovered percentile value should be close to its input.
    for original, roundtripped in zip(conditions, recovered):
        assert roundtripped.max == pytest.approx(original.max, rel=0.1)
def test_mixed_2(point_densities):
    """Mixed conditions fit sanely, and equal condition tuples hash equally."""

    def build_conditions():
        # Identical tuples built twice to exercise hashing/equality below.
        return (
            PointDensityCondition(
                point_densities["xs"], point_densities["densities"]
            ),
            IntervalCondition(p=0.4, max=1),
            IntervalCondition(p=0.45, max=1.2),
            IntervalCondition(p=0.48, max=1.3),
            IntervalCondition(p=0.5, max=2),
            IntervalCondition(p=0.7, max=2.2),
            IntervalCondition(p=0.9, max=2.3),
        )

    conditions = build_conditions()
    fitted = LogisticMixture.from_conditions(
        conditions, {"num_components": 3}, verbose=True, scale=Scale(0, 1)
    )
    # Density should vanish well outside the scale.
    assert fitted.pdf(-5) == pytest.approx(0, abs=0.1)
    assert fitted.pdf(6) == pytest.approx(0, abs=0.1)

    # Structurally equal condition tuples must be usable as dict keys interchangeably.
    my_cache = {}
    my_cache[conditions] = 3
    conditions_2 = build_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
def test_mixed_2(histogram):
    """Mixed conditions fit sanely, and equal condition tuples hash equally."""

    def build_conditions():
        # Identical tuples built twice to exercise hashing/equality below.
        return (
            HistogramCondition(histogram["xs"], histogram["densities"]),
            IntervalCondition(p=0.4, max=1),
            IntervalCondition(p=0.45, max=1.2),
            IntervalCondition(p=0.48, max=1.3),
            IntervalCondition(p=0.5, max=2),
            IntervalCondition(p=0.7, max=2.2),
            IntervalCondition(p=0.9, max=2.3),
        )

    conditions = build_conditions()
    fitted = LogisticMixture.from_conditions(
        conditions, {"num_components": 3}, verbose=True
    )
    # Density should vanish well outside the fitted region.
    assert fitted.pdf(-5) == pytest.approx(0, abs=0.1)
    assert fitted.pdf(6) == pytest.approx(0, abs=0.1)

    # Structurally equal condition tuples must be usable as dict keys interchangeably.
    my_cache = {}
    my_cache[conditions] = 3
    conditions_2 = build_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
def test_density_percentile():
    """Fitting a single median condition should place the median there."""
    for target in [0.01, 0.1, 0.5, 0.9]:
        fitted = PointDensity.from_conditions(
            [IntervalCondition(p=0.5, max=target)], scale=Scale(0, 1)
        )
        assert fitted.ppf(0.5) == pytest.approx(target, abs=0.1)
def test_hist_from_percentile():
    """Fitting a single median condition should place the median there."""
    for target in [0.01, 0.1, 0.5, 0.9]:
        fitted = HistogramDist.from_conditions(
            [IntervalCondition(p=0.5, max=target)]
        )
        assert fitted.ppf(0.5) == pytest.approx(target, abs=0.1)
def normalization_interval_condition_test(p, min, max, low, high):
    """Assert normalize → denormalize on IntervalCondition is the identity.

    `min`/`max` are part of the IntervalCondition keyword interface, so they
    intentionally shadow the builtins here.
    """
    scale = Scale(low, high)
    original = IntervalCondition(p=p, min=min, max=max)
    roundtripped = original.normalize(scale).denormalize(scale)
    assert roundtripped == original