示例#1
0
def test_normalization_point_densities_condition(point_densities):
    """Round-tripping a PointDensityCondition through normalize/denormalize
    on the same scale should leave xs and densities (approximately) unchanged."""
    condition = PointDensityCondition(point_densities["xs"],
                                      point_densities["densities"])
    round_trip_scale = Scale(10, 1000)
    round_tripped = condition.normalize(round_trip_scale).denormalize(
        round_trip_scale)

    for expected, actual in zip(point_densities["densities"],
                                round_tripped.densities):
        assert expected == pytest.approx(actual, rel=0.001)
    for expected, actual in zip(point_densities["xs"], round_tripped.xs):
        assert expected == pytest.approx(actual, rel=0.001)

    # half-assed test that xs and densities are at least
    # getting transformed in the right direction
    normalized = condition.normalize(Scale(1, 4))
    for orig_x, orig_density, norm_x, norm_density in zip(
            point_densities["xs"], point_densities["densities"],
            normalized.xs, normalized.densities):
        assert orig_x > norm_x
        assert orig_density < norm_density
示例#2
0
def test_logistic_mixture_normalization():
    """Normalizing onto the unit scale and denormalizing onto another scale
    should rescale every mixture component accordingly."""
    scale = Scale(-50, 50)
    scalex2 = Scale(-100, 100)

    mixture = LogisticMixture(
        components=[Logistic(-40, 1, scale),
                    Logistic(50, 10, scale)],
        probs=[0.5, 0.5],
    )
    # Same mixture with every parameter doubled, on the doubled scale.
    mixturex2 = LogisticMixture(
        components=[Logistic(-80, 2, scalex2),
                    Logistic(100, 20, scalex2)],
        probs=[0.5, 0.5],
    )

    # Round-tripping through the unit scale converts between the two.
    assert mixture.normalize().denormalize(scalex2) == mixturex2
    assert mixturex2.normalize().denormalize(scale) == mixture

    # The normalized form should have unit-scale components.
    expected_normalized = LogisticMixture(
        [Logistic(0.1, 0.01, Scale(0, 1)),
         Logistic(1, 0.1, Scale(0, 1))],
        [0.5, 0.5],
    )
    assert mixture.normalize() == expected_normalized
示例#3
0
 def __init__(
     self,
     loc: float,
     s: float,
     scale: Optional[Scale] = None,
     metadata=None,
     normalized=False,
 ):
     """Construct a logistic distribution.

     If ``normalized`` is true, ``loc``/``s`` are taken to already be on
     the unit scale (``scale`` defaults to Scale(0, 1)); otherwise a
     ``scale`` is required and the parameters are normalized onto it.
     """
     # TODO (#303): Raise ValueError on scale < 0
     if not normalized and scale is None:
         raise ValueError("Either a Scale or normalized parameters are required")
     self.metadata = metadata
     if normalized:
         self.scale = scale if scale is not None else Scale(0, 1)
         self.loc = loc
         # Clamp s away from zero to keep the density well-defined.
         self.s = np.max([s, 0.0000001])
         self.true_s = self.s * self.scale.width
         self.true_loc = self.scale.denormalize_point(loc)
     else:
         self.scale = scale
         self.loc = scale.normalize_point(loc)
         self.s = np.max([s, 0.0000001]) / scale.width
         self.true_s = s  # convenience field only used in repr currently
         self.true_loc = loc  # convenience field only used in repr currently
示例#4
0
    def __init__(
        self,
        xs,
        densities,
        scale: Scale,
        normalized=False,
        traceable=False,
        cumulative_normed_ps=None,
    ):
        """Store xs/densities on the normalized (unit) scale.

        If ``normalized`` is false, the inputs are normalized onto
        ``scale`` first.
        """
        if scale is None:
            raise ValueError

        self.scale = scale

        if not normalized:
            xs = scale.normalize_points(xs)
            densities = scale.normalize_densities(xs, densities)
        self.normed_xs = xs
        self.normed_densities = densities

        # Cumulative probabilities with a leading 0, so entry i is the
        # mass below bin i.
        if cumulative_normed_ps is not None:
            self.cumulative_normed_ps = cumulative_normed_ps
        else:
            self.cumulative_normed_ps = np.append(np.array([0]),
                                                  np.cumsum(self.bin_probs))
示例#5
0
    def from_pairs(cls,
                   pairs,
                   scale: Scale,
                   normalized=False,
                   interpolate=True):
        """Build a density from {"x", "density"} pairs, sorted by x,
        optionally interpolated onto the canonical grid and rescaled
        so the area under the curve is 1."""
        by_x = sorted((p["x"], p["density"]) for p in pairs)
        xs = np.array([pt[0] for pt in by_x])
        densities = np.array([pt[1] for pt in by_x])

        if not normalized:
            xs = scale.normalize_points(xs)
            densities = scale.normalize_densities(xs, densities)

        if interpolate:
            # Resample densities at the canonical target_xs unless the
            # input already sits on that grid.
            on_target_grid = (len(xs) == len(constants.target_xs)
                              and np.isclose(xs, constants.target_xs,
                                             rtol=1e-04).all())
            if not on_target_grid:
                densities = interp1d(xs, densities)(constants.target_xs)

        # Make sure AUC is 1
        densities = densities / (np.sum(densities) / densities.size)

        return cls(constants.target_xs,
                   densities,
                   scale=scale,
                   normalized=True)
示例#6
0
 def denormalize(self, scale: Scale):
     """Map this condition's min/max from the unit scale back onto ``scale``,
     leaving absent (None) bounds absent."""
     if self.min is None:
         new_min = None
     else:
         new_min = scale.denormalize_point(self.min)
     new_max = None if self.max is None else scale.denormalize_point(self.max)
     return self.__class__(self.p, new_min, new_max, self.weight)
示例#7
0
def test_log_pdf(xscale: Scale):
    """PDF of a single Logistic — and of a mixture whose other component has
    numerically zero weight — should match the scipy logistic PDF."""
    normed_loc = 0.5
    normed_s = 0.1
    loc = xscale.denormalize_point(normed_loc)
    s = normed_s * xscale.width

    single = Logistic(loc=loc, s=s, scale=xscale)
    # First component's weight is ~0, so the mixture should behave
    # exactly like `single`.
    mixture = LogisticMixture(
        components=[
            Logistic(
                loc=xscale.denormalize_point(0.2),
                s=0.5 * xscale.width,
                scale=xscale,
            ),
            Logistic(loc=loc, s=s, scale=xscale),
        ],
        probs=[1.8629593e-29, 1.0],
    )

    ## Test PDF
    reference = scipy.stats.logistic(normed_loc, normed_s)
    for normed_x in np.linspace(0, 1, 10):
        x = xscale.denormalize_point(normed_x)
        expected = reference.pdf(normed_x) / xscale.width
        assert expected == pytest.approx(float(single.pdf(x)), rel=1e-3)
        assert expected == pytest.approx(float(mixture.pdf(x)), rel=1e-3)
示例#8
0
def test_variance_condition():
    """A VarianceCondition should flatten the fitted peak, and a heavily
    weighted one should pin the fitted variance near the requested value."""
    base_conditions = [
        MaxEntropyCondition(weight=0.001),
        SmoothnessCondition(),
        IntervalCondition(p=0.95, min=0.3, max=0.7),
    ]

    def fit(conditions):
        # All fits in this test share the same unit scale.
        return PointDensity.from_conditions(conditions,
                                            verbose=True,
                                            scale=Scale(0, 1))

    base_dist = fit(base_conditions)
    target_variance = base_dist.variance() + 0.01

    # Increase in variance should decrease peak
    weak_fit = fit(base_conditions +
                   [VarianceCondition(variance=target_variance, weight=1)])
    assert np.max(weak_fit.normed_densities) < np.max(
        base_dist.normed_densities)

    # Highly weighted variance condition should make var very close to specified var
    strong_fit = fit(base_conditions + [
        VarianceCondition(variance=target_variance, weight=100000)
    ])
    assert strong_fit.variance() == pytest.approx(float(target_variance),
                                                  abs=0.001)
示例#9
0
def test_cdf(xscale: Scale):
    """CDF of ergo's Logistic should match scipy's, both on the normalized
    scale and (for linear scales) on the true scale."""
    normed_reference = scipy.stats.logistic(0.5, 0.05)
    true_loc = xscale.denormalize_point(0.5)
    true_s = 0.05 * xscale.width
    ergodist = Logistic(loc=true_loc, s=true_s, scale=xscale)

    for normed_x in np.linspace(0, 1, 10):
        denormed_x = xscale.denormalize_point(normed_x)
        assert normed_reference.cdf(normed_x) == pytest.approx(
            float(ergodist.cdf(denormed_x)), rel=1e-3)

    true_xs = np.linspace(xscale.low, xscale.high, 10)
    if isinstance(xscale, LogScale):
        # TODO: consider a better approach for log scale
        for x in true_xs:
            assert normed_reference.cdf(
                xscale.normalize_point(x)) == pytest.approx(
                    float(ergodist.cdf(x)), rel=1e-3)
    else:
        true_reference = scipy.stats.logistic(true_loc, true_s)
        for x in true_xs:
            assert true_reference.cdf(x) == pytest.approx(
                float(ergodist.cdf(x)), rel=1e-3)
示例#10
0
def normalized_logistic_mixture():
    """60/40 logistic mixture whose parameters are already on the unit scale."""
    first = Logistic(loc=0.15, s=0.037034005, scale=Scale(0, 1))
    second = Logistic(loc=0.85, s=0.032395907, scale=Scale(0, 1))
    return LogisticMixture(components=[first, second], probs=[0.6, 0.4])
示例#11
0
File: truncate.py  Project: wjurayj/ergo
 def denormalize(self, scale: Scale):
     """Denormalize the wrapped distribution and both truncation bounds
     onto ``scale``."""
     return self.__class__(
         base_dist=self.base_dist.denormalize(scale),
         floor=scale.denormalize_point(self.floor),
         ceiling=scale.denormalize_point(self.ceiling),
     )
示例#12
0
def test_density_norm_denorm_roundtrip(scale: Scale):
    """Denormalizing densities and normalizing them again is the identity."""
    reference = scipy.stats.logistic(loc=0.5, scale=0.15)
    normed_xs = np.linspace(0.01, 1, 201)
    expected_densities = reference.pdf(normed_xs)
    xs = scale.denormalize_points(normed_xs)

    round_tripped = scale.normalize_densities(
        normed_xs, scale.denormalize_densities(xs, expected_densities))

    assert np.allclose(expected_densities, round_tripped)  # type: ignore
示例#13
0
def test_export_import():
    """Every Scale subtype should round-trip through export()/scale_factory()."""
    log_scale = LogScale(low=-1, high=1, log_base=2)
    exported = log_scale.export()
    assert exported["width"] == 2
    assert exported["class"] == "LogScale"
    assert scale_factory(log_scale.export()) == log_scale

    for original in (Scale(low=1, high=10000),
                     TimeScale(low=631152000, high=946684800)):
        assert scale_factory(original.export()) == original
示例#14
0
def test_serialization():
    """Scale hashes should agree exactly when type and parameters agree."""
    equal_pairs = [
        (Scale(0, 100), Scale(0, 100)),
        (LogScale(0, 100, 10), LogScale(0, 100, 10)),
        (TimeScale(946684800, 1592914415), TimeScale(946684800, 1592914415)),
    ]
    for a, b in equal_pairs:
        assert hash(a) == hash(b)

    unequal_pairs = [
        (Scale(0, 100), Scale(100, 200)),
        (LogScale(0, 100, 10), LogScale(0, 100, 100)),
        (TimeScale(631152000, 1592914415), TimeScale(946684800, 1592914415)),
    ]
    for a, b in unequal_pairs:
        assert hash(a) != hash(b)

    # Different scale types must not collide either.
    assert (hash(LogScale(0, 100, 1)) != hash(Scale(0, 100)) != hash(
        TimeScale(631152000, 946684800)))
示例#15
0
    def prepare_logistic(self, normalized_dist: dist.Logistic) -> dist.Logistic:
        """
        Transform a single logistic distribution by clipping the
        parameters and adding scale information as needed for submission to
        Metaculus. The loc and scale have to be within a certain range
        for the Metaculus API to accept the prediction.

        :param normalized_dist: a (normalized) logistic distribution
        :return: a transformed logistic distribution
        """
        # Unwrap a wrapped/truncated distribution down to its raw logistic.
        if hasattr(normalized_dist, "base_dist"):
            normalized_dist = normalized_dist.base_dist  # type: ignore

        if normalized_dist.s <= 0:
            raise ValueError("logistic_params.scale must be greater than 0")

        clipped_loc = min(normalized_dist.loc, max_loc)
        clipped_scale = float(onp.clip(normalized_dist.s, min_scale, max_scale))  # type: ignore

        # Open bounds allow probability mass outside [0, 1]; closed bounds pin
        # the CDF to 0 / 1 at the edges.
        if self.low_open:
            low = float(onp.clip(normalized_dist.cdf(0), min_open_low,
                                 max_open_low))
        else:
            low = 0

        if self.high_open:
            high = float(onp.clip(normalized_dist.cdf(1), min_open_high + low,
                                  max_open_high))
        else:
            high = 1

        return dist.Logistic(
            clipped_loc, clipped_scale, Scale(0, 1), {"low": low, "high": high}
        )
示例#16
0
    def __init__(
        self,
        logps=None,
        scale=None,
        traceable=False,
        direct_init=None,
    ):
        """Build a histogram distribution from log-probabilities, or copy
        precomputed fields verbatim via ``direct_init``."""
        # We assume that xs are evenly spaced in [0,1]
        if direct_init:
            # Copy the already-computed fields straight through.
            for field in ("logps", "ps", "cum_ps", "xs", "size", "scale"):
                setattr(self, field, direct_init[field])
        else:
            # Use the traceable numpy backend only when tracing is requested.
            numpy_backend = np if traceable else onp
            self.logps = logps
            self.ps = np.exp(logps)
            self.cum_ps = np.array(numpy_backend.cumsum(self.ps))
            self.size = logps.size
            self.scale = scale if scale else Scale(0, 1)
            self.xs = np.linspace(0, 1, self.logps.size)

        self.density_norm_term = self.scale.width / self.logps.size
示例#17
0
def test_mixed_2(point_densities):
    """Fitting mixed conditions keeps the tails near zero, and equal-but-
    distinct condition tuples hash equally (usable as cache keys)."""

    def make_conditions():
        # Fresh tuple each call so the two tuples are distinct objects.
        interval_specs = [(0.4, 1), (0.45, 1.2), (0.48, 1.3), (0.5, 2),
                          (0.7, 2.2), (0.9, 2.3)]
        point_condition = PointDensityCondition(point_densities["xs"],
                                                point_densities["densities"])
        return (point_condition, ) + tuple(
            IntervalCondition(p=p, max=m) for p, m in interval_specs)

    conditions = make_conditions()
    dist = LogisticMixture.from_conditions(conditions, {"num_components": 3},
                                           verbose=True,
                                           scale=Scale(0, 1))
    assert dist.pdf(-5) == pytest.approx(0, abs=0.1)
    assert dist.pdf(6) == pytest.approx(0, abs=0.1)

    my_cache = {conditions: 3}
    conditions_2 = make_conditions()
    assert hash(conditions) == hash(conditions_2)
    assert my_cache[conditions_2] == 3
示例#18
0
 def normalize(self):
     """Return this density re-expressed on the unit scale [0, 1]."""
     unit_scale = Scale(0.0, 1.0)
     return PointDensity(self.normed_xs,
                         self.normed_densities,
                         scale=unit_scale,
                         normalized=True)
示例#19
0
 def denormalize(self, scale: Scale):
     """Map xs back onto ``scale`` and shrink densities by the scale width
     so total probability mass is preserved."""
     xs = np.array([scale.denormalize_point(x) for x in self.xs])
     densities = np.array([d / scale.width for d in self.densities])
     return self.__class__(xs, densities, self.weight)
示例#20
0
def test_density_frompairs():
    """A flat density built from pairs should have uniform percentiles
    (the p-th percentile sits at x = p)."""
    pairs = [{"x": x, "density": 1} for x in (0, 0.2, 0.4, 0.6, 1)]
    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    for condition in dist.percentiles():
        assert condition.max == pytest.approx(condition.p, abs=0.01)
示例#21
0
def logistic_mixture_norm_test():
    """Even mixture of two logistics on a symmetric (-50, 50) scale."""
    scale = Scale(-50, 50)
    components = [Logistic(-40, 1, scale), Logistic(50, 10, scale)]
    return LogisticMixture(components=components, probs=[0.5, 0.5])
示例#22
0
def test_zero_log_issue():
    """
    Regression test for a bug where
    1. distribution is specified which has 0 density in some bins, and
    2. a condition or method that uses self.normed_log_densities or similar is called
    """
    xs = [0, 0.2, 0.4, 0.6, 1]
    densities = [1, 0, 0, 1, 1]
    pairs = [{"x": x, "density": d} for x, d in zip(xs, densities)]

    dist = PointDensity.from_pairs(pairs, scale=Scale(0, 1))
    fit = SmoothnessCondition().describe_fit(dist)
    # Zero-density bins must not poison the loss with NaNs.
    assert not np.isnan(fit["loss"])
示例#23
0
 def from_pairs(cls, pairs, scale: Scale, normalized=False):
     """Build a distribution from {"x", "density"} pairs, sorted by x."""
     pairs_by_x = sorted((p["x"], p["density"]) for p in pairs)
     xs = [x for x, _ in pairs_by_x]
     densities = [d for _, d in pairs_by_x]
     if not normalized:
         # NOTE(review): the normalized xs are never passed to cls below —
         # presumably the constructor assumes evenly spaced xs; confirm.
         xs = scale.normalize_points(xs)
     logps = onp.log(onp.array(densities))
     return cls(logps, scale)
示例#24
0
def logistic_mixture10():
    """Equal-weight mixture of same-width logistics at 15 and 5 on (-20, 40)."""
    scale = Scale(-20, 40)
    return LogisticMixture(
        components=[Logistic(loc=mu, s=2.3658268, scale=scale)
                    for mu in (15, 5)],
        probs=[0.5, 0.5],
    )
示例#25
0
def logistic_mixture_p_uneven():
    """Mixture whose first component carries numerically negligible weight."""
    scale = Scale(-10, 20)
    negligible = Logistic(loc=10, s=3, scale=scale)
    dominant = Logistic(loc=5, s=5, scale=scale)
    return LogisticMixture(components=[negligible, dominant],
                           probs=[1.8629593e-29, 1.0])
示例#26
0
def logistic_mixture():
    """80/20 mixture of two logistics on a (0, 150000) scale."""
    scale = Scale(0, 150000)
    specs = [(10000, 1000), (100000, 10000)]
    return LogisticMixture(
        components=[Logistic(loc=loc, s=s, scale=scale) for loc, s in specs],
        probs=[0.8, 0.2],
    )
示例#27
0
def test_mixture_from_percentile():
    """Fitting a one-component mixture to a median condition should place
    the component's true loc near the requested value."""
    for target in [0.01, 0.1, 1, 3]:
        fitted = LogisticMixture.from_conditions(
            [IntervalCondition(p=0.5, max=target)],
            {"num_components": 1},
            verbose=True,
            scale=Scale(0, 3))
        fitted_loc = fitted.components[0].base_dist.true_loc
        assert fitted_loc == pytest.approx(target, rel=0.1), fitted_loc
示例#28
0
def logistic_mixture15():
    """Equal-weight mixture of same-width logistics at 10 and 20 on (-10, 40)."""
    scale = Scale(-10, 40)
    return LogisticMixture(
        components=[Logistic(loc=mu, s=3.658268, scale=scale)
                    for mu in (10, 20)],
        probs=[0.5, 0.5],
    )
示例#29
0
def test_add_endpoints():
    """add_endpoints extrapolates boundary densities, clamping any
    below-zero extrapolation to 0."""
    xs = [0.25, 0.5, 0.75]
    cases = [
        # increasing densities extrapolate linearly to 0 and 1
        ([0.25, 0.5, 0.75], [0, 0.25, 0.5, 0.75, 1]),
        # peaked densities extrapolate below zero and get clamped to 0
        ([0.1, 0.5, 0.1], [0, 0.1, 0.5, 0.1, 0]),
    ]
    for input_densities, expected in cases:
        _, densities = PointDensity.add_endpoints(xs,
                                                  input_densities,
                                                  scale=Scale(0, 1))
        assert densities == pytest.approx(np.array(expected), abs=1e-5)
示例#30
0
def smooth_logistic_mixture():
    """80/20 mixture of wide logistics on a (1, 1e6) scale."""
    scale = Scale(1, 1000000.0)
    major = Logistic(loc=400000, s=100000, scale=scale)
    minor = Logistic(loc=700000, s=50000, scale=scale)
    return LogisticMixture(components=[major, minor], probs=[0.8, 0.2])