def test_point_density(scale, dist_source):
    """
    Check that a PointDensity obtained via ``dist_source`` still matches the
    logistic distribution it was sampled from.

    A logistic distribution centred on the middle of ``scale`` is evaluated at
    the denormalized ``constants.target_xs``. The resulting (x, density) pairs
    define a reference PointDensity; ``dist_source`` selects how the
    distribution under test is derived from that data. Its pdf, cdf and ppf
    are then compared against the scipy values.

    :param scale: scale the distribution lives on (presumably supplied by a
        pytest fixture defined outside this block)
    :param dist_source: name of the construction path to exercise
    :raises ValueError: if ``dist_source`` is not one of the known paths
    """

    def to_pairs(points, densities):
        # Build the list-of-dicts format that PointDensity.from_pairs is
        # called with throughout this test.
        return [{"x": x, "density": d} for x, d in zip(points, densities)]

    scale_mid = scale.low + scale.width / 2
    rv = logistic(loc=scale_mid, scale=scale.width / 30)
    xs = scale.denormalize_points(constants.target_xs)

    orig_densities = rv.pdf(xs)
    orig_cdfs = rv.cdf(xs)

    direct_dist = PointDensity.from_pairs(to_pairs(xs, orig_densities), scale)

    if dist_source == "direct":
        dist = direct_dist
    elif dist_source == "from_pairs":
        # Fresh pairs and a fresh instance, independent of direct_dist
        dist = PointDensity.from_pairs(to_pairs(xs, orig_densities), scale)
    elif dist_source == "to_arrays":
        _xs, _density = direct_dist.to_arrays()
        dist = PointDensity.from_pairs(to_pairs(_xs, _density), scale)
    elif dist_source == "to_arrays/2":
        # Round-trip through half the default number of points
        _xs, _density = direct_dist.to_arrays(
            num_xs=int(constants.point_density_default_num_points / 2),
            add_endpoints=True,
        )
        dist = PointDensity.from_pairs(to_pairs(_xs, _density), scale)
    elif dist_source == "structured":
        dist = PointDensity.structure(direct_dist.destructure())
    elif dist_source == "denormalized":
        dist = direct_dist.normalize().denormalize(scale)
    elif dist_source == "from_conditions":
        cond = CrossEntropyCondition(p_dist=direct_dist)
        dist = PointDensity.from_conditions([cond], scale=scale)
    else:
        # Fail loudly instead of hitting an unbound `dist` further down
        raise ValueError(f"Unknown dist_source: {dist_source!r}")

    # PDF
    dist_densities = np.array([float(dist.pdf(x)) for x in xs])
    # The half-resolution round trip is given a looser tolerance
    pdf_tolerance = 0.08 if dist_source == "to_arrays/2" else 0.01
    assert dist_densities == pytest.approx(orig_densities, abs=pdf_tolerance)

    # CDF
    dist_cdfs = np.array([float(dist.cdf(x)) for x in xs])
    assert dist_cdfs == pytest.approx(orig_cdfs, abs=0.06)

    # PPF
    # Only check points whose reference density exceeds the threshold.
    # Indices are derived from the data itself so they always stay in bounds.
    min_check_density = 1e-3
    check_idxs = np.flatnonzero(orig_densities > min_check_density)
    dist_ppfs = np.array([float(dist.ppf(c)) for c in orig_cdfs[check_idxs]])
    assert dist_ppfs == pytest.approx(xs[check_idxs], rel=0.25)
def test_zero_log_issue():
    """
    Regression test: describing the fit of a SmoothnessCondition must not
    yield a NaN loss when the distribution

    1. has 0 density in some bins, and
    2. is passed to a condition or method that uses
       self.normed_log_densities or similar
    """
    # (x, density) samples; the two middle-left bins carry zero density
    samples = [(0, 1), (0.2, 0), (0.4, 0), (0.6, 1), (1, 1)]
    pairs = [{"x": x, "density": density} for x, density in samples]

    unit_scale = Scale(0, 1)
    dist = PointDensity.from_pairs(pairs, scale=unit_scale)

    fit = SmoothnessCondition().describe_fit(dist)
    loss = fit["loss"]
    assert not np.isnan(loss)
def test_density_frompairs():
    """
    Build a PointDensity from constant-density pairs on Scale(0, 1) and check
    that, for every condition returned by percentiles(), ``max`` is within
    0.01 of ``p``.
    """
    # Same density (1) at every sample point
    sample_xs = (0, 0.2, 0.4, 0.6, 1)
    pairs = [{"x": x, "density": 1} for x in sample_xs]

    unit_scale = Scale(0, 1)
    dist = PointDensity.from_pairs(pairs, scale=unit_scale)

    percentile_conditions = dist.percentiles()
    for percentile in percentile_conditions:
        expected = pytest.approx(percentile.p, abs=0.01)
        assert percentile.max == expected
def test_mean(scale: Scale):
    """
    Check that mean() of a PointDensity sampled from a normal distribution
    recovers the mean of that normal distribution.

    The normal is centred on the midpoint of ``scale`` with a standard
    deviation of one tenth of the scale width.

    :param scale: scale the distribution is defined on (presumably supplied
        by a pytest fixture defined outside this block)
    """
    true_mean = scale.low + scale.width / 2
    reference = norm(loc=true_mean, scale=scale.width / 10)

    # Evaluate the reference pdf point by point on the denormalized grid
    sample_points = scale.denormalize_points(constants.target_xs)
    pairs = []
    for point in sample_points:
        pairs.append({"x": point, "density": reference.pdf(point)})

    calculated_mean = float(PointDensity.from_pairs(pairs, scale).mean())

    expected = pytest.approx(calculated_mean, rel=1e-3, abs=1e-3)
    assert true_mean == expected
def point_density_from_scale(scale: Scale):
    """
    Build a PointDensity on ``scale`` from a logistic distribution.

    The logistic is centred on the midpoint of the scale, with its scale
    parameter set to 1/30 of the scale width, and is sampled at the
    denormalized ``constants.target_xs``.

    :param scale: scale to build the distribution on
    :return: the result of PointDensity.from_pairs for the sampled pairs
    """
    midpoint = scale.low + scale.width / 2
    spread = scale.width / 30
    reference = logistic(loc=midpoint, scale=spread)

    sample_xs = scale.denormalize_points(constants.target_xs)
    sample_densities = reference.pdf(sample_xs)

    pairs = []
    for x, density in zip(sample_xs, sample_densities):
        pairs.append({"x": x, "density": density})

    return PointDensity.from_pairs(pairs, scale)