Exemplo n.º 1
0
def test_unbinned_data():
    """Evaluate a binned Gauss at unbinned points and compare to the unbinned Gauss.

    The two PDFs are compared on a dense grid with a loose absolute tolerance
    and at the bin centers with a ten times tighter one. The binned PDF must
    be exactly zero at -7 and 12 and positive at 3. Finally, two comparison
    figures are saved through ``pytest.zfit_savefig``.
    """
    nevents = 751
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(nevents, 70)
    bin_centers = obs_binned.binning.centers[0]
    grid = znp.linspace(-5, 10, 200)

    pdf_binned_grid = gauss_binned.pdf(grid)
    pdf_true_grid = gauss.pdf(grid)
    # Absolute tolerance on the dense grid: a tenth of the unbinned peak height.
    tolerance = np.max(pdf_true_grid) / 10
    np.testing.assert_allclose(pdf_true_grid, pdf_binned_grid, atol=tolerance)

    pdf_binned_centers = gauss_binned.pdf(bin_centers)
    pdf_true_centers = gauss.pdf(bin_centers)
    np.testing.assert_allclose(
        pdf_binned_centers, pdf_true_centers, atol=tolerance / 10
    )

    # NOTE(review): -7 and 12 are presumably outside the binned range, 3 inside.
    probe_pdf = gauss_binned.pdf(znp.array([-7.0, 3.0, 12]))
    assert probe_pdf[0] == 0
    assert probe_pdf[1] > 0
    assert probe_pdf[2] == 0

    # (title, x values, unbinned curve, binned curve) for each saved figure.
    figures = (
        (
            "Binned Gauss evaluated on unbinned edges",
            bin_centers,
            pdf_true_centers,
            pdf_binned_centers,
        ),
        (
            "Binned Gauss evaluated on unbinned data",
            grid,
            pdf_true_grid,
            pdf_binned_grid,
        ),
    )
    for title, xvalues, true_curve, binned_curve in figures:
        plt.figure()
        plt.title(title)
        plt.plot(xvalues, true_curve, label="unbinned pdf")
        plt.plot(xvalues, binned_curve, "--", label="binned pdf")
        plt.legend()
        pytest.zfit_savefig()
Exemplo n.º 2
0
def test_spline_from_binned_from_unbinned():
    """Interpolate a binned Gauss with ``SplinePDF`` and compare to the original.

    A sample drawn from the binned PDF is plotted against the binned counts,
    then the extended density of the spline PDF is compared on a grid with the
    extended density of the unbinned Gauss (absolute tolerance 50).
    """
    nevents = 1004
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(nevents)
    grid = znp.linspace(-5, 10, nevents // 5)
    binned_sample = gauss_binned.sample(nevents, limits=obs_binned)
    bin_centers = obs_binned.binning.centers[0]

    # Figure 1: sampled histogram against the counts of the binned PDF.
    plt.figure()
    plt.title("Comparison of binned gaussian and sample")
    mplhep.histplot(binned_sample.to_hist(), label="sampled binned")
    expected_counts = gauss_binned.counts(obs_binned)
    plt.plot(bin_centers, expected_counts, label="counts binned")
    plt.legend()
    pytest.zfit_savefig()

    spline_pdf = SplinePDF(gauss_binned, obs=obs)
    density_spline = spline_pdf.ext_pdf(grid)
    density_true = gauss.ext_pdf(grid)

    # Figure 2: binned density, original density and spline interpolation.
    plt.figure()
    plt.title("Comparison of unbinned gauss to binned to interpolated")
    density_binned = gauss_binned.ext_pdf(obs_binned)
    plt.plot(bin_centers, density_binned, "x", label="binned")
    plt.plot(grid, density_true, label="original")
    plt.plot(grid, density_spline, ".", label="interpolated")
    plt.legend()
    pytest.zfit_savefig()

    np.testing.assert_allclose(density_spline, density_true, atol=50)
Exemplo n.º 3
0
def test_unbinned_from_binned_from_unbinned():
    """Convert a binned Gauss back to an unbinned PDF and compare to the original.

    The extended density of ``UnbinnedFromBinnedPDF`` is compared on a grid
    with the unbinned Gauss (absolute tolerance 50). Afterwards, large samples
    from both PDFs are binned and required to agree within 7 sigma per bin.
    """
    nevents = 1004
    gauss, gauss_binned, obs, obs_binned = create_gauss_binned(nevents)
    grid = znp.linspace(-5, 10, nevents // 5)
    binned_sample = gauss_binned.sample(nevents, limits=obs_binned)
    bin_centers = obs_binned.binning.centers[0]

    # Figure 1: sampled histogram against the counts of the binned PDF.
    plt.figure()
    plt.title("Comparison of binned gaussian and sample")
    mplhep.histplot(binned_sample.to_hist(), label="sampled binned")
    expected_counts = gauss_binned.counts(obs_binned)
    plt.plot(bin_centers, expected_counts, label="counts binned")
    plt.legend()
    pytest.zfit_savefig()

    unbinned = zfit.pdf.UnbinnedFromBinnedPDF(gauss_binned, obs=obs)
    density_unbinned = unbinned.ext_pdf(grid)
    density_true = gauss.ext_pdf(grid)

    # Figure 2: binned density, original density and the re-unbinned density.
    plt.figure()
    plt.title("Comparison of unbinned gauss to binned to unbinned again")
    density_binned = gauss_binned.ext_pdf(obs_binned)
    plt.plot(bin_centers, density_binned, "x", label="binned")
    plt.plot(grid, density_true, label="original")
    plt.plot(grid, density_unbinned, ".", label="unbinned")
    plt.legend()
    pytest.zfit_savefig()
    np.testing.assert_allclose(density_unbinned, density_true, atol=50)

    # Draw large samples from both PDFs and histogram them in the same binning.
    nsample = 500000
    hist_from_binned = unbinned.sample(nsample).to_binned(obs_binned).to_hist()
    hist_from_gauss = gauss.sample(nsample).to_binned(obs_binned).to_hist()

    # Figure 3: the two sampled histograms on top of each other.
    plt.figure()
    plt.title("Comparison of unbinned gaussian and unbinned from binned sampled")
    mplhep.histplot(hist_from_binned, label="unbinned from binned")
    mplhep.histplot(hist_from_gauss, label="original")
    plt.legend()
    pytest.zfit_savefig()

    # Per-bin pull; the +1 keeps the denominator non-zero for empty bins.
    residuals = hist_from_binned.values() - hist_from_gauss.values()
    uncertainties = (hist_from_gauss.variances() + 1) ** 0.5
    pulls = residuals / uncertainties
    np.testing.assert_allclose(pulls, 0, atol=7)  # 7 sigma away
Exemplo n.º 4
0
def test_conv_1d_shifted(interpolation):
    """Compare an FFT convolution on a shifted space with a reference convolution.

    The function lives on (5, 15) and the kernel on the symmetric range
    (-3, 3). The PDF of ``FFTConvPDFV1`` is compared point-wise with
    ``true_conv_np`` and its integral over ``obs`` is required to be 1.

    Args:
        interpolation: Interpolation method forwarded to ``FFTConvPDFV1``.
            Previously this argument was accepted but never used, so every
            parametrized run built the same convolution.
    """
    kerlim = (-3, 3)  # symmetric to make the np conv comparison simple
    obs_kernel = zfit.Space("obs1", limits=kerlim)
    obs = zfit.Space("obs1", limits=(5, 15))
    func1 = zfit.pdf.GaussianKDE1DimV1(obs=obs, data=np.random.uniform(6, 12, size=100))
    func2 = zfit.pdf.Uniform(11, 11.5, obs=obs)
    func = zfit.pdf.SumPDF([func1, func2], 0.5)

    func1k = zfit.pdf.Gauss(0.0, 1, obs=obs_kernel)
    func2k = zfit.pdf.Gauss(1.0, 0.4, obs=obs_kernel)
    funck = zfit.pdf.SumPDF([func1k, func2k], 0.5)

    # Forward the requested interpolation so that each run tests its own method.
    conv = zfit.pdf.FFTConvPDFV1(
        func=func, kernel=funck, n=200, interpolation=interpolation
    )

    # One common grid from the kernel's lower to the function's upper limit;
    # each space then filters out the points that fall inside of it.
    xnp = tf.linspace(obs_kernel.rect_lower, obs.rect_upper, 4023)

    # true convolution
    kernel_points = obs_kernel.filter(xnp)
    x = obs.filter(xnp)
    probs = conv.pdf(x=x)
    true_conv = true_conv_np(func, funck, obs, x=x, xkernel=kernel_points)

    integral = conv.integrate(
        limits=obs,
    )
    probs_np = probs.numpy()
    np.testing.assert_allclose(probs_np, true_conv, rtol=0.01, atol=0.01)

    assert pytest.approx(1, rel=1e-3) == integral.numpy()

    plt.figure()
    plt.title("Conv FFT 1Dim shift testing")
    plt.plot(x, probs_np, label="zfit")
    plt.plot(x, true_conv, label="numpy")
    plt.legend()
    pytest.zfit_savefig()
Exemplo n.º 5
0
def test_binned_from_unbinned_2D():
    """Bin a 2D product of Gaussians and validate it against NumPy and sampling.

    The relative counts of ``BinnedFromUnbinnedPDF`` are compared with a
    histogram filled from ``np.random.normal``, and a sample drawn from the
    binned PDF is compared with the PDF's own histogram. Two 2D plots are
    saved at the end.
    """
    zfit.run.set_graph_mode(True)
    n = 100000

    mu = zfit.Parameter("mu", 1, 0, 19)
    sigma = zfit.Parameter("sigma", 6, 0, 120)
    obsx = zfit.Space("x", (-5, 10))
    obsy = zfit.Space("y", (-50, 100))
    gaussx = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obsx)
    muy = mu + 3
    sigmay = sigma * 20
    gaussy = zfit.pdf.Gauss(mu=muy, sigma=sigmay, obs=obsy)
    gauss2D = zfit.pdf.ProductPDF([gaussx, gaussy])

    # Irregular x edges: 42 bins on [-5, 5] followed by 29 bins on [5, 10].
    # The shared edge at 5 is dropped from the second part.
    edges_x = np.concatenate(
        [np.linspace(-5, 5, 43), np.linspace(5, 10, 30)[1:]], axis=0
    )
    axisx = zfit.binned.VariableBinning(edges_x, name="x")
    axisxhist = hist.axis.Variable(edges_x, name="x")
    axisy = zfit.binned.RegularBinning(15, -50, 100, name="y")
    axisyhist = hist.axis.Regular(15, -50, 100, name="y")
    obs_binned = zfit.Space(["x"], binning=axisx) * zfit.Space("y", binning=axisy)

    gauss_binned = BinnedFromUnbinnedPDF(pdf=gauss2D, space=obs_binned, extended=n)
    # First call outside of the timed loop.
    values = gauss_binned.rel_counts(obs_binned)  # TODO: good test?
    ntrial = 10
    start = time.time()
    for _ in range(ntrial):
        values = gauss_binned.rel_counts(obs_binned)
    print(f"Time taken {(time.time() - start) / ntrial}")

    # Reference histogram filled with normally distributed points from NumPy.
    hist2d = hist.Hist(axisxhist, axisyhist)
    nruns = 5
    npoints = 5_000_000
    for _ in range(nruns):
        means = [float(mu), float(muy)]
        widths = [float(sigma), float(sigmay)]
        normal2d = np.random.normal(means, widths, size=(npoints, 2))
        hist2d.fill(*normal2d.T, threads=4)

    # Per bin: |expected - observed| must stay below 6.5 * sqrt(observed).
    deviation = np.abs(values * hist2d.sum() - hist2d.counts())
    allowed = 6.5 * np.sqrt(hist2d.counts())
    diff = deviation - allowed
    print(diff)
    np.testing.assert_array_less(diff, 0)

    sample = gauss_binned.sample(n, limits=obs_binned)
    hist_sampled = sample.to_hist()
    hist_pdf = gauss_binned.to_hist()
    max_error = hist_sampled.values() * 6**2  # 6 sigma away
    squared_residuals = (hist_sampled.values() - hist_pdf.values()) ** 2
    np.testing.assert_array_less(squared_residuals, max_error)

    plt.figure()
    plt.title("Gauss 2D binned sampled.")
    mplhep.hist2dplot(hist_sampled)
    pytest.zfit_savefig()

    plt.figure()
    plt.title("Gauss 2D binned plot, irregular (x<4.5 larger bins than x>4.5) binning.")
    mplhep.hist2dplot(hist_pdf)
    pytest.zfit_savefig()
Exemplo n.º 6
0
def test_morphing_templates(alphas):
    """Check that ``SplineMorphingPDF`` reproduces and interpolates its templates.

    Random template counts are generated around a nominal histogram. At each
    template alpha the morphed counts must equal the template; half-way
    between two neighbouring alphas they must stay close to both neighbours.
    The morphing is then plotted once as a 3D step plot and once as
    overlaid histograms.

    Args:
        alphas: ``None`` to pass the three templates as a list (they are then
            checked at alpha = -1, 0, 1), or a sequence of alpha positions,
            one per template, in which case a fourth template is added and
            the templates are passed as a dict keyed by alpha.
    """
    bins1 = 15
    irregular_str = "irregular templates" if alphas is not None else ""

    counts1 = np.random.uniform(70, high=100, size=bins1)  # nominal counts
    counts = [
        counts1 - np.random.uniform(high=20, size=bins1),
        counts1,
        counts1 + np.random.uniform(high=20, size=bins1),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=bins1))
    binning = zfit.binned.RegularBinning(bins1, 0, 10, name="obs1")
    obs = zfit.Space(obs="obs1", binning=binning)
    histos = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [zfit.pdf.HistogramPDF(h) for h in histos]
    if alphas is not None:
        pdfs = dict(zip(alphas, pdfs))
    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]
    for i, a in enumerate(alphas):
        # Exactly at a template position the template has to be reproduced.
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])
        if len(alphas) > i + 1:
            # Half-way to the next template: stay close to both neighbours.
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1]) ** 2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            midpoint_counts = morph.counts()
            np.testing.assert_array_less(
                (midpoint_counts - counts[i]) ** 2, max_dist
            )
            np.testing.assert_array_less(
                (midpoint_counts - counts[i + 1]) ** 2, max_dist
            )

    amin, amax = -2, 2
    n = 5

    template_alphas = np.array(list(alphas))
    # ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9; the pyplot
    # accessor is still available. Looked up once instead of per curve.
    colormap = plt.get_cmap("winter")

    for do_3d in [True, False]:
        plt.figure()
        if do_3d:
            ax = plt.gcf().add_subplot(111, projection="3d")
        else:
            ax = plt.gca()
        plotstyle = "3d plot" if do_3d else "hist plot"
        plt.title(f"Morphing with splines {irregular_str} {plotstyle}")

        for a in list(znp.linspace(amin, amax, n * 2)) + list(template_alphas):
            # Map a from [amin, amax] to [0, 1]; the extra factor 1.3 is a
            # scaling that keeps the colours away from the colormap's upper end.
            normed_a = (a - amin) / (amax - amin) / 1.3
            color = colormap(normed_a)
            alpha.set_value(a)
            histo = morph.ext_pdf(None)
            histo = BinnedData.from_tensor(obs, histo)
            histo = histo.to_hist()
            values = histo.values()
            x = histo.axes.edges[0][:-1]
            y = np.broadcast_to(a, values.shape)
            z = values
            label = None
            if do_3d:
                ax.step(x, y, z, color=color, where="pre", label=label)
            else:
                # Only curves that sit on a template alpha get a legend entry.
                if np.min((a - template_alphas) ** 2) < 0.0001:
                    label = f"alpha={a}"
                mplhep.histplot(histo, label=label, color=color)
        ax.set_xlabel("observable")
        ax.set_ylabel("alpha")
        if do_3d:
            ax.set_zlabel("ext_pdf")
        plt.legend()
        pytest.zfit_savefig()
Exemplo n.º 7
0
def test_binned_loss(weights, Loss, simultaneous):
    """Fit a binned Gauss with a binned loss and check values and uncertainties.

    The test first minimizes ``Loss`` for ``gaussian1`` (optionally as a
    simultaneous fit in two binnings) and compares fitted values and
    hesse/asymmetric errors with reference numbers. It then repeats the fit
    for ``gaussian2`` with a Gaussian constraint on ``mu2`` and ``sigma2``.

    Args:
        weights: Event weights handed to ``zfit.Data.from_tensor``; ``None``
            for unweighted data.
        Loss: Binned loss class (or factory) called with ``model``, ``data``
            and, in the second part, ``constraints``.
        simultaneous: If true, the first fit uses two binned models of the
            same Gauss (32 and 14 bins) with the shared yield ``scale``.
    """
    obs = zfit.Space("obs1", limits=(-15, 25))
    gaussian1, mu1, sigma1 = create_gauss1(obs=obs)
    gaussian2, mu2, sigma2 = create_gauss2(obs=obs)
    # Shift and stretch the module-level test values before using them as data.
    test_values_np_shifted = test_values_np - 1.8  # shift them a bit
    test_values_np_shifted *= 1.2
    test_values = znp.array(test_values_np_shifted)
    test_values = zfit.Data.from_tensor(obs=obs,
                                        tensor=test_values,
                                        weights=weights)
    # Start the yield 20% above the number of events.
    init_yield = test_values_np.shape[0] * 1.2
    scale = zfit.Parameter("yield", init_yield, 0, init_yield * 4, step_size=1)
    binning = zfit.binned.RegularBinning(32,
                                         obs.lower[0],
                                         obs.upper[0],
                                         name="obs1")
    obs_binned = obs.with_binning(binning)
    test_values_binned = test_values.to_binned(obs_binned)
    binned_gauss = zfit.pdf.BinnedFromUnbinnedPDF(gaussian1,
                                                  obs_binned,
                                                  extended=scale)
    if simultaneous:
        # Second model: same Gauss and yield, but binned with 14 bins.
        obs_binned2 = obs.with_binning(14)
        test_values_binned2 = test_values.to_binned(obs_binned2)
        binned_gauss2 = zfit.pdf.BinnedFromUnbinnedPDF(gaussian1,
                                                       obs_binned2,
                                                       extended=scale)
        loss = Loss(
            [binned_gauss, binned_gauss2],
            data=[test_values_binned, test_values_binned2],
        )

    else:
        loss = Loss(model=binned_gauss, data=test_values_binned)

    title = (f"Binned gaussian fit"
             f"{' (randomly weighted)' if weights is not None else ''} with "
             f"{loss.name}")
    plt.figure()
    plt.title(title)
    # Drawn before minimizing, i.e. with the initial parameter values.
    mplhep.histplot(binned_gauss.to_hist(), label="PDF before fit")
    mplhep.histplot(test_values_binned.to_hist(), label="Data")

    # timing, uncomment to test
    # loss.value_gradient(params=loss.get_params())
    # loss.value()
    # loss.gradient()
    # import time, progressbar
    # start = time.time()
    # for _ in progressbar.progressbar(range(1000)):
    #     loss.value()
    #     loss.gradient()
    # print(f"Needed: {time.time() - start}")

    minimizer = zfit.minimize.Minuit(gradient=False)
    result = minimizer.minimize(loss=loss)

    params = result.params
    # Same PDF object as above, drawn again after the minimization.
    mplhep.histplot(binned_gauss.to_hist(), label="PDF after fit")
    plt.legend()
    pytest.zfit_savefig()

    # Store both uncertainty estimates under the names that are read back below.
    result.hesse(name="hesse")
    result.errors(name="asymerr")
    print(result)
    # Reference uncertainties, compared with a 10% relative tolerance.
    rel_tol_errors = 0.1
    mu_error = 0.03 if not simultaneous else 0.021
    sigma_error = 0.0156 if simultaneous else 0.022
    params_list = [mu1, sigma1]
    errors = [mu_error, sigma_error]
    if loss.is_extended:
        # The yield is only checked for extended losses.
        params_list.append(scale)
        errors.append(122 if simultaneous else 170)
    # NOTE(review): ``approx`` wraps the fitted value here, so the relative
    # tolerance is taken with respect to the fit result, not the reference.
    for param, errorval in zip(params_list, errors):
        assert (pytest.approx(result.params[param]["hesse"]["error"],
                              rel=rel_tol_errors) == errorval)
        assert (pytest.approx(result.params[param]["asymerr"]["lower"],
                              rel=rel_tol_errors) == -errorval)
        assert (pytest.approx(result.params[param]["asymerr"]["upper"],
                              rel=rel_tol_errors) == errorval)

    abs_tol_val = 0.15 if weights is None else 0.08  # more fluctuating with weights
    # Twice the tolerance when the loss is a BinnedChi2.
    abs_tol_val *= 2 if isinstance(loss, zfit.loss.BinnedChi2) else 1

    # Fitted mean and width are compared with the sample mean and std.
    assert params[mu1]["value"] == pytest.approx(
        np.mean(test_values_np_shifted), abs=abs_tol_val)
    assert params[sigma1]["value"] == pytest.approx(
        np.std(test_values_np_shifted), abs=abs_tol_val)
    if loss.is_extended:
        # The fitted yield has to be within 3 * sqrt(N) of the number of events.
        nexpected = test_values_np_shifted.shape[0]
        assert params[scale]["value"] == pytest.approx(nexpected,
                                                       abs=3 * nexpected**0.5)
    # Second part: constrained fit of gaussian2. The constraint values come
    # from the module-level ``mu_constr`` / ``sigma_constr`` pairs
    # (index 0: observation, index 1: uncertainty).
    constraints = zfit.constraint.GaussianConstraint(
        params=[mu2, sigma2],
        observation=[mu_constr[0], sigma_constr[0]],
        uncertainty=[mu_constr[1], sigma_constr[1]],
    )
    # ``gaussian2`` is rebound from the unbinned PDF to its binned version.
    gaussian2 = zfit.models.tobinned.BinnedFromUnbinnedPDF(gaussian2,
                                                           obs_binned,
                                                           extended=scale)
    loss = Loss(model=gaussian2,
                data=test_values_binned,
                constraints=constraints)

    minimizer = zfit.minimize.Minuit(gradient=False)
    # Only mu2 and sigma2 are floated; ``scale`` is not in the parameter list.
    result = minimizer.minimize(loss=loss, params=[mu2, sigma2])
    params = result.params
    if weights is None:
        # Expected direction of the constrained result relative to the plain
        # sample estimates (only checked for unweighted data).
        assert params[mu2]["value"] > np.mean(test_values_np_shifted)
        assert params[sigma2]["value"] < np.std(test_values_np_shifted)