def test_binned_extended_simple(Loss):
    """Smoke-test ``Loss`` on a sum of three binned template PDFs.

    Three 10x20 count arrays (uniform, normal and a deterministic grid) are
    turned into templates; their element-wise sum is used as the observed
    data. The test only verifies that the loss value and its gradient can be
    evaluated without raising.

    Args:
        Loss: Callable invoked as ``Loss(pdf, data=...)``; presumably supplied
            through test parametrization -- confirm against the decorator.
    """
    shape = (10, 20)

    # The random draws keep their original order (uniform first, then normal).
    uniform_counts = np.random.uniform(high=1, size=shape)
    normal_counts = np.random.normal(loc=5, size=shape)
    grid_counts = (
        np.linspace(0, 10, num=10)[:, None] * np.linspace(0, 5, num=20)[None, :]
    )

    edges_obs1 = [-10, -5, 5, 7, 10, 15, 17, 21, 23.5, 27, 30]
    axis_obs1 = zfit.binned.VariableBinning(edges_obs1, name="obs1")
    axis_obs2 = RegularBinning(20, 0, 10, name="obs2")
    obs = zfit.Space(obs=["obs1", "obs2"], binning=[axis_obs1, axis_obs2])

    templates = [
        BinnedData.from_tensor(
            space=obs,
            values=uniform_counts,
            variances=znp.ones_like(uniform_counts) * 1.3,
        ),
        BinnedData.from_tensor(obs, normal_counts),
        BinnedData.from_tensor(obs, grid_counts),
    ]

    total_counts = uniform_counts + normal_counts + grid_counts
    observed = BinnedData.from_tensor(
        space=obs, values=total_counts, variances=(total_counts + 0.5) ** 2
    )

    components = [BinnedTemplatePDFV1(data=template) for template in templates]
    model = BinnedSumPDF(pdfs=components, obs=obs)
    nll = Loss(model, data=observed)

    # TODO: add some check?
    nll.value()
    nll.gradient()
def test_from_and_to_hist():
    """Round-trip a weighted 2D ``hist`` histogram through ``BinnedData``.

    Checks, repeatedly, that values and variances survive the conversion
    ``hist -> BinnedData``, that the unbinned view has two observables and one
    weight per event, that ``to_hist`` recreates the original histogram and
    that ``boost_histogram`` can consume the ``BinnedData`` directly.
    """
    from zfit._data.binneddatav1 import BinnedData

    axis_x = hist.axis.Regular(25, -3.5, 3, name="x", flow=False)
    axis_y = hist.axis.Regular(21, -4, 5, name="y", flow=False)
    source_hist = hist.NamedHist(axis_x, axis_y, storage=hist.storage.Weight())

    # Keyword arguments are evaluated left to right: x is drawn before y.
    source_hist.fill(x=np.random.randn(1_000), y=0.5 * np.random.randn(1_000))

    for _ in range(10):  # make sure this works many times
        binned = BinnedData.from_hist(source_hist)
        np.testing.assert_allclose(binned.variances(), source_hist.variances())
        np.testing.assert_allclose(binned.values(), source_hist.values())

        unbinned = binned.to_unbinned()
        assert unbinned.value().shape[1] == 2
        assert unbinned.value().shape[0] == unbinned.weights.shape[0]

        recreated = binned.to_hist()
        assert recreated == source_hist

        as_boost = bh.Histogram(binned)
        np.testing.assert_allclose(binned.variances(), as_boost.variances())
        np.testing.assert_allclose(binned.values(), as_boost.values())
def sample(
    self, n: int = None, limits: ztyping.LimitsType = None
) -> ZfitBinnedData:
    """Draw a random binned sample from the PDF.

    Args:
        n: |@doc:pdf.sample.n| Number of samples to draw.
           For an extended PDF, the argument is optional and will be
           the poisson-fluctuated expected number of events, i.e. the yield. |@docend:pdf.sample.n|
        limits: |@doc:pdf.sample.limits| Limits of the sampling.
           By default, this is the same as the default space of the PDF. |@docend:pdf.sample.limits|

    Returns:
        ``ZfitBinnedData``: Sampled dataset

    Raises:
        ValueError: If ``n`` is ``None`` and the PDF is not extended.
    """
    if n is None:
        # Without an explicit n, only an extended PDF knows how many events to draw.
        if not self.is_extended:
            raise ValueError(
                f"n cannot be None for sampling of {self} or needs to be extended."
            )
        n = znp.random.poisson(self.get_yield(), size=1)

    # Remember what the caller passed: the converted limits are used for
    # sampling, the original ones (if a space) for the final observable order.
    requested_limits = limits
    limits = self._check_convert_limits(limits)

    sampled = self._call_sample(n, limits)
    if not isinstance(sampled, ZfitBinnedData):
        # Raw values came back; wrap them into binned data on the sampling space.
        sampled = BinnedData.from_tensor(space=limits, values=sampled, variances=None)

    if isinstance(requested_limits, ZfitSpace):
        sampled = sampled.with_obs(requested_limits)
    return sampled
def __init__(
    self,
    data: ztyping.BinnedDataInputType,
    extended: Optional[ztyping.ExtendedInputType] = None,
    norm: Optional[ztyping.NormInputType] = None,
    name: str = "HistogramPDF",
) -> None:
    """Binned PDF resembling a histogram.

    Simple histogram PDF that can be used to model a histogram as a PDF.

    Args:
        data: Histogram to be used as PDF.
        extended: |@doc:pdf.init.extended| The overall yield of the PDF.
           If this is parameter-like, it will be used as the yield,
           the expected number of events, and the PDF will be extended.
           An extended PDF has additional functionality, such as the
           `ext_*` methods and the `counts` (for binned PDFs). |@docend:pdf.init.extended|
           |@doc:pdf.init.extended.auto| If `True`,
           the PDF will be extended automatically if the PDF is extended
           using the total number of events in the histogram.
           This is the default. |@docend:pdf.init.extended.auto|
        norm: |@doc:pdf.init.norm| Normalization of the PDF.
           By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
        name: |@doc:model.init.name| Human-readable name
           or label of
           the PDF for better identification.
           Has no programmatical functional purpose as identification. |@docend:model.init.name|

    Raises:
        TypeError: If ``data`` is neither a ``ZfitBinnedData`` nor a
            ``PlottableHistogram``.
    """
    if extended is None:
        extended = True

    if not isinstance(data, ZfitBinnedData):
        if not isinstance(data, PlottableHistogram):
            raise TypeError(
                "data must be of type PlottableHistogram (UHI) or ZfitBinnedData"
            )
        from zfit._data.binneddatav1 import BinnedData

        data = BinnedData.from_hist(data)

    # Identity check on purpose: only the literal ``True`` triggers the
    # automatic yield; any other value is passed through unchanged.
    auto_extended = extended is True
    self._automatically_extended = auto_extended
    if auto_extended:
        extended = znp.sum(data.values())

    super().__init__(
        obs=data.space, extended=extended, norm=norm, params={}, name=name
    )
    self._data = data
def test_binned_template_pdf():
    """Check counts, sampling and integration of a sum of template PDFs.

    Three 2D templates are built with explicit yields and summed. The test
    asserts that the summed counts equal the element-wise sum of the inputs,
    that a large sample reproduces those counts within 3 %, and that the
    extended integral over the full space equals the total count.
    """
    bins1 = 5
    bins2 = 7
    shape = (bins1, bins2)

    counts = np.random.uniform(high=1, size=shape)  # generate counts
    counts2 = np.random.normal(loc=5, size=shape)
    counts3 = (
        np.linspace(0, 10, num=bins1)[:, None]
        * np.linspace(0, 5, num=bins2)[None, :]
    )

    # The number of bins comes from ``bins2``; the upper edge is the literal 7.
    # The original call used them the other way round, which only worked
    # because bins2 == 7.
    binning = [
        zfit.binned.RegularBinning(bins1, 0, 10, name="obs1"),
        zfit.binned.RegularBinning(bins2, -10, 7, name="obs2"),
    ]
    # Result unused: kept so that the ``Binnings`` constructor is still called.
    zfit.binned.Binnings(binning)
    obs = zfit.Space(obs=["obs1", "obs2"], binning=binning)

    data = BinnedData.from_tensor(
        space=obs, values=counts, variances=znp.ones_like(counts) * 1.3
    )
    data2 = BinnedData.from_tensor(obs, counts2)
    data3 = BinnedData.from_tensor(obs, counts3)

    pdf = BinnedTemplatePDFV1(data=data, extended=np.sum(counts))
    pdf2 = BinnedTemplatePDFV1(data=data2, extended=np.sum(counts2))
    pdf3 = BinnedTemplatePDFV1(data=data3, extended=np.sum(counts3))
    assert len(pdf.ext_pdf(data)) > 0

    pdf_sum = BinnedSumPDF(pdfs=[pdf, pdf2, pdf3], obs=obs)
    probs = pdf_sum.counts(data)
    true_sum_counts = counts + counts2 + counts3
    np.testing.assert_allclose(true_sum_counts, probs)

    # Sampling: rescale the sampled frequencies to the yield before comparing.
    nsamples = 100_000_000
    sample = pdf_sum.sample(n=nsamples)
    np.testing.assert_allclose(
        true_sum_counts,
        sample.values() / nsamples * pdf_sum.get_yield(),
        rtol=0.03,
    )

    # integrate
    true_integral = znp.sum(true_sum_counts)
    integral = pdf_sum.ext_integrate(limits=obs)
    assert pytest.approx(float(true_integral)) == float(integral)
def to_binneddata(self, **kwargs) -> zfit.data.BinnedData:
    """Create an Asimov dataset as `BinnedData` from the values of this PDF.

    The bin contents are taken from ``self.values(**kwargs)`` and placed on
    ``self.space``; no variances are passed.

    Args:
        **kwargs (): arguments forwarded to ``values`` (per the original
            documentation these end up in `counts` for extended PDFs or in
            `rel_counts` otherwise -- not visible from this method alone).

    Returns:
        BinnedData: Binned data representing the Asimov dataset of this PDF.
    """
    return BinnedData.from_tensor(space=self.space, values=self.values(**kwargs))
def test_morphing_templates2D(alphas):
    """Check ``SplineMorphingPDF`` on 2D templates.

    At every template position of ``alpha`` the morphed counts and yield must
    reproduce the corresponding template. Halfway between two neighbouring
    templates the morphed counts must stay close to both of them (squared
    distance below the squared template distance plus a tolerance).

    Args:
        alphas: Template positions, or ``None`` to pass the templates as a
            plain list, in which case the positions -1, 0, 1 are checked.
            Presumably provided via parametrization -- confirm against the
            decorator.
    """
    bins1 = 10
    bins2 = 7
    shape = (bins1, bins2)

    counts1 = np.random.uniform(70, high=100, size=shape)  # generate counts
    counts = [
        counts1 - np.random.uniform(high=20, size=shape),
        counts1,
        counts1 + np.random.uniform(high=20, size=shape),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=shape))

    binning1 = zfit.binned.VariableBinning(
        sorted(np.random.uniform(0, 10, size=bins1 + 1)), name="obs1"
    )
    binning2 = zfit.binned.RegularBinning(bins2, 0, 10, name="obs2")
    obs1 = zfit.Space(obs="obs1", binning=binning1)
    obs2 = zfit.Space(obs="obs2", binning=binning2)
    obs = obs1 * obs2

    datasets = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [
        BinnedTemplatePDFV1(data=data, extended=np.sum(data.values()))
        for data in datasets
    ]
    if alphas is not None:
        pdfs = dict(zip(alphas, pdfs))

    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]

    for i, a in enumerate(alphas):
        # Exactly on a template: counts and yield must match it.
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])
        assert pytest.approx(np.sum(counts[i])) == zfit.run(morph.get_yield())

        if len(alphas) > i + 1:
            # Halfway to the next template: stay near both neighbours.
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1]) ** 2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            np.testing.assert_array_less(
                (morph.counts() - counts[i]) ** 2, max_dist
            )
            np.testing.assert_array_less(
                (morph.counts() - counts[i + 1]) ** 2, max_dist
            )
def __init__(
    self,
    model: ztyping.BinnedPDFInputType,
    data: ztyping.BinnedDataInputType,
    constraints: ConstraintsInputType = None,
    options: OptionsInputType = None,
):
    """Validate the inputs of a binned loss and forward them to the base class.

    ``model`` and ``data`` are converted to containers. Datasets that are a
    ``PlottableHistogram`` but not yet a ``ZfitBinnedData`` are converted with
    ``BinnedData.from_hist``. The base initializer is called with
    ``fit_range=None``.

    Args:
        model: Binned PDF or container of binned PDFs.
        data: Binned dataset(s) or UHI histogram(s).
        constraints: Passed through to the base class.
        options: Passed through to the base class.

    Raises:
        ValueError: If any model is not a ``ZfitBinnedPDF`` or any dataset is
            not a ``ZfitBinnedData`` after conversion. Both problems are
            reported in a single message.
    """
    model = convert_to_container(model)
    data = convert_to_container(data)
    from zfit._data.binneddatav1 import BinnedData

    binned_inputs = []
    for dataset in data:
        is_foreign_hist = isinstance(dataset, PlottableHistogram) and not isinstance(
            dataset, ZfitBinnedData
        )
        binned_inputs.append(
            BinnedData.from_hist(dataset) if is_foreign_hist else dataset
        )
    data = binned_inputs

    unbinned_models = [mod for mod in model if not isinstance(mod, ZfitBinnedPDF)]
    unbinned_datasets = [dat for dat in data if not isinstance(dat, ZfitBinnedData)]

    problems = []
    if unbinned_models:
        problems.append(
            "The following PDFs are not binned but need to be. They can be wrapped in an "
            f"BinnedFromUnbinnedPDF. {unbinned_models} "
        )
    if unbinned_datasets:
        problems.append(
            "The following datasets are not binned but need to be. They can be converted to a binned "
            f"using the `to_binned` method. {unbinned_datasets}"
        )
    if problems:
        raise ValueError("".join(problems))

    super().__init__(
        model=model,
        data=data,
        constraints=constraints,
        fit_range=None,
        options=options,
    )
def _convert_input_binned_x(self, x, none_is_space=None):
    """Normalise the ``x`` input of a binned method.

    Args:
        x: ``None``, a UHI histogram, binned data, a space, unbinned data or
            anything ``Data.from_tensor`` accepts.
        none_is_space: If truthy, ``x=None`` is replaced by ``self.space``.

    Returns:
        ``self.space`` for ``x=None`` with ``none_is_space`` set; binned data
        for a UHI histogram; ``x`` unchanged if it already is binned data, a
        space or unbinned data; otherwise unbinned data built from ``x`` with
        ``self.obs``.

    Raises:
        TypeError: If the fallback conversion through ``Data.from_tensor``
            fails for any reason.
    """
    if x is None and none_is_space:
        return self.space

    is_foreign_hist = isinstance(
        x, uhi.typing.plottable.PlottableHistogram
    ) and not isinstance(x, ZfitBinnedData)
    if is_foreign_hist:
        x = BinnedData.from_hist(x)

    # TODO: should we allow spaces? Or what?
    if isinstance(x, (ZfitBinnedData, ZfitSpace, ZfitUnbinnedData)):
        return x

    # Last resort: interpret x as a raw tensor of unbinned data.
    try:
        return Data.from_tensor(obs=self.obs, tensor=x)
    except Exception as error:
        raise TypeError(
            f"Data to {self} has to be Binned Data, not {x}. (It can also be unbinned Data)"
            " but conversion to it failed (see also above) with the following error:"
            f" {error})"
        ) from error
def test_from_and_to_binned():
    """Round-trip ``BinnedData`` through its unbinned representation.

    Converting to unbinned data and re-binning on the same space must keep the
    bin values, also when the conversion is chained several times. Finally the
    result must still be readable by ``boost_histogram``.
    """
    from zfit._data.binneddatav1 import BinnedData

    source_hist = hist.Hist(
        hist.axis.Regular(3, -3, 3, name="x", flow=False),
        hist.axis.Regular(2, -5, 5, name="y", flow=False),
        storage=hist.storage.Weight(),
    )
    # Keyword arguments are evaluated left to right: x is drawn before y.
    source_hist.fill(x=np.random.randn(1_000), y=0.5 * np.random.randn(1_000))

    current = BinnedData.from_hist(source_hist)
    for _ in range(10):  # make sure this works many times
        rebinned = current.to_unbinned().to_binned(space=current.space)
        # we can't test the variances, this info is lost
        np.testing.assert_allclose(rebinned.values(), current.values())
        current = rebinned

    as_boost = bh.Histogram(current)
    np.testing.assert_allclose(current.values(), as_boost.values())
def test_with_obs():
    """Check that ``with_obs`` reorders the axes of ``BinnedData``.

    After swapping the first two observables, a slice along the original first
    axis must equal the slice along the new second axis, for both the values
    and the variances.
    """
    from zfit._data.binneddatav1 import BinnedData

    source_hist = hist.NamedHist(
        hist.axis.Regular(25, -3.5, 3, name="x", flow=False),
        hist.axis.Regular(21, -4, 5, name="y", flow=False),
        hist.axis.Regular(15, -2, 1, name="z", flow=False),
        storage=hist.storage.Weight(),
    )
    # Keyword arguments are evaluated left to right: x, then y, then z.
    source_hist.fill(
        x=np.random.randn(1_000),
        y=0.5 * np.random.randn(1_000),
        z=0.3 * np.random.randn(1_000),
    )

    binned = BinnedData.from_hist(source_hist)
    obs = ("x", "y", "z")
    swapped_obs = ("y", "x", "z")
    assert obs == binned.obs

    reordered = binned.with_obs(swapped_obs)
    assert reordered.obs == swapped_obs

    for accessor in ("values", "variances"):
        before = getattr(binned, accessor)()
        after = getattr(reordered, accessor)()
        np.testing.assert_allclose(before[:, 3, 5], after[3, :, 5])
def test_morphing_templates(alphas):
    """Check ``SplineMorphingPDF`` on 1D templates and plot the morphing.

    At every template position of ``alpha`` the morphed counts must reproduce
    the corresponding template; halfway between two neighbouring templates the
    morphed counts must stay close to both. Afterwards the morphed ``ext_pdf``
    is drawn for a scan of ``alpha`` values, once as a 3D step plot and once
    as overlaid histograms, and saved through ``pytest.zfit_savefig``.

    Args:
        alphas: Template positions, or ``None`` to pass the templates as a
            plain list, in which case the positions -1, 0, 1 are checked.
            Presumably provided via parametrization -- confirm against the
            decorator.
    """
    bins1 = 15
    irregular_str = "irregular templates" if alphas is not None else ""

    counts1 = np.random.uniform(70, high=100, size=bins1)  # generate counts
    counts = [
        counts1 - np.random.uniform(high=20, size=bins1),
        counts1,
        counts1 + np.random.uniform(high=20, size=bins1),
    ]
    if alphas is not None:
        counts.append(counts1 + np.random.uniform(high=5, size=bins1))

    binning = zfit.binned.RegularBinning(bins1, 0, 10, name="obs1")
    obs = zfit.Space(obs="obs1", binning=binning)
    histos = [BinnedData.from_tensor(obs, count) for count in counts]
    pdfs = [zfit.pdf.HistogramPDF(h) for h in histos]
    if alphas is not None:
        pdfs = dict(zip(alphas, pdfs))

    alpha = zfit.Parameter("alpha", 0, -5, 5)
    morph = SplineMorphingPDF(alpha=alpha, hists=pdfs)
    if alphas is None:
        alphas = [-1, 0, 1]

    for i, a in enumerate(alphas):
        # Exactly on a template: the counts must match it.
        alpha.set_value(a)
        np.testing.assert_allclose(morph.counts(), counts[i])

        if len(alphas) > i + 1:
            # Halfway to the next template: stay near both neighbours.
            alpha.set_value((a + alphas[i + 1]) / 2)
            max_dist = (counts[i] - counts[i + 1]) ** 2 + 5  # tolerance
            max_dist *= 1.1  # not strict, it can be a bit higher
            np.testing.assert_array_less(
                (morph.counts() - counts[i]) ** 2, max_dist
            )
            np.testing.assert_array_less(
                (morph.counts() - counts[i + 1]) ** 2, max_dist
            )

    amin, amax = -2, 2
    n = 5
    template_alphas = np.array(list(alphas))
    # ``matplotlib.cm.get_cmap`` was removed in Matplotlib 3.9; the pyplot
    # accessor is still available. Looked up once instead of per curve.
    cmap = plt.get_cmap("winter")

    for do_3d in [True, False]:
        plt.figure()
        if do_3d:
            ax = plt.gcf().add_subplot(111, projection="3d")
        else:
            ax = plt.gca()
        plotstyle = "3d plot" if do_3d else "hist plot"
        plt.title(f"Morphing with splines {irregular_str} {plotstyle}")

        for a in list(znp.linspace(amin, amax, n * 2)) + list(template_alphas):
            normed_a = (a - amin) / (amax - amin) / 1.3  # 1.3 is a scaling factor
            color = cmap(normed_a)
            alpha.set_value(a)
            histo = morph.ext_pdf(None)
            histo = BinnedData.from_tensor(obs, histo)
            histo = histo.to_hist()
            values = histo.values()
            x = histo.axes.edges[0][:-1]
            y = np.broadcast_to(a, values.shape)
            z = values
            label = None
            if do_3d:
                ax.step(x, y, z, color=color, where="pre", label=label)
            else:
                # Only curves that sit on a template position get a legend entry.
                if np.min((a - template_alphas) ** 2) < 0.0001:
                    label = f"alpha={a}"
                mplhep.histplot(histo, label=label, color=color)

        ax.set_xlabel("observable")
        ax.set_ylabel("alpha")
        if do_3d:
            ax.set_zlabel("ext_pdf")
        plt.legend()
        pytest.zfit_savefig()
def test_binned_template_pdf_bbfull(TemplateLikePDF):
    """Fit a sum of three ``sysshape=True`` templates to scaled-up data.

    Every template PDF gets a Gaussian constraint on its parameters, centred
    at one with relative uncertainty ``sqrt(counts) / counts``. After
    minimising the extended binned NLL with Minuit, the fitted counts are
    required to lie strictly between the template counts and the data counts.

    Args:
        TemplateLikePDF: Callable invoked as
            ``TemplateLikePDF(data=..., sysshape=True)``; presumably supplied
            through test parametrization -- confirm against the decorator.
    """
    bins1, bins2 = 15, 10
    shape = (bins1, bins2)

    # Template ("MC") counts; the random draws keep their original order.
    counts1 = np.random.uniform(high=150, size=shape)  # generate counts
    counts2 = np.random.normal(loc=50, size=shape)
    counts3 = (
        np.linspace(10, 100, num=bins1)[:, None]
        * np.linspace(10, 500, num=bins2)[None, :]
    )

    axes = [
        zfit.binned.RegularBinning(bins1, 0, 10, name="obs1"),
        zfit.binned.RegularBinning(bins2, -10, 7, name="obs2"),
    ]
    obs = zfit.Space(obs=["obs1", "obs2"], binning=axes)

    mc1 = BinnedData.from_tensor(
        space=obs, values=counts1, variances=znp.ones_like(counts1) * 1.3
    )
    mc2 = BinnedData.from_tensor(obs, counts2)
    mc3 = BinnedData.from_tensor(obs, counts3)
    counts_mc = counts1 + counts2 + counts3

    # Observed counts: drawn independently, then scaled up by 10 %.
    data_uniform = np.random.uniform(high=150, size=shape)  # generate counts
    data_normal = np.random.normal(loc=50, size=shape)
    data_grid = (
        np.linspace(10, 100, num=bins1)[:, None]
        * np.linspace(20, 490, num=bins2)[None, :]
    )
    counts_data = (data_uniform + data_normal + data_grid) * 1.1
    data = BinnedData.from_tensor(space=obs, values=counts_data)

    pdf1 = TemplateLikePDF(data=mc1, sysshape=True)
    pdf2 = TemplateLikePDF(data=mc2, sysshape=True)
    pdf3 = TemplateLikePDF(data=mc3, sysshape=True)
    assert len(pdf1.counts()) > 0

    pdf_sum = BinnedSumPDF(pdfs=[pdf1, pdf2, pdf3], obs=obs)

    # One Gaussian constraint per template. The original kept a commented-out
    # PoissonConstraint on the raw counts as an alternative for pdf2 and pdf3.
    constraints = []
    for template_pdf, template_counts in zip(
        (pdf1, pdf2, pdf3), (counts1, counts2, counts3)
    ):
        flat = np.reshape(template_counts, -1)
        constraints.append(
            zfit.constraint.GaussianConstraint(
                template_pdf.params.values(),
                observation=np.ones_like(flat),
                uncertainty=np.sqrt(flat) / flat,
            )
        )

    loss = ExtendedBinnedNLL(pdf_sum, data, constraints=constraints)
    loss.value()
    loss.gradients()

    print("start minimization")
    minimizer = zfit.minimize.Minuit(verbosity=8, gradient=False)
    minimizer.minimize(loss)

    counts = pdf_sum.counts()
    # this is an assumption, if that is wrong, the test is flawed
    np.testing.assert_array_less(counts_mc, counts_data)
    np.testing.assert_array_less(counts, counts_data)
    np.testing.assert_array_less(counts_mc, counts)
def to_binned(self, space):
    """Bin this dataset on ``space``.

    Args:
        space: Passed as ``space`` to ``BinnedData.from_unbinned``.

    Returns:
        The result of ``BinnedData.from_unbinned(space=space, data=self)``.
    """
    # Imported locally, as in the original (presumably to avoid a circular import).
    from zfit._data.binneddatav1 import BinnedData

    binned = BinnedData.from_unbinned(space=space, data=self)
    return binned