Exemplo n.º 1
0
def test_example():
    """Profile a 2D histogram along ``y`` and check the inner ``x`` bins."""
    # Rows are (x, y) points, grouped by their x coordinate.
    points = np.array([
        [-2, 1.5],
        [-2, -3.5],
        [-2, 1.5],  # x = -2
        [0.0, -2.0],
        [0.0, -2.0],
        [0.0, 0.0],
        [0.0, 2.0],
        [0.0, 4.0],  # x = 0
        [2, 1.5],  # x = +2
    ])
    x_axis = hist.axis.Regular(5, -5, 5, name="x")
    y_axis = hist.axis.Regular(5, -5, 5, name="y")
    filled = hist.Hist(x_axis, y_axis).fill(points[:, 0], points[:, 1])

    # Profile out the y-axis
    profiled = filled.profile("y")

    expected_values = np.array([0.0, 0.4, 2.0])
    expected_variances = np.array([2.66666667, 1.088, float("nan")])

    # Only the inner bins are compared: none of the points above fall into
    # the edge bins. When there are values there, ROOT does something funky
    # in those bins, despite these bins not being in the axis that is
    # profiled out, and despite there being no overflow... to be understood.
    inner = slice(1, -1)
    assert profiled.values()[inner] == approx(expected_values)
    assert profiled.variances()[inner] == approx(expected_variances,
                                                 nan_ok=True)
Exemplo n.º 2
0
    def to_hist(self) -> hist.NamedHist:
        """Convert the binned data to a :py:class:`~hist.NamedHist`.

        A binned data object can be used inside zfit (PDFs,...), but it lacks
        many of the convenience features that the `hist library
        <https://hist.readthedocs.io/>`_ offers, such as plots.

        Returns:
            A histogram with weighted storage, built from the binning of this
            object's space and filled with its values and variances.
        """
        # NOTE(review): ``flow`` is not defined in this method; it is
        # presumably a module-level setting -- confirm.
        axes = binning_to_histaxes(self.holder.space.binning)
        result = hist.Hist(*axes, storage=bh.storage.Weight())
        storage_view = result.view(flow=flow)  # TODO: flow?
        storage_view.value = self.values()
        storage_view.variance = self.variances()
        return result
Exemplo n.º 3
0
def main():
    """Histogram two integer columns of a whitespace-separated data file.

    The file path is read from ``sys.argv[1]``. On every line, field 1 is
    parsed as ``a_to_b`` and field 5 as ``a_to_a``. A line is counted as an
    error when ``a_to_b`` is negative or larger than ``a_to_a``; otherwise
    both values are added to their histograms. A summary (total lines, error
    count and percentage) is printed, followed by the ``a_to_a`` histogram.
    """
    ab_hist = hist.Hist(0, 100000, bucket_int=100)
    aa_hist = hist.Hist(0, 100000, bucket_int=100)
    errors = 0
    count = 0
    with open(sys.argv[1], "r") as datafile:
        for line in datafile:
            line_fields = line.strip().split()
            a_to_b = int(line_fields[1])
            a_to_a = int(line_fields[5])
            if a_to_b < 0 or a_to_b > a_to_a:
                errors += 1
            else:
                aa_hist.add_value(a_to_a)
                ab_hist.add_value(a_to_b)
            count += 1

    # An empty input file would otherwise divide by zero.
    error_percent = float(errors) / count * 100 if count else 0.0

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("# Total: {0}".format(count))
    print("# Errors: {0} ({1}%)".format(errors, error_percent))
    aa_hist.pprint()
Exemplo n.º 4
0
def unbinned_to_hist_eager(values, edges, weights=None):
    """Bin unbinned data into a weighted histogram and return its contents.

    Args:
        values: Two-dimensional data, indexed as ``values[:, i]``; one column
            is filled per axis.
        edges: Iterable of bin-edge arrays, one per axis. Each is flattened
            to one dimension before building the axis.
        weights: Optional weights passed to the fill. A scalar-shaped
            container that holds ``None`` is treated as "no weights".

    Returns:
        A tuple ``(values, variances)`` of the histogram without flow bins,
        both converted to ``znp.float64`` arrays.
    """
    holds_no_weights = (weights is not None and weights.shape == ()
                        and None in weights)
    if holds_no_weights:
        weights = None

    axes = []
    for edge in edges:
        flat_edge = np.reshape(edge, (-1, ))
        axes.append(hist.axis.Variable(flat_edge, flow=False))

    histogram = hist.Hist(*axes, storage=hist.storage.Weight())
    n_columns = values.shape[1]
    columns = [values[:, col] for col in range(n_columns)]
    histogram.fill(*columns, weight=weights)

    bin_values = znp.array(histogram.values(flow=False), znp.float64)
    bin_variances = znp.array(histogram.variances(flow=False), znp.float64)
    return bin_values, bin_variances
Exemplo n.º 5
0
def d3(input_files: List[Any], options: Any, output: Any,
       people: bool) -> None:
    """Build a nodes/links JSON document from per-author pull-request files.

    Args:
        input_files: Open YAML files, one per author. The file stem is used
            as the author name and every entry must carry a
            ``"repository_url"`` key.
        options: YAML source with a ``"categories"`` list (each entry has an
            ``"orgs"`` list) and an optional ``"repos"`` list used as a
            repository filter.
        output: Writable file object that receives the JSON document.
        people: Selects which of the two node/link builders is used.
    """
    config = yaml.safe_load(options)
    categories = config["categories"]
    known_orgs = {org for cat in categories for org in cat["orgs"]}
    repo_filter = set(config["repos"]) if "repos" in config else set()

    h = hist.Hist(
        hist.axis.StrCategory([], growth=True, name="author"),
        hist.axis.StrCategory([], growth=True, name="org_repo"),
        storage=hist.storage.Int64(),
    )

    for handle in input_files:
        author_path = Path(handle.name)
        print("Reading", author_path, file=sys.stderr)

        pull_requests = yaml.safe_load(handle)
        print("Finished reading", author_path, file=sys.stderr)

        # Keep only entries from a known org and, when a repository filter
        # is configured, from one of the listed repositories.
        selected = []
        for pr in pull_requests:
            url_parts = pr["repository_url"].split("/")
            org_name = url_parts[-2]
            repo_name = url_parts[-1]
            if org_name not in known_orgs:
                continue
            if repo_filter and repo_name not in repo_filter:
                continue
            selected.append(f"{org_name}:{repo_name}")

        h.fill(
            author=author_path.stem,
            org_repo=selected,
        )

    nodes = get_nodes(h, categories)
    print(f"Built nodes list with {len(nodes)} nodes")

    if people:
        people_nodes = get_people_nodes(h)
        links = repo_people_links(h)
        out_dict = {"nodes": nodes + people_nodes, "links": links}
    else:
        links = get_people_links(h, nodes)
        print("Filled links histogram")
        out_dict = {"nodes": nodes, "links": links}

    serialized = json.dumps(out_dict, sort_keys=True, indent=2)
    print(serialized, file=output)

    print("Finished")
Exemplo n.º 6
0
def get_people_links(h: hist.Hist,
                     nodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Count, for every pair of nodes, the authors that contributed to both.

    A source/target histogram is built over the ``"id"`` values of ``nodes``.
    For every unordered pair of ids and every entry of the ``"author"`` axis
    of ``h``, the pair's bin is incremented when the author has a positive
    count for both ids.

    NOTE(review): the visible part of this function ends after the fill loop
    and has no ``return``; the conversion of the link histogram into the
    annotated list result is presumably further down -- confirm.
    """
    nodes_ids = [v["id"] for v in nodes]

    # Link histogram: both axes are fixed to the known node ids.
    lh = hist.Hist(
        hist.axis.StrCategory(nodes_ids, name="source"),
        hist.axis.StrCategory(nodes_ids, name="target"),
        storage=hist.storage.Int64(),
    )

    for a, b in itertools.combinations(nodes_ids, 2):
        for author in h.axes["author"]:
            # NOTE(review): because of operator precedence ``res2`` is bound
            # to the boolean ``h[author, b] > 0`` rather than to the count,
            # unlike ``res1``. The condition itself is unaffected; neither
            # name is used in the visible code -- confirm intent.
            if (res1 := h[author, a]) > 0 and (res2 := h[author, b] > 0):
                lh.fill(source=a, target=b)
Exemplo n.º 7
0
def test_bh_conversion():
    """Convert a boost-histogram to ``hist.Hist`` and back again.

    Checks the axis type, the axis name and the axis metadata after each
    conversion step.
    """
    original = bh.Histogram(bh.axis.Regular(3, 2, 1, metadata={"name": "x"}))
    original.axes[0].name = "y"

    # boost-histogram -> hist: the axis becomes a hist axis and keeps both
    # the name attribute and the metadata.
    converted = hist.Hist(original)

    assert isinstance(converted.axes[0], hist.axis.Regular)
    assert converted.axes[0].name == "y"
    assert converted.axes[0].metadata == {"name": "x"}

    # hist -> boost-histogram: the axis is no longer a hist axis.
    roundtrip = bh.Histogram(converted)

    assert not isinstance(roundtrip.axes[0], hist.axis.Regular)
    # NOTE(review): this re-checks the intermediate hist histogram, not the
    # round-tripped one -- confirm whether that is intended.
    assert converted.axes[0].name == "y"
    assert roundtrip.axes[0].metadata == {"name": "x"}
Exemplo n.º 8
0
def test_pdf_formhist():
    """Build ``HistogramPDF`` objects from a ``hist`` histogram and check them.

    A non-extended and an extended PDF are created from the same 2D weighted
    histogram. The test compares relative counts, counts, densities and
    integrals against the histogram, checks that the non-extended PDF raises
    ``NotExtendedPDFError`` for extended-only methods, and checks the size
    and rank of samples drawn from both PDFs.
    """
    h = hist.Hist(
        hist.axis.Regular(3, -3, 3, name="x", flow=False),
        hist.axis.Regular(2, -5, 5, name="y", flow=False),
        storage=hist.storage.Weight(),
    )

    x2 = np.random.randn(1_000)
    y2 = 0.5 * np.random.randn(1_000)

    h.fill(x=x2, y=y2)
    pdf = zfit.pdf.HistogramPDF(data=h, extended=False)
    assert not pdf.is_extended
    ntot = h.sum().value
    np.testing.assert_allclose(h.values() / ntot, pdf.rel_counts(h))
    with pytest.raises(NotExtendedPDFError):
        _ = pdf.counts()

    np.testing.assert_allclose(h.density(), pdf.pdf(h))
    with pytest.raises(NotExtendedPDFError):
        _ = pdf.ext_pdf(h)

    # test extended
    ext_pdf = zfit.pdf.HistogramPDF(data=h, extended=True)
    assert ext_pdf.is_extended

    # test counts
    np.testing.assert_allclose(h.values() / ntot, ext_pdf.rel_counts(h))
    np.testing.assert_allclose(h.counts(), ext_pdf.counts(h))
    # test pdf
    # NOTE(review): the next line re-checks the non-extended ``pdf``;
    # ``ext_pdf.pdf(h)`` may have been intended -- confirm.
    np.testing.assert_allclose(h.density(), pdf.pdf(h))
    np.testing.assert_allclose(h.density() * ntot, ext_pdf.ext_pdf(h))

    # test sample
    sample = ext_pdf.sample(n=1_000)
    assert sample.nevents == 1_000
    assert sample.rank == 2

    sample = pdf.sample(n=1_000)
    assert sample.nevents == 1_000
    assert sample.rank == 2

    # test integral
    # ``assert pytest.approx(expected, actual)`` passes ``actual`` as the
    # relative tolerance and asserts an always-truthy object, so it checks
    # nothing. Compare the values explicitly instead; rtol matches the
    # default relative tolerance of ``pytest.approx``.
    limits = ext_pdf.space
    np.testing.assert_allclose(ext_pdf.ext_integrate(limits), ntot, rtol=1e-6)
    np.testing.assert_allclose(ext_pdf.integrate(limits), 1.0, rtol=1e-6)
    np.testing.assert_allclose(pdf.integrate(limits), 1.0, rtol=1e-6)
Exemplo n.º 9
0
def mcmc_test_1d(mcmc_inst, trial, name, graph_title):
    """Run a 1D MCMC sampler and compare its histogram to the target density.

    The sampler is advanced ``trial / 10`` times as burn-in, then ``trial``
    samples are histogrammed on ``[-5, 5]`` with 40 bins. The average time
    per step, scaled by 1000, and the RMS difference between the normalised
    histogram and ``mcmc_inst.inv_dist`` are printed. The histogram and the
    invariant distribution are plotted, saved to ``"<name>_<trial>.png"`` and
    shown.

    Args:
        mcmc_inst: Sampler object providing ``update()``, the current sample
            ``x`` and the invariant distribution ``inv_dist(x)``.
        trial: Number of samples recorded after burn-in.
        name: Prefix of the output image file name.
        graph_title: Prefix of the figure title.
    """
    # number of burn-in steps
    burn = int(trial / 10)

    # instantiate histogram object
    minx = -5.0
    maxx = 5.0
    nbins = 40
    histo = hist.Hist(minx, maxx, nbins)

    # burn-in
    for _ in range(burn):
        mcmc_inst.update()

    # time.clock() was removed in Python 3.8; use perf_counter when it exists
    # and only fall back to clock() on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    # MCMC simulation
    t1 = timer()
    for _ in range(trial):
        histo.set_value(mcmc_inst.x)
        mcmc_inst.update()
    t2 = timer()
    print('time = %f' % (1000.0 * (t2 - t1) / trial))

    # show histogram, normalised by sample count and bin width
    ys = [histo[i] / (trial * histo.span) for i in range(nbins)]
    xs = [histo.span * i + minx for i in range(nbins)]
    plt.bar(xs, ys, width=histo.span)

    # compute RMS error against the invariant distribution
    err = 0.0
    for x, y in zip(xs, ys):
        diff = mcmc_inst.inv_dist(x) - y
        err += diff * diff
    print('error = %f' % math.sqrt(err / nbins))

    # show invariant distribution
    ndiv = 400
    span = (maxx - minx) / ndiv
    xs = [span * i + minx for i in range(ndiv)]
    zs = [mcmc_inst.inv_dist(x) for x in xs]
    plt.plot(xs, zs, 'r-')

    plt.suptitle('%s: %d samples' % (graph_title, trial), size='18')
    plt.savefig('%s_%d.png' % (name, trial))
    plt.show()
Exemplo n.º 10
0
def count(input_file, output):
    """Print per-organisation and per-repository entry counts.

    Args:
        input_file: YAML source holding a list of entries, each with a
            ``"repository_url"`` key whose last two path components are
            taken as organisation and repository.
        output: Writable file object that receives one line per
            organisation: its total, followed by its repositories.
    """
    entries: List[Dict[str, Any]] = yaml.safe_load(input_file)

    org_axis = hist.axis.StrCategory([], growth=True, name="org")
    repo_axis = hist.axis.StrCategory([], growth=True, name="repo")
    counts = hist.Hist(org_axis, repo_axis, storage=hist.storage.Int64())

    orgs = [entry["repository_url"].split("/")[-2] for entry in entries]
    repos = [entry["repository_url"].split("/")[-1] for entry in entries]
    counts.fill(org=orgs, repo=repos)

    per_org = counts.project("org")

    for org_name, total in sort_count(per_org):
        print(org_name, total, sep=":", end=" - ", file=output)

        repo_counts = sort_count(counts[org_name, :])
        formatted = [f"{repo}({c})" for repo, c in repo_counts]
        print(*formatted, sep=", ", file=output)
Exemplo n.º 11
0
def test_from_and_to_binned():
    """Round-trip binned data through its unbinned form repeatedly.

    The bin values must survive every binned -> unbinned -> binned cycle,
    and the final object must convert to a boost-histogram with the same
    values.
    """
    from zfit._data.binneddatav1 import BinnedData

    x_axis = hist.axis.Regular(3, -3, 3, name="x", flow=False)
    y_axis = hist.axis.Regular(2, -5, 5, name="y", flow=False)
    source_hist = hist.Hist(x_axis, y_axis, storage=hist.storage.Weight())

    x_samples = np.random.randn(1_000)
    y_samples = 0.5 * np.random.randn(1_000)
    source_hist.fill(x=x_samples, y=y_samples)

    current = BinnedData.from_hist(source_hist)
    n_roundtrips = 10  # make sure this works many times
    for _ in range(n_roundtrips):
        rebinned = current.to_unbinned().to_binned(space=current.space)
        # we can't test the variances, this info is lost
        np.testing.assert_allclose(rebinned.values(), current.values())
        current = rebinned

    as_boost = bh.Histogram(current)
    np.testing.assert_allclose(current.values(), as_boost.values())
Exemplo n.º 12
0
    def __init__(
        self,
        year='2017',
        systematics=True,
        jet_arbitration='pt',
        tagger='v2',
        nnlops_rew=False,
        skipJER=False,
        tightMatch=False,
        newTrigger=False,
        looseTau=False,
        newVjetsKfactor=False,
        ak4tagger='deepcsv',
        skipRunB=False,
        finebins=False,
        ewkHcorr=False,
        evtVizInfo=False,
    ):
        """Store the configuration and define the per-year tables and histograms.

        Every argument is stored on the instance (with a leading underscore,
        except ``systematics``). Only the arguments below influence what is
        built in this method; the others are merely stored here.

        Args:
            year: Passed to ``BTagCorrector``. The tables built here have
                entries for '2016', '2017' and '2018'.
            tagger: ``'v3'`` selects one set of ddb/ddc/ddcvb bin edges, any
                other value selects the alternative set.
            ak4tagger: ``'deepcsv'`` or ``'deepjet'``; selects the b-tag
                branch name and is passed to ``BTagCorrector``.
            finebins: Selects between the two msd/pt axis definitions used
                for the cutflow_msd and templates histograms.

        Raises:
            NotImplementedError: If ``ak4tagger`` is neither ``'deepcsv'``
                nor ``'deepjet'``.
        """
        self._year = year
        self._tagger = tagger
        self.systematics = systematics
        self._nnlops_rew = nnlops_rew  # for 2018, reweight POWHEG to NNLOPS
        self._jet_arbitration = jet_arbitration
        self._skipJER = skipJER
        self._tightMatch = tightMatch
        self._newVjetsKfactor = newVjetsKfactor
        self._newTrigger = newTrigger  # Fewer triggers, new maps (2017 only, ~no effect)
        self._looseTau = looseTau  # Looser tau veto
        self._ewkHcorr = ewkHcorr
        self._ak4tagger = ak4tagger
        self._skipRunB = skipRunB
        self._finebins = finebins
        self._evtVizInfo = evtVizInfo

        # Map the AK4 tagger choice to the branch name holding its score.
        if self._ak4tagger == 'deepcsv':
            self._ak4tagBranch = 'btagDeepB'
        elif self._ak4tagger == 'deepjet':
            self._ak4tagBranch = 'btagDeepFlavB'
        else:
            raise NotImplementedError()

        self._btagSF = BTagCorrector(year, self._ak4tagger, 'medium')

        # Per-year scale factor keyed by year string.
        self._msdSF = {
            '2016': 1.,
            '2017': 0.987,
            '2018': 0.970,
        }

        # Per-year muon trigger names.
        self._muontriggers = {
            '2016': [
                'Mu50',  # TODO: check
            ],
            '2017': [
                'Mu50',
                'TkMu50',
            ],
            '2018': [
                'Mu50',  # TODO: check
            ],
        }

        # Per-year trigger names.
        self._triggers = {
            '2016': [
                'PFHT800',
                'PFHT900',
                'AK8PFJet360_TrimMass30',
                'AK8PFHT700_TrimR0p1PT0p03Mass50',
                'PFHT650_WideJetMJJ950DEtaJJ1p5',
                'PFHT650_WideJetMJJ900DEtaJJ1p5',
                'AK8DiPFJet280_200_TrimMass30_BTagCSV_p20',
                'PFJet450',
            ],
            '2017': [
                'AK8PFJet330_PFAK8BTagCSV_p17',
                'PFHT1050',
                'AK8PFJet400_TrimMass30',
                'AK8PFJet420_TrimMass30',  # redundant
                'AK8PFHT800_TrimMass50',
                'PFJet500',
                'AK8PFJet500',
            ],
            '2018': [
                'AK8PFJet400_TrimMass30',
                'AK8PFJet420_TrimMass30',
                'AK8PFHT800_TrimMass50',
                'PFHT1050',
                'PFJet500',
                'AK8PFJet500',
                'AK8PFJet330_TrimMass30_PFAK8BoostedDoubleB_np4',
            ],
        }

        # Per-year filter names, separately for 'data' and 'mc'. The 'mc'
        # lists have 'eeBadScFilter' commented out.
        # https://twiki.cern.ch/twiki/bin/view/CMS/MissingETOptionalFiltersRun2
        self._met_filters = {
            '2016': {
                'data': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    'eeBadScFilter',
                ],
                'mc': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    # 'eeBadScFilter',
                ],
            },
            '2017': {
                'data': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    'eeBadScFilter',
                    'ecalBadCalibFilterV2',
                ],
                'mc': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    # 'eeBadScFilter',
                    'ecalBadCalibFilterV2',
                ],
            },
            '2018': {
                'data': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    'eeBadScFilter',
                    'ecalBadCalibFilterV2',
                ],
                'mc': [
                    'goodVertices',
                    'globalSuperTightHalo2016Filter',
                    'HBHENoiseFilter',
                    'HBHENoiseIsoFilter',
                    'EcalDeadCellTriggerPrimitiveFilter',
                    'BadPFMuonFilter',
                    # 'eeBadScFilter',
                    'ecalBadCalibFilterV2',
                ],
            },
        }

        # Per-year JSON file paths (relative).
        self._json_paths = {
            '2016':
            'jsons/Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt',
            '2017':
            'jsons/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt',
            '2018':
            'jsons/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt',
        }

        # Tagger score axes (ddb, ddc, ddcvb); the bin edges depend on the
        # tagger version.
        if self._tagger == 'v3':
            taggerbins = (
                hist2.axis.Variable([0, 0.7, 0.89, 1],
                                    name='ddb',
                                    label=r'Jet ddb score',
                                    flow=False),
                hist2.axis.Variable([0, 0.44, .84, 1],
                                    name='ddc',
                                    label=r'Jet ddc score',
                                    flow=False),
                hist2.axis.Variable([0, 0.017, 0.11, 1],
                                    name='ddcvb',
                                    label=r'Jet ddcvb score',
                                    flow=False),
            )
        else:
            taggerbins = (
                # hist2.axis.Variable([0, 0.7, 0.89, 1], name='ddb', label=r'Jet ddb score', flow=False),
                # hist2.axis.Variable([0, 0.34, .45, 0.49, 1], name='ddc', label=r'Jet ddc score', flow=False),
                # hist2.axis.Variable([0, 0.03, 0.035, 1], name='ddcvb', label=r'Jet ddcvb score', flow=F
                hist2.axis.Variable([0, 0.7, 1],
                                    name='ddb',
                                    label=r'Jet ddb score',
                                    flow=False),
                hist2.axis.Variable([0, 0.4, 0.45, 0.5, 0.7, 1],
                                    name='ddc',
                                    label=r'Jet ddc score',
                                    flow=False),
                hist2.axis.Variable([0, 0.01, 0.03, 0.1, 1],
                                    name='ddcvb',
                                    label=r'Jet ddcvb score',
                                    flow=False),
            )
        # Mass and pt axes: the fine-binned variant uses 200 mass bins on
        # [40, 200] and does not pass flow=False; the default variant uses
        # 23 mass bins on [40, 201] with flow=False.
        if self._finebins:
            mass_bins = hist2.axis.Regular(200,
                                           40,
                                           200,
                                           name='msd',
                                           label=r'Jet $m_{sd}$')
            pt_bins = hist2.axis.Variable(
                [450, 475, 500, 550, 600, 675, 800, 1200],
                name='pt',
                label=r'Jet $p_{T}$ [GeV]')
        else:
            mass_bins = hist2.axis.Regular(23,
                                           40,
                                           201,
                                           name='msd',
                                           label=r'Jet $m_{sd}$',
                                           flow=False)
            pt_bins = hist2.axis.Variable(
                [450, 475, 500, 550, 600, 675, 800, 1200],
                name='pt',
                label=r'Jet $p_{T}$ [GeV]',
                flow=False)
        gen_axis = hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor')

        # Score edges for the signal_opt* histograms: 30 points on
        # [0, 0.15) followed by 86 points on [0.15, 1].
        optbins = np.r_[np.linspace(0, 0.15, 30, endpoint=False),
                        np.linspace(0.15, 1, 86)]
        # Factory for the output structure: each call evaluates the dict
        # literal again, so the accumulators and histograms are newly
        # constructed per call, while the axis objects defined above are
        # captured from this scope.
        self.make_output = lambda: {
            'sumw':
            0.,
            'to_check': {
                "mass": processor.column_accumulator(np.array([])),
                "njet": processor.column_accumulator(np.array([])),
                "fname": processor.column_accumulator(np.array([])),
                "run": processor.column_accumulator(np.array([])),
                "luminosityBlock": processor.column_accumulator(np.array([])),
                "event": processor.column_accumulator(np.array([])),
            },
            'cutflow_msd':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                gen_axis,
                hist2.axis.IntCategory(
                    [0, 1, 2, 3], name='cut', label='Cut index', growth=True),
                mass_bins,
                hist2.storage.Weight(),
            ),
            'cutflow_eta':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                gen_axis,
                hist2.axis.IntCategory(
                    [0, 1, 2, 3], name='cut', label='Cut index', growth=True),
                hist2.axis.Regular(
                    40, -2.5, 2.5, name='eta', label=r'Jet $\eta$'),
                hist2.storage.Weight(),
            ),
            'cutflow_pt':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                gen_axis,
                hist2.axis.IntCategory(
                    [0, 1, 2, 3], name='cut', label='Cut index', growth=True),
                hist2.axis.Regular(
                    100, 400, 1200, name='pt', label=r'Jet $p_{T}$ [GeV]'),
                hist2.storage.Weight(),
            ),
            'nminus1_n2ddt':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.Regular(
                    40, -0.25, 0.25, name='n2ddt', label='N2ddt value'),
                hist2.storage.Weight(),
            ),
            'btagWeight':
            hist2.Hist(
                hist2.axis.Regular(
                    50, 0, 3, name='val', label='BTag correction'),
                hist2.storage.Weight(),
            ),
            'templates':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.StrCategory([], name='systematic', growth=True),
                hist2.axis.StrCategory([], name='runid', growth=True),
                gen_axis,
                pt_bins,
                mass_bins,
                *taggerbins,
                hist2.storage.Weight(),
            ),
            # 'etaphi': hist2.Hist(
            #     hist2.axis.StrCategory([], name='region', growth=True),
            #     hist2.axis.StrCategory([], name='systematic', growth=True),
            #     hist2.axis.StrCategory([], name='runid', growth=True),
            #     hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor'),
            #     hist2.axis.Regular(30, -2.5, 2.5, name='eta', label=r'Jet $\eta$'),
            #     hist2.axis.Regular(30, -3.14, 3.14, name='phi', label=r'Jet $\phi$'),
            #     pt_bins,
            #     *taggerbins[1:],
            #     hist2.storage.Weight(),
            # ),
            # 'wtag' uses only the ddc and ddcvb tagger axes (the ddb axis
            # is dropped via taggerbins[1:]).
            'wtag':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.StrCategory([], name='systematic', growth=True),
                gen_axis,
                hist2.axis.Variable(
                    [-1, 0, 1
                     ], name='n2ddt', label=r'N2ddt value', flow=False),
                hist2.axis.Variable([200, 250, 300, 350, 400, 450, 1200],
                                    name='pt',
                                    label=r'Jet $p_{T}$ [GeV]'),
                hist2.axis.Regular(
                    46, 40, 201, name='msd', label=r'Jet $m_{sd}$', flow=False
                ),
                *taggerbins[1:],
                hist2.storage.Weight(),
            ),
            'signal_opt':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor'),
                hist2.axis.Variable(
                    optbins, name='ddc', label=r'Jet CvL score'),
                hist2.axis.Variable(
                    optbins, name='ddcvb', label=r'Jet CvB score'),
                hist2.axis.Variable([40, 70, 80, 90, 100, 110, 120, 130, 140],
                                    name='msd',
                                    label=r'Jet $m_{sd}$'),
                hist2.storage.Weight(),
            ),
            'signal_optb':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.IntCategory([0, 1, 2, 3], name='genflavor'),
                hist2.axis.Variable(
                    optbins, name='ddb', label=r'Jet BvL score'),
                hist2.axis.Variable([40, 70, 80, 90, 100, 110, 120, 130, 140],
                                    name='msd',
                                    label=r'Jet $m_{sd}$'),
                hist2.storage.Weight(),
            ),
            # Unlike the other histograms this one uses Double storage.
            'genresponse_noweight':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.StrCategory([], name='systematic', growth=True),
                hist2.axis.Variable([450, 500, 550, 600, 675, 800, 1200],
                                    name='pt',
                                    label=r'Jet $p_{T}$ [GeV]'),
                hist2.axis.Variable(np.geomspace(400, 1200, 60),
                                    name='genpt',
                                    label=r'Generated Higgs $p_{T}$ [GeV]'),
                hist2.storage.Double(),
            ),
            'genresponse':
            hist2.Hist(
                hist2.axis.StrCategory([], name='region', growth=True),
                hist2.axis.StrCategory([], name='systematic', growth=True),
                hist2.axis.Variable([450, 500, 550, 600, 675, 800, 1200],
                                    name='pt',
                                    label=r'Jet $p_{T}$ [GeV]'),
                hist2.axis.Variable([200, 300, 450, 650, 7500],
                                    name='genpt',
                                    label=r'Generated Higgs $p_{T}$ [GeV]'),
                hist2.storage.Weight(),
            ),
        }
Exemplo n.º 13
0
def test_binned_from_unbinned_2D():
    """Check a binned 2D Gaussian product PDF against filled histograms.

    The relative counts of the binned PDF are compared, bin by bin, with a
    histogram filled from NumPy normal samples, and a sample drawn from the
    binned PDF is compared with the PDF's own histogram. Both histograms are
    plotted at the end.
    """
    zfit.run.set_graph_mode(True)
    n_events = 100000

    mu = zfit.Parameter("mu", 1, 0, 19)
    sigma = zfit.Parameter("sigma", 6, 0, 120)
    obsx = zfit.Space("x", (-5, 10))
    obsy = zfit.Space("y", (-50, 100))
    gaussx = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obsx)
    muy = mu + 3
    sigmay = sigma * 20
    gaussy = zfit.pdf.Gauss(mu=muy, sigma=sigmay, obs=obsy)
    gauss2D = zfit.pdf.ProductPDF([gaussx, gaussy])

    # Irregular x binning: two linspace segments joined at 5 (the duplicated
    # edge is dropped). The same edges feed the zfit and the hist axis.
    def make_x_edges():
        lower = np.linspace(-5, 5, 43)
        upper = np.linspace(5, 10, 30)[1:]
        return np.concatenate([lower, upper], axis=0)

    axisx = zfit.binned.VariableBinning(make_x_edges(), name="x")
    axisxhist = hist.axis.Variable(make_x_edges(), name="x")
    axisy = zfit.binned.RegularBinning(15, -50, 100, name="y")
    axisyhist = hist.axis.Regular(15, -50, 100, name="y")
    obs_binned = zfit.Space(["x"], binning=axisx) * zfit.Space("y", binning=axisy)

    gauss_binned = BinnedFromUnbinnedPDF(pdf=gauss2D,
                                         space=obs_binned,
                                         extended=n_events)
    # First call outside of the timed loop.
    values = gauss_binned.rel_counts(obs_binned)  # TODO: good test?
    ntrial = 10
    start = time.time()
    for _ in range(ntrial):
        values = gauss_binned.rel_counts(obs_binned)
    print(f"Time taken {(time.time() - start) / ntrial}")

    # Reference histogram filled from NumPy samples in several batches.
    hist2d = hist.Hist(axisxhist, axisyhist)
    nruns = 5
    npoints = 5_000_000
    for _ in range(nruns):
        means = [float(mu), float(muy)]
        widths = [float(sigma), float(sigmay)]
        normal2d = np.random.normal(means, widths, size=(npoints, 2))
        hist2d.fill(*normal2d.T, threads=4)

    deviation = np.abs(values * hist2d.sum() - hist2d.counts())
    allowed = 6.5 * np.sqrt(hist2d.counts())
    diff = deviation - allowed  # 5 sigma for 1000 bins
    print(diff)
    np.testing.assert_array_less(diff, 0)

    sample = gauss_binned.sample(n_events, limits=obs_binned)
    hist_sampled = sample.to_hist()
    hist_pdf = gauss_binned.to_hist()
    max_error = hist_sampled.values() * 6**2  # 6 sigma away
    squared_diff = (hist_sampled.values() - hist_pdf.values()) ** 2
    np.testing.assert_array_less(squared_diff, max_error)

    plot_jobs = (
        ("Gauss 2D binned sampled.", hist_sampled),
        ("Gauss 2D binned plot, irregular (x<4.5 larger bins than x>4.5) binning.",
         hist_pdf),
    )
    for title, histogram in plot_jobs:
        plt.figure()
        plt.title(title)
        mplhep.hist2dplot(histogram)
        pytest.zfit_savefig()
Exemplo n.º 14
0
#  Copyright (c) 2022 zfit
import hist
import mplhep
import numpy as np
from matplotlib import pyplot as plt

import zfit

# Build five 1D histograms from shifted normal samples, plot them stacked,
# then turn them into histogram PDFs and plot their binned sum.
n_processes = 5

# noinspection PyTypeChecker
histos = []
for index in range(n_processes):
    axis = hist.axis.Regular(13, -3, 2, name="x", flow=False)
    # Sample size grows with the index; the shift alternates in sign.
    shift = index**1.5 / 2 * ((-1)**index)
    samples = np.random.normal(size=1_000_000 * (index + 1)) + shift
    filled = hist.Hist(axis)
    filled.fill(x=samples)
    histos.append(filled)

labels = [f"process {index + 1}" for index in range(n_processes)]
mplhep.histplot(histos, stack=True, histtype="fill", label=labels)
plt.legend()
plt.show()

pdfs = [zfit.pdf.HistogramPDF(histo) for histo in histos]
sumpdf = zfit.pdf.BinnedSumPDF(pdfs)

h_back = sumpdf.to_hist()
pdf_syst = zfit.pdf.BinwiseScaleModifier(sumpdf, modifiers=True)

mplhep.histplot(h_back)
# plt.legend()

plt.show()