def test():
    array = ak.Array(
        ak.layout.RegularArray(
            ak.layout.NumpyArray(np.r_[1, 2, 3, 4, 5, 6, 7, 8, 9]), 3))
    mask = ak.Array(
        ak.layout.NumpyArray(
            np.array([[True, True, True], [True, True, False],
                      [True, False, True]])))

    assert ak.mask(array, mask).to_list() == [[1, 2, 3], [4, 5, None],
                                              [7, None, 9]]
Пример #2
0
def test():
    arr1 = ak.Array({"a": [1, 2], "b": [1, None]})
    arr2 = ak.mask(arr1, [True, True])
    assert isinstance(arr2.layout, ak.layout.ByteMaskedArray)
    assert isinstance(arr2.layout.content, ak.layout.RecordArray)
    assert isinstance(arr2.layout.content.field("b"),
                      ak.layout.IndexedOptionArray64)

    assert isinstance(arr2.b.layout, ak.layout.IndexedOptionArray64)
    assert isinstance(arr2.b.layout.content, ak.layout.NumpyArray)

    assert ak.is_none(arr2.b).tolist() == [False, True]

    arr3 = ak.virtual(lambda: arr2, form=arr2.layout.form, length=len(arr2))
    assert ak.is_none(arr3.b).tolist() == [False, True]
Пример #3
0
def test_tomask():
    array = ak.Array([[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5],
                      [6.6, 7.7, 8.8, 9.9]])
    mask1 = ak.Array([True, True, False, False, True])
    assert ak.to_list(array[mask1]) == [[0.0, 1.1, 2.2], [],
                                        [6.6, 7.7, 8.8, 9.9]]
    assert ak.to_list(ak.mask(array, mask1)) == [
        [0.0, 1.1, 2.2],
        [],
        None,
        None,
        [6.6, 7.7, 8.8, 9.9],
    ]

    mask2 = ak.Array([[False, True, False], [], [True, True], [False],
                      [True, False, False, True]])
    assert ak.to_list(array[mask2]) == [[1.1], [], [3.3, 4.4], [], [6.6, 9.9]]
    assert ak.to_list(ak.mask(array, mask2)) == [
        [None, 1.1, None],
        [],
        [3.3, 4.4],
        [None],
        [6.6, None, None, 9.9],
    ]
Пример #4
0
def mask_inf(var_array, var_name=None, var_inf_counter=None):
    """
    Mask inf values in `var_array` with None. If var_inf_counter is passed, append there inplace for a given `var_name` the fraction of its inf values.

    Arguments:
        - var_array: awkward array, values of a given feature for a given set of taus
        - var_name (optional, default=None): string, variable name
        - var_inf_counter (optional, default=None): defaultdict(list), stores fraction of inf values for variables

    Returns
        var_array witn masked infs values to None
    """
    if np.sum(np.isinf(var_array)) > 0:
        is_inf_mask = np.isinf(var_array)
        var_array = ak.mask(var_array, is_inf_mask, valid_when=False)
        if var_inf_counter is not None:
            var_inf_counter[var_name].append(
                np.sum(is_inf_mask) / ak.count(var_array))
    return var_array
Пример #5
0
    def process_shift(self, events, shift_name):
        dataset = events.metadata['dataset']
        isRealData = not hasattr(events, "genWeight")
        selection = PackedSelection()
        weights = Weights(len(events), storeIndividual=True)
        output = self.make_output()
        if shift_name is None and not isRealData:
            output['sumw'] = ak.sum(events.genWeight)

        if isRealData or self._newTrigger:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._triggers[self._year]:
                if t in events.HLT.fields:
                    trigger = trigger | events.HLT[t]
            selection.add('trigger', trigger)
            del trigger
        else:
            selection.add('trigger', np.ones(len(events), dtype='bool'))

        if isRealData:
            selection.add(
                'lumimask', lumiMasks[self._year](events.run,
                                                  events.luminosityBlock))
        else:
            selection.add('lumimask', np.ones(len(events), dtype='bool'))

        if isRealData and self._skipRunB and self._year == '2017':
            selection.add('dropB', events.run > 299329)
        else:
            selection.add('dropB', np.ones(len(events), dtype='bool'))

        if isRealData:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._muontriggers[self._year]:
                if t in events.HLT.fields:
                    trigger |= np.array(events.HLT[t])
            selection.add('muontrigger', trigger)
            del trigger
        else:
            selection.add('muontrigger', np.ones(len(events), dtype='bool'))

        metfilter = np.ones(len(events), dtype='bool')
        for flag in self._met_filters[
                self._year]['data' if isRealData else 'mc']:
            metfilter &= np.array(events.Flag[flag])
        selection.add('metfilter', metfilter)
        del metfilter

        fatjets = events.FatJet
        fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
        fatjets['qcdrho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
        fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
        fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

        candidatejet = fatjets[
            # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
            (fatjets.pt > 200)
            & (abs(fatjets.eta) < 2.5)
            & fatjets.isTight  # this is loose in sampleContainer
        ]

        candidatejet = candidatejet[:, :
                                    2]  # Only consider first two to match generators
        if self._jet_arbitration == 'pt':
            candidatejet = ak.firsts(candidatejet)
        elif self._jet_arbitration == 'mass':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.msdcorr, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'n2':
            candidatejet = ak.firsts(candidatejet[ak.argmin(candidatejet.n2ddt,
                                                            axis=1,
                                                            keepdims=True)])
        elif self._jet_arbitration == 'ddb':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDBvLV2, axis=1, keepdims=True)])
        elif self._jet_arbitration == 'ddc':
            candidatejet = ak.firsts(candidatejet[ak.argmax(
                candidatejet.btagDDCvLV2, axis=1, keepdims=True)])
        else:
            raise RuntimeError("Unknown candidate jet arbitration")

        if self._tagger == 'v1':
            bvl = candidatejet.btagDDBvL
            cvl = candidatejet.btagDDCvL
            cvb = candidatejet.btagDDCvB
        elif self._tagger == 'v2':
            bvl = candidatejet.btagDDBvLV2
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.btagDDCvBV2
        elif self._tagger == 'v3':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.particleNetMD_Xcc / (
                1 - candidatejet.particleNetMD_Xbb)
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)

        elif self._tagger == 'v4':
            bvl = candidatejet.particleNetMD_Xbb
            cvl = candidatejet.btagDDCvLV2
            cvb = candidatejet.particleNetMD_Xcc / (
                candidatejet.particleNetMD_Xcc +
                candidatejet.particleNetMD_Xbb)
        else:
            raise ValueError("Not an option")

        selection.add('minjetkin', (candidatejet.pt >= 450)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('_strict_mass', (candidatejet.msdcorr > 85) &
                      (candidatejet.msdcorr < 130))
        selection.add('_high_score', cvl > 0.8)
        selection.add('minjetkinmu', (candidatejet.pt >= 400)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('minjetkinw', (candidatejet.pt >= 200)
                      & (candidatejet.pt < 1200)
                      & (candidatejet.msdcorr >= 40.)
                      & (candidatejet.msdcorr < 201.)
                      & (abs(candidatejet.eta) < 2.5))
        selection.add('jetid', candidatejet.isTight)
        selection.add('n2ddt', (candidatejet.n2ddt < 0.))
        if not self._tagger == 'v2':
            selection.add('ddbpass', (bvl >= 0.89))
            selection.add('ddcpass', (cvl >= 0.83))
            selection.add('ddcvbpass', (cvb >= 0.2))
        else:
            selection.add('ddbpass', (bvl >= 0.7))
            selection.add('ddcpass', (cvl >= 0.45))
            selection.add('ddcvbpass', (cvb >= 0.03))

        jets = events.Jet
        jets = jets[(jets.pt > 30.) & (abs(jets.eta) < 2.5) & jets.isTight]
        # only consider first 4 jets to be consistent with old framework
        jets = jets[:, :4]
        dphi = abs(jets.delta_phi(candidatejet))
        selection.add(
            'antiak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) <
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        ak4_away = jets[dphi > 0.8]
        selection.add(
            'ak4btagMedium08',
            ak.max(ak4_away[self._ak4tagBranch], axis=1, mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])

        met = events.MET
        selection.add('met', met.pt < 140.)

        goodmuon = ((events.Muon.pt > 10)
                    & (abs(events.Muon.eta) < 2.4)
                    & (events.Muon.pfRelIso04_all < 0.25)
                    & events.Muon.looseId)
        nmuons = ak.sum(goodmuon, axis=1)
        leadingmuon = ak.firsts(events.Muon[goodmuon])

        if self._looseTau:
            goodelectron = ((events.Electron.pt > 10)
                            & (abs(events.Electron.eta) < 2.5)
                            &
                            (events.Electron.cutBased >= events.Electron.VETO))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                ((events.Tau.pt > 20)
                 & (abs(events.Tau.eta) < 2.3)
                 & events.Tau.idDecayMode
                 & ((events.Tau.idMVAoldDM2017v2 & 2) != 0)
                 & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                          axis=2)
                 & ak.all(events.Tau.metric_table(
                     events.Electron[goodelectron]) > 0.4,
                          axis=2)),
                axis=1,
            )
        else:
            goodelectron = (
                (events.Electron.pt > 10)
                & (abs(events.Electron.eta) < 2.5)
                & (events.Electron.cutBased >= events.Electron.LOOSE))
            nelectrons = ak.sum(goodelectron, axis=1)

            ntaus = ak.sum(
                (events.Tau.pt > 20)
                &
                events.Tau.idDecayMode  # bacon iso looser than Nano selection
                & ak.all(events.Tau.metric_table(events.Muon[goodmuon]) > 0.4,
                         axis=2)
                & ak.all(events.Tau.metric_table(events.Electron[goodelectron])
                         > 0.4,
                         axis=2),
                axis=1,
            )

        selection.add('noleptons',
                      (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
        selection.add('onemuon',
                      (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
        selection.add('muonkin',
                      (leadingmuon.pt > 55.) & (abs(leadingmuon.eta) < 2.1))
        selection.add('muonDphiAK8',
                      abs(leadingmuon.delta_phi(candidatejet)) > 2 * np.pi / 3)

        # W-Tag (Tag and Probe)
        # tag side
        selection.add(
            'ak4btagMediumOppHem',
            ak.max(jets[dphi > np.pi / 2][self._ak4tagBranch],
                   axis=1,
                   mask_identity=False) >
            BTagEfficiency.btagWPs[self._ak4tagger][self._year]['medium'])
        selection.add('met40p', met.pt > 40.)
        selection.add('tightMuon',
                      (leadingmuon.tightId) & (leadingmuon.pt > 53.))
        # selection.add('ptrecoW', (leadingmuon + met).pt > 250.)
        selection.add('ptrecoW200', (leadingmuon + met).pt > 200.)
        selection.add(
            'ak4btagNearMu',
            leadingmuon.delta_r(leadingmuon.nearest(ak4_away, axis=None)) <
            2.0)
        _bjets = jets[self._ak4tagBranch] > BTagEfficiency.btagWPs[
            self._ak4tagger][self._year]['medium']
        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(ak.firsts(events.Muon))  < 0.3
        # selection.add('ak4btagOld', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)
        _nearAK8 = jets.delta_r(candidatejet) < 0.8
        _nearMu = jets.delta_r(leadingmuon) < 0.3
        selection.add('ak4btagOld',
                      ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # _nearAK8 = jets.delta_r(candidatejet)  < 0.8
        # _nearMu = jets.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None))  < 0.3
        # selection.add('ak4btagNew', ak.sum(_bjets & ~_nearAK8 & ~_nearMu, axis=1) >= 1)

        # probe side
        selection.add('minWjetpteta',
                      (candidatejet.pt >= 200) & (abs(candidatejet.eta) < 2.4))
        # selection.add('noNearMuon', candidatejet.delta_r(candidatejet.nearest(events.Muon[goodmuon], axis=None)) > 1.0)
        selection.add('noNearMuon', candidatejet.delta_r(leadingmuon) > 1.0)
        #####

        if isRealData:
            genflavor = ak.zeros_like(candidatejet.pt)
        else:
            if 'HToCC' in dataset or 'HToBB' in dataset:
                if self._ewkHcorr:
                    add_HiggsEW_kFactors(weights, events.GenPart, dataset)

            weights.add('genweight', events.genWeight)
            if "PSWeight" in events.fields:
                add_ps_weight(weights, events.PSWeight)
            else:
                add_ps_weight(weights, None)
            if "LHEPdfWeight" in events.fields:
                add_pdf_weight(weights, events.LHEPdfWeight)
            else:
                add_pdf_weight(weights, None)
            if "LHEScaleWeight" in events.fields:
                add_scalevar_7pt(weights, events.LHEScaleWeight)
                add_scalevar_3pt(weights, events.LHEScaleWeight)
            else:
                add_scalevar_7pt(weights, [])
                add_scalevar_3pt(weights, [])

            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            bosons = getBosons(events.GenPart)
            matchedBoson = candidatejet.nearest(bosons,
                                                axis=None,
                                                threshold=0.8)
            if self._tightMatch:
                match_mask = (
                    (candidatejet.pt - matchedBoson.pt) / matchedBoson.pt <
                    0.5) & ((candidatejet.msdcorr - matchedBoson.mass) /
                            matchedBoson.mass < 0.3)
                selmatchedBoson = ak.mask(matchedBoson, match_mask)
                genflavor = bosonFlavor(selmatchedBoson)
            else:
                genflavor = bosonFlavor(matchedBoson)
            genBosonPt = ak.fill_none(ak.firsts(bosons.pt), 0)
            if self._newVjetsKfactor:
                add_VJets_kFactors(weights, events.GenPart, dataset)
            else:
                add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
            if shift_name is None:
                output['btagWeight'].fill(val=self._btagSF.addBtagWeight(
                    weights, ak4_away, self._ak4tagBranch))
            if self._nnlops_rew and dataset in [
                    'GluGluHToCC_M125_13TeV_powheg_pythia8'
            ]:
                weights.add('minlo_rew',
                            powheg_to_nnlops(ak.to_numpy(genBosonPt)))

            if self._newTrigger:
                add_jetTriggerSF(
                    weights, ak.firsts(fatjets),
                    self._year if not self._skipRunB else f'{self._year}CDEF',
                    selection)
            else:
                add_jetTriggerWeight(weights, candidatejet.msdcorr,
                                     candidatejet.pt, self._year)

            add_mutriggerSF(weights, leadingmuon, self._year, selection)
            add_mucorrectionsSF(weights, leadingmuon, self._year, selection)

            if self._year in ("2016", "2017"):
                weights.add("L1Prefiring", events.L1PreFiringWeight.Nom,
                            events.L1PreFiringWeight.Up,
                            events.L1PreFiringWeight.Dn)

            logger.debug("Weight statistics: %r" % weights.weightStatistics)

        msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
            genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

        regions = {
            'signal': [
                'noleptons', 'minjetkin', 'met', 'metfilter', 'jetid',
                'antiak4btagMediumOppHem', 'n2ddt', 'trigger', 'lumimask'
            ],
            'signal_noddt': [
                'noleptons', 'minjetkin', 'met', 'jetid',
                'antiak4btagMediumOppHem', 'trigger', 'lumimask', 'metfilter'
            ],
            # 'muoncontrol': ['minjetkinmu', 'jetid', 'n2ddt', 'ak4btagMedium08', 'onemuon', 'muonkin', 'muonDphiAK8', 'muontrigger', 'lumimask', 'metfilter'],
            'muoncontrol': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'n2ddt',
                'muontrigger', 'lumimask'
            ],
            'muoncontrol_noddt': [
                'onemuon', 'muonkin', 'muonDphiAK8', 'jetid', 'metfilter',
                'minjetkinmu', 'jetid', 'ak4btagMedium08', 'muontrigger',
                'lumimask'
            ],
            'wtag': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid', 'met40p',
                'metfilter', 'ptrecoW200', 'ak4btagOld', 'muontrigger',
                'lumimask'
            ],
            'wtag0': [
                'onemuon', 'tightMuon', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'wtag2': [
                'onemuon', 'tightMuon', 'minjetkinw', 'jetid',
                'ak4btagMediumOppHem', 'met40p', 'metfilter', 'ptrecoW200',
                'ak4btagOld', 'muontrigger', 'lumimask'
            ],
            'noselection': [],
        }

        def normalize(val, cut):
            if cut is None:
                ar = ak.to_numpy(ak.fill_none(val, np.nan))
                return ar
            else:
                ar = ak.to_numpy(ak.fill_none(val[cut], np.nan))
                return ar

        import time
        tic = time.time()
        if shift_name is None:
            for region, cuts in regions.items():
                allcuts = set([])
                cut = selection.all(*allcuts)
                output['cutflow_msd'].fill(region=region,
                                           genflavor=normalize(
                                               genflavor, None),
                                           cut=0,
                                           weight=weights.weight(),
                                           msd=normalize(msd_matched, None))
                output['cutflow_eta'].fill(region=region,
                                           genflavor=normalize(genflavor, cut),
                                           cut=0,
                                           weight=weights.weight()[cut],
                                           eta=normalize(
                                               candidatejet.eta, cut))
                output['cutflow_pt'].fill(region=region,
                                          genflavor=normalize(genflavor, cut),
                                          cut=0,
                                          weight=weights.weight()[cut],
                                          pt=normalize(candidatejet.pt, cut))
                for i, cut in enumerate(cuts + ['ddcvbpass', 'ddcpass']):
                    allcuts.add(cut)
                    cut = selection.all(*allcuts)
                    output['cutflow_msd'].fill(region=region,
                                               genflavor=normalize(
                                                   genflavor, cut),
                                               cut=i + 1,
                                               weight=weights.weight()[cut],
                                               msd=normalize(msd_matched, cut))
                    output['cutflow_eta'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        eta=normalize(candidatejet.eta, cut))
                    output['cutflow_pt'].fill(
                        region=region,
                        genflavor=normalize(genflavor, cut),
                        cut=i + 1,
                        weight=weights.weight()[cut],
                        pt=normalize(candidatejet.pt, cut))

                    if self._evtVizInfo and 'ddcpass' in allcuts and isRealData and region == 'signal':
                        if 'event' not in events.fields:
                            continue
                        _cut = selection.all(*allcuts, '_strict_mass',
                                             '_high_score')
                        # _cut = selection.all('_strict_mass'')
                        output['to_check'][
                            'mass'] += processor.column_accumulator(
                                normalize(msd_matched, _cut))
                        nfatjet = ak.sum(
                            ((fatjets.pt > 200) &
                             (abs(fatjets.eta) < 2.5) & fatjets.isTight),
                            axis=1)
                        output['to_check'][
                            'njet'] += processor.column_accumulator(
                                normalize(nfatjet, _cut))
                        output['to_check'][
                            'fname'] += processor.column_accumulator(
                                np.array([events.metadata['filename']] *
                                         len(normalize(msd_matched, _cut))))
                        output['to_check'][
                            'event'] += processor.column_accumulator(
                                normalize(events.event, _cut))
                        output['to_check'][
                            'luminosityBlock'] += processor.column_accumulator(
                                normalize(events.luminosityBlock, _cut))
                        output['to_check'][
                            'run'] += processor.column_accumulator(
                                normalize(events.run, _cut))

        if shift_name is None:
            systematics = [None] + list(weights.variations)
        else:
            systematics = [shift_name]

        def fill(region, systematic, wmod=None):
            selections = regions[region]
            cut = selection.all(*selections)
            sname = 'nominal' if systematic is None else systematic
            if wmod is None:
                if systematic in weights.variations:
                    weight = weights.weight(modifier=systematic)[cut]
                else:
                    weight = weights.weight()[cut]
            else:
                weight = weights.weight()[cut] * wmod[cut]

            output['templates'].fill(
                region=region,
                systematic=sname,
                runid=runmap(events.run)[cut],
                genflavor=normalize(genflavor, cut),
                pt=normalize(candidatejet.pt, cut),
                msd=normalize(msd_matched, cut),
                ddb=normalize(bvl, cut),
                ddc=normalize(cvl, cut),
                ddcvb=normalize(cvb, cut),
                weight=weight,
            )
            if region in [
                    'wtag', 'wtag0', 'wtag2', 'wtag3', 'wtag4', 'wtag5',
                    'wtag6', 'wtag7', 'noselection'
            ]:  # and sname in ['nominal', 'pileup_weightDown', 'pileup_weightUp', 'jet_triggerDown', 'jet_triggerUp']:
                output['wtag'].fill(
                    region=region,
                    systematic=sname,
                    genflavor=normalize(genflavor, cut),
                    pt=normalize(candidatejet.pt, cut),
                    msd=normalize(msd_matched, cut),
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    weight=weight,
                )
            # if region in ['signal', 'noselection']:
            #     output['etaphi'].fill(
            #         region=region,
            #         systematic=sname,
            #         runid=runmap(events.run)[cut],
            #         genflavor=normalize(genflavor, cut),
            #         pt=normalize(candidatejet.pt, cut),
            #         eta=normalize(candidatejet.eta, cut),
            #         phi=normalize(candidatejet.phi, cut),
            #         ddc=normalize(cvl, cut),
            #         ddcvb=normalize(cvb, cut),
            #     ),
            if not isRealData:
                if wmod is not None:
                    _custom_weight = events.genWeight[cut] * wmod[cut]
                else:
                    _custom_weight = np.ones_like(weight)
                output['genresponse_noweight'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=_custom_weight,
                )

                output['genresponse'].fill(
                    region=region,
                    systematic=sname,
                    pt=normalize(candidatejet.pt, cut),
                    genpt=normalize(genBosonPt, cut),
                    weight=weight,
                )
            if systematic is None:
                output['signal_opt'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddc=normalize(cvl, cut),
                    ddcvb=normalize(cvb, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )
                output['signal_optb'].fill(
                    region=region,
                    genflavor=normalize(genflavor, cut),
                    ddb=normalize(bvl, cut),
                    msd=normalize(msd_matched, cut),
                    weight=weight,
                )

        for region in regions:
            cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
            if shift_name is None:
                output['nminus1_n2ddt'].fill(
                    region=region,
                    n2ddt=normalize(candidatejet.n2ddt, cut),
                    weight=weights.weight()[cut],
                )
            for systematic in systematics:
                if isRealData and systematic is not None:
                    continue
                fill(region, systematic)
            if shift_name is None and 'GluGluH' in dataset and 'LHEWeight' in events.fields:
                for i in range(9):
                    fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:,
                                                                          i])
                for c in events.LHEWeight.fields[1:]:
                    fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

        toc = time.time()
        output["filltime"] = toc - tic
        if shift_name is None:
            output["weightStats"] = weights.weightStatistics
        return {dataset: output}
Пример #6
0
    """
    Mask inf values in `var_array` with None. If var_inf_counter is passed, append there inplace for a given `var_name` the fraction of its inf values.

    Arguments:
        - var_array: awkward array, values of a given feature for a given set of taus
        - var_name (optional, default=None): string, variable name
        - var_inf_counter (optional, default=None): defaultdict(list), stores fraction of inf values for variables
        - raise_exception (optional, default=True): bool, whether to raise exception instead of masking inf values 

    Returns
        var_array witn masked infs values to None
    """
    if np.any(is_inf_mask:=np.isinf(var_array)):
        if raise_exception:
            raise ValueError(f'Inf value detected in {var_name}')
        var_array = ak.mask(var_array, is_inf_mask, valid_when=False)
        if var_inf_counter is not None:
            var_inf_counter[var_name].append(np.sum(is_inf_mask) / ak.count(var_array))
    return var_array

def mask_nan(var_array, var_name=None, var_nan_counter=None, raise_exception=True):
    """
    Mask nan values in `var_array` with None. If var_nan_counter is passed, append there inplace for a given `var_name` the fraction of its nan values.
    Arguments:
        - var_array: awkward array, values of a given feature for a given set of taus
        - var_name (optional, default=None): string, variable name
        - var_nan_counter (optional, default=None): defaultdict(list), stores fraction of nan values for variables
        - raise_exception (optional, default=True): bool, whether to raise exception instead of masking NaN values 
    Returns
        var_array witn masked nan values to None
    """
Пример #7
0
def make_perm_table(bhad, blep, wja, wjb, lepton, met, nu):
    '''
    Inputs:
        Jets, leptons, neutrinos, jet assignment object ordering, array of disrminiant probabilities (Total, mass discriminant, neutrino discrminant), and associated event weights
    Returns:
        awkward Table containing
            1: Jet objects (BLeps, BHads, WJas, WJbs)
            2: Leptons, Neutrinos
            3: Calculated probabilities (Total -> Prob, mass discriminant -> MassDiscr, neutrino discrminant -> NuDiscr)
    '''

        ## these attributes are based on those from URTTbar/interface/Permutation.h https://gitlab.cern.ch/jdulemba/URTTbar/-/blob/htt_analysis_2016legacydata_9410/interface/Permutation.h
    isWLepComplete = (ak.num(lepton.pt) == 1) & (ak.num(nu.pt) == 1)
    isTLepComplete = isWLepComplete & (ak.num(blep.pt) == 1)

    WLep = ak.Array({
        'pt'    : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).pt, []),
        'eta'   : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).eta, []),
        'phi'   : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).phi, []),
        'mass'  : ak.fill_none( (ak.mask(lepton, isWLepComplete)+ak.mask(nu, isWLepComplete)).mass, []),
        'charge': ak.fill_none( (ak.mask(lepton, isWLepComplete)).charge, []),
    }, with_name="PtEtaPhiMLorentzVector")
    TLep = ak.Array({
        'pt'    : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).pt, []),
        'eta'   : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).eta, []),
        'phi'   : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).phi, []),
        'mass'  : ak.fill_none( (ak.mask(WLep, isTLepComplete)+ak.mask(blep, isTLepComplete)).mass, []),
    }, with_name="PtEtaPhiMLorentzVector")


    ## Event categories
        # fill empty [] events with values for easier comparisons
    bhad_jetIdx = ak.fill_none(ak.pad_none(bhad, 1).jetIdx, -999)
    blep_jetIdx = ak.fill_none(ak.pad_none(blep, 1).jetIdx, -999)
    wja_jetIdx = ak.fill_none(ak.pad_none(wja, 1).jetIdx, -999)
    wjb_jetIdx = ak.fill_none(ak.pad_none(wjb, 1).jetIdx, -999)

        # merged jets event categories
            # only bhad and blep merged
    Merged_BHadBLep = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx != wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx)
            # only bhad and wja merged
    Merged_BHadWJa = (bhad_jetIdx >= 0) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wjb_jetIdx)
            # only bhad and wjb merged
    Merged_BHadWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wja_jetIdx)
            # only blep and wja merged
    Merged_BLepWJa = (blep_jetIdx >= 0) & (blep_jetIdx == wja_jetIdx) & (blep_jetIdx != bhad_jetIdx) & (blep_jetIdx != wjb_jetIdx)
            # only blep and wjb merged
    Merged_BLepWJb = (blep_jetIdx >= 0) & (blep_jetIdx == wjb_jetIdx) & (blep_jetIdx != bhad_jetIdx) & (blep_jetIdx != wja_jetIdx)
            # only wja and wjb merged
    Merged_WJets = (wja_jetIdx >= 0) & (wja_jetIdx == wjb_jetIdx) & (wja_jetIdx != bhad_jetIdx) & (wja_jetIdx != blep_jetIdx)
            # only two jets merged
    Merged_Event = (Merged_BHadBLep) | (Merged_BHadWJa) | (Merged_BHadWJb) | (Merged_BLepWJa) | (Merged_BLepWJb) | (Merged_WJets)
            # only bhad, blep, wja merged
    Merged_BHadBLepWJa = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx)
            # only bhad, blep, wjb merged
    Merged_BHadBLepWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == blep_jetIdx) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != wja_jetIdx)
            # only bhad, wja, wjb merged
    Merged_BHadWJaWJb = (bhad_jetIdx >= 0) & (bhad_jetIdx == wja_jetIdx) & (bhad_jetIdx == wjb_jetIdx) & (bhad_jetIdx != blep_jetIdx)
            # only blep, wja, wjb merged
    Merged_BLepWJaWJb = (blep_jetIdx >= 0) & (blep_jetIdx == wja_jetIdx) & (blep_jetIdx == wjb_jetIdx) & (blep_jetIdx != bhad_jetIdx)
        #

        # lost jet event categories
            # only bhad is lost, other jets exist and are resolved
    Lost_BHad = (bhad_jetIdx < 0) & (blep_jetIdx >= 0) & (wja_jetIdx >= 0) & (wjb_jetIdx >= 0) & (blep_jetIdx != wja_jetIdx) & (blep_jetIdx != wjb_jetIdx) & (wja_jetIdx != wjb_jetIdx)
            # only blep is lost, other jets exist and are resolved
    Lost_BLep = (blep_jetIdx < 0) & (bhad_jetIdx >= 0) & (wja_jetIdx >= 0) & (wjb_jetIdx >= 0) & (bhad_jetIdx != wja_jetIdx) & (bhad_jetIdx != wjb_jetIdx) & (wja_jetIdx != wjb_jetIdx)
            # only wja is lost, other jets exist and are resolved
    Lost_WJa = (wja_jetIdx < 0) & (bhad_jetIdx >= 0) & (blep_jetIdx >= 0) & (wjb_jetIdx >= 0) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wjb_jetIdx) & (blep_jetIdx != wjb_jetIdx)
            # only wjb is lost, other jets exist and are resolved
    Lost_WJb = (wjb_jetIdx < 0) & (bhad_jetIdx >= 0) & (blep_jetIdx >= 0) & (wja_jetIdx >= 0) & (bhad_jetIdx != blep_jetIdx) & (bhad_jetIdx != wja_jetIdx) & (blep_jetIdx != wja_jetIdx)
            # only one jet is lost, others exist and are resolved
    Lost_Event = (Lost_BHad) | (Lost_BLep) | (Lost_WJa) | (Lost_WJb)
    ##

    n_perm_matches = ak.unflatten(ak.num(bhad.pt)+ak.num(blep.pt)+ak.num(wja.pt)+ak.num(wjb.pt), np.ones(len(bhad.pt), dtype=int))
    #isEmpty = (bhad_jetIdx < 0) & (blep_jetIdx < 0) & (wja_jetIdx < 0) & (wjb_jetIdx < 0)

        # find number of unique matches
    jetIdx_stack = np.stack( (ak.to_numpy(ak.flatten(bhad_jetIdx)), ak.to_numpy(ak.flatten(blep_jetIdx)), ak.to_numpy(ak.flatten(wja_jetIdx)), ak.to_numpy(ak.flatten(wjb_jetIdx)) ), axis=1)
    unique_matches = ak.unflatten(np.array([len(list(set([ind for ind in inds if ind >= 0]))) for inds in jetIdx_stack.tolist()]), np.ones(len(bhad.pt), dtype=int))


    # create WHad
        # init variables
    whad_pt  = np.zeros(len(bhad.pt))
    whad_eta = np.zeros(len(bhad.pt))
    whad_phi = np.zeros(len(bhad.pt))
    whad_mass= np.zeros(len(bhad.pt))

            # inds where only WJb p4 is used as WHad
    use_wjb_inds = ak.flatten(Merged_BHadWJa | Merged_BLepWJa | Merged_WJets | Lost_WJa)
    whad_pt[use_wjb_inds]  = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.pt, 1), np.nan)[use_wjb_inds]))
    whad_eta[use_wjb_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.eta, 1), np.nan)[use_wjb_inds]))
    whad_phi[use_wjb_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.phi, 1), np.nan)[use_wjb_inds]))
    whad_mass[use_wjb_inds]= ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wjb.mass, 1), np.nan)[use_wjb_inds]))

            # inds where only WJa p4 is used as WHad
    use_wja_inds = ak.flatten(Merged_BHadWJb | Merged_BLepWJb | Lost_WJb)
    whad_pt[use_wja_inds]  = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.pt, 1), np.nan)[use_wja_inds]))
    whad_eta[use_wja_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.eta, 1), np.nan)[use_wja_inds]))
    whad_phi[use_wja_inds] = ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.phi, 1), np.nan)[use_wja_inds]))
    whad_mass[use_wja_inds]= ak.to_numpy(ak.flatten(ak.fill_none(ak.pad_none(wja.mass, 1), np.nan)[use_wja_inds]))

            # inds where combined p4 from WJa and WJb is used as WHad (all other inds)
    use_comb_inds = ~(use_wjb_inds | use_wja_inds)
    whad_pt[use_comb_inds]  = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).pt[use_comb_inds] ))
    whad_eta[use_comb_inds] = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).eta[use_comb_inds] ))
    whad_phi[use_comb_inds] = ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).phi[use_comb_inds] ))
    whad_mass[use_comb_inds]= ak.to_numpy(ak.flatten( (ak.fill_none(ak.pad_none( wja, 1), 0) + ak.fill_none(ak.pad_none( wjb, 1), 0)).mass[use_comb_inds] ))
    whad_pt[whad_pt == 0.] = np.nan
    whad_eta[whad_eta == 0.] = np.nan
    whad_phi[whad_phi == 0.] = np.nan
    whad_mass[whad_mass == 0.] = np.nan
    
    WHad = ak.Array({
        'pt'    : ak.unflatten(whad_pt[~np.isnan(whad_pt)], (~np.isnan(whad_pt)).astype(int)),
        'eta'   : ak.unflatten(whad_eta[~np.isnan(whad_eta)], (~np.isnan(whad_eta)).astype(int)),
        'phi'   : ak.unflatten(whad_phi[~np.isnan(whad_phi)], (~np.isnan(whad_phi)).astype(int)),
        'mass'  : ak.unflatten(whad_mass[~np.isnan(whad_mass)], (~np.isnan(whad_mass)).astype(int)),
        'charge': -1*ak.fill_none(ak.mask(WLep, ~np.isnan(whad_mass)).charge, []), # opposite charge as WLep for events that exist
    }, with_name="PtEtaPhiMLorentzVector")


    isWHadComplete = (ak.num(WHad.pt) == 1)
    isTHadComplete = (isWHadComplete) & (ak.num(bhad.pt) == 1)

    #set_trace()

    # create THad
    THad = ak.Array({
        'pt'    : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).pt, []),
        'eta'   : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).eta, []),
        'phi'   : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).phi, []),
        'mass'  : ak.fill_none( (ak.mask(WHad, isTHadComplete)+ak.mask(bhad, isTHadComplete)).mass, []),
    }, with_name="PtEtaPhiMLorentzVector")

    # create TTbar
    isComplete = isTHadComplete & isTLepComplete
    TTbar = ak.Array({
        'pt'    : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).pt, []),
        'eta'   : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).eta, []),
        'phi'   : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).phi, []),
        'mass'  : ak.fill_none( (ak.mask(THad, isComplete)+ak.mask(TLep, isComplete)).mass, []),
    }, with_name="PtEtaPhiMLorentzVector")

    
        ## Combine everything into a single table, all objects are JaggedArrays
    permutations = ak.zip({
        "BHad" : bhad,
        "BLep" : blep,
        "WJa" : wja,
        "WJb" : wjb,
        "Lepton" : lepton,
        "MET" : met,
        "Nu" : nu,
        "WLep" : WLep,
        "TLep" : TLep,
        "WHad" : WHad,
        "THad" : THad,
        "TTbar" : TTbar,
        "n_perm_matches" : n_perm_matches,
        #"isEmpty" : isEmpty,
        "unique_matches" : unique_matches,
        "Merged_BHadBLep" : Merged_BHadBLep,
        "Merged_BHadWJa" : Merged_BHadWJa,
        "Merged_BHadWJb" : Merged_BHadWJb,
        "Merged_BLepWJa" : Merged_BLepWJa,
        "Merged_BLepWJb" : Merged_BLepWJb,
        "Merged_WJets" : Merged_WJets,
        "Merged_Event" : Merged_Event,
        "Lost_BHad" : Lost_BHad,
        "Lost_BLep" : Lost_BLep,
        "Lost_WJa" : Lost_WJa,
        "Lost_WJb" : Lost_WJb,
        "Lost_Event" : Lost_Event,
    }, depth_limit=1)

    #set_trace()
    
    return permutations
def test_mask():
    array = ak.Array([1, 2, 3, 4, 5])
    mask = ak.Array([False, True, True, True, False])
    assert ak.to_list(ak.mask(array, mask)) == [None, 2, 3, 4, None]