def test_concatenate():
    """Concatenating floats, nested lists and booleans gives a two-content union.

    The expected list shows the booleans coming back as floats next to the
    original float values, while the nested lists are kept as lists.
    """
    floats = awkward1.Array([1.1, 2.2, 3.3, 4.4, 5.5], check_valid=True)
    nested = awkward1.Array([[], [1], [2, 2], [3, 3, 3]], check_valid=True)
    flags = awkward1.Array([True, False, False, True, True], check_valid=True)
    inputs = [floats, nested, flags]

    expected = [
        1.1, 2.2, 3.3, 4.4, 5.5,
        [], [1], [2, 2], [3, 3, 3],
        1.0, 0.0, 0.0, 1.0, 1.0,
    ]
    assert awkward1.to_list(awkward1.concatenate(inputs)) == expected

    # Inspect the low-level layout: a union with exactly two contents.
    layout = awkward1.concatenate(inputs, highlevel=False)
    assert isinstance(layout, awkward1.layout.UnionArray8_64)
    assert len(layout.contents) == 2
def test_merge_parameters():
    """Integer lists and strings stay distinct when concatenated in either order."""
    code_points = [[121, 117, 99, 107, 121], [115, 116, 117, 102, 102]]
    words = ["good", "stuff"]

    int_lists = awkward1.from_iter(code_points, highlevel=False)
    strings = awkward1.from_iter(words, highlevel=False)

    lists_first = awkward1.concatenate([int_lists, strings])
    assert awkward1.to_list(lists_first) == code_points + words

    strings_first = awkward1.concatenate([strings, int_lists])
    assert awkward1.to_list(strings_first) == words + code_points
def test():
    """Concatenated string arrays compare element-wise with a scalar and with NumPy arrays."""
    spanish = ["uno", "dos", "tres"]
    french = ["un", "deux", "trois", "quatre"]
    pig_latin = ["onay", "ootay", "eethray"]

    one = awkward1.Array(spanish)
    two = awkward1.Array(french)
    three = awkward1.Array(pig_latin)
    merged = awkward1.concatenate([one, two, three])

    assert awkward1.to_list(merged) == spanish + french + pig_latin

    # Only the very first entry equals the scalar "uno".
    assert awkward1.to_list(merged == "uno") == [True] + [False] * 9

    short_probe = numpy.array(["UNO", "dos", "tres"])
    assert awkward1.to_list(one == short_probe) == [False, True, True]

    long_probe = numpy.array(["UNO", "dos", "tres", "one", "two", "three",
                              "quatre", "onay", "two", "three"])
    matches = [False, True, True, False, False, False, True, True, False, False]
    assert awkward1.to_list(merged == long_probe) == matches
def test_merge():
    """A ByteMaskedArray merged with an option-type Array yields an IndexedOptionArray64."""
    layout = awkward1.layout
    with_nones = [[0.0, 1.1, 2.2], [], None, None, [6.6, 7.7, 8.8, 9.9]]

    content = awkward1.from_iter(
        [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]],
        highlevel=False)
    mask = layout.Index8(numpy.array([0, 0, 1, 1, 0], dtype=numpy.int8))

    # valid_when=False: entries whose mask byte is 1 are the missing ones.
    array1 = layout.ByteMaskedArray(mask, content, valid_when=False)
    assert awkward1.to_list(array1) == with_nones

    array2 = awkward1.Array(with_nones)
    array12 = awkward1.concatenate([array1, array2], highlevel=False)
    assert awkward1.to_list(array12) == with_nones + with_nones

    # Check the structure of the merged layout from the outside in.
    assert isinstance(array12, layout.IndexedOptionArray64)
    inner_lists = array12.content
    assert isinstance(inner_lists, (layout.ListArray64, layout.ListOffsetArray64))
    flat_values = inner_lists.content
    assert isinstance(flat_values, layout.NumpyArray)
    assert awkward1.to_list(flat_values) == [
        0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9,
        0.0, 1.1, 2.2, 6.6, 7.7, 8.8, 9.9,
    ]
def test_concatenate():
    """Concatenation keeps input order when mixing numbers with lists or strings."""
    one = awkward1.Array([1, 2, 3])
    two = awkward1.Array([4.4, 5.5])
    three = awkward1.Array([6, 7, 8])
    four = awkward1.Array([[9, 9, 9], [10, 10, 10]])
    five = awkward1.Array(["nine", "ten"])

    nested = [[9, 9, 9], [10, 10, 10]]
    words = ["nine", "ten"]

    # (inputs, expected Python list) pairs, checked in order.
    cases = [
        ([one, two, three, four], [1, 2, 3, 4.4, 5.5, 6, 7, 8] + nested),
        ([four, one, two, three], nested + [1, 2, 3, 4.4, 5.5, 6, 7, 8]),
        ([one, two, four, three], [1, 2, 3, 4.4, 5.5] + nested + [6, 7, 8]),
        ([one, two, three, five], [1, 2, 3, 4.4, 5.5, 6, 7, 8] + words),
        ([five, one, two, three], words + [1, 2, 3, 4.4, 5.5, 6, 7, 8]),
        ([one, two, five, three], [1, 2, 3, 4.4, 5.5] + words + [6, 7, 8]),
    ]
    for arrays, expected in cases:
        assert awkward1.concatenate(arrays).tolist() == expected
Пример #6
0
    def get(self, el, mu, meas='QCD'):
        """Return one signed weight per event from the electron and muon lookups.

        The electron and muon evaluators selected by ``meas`` are applied to
        ``el`` and ``mu``, the results are concatenated per event (axis=1),
        multiplied together per event, and finally multiplied by
        ``(-1)**(n_lep + 1)`` where ``n_lep`` is the number of electrons plus
        muons in the event.

        Args:
            el: electron collection; ``conePt`` and ``etaSC`` are read.
            mu: muon collection; ``conePt`` and ``eta`` are read.
            meas: which measurement to use, one of ``'QCD'``, ``'TT'`` or
                ``'data'``.

        Returns:
            The per-event product of the looked-up values times the sign.

        Raises:
            ValueError: if ``meas`` is not one of the supported measurements.
        """
        if meas == 'QCD':
            el_key, mu_key = 'el_QCD_NC', 'mu_QCD'
        elif meas == 'TT':
            el_key, mu_key = 'el_TT', 'mu_TT'
        elif meas == 'data':
            el_key, mu_key = 'el_data', 'mu_data'
        else:
            # Fail early with a clear message instead of an UnboundLocalError
            # on el_key further down.
            raise ValueError(
                "Unknown measurement %r; expected 'QCD', 'TT' or 'data'" %
                (meas, ))

        n_lep = ak.num(el) + ak.num(mu)
        # +1 for an odd number of leptons, -1 for an even number.
        sign = (-1)**(n_lep + 1)
        el_fr = self.evaluator[el_key](el.conePt, np.abs(el.etaSC))
        mu_fr = self.evaluator[mu_key](mu.conePt, np.abs(mu.eta))
        fr = ak.concatenate([el_fr, mu_fr], axis=1)
        return ak.prod(fr, axis=1) * sign
Пример #7
0
def test_range_slices():
    """Range slices of a partitioned array agree with the concatenated array.

    Every start/stop in 0..9 is combined with positive and negative steps and
    checked both against plain Python list slicing and against the
    partitioned array's ``getitem_range``.
    """
    chunks = [[0, 1, 2], [3, 4], [5], [], [6, 7, 8, 9]]
    partitions = [
        awkward1.from_iter(numpy.array(chunk, dtype=numpy.int64), highlevel=False)
        for chunk in chunks
    ]

    aspart = awkward1.partition.IrregularlyPartitionedArray(list(partitions))
    asfull = awkward1.concatenate(list(partitions), highlevel=False)
    aslist = awkward1.to_list(asfull)

    steps = (1, 2, 3, 4, 5, -1, -2, -3, -4, -5)
    for start in range(10):
        for stop in range(10):
            for step in steps:
                sliced = asfull[start:stop:step]
                assert awkward1.to_list(sliced) == aslist[start:stop:step]
                from_partitions = aspart._ext.getitem_range(start, stop, step)
                assert from_partitions.tojson() == sliced.tojson()
    def process(self, events):
        """Fill the cutflow and histograms for one chunk of events.

        Events with more than two jets are kept. Tight and veto leptons and
        cleaned jets are built from them, and two selections are defined:

        * the opposite-sign selection (``os_reqs``): lepton veto, exactly one
          tight electron and one tight muon, trigger, filters, lepton pT
          thresholds and an opposite-charge electron-muon pair;
        * the baseline selection: the above plus 'N_jet>2' and 'MET>30'.

        For every dataset except 'MuonEG', event weights are applied and the
        jet histograms are additionally filled once per entry of
        ``self.variations`` with the varied jet collection.

        Args:
            events: event array providing ``Jet``, ``MET``, ``PV``, ``weight``
                and ``metadata['dataset']`` (presumably a NanoEvents array -
                confirm against the caller).

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` with
            counters, cutflow rows and histograms filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - only the luminosity is read from it below
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tight").get()
        vetomuon = Collections(ev, "Muon", "veto").get()

        ## Electrons
        electron = Collections(ev, "Electron", "tight").get()
        vetoelectron = Collections(ev, "Electron", "veto").get()

        ## Electron-muon pairs: require at least one opposite-charge pair
        dilepton = cross(muon, electron)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7)
        jet = jet[(jet.pt > 25) & (jet.jetId > 1)]
        # remove jets that overlap with muons
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        # remove jets that overlap with electrons
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            ## PU weight - not in the babies...
            #weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            #weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + ['N_jet>2', 'MET>30']

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        # One cutflow row per requirement, each row adding one more cut.
        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        # jet[:, 0:1] keeps the first jet per event as a length-0/1 list, so
        # flatten yields one entry per selected event.
        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        # Now, take care of systematic uncertainties
        if dataset != 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet_var = getPtEtaPhi(alljets, pt_var=var)
                jet_var = jet_var[(jet_var.pt > 25)]
                # remove jets that overlap with muons
                jet_var = jet_var[~match(jet_var, muon, deltaRCut=0.4)]
                # remove jets that overlap with electrons
                jet_var = jet_var[~match(jet_var, electron, deltaRCut=0.4)]

                # get the modified selection -> more difficult
                # Same threshold as the nominal 'N_jet>2' cut (>= 3 jets); the
                # previous "> 3" silently required a fourth jet.
                selection.add(
                    'N_jet>2_' + var, (ak.num(jet_var.pt) >= 3)
                )  # something needs to be improved with getPtEtaPhi function
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                # Separate names so the nominal baseline above is not rebound.
                bl_reqs_var = os_reqs + ['N_jet>2_' + var, 'MET>30_' + var]
                bl_reqs_var_d = {sel: True for sel in bl_reqs_var}
                BL_var = selection.require(**bl_reqs_var_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet_var)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(jet_var.pt[:, 0:1][BL_var]),
                    eta=ak.flatten(jet_var.eta[:, 0:1][BL_var]),
                    phi=ak.flatten(jet_var.phi[:, 0:1][BL_var]),
                    weight=weight.weight()[BL_var])

        return output
Пример #9
0
    def __init__(self, ev, obj, wp, verbose=0, year=2018):
        """Attach derived lepton variables to ``ev`` and build the selection.

        For ``obj`` equal to "Muon" or "Electron" several new fields are
        written onto ``ev[obj]`` (``absMiniIso``, ``deepJet``,
        ``jetRelIsoV2``, ``conePt``, ``boolFCNCIso``, ``boolFCNCfake``, ...)
        and ``self.cand`` is set to that collection. ``self.getSelection()``
        is then called, and further working-point specific requirements are
        AND-ed onto ``self.selection``.

        Args:
            ev: event array; it is modified in place by the field assignments.
            obj: object name, e.g. "Muon" or "Electron"; also the first key
                into ``obj_def``.
            wp: working-point name (second key into ``obj_def``), or ``None``
                for an empty selection dictionary.
            verbose: values above 0 print the extra selections applied.
            year: data-taking year; selects the isolation constants
                ``I_1``, ``I_2``, ``I_3`` (2016, 2017 and 2018 are supported).

        Raises:
            ValueError: if ``obj`` is "Muon" or "Electron" and ``year`` is not
                2016, 2017 or 2018.
        """
        self.obj = obj
        self.wp = wp
        if self.wp is None:
            self.selection_dict = {}
        else:
            self.selection_dict = obj_def[self.obj][self.wp]

        self.v = verbose
        #self.year = df['year'][0] ## to be implemented in next version of babies
        self.year = year

        if self.obj == "Muon":
            # collections are already there, so we just need to calculate missing ones
            ev['Muon', 'absMiniIso'] = ev.Muon.miniPFRelIso_all * ev.Muon.pt
            ev['Muon', 'ptErrRel'] = ev.Muon.ptErr / ev.Muon.pt

            # this is what we are using:
            # - jetRelIso if the matched jet is within deltaR<0.4, pfRelIso03_all otherwise
            # - btagDeepFlavB discriminator of the matched jet if jet is within deltaR<0.4, 0 otherwise
            # (FOR TTH) - pt_cone = 0.9*pt of matched jet if jet is within deltaR<0.4, pt/(pt+iso) otherwise
            # (FOR SS) - pt_cone = pt*(1 + max(0,I_m-I_1)) if pt_rel > I_3; max(pt, pt(matched_jet)*I_2) otherwise

            # Leptons without a matched jet get deltaR = 99, i.e. count as "far".
            # Multiplying by 1 turns the boolean masks into 0/1 integers.
            is_close = ak.fill_none(ev.Muon.delta_r(ev.Muon.matched_jet),
                                    99) < 0.4
            mask_close = is_close * 1
            mask_far = ~is_close * 1

            #TTH conePt
            #conePt = 0.9 * ak.fill_none(ev.Muon.matched_jet.pt,0) * mask_close + ev.Muon.pt*(1 + ev.Muon.miniPFRelIso_all)*mask_far

            #SS conePt
            if (self.year == 2017) or (self.year == 2018):
                I_1 = 0.11
                I_2 = 0.74
                I_3 = 6.8
            elif (self.year == 2016):
                I_1 = 0.16
                I_2 = 0.76
                I_3 = 7.2
            else:
                # Previously this fell through and failed later with an
                # UnboundLocalError on I_1.
                raise ValueError(
                    "Unsupported year %r for Muon isolation constants; "
                    "expected 2016, 2017 or 2018" % (self.year, ))

            # Element-wise maxima are built by adding a new innermost axis,
            # concatenating the two candidates along it and reducing with ak.max.
            PF_unflatten = ak.from_regular(
                ev.Muon.miniPFRelIso_all[:, :, np.newaxis])
            max_miniIso = ak.max(
                ak.concatenate(
                    [PF_unflatten - I_1,
                     ak.zeros_like(PF_unflatten)], axis=2),
                axis=2)  #equivalent to max(0, ev.Muon.miniPFRelIso_all - I_1)
            muon_pt_unflatten = ak.from_regular(ev.Muon.pt[:, :, np.newaxis])
            jet_pt_unflatten = ak.from_regular(
                ev.Muon.matched_jet.pt[:, :, np.newaxis])
            max_pt = ak.max(
                ak.concatenate([muon_pt_unflatten, jet_pt_unflatten * I_2],
                               axis=2),
                axis=2)  #max(ev.Muon.pt, ev.Muon.matched_jet.pt * I_2)
            above_I_3 = ev.Muon.jetPtRelv2 > I_3
            conePt = (ev.Muon.pt *
                      (1 + max_miniIso)) * above_I_3 + (max_pt * ~above_I_3)

            deepJet = ak.fill_none(ev.Muon.matched_jet.btagDeepFlavB,
                                   0) * mask_close
            jetRelIsoV2 = ev.Muon.jetRelIso * mask_close + ev.Muon.pfRelIso03_all * mask_far  # default to 0 if no match

            ev['Muon', 'deepJet'] = ak.copy(deepJet)
            ev['Muon', 'jetRelIsoV2'] = jetRelIsoV2
            ev['Muon', 'conePt'] = conePt
            ev['Muon', 'jetRelIso'] = ev.Muon.jetRelIso
            ev['Muon', 'jetPtRelv2'] = ev.Muon.jetPtRelv2
            ev['Muon', 'boolFCNCIso'] = self.getFCNCIsolation(
                ev.Muon.jetRelIso, ev.Muon.jetPtRelv2, I_2,
                I_3) & (ev.Muon.miniPFRelIso_all < I_1)
            ev['Muon', 'boolFCNCfake'] = (ev.Muon.genPartFlav !=
                                          1) & (ev.Muon.genPartFlav != 15)

            self.cand = ev.Muon

        elif self.obj == "Electron":
            # calculate new variables. assignment is awkward, but what can you do.
            ev['Electron',
               'absMiniIso'] = ev.Electron.miniPFRelIso_all * ev.Electron.pt
            ev['Electron', 'etaSC'] = ev.Electron.eta + ev.Electron.deltaEtaSC

            # the following line is only needed if we do our own matching.
            # right now, we keep using the NanoAOD match, but check the deltaR distance
            # jet_index, mask_match, mask_nomatch = self.matchJets(ev.Electron, ev.Jet)

            # this is what we are using:
            # - jetRelIso if the matched jet is within deltaR<0.4, pfRelIso03_all otherwise
            # - btagDeepFlavB discriminator of the matched jet if jet is within deltaR<0.4, 0 otherwise
            # (FOR SS) - pt_cone = pt*(1 + max(0,I_m-I_1)) if pt_rel > I_3; max(pt, pt(matched_jet)*I_2) otherwise

            # Leptons without a matched jet get deltaR = 99, i.e. count as "far".
            is_close = ak.fill_none(
                ev.Electron.delta_r(ev.Electron.matched_jet), 99) < 0.4
            mask_close = is_close * 1
            mask_far = ~is_close * 1

            deepJet = ak.fill_none(ev.Electron.matched_jet.btagDeepFlavB,
                                   0) * mask_close
            jetRelIsoV2 = ev.Electron.jetRelIso * mask_close + ev.Electron.pfRelIso03_all * mask_far  # default to 0 if no match

            #TTH conePt
            #conePt = 0.9 * ak.fill_none(ev.Electron.matched_jet.pt,0) * mask_close + ev.Electron.pt*(1 + ev.Electron.miniPFRelIso_all)*mask_far
            #SS conePt
            if (self.year == 2017) or (self.year == 2018):
                I_1 = 0.07
                I_2 = 0.78
                I_3 = 8.0
            elif (self.year == 2016):
                I_1 = 0.12
                I_2 = 0.8
                I_3 = 7.2
            else:
                # Previously this fell through and failed later with an
                # UnboundLocalError on I_1.
                raise ValueError(
                    "Unsupported year %r for Electron isolation constants; "
                    "expected 2016, 2017 or 2018" % (self.year, ))

            PF_unflatten = ak.from_regular(
                ev.Electron.miniPFRelIso_all[:, :, np.newaxis])
            max_miniIso = ak.max(
                ak.concatenate(
                    [PF_unflatten - I_1,
                     ak.zeros_like(PF_unflatten)], axis=2),
                axis=2)  #equivalent to max(0, ev.Electron.miniPFRelIso_all - I_1)
            electron_pt_unflatten = ak.from_regular(
                ev.Electron.pt[:, :, np.newaxis])
            jet_pt_unflatten = ak.from_regular(
                ev.Electron.matched_jet.pt[:, :, np.newaxis])
            max_pt = ak.max(
                ak.concatenate([electron_pt_unflatten, jet_pt_unflatten * I_2],
                               axis=2),
                axis=2)  #max(ev.Electron.pt, ev.Electron.matched_jet.pt * I_2)
            above_I_3 = ev.Electron.jetPtRelv2 > I_3
            conePt = (ev.Electron.pt *
                      (1 + max_miniIso)) * above_I_3 + (max_pt * ~above_I_3)

            ev['Electron', 'deepJet'] = ak.copy(deepJet)
            ev['Electron', 'jetRelIsoV2'] = jetRelIsoV2
            ev['Electron', 'conePt'] = conePt

            ev['Electron', 'jetRelIso'] = ev.Electron.jetRelIso
            ev['Electron', 'jetPtRelv2'] = ev.Electron.jetPtRelv2
            ev['Electron', 'boolFCNCIso'] = self.getFCNCIsolation(
                ev.Electron.jetRelIso, ev.Electron.jetPtRelv2, I_2,
                I_3) & (ev.Electron.miniPFRelIso_all < I_1)
            ev['Electron',
               'boolFCNCfake'] = (ev.Electron.genPartFlav !=
                                  1) & (ev.Electron.genPartFlav != 15)

            self.cand = ev.Electron

        self.getSelection()

        # NOTE(review): the isolation constants in the two "tight" blocks below
        # equal the 2017/2018 values above and do not depend on self.year -
        # confirm this is intended for 2016.
        if self.obj == "Electron" and self.wp == "tight":
            self.selection = self.selection & self.getElectronMVAID(
            ) & self.getIsolation(0.07, 0.78, 8.0) & self.isTriggerSafeNoIso()
            if self.v > 0: print(" - custom ID and multi-isolation")

        if self.obj == "Muon" and self.wp == "tight":
            self.selection = self.selection & self.getIsolation(
                0.11, 0.74, 6.8)
            if self.v > 0: print(" - custom multi-isolation")
            #self.selection = self.selection & ak.fill_none(ev.Muon.matched_jet.btagDeepFlavB<0.2770, True)
            #self.selection = self.selection & (ev.Muon.matched_jet.btagDeepFlavB<0.2770)
            #if self.v>0: print (" - deepJet")

        if self.obj == "Electron" and (self.wp == "tightTTH"
                                       or self.wp == 'fakeableTTH'
                                       or self.wp == "tightSSTTH"
                                       or self.wp == 'fakeableSSTTH'):
            self.selection = self.selection & self.getSigmaIEtaIEta()
            if self.v > 0: print(" - SigmaIEtaIEta")
            #self.selection = self.selection & ak.fill_none(ev.Electron.matched_jet.btagDeepFlavB<0.2770, True)
            #self.selection = self.selection & (ev.Electron.matched_jet.btagDeepFlavB<0.2770)
            #self.selection = self.selection & (ev.Jet[ev.Electron.jetIdx].btagDeepFlavB<0.2770)
            #if self.v>0: print (" - deepJet")

        if self.obj == 'Muon' and (self.wp == 'fakeableTTH'
                                   or self.wp == 'fakeableSSTTH'):
            # deepJet must stay below a threshold that getThreshold derives
            # from conePt and the given pt range / low / high values.
            self.selection = self.selection & (
                self.cand.deepJet < self.getThreshold(self.cand.conePt,
                                                      min_pt=20,
                                                      max_pt=45,
                                                      low=0.2770,
                                                      high=0.0494))
            if self.v > 0: print(" - interpolated deepJet")
Пример #10
0
    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
        n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        ## event selectors
        filters   = getFilters(ev, year=self.year, dataset=dataset)
        
        dilep     = ((ak.num(electron) + ak.num(muon))==2)
        pos_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))<0)
        neg_charge = ((ak.sum(electron.pdgId, axis=1) + ak.sum(muon.pdgId, axis=1))>0)
        lep0pt    = ((ak.num(electron[(electron.pt>25)]) + ak.num(muon[(muon.pt>25)]))>0)
        lep0pt_40 = ((ak.num(electron[(electron.pt>40)]) + ak.num(muon[(muon.pt>40)]))>0)
        lep0pt_100 = ((ak.num(electron[(electron.pt>100)]) + ak.num(muon[(muon.pt>100)]))>0)
        lep1pt    = ((ak.num(electron[(electron.pt>20)]) + ak.num(muon[(muon.pt>20)]))>1)
        lep1pt_30 = ((ak.num(electron[(electron.pt>30)]) + ak.num(muon[(muon.pt>30)]))>1)
        lepveto   = ((ak.num(vetoelectron) + ak.num(vetomuon))==2)
        
        # define the weight
        weight = Weights( len(ev) )
        
        #mult = 1
        #if dataset=='inclusive': mult = 0.0478/47.448
        #if dataset=='plus': mult = 0.0036/7.205

        if not dataset=='MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        
        selection = PackedSelection()
        selection.add('lepveto',       lepveto)
        selection.add('dilep',         dilep )
        selection.add('filter',        (filters) )
        selection.add('p_T(lep0)>25',  lep0pt )
        selection.add('p_T(lep0)>40',  lep0pt_40 )
        selection.add('p_T(lep1)>20',  lep1pt )
        selection.add('p_T(lep1)>30',  lep1pt_30 )
        selection.add('SS',            ( SSlepton | SSelectron | SSmuon) )
        selection.add('pos',           ( pos_charge ) )
        selection.add('neg',           ( neg_charge ) )
        selection.add('N_jet>3',       (ak.num(jet)>=4) )
        selection.add('N_jet>4',       (ak.num(jet)>=5) )
        selection.add('N_central>2',   (ak.num(central)>=3) )
        selection.add('N_central>3',   (ak.num(central)>=4) )
        selection.add('N_btag>0',      (ak.num(btag)>=1) )
        selection.add('MET>50',        (ev.MET.pt>50) )
        selection.add('ST',            (st>600) )
        selection.add('N_fwd>0',       (ak.num(fwd)>=1 ))
        selection.add('delta_eta',     (ak.any(delta_eta>2, axis=1) ) )
        selection.add('fwd_p>500',     (ak.any(j_fwd.p>500, axis=1) ) )
        
        ss_reqs = ['lepveto', 'dilep', 'SS', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'N_jet>3', 'N_central>2', 'N_btag>0']
        bl_reqs = ss_reqs + ['N_fwd>0', 'N_jet>4', 'N_central>3', 'ST', 'MET>50', 'delta_eta']
        sr_reqs = bl_reqs + ['fwd_p>500', 'p_T(lep0)>40', 'p_T(lep1)>30']

        ss_reqs_d = { sel: True for sel in ss_reqs }
        ss_selection = selection.require(**ss_reqs_d)
        bl_reqs_d = { sel: True for sel in bl_reqs }
        BL = selection.require(**bl_reqs_d)
        sr_reqs_d = { sel: True for sel in sr_reqs }
        SR = selection.require(**sr_reqs_d)

        cutflow     = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in sr_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow( req, selection.require(**cutflow_reqs_d) )
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvs, weight=weight.weight()[ss_selection])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[ss_selection].npvsGood, weight=weight.weight()[ss_selection])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[ss_selection], weight=weight.weight()[ss_selection])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[ss_selection], n2=n_nonprompt[ss_selection], n_ele=ak.num(electron)[ss_selection], weight=weight.weight()[ss_selection])
        
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[ss_selection].pt,
            phi  = ev.MET[ss_selection].phi,
            weight = weight.weight()[ss_selection]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )
        
        
        return output
Пример #11
0
         # Repeat events by resample factor
         if resample_:
             counts_ = counts_ * resample_factor_
 
         if selections is None:
             selections = selections_
             counts = counts_
         else:
             msk_selections = np.full_like( selections, False, dtype='bool' )
             for key in selections_:
                 msk_selections |= ( selections == key )
             counts[ msk_selections ] += counts_
 
         # Repeat events by resample factor
         if resample_:
             events_sel_ = ak.concatenate( ( [events_sel_] * resample_factor_ ), axis=0 )
             
         # Randomize proton arrays
         if random_protons_:
             protons_sel_ = events_sel_.ProtCand
         
             index_rnd_ = np.random.permutation( len( events_sel_ ) )
         
             protons_rnd_ = protons_sel_[ index_rnd_ ]
         
             events_sel_[ "ProtCandRnd" ] = protons_rnd_    
     
             print ( ak.num( events_sel_.ProtCand ) )
             print ( ak.num( events_sel_.ProtCandRnd ) )    
 
         #protons_ = select_protons( events_sel_ )
Пример #12
0
    def process(self, events):
        """Fill the b-tag multiplicity histograms for one chunk of events.

        Events with more than two jets are kept, leptons and jets are built,
        and ``N_b`` is filled after the dilepton baseline selection with the
        ``N_btag>0`` requirement omitted.  For datasets whose name does not
        match ``MuonEG|DoubleMuon|DoubleEG|EGamma`` one additional
        ``N_b_<var>`` histogram is filled per entry of ``self.variations``,
        with the b-tag scale-factor directions chosen from the variation
        name.  For matching datasets the run, luminosity block and event
        number of the events passing the baseline are stored instead.

        Args:
            events: event array; the preselected events must provide a
                ``metadata`` mapping with a ``'dataset'`` entry.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            the counters, histograms and columns filled.
        """
        output = self.accumulator.identity()

        # very loose preselection: keep events with more than two jets
        ev = events[ak.num(events.Jet) > 2]
        dataset = ev.metadata['dataset']

        # the config provides the luminosity entering the event weight
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        dimuon_qq = dimuon['0'].charge * dimuon['1'].charge
        SSmuon = ak.any(dimuon_qq > 0, axis=1)
        OSmuon = ak.any(dimuon_qq < 0, axis=1)
        leading_muon = muon[ak.singletons(ak.argmax(muon.pt, axis=1))]

        ## Electrons
        electron = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        dielectron_qq = dielectron['0'].charge * dielectron['1'].charge
        SSelectron = ak.any(dielectron_qq > 0, axis=1)
        OSelectron = ak.any(dielectron_qq < 0, axis=1)
        leading_electron = electron[ak.singletons(
            ak.argmax(electron.pt, axis=1))]

        ## Merged electron + muon collection
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        dilepton_qq = dilepton['0'].charge * dilepton['1'].charge
        SSlepton = ak.any(dilepton_qq > 0, axis=1)
        OSlepton = ak.any(dilepton_qq < 0, axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton = lepton[trailing_lepton_idx]
        # NOTE(review): this applies bitwise operators to integer index arrays,
        # which is probably not the intended "neither leading nor trailing"
        # selection; the result is not used below - confirm before relying on it.
        second_lepton = lepton[~(trailing_lepton_idx & leading_lepton_idx)]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        # order by the smeared and recorrected jet pt
        jet = jet[ak.argsort(jet.pt_nom, ascending=False)]
        # drop jets that overlap with a muon, then with an electron
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        central = jet[(abs(jet.eta) < 2.4)]
        # should study working point for DeepJet
        btag = getBTagsDeepFlavB(jet, year=self.year)
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets: highest momentum, highest pt and most forward one
        high_p_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))]
        high_pt_fwd = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))]

        ## First two central jets after sorting by b-tag score.
        # NOTE(review): ak.argsort is called with its default ordering here;
        # confirm this yields the intended "leading" jets (unused below).
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        best_jf = jf[ak.singletons(ak.argmax(mjf, axis=1))]
        deltaEta = abs(high_p_fwd.eta - best_jf['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        lt = met_pt + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        def mc_weights(b_jets, directions):
            """Build lumi, PU, (optional) b-tag and lepton weights.

            ``directions`` is a ``(b_direction, c_direction)`` pair, or
            ``None`` to skip the b-tag scale factor.
            """
            w = Weights(len(ev))
            # lumi weight
            w.add("weight", ev.weight * cfg['lumi'][self.year])
            # PU weight
            w.add("PU",
                  ev.puWeight,
                  weightUp=ev.puWeightUp,
                  weightDown=ev.puWeightDown,
                  shift=False)
            # b-tag SFs
            if directions is not None:
                b_dir, c_dir = directions
                w.add(
                    "btag",
                    self.btagSF.Method1a(b_jets,
                                         light,
                                         b_direction=b_dir,
                                         c_direction=c_dir))
            # lepton SFs
            w.add("lepton", self.leptonSF.get(electron, muon))
            return w

        def make_selection(b_jets, met):
            """Create the Selection object for the given b-jets and MET."""
            return Selection(
                dataset=dataset,
                events=ev,
                year=self.year,
                ele=electron,
                ele_veto=vetoelectron,
                mu=muon,
                mu_veto=vetomuon,
                jet_all=jet,
                jet_central=central,
                jet_btag=b_jets,
                jet_fwd=fwd,
                met=met,
            )

        def fill_n_b(hist_name, selection, b_jets, w):
            """Fill a b-jet multiplicity histogram without the N_btag cut."""
            mask = selection.dilep_baseline(SS=False, omit=['N_btag>0'])
            output[hist_name].fill(dataset=dataset,
                                   multiplicity=ak.num(b_jets)[mask],
                                   weight=w.weight()[mask])

        # Dataset names matching this pattern receive no scale factors and get
        # their run/lumi/event numbers recorded (presumably collision data).
        is_data = bool(
            re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'),
                      dataset))

        # define the weight
        if is_data:
            weight = Weights(len(ev))
        else:
            weight = mc_weights(btag, ('central', 'central'))

        sel = make_selection(btag, ev.MET)
        BL = sel.dilep_baseline(SS=False)
        fill_n_b('N_b', sel, btag, weight)

        if is_data:
            columns = [
                ('%s_run', ak.to_numpy(ev.run)),
                ('%s_lumi', ak.to_numpy(ev.luminosityBlock)),
                ('%s_event', ak.to_numpy(ev.event)),
            ]
            for key_format, values in columns:
                output[key_format % dataset] += processor.column_accumulator(
                    values[BL])
        else:
            # Now, take care of systematic uncertainties
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]

            # variation name -> (b_direction, c_direction); any other name
            # gets no b-tag scale factor at all
            btag_directions = {
                'centralUp': ('central', 'up'),
                'centralDown': ('central', 'down'),
                'upCentral': ('up', 'central'),
                'downCentral': ('down', 'central'),
            }

            for var in self.variations:
                # get the collections that change with the variations
                btag = getBTagsDeepFlavB(jet, year=self.year)
                weight = mc_weights(btag, btag_directions.get(var))

                sel = make_selection(btag, ev.MET)
                BL = sel.dilep_baseline(SS=False)
                fill_n_b('N_b_' + var, sel, btag, weight)

        return output
Пример #13
0
    def process(self, events):
        """Fill the dilepton control histograms for one chunk of events.

        Events with more than two jets are kept.  Leptons, jets and MET are
        built, the dilepton baseline selection (``SS=False``) is evaluated and
        the multiplicity and kinematic histograms are filled.  For datasets
        whose name matches ``MuonEG|DoubleMuon|DoubleEG|EGamma`` the run,
        luminosity block and event number of the selected events are stored;
        for all other datasets the jet-related histograms are filled again
        for every entry of ``self.variations`` (histogram suffix ``_<var>``).

        Args:
            events: event array; the preselected events must provide a
                ``metadata`` mapping with a ``'dataset'`` entry.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` with
            the counters, histograms and columns filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - it provides the luminosity used in the event weight
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        OSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)<0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        OSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)<0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton   = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)<0, axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

        ## Get the two leading b-jets in terms of btag score.
        # ak.argsort sorts in ascending order by default, so descending order
        # must be requested explicitly for [:, :2] to pick the highest scores.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB, ascending=False)][:,:2]

        jf          = cross(high_p_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        deltaEta    = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max     = ak.max(mjf, axis=1)

        jj          = choose(jet, 2)
        mjj_max     = ak.max((jj['0']+jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        # Dataset names matching this pattern receive no scale factors and get
        # their run/lumi/event numbers recorded (presumably collision data).
        is_data = bool(re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset))

        if not is_data:
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))


        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        # only this call receives the cutflow object
        BL = sel.dilep_baseline(cutflow=cutflow, SS=False)

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])

        # "N-1" style plots: the cut on the plotted quantity is omitted
        BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])

        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        # muon multiplicity must be counted from the muon collection
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight.weight()[BL])

        BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])

        BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL_minusMET].pt,
            phi  = ev.MET[BL_minusMET].phi,
            weight = weight.weight()[BL_minusMET]
        )

        #output['electron'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(electron[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(electron[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(electron[BL].phi)),
        #    weight = weight.weight()[BL]
        #)
        #
        #output['muon'].fill(
        #    dataset = dataset,
        #    pt  = ak.to_numpy(ak.flatten(muon[BL].pt)),
        #    eta = ak.to_numpy(ak.flatten(muon[BL].eta)),
        #    phi = ak.to_numpy(ak.flatten(muon[BL].phi)),
        #    weight = weight.weight()[BL]
        #)

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_p_fwd[BL].pt_nom),
            eta = ak.flatten(high_p_fwd[BL].eta),
            phi = ak.flatten(high_p_fwd[BL].phi),
            weight = weight.weight()[BL]
        )

        # slices such as [:, 0:1] keep a (possibly empty) list per event, so
        # events without the requested object contribute nothing after flatten
        output['b1'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 0:1][BL].eta),
            phi = ak.flatten(high_score_btag[:, 0:1][BL].phi),
            weight = weight.weight()[BL]
        )

        output['b2'].fill(
            dataset = dataset,
            pt  = ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
            eta = ak.flatten(high_score_btag[:, 1:2][BL].eta),
            phi = ak.flatten(high_score_btag[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight.weight()[BL]
        )

        if is_data:
            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)
            output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
            output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
            output['%s_event'%dataset] += processor.column_accumulator(event_[BL])

        # Now, take care of systematic uncertainties
        if not is_data:
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId>1)]
            for var in self.variations:
                # get the collections that change with the variations;
                # the nominal event weight is reused for every variation
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt>25)]
                jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
                jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

                central   = jet[(abs(jet.eta)<2.4)]
                btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
                light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd       = getFwdJet(light)
                fwd_noPU  = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd   = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
                high_pt_fwd  = fwd[ak.singletons(ak.argmax(fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                # (descending sort, see the nominal selection above in this method)
                high_score_btag = central[ak.argsort(central.btagDeepFlavB, ascending=False)][:,:2]

                # MET whose pt is replaced by the field named after the variation
                met = ev.MET
                met['pt'] = getattr(met, var)

                sel = Selection(
                    dataset = dataset,
                    events = ev,
                    year = self.year,
                    ele = electron,
                    ele_veto = vetoelectron,
                    mu = muon,
                    mu_veto = vetomuon,
                    jet_all = jet,
                    jet_central = central,
                    jet_btag = btag,
                    jet_fwd = fwd,
                    met = met,
                )

                BL = sel.dilep_baseline(SS=False)

                # get the modified selection -> more difficult
                #selection.add('N_jet>2_'+var, (ak.num(jet.pt)>=3)) # stupid bug here...
                #selection.add('N_btag=2_'+var,      (ak.num(btag)==2) ) 
                #selection.add('N_central>1_'+var,   (ak.num(central)>=2) )
                #selection.add('N_fwd>0_'+var,       (ak.num(fwd)>=1) )
                #selection.add('MET>30_'+var, (getattr(ev.MET, var)>30) )

                ### Don't change the selection for now...
                #bl_reqs = os_reqs + ['N_jet>2_'+var, 'MET>30_'+var, 'N_btag=2_'+var, 'N_central>1_'+var, 'N_fwd>0_'+var]
                #bl_reqs_d = { sel: True for sel in bl_reqs }
                #BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_'+var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
                BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0'])
                output['N_fwd_'+var].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd])
                BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0'])
                output['N_b_'+var].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb])
                output['N_central_'+var].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])


                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(jet.pt[:, 0:1][BL]),
                    eta = ak.flatten(jet.eta[:, 0:1][BL]),
                    phi = ak.flatten(jet.phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['b1_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta = ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi = ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight = weight.weight()[BL]
                )

                output['fwd_jet_'+var].fill(
                    dataset = dataset,
                    pt  = ak.flatten(high_p_fwd[BL].pt),
                    #p   = ak.flatten(high_p_fwd[BL].p),
                    eta = ak.flatten(high_p_fwd[BL].eta),
                    phi = ak.flatten(high_p_fwd[BL].phi),
                    weight = weight.weight()[BL]
                )

                BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50'])
                output['MET_'+var].fill(
                    dataset = dataset,
                    pt  = getattr(ev.MET, var)[BL_minusMET],
                    phi  = ev.MET[BL_minusMET].phi,
                    weight = weight.weight()[BL_minusMET]
                )

        return output
Пример #14
0
    def process(self, events):

        # get meta infos
        dataset = events.metadata["dataset"]
        isRealData = not hasattr(events, "genWeight")
        n_events = len(events)
        selection = processor.PackedSelection()
        weights = processor.Weights(n_events)
        output = self.accumulator.identity()

        # weights
        if not isRealData:
            output['sumw'][dataset] += awkward1.sum(events.genWeight)
        
        # trigger
        triggers = {}
        for channel in ["e","mu"]:
            trigger = np.zeros(len(events), dtype='bool')
            for t in self._trigger[channel]:
                try:
                    trigger = trigger | events.HLT[t]
                except:
                    warnings.warn("Missing trigger %s" % t, RuntimeWarning)
            triggers[channel] = trigger
            
        # met filter
        met_filters = ["goodVertices",
                       "globalSuperTightHalo2016Filter",
                       "HBHENoiseFilter",
                       "HBHENoiseIsoFilter",
                       "EcalDeadCellTriggerPrimitiveFilter",
                       "BadPFMuonFilter",
                       ]
        met_filters_mask = np.ones(len(events), dtype='bool')
        for t in met_filters:
            met_filters_mask = met_filters_mask & events.Flag[t]
        selection.add("met_filter", awkward1.to_numpy(met_filters_mask))
        
        # load objects
        muons = events.Muon
        electrons = events.Electron
        jets = events.Jet
        fatjets = events.FatJet
        subjets = events.SubJet
        fatjetsLS = events.FatJetLS
        met = events.MET
        
        # muons
        goodmuon = (
            (muons.mediumId)
            & (muons.miniPFRelIso_all <= 0.2)
            & (muons.pt >= 27)
            & (abs(muons.eta) <= 2.4)
            & (abs(muons.dz) < 0.1)
            & (abs(muons.dxy) < 0.05)
            & (muons.sip3d < 4)
        )
        good_muons = muons[goodmuon]
        ngood_muons = awkward1.sum(goodmuon, axis=1)

        # electrons
        goodelectron = (
            (electrons.mvaFall17V2noIso_WP90)
            & (electrons.pt >= 30)
            & (abs(electrons.eta) <= 1.479)
            & (abs(electrons.dz) < 0.1)
            & (abs(electrons.dxy) < 0.05)
            & (electrons.sip3d < 4)
        )
        good_electrons = electrons[goodelectron]
        ngood_electrons = awkward1.sum(goodelectron, axis=1)
        
        # good leptons
        good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
        good_leptons = good_leptons[awkward1.argsort(good_leptons.pt)]
        
        # lepton candidate
        candidatelep = awkward1.firsts(good_leptons)
        
        # lepton channel selection
        selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons==1) & (ngood_muons==0))) # not sure if need to require 0 muons or 0 electrons in the next line
        selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons==0) & (ngood_muons==1)))
        
        # jets
        ht = awkward1.sum(jets[jets.pt > 30].pt,axis=1)
        selection.add("ht_400", awkward1.to_numpy(ht>=400))
        goodjet = (
            (jets.isTight)
            & (jets.pt > 30)
            & (abs(jets.eta) <= 2.5)
            )
        good_jets = jets[goodjet]

        # fat jets
        jID = "isTight"
        # TODO: add mass correction

        # a way to get the first two subjets
        # cart = awkward1.cartesian([fatjets, subjets], nested=True)
        # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
        # sj1 = subjets[idxes[:,:,0]]
        # sj2 = subjets[idxes[:,:,1]]
        
        good_fatjet = (
            (getattr(fatjets, jID))
            & (abs(fatjets.eta) <= 2.4)
            & (fatjets.pt > 50)
            & (fatjets.msoftdrop > 30)
            & (fatjets.msoftdrop < 210)
            #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2) # TODO: require 2 subjets?
            # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
            & (awkward1.all(fatjets.subjets.pt >= 20))
            & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4))
        )
        good_fatjets = fatjets[good_fatjet]

        # hbb candidate
        mask_hbb = (
            (good_fatjets.pt > 200)
            & (good_fatjets.delta_r(candidatelep) > 2.0)
            )
        candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

        # b-tag #& (good_fatjets.particleNetMD_Xbb > 0.9)
        selection.add('hbb_btag',awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8)) # score would be larger for tight category (0.97)  
        
        # No AK4 b-tagged jets away from bb jet
        jets_HbbV = jets[good_jets.delta_r(candidateHbb) >= 1.2]
        selection.add('hbb_vetobtagaway',  awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False) > BTagEfficiency.btagWPs[self._year]['medium']))
        
        # fat jets Lepton Subtracted
        # wjj candidate
        mask_wjj = (
            (fatjetsLS.pt > 50)
            & (fatjetsLS.delta_r(candidatelep) > 1.2)
            # need to add 2 subjets w pt > 20 & eta<2.4
            # need to add ID?
            )
        candidateWjj = awkward1.firsts(fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep),axis=1,keepdims=True)])
        # add t2/t1 <= 0.75 (0.45 HP)
        selection.add('hww_mass',  awkward1.to_numpy(candidateWjj.mass >= 10))

        print('met ',met)
        # wjjlnu info
        #HSolverLiInfo  hwwInfoLi;
        # qqSDmass = candidateWjj.msoftdrop
        # hwwLi   = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
        #neutrino = hwwInfoLi.neutrino;
        #wlnu     = hwwInfoLi.wlnu;
        #wqq      = hwwInfoLi.wqqjet;
        #hWW      = hwwInfoLi.hWW;
        #wwDM     = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
        # add dlvqq <= 11 (2.5 HP)
               
        # in the meantime let's add the mass
        '''
        mm = (candidatejet - candidatelep).mass2
        jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
        joffshell = jmass < 62.5
        massassumption = 80.*joffshell + (125 - 80.)*~joffshell
        x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
        met_eta = (
            (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
            + (x > 1)*(
                candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
                )
            )
        met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
        if met.size > 0:
            met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        
        # hh system
        candidateHH = candidateWjj + met_p4 + candidateHbb
        selection.add('hh_mass', candidateHH.mass >= 700)
        selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)
        '''
        
        channels = {"e": ["met_filter","ch_e","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"], #,"hh_mass","hh_centrality"],
                    "mu": ["met_filter","ch_mu","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"] #,"hh_mass","hh_centrality"],
                    }

        # need to add gen info
        
        if not isRealData:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
            
        for channel, cuts in channels.items():
            allcuts = set()
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=weights.weight())
            for i, cut in enumerate(cuts):
                allcuts.add(cut)
                cut = selection.all(*allcuts)
                output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=weights.weight()[cut])

        return output
Пример #15
0
    def process(self, events):
        '''
        Fill the electron charge-flip histograms for one chunk of events.

        Electrons and muons are reduced to gen-matched, prompt candidates with
        pt > 15 and |eta| < 2.4, jets overlapping with them are dropped, and
        the events are categorised (same-sign / opposite-sign, ee / e-mu /
        at most one muon, one flipped electron / none) before the
        multiplicity and leading-electron histograms are filled.

        events: chunk of events; events.Jet is used for the preselection and
                metadata['dataset'] of the preselected events names the sample.

        Returns the accumulator created by self.accumulator.identity() after
        all fills.
        '''
        output = self.accumulator.identity()

        # Very loose preselection: at least two jets in the event.
        ev = events[ak.num(events.Jet) >= 2]
        dataset = ev.metadata['dataset']

        # The config is still loaded, although nothing below reads it.
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Electrons
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        ele_in_acceptance = (electron.pt > 15) & (np.abs(electron.eta) < 2.4)
        electron = electron[ele_in_acceptance]

        ele_has_gen = electron.genPartIdx >= 0
        electron = electron[ele_has_gen]
        # keep electrons whose matched generator particle is an electron ...
        ele_gen_is_electron = np.abs(electron.matched_gen.pdgId) == 11
        electron = electron[ele_gen_is_electron]
        # ... and that are prompt (genPartFlav 1 or 15)
        ele_is_prompt = (electron.genPartFlav == 1) | (electron.genPartFlav == 15)
        electron = electron[ele_is_prompt]

        leading_electron = electron[ak.singletons(ak.argmax(electron.pt, axis=1))]
        trailing_electron = electron[ak.singletons(ak.argmin(electron.pt, axis=1))]

        # Only the (currently disabled) 'lepton_parent' / 'lepton_parent2'
        # histograms would consume these two.
        leading_parent = find_first_parent(leading_electron.matched_gen)
        trailing_parent = find_first_parent(trailing_electron.matched_gen)

        # An electron counts as flipped when its pdgId is the negative of the
        # matched gen particle's pdgId or of find_first_parent() of that particle.
        opposite_to_gen = electron.matched_gen.pdgId * (-1) == electron.pdgId
        opposite_to_parent = find_first_parent(electron.matched_gen) * (-1) == electron.pdgId
        is_flipped = (opposite_to_gen | opposite_to_parent) & (np.abs(electron.pdgId) == 11)

        flipped_electron = electron[is_flipped]
        has_positive_pt = ak.fill_none(flipped_electron.pt, 0) > 0
        flipped_electron = flipped_electron[has_positive_pt]
        flipped_electron = flipped_electron[~ak.is_none(flipped_electron)]
        n_flips = ak.num(flipped_electron)

        ## Muons
        muon = Collections(ev, "Muon", "tightFCNC").get()
        mu_in_acceptance = (muon.pt > 15) & (np.abs(muon.eta) < 2.4)
        muon = muon[mu_in_acceptance]

        mu_has_gen = muon.genPartIdx >= 0
        muon = muon[mu_has_gen]
        # keep muons whose matched generator particle is a muon ...
        mu_gen_is_muon = np.abs(muon.matched_gen.pdgId) == 13
        muon = muon[mu_gen_is_muon]
        # ... and that are prompt (genPartFlav 1 or 15)
        mu_is_prompt = (muon.genPartFlav == 1) | (muon.genPartFlav == 15)
        muon = muon[mu_is_prompt]

        ## Leptons
        lepton = ak.concatenate([muon, electron], axis=1)
        charge_sum = ak.sum(lepton.charge, axis=1)
        exactly_two_leptons = ak.num(lepton) == 2
        same_sign = (charge_sum != 0) & exactly_two_leptons
        opposite_sign = (charge_sum == 0) & exactly_two_leptons

        one_ele_one_mu = (ak.num(electron) == 1) & (ak.num(muon) == 1)
        at_most_one_mu = ak.num(muon) <= 1

        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]
        trailing_lepton = lepton[ak.singletons(ak.argmin(lepton.pt, axis=1))]

        ## Jets: pt-ordered, cleaned against the selected muons and electrons
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[ak.argsort(jet.pt, ascending=False)]
        jet = jet[~match(jet, muon, deltaRCut=0.4)]
        jet = jet[~match(jet, electron, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## Weights: weight2 additionally carries the charge-flip weight
        n_events = len(ev)
        weight = Weights(n_events)
        weight2 = Weights(n_events)

        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", ev.genWeight)
            weight2.add("weight", ev.genWeight)

        weight2.add("charge flip", self.charge_flip_ratio.flip_weight(electron))

        ## Selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        named_cuts = (
            ('filter',  filters),
            ('ss',      same_sign),
            ('os',      opposite_sign),
            ('jet',     ak.num(jet) >= 2),
            ('ee',      ak.num(electron) == 2),
            ('emu',     one_ele_one_mu),
            ('flip',    n_flips == 1),
            ('nflip',   n_flips == 0),
            ('no_mumu', at_most_one_mu),
        )

        selection = PackedSelection()
        for cut_name, cut_mask in named_cuts:
            selection.add(cut_name, cut_mask)

        def _passes(*extra_cuts):
            # Event mask requiring the baseline cuts plus the given extra cuts.
            wanted = ('filter', 'jet') + extra_cuts
            return selection.require(**{name: True for name in wanted})

        baseline = _passes()

        flip_sel = _passes('flip', 'ss', 'ee')
        flip_sel2 = _passes('flip', 'ss', 'emu')
        flip_sel3 = _passes('flip', 'ss', 'no_mumu')

        n_flip_sel = _passes('nflip', 'os', 'ee')
        n_flip_sel2 = _passes('nflip', 'os', 'emu')
        n_flip_sel3 = _passes('nflip', 'os', 'no_mumu')

        ss_sel = _passes('ss', 'no_mumu')
        os_sel = _passes('os', 'no_mumu')

        eess_sel = _passes('ss', 'ee')
        eeos_sel = _passes('os', 'ee')

        emss_sel = _passes('ss', 'emu')
        emos_sel = _passes('os', 'emu')

        ## Outputs
        output['N_jet'].fill(
            dataset=dataset,
            multiplicity=ak.num(jet)[baseline],
            weight=weight.weight()[baseline],
        )
        output['N_ele'].fill(
            dataset=dataset,
            multiplicity=ak.num(lepton)[ss_sel],
            weight=weight.weight()[ss_sel],
        )
        output['N_ele2'].fill(
            dataset=dataset,
            multiplicity=ak.num(lepton)[os_sel],
            weight=weight2.weight()[os_sel],
        )

        # Number of flipped electrons: same-sign categories use the plain
        # weight, opposite-sign categories the charge-flip weighted one.
        flip_count_fills = (
            ('electron_flips',  flip_sel,    weight),
            ('electron_flips2', n_flip_sel,  weight2),
            ('electron_flips3', flip_sel2,   weight),
            ('electron_flips4', n_flip_sel2, weight2),
        )
        for hist_name, mask, event_weight in flip_count_fills:
            output[hist_name].fill(
                dataset=dataset,
                multiplicity=n_flips[mask],
                weight=event_weight.weight()[mask],
            )

        # Leading-electron pt and |eta| in the same categories.
        leading_electron_fills = (
            ("electron",          flip_sel3,   weight),
            ("electron2",         n_flip_sel3, weight2),
            ("flipped_electron",  flip_sel,    weight),
            ("flipped_electron2", n_flip_sel,  weight2),
            ("flipped_electron3", flip_sel2,   weight),
            ("flipped_electron4", n_flip_sel2, weight2),
        )
        for hist_name, mask, event_weight in leading_electron_fills:
            selected = leading_electron[mask]
            output[hist_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(selected.pt)),
                eta=np.abs(ak.to_numpy(ak.flatten(selected.eta))),
                weight=event_weight.weight()[mask],
            )

        return output
Пример #16
0
    def dilep_baseline(self, omit=None, cutflow=None, tight=False, SS=True):
        '''
        Build the dilepton baseline selection and return the resulting event mask.

        omit:    iterable of cut names that are not applied in the returned
                 selection (default: no cut is omitted)
        cutflow: give it a cutflow object if you want it to be filled; one row
                 is added per cut, holding the cumulative selection up to and
                 including that cut
        tight:   if True, 'N_jet>4', 'N_central>3', 'ST>600' and 'MET>50' are
                 required in addition
        SS:      if True a same-sign lepton pair ('SS') is required, otherwise
                 an opposite-sign pair ('OS')

        NOTE: the cutflow rows are built from the full list of cuts; the omit
        list only affects the returned selection.
        '''
        # None replaces a shared mutable [] default; omit is only ever read here.
        omit = () if omit is None else omit

        self.selection = PackedSelection()

        is_dilep   = ((ak.num(self.ele) + ak.num(self.mu))==2)
        lep0pt     = ((ak.num(self.ele[(self.ele.pt>25)]) + ak.num(self.mu[(self.mu.pt>25)]))>0)
        lep1pt     = ((ak.num(self.ele[(self.ele.pt>20)]) + ak.num(self.mu[(self.mu.pt>20)]))>1)
        lepveto    = ((ak.num(self.ele_veto) + ak.num(self.mu_veto))==2)

        dimu    = choose(self.mu, 2)
        diele   = choose(self.ele, 2)
        dilep   = cross(self.mu, self.ele)

        def has_pair_with_sign(pairs):
            # True for events with at least one pair whose charge product is
            # positive (SS) or negative (OS), depending on the SS flag.
            charge_product = pairs['0'].charge * pairs['1'].charge
            wanted = (charge_product > 0) if SS else (charge_product < 0)
            return ak.any(wanted, axis=1)

        sign_cut  = 'SS' if SS else 'OS'
        sign_mask = has_pair_with_sign(dimu) | has_pair_with_sign(diele) | has_pair_with_sign(dilep)

        # pdgIds of the two highest-pt leptons, padded with 0 where fewer than
        # two leptons exist; they are handed to the trigger lookup.
        lepton = ak.concatenate([self.ele, self.mu], axis=1)
        lepton_pdgId_pt_ordered = ak.fill_none(
            ak.pad_none(
                lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True),
        0)

        triggers  = getTriggers(self.events,
            ak.flatten(lepton_pdgId_pt_ordered[:,0:1]),
            ak.flatten(lepton_pdgId_pt_ordered[:,1:2]), year=self.year, dataset=self.dataset)

        ht = ak.sum(self.jet_all.pt, axis=1)
        st = self.met.pt + ht + ak.sum(self.mu.pt, axis=1) + ak.sum(self.ele.pt, axis=1)

        self.selection.add('lepveto',       lepveto)
        self.selection.add('dilep',         is_dilep)
        #self.selection.add('filter',        self.filters)
        self.selection.add('trigger',       triggers)
        self.selection.add('p_T(lep0)>25',  lep0pt)
        self.selection.add('p_T(lep1)>20',  lep1pt)
        self.selection.add(sign_cut,        sign_mask)
        self.selection.add('N_jet>3',       (ak.num(self.jet_all)>3) )
        self.selection.add('N_jet>4',       (ak.num(self.jet_all)>4) )
        self.selection.add('N_central>2',   (ak.num(self.jet_central)>2) )
        self.selection.add('N_central>3',   (ak.num(self.jet_central)>3) )
        self.selection.add('N_btag>0',      (ak.num(self.jet_btag)>0) )
        self.selection.add('N_fwd>0',       (ak.num(self.jet_fwd)>0) )
        self.selection.add('MET>30',        (self.met.pt>30) )
        self.selection.add('MET>50',        (self.met.pt>50) )
        self.selection.add('ST>600',        (st>600) )

        ss_reqs = [
        #    'filter',
            'lepveto',
            'dilep',
            'p_T(lep0)>25',
            'p_T(lep1)>20',
            'trigger',
            sign_cut,
            'N_jet>3',
            'N_central>2',
            'N_btag>0',
            'MET>30',
            'N_fwd>0',
        ]

        if tight:
            ss_reqs += [
                'N_jet>4',
                'N_central>3',
                'ST>600',
                'MET>50',
                #'delta_eta',
            ]

        ss_reqs_d = { sel: True for sel in ss_reqs if sel not in omit }
        ss_selection = self.selection.require(**ss_reqs_d)

        if cutflow:
            # cumulative cutflow: every row requires all cuts listed so far
            cutflow_reqs_d = {}
            for req in ss_reqs:
                cutflow_reqs_d[req] = True
                cutflow.addRow( req, self.selection.require(**cutflow_reqs_d) )

        return ss_selection
    def process(self, events):
        '''
        Fill the control histograms of the trilepton baseline selection for one
        chunk of events.

        events: chunk of events; events.Jet is used for the preselection (more
                than two jets) and metadata['dataset'] of the preselected
                events names the sample.

        Returns the accumulator created by self.accumulator.identity() after
        all fills.
        '''
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config; cfg['lumi'] is used for the event weight below
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        ## Generated leptons
        gen_lep = ev.GenL
        leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
        trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        muon     = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]
        
        ## Electrons
        electron     = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]
        
        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)

        dimuon = choose(muon,2)
        OS_dimuon = dimuon[(dimuon['0'].charge*dimuon['1'].charge < 0)]

        dielectron = choose(electron)
        OS_dielectron = dielectron[(dielectron['0'].charge*dielectron['1'].charge < 0)]

        # per flavour, the opposite-sign pair with mass closest to 91.2;
        # the first available mass is kept, -1 if there is no such pair
        OS_dimuon_bestZmumu = OS_dimuon[ak.singletons(ak.argmin(abs(OS_dimuon.mass-91.2), axis=1))]
        OS_dielectron_bestZee = OS_dielectron[ak.singletons(ak.argmin(abs(OS_dielectron.mass-91.2), axis=1))]
        OS_dilepton_mass = ak.fill_none(ak.pad_none(ak.concatenate([OS_dimuon_bestZmumu.mass, OS_dielectron_bestZee.mass], axis=1), 1, clip=True), -1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]
        
        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)
        
        # define the weight
        weight = Weights( len(ev) )
        
        if dataset != 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult)
            
            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            ## lepton SFs
            #weight.add("lepton", self.leptonSF.get(electron, muon))
        
        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.trilep_baseline(cutflow=cutflow)
        
        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL])
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL])
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL])
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight.weight()[BL])
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL])
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL])
        # muon multiplicity (this histogram used to be filled with the electron count)
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight.weight()[BL])
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight.weight()[BL])
        output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight.weight()[BL])
        output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight.weight()[BL])
        output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight.weight()[BL])
        output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight.weight()[BL])
        output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight.weight()[BL])
        output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight.weight()[BL])
        
        # make a plot of the dilepton mass, but without applying the cut on the dilepton mass itself (N-1 plot)
        # the N-1 selection is evaluated once and reused for both the mass and the weight
        BL_noOffZ = sel.trilep_baseline(omit=['offZ'])
        output['dilep_mass'].fill(dataset=dataset, mass=ak.flatten(OS_dilepton_mass[BL_noOffZ]), weight=weight.weight()[BL_noOffZ])

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight.weight()[BL]
        )

        output['lead_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )

        output['trail_gen_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight.weight()[BL]
        )
        
        # leading and sub-leading jet; the [:, i:i+1] slices keep the per-event
        # nesting so that flatten() yields one entry per selected jet
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight.weight()[BL]
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight.weight()[BL]
        )
        
        #output['j3'].fill(
        #    dataset = dataset,
        #    pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
        #    eta = ak.flatten(jet[:, 2:3][BL].eta),
        #    phi = ak.flatten(jet[:, 2:3][BL].phi),
        #    weight = weight.weight()[BL]
        #)
        
        
        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight.weight()[BL]
        )
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight.weight()[BL])
        
        return output
def create_table(fileNames,
                 label,
                 random_protons=False,
                 resample_factor=-1,
                 step_size=100000,
                 firstEvent=None,
                 entryStop=None,
                 debug=False):
    """Convert ROOT ntuples into flat proton / PPS-track tables in HDF5.

    Every file in ``fileNames`` is opened with uproot4 and its
    ``demo/SlimmedNtuple`` tree is read chunk by chunk.  For each chunk the
    function

    * keeps events with at least one jet and at least one muon,
    * optionally repeats the surviving events ``resample_factor`` times,
      tagging each copy with a ``slice`` index,
    * optionally permutes the proton and PPS-track collections across the
      events of the chunk (``random_protons``) and records the source event
      in ``*_rnd`` columns,
    * copies the first jet, the first muon (index 0) and the event-level
      quantities onto every proton,
    * keeps events with at least one multi-RP proton in arm 0 and in arm 1,
    * appends the flattened protons and tracks to the output datasets.

    The output file ``output-<label>.h5`` holds the datasets
    ``protons_multiRP``, ``protons_singleRP`` and ``ppstracks`` together
    with ``columns_protons_multiRP``, ``columns_protons_singleRP``,
    ``columns_ppstracks``, ``event_counts`` and ``selections``.

    Side effects: reseeds NumPy's global random generator with 42 and
    prints progress information to stdout.

    Args:
        fileNames: iterable of input files, each passed to ``uproot4.open``.
        label: string inserted in the output file name.
        random_protons: if true, shuffle protons and tracks between events.
        resample_factor: number of copies of each event; resampling is
            only active when the value is larger than 1.
        step_size: forwarded to ``tree.iterate`` as ``step_size``.
        firstEvent: forwarded to ``tree.iterate`` as ``entry_start``.
        entryStop: forwarded to ``tree.iterate`` as ``entry_stop``.
        debug: if true, also print the full content of the selected
            collections.

    Returns:
        None.
    """

    fileNames_ = fileNames
    label_ = label
    random_protons_ = random_protons
    resample_factor_ = resample_factor
    step_size_ = step_size
    firstEvent_ = firstEvent
    entryStop_ = entryStop

    fill_proton_extra_ = True

    # Passed to tree.iterate(how=...).  With "zip" the proton / pps_track
    # records are taken directly from the events and the extra "*2" proton
    # branches are not read.
    how_ = None

    print("Random protons: {}".format(random_protons_))

    resample = resample_factor_ > 1
    print("Resample: {} / Resample factor: {}".format(resample,
                                                      resample_factor_))

    # Fixed seed so that the permutation used for random protons is
    # reproducible from run to run.
    np.random.seed(42)

    dset_chunk_size = 50000

    columns_protons = [
        "run", "lumiblock", "event", "slice", "xi", "thx", "thy", "t",
        "ismultirp", "rpid", "arm", "jet0_pt", "jet0_eta", "jet0_phi",
        "jet0_energy", "jet0_mass", "jet0_corrmass", "jet0_tau1", "jet0_tau2",
        "jet0_vertexz", "muon0_pt", "muon0_eta", "muon0_phi", "muon0_energy",
        "muon0_charge", "muon0_iso", "muon0_dxy", "muon0_dz", "met", "met_x",
        "met_y", "met_phi", "nVertices", "num_bjets_ak8", "num_bjets_ak4",
        "num_jets_ak4", "pfcand_nextracks", "pfcand_nextracks_noDRl",
        "recoMWhad", "recoMWlep", "recoMWW", "recoRapidityWW", "dphiWW",
        "WLeptonicPt", "WLeptonicPhi"
    ]

    columns_protons_multiRP = columns_protons.copy()

    if random_protons_:
        columns_protons.extend(
            ["run_rnd", "lumiblock_rnd", "event_rnd", "slice_rnd"])
        columns_protons_multiRP.extend(
            ["run_rnd", "lumiblock_rnd", "event_rnd", "slice_rnd"])

    if fill_proton_extra_:
        columns_protons.extend(
            ["trackx1", "tracky1", "trackpixshift1", "rpid1"])
        columns_protons_multiRP.extend([
            "trackx1", "tracky1", "trackpixshift1", "rpid1", "trackx2",
            "tracky2", "trackpixshift2", "rpid2"
        ])

    columns_ppstracks = [
        "run", "lumiblock", "event", "slice", "x", "y", "rpid"
    ]

    if random_protons_:
        columns_ppstracks.extend(
            ["run_rnd", "lumiblock_rnd", "event_rnd", "slice_rnd"])

    # Output column name -> field name in the awkward record.  Only
    # "ismultirp" differs: it is read from the "ismultirp_" field.
    protons_keys = {col_: col_ for col_ in columns_protons_multiRP}
    protons_keys["ismultirp"] = "ismultirp_"

    ppstracks_keys = {col_: col_ for col_ in columns_ppstracks}

    counts_label_protons_ = "Proton" if not random_protons_ else "ProtonRnd"

    with h5py.File('output-' + label_ + '.h5', 'w') as f:

        dset_protons_multiRP = f.create_dataset(
            'protons_multiRP', (dset_chunk_size, len(columns_protons_multiRP)),
            compression="gzip",
            chunks=True,
            maxshape=(None, len(columns_protons_multiRP)))
        print("Initial dataset shape: {}".format(dset_protons_multiRP.shape))

        dset_protons_singleRP = f.create_dataset(
            'protons_singleRP', (dset_chunk_size, len(columns_protons)),
            compression="gzip",
            chunks=True,
            maxshape=(None, len(columns_protons)))
        print("Initial dataset shape: {}".format(dset_protons_singleRP.shape))

        dset_ppstracks = f.create_dataset(
            'ppstracks', (dset_chunk_size, len(columns_ppstracks)),
            compression="gzip",
            chunks=True,
            maxshape=(None, len(columns_ppstracks)))
        print("Initial dataset shape: {}".format(dset_ppstracks.shape))

        # Pre-filled in column order: the values are replaced for every
        # chunk, and np.stack() below relies on the dict keeping this order.
        protons_multiRP_list = {col_: [] for col_ in columns_protons_multiRP}
        protons_singleRP_list = {col_: [] for col_ in columns_protons}
        ppstracks_list = {col_: [] for col_ in columns_ppstracks}

        selections = None
        counts = None

        # Per dataset: number of chunk-sized extensions so far, next row to
        # write, and number of rows counted towards the next extension.
        dset_multiRP_slice = 0
        dset_multiRP_idx = 0
        dset_multiRP_entries = 0

        dset_singleRP_slice = 0
        dset_singleRP_idx = 0
        dset_singleRP_entries = 0

        dset_ppstracks_slice = 0
        dset_ppstracks_idx = 0
        dset_ppstracks_entries = 0

        for file_ in fileNames_:
            print(file_)
            # Context manager: the input file is closed even if the
            # processing of one of its chunks raises.
            with uproot4.open(file_) as root_:

                # NOTE(review): relies on np.array() turning the branch
                # object into its entries - confirm that this prints the
                # real number of events.
                print("Number of events in tree: {}".format(
                    np.array(root_["demo/SlimmedNtuple/event"]).size))

                tree_ = root_["demo/SlimmedNtuple"]

                keys_nonproton = [
                    "run", "event", "lumiblock", "nVertices", "num_bjets_ak8",
                    "num_bjets_ak4", "num_jets_ak4", "pfcand_nextracks",
                    "pfcand_nextracks_noDRl", "recoMWhad", "recoMWlep",
                    "recoMWW", "recoRapidityWW", "dphiWW", "WLeptonicPt",
                    "WLeptonicPhi"
                ]
                keys_jet = tree_.keys(filter_name="jet*")
                keys_nonproton.extend(keys_jet)
                keys_muon = tree_.keys(filter_name="muon*")
                keys_nonproton.extend(keys_muon)
                keys_met = tree_.keys(filter_name="met*")
                keys_nonproton.extend(keys_met)
                keys_proton = tree_.keys(filter_name="proton*")
                keys_ppstrack = tree_.keys(filter_name="pps_track*")
                keys = []
                keys.extend(keys_nonproton)
                keys.extend(keys_proton)
                keys.extend(keys_ppstrack)
                keys_proton_extra = [
                    'proton_trackx2', 'proton_tracky2',
                    'proton_trackpixshift2', 'proton_rpid2'
                ]
                if how_ == "zip":
                    for key_ in keys_proton_extra:
                        if key_ in keys:
                            keys.remove(key_)
                print(keys)

                for events_ in tree_.iterate(keys,
                                             library="ak",
                                             how=how_,
                                             step_size=step_size_,
                                             entry_start=firstEvent_,
                                             entry_stop=entryStop_):
                    print(len(events_), events_)

                    print("Num jets: {}".format(ak.num(events_["jet_pt"])))
                    print("Num muons: {}".format(ak.num(events_["muon_pt"])))
                    print("Num protons: {}".format(
                        ak.num(events_["proton_xi"])))
                    print("Num pps tracks: {}".format(
                        ak.num(events_["pps_track_x"])))

                    selections_ = []
                    counts_ = []

                    selections_.append("All")
                    counts_.append(len(events_))

                    # Event selections
                    msk_1jet = (ak.num(events_["jet_pt"]) >= 1)
                    selections_.append("Jet")
                    counts_.append(np.sum(np.array(msk_1jet).astype("int32")))

                    msk_1muon = msk_1jet & (ak.num(events_["muon_pt"]) >= 1)
                    selections_.append("Muon")
                    counts_.append(
                        np.sum(np.array(msk_1muon).astype("int32")))

                    events_ = events_[msk_1muon]

                    selections_ = np.array(selections_)
                    counts_ = np.array(counts_)

                    # Repeated events are counted once per copy
                    if resample:
                        counts_ = counts_ * resample_factor_

                    if selections is None:
                        selections = selections_
                        counts = counts_
                    else:
                        # Add this chunk's counts to the entries with the
                        # same labels ("selections" also holds the proton
                        # label appended further below).
                        msk_selections = np.full_like(selections,
                                                      False,
                                                      dtype='bool')
                        for key in selections_:
                            msk_selections |= (selections == key)
                        counts[msk_selections] += counts_

                    # Repeat events by resample factor; "slice" records
                    # which copy an event belongs to (0 without resampling)
                    slices_ = np.zeros(len(events_), dtype=np.int32)
                    if resample:
                        events_size_ = len(events_)
                        events_ = ak.concatenate(
                            ([events_] * resample_factor_), axis=0)
                        slices_ = np.zeros(resample_factor_ * events_size_,
                                           dtype=np.int32)
                        for idx_ in range(resample_factor_):
                            slices_[(idx_ * events_size_):(
                                (idx_ + 1) * events_size_)] = idx_

                    events_["slice"] = slices_

                    print("Run: {}".format(events_["run"]))
                    print("Lumi: {}".format(events_["lumiblock"]))
                    print("Event: {}".format(events_["event"]))
                    print("Slice: {}".format(events_["slice"]))
                    print("Num jets: {}".format(ak.num(events_["jet_pt"])))
                    print("Num muons: {}".format(ak.num(events_["muon_pt"])))
                    print("Num protons: {}".format(
                        ak.num(events_["proton_xi"])))
                    print("Num pps tracks: {}".format(
                        ak.num(events_["pps_track_x"])))

                    # Fetch protons
                    protons_ = None
                    protons_extra_ = None
                    ppstracks_ = None
                    if how_ == "zip":
                        protons_ = events_["proton"]
                        ppstracks_ = events_["pps_track"]
                    elif how_ is None:
                        # The extra "*2" branches are zipped separately
                        # from the other proton branches.
                        keys_proton_ = keys_proton.copy()
                        for key_ in keys_proton_extra:
                            if key_ in keys_proton_:
                                keys_proton_.remove(key_)

                        # Field names are the branch names without prefix
                        arrays_proton = {
                            key_[len("proton_"):]: events_[key_]
                            for key_ in keys_proton_
                        }
                        protons_ = ak.zip(arrays_proton)

                        if fill_proton_extra_:
                            arrays_proton_extra = {
                                key_[len("proton_"):]: events_[key_]
                                for key_ in keys_proton_extra
                            }
                            protons_extra_ = ak.zip(arrays_proton_extra)

                        arrays_ppstrack = {
                            key_[len("pps_track_"):]: events_[key_]
                            for key_ in keys_ppstrack
                        }
                        ppstracks_ = ak.zip(arrays_ppstrack)

                    # Randomize proton arrays
                    run_rnd_ = None
                    lumiblock_rnd_ = None
                    event_rnd_ = None
                    slice_rnd_ = None
                    if random_protons_:
                        index_rnd_ = np.random.permutation(len(events_))

                        events_run_ = events_["run"]
                        events_lumiblock_ = events_["lumiblock"]
                        events_event_ = events_["event"]
                        events_slice_ = events_["slice"]
                        run_rnd_ = events_run_[index_rnd_]
                        lumiblock_rnd_ = events_lumiblock_[index_rnd_]
                        event_rnd_ = events_event_[index_rnd_]
                        slice_rnd_ = events_slice_[index_rnd_]

                        protons_rnd_ = protons_[index_rnd_]
                        ppstracks_rnd_ = ppstracks_[index_rnd_]
                        protons_extra_rnd_ = None
                        # Compare with None: the truth value of an awkward
                        # array does not tell whether it was built.
                        if protons_extra_ is not None:
                            protons_extra_rnd_ = protons_extra_[index_rnd_]

                        print("Run: {}".format(events_run_))
                        print("Run randomized: {}".format(run_rnd_))
                        print("Lumi: {}".format(events_lumiblock_))
                        print("Lumi randomized: {}".format(lumiblock_rnd_))
                        print("Event: {}".format(events_event_))
                        print("Event randomized: {}".format(event_rnd_))
                        print("Slice: {}".format(events_slice_))
                        print("Slice randomized: {}".format(slice_rnd_))
                        print("Num protons: {}".format(ak.num(protons_)))
                        print("Num protons randomized: {}".format(
                            ak.num(protons_rnd_)))
                        print("Num pps tracks: {}".format(
                            ak.num(ppstracks_)))
                        print("Num pps tracks randomized: {}".format(
                            ak.num(ppstracks_rnd_)))
                        if protons_extra_rnd_ is not None:
                            print("Num protons extra: {}".format(
                                ak.num(protons_extra_)))
                            print("Num protons extra randomized: {}".format(
                                ak.num(protons_extra_rnd_)))

                        protons_ = protons_rnd_
                        protons_extra_ = protons_extra_rnd_
                        ppstracks_ = ppstracks_rnd_

                    print("Num protons: {}".format(ak.num(protons_)))
                    print("Num pps tracks: {}".format(ak.num(ppstracks_)))
                    if protons_extra_ is not None:
                        print("Num protons extra: {}".format(
                            ak.num(protons_extra_)))

                    # Attach event identifiers to every proton
                    for name_ in ("run", "lumiblock", "event", "slice"):
                        protons_[name_] = events_[name_]
                    if random_protons_:
                        protons_["run_rnd"] = run_rnd_
                        protons_["lumiblock_rnd"] = lumiblock_rnd_
                        protons_["event_rnd"] = event_rnd_
                        protons_["slice_rnd"] = slice_rnd_

                    # First jet of the event; the selection above guarantees
                    # that index 0 exists
                    for var_ in ("pt", "eta", "phi", "energy", "mass",
                                 "corrmass", "tau1", "tau2", "vertexz"):
                        protons_["jet0_" + var_] = events_["jet_" +
                                                           var_][:, 0]

                    # First muon of the event; the input branch "muon_e" is
                    # stored as "muon0_energy"
                    for dst_, src_ in (("pt", "pt"), ("eta", "eta"),
                                       ("phi", "phi"), ("energy", "e"),
                                       ("charge", "charge"), ("iso", "iso"),
                                       ("dxy", "dxy"), ("dz", "dz")):
                        protons_["muon0_" + dst_] = events_["muon_" +
                                                            src_][:, 0]

                    # Event-level quantities copied under the same name
                    for name_ in ("met", "met_x", "met_y", "met_phi",
                                  "nVertices", "num_bjets_ak8",
                                  "num_bjets_ak4", "num_jets_ak4",
                                  "pfcand_nextracks",
                                  "pfcand_nextracks_noDRl", "recoMWhad",
                                  "recoMWlep", "recoMWW", "recoRapidityWW",
                                  "dphiWW", "WLeptonicPt", "WLeptonicPhi"):
                        protons_[name_] = events_[name_]

                    # Attach event identifiers to every track
                    for name_ in ("run", "lumiblock", "event", "slice"):
                        ppstracks_[name_] = events_[name_]
                    if random_protons_:
                        ppstracks_["run_rnd"] = run_rnd_
                        ppstracks_["lumiblock_rnd"] = lumiblock_rnd_
                        ppstracks_["event_rnd"] = event_rnd_
                        ppstracks_["slice_rnd"] = slice_rnd_

                    protons_singleRP_ = protons_[protons_.ismultirp_ == 0]
                    protons_multiRP_ = protons_[protons_.ismultirp_ == 1]
                    if protons_extra_ is not None:
                        for name_ in ("trackx2", "tracky2", "trackpixshift2",
                                      "rpid2"):
                            protons_multiRP_[name_] = protons_extra_[name_]

                    # The per-RP and per-arm views are used for the
                    # printouts and for the two-arm selection below
                    protons_singleRP_byRP_ = {}
                    ppstracks_byRP_ = {}
                    protons_multiRP_byArm_ = {}
                    for rpid in (3, 23, 103, 123):
                        protons_singleRP_byRP_[rpid] = protons_singleRP_[
                            protons_singleRP_.rpid == rpid]
                        ppstracks_byRP_[rpid] = ppstracks_[ppstracks_.rpid ==
                                                           rpid]

                        print("\nNum protons RP {}: {}".format(
                            rpid, ak.num(protons_singleRP_byRP_[rpid])))
                        if debug:
                            print(ak.to_list(protons_singleRP_byRP_[rpid]))
                            print("\n")
                            print(ak.to_list(ppstracks_byRP_[rpid]))

                    for arm in (0, 1):
                        protons_multiRP_byArm_[arm] = protons_multiRP_[
                            protons_multiRP_.arm == arm]

                        print("\nNum multi-RP protons Arm {}: {}".format(
                            arm, ak.num(protons_multiRP_byArm_[arm])))
                        if debug:
                            print(ak.to_list(protons_multiRP_byArm_[arm]))

                    # Keep events with a multi-RP proton in both arms
                    msk_protons = np.array(
                        ak.num(protons_multiRP_byArm_[0]) > 0)
                    msk_protons &= np.array(
                        ak.num(protons_multiRP_byArm_[1]) > 0)

                    protons_multiRP_sel_ = protons_multiRP_[msk_protons]
                    protons_singleRP_sel_ = protons_singleRP_[msk_protons]
                    ppstracks_sel_ = ppstracks_[msk_protons]
                    print("\n")
                    if debug:
                        print(msk_protons)
                    print(len(protons_multiRP_sel_))
                    print(ak.num(protons_multiRP_sel_))
                    if debug:
                        print("\n")
                        print(ak.to_list(protons_multiRP_sel_))
                        print("\n")
                        print(ak.to_list(protons_singleRP_sel_))
                        print("\n")
                        print(ak.to_list(ppstracks_sel_))

                    counts_protons_ = len(protons_[msk_protons])
                    if counts_label_protons_ not in selections:
                        selections = np.concatenate(
                            (selections, np.array([counts_label_protons_])))
                        counts = np.concatenate(
                            (counts, np.array([counts_protons_])))
                    else:
                        counts[selections ==
                               counts_label_protons_] += counts_protons_

                    print(selections)
                    print(counts)

                    # Flatten every column to one entry per proton / track
                    for col_ in columns_protons_multiRP:
                        protons_multiRP_list[col_] = np.array(
                            ak.flatten(
                                protons_multiRP_sel_[protons_keys[col_]]))

                    arr_size_multiRP_ = len(protons_multiRP_list["xi"])
                    print("Flattened array size multi-RP: {}".format(
                        arr_size_multiRP_))

                    for col_ in columns_protons:
                        protons_singleRP_list[col_] = np.array(
                            ak.flatten(
                                protons_singleRP_sel_[protons_keys[col_]]))

                    arr_size_singleRP_ = len(protons_singleRP_list["xi"])
                    print("Flattened array size single-RP: {}".format(
                        arr_size_singleRP_))

                    for col_ in columns_ppstracks:
                        ppstracks_list[col_] = np.array(
                            ak.flatten(ppstracks_sel_[ppstracks_keys[col_]]))

                    arr_size_ppstracks_ = len(ppstracks_list["x"])
                    print("Flattened array size tracks: {}".format(
                        arr_size_ppstracks_))

                    dset_multiRP_entries += arr_size_multiRP_
                    dset_singleRP_entries += arr_size_singleRP_
                    dset_ppstracks_entries += arr_size_ppstracks_

                    # Grow each dataset in multiples of the chunk size
                    # before the new rows are written
                    if dset_multiRP_entries > dset_chunk_size:
                        resize_factor_ = (dset_multiRP_entries //
                                          dset_chunk_size)
                        chunk_resize_ = resize_factor_ * dset_chunk_size

                        print("Resizing output dataset by {} entries.".format(
                            chunk_resize_))
                        dset_protons_multiRP.resize(
                            (dset_protons_multiRP.shape[0] + chunk_resize_),
                            axis=0)
                        print("Dataset shape: {}".format(
                            dset_protons_multiRP.shape))

                        dset_multiRP_slice += resize_factor_
                        # Count the rest to the chunk size
                        dset_multiRP_entries = (dset_multiRP_entries %
                                                dset_chunk_size)

                    if dset_singleRP_entries > dset_chunk_size:
                        resize_factor_ = (dset_singleRP_entries //
                                          dset_chunk_size)
                        chunk_resize_ = resize_factor_ * dset_chunk_size

                        print("Resizing output dataset by {} entries.".format(
                            chunk_resize_))
                        dset_protons_singleRP.resize(
                            (dset_protons_singleRP.shape[0] + chunk_resize_),
                            axis=0)
                        print("Dataset shape: {}".format(
                            dset_protons_singleRP.shape))

                        dset_singleRP_slice += resize_factor_
                        # Count the rest to the chunk size
                        dset_singleRP_entries = (dset_singleRP_entries %
                                                 dset_chunk_size)

                    if dset_ppstracks_entries > dset_chunk_size:
                        resize_factor_ = (dset_ppstracks_entries //
                                          dset_chunk_size)
                        chunk_resize_ = resize_factor_ * dset_chunk_size

                        print("Resizing output dataset by {} entries.".format(
                            chunk_resize_))
                        dset_ppstracks.resize(
                            (dset_ppstracks.shape[0] + chunk_resize_), axis=0)
                        print("Dataset shape: {}".format(
                            dset_ppstracks.shape))

                        dset_ppstracks_slice += resize_factor_
                        # Count the rest to the chunk size
                        dset_ppstracks_entries = (dset_ppstracks_entries %
                                                  dset_chunk_size)

                    # One row per proton / track, one column per variable
                    print("Stacking data.")
                    data_protons_multiRP_ = np.stack(
                        list(protons_multiRP_list.values()), axis=1)
                    print(data_protons_multiRP_.shape)
                    print(data_protons_multiRP_)

                    data_protons_singleRP_ = np.stack(
                        list(protons_singleRP_list.values()), axis=1)
                    print(data_protons_singleRP_.shape)
                    print(data_protons_singleRP_)

                    data_ppstracks_ = np.stack(list(ppstracks_list.values()),
                                               axis=1)
                    print(data_ppstracks_.shape)
                    print(data_ppstracks_)

                    dset_idx_next_ = dset_multiRP_idx + arr_size_multiRP_
                    print("Slice: {}".format(dset_multiRP_slice))
                    print("Writing in positions ({},{})".format(
                        dset_multiRP_idx, dset_idx_next_))
                    dset_protons_multiRP[
                        dset_multiRP_idx:dset_idx_next_] = data_protons_multiRP_
                    dset_multiRP_idx = dset_idx_next_

                    dset_idx_next_ = dset_singleRP_idx + arr_size_singleRP_
                    print("Slice: {}".format(dset_singleRP_slice))
                    print("Writing in positions ({},{})".format(
                        dset_singleRP_idx, dset_idx_next_))
                    dset_protons_singleRP[
                        dset_singleRP_idx:
                        dset_idx_next_] = data_protons_singleRP_
                    dset_singleRP_idx = dset_idx_next_

                    dset_idx_next_ = dset_ppstracks_idx + arr_size_ppstracks_
                    print("Slice: {}".format(dset_ppstracks_slice))
                    print("Writing in positions ({},{})".format(
                        dset_ppstracks_idx, dset_idx_next_))
                    dset_ppstracks[
                        dset_ppstracks_idx:dset_idx_next_] = data_ppstracks_
                    dset_ppstracks_idx = dset_idx_next_

        # Reduce dataset to its final size
        print("Reduce dataset.")
        dset_protons_multiRP.resize((dset_multiRP_idx), axis=0)
        print("Dataset shape: {}".format(dset_protons_multiRP.shape))

        dset_protons_singleRP.resize((dset_singleRP_idx), axis=0)
        print("Dataset shape: {}".format(dset_protons_singleRP.shape))

        dset_ppstracks.resize((dset_ppstracks_idx), axis=0)
        print("Dataset shape: {}".format(dset_ppstracks.shape))

        print("Writing column names and event counts.")

        columns_protons_multiRP_ = np.array(columns_protons_multiRP, dtype='S')
        print(columns_protons_multiRP_)

        columns_protons_singleRP_ = np.array(columns_protons, dtype='S')
        print(columns_protons_singleRP_)

        columns_ppstracks_ = np.array(columns_ppstracks, dtype='S')
        print(columns_ppstracks_)

        event_counts_ = counts
        print(event_counts_)

        selections_ = np.array(selections, dtype='S')
        print(selections_)

        dset_columns_protons_multiRP = f.create_dataset(
            'columns_protons_multiRP', data=columns_protons_multiRP_)
        dset_columns_protons_singleRP = f.create_dataset(
            'columns_protons_singleRP', data=columns_protons_singleRP_)
        dset_columns_ppstracks = f.create_dataset('columns_ppstracks',
                                                  data=columns_ppstracks_)
        dset_counts = f.create_dataset('event_counts', data=event_counts_)
        dset_selections = f.create_dataset('selections', data=selections_)

        # Summary printout.  The last row is only shown for non-empty
        # datasets: indexing [-1] on a dataset without rows raises.
        print(dset_protons_multiRP)
        if dset_protons_multiRP.shape[0] > 0:
            print(dset_protons_multiRP[-1])
        print(dset_protons_singleRP)
        if dset_protons_singleRP.shape[0] > 0:
            print(dset_protons_singleRP[-1])
        print(dset_ppstracks)
        if dset_ppstracks.shape[0] > 0:
            print(dset_ppstracks[-1])

        print(dset_columns_protons_multiRP)
        print(list(dset_columns_protons_multiRP))
        print(dset_columns_protons_singleRP)
        print(list(dset_columns_protons_singleRP))
        print(dset_columns_ppstracks)
        print(list(dset_columns_ppstracks))
        print(dset_counts)
        print(list(dset_counts))
        print(dset_selections)
        print(list(dset_selections))
Пример #19
0
    def process(self, events):

        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet) > 2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        ## Muons
        muon = Collections(ev, "Muon", "tightTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon = choose(muon, 2)
        SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1)
        OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron = Collections(ev, "Electron", "tightTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron = choose(electron, 2)
        SSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1)
        OSelectron = ak.any(
            (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        lepton = ak.concatenate([muon, electron], axis=1)
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0,
                          axis=1)
        OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0,
                          axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        ## Jets
        jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet = jet[ak.argsort(
            jet.pt_nom, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        central = jet[(abs(jet.eta) < 2.4)]
        btag = getBTagsDeepFlavB(
            jet, year=self.year)  # should study working point for DeepJet
        light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd = getFwdJet(light)
        fwd_noPU = getFwdJet(light, puId=False)

        ## forward jets
        high_p_fwd = fwd[ak.singletons(ak.argmax(
            fwd.p, axis=1))]  # highest momentum spectator
        high_pt_fwd = fwd[ak.singletons(ak.argmax(
            fwd.pt_nom, axis=1))]  # highest transverse momentum spectator
        high_eta_fwd = fwd[ak.singletons(ak.argmax(abs(
            fwd.eta), axis=1))]  # most forward spectator

        ## Get the two leading b-jets in terms of btag score
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2]

        jf = cross(high_p_fwd, jet)
        mjf = (jf['0'] + jf['1']).mass
        deltaEta = abs(high_p_fwd.eta -
                       jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta)
        deltaEtaMax = ak.max(deltaEta, axis=1)
        mjf_max = ak.max(mjf, axis=1)

        jj = choose(jet, 2)
        mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1)

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt,
                                                            axis=1)
        ht_central = ak.sum(central.pt, axis=1)

        ## event selectors
        filters = getFilters(ev, year=self.year, dataset=dataset)
        triggers = getTriggers(ev, year=self.year, dataset=dataset)

        dilep = ((ak.num(electron) == 1) & (ak.num(muon) == 1))
        lep0pt = ((ak.num(electron[(electron.pt > 25)]) +
                   ak.num(muon[(muon.pt > 25)])) > 0)
        lep1pt = ((ak.num(electron[(electron.pt > 20)]) +
                   ak.num(muon[(muon.pt > 20)])) > 1)
        lepveto = ((ak.num(vetoelectron) + ak.num(vetomuon)) == 2)

        # define the weight
        weight = Weights(len(ev))

        if not dataset == 'MuonEG':
            # lumi weight
            weight.add("weight", ev.weight * cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU",
                       ev.puWeight,
                       weightUp=ev.puWeightUp,
                       weightDown=ev.puWeightDown,
                       shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        selection = PackedSelection()
        selection.add('lepveto', lepveto)
        selection.add('dilep', dilep)
        selection.add('trigger', (triggers))
        selection.add('filter', (filters))
        selection.add('p_T(lep0)>25', lep0pt)
        selection.add('p_T(lep1)>20', lep1pt)
        selection.add('OS', OSlepton)
        selection.add('N_btag=2', (ak.num(btag) == 2))
        selection.add('N_jet>2', (ak.num(jet) >= 3))
        selection.add('N_central>1', (ak.num(central) >= 2))
        selection.add('N_fwd>0', (ak.num(fwd) >= 1))
        selection.add('MET>30', (ev.MET.pt > 30))

        os_reqs = [
            'lepveto', 'dilep', 'trigger', 'filter', 'p_T(lep0)>25',
            'p_T(lep1)>20', 'OS'
        ]
        bl_reqs = os_reqs + [
            'N_btag=2', 'N_jet>2', 'N_central>1', 'N_fwd>0', 'MET>30'
        ]

        os_reqs_d = {sel: True for sel in os_reqs}
        os_selection = selection.require(**os_reqs_d)
        bl_reqs_d = {sel: True for sel in bl_reqs}
        BL = selection.require(**bl_reqs_d)

        cutflow = Cutflow(output, ev, weight=weight)
        cutflow_reqs_d = {}
        for req in bl_reqs:
            cutflow_reqs_d.update({req: True})
            cutflow.addRow(req, selection.require(**cutflow_reqs_d))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset,
                               multiplicity=ev.PV[os_selection].npvs,
                               weight=weight.weight()[os_selection])
        output['PV_npvsGood'].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_selection].npvsGood,
                                   weight=weight.weight()[os_selection])
        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_b'].fill(dataset=dataset,
                           multiplicity=ak.num(btag)[os_selection],
                           weight=weight.weight()[os_selection])
        output['N_central'].fill(dataset=dataset,
                                 multiplicity=ak.num(central)[os_selection],
                                 weight=weight.weight()[os_selection])
        output['N_ele'].fill(dataset=dataset,
                             multiplicity=ak.num(electron)[os_selection],
                             weight=weight.weight()[os_selection])
        output['N_mu'].fill(dataset=dataset,
                            multiplicity=ak.num(electron)[os_selection],
                            weight=weight.weight()[os_selection])
        output['N_fwd'].fill(dataset=dataset,
                             multiplicity=ak.num(fwd)[os_selection],
                             weight=weight.weight()[os_selection])

        output['MET'].fill(dataset=dataset,
                           pt=ev.MET[os_selection].pt,
                           phi=ev.MET[os_selection].phi,
                           weight=weight.weight()[os_selection])

        output['electron'].fill(dataset=dataset,
                                pt=ak.to_numpy(ak.flatten(electron[BL].pt)),
                                eta=ak.to_numpy(ak.flatten(electron[BL].eta)),
                                phi=ak.to_numpy(ak.flatten(electron[BL].phi)),
                                weight=weight.weight()[BL])

        output['muon'].fill(dataset=dataset,
                            pt=ak.to_numpy(ak.flatten(muon[BL].pt)),
                            eta=ak.to_numpy(ak.flatten(muon[BL].eta)),
                            phi=ak.to_numpy(ak.flatten(muon[BL].phi)),
                            weight=weight.weight()[BL])

        output['lead_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['trail_lep'].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight=weight.weight()[BL])

        output['fwd_jet'].fill(dataset=dataset,
                               pt=ak.flatten(high_p_fwd[BL].pt_nom),
                               eta=ak.flatten(high_p_fwd[BL].eta),
                               phi=ak.flatten(high_p_fwd[BL].phi),
                               weight=weight.weight()[BL])

        output['b1'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 0:1][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 0:1][BL].phi),
                          weight=weight.weight()[BL])

        output['b2'].fill(dataset=dataset,
                          pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(high_score_btag[:, 1:2][BL].eta),
                          phi=ak.flatten(high_score_btag[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j1'].fill(dataset=dataset,
                          pt=ak.flatten(jet.pt_nom[:, 0:1][BL]),
                          eta=ak.flatten(jet.eta[:, 0:1][BL]),
                          phi=ak.flatten(jet.phi[:, 0:1][BL]),
                          weight=weight.weight()[BL])

        output['j2'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 1:2][BL].pt_nom),
                          eta=ak.flatten(jet[:, 1:2][BL].eta),
                          phi=ak.flatten(jet[:, 1:2][BL].phi),
                          weight=weight.weight()[BL])

        output['j3'].fill(dataset=dataset,
                          pt=ak.flatten(jet[:, 2:3][BL].pt_nom),
                          eta=ak.flatten(jet[:, 2:3][BL].eta),
                          phi=ak.flatten(jet[:, 2:3][BL].phi),
                          weight=weight.weight()[BL])

        # Now, take care of systematic unceratinties
        if not dataset == 'MuonEG':
            alljets = getJets(ev, minPt=0, maxEta=4.7)
            alljets = alljets[(alljets.jetId > 1)]
            for var in self.variations:
                # get the collections that change with the variations
                jet = getPtEtaPhi(alljets, pt_var=var)
                jet = jet[(jet.pt > 25)]
                jet = jet[~match(
                    jet, muon,
                    deltaRCut=0.4)]  # remove jets that overlap with muons
                jet = jet[~match(
                    jet, electron,
                    deltaRCut=0.4)]  # remove jets that overlap with electrons

                central = jet[(abs(jet.eta) < 2.4)]
                btag = getBTagsDeepFlavB(
                    jet,
                    year=self.year)  # should study working point for DeepJet
                light = getBTagsDeepFlavB(jet, year=self.year, invert=True)
                fwd = getFwdJet(light)
                fwd_noPU = getFwdJet(light, puId=False)

                ## forward jets
                high_p_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.p, axis=1))]  # highest momentum spectator
                high_pt_fwd = fwd[ak.singletons(ak.argmax(
                    fwd.pt, axis=1))]  # highest transverse momentum spectator
                high_eta_fwd = fwd[ak.singletons(
                    ak.argmax(abs(fwd.eta), axis=1))]  # most forward spectator

                ## Get the two leading b-jets in terms of btag score
                high_score_btag = central[ak.argsort(
                    central.btagDeepFlavB)][:, :2]

                # get the modified selection -> more difficult
                selection.add('N_jet>2_' + var,
                              (ak.num(jet.pt) >= 3))  # stupid bug here...
                selection.add('N_btag=2_' + var, (ak.num(btag) == 2))
                selection.add('N_central>1_' + var, (ak.num(central) >= 2))
                selection.add('N_fwd>0_' + var, (ak.num(fwd) >= 1))
                selection.add('MET>30_' + var, (getattr(ev.MET, var) > 30))

                ## Don't change the selection for now...
                bl_reqs = os_reqs + [
                    'N_jet>2_' + var, 'MET>30_' + var, 'N_btag=2_' + var,
                    'N_central>1_' + var, 'N_fwd>0_' + var
                ]
                bl_reqs_d = {sel: True for sel in bl_reqs}
                BL = selection.require(**bl_reqs_d)

                # the OS selection remains unchanged
                output['N_jet_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(jet)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_fwd_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(fwd)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_b_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(btag)[os_selection],
                    weight=weight.weight()[os_selection])
                output['N_central_' + var].fill(
                    dataset=dataset,
                    multiplicity=ak.num(central)[os_selection],
                    weight=weight.weight()[os_selection])

                # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots.
                output['j1_' + var].fill(dataset=dataset,
                                         pt=ak.flatten(jet.pt[:, 0:1][BL]),
                                         eta=ak.flatten(jet.eta[:, 0:1][BL]),
                                         phi=ak.flatten(jet.phi[:, 0:1][BL]),
                                         weight=weight.weight()[BL])

                output['b1_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]),
                    eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]),
                    phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]),
                    weight=weight.weight()[BL])

                output['fwd_jet_' + var].fill(
                    dataset=dataset,
                    pt=ak.flatten(high_p_fwd[BL].pt),
                    eta=ak.flatten(high_p_fwd[BL].eta),
                    phi=ak.flatten(high_p_fwd[BL].phi),
                    weight=weight.weight()[BL])

                output['MET_' + var].fill(dataset=dataset,
                                          pt=getattr(ev.MET,
                                                     var)[os_selection],
                                          phi=ev.MET[os_selection].phi,
                                          weight=weight.weight()[os_selection])

        return output
Пример #20
0
    def process(self, events):
        """Fill jet and lepton outputs for same-sign and opposite-sign dilepton events.

        Tight electrons and muons passing pt > 20, |eta| < 2.4 and a gen-match
        requirement are merged into a single lepton collection, jets
        overlapping with those leptons are removed, and the accumulator
        entries ``N_jet``, ``N_ele``, ``N_ele2``, ``electron`` and
        ``electron2`` are filled.  The opposite-sign entries (``N_ele2`` and
        ``electron2``) use a second weight set that additionally carries the
        electron charge-flip weight.

        Args:
            events: event collection; this method reads ``Jet``, ``MET``,
                ``metadata['dataset']`` and, unless the dataset is
                ``'MuonEG'``, ``genWeight``.

        Returns:
            The accumulator created by ``self.accumulator.identity()`` after
            all fills.
        """
        output = self.accumulator.identity()

        # Very loose preselection (at least two jets); nothing else is done with it.
        ev = events[ak.num(events.Jet) >= 2]
        dataset = ev.metadata['dataset']

        # The config is loaded but never read below - probably not needed anymore.
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        def kinematic_and_gen_matched(candidates, pdg_id):
            # Kinematic cuts first, then keep only candidates that have a
            # gen match whose |pdgId| equals the expected lepton flavour.
            candidates = candidates[(candidates.pt > 20)
                                    & (abs(candidates.eta) < 2.4)]
            has_gen_match = candidates.genPartIdx >= 0
            right_flavour = np.abs(candidates.matched_gen.pdgId) == pdg_id
            return candidates[has_gen_match & right_flavour]

        ## Electrons and muons: from here on all leptons are gen-matched
        electron = kinematic_and_gen_matched(
            Collections(ev, "Electron", "tight").get(), 11)
        muon = kinematic_and_gen_matched(
            Collections(ev, "Muon", "tight").get(), 13)

        ## Leptons
        lepton = ak.concatenate([muon, electron], axis=1)
        charge_sum = ak.sum(lepton.charge, axis=1)
        exactly_two = ak.num(lepton) == 2
        same_sign = (charge_sum != 0) & exactly_two
        opposite_sign = (charge_sum == 0) & exactly_two

        leading_lepton = lepton[ak.singletons(ak.argmax(lepton.pt, axis=1))]

        ## Jets: sorted by descending pt, then cleaned against the leptons
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[ak.argsort(jet.pt, ascending=False)]
        for overlapping in (muon, electron):
            jet = jet[~match(jet, overlapping, deltaRCut=0.4)]

        ## MET -> can switch to puppi MET (not used further in this method)
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        ## Weights: `weight2` is `weight` plus the electron charge-flip weight
        n_events = len(ev)
        weight = Weights(n_events)
        weight2 = Weights(n_events)

        if dataset != 'MuonEG':
            # generator weight
            for weights in (weight, weight2):
                weights.add("weight", ev.genWeight)

        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        ## Selections (the order of the add() calls is kept as-is)
        filters = getFilters(ev, year=self.year, dataset=dataset)
        at_least_two_jets = ak.num(jet) >= 2

        selection = PackedSelection()
        for cut_name, mask in (
            ('filter', filters),
            ('ss', same_sign),
            ('os', opposite_sign),
            ('jet', at_least_two_jets),
        ):
            selection.add(cut_name, mask)

        baseline = selection.require(filter=True, jet=True)
        ss_sel = selection.require(filter=True, jet=True, ss=True)
        os_sel = selection.require(filter=True, jet=True, os=True)

        ## Outputs
        nominal = weight.weight()
        with_flip = weight2.weight()

        output['N_jet'].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[baseline],
                             weight=nominal[baseline])

        # (multiplicity entry, kinematics entry, event mask, per-event weights)
        regions = (
            ('N_ele', "electron", ss_sel, nominal),
            ('N_ele2', "electron2", os_sel, with_flip),
        )

        # The multiplicity counts all selected leptons (muons and electrons).
        for count_name, _, mask, event_weight in regions:
            output[count_name].fill(dataset=dataset,
                                    multiplicity=ak.num(lepton)[mask],
                                    weight=event_weight[mask])

        # Leading-lepton kinematics; eta is filled as |eta|.
        for _, kin_name, mask, event_weight in regions:
            lead = leading_lepton[mask]
            output[kin_name].fill(
                dataset=dataset,
                pt=ak.to_numpy(ak.flatten(lead.pt)),
                eta=abs(ak.to_numpy(ak.flatten(lead.eta))),
                phi=ak.to_numpy(ak.flatten(lead.phi)),
                weight=event_weight[mask])

        return output
Пример #21
0
 def _concat_same_type(cls, to_concat):
     """Concatenate several arrays into a single new instance of ``cls``.

     Items that are ``AwkwardSeries`` are unwrapped to their ``.data``
     attribute; every other item is handed to ``ak.concatenate`` unchanged.

     NOTE(review): the ``cls`` parameter suggests this is a classmethod whose
     decorator lies outside this excerpt - confirm.
     """
     unwrapped = []
     for item in to_concat:
         if isinstance(item, AwkwardSeries):
             unwrapped.append(item.data)
         else:
             unwrapped.append(item)
     return cls(ak.concatenate(unwrapped))
Пример #22
0
# Build the generator-level (b, q, q') triplet for hadronic top decays and
# match it to GenJets and reconstructed jets.
# NOTE(review): WDecay, t_Events and final are defined earlier in the script,
# outside this excerpt.

# Keep only the W decay products flagged as 'isLastCopy'.
WDecay = WDecay[WDecay.hasFlags('isLastCopy')]

# t_Events holds the lone bottom quark; W_Events holds the W decay products
# (-> two jets).
# Select the hadronically decaying W: keep the decays in which every product
# has |pdgId| <= 8, then flatten away axis 3.
W_Events = ak.flatten(WDecay[ak.all(abs(WDecay.pdgId) <= 8, axis=-1)], axis=3)

# hadW is the mask for W bosons decaying to quarks: exactly two entries along axis 2.
hadW = ak.num(W_Events, axis=2) == 2
# Keep only the t_Events entries whose W boson decays hadronically.
hadB = t_Events[hadW]
hadB = ak.flatten(hadB, axis=2)

W_quarks = W_Events[hadW]
W_quarks = ak.flatten(W_quarks, axis=2)
# Concatenating these two arrays gives, per event, the GenParticles of the
# correctly decaying tops (bottom quark followed by the two W quarks).
qqb = ak.concatenate([hadB, W_quarks], axis=1)

print("qqb Genparts matched")

# Keep only events with exactly three such GenParticles (drops events with
# extra tops). `final` is filtered first so that both masks are computed from
# the same, still unfiltered `qqb`.
final = final[(ak.count(qqb.pdgId, axis=1) == 3)]
finaljets = final.Jet
qqb = qqb[(ak.count(qqb.pdgId, axis=1) == 3)]
# Apply tight jet cuts to the training data: |eta| < 2.4 and pt > 30.
# Note the two similar names: `finaljets` is every jet of the kept events,
# `finalJets` only the jets passing the cuts.
finaljetSel = (abs(finaljets.eta) < 2.4) & (finaljets.pt > 30)
finalJets = finaljets[finaljetSel]

# Use nearest() to match jets: first each GenParticle to its nearest GenJet,
# then each of those GenJets to its nearest selected reconstructed jet.
# NOTE(review): presumably nearest() matches by delta-R - confirm.
matchedGenJets = qqb.nearest(final.GenJet)
matchedJets = matchedGenJets.nearest(finalJets)
Пример #23
0
    def process(self, events):
        
        output = self.accumulator.identity()
        
        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2
        
        ev = events[presel]
        dataset = ev.metadata['dataset']
        
        # load the config - probably not needed anymore
        cfg = loadConfig()
        
        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        # Get the leptons. This has changed a couple of times now, but we are using fakeable objects as baseline leptons.
        # The added p4 instance has the corrected pt (conePt for fakeable) and should be used for any following selection or calculation
        # Any additional correction (if we choose to do so) should be added here, e.g. Rochester corrections, ...
        ## Muons
        mu_v     = Collections(ev, "Muon", "vetoTTH", year=year).get()  # these include all muons, tight and fakeable
        mu_t     = Collections(ev, "Muon", "tightSSTTH", year=year).get()
        mu_f     = Collections(ev, "Muon", "fakeableSSTTH", year=year).get()
        muon     = ak.concatenate([mu_t, mu_f], axis=1)
        muon['p4'] = get_four_vec_fromPtEtaPhiM(muon, get_pt(muon), muon.eta, muon.phi, muon.mass, copy=False) #FIXME new
        
        ## Electrons
        el_v        = Collections(ev, "Electron", "vetoTTH", year=year).get()
        el_t        = Collections(ev, "Electron", "tightSSTTH", year=year).get()
        el_f        = Collections(ev, "Electron", "fakeableSSTTH", year=year).get()
        electron    = ak.concatenate([el_t, el_f], axis=1)
        electron['p4'] = get_four_vec_fromPtEtaPhiM(electron, get_pt(electron), electron.eta, electron.phi, electron.mass, copy=False) #FIXME new
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            el_t_p  = prompt(el_t)
            el_t_np = nonprompt(el_t)
            el_f_p  = prompt(el_f)
            el_f_np = nonprompt(el_f)
            mu_t_p  = prompt(mu_t)
            mu_t_np = nonprompt(mu_t)
            mu_f_p  = prompt(mu_f)
            mu_f_np = nonprompt(mu_f)

            is_flipped = ( (el_t_p.matched_gen.pdgId*(-1) == el_t_p.pdgId) & (abs(el_t_p.pdgId) == 11) )
            el_t_p_cc  = el_t_p[~is_flipped]  # this is tight, prompt, and charge consistent
            el_t_p_cf  = el_t_p[is_flipped]  # this is tight, prompt, and charge flipped


        ## Merge electrons and muons. These are fakeable leptons now
        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.p4.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.p4.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton.p4 + trailing_lepton.p4).mass
        dilepton_pt = (leading_lepton.p4 + trailing_lepton.p4).pt
        #dilepton_dR = delta_r(leading_lepton, trailing_lepton)
        dilepton_dR = leading_lepton.p4.delta_r(trailing_lepton.p4)
        
        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.p4.pt, ascending=False)].pdgId, 2, clip=True), 0)
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)
            gp = ev.GenPart
            gp_e = gp[((abs(gp.pdgId)==11)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))]
            gp_m = gp[((abs(gp.pdgId)==13)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))]
            n_gen_lep = ak.num(gp_e) + ak.num(gp_m)
        else:
            n_gen_lep = np.zeros(len(ev))

        LL = (n_gen_lep > 2)  # this is the classifier for LL events (should mainly be ttZ/tZ/WZ...)

        mt_lep_met = mt(lepton.p4.pt, lepton.p4.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        tau       = tau[~match(tau, muon, deltaRCut=0.4)] 
        tau       = tau[~match(tau, electron, deltaRCut=0.4)]

        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons
        
        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)
        
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        # try to get either the most forward light jet, or if there's more than one with eta>1.7, the highest pt one
        most_fwd = light[ak.argsort(abs(light.eta))][:,0:1]
        #most_fwd = light[ak.singletons(ak.argmax(abs(light.eta)))]
        best_fwd = ak.concatenate([j_fwd, most_fwd], axis=1)[:,0:1]
        
        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        #st = met_pt + ht + ak.sum(get_pt(muon), axis=1) + ak.sum(get_pt(electron), axis=1)
        st = met_pt + ht + ak.sum(lepton.p4.pt, axis=1)
        
        # define the weight
        weight = Weights( len(ev) )

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])
            
            # PU weight
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)
            
            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))
            
            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))
        

        cutflow     = Cutflow(output, ev, weight=weight)

        # slightly restructured
        # calculate everything from loose, require two tights on top
        # since n_tight == n_loose == 2, the tight and loose leptons are the same in the end

        # in this selection we'll get events with exactly two fakeable+tight and two loose leptons.
        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = el_v,
            mu = muon,
            mu_veto = mu_v,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            jet_light = light,
            met = ev.MET,
        )
        
        baseline = sel.dilep_baseline(cutflow=cutflow, SS=True, omit=['N_fwd>0'])
        baseline_OS = sel.dilep_baseline(cutflow=cutflow, SS=False, omit=['N_fwd>0'])  # this is for charge flip estimation
        
        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):

            BL = (baseline & ((ak.num(el_t_p_cc)+ak.num(mu_t_p))==2))  # this is the MC baseline for events with two tight prompt leptons
            BL_incl = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) # this is the MC baseline for events with two tight leptons

            np_est_sel_mc = (baseline & \
                ((((ak.num(el_t_p_cc)+ak.num(mu_t_p))==1) & ((ak.num(el_f_np)+ak.num(mu_f_np))==1)) | (((ak.num(el_t_p_cc)+ak.num(mu_t_p))==0) & ((ak.num(el_f_np)+ak.num(mu_f_np))==2)) ))  # no overlap between tight and nonprompt, and veto on additional leptons. this should be enough
            np_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_np)+ak.num(mu_t_np))>=1) )  # two tight leptons, at least one nonprompt
            np_est_sel_data = (baseline & ~baseline)  # this has to be false

            cf_est_sel_mc = (baseline_OS & ((ak.num(el_t_p)+ak.num(mu_t_p))==2))
            cf_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_p_cf))>=1) )  # two tight leptons, at least one electron charge flip
            cf_est_sel_data = (baseline & ~baseline)  # this has to be false

            weight_np_mc = self.nonpromptWeight.get(el_f_np, mu_f_np, meas='TT')
            weight_cf_mc = self.chargeflipWeight.flip_weight(el_t_p)

        else:
            BL = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2))

            BL_incl = BL

            np_est_sel_mc = (baseline & ~baseline)
            np_obs_sel_mc = (baseline & ~baseline)
            np_est_sel_data = (baseline & (ak.num(el_t)+ak.num(mu_t)==1) & (ak.num(el_f)+ak.num(mu_f)==1) )

            cf_est_sel_mc = (baseline & ~baseline)
            cf_obs_sel_mc = (baseline & ~baseline)
            cf_est_sel_data = (baseline_OS & ((ak.num(el_t)+ak.num(mu_t))==2) )

            weight_np_mc = np.zeros(len(ev))
            weight_cf_mc = np.zeros(len(ev))

            #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event]))
            run_ = ak.to_numpy(ev.run)
            lumi_ = ak.to_numpy(ev.luminosityBlock)
            event_ = ak.to_numpy(ev.event)

            if False:
                output['%s_run'%dataset] += processor.column_accumulator(run_[BL])
                output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL])
                output['%s_event'%dataset] += processor.column_accumulator(event_[BL])

        weight_BL = weight.weight()[BL]  # this is just a shortened weight list for the two prompt selection
        weight_np_data = self.nonpromptWeight.get(el_f, mu_f, meas='data')
        weight_cf_data = self.chargeflipWeight.flip_weight(el_t)

        out_sel = (BL | np_est_sel_mc | cf_est_sel_mc)

        dummy = (np.ones(len(ev))==1)
        def fill_multiple_np(hist, arrays, add_sel=dummy):
            #reg_sel = [BL, np_est_sel_mc, np_obs_sel_mc, np_est_sel_data, cf_est_sel_mc, cf_obs_sel_mc, cf_est_sel_data],
            #print ('len', len(reg_sel[0]))
            #print ('sel', reg_sel[0])
            reg_sel = [BL&add_sel, BL_incl&add_sel, np_est_sel_mc&add_sel, np_obs_sel_mc&add_sel, np_est_sel_data&add_sel, cf_est_sel_mc&add_sel, cf_obs_sel_mc&add_sel, cf_est_sel_data&add_sel],
            fill_multiple(
                hist,
                datasets=[
                    dataset, # only prompt contribution from process
                    dataset+"_incl", # everything from process (inclusive MC truth)
                    "np_est_mc", # MC based NP estimate
                    "np_obs_mc", # MC based NP observation
                    "np_est_data",
                    "cf_est_mc",
                    "cf_obs_mc",
                    "cf_est_data",
                ],
                arrays=arrays,
                selections=reg_sel[0],  # no idea where the additional dimension is coming from...
                weights=[
                    weight.weight()[reg_sel[0][0]],
                    weight.weight()[reg_sel[0][1]],
                    weight.weight()[reg_sel[0][2]]*weight_np_mc[reg_sel[0][2]],
                    weight.weight()[reg_sel[0][3]],
                    weight.weight()[reg_sel[0][4]]*weight_np_data[reg_sel[0][4]],
                    weight.weight()[reg_sel[0][5]]*weight_cf_mc[reg_sel[0][5]],
                    weight.weight()[reg_sel[0][6]],
                    weight.weight()[reg_sel[0][7]]*weight_cf_data[reg_sel[0][7]],
                ],
            )

        if self.evaluate or self.dump:
            # define the inputs to the NN
            # this is super stupid. there must be a better way.
            # used a np.stack which is ok performance wise. pandas data frame seems to be slow and memory inefficient
            #FIXME no n_b, n_fwd back in v13/v14 of the DNN

            NN_inputs_d = {
                'n_jet':            ak.to_numpy(ak.num(jet)),
                'n_fwd':            ak.to_numpy(ak.num(fwd)),
                'n_b':              ak.to_numpy(ak.num(btag)),
                'n_tau':            ak.to_numpy(ak.num(tau)),
                #'n_track':          ak.to_numpy(ak.num(track)),
                'st':               ak.to_numpy(st),
                'met':              ak.to_numpy(ev.MET.pt),
                'mjj_max':          ak.to_numpy(ak.fill_none(ak.max(mjf, axis=1),0)),
                'delta_eta_jj':     ak.to_numpy(pad_and_flatten(delta_eta)),
                'lead_lep_pt':      ak.to_numpy(pad_and_flatten(leading_lepton.p4.pt)),
                'lead_lep_eta':     ak.to_numpy(pad_and_flatten(leading_lepton.p4.eta)),
                'sublead_lep_pt':   ak.to_numpy(pad_and_flatten(trailing_lepton.p4.pt)),
                'sublead_lep_eta':  ak.to_numpy(pad_and_flatten(trailing_lepton.p4.eta)),
                'dilepton_mass':    ak.to_numpy(pad_and_flatten(dilepton_mass)),
                'dilepton_pt':      ak.to_numpy(pad_and_flatten(dilepton_pt)),
                'fwd_jet_pt':       ak.to_numpy(pad_and_flatten(best_fwd.pt)),
                'fwd_jet_p':        ak.to_numpy(pad_and_flatten(best_fwd.p)),
                'fwd_jet_eta':      ak.to_numpy(pad_and_flatten(best_fwd.eta)),
                'lead_jet_pt':      ak.to_numpy(pad_and_flatten(jet[:, 0:1].pt)),
                'sublead_jet_pt':   ak.to_numpy(pad_and_flatten(jet[:, 1:2].pt)),
                'lead_jet_eta':     ak.to_numpy(pad_and_flatten(jet[:, 0:1].eta)),
                'sublead_jet_eta':  ak.to_numpy(pad_and_flatten(jet[:, 1:2].eta)),
                'lead_btag_pt':     ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].pt)),
                'sublead_btag_pt':  ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].pt)),
                'lead_btag_eta':    ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].eta)),
                'sublead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].eta)),
                'min_bl_dR':        ak.to_numpy(ak.fill_none(min_bl_dR, 0)),
                'min_mt_lep_met':   ak.to_numpy(ak.fill_none(min_mt_lep_met, 0)),
            }

            if self.dump:
                for k in NN_inputs_d.keys():
                    output[k] += processor.column_accumulator(NN_inputs_d[k][out_sel])

            if self.evaluate:
            
                NN_inputs = np.stack( [NN_inputs_d[k] for k in NN_inputs_d.keys()] )

                NN_inputs = np.nan_to_num(NN_inputs, 0, posinf=1e5, neginf=-1e5)  # events with posinf/neginf/nan will not pass the BL selection anyway

                NN_inputs = np.moveaxis(NN_inputs, 0, 1)  # this is needed for a np.stack (old version)

                model, scaler = load_onnx_model(self.training)

                try:
                    NN_inputs_scaled = scaler.transform(NN_inputs)

                    NN_pred    = predict_onnx(model, NN_inputs_scaled)

                    best_score = np.argmax(NN_pred, axis=1)


                except ValueError:
                    print ("Problem with prediction. Showing the shapes here:")
                    print (np.shape(NN_inputs))
                    print (np.shape(weight_BL))
                    NN_pred = np.array([])
                    best_score = np.array([])
                    NN_inputs_scaled = NN_inputs
                    raise

                ##k.clear_session()

                #FIXME below needs to be fixed again with changed NN evaluation. Should work now

                fill_multiple_np(output['node'], {'multiplicity':best_score})
                fill_multiple_np(output['node0_score_incl'], {'score':NN_pred[:,0]})
                fill_multiple_np(output['node1_score_incl'], {'score':NN_pred[:,1]})
                fill_multiple_np(output['node2_score_incl'], {'score':NN_pred[:,2]})
                fill_multiple_np(output['node3_score_incl'], {'score':NN_pred[:,3]})
                fill_multiple_np(output['node4_score_incl'], {'score':NN_pred[:,4]})
                
                fill_multiple_np(output['node0_score'], {'score':NN_pred[:,0]}, add_sel=(best_score==0))
                fill_multiple_np(output['node1_score'], {'score':NN_pred[:,1]}, add_sel=(best_score==1))
                fill_multiple_np(output['node2_score'], {'score':NN_pred[:,2]}, add_sel=(best_score==2))
                fill_multiple_np(output['node3_score'], {'score':NN_pred[:,3]}, add_sel=(best_score==3))
                fill_multiple_np(output['node4_score'], {'score':NN_pred[:,4]}, add_sel=(best_score==4))

                #SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
                #SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
                #leading_lepton_BL = leading_lepton[BL]

                #output['lead_lep_SR_pp'].fill(
                #    dataset = dataset,
                #    pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
                #    weight = weight_BL[SR_sel_pp]
                #)

                #output['lead_lep_SR_mm'].fill(
                #    dataset = dataset,
                #    pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
                #    weight = weight_BL[SR_sel_mm]
                #)

                del model
                del scaler
                del NN_inputs, NN_inputs_scaled, NN_pred

        labels = {'topW_v3': 0, 'TTW':1, 'TTZ': 2, 'TTH': 3, 'ttbar': 4, 'rare':5, 'diboson':6}  # these should be all?
        if dataset in labels:
            label_mult = labels[dataset]
        else:
            label_mult = 7  # data or anything else

        if self.dump:
            output['label']     += processor.column_accumulator(np.ones(len(ev[out_sel])) * label_mult)
            output['SS']        += processor.column_accumulator(ak.to_numpy(BL[out_sel]))
            output['OS']        += processor.column_accumulator(ak.to_numpy(cf_est_sel_mc[out_sel]))
            output['AR']        += processor.column_accumulator(ak.to_numpy(np_est_sel_mc[out_sel]))
            output['LL']        += processor.column_accumulator(ak.to_numpy(LL[out_sel]))
            output['weight']    += processor.column_accumulator(ak.to_numpy(weight.weight()[out_sel]))
            output['weight_np'] += processor.column_accumulator(ak.to_numpy(weight_np_mc[out_sel]))
            output['weight_cf'] += processor.column_accumulator(ak.to_numpy(weight_cf_mc[out_sel]))

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        fill_multiple_np(output['N_jet'],     {'multiplicity': ak.num(jet)})
        fill_multiple_np(output['N_b'],       {'multiplicity': ak.num(btag)})
        fill_multiple_np(output['N_central'], {'multiplicity': ak.num(central)})
        fill_multiple_np(output['N_ele'],     {'multiplicity':ak.num(electron)})
        fill_multiple_np(output['N_mu'],      {'multiplicity':ak.num(muon)})
        fill_multiple_np(output['N_fwd'],     {'multiplicity':ak.num(fwd)})
        fill_multiple_np(output['ST'],        {'ht': st})
        fill_multiple_np(output['HT'],        {'ht': ht})

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)

        fill_multiple_np(output['MET'], {'pt':ev.MET.pt, 'phi':ev.MET.phi})

        if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset):
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )
        
        fill_multiple_np(
            output['lead_lep'],
            {
                'pt':  pad_and_flatten(leading_lepton.p4.pt),
                'eta': pad_and_flatten(leading_lepton.eta),
                'phi': pad_and_flatten(leading_lepton.phi),
            },
        )

        fill_multiple_np(
            output['trail_lep'],
            {
                'pt':  pad_and_flatten(trailing_lepton.p4.pt),
                'eta': pad_and_flatten(trailing_lepton.eta),
                'phi': pad_and_flatten(trailing_lepton.phi),
            },
        )
        
        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )
        
        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )
        
        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )
        
        fill_multiple_np(
            output['fwd_jet'],
            {
                'pt':  pad_and_flatten(best_fwd.pt),
                'eta': pad_and_flatten(best_fwd.eta),
                'phi': pad_and_flatten(best_fwd.phi),
            },
        )
        
        #output['fwd_jet'].fill(
        #    dataset = dataset,
        #    pt  = ak.flatten(j_fwd[BL].pt),
        #    eta = ak.flatten(j_fwd[BL].eta),
        #    phi = ak.flatten(j_fwd[BL].phi),
        #    weight = weight_BL
        #)
            
        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(best_fwd[BL].p), weight = weight_BL)
        
        return output
Пример #24
0
    def process(self, events):
        """Fill the analysis histograms for one chunk of events.

        Events with more than two entries in ``events.Jet`` are kept. Lepton
        and jet collections are built from them, the dilepton baseline
        selection (``Selection.dilep_baseline(..., SS=True)``) is evaluated,
        the 'v8' ONNX model is run on the events passing that selection, and
        the histograms of the accumulator are filled with those events.

        Parameters
        ----------
        events
            Event array. The dataset name is read from
            ``metadata['dataset']`` of the preselected events.

        Returns
        -------
        The accumulator created by ``self.accumulator.identity()``, filled.
        """
        output = self.accumulator.identity()

        # use a very loose preselection to filter the events
        presel = ak.num(events.Jet)>2

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # Datasets whose name matches one of these patterns skip all
        # generator-level quantities and event weights below (presumably these
        # are the collision-data streams - TODO confirm). Evaluated once
        # instead of re-running the same regex search in five places.
        is_data = re.search(r'MuonEG|DoubleMuon|DoubleEG|EGamma', dataset) is not None

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        if not is_data:
            ## Generated leptons
            gen_lep = ev.GenL
            leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))]
            trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))]

        ## Muons
        # NOTE(review): dimuon, SSmuon, leading_muon(_idx) and the electron /
        # dilepton counterparts below are never read again in this method.
        muon     = Collections(ev, "Muon", "tightSSTTH").get()
        vetomuon = Collections(ev, "Muon", "vetoTTH").get()
        dimuon   = choose(muon, 2)
        SSmuon   = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1)
        leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1))
        leading_muon = muon[leading_muon_idx]

        ## Electrons
        electron     = Collections(ev, "Electron", "tightSSTTH").get()
        vetoelectron = Collections(ev, "Electron", "vetoTTH").get()
        dielectron   = choose(electron, 2)
        SSelectron   = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1)
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[leading_electron_idx]

        ## Merge electrons and muons - this should work better now in ak1
        dilepton = cross(muon, electron)
        SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1)

        lepton   = ak.concatenate([muon, electron], axis=1)
        leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1))
        leading_lepton = lepton[leading_lepton_idx]
        trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1))
        trailing_lepton = lepton[trailing_lepton_idx]

        dilepton_mass = (leading_lepton+trailing_lepton).mass
        dilepton_pt = (leading_lepton+trailing_lepton).pt
        dilepton_dR = delta_r(leading_lepton, trailing_lepton)  # NOTE(review): unused below

        # NOTE(review): unused below
        lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0)

        if not is_data:
            n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon)
            n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart)

        mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi)
        min_mt_lep_met = ak.min(mt_lep_met, axis=1)

        ## Tau and other stuff
        tau       = getTaus(ev)
        track     = getIsoTracks(ev)

        ## Jets
        jet       = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom')
        jet       = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt
        jet       = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons
        jet       = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons

        central   = jet[(abs(jet.eta)<2.4)]
        btag      = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet
        light     = getBTagsDeepFlavB(jet, year=self.year, invert=True)
        fwd       = getFwdJet(light)
        fwd_noPU  = getFwdJet(light, puId=False)  # NOTE(review): unused below

        # NOTE(review): ak.argsort sorts in ascending order unless
        # ascending=False is passed (as done for the pt sorts above), so this
        # appears to keep the two LOWEST DeepFlavB scores. Left unchanged
        # because the NN inputs below depend on it - confirm against the
        # training setup before changing.
        high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2]

        bl          = cross(lepton, high_score_btag)
        bl_dR       = delta_r(bl['0'], bl['1'])
        min_bl_dR   = ak.min(bl_dR, axis=1)

        ## forward jets
        j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator

        jf          = cross(j_fwd, jet)
        mjf         = (jf['0']+jf['1']).mass
        j_fwd2      = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd
        delta_eta   = abs(j_fwd2.eta - j_fwd.eta)

        ## MET -> can switch to puppi MET
        met_pt  = ev.MET.pt
        met_phi = ev.MET.phi  # NOTE(review): unused below

        ## other variables
        ht = ak.sum(jet.pt, axis=1)
        st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1)

        # define the weight
        weight = Weights( len(ev) )

        if not is_data:
            # lumi weight
            weight.add("weight", ev.weight*cfg['lumi'][self.year])

            # PU weight - not in the babies...
            weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False)

            # b-tag SFs
            weight.add("btag", self.btagSF.Method1a(btag, light))

            # lepton SFs
            weight.add("lepton", self.leptonSF.get(electron, muon))

        cutflow     = Cutflow(output, ev, weight=weight)

        sel = Selection(
            dataset = dataset,
            events = ev,
            year = self.year,
            ele = electron,
            ele_veto = vetoelectron,
            mu = muon,
            mu_veto = vetomuon,
            jet_all = jet,
            jet_central = central,
            jet_btag = btag,
            jet_fwd = fwd,
            met = ev.MET,
        )

        BL = sel.dilep_baseline(cutflow=cutflow, SS=True)

        weight_BL = weight.weight()[BL]

        # define the inputs to the NN: one row per variable here, transposed
        # to one row per event by the np.moveaxis call below.
        NN_inputs = np.stack([
            ak.to_numpy(ak.num(jet[BL])),
            ak.to_numpy(ak.num(tau[BL])),
            ak.to_numpy(ak.num(track[BL])),
            ak.to_numpy(st[BL]),
            ak.to_numpy(ev.MET[BL].pt),
            ak.to_numpy(ak.max(mjf[BL], axis=1)),
            ak.to_numpy(pad_and_flatten(delta_eta[BL])),
            ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)),
            ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)),
            ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)),
            ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)),
            ak.to_numpy(pad_and_flatten(dilepton_mass[BL])),
            ak.to_numpy(pad_and_flatten(dilepton_pt[BL])),
            ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)),
            ak.to_numpy(pad_and_flatten(j_fwd[BL].p)),
            ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)),
            ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)),
            ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)),
            ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)),
            ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)),
            ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)),
            ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)),
            ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)),
            ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)),
            ak.to_numpy(min_bl_dR[BL]),
            ak.to_numpy(min_mt_lep_met[BL]),
        ])

        NN_inputs = np.moveaxis(NN_inputs, 0, 1)

        model, scaler = load_onnx_model('v8')

        try:
            NN_inputs_scaled = scaler.transform(NN_inputs)

            NN_pred    = predict_onnx(model, NN_inputs_scaled)

            best_score = np.argmax(NN_pred, axis=1)

        except ValueError:
            # Fall back to empty predictions so that the fills below become
            # no-ops. NOTE(review): this is only consistent with weight_BL
            # when no event passed the baseline selection - presumably the
            # case this handler was written for.
            NN_pred = np.array([])
            best_score = np.array([])
            NN_inputs_scaled = NN_inputs

        output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL)

        output['node0_score_incl'].fill(dataset=dataset, score=NN_pred[:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL)
        output['node0_score'].fill(dataset=dataset, score=NN_pred[best_score==0][:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==0])
        output['node1_score'].fill(dataset=dataset, score=NN_pred[best_score==1][:,1] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==1])
        output['node2_score'].fill(dataset=dataset, score=NN_pred[best_score==2][:,2] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==2])
        output['node3_score'].fill(dataset=dataset, score=NN_pred[best_score==3][:,3] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==3])
        output['node4_score'].fill(dataset=dataset, score=NN_pred[best_score==4][:,4] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==4])

        # events classified as node 0, split by the sign of the leading lepton pdgId
        SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0)))
        SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0)))
        leading_lepton_BL = leading_lepton[BL]

        output['lead_lep_SR_pp'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)),
            weight = weight_BL[SR_sel_pp]
        )

        output['lead_lep_SR_mm'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)),
            weight = weight_BL[SR_sel_mm]
        )

        # drop the references to the model and the large arrays
        del model
        del scaler
        del NN_inputs, NN_inputs_scaled, NN_pred

        # first, make a few super inclusive plots
        output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL)
        output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL)
        output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL)
        output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL)
        output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL)
        output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL)
        # muon multiplicity (this histogram used to be filled with the electron count)
        output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(muon)[BL], weight=weight_BL)
        output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL)
        output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL)
        output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL)

        if not is_data:
            output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL)
            output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL)
            output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL)
            output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL)
            output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL)
            output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL)
            output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL)

        output['MET'].fill(
            dataset = dataset,
            pt  = ev.MET[BL].pt,
            phi  = ev.MET[BL].phi,
            weight = weight_BL
        )

        if not is_data:
            output['lead_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)),
                weight = weight_BL
            )

            output['trail_gen_lep'].fill(
                dataset = dataset,
                pt  = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)),
                eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)),
                phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)),
                weight = weight_BL
            )

        output['lead_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)),
            weight = weight_BL
        )

        output['trail_lep'].fill(
            dataset = dataset,
            pt  = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)),
            eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)),
            phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)),
            weight = weight_BL
        )

        output['j1'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet.pt_nom[:, 0:1][BL]),
            eta = ak.flatten(jet.eta[:, 0:1][BL]),
            phi = ak.flatten(jet.phi[:, 0:1][BL]),
            weight = weight_BL
        )

        output['j2'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 1:2][BL].pt_nom),
            eta = ak.flatten(jet[:, 1:2][BL].eta),
            phi = ak.flatten(jet[:, 1:2][BL].phi),
            weight = weight_BL
        )

        output['j3'].fill(
            dataset = dataset,
            pt  = ak.flatten(jet[:, 2:3][BL].pt_nom),
            eta = ak.flatten(jet[:, 2:3][BL].eta),
            phi = ak.flatten(jet[:, 2:3][BL].phi),
            weight = weight_BL
        )

        output['fwd_jet'].fill(
            dataset = dataset,
            pt  = ak.flatten(j_fwd[BL].pt),
            eta = ak.flatten(j_fwd[BL].eta),
            phi = ak.flatten(j_fwd[BL].phi),
            weight = weight_BL
        )

        output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL)

        return output
Пример #25
0
    def get(self, ele, mu, variation='central'):
        """Return the combined lepton scale factor, one value per event.

        For every event the per-lepton factors looked up in ``self.evaluator``
        (electron reco, loose, looseTTH and tight; muon loose and tight) are
        multiplied over the leptons of the event (``ak.prod(..., axis=1)``)
        and then with each other.

        Parameters
        ----------
        ele, mu
            Electron and muon collections. ``pt`` and ``eta`` are read from
            both, ``deltaEtaSC`` from the electrons only.
        variation
            ``'central'`` (default), ``'up'`` or ``'down'``. The up/down
            variations scale the tight factors and are only implemented for
            2018; for 2016 and 2017, and for any other value, the central
            result is returned.

        Raises
        ------
        ValueError
            If ``self.year`` is not 2016, 2017 or 2018. Previously this case
            ended in an ``UnboundLocalError`` at the ``return`` statement.
        """
        if self.year not in (2016, 2017, 2018):
            raise ValueError(
                "No lepton scale factors defined for year {!r}".format(self.year))
        year = int(self.year)

        def lookup(flavour, kind):
            # evaluator keys follow the pattern "<flavour>_<year>_<kind>"
            return self.evaluator["{}_{}_{}".format(flavour, year, kind)]

        def largest_error(err_eta, err_pt):
            # per lepton, keep the larger of the eta- and the pt-binned factor
            err_eta = ak.from_regular(err_eta[:, :, np.newaxis])
            err_pt = ak.from_regular(err_pt[:, :, np.newaxis])
            return ak.max(ak.concatenate([err_eta, err_pt], axis=2), axis=2)

        ele_abs_eta_sc = abs(ele.eta + ele.deltaEtaSC)
        mu_abs_eta = abs(mu.eta)

        if year in (2016, 2017):
            # separate reco lookups for electrons above and below pt = 20
            ele_high_pt = ele[ele.pt > 20]
            ele_low_pt = ele[ele.pt <= 20]
            factors = [
                lookup("ele", "reco")(ele_high_pt.eta, ele_high_pt.pt),
                lookup("ele", "reco_low")(ele_low_pt.eta, ele_low_pt.pt),
            ]
        else:
            factors = [lookup("ele", "reco")(ele.eta, ele.pt)]

        ele_sf_tight = lookup("ele", "tight")(ele_abs_eta_sc, ele.pt)
        mu_sf_tight = lookup("mu", "tight")(mu_abs_eta, mu.pt)

        if year == 2018 and variation in ('up', 'down'):
            ele_sf_tight_err = largest_error(
                lookup("ele", "tight_eta")(ele_abs_eta_sc),
                lookup("ele", "tight_pt")(ele.pt))
            mu_sf_tight_err = largest_error(
                lookup("mu", "tight_eta")(mu_abs_eta),
                lookup("mu", "tight_pt")(mu.pt))

            if variation == 'up':
                ele_sf_tight = ele_sf_tight * ele_sf_tight_err
                mu_sf_tight = mu_sf_tight * mu_sf_tight_err
            else:
                ele_sf_tight = ele_sf_tight / ele_sf_tight_err
                mu_sf_tight = mu_sf_tight / mu_sf_tight_err

        factors += [
            lookup("ele", "loose")(ele_abs_eta_sc, ele.pt),
            lookup("ele", "looseTTH")(ele_abs_eta_sc, ele.pt),
            ele_sf_tight,
            lookup("mu", "loose")(mu_abs_eta, mu.pt),
            mu_sf_tight,
        ]

        # multiply in the same left-to-right order as before so that the
        # floating-point result is unchanged
        sf = ak.prod(factors[0], axis=1)
        for factor in factors[1:]:
            sf = sf * ak.prod(factor, axis=1)

        return sf
Пример #26
0
def test_0459():
    """Pin the parameters and layout class produced by ``awkward1.concatenate``.

    Four arrays hold the same numbers and differ only in which of the
    ``__array__`` and ``__doc__`` parameters they carry. The pairings listed
    below are concatenated, and both the resulting parameters and the layout
    type are checked.
    """
    plain_plain = awkward1.Array([0.0, 1.1, 2.2, 3.3, 4.4])
    array_plain = awkward1.with_parameter(plain_plain, "__array__", "zoinks")
    plain_isdoc = awkward1.with_parameter(plain_plain, "__doc__", "This is a zoink.")
    array_isdoc = awkward1.with_parameter(array_plain, "__doc__", "This is a zoink.")

    def joined(first, second):
        # Fresh concatenation on every call, as each assertion needs its own.
        return awkward1.concatenate([first, second])

    # Each input together with the parameters it is expected to carry.
    own_parameters = [
        (plain_plain, {}),
        (array_plain, {"__array__": "zoinks"}),
        (plain_isdoc, {"__doc__": "This is a zoink."}),
        (array_isdoc, {"__array__": "zoinks", "__doc__": "This is a zoink."}),
    ]
    for arr, expected in own_parameters:
        assert awkward1.parameters(arr) == expected

    # Concatenating an array with itself: parameters are expected to survive
    # and the layout is expected to be a NumpyArray.
    for arr, expected in own_parameters:
        assert awkward1.parameters(joined(arr, arr)) == expected
    for arr, _ in own_parameters:
        assert isinstance(joined(arr, arr).layout, awkward1.layout.NumpyArray)

    # Pairs that disagree on "__array__": no parameters are expected on the
    # result, and the layout is expected to be a UnionArray8_64.
    array_mismatch = [
        (plain_plain, array_plain),
        (plain_isdoc, array_isdoc),
        (array_plain, plain_plain),
        (array_isdoc, plain_isdoc),
    ]
    for first, second in array_mismatch:
        assert awkward1.parameters(joined(first, second)) == {}
    for first, second in array_mismatch:
        assert isinstance(joined(first, second).layout, awkward1.layout.UnionArray8_64)

    # Pairs that disagree only on "__doc__": "__doc__" is expected to be
    # dropped, "__array__" kept, and the layout to be a NumpyArray.
    doc_mismatch = [
        (plain_plain, plain_isdoc, {}),
        (array_plain, array_isdoc, {"__array__": "zoinks"}),
        (plain_isdoc, plain_plain, {}),
        (array_isdoc, array_plain, {"__array__": "zoinks"}),
    ]
    for first, second, expected in doc_mismatch:
        assert awkward1.parameters(joined(first, second)) == expected
    for first, second, _ in doc_mismatch:
        assert isinstance(joined(first, second).layout, awkward1.layout.NumpyArray)
Пример #27
0
    def process(self, events):
        """Fill the lepton-multiplicity and leading-lepton accumulator entries.

        Events enter the fills when their electron and muon counts sum to
        exactly two and the mask returned by ``getFilters`` is set. The
        generator weight is added for every dataset except 'MuonEG'.

        Args:
            events: event collection exposing ``Jet``, ``Muon``, ``Electron``,
                ``MET`` and ``metadata['dataset']`` (and ``genWeight`` unless
                the dataset is 'MuonEG').

        Returns:
            The accumulator obtained from ``self.accumulator.identity()`` with
            the 'totalEvents' / 'skimmedEvents' counters and the 'N_ele',
            'N_mu' and 'lead_lep' entries updated.
        """
        result = self.accumulator.identity()

        # Very loose preselection (jet count >= 0); nothing else uses it.
        keep = ak.num(events.Jet) >= 0
        selected = events[keep]
        dataset = selected.metadata['dataset']

        # Config is still loaded, although nothing below reads it.
        cfg = loadConfig()

        result['totalEvents']['all'] += len(events)
        result['skimmedEvents']['all'] += len(selected)

        muons = selected.Muon
        electrons = selected.Electron

        # Muon-electron pairs and a same-sign flag; neither feeds a fill below.
        pairs = cross(muons, electrons)
        charge_product = pairs['0'].charge * pairs['1'].charge
        same_sign = ak.any(charge_product > 0, axis=1)

        # Both flavours merged per event, then the highest- and lowest-pt
        # entries picked via argmax/argmin wrapped in ak.singletons.
        leptons = ak.concatenate([muons, electrons], axis=1)
        lead_idx = ak.singletons(ak.argmax(leptons.pt, axis=1))
        lead = leptons[lead_idx]
        trail_idx = ak.singletons(ak.argmin(leptons.pt, axis=1))
        trail = leptons[trail_idx]

        # MET components (could be switched to PUPPI MET); unused below.
        met_pt = selected.MET.pt
        met_phi = selected.MET.phi

        weight = Weights(len(selected))
        if dataset != 'MuonEG':
            # generator weight
            weight.add("weight", selected.genWeight)

        filters = getFilters(selected, year=self.year, dataset=dataset)
        two_leptons = (ak.num(electrons) + ak.num(muons)) == 2

        selection = PackedSelection()
        selection.add('dilep', two_leptons)
        selection.add('filter', filters)

        # Baseline requires every listed selection to be True.
        requirements = dict.fromkeys(('dilep', 'filter'), True)
        baseline = selection.require(**requirements)

        # Same per-event weight is used for all three fills.
        event_weight = weight.weight()[baseline]

        result['N_ele'].fill(
            dataset=dataset,
            multiplicity=ak.num(electrons)[baseline],
            weight=event_weight,
        )
        result['N_mu'].fill(
            dataset=dataset,
            multiplicity=ak.num(muons)[baseline],
            weight=event_weight,
        )

        # Flatten each kinematic field of the leading lepton to a NumPy array.
        lead_selected = lead[baseline]
        kinematics = {
            field: ak.to_numpy(ak.flatten(getattr(lead_selected, field)))
            for field in ('pt', 'eta', 'phi')
        }
        result['lead_lep'].fill(
            dataset=dataset,
            weight=event_weight,
            **kinematics,
        )

        return result