Пример #1
0
    def process(self, df):
        """Fill the muon pt and dimuon mass histograms for one chunk.

        Parameters
        ----------
        df : mapping-like
            Provides ``"dataset"`` and the ``Muon_*`` branches. ``nMuon`` is
            read only when ``Muon_pt`` is not an ``akd.JaggedArray``.

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()`` after
        its ``"pt"``, ``"mass"`` and ``"cutflow"`` entries were updated.
        """
        output = self.accumulator.identity()
        dataset = df["dataset"]

        if not isinstance(df["Muon_pt"], akd.JaggedArray):
            # Flat branches: the multiplicity comes from the explicit counter.
            muon_inputs = dict(
                counts=df["nMuon"],
                pt=df["Muon_pt"],
                eta=df["Muon_eta"],
                phi=df["Muon_phi"],
                mass=df["Muon_mass"],
            )
        else:
            # Jagged branches: reuse the counts of Muon_pt and hand over the
            # underlying content arrays.
            muon_inputs = dict(
                counts=df["Muon_pt"].counts,
                pt=df["Muon_pt"].content,
                eta=df["Muon_eta"].content,
                phi=df["Muon_phi"].content,
                mass=df["Muon_mass"].content,
            )

        muon = CandArray.candidatesfromcounts(**muon_inputs)
        dimuon = muon.distincts()

        output["pt"].fill(dataset=dataset, pt=muon.pt.flatten())
        output["mass"].fill(dataset=dataset, mass=dimuon.mass.flatten())

        # The cutflow counts how many muons / muon pairs were histogrammed.
        cutflow = output["cutflow"]
        cutflow["%s_pt" % dataset] += np.sum(muon.counts)
        cutflow["%s_mass" % dataset] += np.sum(dimuon.counts)

        return output
Пример #2
0
    def process(self, df):
        """Histogram muon pt and dimuon mass and update the cutflow.

        ``df`` must provide ``'dataset'`` and the ``Muon_pt``, ``Muon_eta``,
        ``Muon_phi`` and ``Muon_mass`` branches; ``nMuon`` is needed when
        ``Muon_pt`` is not an ``akd.JaggedArray``.  Returns the accumulator
        created by ``self.accumulator.identity()`` after filling it.
        """
        output = self.accumulator.identity()
        dataset = df['dataset']

        if isinstance(df['Muon_pt'], akd.JaggedArray):
            # Jagged input: counts are taken from Muon_pt itself.
            counts = df['Muon_pt'].counts
            pt = df['Muon_pt'].content
            eta = df['Muon_eta'].content
            phi = df['Muon_phi'].content
            mass = df['Muon_mass'].content
        else:
            # Flat input: counts are read from the nMuon branch.
            counts = df['nMuon']
            pt = df['Muon_pt']
            eta = df['Muon_eta']
            phi = df['Muon_phi']
            mass = df['Muon_mass']

        muon = CandArray.candidatesfromcounts(
            counts=counts, pt=pt, eta=eta, phi=phi, mass=mass)
        dimuon = muon.distincts()

        pt_hist = output['pt']
        pt_hist.fill(dataset=dataset, pt=muon.pt.flatten())
        mass_hist = output['mass']
        mass_hist.fill(dataset=dataset, mass=dimuon.mass.flatten())

        n_muons = np.sum(muon.counts)
        output['cutflow']['%s_pt' % dataset] += n_muons
        n_pairs = np.sum(dimuon.counts)
        output['cutflow']['%s_mass' % dataset] += n_pairs

        return output
Пример #3
0
    def process(self, df):
        """Fill the ``'njets'`` histogram with weighted gen-jet multiplicities.

        Gen jets matched (``deltaRCut=0.4``) to a gen particle with
        ``pid == 32`` are removed; of the remaining jets only those whose pt
        exceeds the per-event maximum pt of these particles are counted.

        Parameters
        ----------
        df : dataframe-like
            Provides ``size``, ``'dataset'``, the trigger / filter branches
            and the ``genjet_p4*`` / ``gen_p4*`` / ``gen_pid`` branches.

        Returns
        -------
        The accumulator from ``self.accumulator.identity()``; it is returned
        unfilled when ``df.size`` is zero.
        """
        output = self.accumulator.identity()
        if df.size == 0:
            return output

        dataset = df['dataset']

        # ---- event weights ------------------------------------------------
        wgts = processor.Weights(df.size)
        if self.data_type != 'data':
            wgts.add('genw', df['weight'])
            npv = df['trueInteractionNum']
            pileup_args = [corr(npv) for corr in self.pucorrs]
            wgts.add('pileup', *pileup_args)

        # An event passes the trigger if any of the listed paths fired.
        fired = [df[path] for path in Triggers]
        wgts.add('trigger', np.logical_or.reduce(fired))
        wgts.add('cosmicveto', df['cosmicveto_result'])
        wgts.add('primaryvtx', df['metfilters_PrimaryVertexFilter'])
        # ...bla bla, other weights goes here

        weight = wgts.weight()

        # ---- candidate collections ----------------------------------------
        genjets = JaggedCandidateArray.candidatesfromcounts(
            df['genjet_p4'],
            px=df['genjet_p4.fCoordinates.fX'].content,
            py=df['genjet_p4.fCoordinates.fY'].content,
            pz=df['genjet_p4.fCoordinates.fZ'].content,
            energy=df['genjet_p4.fCoordinates.fT'].content,
        )
        genparticles = JaggedCandidateArray.candidatesfromcounts(
            df['gen_p4'],
            px=df['gen_p4.fCoordinates.fX'].content,
            py=df['gen_p4.fCoordinates.fY'].content,
            pz=df['gen_p4.fCoordinates.fZ'].content,
            energy=df['gen_p4.fCoordinates.fT'].content,
            pid=df['gen_pid'].content,
        )

        is_darkphoton = genparticles.pid == 32
        darkphotons = genparticles[is_darkphoton]
        dpptmax = darkphotons.pt.max()

        # Drop the gen jets that overlap with one of the selected particles.
        overlaps = genjets.match(darkphotons, deltaRCut=0.4)
        genjets = genjets[~overlaps]

        harder_than_dp = genjets[genjets.pt > dpptmax]
        output['njets'].fill(
            dataset=dataset,
            cnt=harder_than_dp.counts,
            weight=weight,
        )

        return output
Пример #4
0
    def __getitem__(self, key):
        """Return the branch or branch collection named ``key`` and cache it.

        Lookup order:

        1. a previously cached entry of ``self.table_``;
        2. a plain branch listed in ``self.keys_`` (loaded via ``self.array``);
        3. a collection assembled from every branch starting with
           ``key + '_'``.  If an ``'n' + key`` counter branch is available the
           columns are validated against it and flattened to their content.

        A ``JaggedCandidateArray`` is only built when the counter is present
        and ``pt``, ``eta``, ``phi`` and ``mass`` are all available.  The
        original code also entered that path without a counter and handed the
        placeholder ``0`` to ``candidatesfromcounts``; such collections are
        now returned as an ``awk.Table`` with a ``p4`` built from the four
        columns instead.

        Raises
        ------
        ValueError
            If a column's counts disagree with the counter branch.
        """
        if key in self.cache_:
            return self.table_[key]

        if key in self.keys_:
            ret = self.array(key)
        else:
            prefix = key + '_'
            subset = [k for k in self.keys_ if k.startswith(prefix)]
            info = {i.replace(prefix, ''): self.array(i) for i in subset}

            counter = 'n' + key
            has_counter = counter in self.keys_
            counts = None
            if has_counter:
                counts = self.array(counter)
                # Every column must have the per-event multiplicity that the
                # counter branch announces.
                for name, column in info.items():
                    if not (column.counts == counts).all():
                        raise ValueError(
                            f'Key {name} does not have the right shape')
                info = {i: j.content for i, j in info.items()}

            # check that everything is there to make a p4
            has_p4 = all(i in info for i in ['pt', 'eta', 'phi', 'mass'])
            if has_counter and has_p4:
                ret = JaggedCandidateArray.candidatesfromcounts(counts, **info)
            else:
                # NOTE(review): with a counter but without the four p4
                # columns this is still a table of flattened content, as in
                # the original -- confirm whether the jagged structure
                # should be kept here.
                ret = awk.Table(**info)
                if has_p4:
                    # No counter branch: keep the table and attach the full p4.
                    ret['p4'] = uproot_methods.TLorentzVectorArray.from_ptetaphi(
                        ret['pt'], ret['eta'], ret['phi'], ret['mass'])
                elif all(i in info for i in ['pt', 'phi']):
                    # Only pt and phi are guaranteed here; eta and mass are
                    # set to 0 exactly as before.
                    ret['p4'] = uproot_methods.TLorentzVectorArray.from_ptetaphi(
                        ret['pt'], 0, ret['phi'], 0)

        self.table_[key] = ret
        self.cache_.add(key)
        return self.table_[key]
Пример #5
0
def probe_tracks(df, is_mc=False):
    """Build the probe-track candidate collection from flattened branches.

    Parameters
    ----------
    df : mapping-like
        Provides ``nProbeTracks`` and the ``ProbeTracks_*`` branches read
        below; each of them is passed through ``.flatten()``.
    is_mc : bool, optional
        When true, the ``pdgId``, ``genPartIdx`` and ``genPartFlav`` columns
        are attached as additional attributes.

    Returns
    -------
    The collection returned by ``JaggedCandidateArray.candidatesfromcounts``.
    """
    counts = df["nProbeTracks"].flatten()
    columns = {
        "DCASig": df["ProbeTracks_DCASig"].flatten(),
        "dxy": df["ProbeTracks_dxy"].flatten(),
        "dxyS": df["ProbeTracks_dxyS"].flatten(),
        "dz": df["ProbeTracks_dz"].flatten(),
        "dzS": df["ProbeTracks_dzS"].flatten(),
        "eta": df["ProbeTracks_eta"].flatten(),
        "mass": df["ProbeTracks_mass"].flatten(),
        "phi": df["ProbeTracks_phi"].flatten(),
        "pt": df["ProbeTracks_pt"].flatten(),
        "vx": df["ProbeTracks_vx"].flatten(),
        "vy": df["ProbeTracks_vy"].flatten(),
        "vz": df["ProbeTracks_vz"].flatten(),
        "charge": df["ProbeTracks_charge"].flatten(),
        "isLostTrk": df["ProbeTracks_isLostTrk"].flatten(),
        "isPacked": df["ProbeTracks_isPacked"].flatten(),
        "isMatchedToEle": df["ProbeTracks_isMatchedToEle"].flatten(),
        "isMatchedToLooseMuon": df["ProbeTracks_isMatchedToLooseMuon"].flatten(),
        "isMatchedToMediumMuon": df["ProbeTracks_isMatchedToMediumMuon"].flatten(),
        "isMatchedToMuon": df["ProbeTracks_isMatchedToMuon"].flatten(),
        "isMatchedToSoftMuon": df["ProbeTracks_isMatchedToSoftMuon"].flatten(),
    }
    tracks = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if not is_mc:
        return tracks

    # Generator-level columns are only read for simulated samples.
    gen_columns = {
        "pdgId": df["ProbeTracks_pdgId"].flatten(),
        "genPartIdx": df["ProbeTracks_genPartIdx"].flatten(),
        "genPartFlav": df["ProbeTracks_genPartFlav"].flatten(),
    }
    tracks.add_attributes(**gen_columns)
    return tracks
def setup_photons(df):
    """Build the photon collection and keep the selected photons.

    The ID branch is ``Photon_cutBased`` when ``extract_year(df['dataset'])``
    equals 2016 and ``Photon_cutBasedBitmap`` otherwise.  Returned photons
    satisfy ``pt > 200``, ``barrel`` (``|eta| < 1.479``) and ``eleveto``.
    """
    is_2016 = extract_year(df['dataset']) == 2016
    id_branch = 'Photon_cutBased' if is_2016 else 'Photon_cutBasedBitmap'

    counts = df['nPhoton']
    pt = df['Photon_pt']
    eta = df['Photon_eta']
    columns = dict(
        pt=pt,
        eta=eta,
        abseta=np.abs(eta),
        phi=df['Photon_phi'],
        # Photons are given a zero mass with the same layout as pt.
        mass=0 * pt,
        mediumId=(df[id_branch] >= 2) & df['Photon_electronVeto'],
        r9=df['Photon_r9'],
        barrel=np.abs(eta) < 1.479,
        vid=df['Photon_vidNestedWPBitmap'],
        eleveto=df['Photon_electronVeto'],
        sieie=df['Photon_sieie'],
    )
    photons = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    selection = (photons.pt > 200) & photons.barrel & photons.eleveto
    return photons[selection]
Пример #7
0
def setup_gen_jets(df):
    """Build the gen-jet collection from the ``GenJet_*`` branches.

    The mass column is ``0 * GenJet_pt``, i.e. zero for every jet.
    """
    counts = df['nGenJet']
    jet_pt = df['GenJet_pt']
    return JaggedCandidateArray.candidatesfromcounts(
        counts,
        pt=jet_pt,
        eta=df['GenJet_eta'],
        phi=df['GenJet_phi'],
        mass=0 * jet_pt,
    )
Пример #8
0
def getGenW(df):
    """Build the ``GenW`` collection from the ``GenW_*`` branches.

    The mass column is not read from ``df``: it is ``(GenW_pt > 0) * 80``,
    i.e. 80 where pt is positive and 0 elsewhere.
    """
    counts = df['nGenW']
    columns = {
        'pt': df['GenW_pt'].content,
        'eta': df['GenW_eta'].content,
        'phi': df['GenW_phi'].content,
        'mass': ((df['GenW_pt'] > 0) * 80).content,
    }
    return JaggedCandidateArray.candidatesfromcounts(counts, **columns)
Пример #9
0
    def process(self, df):
        """Fill the inclusive and b-tagged jet histograms for one chunk.

        The data-taking year is read from the dataset name and selects the
        working point applied to ``Jet_btagDeepB``.

        Parameters
        ----------
        df : mapping-like
            Provides ``'dataset'``, ``'nJet'`` and the ``Jet_*`` branches
            read below.

        Returns
        -------
        The accumulator from ``self.accumulator.identity()`` with the
        ``'hJets'`` and ``'hBJets'`` histograms filled.

        Raises
        ------
        ValueError
            If the dataset name contains none of ``2016``, ``2017``, ``2018``
            (this used to surface later as an unbound ``year`` variable).
        """
        output = self.accumulator.identity()

        datasetFull = df['dataset']
        print(datasetFull)

        # b-tag working point per year.  ``year`` is an int; it used to be
        # compared against the strings '2017' / '2018', so the 2016 value was
        # silently applied to every year.
        btag_working_points = ((2016, 0.6321), (2017, 0.4941), (2018, 0.4184))

        # If several year tags occur in the name the last one wins, exactly
        # as with the former chain of independent ``if`` statements.
        year = None
        bTagWP = None
        for candidate, working_point in btag_working_points:
            if str(candidate) in datasetFull:
                year = candidate
                bTagWP = working_point
        if year is None:
            raise ValueError(
                'Cannot determine the year from dataset name %r' % (datasetFull,))

        jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['Jet_pt'],
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=df['Jet_mass'],
            jetId=df['Jet_jetId'],
            btag=df['Jet_btagDeepB'],
            hadFlav=df['Jet_hadronFlavour'],
            genIdx=df['Jet_genJetIdx'],
        )

        # Require bit 0 of jetId; ``>>`` binds tighter than ``&``, so this is
        # the same expression as the original ``jetId >> 0 & 1``.
        jetSelect = ((jets.pt > 30) & (abs(jets.eta) < 2.4) &
                     (((jets.jetId >> 0) & 1) == 1))

        Jets = jets[jetSelect]
        bJets = jets[jetSelect & (jets.btag > bTagWP)]

        output['hJets'].fill(
            dataset=datasetFull,
            jetPt=Jets.pt.flatten(),
            jetEta=abs(Jets.eta).flatten(),
            jetFlav=Jets.hadFlav.flatten(),
        )

        output['hBJets'].fill(
            dataset=datasetFull,
            jetPt=bJets.pt.flatten(),
            jetEta=abs(bJets.eta).flatten(),
            jetFlav=bJets.hadFlav.flatten(),
        )

        return output
Пример #10
0
def setup_dressed_gen_candidates(df):
    """Build the dressed gen-lepton collection.

    ``mass`` is ``0 * pt`` and ``status`` is an array of ones with as many
    entries as ``df['GenDressedLepton_pt'].size``; neither is read from a
    branch.
    """
    counts = df['nGenDressedLepton']
    lepton_pt = df['GenDressedLepton_pt']
    columns = dict(
        pt=lepton_pt,
        eta=df['GenDressedLepton_eta'],
        phi=df['GenDressedLepton_phi'],
        mass=0 * lepton_pt,
        status=np.ones(lepton_pt.size),
        pdg=df['GenDressedLepton_pdgId'],
    )
    return JaggedCandidateArray.candidatesfromcounts(counts, **columns)
Пример #11
0
def test_jet_transformer():
    """Check that ``JetTransformer.transform`` adds the systematic columns.

    Jets are built from ``dummy_four_momenta()``, given constant ``rho`` and
    ``area`` values, and transformed with correctors looked up in the
    module-level ``evaluator``.  The up/down ``pt`` and ``mass`` columns for
    ``jer`` and for every entry of ``xform.uncertainties`` must then exist.
    """
    from coffea.analysis_objects import JaggedCandidateArray as CandArray
    from coffea.jetmet_tools import (FactorizedJetCorrector,
                                     JetResolution,
                                     JetResolutionScaleFactor,
                                     JetCorrectionUncertainty,
                                     JetTransformer)

    def lookup(names):
        # Map each correction name to its entry in the shared evaluator.
        return {name: evaluator[name] for name in names}

    counts, test_px, test_py, test_pz, test_e = dummy_four_momenta()

    n_jets = np.sum(counts)
    test_Rho = np.full(shape=(n_jets,), fill_value=100.)
    test_A = np.full(shape=(np.sum(counts),), fill_value=5.)

    jets = CandArray.candidatesfromcounts(
        counts, px=test_px, py=test_py, pz=test_pz, energy=test_e)
    jets.add_attributes(ptRaw=jets.pt,
                        massRaw=jets.mass,
                        rho=test_Rho,
                        area=test_A)

    corrector = FactorizedJetCorrector(**lookup([
        'Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi',
        'Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi',
    ]))

    # Every evaluator entry belonging to the uncertainty-sources file.
    junc_names = [
        name for name in dir(evaluator)
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in name
    ]
    junc = JetCorrectionUncertainty(**lookup(junc_names))

    reso = JetResolution(
        **lookup(['Spring16_25nsV10_MC_PtResolution_AK4PFPuppi']))
    resosf = JetResolutionScaleFactor(
        **lookup(['Spring16_25nsV10_MC_SF_AK4PFPuppi']))

    xform = JetTransformer(jec=corrector, junc=junc, jer=reso, jersf=resosf)

    print(xform.uncertainties)

    xform.transform(jets)

    print(jets.columns)

    for column in ('pt_jer_up', 'pt_jer_down',
                   'mass_jer_up', 'mass_jer_down'):
        assert column in jets.columns

    for unc in xform.uncertainties:
        expected = ('pt_' + unc + '_up', 'pt_' + unc + '_down',
                    'mass_' + unc + '_up', 'mass_' + unc + '_down')
        for column in expected:
            assert column in jets.columns
Пример #12
0
def getJets(df):
    """Return the jets with ``pt > 30``, ``|eta| < 2.4`` and ``jetId > 1``.

    All columns are taken from the ``.content`` of the ``Jet_*`` branches,
    with ``df['nJet']`` as counts.
    """
    counts = df['nJet']
    columns = {
        'pt': df['Jet_pt'].content,
        'eta': df['Jet_eta'].content,
        'phi': df['Jet_phi'].content,
        'mass': df['Jet_mass'].content,
        # https://twiki.cern.ch/twiki/bin/view/CMS/JetID
        'jetId': df['Jet_jetId'].content,
        # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
        'btagDeepB': df['Jet_btagDeepB'].content,
    }
    jet = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    selection = (jet.pt > 30) & (abs(jet.eta) < 2.4) & (jet.jetId > 1)
    return jet[selection]
Пример #13
0
def getIsoTracks(df, WP='veto'):
    """Return the isolated tracks passing the ``'veto'`` working point.

    The collection is always built from the ``IsoTrack_*`` branches; the
    mass column is ``(IsoTrack_pt > 0) * 0.``, i.e. zero everywhere.  For any
    ``WP`` other than ``'veto'`` the function returns ``None``.
    """
    counts = df['nIsoTrack']
    columns = {
        'pt': df['IsoTrack_pt'].content,
        'eta': df['IsoTrack_eta'].content,
        'phi': df['IsoTrack_phi'].content,
        'mass': ((df['IsoTrack_pt'] > 0) * 0.).content,
        'rel_iso': df['IsoTrack_pfRelIso03_all'].content,
    }
    isotrack = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if WP != 'veto':
        # No other working point is defined here.
        return None

    kinematics = (isotrack.pt > 10) & (abs(isotrack.eta) < 2.4)
    # Either the relative isolation is small, or rel_iso * pt is below 6.
    isolation = (isotrack.rel_iso < 0.1) | ((isotrack.rel_iso * isotrack.pt) < 6)
    return isotrack[kinematics & isolation]
Пример #14
0
def setup_lhe_cleaned_genjets(df):
    """Return the gen jets that match no selected LHE particle.

    LHE particles are selected when ``islep(pdg)`` holds and ``isnu(pdg)``
    does not, or when ``pdg == 22``.  Gen jets for which
    ``match(..., deltaRCut=0.4)`` is true against that selection are dropped.
    """
    genjet_counts = df['nGenJet']
    genjet_columns = dict(
        pt=df['GenJet_pt'],
        eta=df['GenJet_eta'],
        abseta=np.abs(df['GenJet_eta']),
        phi=df['GenJet_phi'],
        mass=df['GenJet_mass'],
    )
    genjets = JaggedCandidateArray.candidatesfromcounts(
        genjet_counts, **genjet_columns)

    lhe_counts = df['nLHEPart']
    lhe_columns = dict(
        pt=df['LHEPart_pt'],
        eta=df['LHEPart_eta'],
        phi=df['LHEPart_phi'],
        mass=df['LHEPart_mass'],
        pdg=df['LHEPart_pdgId'],
    )
    lhe = JaggedCandidateArray.candidatesfromcounts(lhe_counts, **lhe_columns)

    non_neutrino_lepton = islep(lhe.pdg) & ~isnu(lhe.pdg)
    photon = lhe.pdg == 22
    lhe_leps_gams = lhe[non_neutrino_lepton | photon]

    overlaps = genjets.match(lhe_leps_gams, deltaRCut=0.4)
    return genjets[~overlaps]
Пример #15
0
def getTaus(df, WP='veto'):
    """Return the taus passing the ``'veto'`` working point.

    The veto selection is ``pt > 20``, ``|eta| < 2.4``, the ``decaymode``
    column used directly as a mask, and ``newid >= 8``.  For any other
    ``WP`` the function returns ``None``.
    """
    counts = df['nTau']
    columns = {
        'pt': df['Tau_pt'].content,
        'eta': df['Tau_eta'].content,
        'phi': df['Tau_phi'].content,
        'mass': df['Tau_mass'].content,
        'decaymode': df['Tau_idDecayMode'].content,
        'newid': df['Tau_idMVAnewDM2017v2'].content,
    }
    tau = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if WP != 'veto':
        # No other working point is defined here.
        return None

    selection = (tau.pt > 20) & (abs(tau.eta) < 2.4)
    selection = selection & (tau.decaymode)
    selection = selection & (tau.newid >= 8)
    return tau[selection]
Пример #16
0
def reco_muons(df, is_mc=False):
    """Build the reconstructed-muon candidate collection.

    Parameters
    ----------
    df : mapping-like
        Provides ``nMuon`` and the ``Muon_*`` branches read below; each is
        passed through ``.flatten()``.  The ``isTriggering*`` columns are
        stored as the result of comparing the branch with 1.
    is_mc : bool, optional
        When true, ``genPartIdx`` and ``genPartFlav`` are attached as well.

    Returns
    -------
    The collection returned by ``JaggedCandidateArray.candidatesfromcounts``.
    """
    counts = df["nMuon"].flatten()
    columns = {
        "dxy": df["Muon_dxy"].flatten(),
        "dxyErr": df["Muon_dxyErr"].flatten(),
        "dz": df["Muon_dz"].flatten(),
        "dzErr": df["Muon_dzErr"].flatten(),
        "eta": df["Muon_eta"].flatten(),
        "ip3d": df["Muon_ip3d"].flatten(),
        "mass": df["Muon_mass"].flatten(),
        "pfRelIso03_all": df["Muon_pfRelIso03_all"].flatten(),
        "pfRelIso03_chg": df["Muon_pfRelIso03_chg"].flatten(),
        "pfRelIso04_all": df["Muon_pfRelIso04_all"].flatten(),
        "phi": df["Muon_phi"].flatten(),
        "pt": df["Muon_pt"].flatten(),
        "ptErr": df["Muon_ptErr"].flatten(),
        "segmentComp": df["Muon_segmentComp"].flatten(),
        "sip3d": df["Muon_sip3d"].flatten(),
        "vx": df["Muon_vx"].flatten(),
        "vy": df["Muon_vy"].flatten(),
        "vz": df["Muon_vz"].flatten(),
        "charge": df["Muon_charge"].flatten(),
        "isTriggering": (df["Muon_isTriggering"] == 1).flatten(),
        "nStations": df["Muon_nStations"].flatten(),
        "pdgId": df["Muon_pdgId"].flatten(),
        "tightCharge": df["Muon_tightCharge"].flatten(),
        "highPtId": df["Muon_highPtId"].flatten(),
        "inTimeMuon": df["Muon_inTimeMuon"].flatten(),
        "isGlobal": df["Muon_isGlobal"].flatten(),
        "isPFcand": df["Muon_isPFcand"].flatten(),
        "isTracker": df["Muon_isTracker"].flatten(),
        "mediumId": df["Muon_mediumId"].flatten(),
        "mediumPromptId": df["Muon_mediumPromptId"].flatten(),
        "miniIsoId": df["Muon_miniIsoId"].flatten(),
        "multiIsoId": df["Muon_multiIsoId"].flatten(),
        "mvaId": df["Muon_mvaId"].flatten(),
        "pfIsoId": df["Muon_pfIsoId"].flatten(),
        "softId": df["Muon_softId"].flatten(),
        "softMvaId": df["Muon_softMvaId"].flatten(),
        "tightId": df["Muon_tightId"].flatten(),
        "tkIsoId": df["Muon_tkIsoId"].flatten(),
        "triggerIdLoose": df["Muon_triggerIdLoose"].flatten(),
        "isTriggering_HLT_Mu7_IP4": (df["Muon_isTriggering_HLT_Mu7_IP4"] == 1).flatten(),
        "isTriggering_HLT_Mu9_IP5": (df["Muon_isTriggering_HLT_Mu9_IP5"] == 1).flatten(),
        "isTriggering_HLT_Mu9_IP6": (df["Muon_isTriggering_HLT_Mu9_IP6"] == 1).flatten(),
        "isTriggering_HLT_Mu12_IP6": (df["Muon_isTriggering_HLT_Mu12_IP6"] == 1).flatten(),
    }
    muons = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if not is_mc:
        return muons

    # Generator-matching columns are only read for simulated samples.
    gen_columns = {
        "genPartIdx": df["Muon_genPartIdx"].flatten(),
        "genPartFlav": df["Muon_genPartFlav"].flatten(),
    }
    muons.add_attributes(**gen_columns)
    return muons
Пример #17
0
def setup_gen_candidates(df):
    """Build the generator-particle collection from the ``GenPart_*`` branches.

    Returns the result of ``JaggedCandidateArray.candidatesfromcounts`` with
    ``df['nGenPart']`` as counts.
    """
    counts = df['nGenPart']
    columns = {
        'pt': df['GenPart_pt'],
        'eta': df['GenPart_eta'],
        'phi': df['GenPart_phi'],
        'mass': df['GenPart_mass'],
        # NOTE(review): ``charge`` is filled from the pdgId branch, not from
        # a charge branch -- confirm this is intended.
        'charge': df['GenPart_pdgId'],
        'pdg': df['GenPart_pdgId'],
        'status': df['GenPart_status'],
        'flag': df['GenPart_statusFlags'],
    }
    return JaggedCandidateArray.candidatesfromcounts(counts, **columns)
Пример #18
0
    def _setup_candidates(self, df):
        """Return ``(ak4, htmiss, ht)`` for one chunk.

        ``ak4`` is built from the ``Jet_*`` branches with a mass column of
        ``0. * Jet_pt``; ``htmiss`` and ``ht`` are the ``'HTmiss'`` and
        ``'HT'`` entries of ``df``, passed through unchanged.
        """
        jet_counts = df['nJet']
        jet_pt = df['Jet_pt']
        ak4 = JaggedCandidateArray.candidatesfromcounts(
            jet_counts,
            pt=jet_pt,
            eta=df['Jet_eta'],
            phi=df['Jet_phi'],
            mass=0. * jet_pt,
        )

        htmiss = df['HTmiss']
        return ak4, htmiss, df['HT']
Пример #19
0
 def rebuild(leptons):
     """Rebuild ``leptons`` as a fresh collection with a fixed set of columns.

     Keeps ``pt``, ``eta``, ``phi``, ``mass``, ``charge`` and ``pdgId`` and
     always provides ``etaSC``: the input's own ``etaSC`` when it has that
     attribute, otherwise a copy of ``eta``.
     """
     offsets = leptons.offsets
     columns = dict(
         pt=leptons.pt.flatten(),
         eta=leptons.eta.flatten(),
         phi=leptons.phi.flatten(),
         mass=leptons.mass.flatten(),
         charge=leptons.charge.flatten(),
         pdgId=leptons.pdgId.flatten(),
     )
     # needed for electron SF
     if hasattr(leptons, 'etaSC'):
         columns['etaSC'] = leptons.etaSC.flatten()
     else:
         columns['etaSC'] = leptons.eta.flatten()
     return JaggedCandidateArray.candidatesfromoffsets(offsets, **columns)
Пример #20
0
    def process(self, df):
        """Fill the three invariant-mass histograms of the leading lepton-jet pair.

        Lepton-jets are labelled from their constituent types (3 and 8 are
        counted as muon-like), reduced to the ``isneutral`` ones, and only
        events with at least two of them are kept.  Events are assigned
        channel 1 (``channel_2mu2e``) or 2 (``channel_4mu``); the pair mass
        of events with a non-zero channel fills ``invm_s``, ``invm_m`` and
        ``invm_l``.

        Returns the accumulator from ``self.accumulator.identity()``; it is
        returned early when no event has two lepton-jets.
        """
        output = self.accumulator.identity()
        dataset = df['dataset']

        leptonjets = JaggedCandidateArray.candidatesfromcounts(
            df['pfjet_p4'],
            px=df['pfjet_p4.fCoordinates.fX'].content,
            py=df['pfjet_p4.fCoordinates.fY'].content,
            pz=df['pfjet_p4.fCoordinates.fZ'].content,
            energy=df['pfjet_p4.fCoordinates.fT'].content,
        )

        # Per-jet constituent bookkeeping.
        ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
        is_pfmu = ljdautype == 3
        is_dsa = ljdautype == 8
        npfmu = is_pfmu.sum()
        ndsa = is_dsa.sum()

        no_dsa = ndsa == 0
        isegammajet = (npfmu == 0) & no_dsa
        ispfmujet = (npfmu >= 2) & no_dsa
        isdsajet = ndsa > 0
        label = (isegammajet.astype(int) * 1
                 + ispfmujet.astype(int) * 2
                 + isdsajet.astype(int) * 3)
        leptonjets.add_attributes(label=label)

        nmu = (is_pfmu | is_dsa).sum()
        leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))

        charge_sum = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
        leptonjets.add_attributes(qsum=charge_sum)

        neutral = leptonjets.iseltype | (
            leptonjets.ismutype & (leptonjets.qsum == 0))
        leptonjets.add_attributes(isneutral=neutral)
        leptonjets = leptonjets[leptonjets.isneutral]

        # Keep events with at least two lepton-jets.
        has_two = leptonjets.counts >= 2
        dileptonjets = leptonjets[has_two]
        if dileptonjets.size == 0:
            return output

        lj0 = dileptonjets[dileptonjets.pt.argmax()]
        lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

        # Channel definition.
        n_mutype = dileptonjets.ismutype.sum()
        any_leading_mu = (lj0.ismutype | lj1.ismutype).flatten()
        both_leading_mu = (lj0.ismutype & lj1.ismutype).flatten()
        channel_2mu2e = ((n_mutype == 1) & any_leading_mu).astype(int) * 1
        channel_4mu = ((n_mutype == 2) & both_leading_mu).astype(int) * 2
        channel_ = channel_2mu2e + channel_4mu

        # The same pair mass goes into all three histograms.
        pair_mass = (lj0 + lj1).p4.mass[channel_ > 0].flatten()
        output['invm_s'].fill(dataset=dataset, mass_s=pair_mass)
        output['invm_m'].fill(dataset=dataset, mass_m=pair_mass)
        output['invm_l'].fill(dataset=dataset, mass_l=pair_mass)

        return output
Пример #21
0
    def __getitem__(self, key):
        """Return the branch or branch collection named ``key`` and cache it.

        Lookup order:

        1. a previously cached entry of ``self.table_``;
        2. a plain branch listed in ``self.keys_`` (loaded via ``self.array``);
        3. a collection assembled from every branch starting with
           ``key + '_'``.  Counts come from the ``'n' + key`` branch or, if
           that is missing and all columns are jagged, from the first column.

        With counts and the ``pt``/``eta``/``phi``/``mass`` columns the result
        is a ``JaggedCandidateArray``; with counts only, a zipped jagged
        table; otherwise a flat ``awk.Table`` that gets a ``p4`` when ``pt``
        and ``phi`` are available.

        Raises
        ------
        KeyError
            If ``key`` is neither a branch, nor a branch prefix, nor has a
            counter branch.  This case used to end in an ``IndexError`` after
            printing a misleading hint.
        ValueError
            If a column's counts disagree with the collection counts.
        """
        if key in self.cache_:
            return self.table_[key]

        if key in self.keys_:
            ret = self.array(key)
        else:
            prefix = key + '_'
            subset = [k for k in self.keys_ if k.startswith(prefix)]
            info = {i.replace(prefix, ''): self.array(i) for i in subset}
            counter = 'n' + key

            if not info and counter not in self.keys_:
                # Nothing matches: ``all()`` over an empty dict is vacuously
                # true, so this must be caught before counts are inferred.
                raise KeyError(key)

            counts = None
            if counter in self.keys_:
                counts = self.array(counter)
            elif all(isinstance(i, awk.JaggedArray) for i in
                     info.values()):  # In case counter is missing by mistake
                print(
                    f'You probably forgot to ask to load {counter} as a branch. Inferring it...'
                )
                counts = info[list(info.keys())[0]].counts

            if counts is not None:
                for name, column in info.items():
                    if not (column.counts == counts).all():
                        raise ValueError(
                            f'Key {name} does not have the right shape')

            # check that everything is there to make a p4
            if counts is not None and all(
                    i in info for i in ['pt', 'eta', 'phi', 'mass']):
                # flattened to use candidatesfromcounts, better option available?
                info = {i: j.content for i, j in info.items()}
                ret = JaggedCandidateArray.candidatesfromcounts(counts, **info)
            elif counts is not None:  # Not enough to make a JaggedCandidateArray, but a jagged table
                ret = awk.JaggedArray.zip(**info)
            else:  # flat object
                ret = awk.Table(**info)
                # check if p4 can be made, with MET fix for missing eta and mass
                if all(i in info for i in ['pt', 'phi']):
                    ret['p4'] = uproot_methods.TLorentzVectorArray.from_ptetaphi(
                        info['pt'], info.get('eta', 0), info['phi'],
                        info.get('mass', 0))

        self.table_[key] = ret
        self.cache_.add(key)
        return self.table_[key]
Пример #22
0
def trigger_muons(df, is_mc=False):
    """Build the trigger-muon candidate collection from flattened branches.

    ``is_mc`` is accepted but not used by this function.
    """
    counts = df["nTriggerMuon"].flatten()
    columns = {
        "eta": df["TriggerMuon_eta"].flatten(),
        "mass": df["TriggerMuon_mass"].flatten(),
        "phi": df["TriggerMuon_phi"].flatten(),
        "pt": df["TriggerMuon_pt"].flatten(),
        "vx": df["TriggerMuon_vx"].flatten(),
        "vy": df["TriggerMuon_vy"].flatten(),
        "vz": df["TriggerMuon_vz"].flatten(),
        "charge": df["TriggerMuon_charge"].flatten(),
        "pdgId": df["TriggerMuon_pdgId"].flatten(),
        "trgMuonIndex": df["TriggerMuon_trgMuonIndex"].flatten(),
    }
    return JaggedCandidateArray.candidatesfromcounts(counts, **columns)
Пример #23
0
def genparts(df, is_mc=True):
    """Build the generator-particle candidate collection from flattened branches.

    ``is_mc`` is accepted but not used by this function.
    """
    counts = df["nGenPart"].flatten()
    columns = {
        "eta": df["GenPart_eta"].flatten(),
        "mass": df["GenPart_mass"].flatten(),
        "phi": df["GenPart_phi"].flatten(),
        "pt": df["GenPart_pt"].flatten(),
        "vx": df["GenPart_vx"].flatten(),
        "vy": df["GenPart_vy"].flatten(),
        "vz": df["GenPart_vz"].flatten(),
        "status": df["GenPart_status"].flatten(),
        "genPartIdxMother": df["GenPart_genPartIdxMother"].flatten(),
        "pdgId": df["GenPart_pdgId"].flatten(),
    }
    return JaggedCandidateArray.candidatesfromcounts(counts, **columns)
Пример #24
0
def getFatJets(df, ptcorr=None, masscorr=None):
    """Return the fat jets with ``pt > 200`` and ``|eta| < 2.4``.

    Parameters
    ----------
    df : mapping-like
        Provides ``nFatJet`` and the ``FatJet_*`` branches read below.
    ptcorr : str, optional
        Suffix appended to ``FatJet_pt`` to pick the pt branch.
    masscorr : str, optional
        Suffix appended to both ``FatJet_mass`` and ``FatJet_msoftdrop``.
    """
    ptpostfix = ptcorr if ptcorr is not None else ''
    masspostfix = masscorr if masscorr is not None else ''

    counts = df['nFatJet']
    columns = {
        'pt': df['FatJet_pt%s' % ptpostfix].content,
        'eta': df['FatJet_eta'].content,
        'phi': df['FatJet_phi'].content,
        'mass': df['FatJet_mass%s' % masspostfix].content,
        'msoftdrop': df["FatJet_msoftdrop%s" % masspostfix].content,
        'deepTagMD_HbbvsQCD': df['FatJet_deepTagMD_HbbvsQCD'].content,
        'deepTagMD_WvsQCD': df['FatJet_deepTagMD_WvsQCD'].content,
        'deepTag_WvsQCD': df['FatJet_deepTag_WvsQCD'].content,
    }
    fatjet = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    selection = (fatjet.pt > 200) & (abs(fatjet.eta) < 2.4)
    return fatjet[selection]
Пример #25
0
def convertToJagged(jets):
    """Convert ``jets`` into a ``JaggedCandidateArray``.

    ``jets`` is fed to ``ak.JaggedArray.fromiter`` (the original comment
    describes it as a list of lists of dicts); the resulting array must
    expose ``pt``, ``eta``, ``phi``, ``mass`` and ``ntracks``.
    """
    # Intermediate awkward array holding the per-event structure.
    jagged_jets = ak.JaggedArray.fromiter(jets)

    columns = {
        'pt': jagged_jets.pt.flatten(),
        'eta': jagged_jets.eta.flatten(),
        'phi': jagged_jets.phi.flatten(),
        'mass': jagged_jets.mass.flatten(),
        'ntracks': jagged_jets.ntracks.flatten(),
    }
    return JaggedCandidateArray.candidatesfromcounts(
        jagged_jets.counts, **columns)
Пример #26
0
def getMuons(df, WP='veto'):
    """Return the muons passing working point ``WP``.

    * ``'veto'``: ``pt > 10``, ``|eta| < 2.4``, ``looseId``, mini-isolation
      below 0.2;
    * ``'medium'``: ``pt > 25``, ``|eta| < 2.4``, ``mediumId``, mini-isolation
      below 0.2.

    Any other ``WP`` yields ``None``.
    """
    counts = df['nMuon']
    columns = {
        'pt': df['Muon_pt'].content,
        'eta': df['Muon_eta'].content,
        'phi': df['Muon_phi'].content,
        'mass': df['Muon_mass'].content,
        'miniPFRelIso_all': df['Muon_miniPFRelIso_all'].content,
        'looseId': df['Muon_looseId'].content,
        'mediumId': df['Muon_mediumId'].content,
        'pdgId': df['Muon_pdgId'].content,
    }
    muon = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if WP == 'veto':
        selection = (muon.pt > 10) & (abs(muon.eta) < 2.4) & (muon.looseId)
    elif WP == 'medium':
        selection = (muon.pt > 25) & (abs(muon.eta) < 2.4) & (muon.mediumId)
    else:
        # Unknown working point: nothing is returned.
        return None

    return muon[selection & (muon.miniPFRelIso_all < 0.2)]
Пример #27
0
def getElectrons(df, WP='veto'):
    """Return the electrons passing working point ``WP``.

    Every working point requires ``|eta| < 2.4`` and mini-isolation below
    0.1; they differ in the pt and ``cutBased`` thresholds:

    * ``'veto'``: ``pt > 10``, ``cutBased >= 1``
    * ``'medium'``: ``pt > 25``, ``cutBased >= 3``
    * ``'tight'``: ``pt > 30``, ``cutBased >= 4``

    Any other ``WP`` yields ``None``.
    """
    counts = df['nElectron']
    # NOTE: an etaSC column (Electron_eta + Electron_deltaEtaSC) is
    # currently disabled and therefore not part of the collection.
    columns = {
        'pt': df['Electron_pt'].content,
        'eta': df['Electron_eta'].content,
        'phi': df['Electron_phi'].content,
        'mass': df['Electron_mass'].content,
        'miniPFRelIso_all': df['Electron_miniPFRelIso_all'].content,
        'cutBased': df['Electron_cutBased'].content,
    }
    electron = JaggedCandidateArray.candidatesfromcounts(counts, **columns)

    if WP == 'veto':
        min_pt, min_id = 10, 1
    elif WP == 'medium':
        min_pt, min_id = 25, 3
    elif WP == 'tight':
        min_pt, min_id = 30, 4
    else:
        # Unknown working point: nothing is returned.
        return None

    selection = (electron.pt > min_pt) & (abs(electron.eta) < 2.4)
    selection = selection & (electron.miniPFRelIso_all < 0.1)
    selection = selection & (electron.cutBased >= min_id)
    return electron[selection]
Пример #28
0
def make_svs(df):
    """Build the secondary-vertex collection with its per-track columns.

    Scalar ``sv_*`` branches are flattened and cast to ``float64``; counts
    are taken from ``df['sv_pt'].counts``.  The per-track ``sv_tracks_*``
    branches are converted with ``awk.JaggedArray.fromiter`` and attached
    afterwards, together with ``p3`` and ``sum_tracks_dxySig``.
    """

    def flat64(branch):
        # Flattened float64 view of one branch.
        return df[branch].flatten().astype(np.float64)

    nsv = df['sv_pt'].counts
    svs = JaggedCandidateArray.candidatesfromcounts(
        nsv,
        mass=flat64('sv_mass'),  # BDT
        # The SV charge branch is not filled, so no charge column is built.
        eta=flat64('sv_eta'),
        phi=flat64('sv_phi'),
        pt=flat64('sv_pt'),  # BDT
        lxySig=flat64('sv_LxySig'),  # BDT
        lxyzSig=flat64('sv_LxyzSig'),  # BDT
        lxy=flat64('sv_Lxy'),  # BDT
        lxyz=flat64('sv_Lxyz'),  # BDT
        angle3D=flat64('sv_Angle3D'),  # BDT
        angle2D=flat64('sv_Angle2D'),  # BDT
        gamma=flat64('sv_Gamma'),  # BDT
        chi2=flat64('sv_Chi2'),  # BDT
        position=uproot_methods.TVector3Array(
            flat64('sv_Lx'),
            flat64('sv_Ly'),
            flat64('sv_Lz'),
        ))

    # CNN inputs are stored as ObjectArrays rather than doubly jagged
    # arrays.  Converting them is expensive and might be better done later,
    # once fewer events are left.  Vectors of vectors are awkward to handle,
    # so they are copied here; unrolling the values per event and rolling
    # them back would be a better long-term solution.
    track_columns = (
        ('tracks_charge', 'sv_tracks_charge'),
        ('tracks_eta', 'sv_tracks_eta'),
        ('tracks_phi', 'sv_tracks_phi'),
        ('tracks_pt', 'sv_tracks_pt'),
        ('tracks_p', 'sv_tracks_p'),
        ('tracks_dxySig', 'sv_tracks_dxySig'),
        ('tracks_dxy', 'sv_tracks_dxy'),
        ('tracks_dxyz', 'sv_tracks_dxyz'),
    )
    for column, branch in track_columns:
        svs[column] = awk.JaggedArray.fromiter(df[branch])

    p4 = svs.p4
    svs['p3'] = uproot_methods.TVector3Array.from_cartesian(p4.x, svs.p4.y, svs.p4.z)
    svs['sum_tracks_dxySig'] = np.abs(svs['tracks_dxySig']).sum()

    return svs
Пример #29
0
def process_met(df):
    """Wrap the ``MET_*`` branches as a candidate collection.

    One candidate per event is created (counts are ``np.ones(df.size)``),
    with ``eta`` and ``mass`` given as the scalar 0.
    """
    from coffea.analysis_objects import JaggedCandidateArray

    one_per_event = np.ones(df.size, dtype=int)
    columns = dict(
        pt=df['MET_pt'],
        eta=0,
        phi=df['MET_phi'],
        mass=0,
        MetUnclustEnUpDeltaX=df['MET_MetUnclustEnUpDeltaX'],
        MetUnclustEnUpDeltaY=df['MET_MetUnclustEnUpDeltaY'],
        covXX=df['MET_covXX'],
        covXY=df['MET_covXY'],
        covYY=df['MET_covYY'],
        sumEt=df['MET_sumEt'],
    )
    return JaggedCandidateArray.candidatesfromcounts(
        counts=one_per_event, **columns)
Пример #30
0
def make_jets(df):
    """Build the jet collection with its energy-fraction and multiplicity columns.

    Every column is the flattened ``float64`` version of the corresponding
    branch; counts are taken from ``df['jet_pt'].counts``.  The four-vector
    is specified through ``pt``, ``eta``, ``phi`` and ``energy``, and a
    ``p3`` column is attached from the resulting ``p4``.
    """

    def flat64(branch):
        # Flattened float64 view of one branch.
        return df[branch].flatten().astype(np.float64)

    njets = df['jet_pt'].counts
    # Columns marked "BDT" carry the original "BDT for mujet" annotation.
    jets = JaggedCandidateArray.candidatesfromcounts(
        njets,
        pt=flat64('jet_pt'),
        eta=flat64('jet_eta'),  # BDT
        phi=flat64('jet_phi'),  # BDT
        energy=flat64('jet_en'),
        neutHadEnFrac=flat64('jet_neutralHadronEnergyFraction'),  # BDT
        neutEmEnFrac=flat64('jet_neutralEmEnergyFraction'),  # BDT
        charHadEnFrac=flat64('jet_chargedHadronEnergyFraction'),  # BDT
        charEmEnFrac=flat64('jet_chargedEmEnergyFraction'),  # BDT
        chargedMult=flat64('jet_chargedMultiplicity'),  # BDT
        neutMult=flat64('jet_neutralMultiplicity'),  # BDT
        smeared_pt=flat64('jetSmearedPt'),  # BDT
        dCsv_bb=flat64('jet_deepCSV_bb'),  # BDT
        charEmEn=flat64('jet_chargedEmEnergy'),  # BDT
        charHadEn=flat64('jet_chargedHadronEnergy'),  # BDT
        charMuEn=flat64('jet_chargedMuEnergy'),  # BDT
        charMuEnFrac=flat64('jet_chargedMuEnergyFraction'),  # BDT
        muonEn=flat64('jet_muonEnergy'),  # BDT
        muonEnFrac=flat64('jet_muonEnergyFraction'),  # BDT
        neutEmEn=flat64('jet_neutralEmEnergy'),  # BDT
        neutHadEn=flat64('jet_neutralHadronEnergy'),  # BDT
    )

    p4 = jets.p4
    jets['p3'] = uproot_methods.TVector3Array.from_cartesian(p4.x, jets.p4.y, jets.p4.z)
    return jets