def monojet_selection(vphi, genjets):
    """Build the gen-level monojet event selection.

    Parameters
    ----------
    vphi :
        Per-event azimuthal angle of the vector boson.
    genjets :
        Jagged collection of generator-level AK4 jets.

    Returns
    -------
    processor.PackedSelection
        Cuts: 'at_least_one_jet', 'leadak4_pt_eta', 'mindphijr'.
    """
    sel = processor.PackedSelection()

    # At least one gen jet in the event.
    has_jet = genjets.counts > 0
    # Leading jet above 100 GeV and within tracker acceptance (|eta| < 2.4).
    lead_jet_ok = (genjets.pt.max() > 100) & (np.abs(genjets[genjets.pt.argmax()].eta.max()) < 2.4)
    # No jet (of the 4 hardest above 30 GeV) aligned with the boson direction.
    dphi_ok = min_dphi_jet_met(genjets, vphi, njet=4, ptmin=30) > 0.5

    sel.add('at_least_one_jet', has_jet)
    sel.add('leadak4_pt_eta', lead_jet_ok)
    sel.add('mindphijr', dphi_ok)
    return sel
def process(self, df):
    '''Fill and save histograms for the dijet (VBF-like) gen-level analysis.

    Builds the leading dijet system from the two hardest AK4 jets, derives
    mjj / dphijj / detajj, defines the event selection, and fills the
    per-region histograms of the accumulator.
    '''
    dataset = df['dataset']
    # Set up physics objects
    ak4, htmiss, ht = self._setup_candidates(df)
    # Leading jet pair: distinct pairs from the two hardest jets -> at most
    # one pair per event, so the .max()/.min() reductions below just
    # flatten the jagged structure to one value per event.
    diak4 = ak4[:, :2].distincts()
    df['mjj'] = diak4.mass.max()
    # NOTE(review): mixing .min() on i0.phi with .max() on i1.phi — with at
    # most one pair per event both reduce to the same single entry; confirm
    # this was intended rather than a typo.
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()
    selection = processor.PackedSelection()
    # All-true mask used for the 'inclusive' region.
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    # Leading/trailing jet kinematics (pt thresholds, |eta| < 4.7).
    leadak4_pt_eta = (diak4.i0.pt > 80) & (np.abs(diak4.i0.eta) < 4.7)
    trailak4_pt_eta = (diak4.i1.pt > 40) & (np.abs(diak4.i1.eta) < 4.7)
    # Jets in opposite hemispheres (eta product negative).
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    selection.add('mjj', df['mjj'] > 200)
    selection.add('detajj', df['detajj'] > 1.)
    selection.add('dphijj', df['dphijj'] < 1.5)
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    output = self.accumulator.identity()
    # Accumulate sum-of-weights only for simulation.
    if not df['is_data']:
        output['sumw'][dataset] += df['sumw']
        output['sumw2'][dataset] += df['sumw2']
    for region, cuts in self.regions.items():
        mask = selection.all(*cuts)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Fill histograms
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten())
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten())
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten())
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten())
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten())
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten())
        ezfill('mjj', mjj=df['mjj'][mask])
        ezfill('ht', ht=ht[mask])
        ezfill('htmiss', ht=htmiss[mask])
    return output
def vbf_selection(vphi, dijet, genjets):
    """Build the gen-level VBF event selection (|eta| < 4.7 variant).

    Parameters
    ----------
    vphi :
        Vector-boson azimuthal angle; reduced with .max() below.
        NOTE(review): the sibling vbf_selection passes vphi un-reduced —
        confirm the .max() here is intended.
    dijet :
        Jagged collection of leading-dijet candidates (i0 = lead, i1 = trail).
    genjets :
        Generator-level AK4 jets used for the min-dphi(jet, met) cut.

    Returns
    -------
    processor.PackedSelection
        Cuts: 'two_jets', 'leadak4_pt_eta', 'trailak4_pt_eta',
        'hemisphere', 'mindphijr'.
    """
    sel = processor.PackedSelection()

    two_jets = dijet.counts > 0
    lead_ok = (dijet.i0.pt.max() > 80) & (np.abs(dijet.i0.eta.max()) < 4.7)
    trail_ok = (dijet.i1.pt.max() > 40) & (np.abs(dijet.i1.eta.max()) < 4.7)
    # Opposite hemispheres: eta product of the two jets is negative.
    opposite_hemi = dijet.i0.eta.max() * dijet.i1.eta.max() < 0
    dphi_ok = min_dphi_jet_met(genjets, vphi.max(), njet=4, ptmin=30) > 0.5

    sel.add('two_jets', two_jets)
    sel.add('leadak4_pt_eta', lead_ok)
    sel.add('trailak4_pt_eta', trail_ok)
    sel.add('hemisphere', opposite_hemi)
    sel.add('mindphijr', dphi_ok)
    return sel
def vbf_selection(vphi, dijet, genjets):
    """Build the gen-level VBF event selection (|eta| < 5.0 variant).

    NOTE(review): this redefines a function of the same name seen earlier
    in this file (the 4.7-eta variant) — if both are in the same module,
    this later definition wins; confirm they belong to different files.

    Parameters
    ----------
    vphi :
        Per-event vector-boson azimuthal angle.
    dijet :
        Jagged leading-dijet candidates (i0 = lead jet, i1 = trail jet).
    genjets :
        Generator-level AK4 jets for the min-dphi(jet, met) cut.

    Returns
    -------
    processor.PackedSelection
        Cuts: 'two_jets', 'leadak4_pt_eta', 'trailak4_pt_eta',
        'hemisphere', 'mindphijr', 'detajj', 'dphijj'.
    """
    sel = processor.PackedSelection()

    two_jets = dijet.counts > 0
    lead_ok = (dijet.i0.pt.max() > 80) & (np.abs(dijet.i0.eta.max()) < 5.0)
    trail_ok = (dijet.i1.pt.max() > 40) & (np.abs(dijet.i1.eta.max()) < 5.0)
    # Jets in opposite hemispheres.
    opposite_hemi = dijet.i0.eta.max() * dijet.i1.eta.max() < 0
    dphi_jet_met_ok = min_dphi_jet_met(genjets, vphi, njet=4, ptmin=30, etamax=5.0) > 0.5
    # VBF topology: large eta gap, small azimuthal separation.
    large_eta_gap = np.abs(dijet.i0.eta - dijet.i1.eta).max() > 1
    small_dphi = dphi(dijet.i0.phi, dijet.i1.phi).min() < 1.5

    sel.add('two_jets', two_jets)
    sel.add('leadak4_pt_eta', lead_ok)
    sel.add('trailak4_pt_eta', trail_ok)
    sel.add('hemisphere', opposite_hemi)
    sel.add('mindphijr', dphi_jet_met_ok)
    sel.add('detajj', large_eta_gap)
    sel.add('dphijj', small_dphi)
    return sel
def process(self, df):
    """Select ttbar lepton+jets events and fill histograms per analysis region.

    Regions are the cross product of lepton flavor (Muon/Electron), lepton
    ID (Loose/Tight), b-tag category (btagPass/btagFail) and jet
    multiplicity (3Jets/4PJets).

    NOTE(review): relies on module-level globals (args, btaggers,
    wps_to_use, btag_values, corrections, lepSF_correction, MTcut,
    Nominal_ttJets, proj_dir, objsel, MCWeights, make_vars, lumi_tools)
    defined outside this view.
    """
    np.random.seed(10)  # sets seed so values from random distributions are reproducible (JER corrections)
    output = self.accumulator.identity()
    self.sample_name = df.dataset

    ## make event weights
    # data or MC distinction made internally
    evt_weights = MCWeights.get_event_weights(df, year=args.year, corrections=self.corrections, BTagSFs=btaggers)

    ## initialize selections and regions
    selection = processor.PackedSelection()
    # region name -> set of selection-cut names to AND together
    regions = {
        'Muon': {
            'Loose': {
                'btagPass': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_pass'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_pass'},
                },
                'btagFail': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_fail'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_fail'},
                },
            },
            'Tight': {
                'btagPass': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_pass'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_pass'},
                },
                'btagFail': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_fail'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_fail'},
                },
            },
        },
        'Electron': {
            'Loose': {
                'btagPass': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_pass'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_pass'},
                },
                'btagFail': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_fail'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_fail'},
                },
            },
            'Tight': {
                'btagPass': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_pass'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_pass'},
                },
                'btagFail': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_fail'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_fail'},
                },
            },
        },
    }

    ## object selection
    objsel_evts = objsel.select(df, year=args.year, corrections=self.corrections, accumulator=output)
    output['cutflow']['nEvts passing jet and lepton obj selection'] += objsel_evts.sum()
    selection.add('jets_3', df['Jet'].counts == 3)
    selection.add('jets_4p', df['Jet'].counts > 3)
    selection.add('objselection', objsel_evts)
    #selection.add('DeepJet_pass', df['Jet']['DeepJet'+wps_to_use[0]].sum() >= 2)
    selection.add('DeepCSV_pass', df['Jet']['DeepCSV' + wps_to_use[0]].sum() >= 2)

    #set_trace()
    # sort jets by btag value
    df['Jet'] = df['Jet'][df['Jet']['btagDeepB'].argsort(ascending=False)] if btaggers[0] == 'DeepCSV' else df['Jet'][df['Jet']['btagDeepFlavB'].argsort(ascending=False)]

    # btag fail sideband
    deepcsv_sorted = df['Jet'][df['Jet']['btagDeepB'].argsort(ascending=False)]['btagDeepB']
    # only events with >= 2 jets can have two btag values to compare
    valid_counts_inds = np.where(df['Jet'].counts > 1)[0]
    deepcsv_fail = np.zeros(df.size).astype(bool)
    deepcsv_fail[valid_counts_inds] = (deepcsv_sorted[valid_counts_inds][:, 0] < btag_values[args.year]['btagDeepB']['DeepCSV' + wps_to_use[0]]) & (deepcsv_sorted[valid_counts_inds][:, 1] < btag_values[args.year]['btagDeepB']['DeepCSV' + wps_to_use[0]])
    selection.add('DeepCSV_fail', deepcsv_fail)  # highest and second highest DeepCSV values don't pass tight and loose WPs

    self.isData = self.sample_name.startswith('data_Single')
    if self.isData:
        isSE_Data = self.sample_name.startswith('data_SingleElectron')
        isSM_Data = self.sample_name.startswith('data_SingleMuon')
        runs = df.run
        lumis = df.luminosityBlock
        Golden_Json_LumiMask = lumi_tools.LumiMask('%s/inputs/data/LumiMasks/%s_GoldenJson.txt' % (proj_dir, args.year))
        LumiMask = Golden_Json_LumiMask.__call__(runs, lumis)  ## returns array of valid events
        selection.add('lumimask', LumiMask)

        ## object selection and add different selections
        if isSM_Data:
            # SingleMuon dataset: drop electron regions, add only muon cuts
            del regions['Electron']
            ## muons
            selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() == 1)  # one muon passing TIGHT criteria
            selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() == 1)  # one muon passing LOOSE criteria
            #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
        if isSE_Data:
            # SingleElectron dataset: drop muon regions, add only electron cuts
            del regions['Muon']
            ## electrons
            selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() == 1)  # one electron passing TIGHT criteria
            selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() == 1)  # one electron passing LOOSE criteria
            #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

        # every data region additionally requires the golden-JSON lumimask
        for lepton in regions.keys():
            for lepcat in regions[lepton].keys():
                for btagregion in regions[lepton][lepcat].keys():
                    for jmult in regions[lepton][lepcat][btagregion].keys():
                        regions[lepton][lepcat][btagregion][jmult].update({'lumimask'})

    if not self.isData:
        ## add different selections
        ## muons
        selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() == 1)  # one muon passing TIGHT criteria
        selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() == 1)  # one muon passing LOOSE criteria
        #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
        ## electrons
        selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() == 1)  # one electron passing TIGHT criteria
        selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() == 1)  # one electron passing LOOSE criteria
        #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

        #set_trace()
        ### apply lepton SFs to MC (only applicable to tight leptons)
        if 'LeptonSF' in corrections.keys():
            tight_mu_cut = selection.require(objselection=True, tight_MU=True)  # find events passing muon object selection with one tight muon
            tight_muons = df['Muon'][tight_mu_cut][(df['Muon'][tight_mu_cut]['TIGHTMU'] == True)]
            evt_weights._weights['Muon_SF'][tight_mu_cut] = MCWeights.get_lepton_sf(year=args.year, lepton='Muons', corrections=lepSF_correction, pt=tight_muons.pt.flatten(), eta=tight_muons.eta.flatten())
            tight_el_cut = selection.require(objselection=True, tight_EL=True)  # find events passing electron object selection with one tight electron
            tight_electrons = df['Electron'][tight_el_cut][(df['Electron'][tight_el_cut]['TIGHTEL'] == True)]
            evt_weights._weights['Electron_SF'][tight_el_cut] = MCWeights.get_lepton_sf(year=args.year, lepton='Electrons', corrections=lepSF_correction, pt=tight_electrons.pt.flatten(), eta=tight_electrons.etaSC.flatten())

        ## apply btagging SFs to MC
        if corrections['BTagSF'] == True:
            #set_trace()
            threeJets_cut = selection.require(objselection=True, jets_3=True)
            #deepjet_3j_wts = self.corrections['BTag_Constructors']['DeepJet']['3Jets'].get_scale_factor(jets=df['Jet'][threeJets_cut], passing_cut='DeepJet'+wps_to_use[0])
            #evt_weights._weights['DeepJet'][threeJets_cut] = deepjet_3j_wts['central'].prod()
            deepcsv_3j_wts = self.corrections['BTag_Constructors']['DeepCSV']['3Jets'].get_scale_factor(jets=df['Jet'][threeJets_cut], passing_cut='DeepCSV' + wps_to_use[0])
            evt_weights._weights['DeepCSV'][threeJets_cut] = deepcsv_3j_wts['central'].prod()
            fourplusJets_cut = selection.require(objselection=True, jets_4p=True)
            #deepjet_4pj_wts = self.corrections['BTag_Constructors']['DeepJet']['4PJets'].get_scale_factor(jets=df['Jet'][fourplusJets_cut], passing_cut='DeepJet'+wps_to_use[0])
            #evt_weights._weights['DeepJet'][fourplusJets_cut] = deepjet_4pj_wts['central'].prod()
            deepcsv_4pj_wts = self.corrections['BTag_Constructors']['DeepCSV']['4PJets'].get_scale_factor(jets=df['Jet'][fourplusJets_cut], passing_cut='DeepCSV' + wps_to_use[0])
            evt_weights._weights['DeepCSV'][fourplusJets_cut] = deepcsv_4pj_wts['central'].prod()

    # don't use ttbar events with indices % 10 == 0, 1, 2
    if self.sample_name in Nominal_ttJets:
        events = df.event
        selection.add('keep_ttbar', ~np.stack([((events % 10) == idx) for idx in [0, 1, 2]], axis=1).any(axis=1))
        for lepton in regions.keys():
            for lepcat in regions[lepton].keys():
                for btagregion in regions[lepton][lepcat].keys():
                    for jmult in regions[lepton][lepcat][btagregion].keys():
                        sel = regions[lepton][lepcat][btagregion][jmult]
                        sel.update({'keep_ttbar'})

    #set_trace()
    ## fill hists for each region
    for lepton in regions.keys():
        # only the SF of the region's own lepton flavor is kept in the weight
        lepSF_to_exclude = 'Electron_SF' if lepton == 'Muon' else 'Muon_SF'
        btagSF_to_exclude = 'DeepJet'
        for lepcat in regions[lepton].keys():
            for btagregion in regions[lepton][lepcat].keys():
                for jmult in regions[lepton][lepcat][btagregion].keys():
                    cut = selection.all(*regions[lepton][lepcat][btagregion][jmult])
                    #set_trace()
                    if cut.sum() > 0:
                        ltype = 'MU' if lepton == 'Muon' else 'EL'
                        # pick the lepton mask matching this region's ID requirement
                        if 'loose_or_tight_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = ((df[lepton][cut]['TIGHT%s' % ltype] == True) | (df[lepton][cut]['LOOSE%s' % ltype] == True))
                        elif 'tight_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = (df[lepton][cut]['TIGHT%s' % ltype] == True)
                        elif 'loose_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = (df[lepton][cut]['LOOSE%s' % ltype] == True)
                        else:
                            raise ValueError("Not sure what lepton type to choose for event")

                        ## calculate MT
                        MT = make_vars.MT(df[lepton][cut][lep_mask], df['MET'][cut])
                        MTHigh = (MT >= MTcut).flatten()

                        evt_weights_to_use = evt_weights.weight()
                        if not self.isData:
                            evt_weights_to_use = evt_weights.partial_weight(exclude=[lepSF_to_exclude, btagSF_to_exclude])

                        jets = df['Jet'][cut][MTHigh]
                        leptons = df[lepton][cut][lep_mask][MTHigh]
                        # data gets unit SFs; MC uses stored per-event weights
                        btagSF = np.ones(MTHigh.size) if self.isData else evt_weights._weights[btaggers[0]][cut][MTHigh].flatten()
                        lepSF = np.ones(MTHigh.size) if self.isData else evt_weights._weights['%s_SF' % lepton][cut][MTHigh].flatten()
                        tot_weight = evt_weights_to_use[cut][MTHigh].flatten()
                        #set_trace()
                        output['BTagSF'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=btagSF)
                        output['LepSF'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=lepSF)
                        output['EvtWeight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=tot_weight)
                        output = self.fill_hists(accumulator=output, jetmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, jets=jets, leptons=leptons, MT=MT[MTHigh].flatten(), evt_weights=tot_weight)

                        # check iso values for cut based wps
                        if lepton == 'Electron':
                            # barrel/endcap split at |etaSC| = 1.479
                            barrel_els = leptons[(np.abs(leptons.etaSC) <= 1.479)]
                            endcap_els = leptons[(np.abs(leptons.etaSC) > 1.479)]
                            tight_iso_cut_barrel = 0.0287 + 0.506 / barrel_els.pt
                            tight_iso_cut_endcap = 0.0445 + 0.963 / endcap_els.pt
                            tight_iso_passFail_barrel = barrel_els.pfRelIso < tight_iso_cut_barrel
                            tight_iso_passFail_endcap = endcap_els.pfRelIso < tight_iso_cut_endcap
                            output['El_iso_barrel'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, iso_passfail=tight_iso_passFail_barrel.flatten().astype(int), weight=tot_weight[(np.abs(leptons.etaSC) <= 1.479).flatten()])
                            output['El_iso_endcap'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, iso_passfail=tight_iso_passFail_endcap.flatten().astype(int), weight=tot_weight[(np.abs(leptons.etaSC) > 1.479).flatten()])

    return output
def process(self, events):
    """Select boosted fat-jet events, compute ddt-corrected taggers and fill
    signal / ttbar-muon-control templates and cutflows.

    NOTE(review): relies on helpers defined outside this view
    (corrected_msoftdrop, shift, n2ddt_shift, genmatch, getBosons,
    add_VJets_NLOkFactor).
    """
    #assert(len(np.unique(events.event)) == len((events.event)))
    dataset = events.metadata['dataset']
    print('process dataset', dataset)
    # data has no generator weights
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if (len(events) == 0):
        return output

    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()

    # trigger paths: OR of all configured triggers for data; always-true for MC
    if isRealData:
        trigger_fatjet = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            try:
                trigger_fatjet = trigger_fatjet | events.HLT[t]
            except:
                # NOTE(review): bare except silences any error, not just a
                # missing branch — consider narrowing.
                print('trigger %s not available' % t)
                continue
        trigger_muon = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger_muon = trigger_muon | events.HLT[t]
    else:
        trigger_fatjet = np.ones(events.size, dtype='bool')
        trigger_muon = np.ones(events.size, dtype='bool')

    selection.add('fatjet_trigger', trigger_fatjet)
    selection.add('muon_trigger', trigger_muon)

    # run model on PFCands associated to FatJet (FatJetPFCands)
    #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
    #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())
    #else:
    #    events.FatJet["genMatchFull"] = np.ones(len(events))
    fatjets = events.FatJet
    gru = events.GRU
    IN = events.IN
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rhocorr'] = 2*np.log(fatjets.msdcorr/fatjets.pt)
    # ddt-corrected tagger outputs: raw score minus the per-year shift map
    fatjets['gruddt'] = gru.v25 - shift(fatjets, algo='gruddt', year=self._year)
    fatjets['in_v3_ddt'] = IN.v3 - shift(fatjets, algo='inddt', year=self._year)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    #fatjets['count'] = fatjets.count
    # gen matching is only defined for V+jets samples
    if 'WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset:
        fatjets["genMatchFull"] = genmatch(events)
    else:
        fatjets["genMatchFull"] = fatjets.pt.zeros_like()  #np.zeros(events.size, dtype='bool')
    # candidate = leading fat jet passing loose kinematics (kept jagged, 0 or 1 entries)
    candidatejet = fatjets[
        (fatjets.pt > 200)
        & (abs(fatjets.eta) < 2.5)
    ][:, 0:1]

    # basic jet selection
    selection.add('minjetkin', (
        (candidatejet.pt >= 450)
        #& (candidatejet.msdcorr >= 40.)
        & (abs(candidatejet.eta) < 2.5)
        & (candidatejet.rhocorr >= -5.5)
        & (candidatejet.rhocorr <= -2)
    ).any())
    selection.add('signal_pt', (
        (candidatejet.pt >= 525)
    ).any())
    selection.add('mass', (candidatejet.msdcorr >= 40.).any())
    selection.add('v_selection_jetkin', (
        (candidatejet.pt >= 200)
        & (candidatejet.rhocorr >= -5.5)
        & (candidatejet.rhocorr <= -2)
    ).any())
    # gen-match cut only meaningful for V+jets; otherwise require a candidate jet
    selection.add('genmatch', candidatejet.genMatchFull.pad(1).fillna(0).flatten() if ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset) else candidatejet.pt.pad(1).fillna(0).flatten().astype(bool))
    #if isRealData:
    #    selection.add('blinding', (
    #        (events.event %10 == 0)
    #    ))
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('met', events.MET.pt > 40.)

    # muons for the ttbar control region
    goodmuon = (
        (events.Muon.pt > 10)
        & (abs(events.Muon.eta) < 2.1)
        #& (events.Muon.pfRelIso04_all < 0.4)
        #& (events.Muon.looseId).astype(bool)
    )
    nmuons = goodmuon.sum()
    leadingmuon = events.Muon[goodmuon
        #& (events.Muon.pt > 55)
    ][:, 0:1]
    muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)
    ngoodmuons = goodmuon[events.Muon.pt > 55].sum()
    # muon well separated in phi from the fat jet
    selection.add('muonDphiAK8', (
        abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2*np.pi/3
    ).all().all())
    selection.add('muonkin', (
        (leadingmuon.pt > 55.)
        & (abs(leadingmuon.eta) < 2.1)
        #& (leadingmuon.looseId).astype(bool)
    ).all())

    #ak4 puppi jet for CR
    jets = events.Jet[
        (events.Jet.pt > 50.)
        & (abs(events.Jet.eta) < 3)
        & (events.Jet.isTight).astype(bool)
    ]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
    # AK4 jets away from the fat jet (dR > 0.8)
    ak4_away = jets[(dr > 0.8).all()]
    #selection.add('ak4btagMedium08', ak4_away.btagDeepB.max() > 0.4941)
    selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)

    #generic lep veto
    nelectrons = (
        (events.Electron.pt > 10.)
        & (abs(events.Electron.eta) < 2.5)
        & (events.Electron.cutBased >= events.Electron.LOOSE)
    ).sum()
    ntaus = (
        (events.Tau.pt > 20.)
        & (events.Tau.idDecayMode).astype(bool)
        # bacon iso looser than Nano selection
    ).sum()
    selection.add('onemuon', (ngoodmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))

    if not isRealData:
        weights.add('genweight', events.genWeight)
        #add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) signal region only
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        #b-tag weights

    # region name -> ordered list of selection cuts
    regions = {
        'signal': ['fatjet_trigger', 'minjetkin', 'signal_pt', 'mass', 'noleptons', 'jetid', 'genmatch'],
        'ttbar_muoncontrol': ['muon_trigger', 'minjetkin', 'jetid', 'mass', 'muonDphiAK8', 'muonkin', 'ak4btagMedium08', 'onemuon',],
        'noselection': [],
        #'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
    }
    #if isRealData and 'SingleMuon' not in dataset:
    #    regions['signal'].append('blinding')
    '''for region, cuts in regions.items():
        allcuts = set()
        print ('weights', weights.weight().shape)
        print( len(events))
        output['cutflow'].fill(dataset=dataset, region=region, cut=0)#,weight=weights.weight())
        for i, cut in enumerate(cuts):
            allcuts.add(cut)
            cut = selection.all(*allcuts)
            output['cutflow'].fill(dataset=dataset, region=region, cut=i + 1)# weight=weights.weight()[cut])
    '''
    # weighted N-1-style cutflows: cuts are applied cumulatively in region order
    allcuts_signal = set()
    output['cutflow_signal'][dataset]['none'] += float(weights.weight().sum())
    allcuts_ttbar_muoncontrol = set()
    output['cutflow_ttbar_muoncontrol'][dataset]['none'] += float(weights.weight().sum())
    for cut in regions['signal']:
        allcuts_signal.add(cut)
        output['cutflow_signal'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_signal)].sum())
    for cut in regions['ttbar_muoncontrol']:
        allcuts_ttbar_muoncontrol.add(cut)
        output['cutflow_ttbar_muoncontrol'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_ttbar_muoncontrol)].sum())

    def normalize(val, cut):
        # Reduce a jagged per-jet array to one value per selected event
        # (missing entries become 0).
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # Fill the template histogram for one region.
        # NOTE(review): sname/wmod are currently unused in the fill call.
        selections = regions[region]
        cut = selection.all(*selections)
        sname = 'nominal' if systematic is None else systematic
        weight = weights.weight()[cut]
        output['templates'].fill(
            dataset=dataset,
            region=region,
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            #gruddt=normalize(candidatejet.gruddt, cut),
            #n2=normalize(candidatejet.n2b1, cut),
            #gru=normalize(candidatejet.twoProngGru, cut),
            #rho=normalize(candidatejet.rhocorr, cut),
            in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
            #nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
            #nJet=candidatejet.counts[cut],
            #Vmatch=normalize(candidatejet.genMatchFull, cut),
            mu_pt=normalize(leadingmuon.pt, cut),
            mu_pfRelIso04_all=normalize(leadingmuon.pfRelIso04_all, cut),
            weight=weight,
        )

    for region in regions:
        fill(region)

    return output
def process(self, df):
    """Apply the Hbb/Hcc-style selection, build event weights (with shift and
    weight systematics) and fill every histogram of the accumulator whose
    fields are present in the dataframe.

    NOTE(review): relies on `deltaphi` and the `hist` module defined outside
    this view.
    """
    dataset = df['dataset']
    if self._debug:
        print("Processing dataframe from", dataset)
    isRealData = dataset in ["JetHT", "SingleMuon", "data_obs_mu", "data_obs_jet"]
    self.build_leading_ak8_variables(df)
    self.build_subleading_ak8_variables(df)
    self.build_ak4_variables(df)
    self.build_met_systematics(df)
    df['muon_dphi'] = np.abs(deltaphi(df['vmuoLoose0_phi'], df['AK8Puppijet0_phi']))
    selection = processor.PackedSelection()
    if isRealData:
        # Only take jet triggers from JetHT, single muon triggers from SingleMuon dataset
        # necessary but not sufficient condition to prevent double-counting
        # (this plus mutually exclusive offline selections are sufficient)
        selection.add('trigger', (df['triggerBits'] & self._corrections[f'{self._year}_triggerMask']).astype('bool') & (dataset == "JetHT"))
        selection.add('mutrigger', ((df['triggerBits'] & 1) & df['passJson']).astype('bool') & (dataset == "SingleMuon"))
        if self._debug:
            print("Trigger pass/all", selection.all('trigger').sum(), df.size)
            print("Muon trigger pass/all", selection.all('mutrigger').sum(), df.size)
    else:
        # triggers are always "passed" in simulation; efficiency enters as a weight
        selection.add('trigger', np.ones(df.size, dtype='bool'))
        selection.add('mutrigger', np.ones(df.size, dtype='bool'))
    # per-year DeepCSV loose working points
    btagLooseWPs = {
        '2016': 0.6321,
        '2017': 0.4941,
        '2018': 0.4184,
    }
    selection.add('noLeptons', (df['neleLoose'] == 0) & (df['nmuLoose'] == 0) & (df['ntau'] == 0))
    selection.add('oneMuon', (df['neleLoose'] == 0) & (df['nmuLoose'] == 1) & (df['ntau'] == 0))
    selection.add('muonAcceptance', (df['vmuoLoose0_pt'] > 55.) & (np.abs(df['vmuoLoose0_eta']) < 2.1))
    selection.add('muonDphiAK8', df['muon_dphi'] > 2*np.pi/3)
    selection.add('ak4btagMediumDR08', df['ak4_leadingDeepCSV_dR08'] > btagLooseWPs[self._year])  # at least one passes medium cut
    selection.add('antiak4btagMediumOppHem', df['opposite_ak4_leadingDeepCSV'] < btagLooseWPs[self._year])  # none pass
    selection.add('tightVjet', df['AK8Puppijet0_isTightVJet'] != 0)
    selection.add('n2ddtPass', df['ak8jet_n2ddt'] < 0)
    selection.add('jetMass', df['AK8Puppijet0_msd'] > 40.)
    selection.add('deepcvb', df['AK8Puppijet0_deepdoublecvb'] > 0.2)
    selection.add('jetKinematics', df['AK8Puppijet0_pt'] > 450.)
    selection.add('jetKinematicsMuonCR', df['AK8Puppijet0_pt'] > 400.)
    selection.add('pfmet', df['pfmet'] < 140.)
    # region name -> set of selection cuts; histogram names select a region
    regions = {}
    regions['noselection'] = {}
    regions['preselection'] = {'trigger', 'noLeptons'}
    regions['signalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem'}
    regions['muoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8'}
    regions['hCCsignalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem', 'deepcvb'}
    regions['hCCmuoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8', 'deepcvb'}
    # JES/JER shift systematics: shifted copies of the pt/met-dependent cuts
    shiftSystematics = ['JESUp', 'JESDown', 'JERUp', 'JERDown']
    shiftedQuantities = {'AK8Puppijet0_pt', 'pfmet'}
    shiftedSelections = {'jetKinematics', 'jetKinematicsMuonCR', 'pfmet'}
    for syst in shiftSystematics:
        selection.add('jetKinematics'+syst, df['AK8Puppijet0_pt_'+syst] > 450)
        selection.add('jetKinematicsMuonCR'+syst, df['AK8Puppijet0_pt_'+syst] > 400.)
        selection.add('pfmet'+syst, df['pfmet_'+syst] < 140.)

    # mass shift applied only to V-matched data
    # https://github.com/kakwok/ZPrimePlusJet/blob/PerBinEff/fitting/PbbJet/buildRhalphabetHbb.py#L30
    if not isRealData:
        shiftSystematics.append('matchedUp')
        shiftedQuantities.add('AK8Puppijet0_msd')
        msdshifts = {'2016': 1.001, '2017': 0.979, '2018': 0.970}
        df['AK8Puppijet0_msd_matchedUp'] = msdshifts[self._year] * df['AK8Puppijet0_msd']

    weights = processor.Weights(df.size)
    if not isRealData:
        # SumWeights is sum(scale1fb), so we need to use full value here
        weights.add('genweight', df['scale1fb'])
        if not self._skipPileup:
            if self._year == '2017' and dataset in self._corrections['2017_pileupweight_dataset']:
                weights.add('pileupweight',
                            self._corrections['2017_pileupweight_dataset'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puUp'][dataset](df['npu']),
                            self._corrections['2017_pileupweight_dataset_puDown'][dataset](df['npu']),
                            )
            elif self._year != '2017':
                weights.add('pileupweight',
                            self._corrections[f'{self._year}_pileupweight'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puUp'](df['npu']),
                            self._corrections[f'{self._year}_pileupweight_puDown'](df['npu']),
                            )
        # TODO unc.
        # NLO/LO QCD x EWK k-factors for V+jets samples
        if self._year == '2017' and 'ZJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2017' and 'WJetsToQQ_HT' in dataset:
            nlo_over_lo_qcd = self._corrections['2017_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'DYJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_Z_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)
        elif self._year == '2016' and 'WJetsToQQ' in dataset:
            nlo_over_lo_qcd = self._corrections['2016_W_nlo_qcd'](df['genVPt'])
            nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt'])
            weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk)

    if not isRealData:
        # handle weight systematics for signal region
        def regionMask(w):
            # apply weight w only where the signal-region lepton veto holds
            if self._skipTrigger:
                return np.ones(df.size)
            return np.where(selection.all('noLeptons'), w, 1.)

        weights.add('trigweight',
                    regionMask(self._corrections[f'{self._year}_trigweight_msd_pt'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                    regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightUp'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                    regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightDown'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])),
                    )
        # V-matching: jet close in phi, pt and msd to the generated boson
        vmatch = (np.abs(deltaphi(df['AK8Puppijet0_phi'], df['genVPhi'])) < 0.8) & (np.abs(df['AK8Puppijet0_pt']-df['genVPt'])/df['genVPt'] < 0.5) & (np.abs(df['AK8Puppijet0_msd']-df['genVMass'])/df['genVMass'] < 0.3)
        weights.add('matched', np.ones(df.size, dtype='f'), vmatch.astype('f'), 1.-vmatch)

        # handle weight systematics for muon CR
        # NOTE(review): this redefines regionMask, shadowing the signal-region
        # version for all subsequent calls.
        def regionMask(w):
            if self._skipTrigger:
                return np.ones(df.size)
            return np.where(selection.all('oneMuon'), w, 1.)

        mu_abseta = np.abs(df['vmuoLoose0_eta'])
        weights.add('mutrigweight',
                    regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta'](df['vmuoLoose0_pt'], mu_abseta)),
                    regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta_mutrigweightShift'](df['vmuoLoose0_pt'], mu_abseta)),
                    shift=True
                    )
        weights.add('muidweight',
                    regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                    regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt_muidweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                    shift=True
                    )
        weights.add('muisoweight',
                    regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])),
                    regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt_muisoweightShift'](mu_abseta, df['vmuoLoose0_pt'])),
                    shift=True
                    )

    if self._debug:
        print("Weight statistics:")
        pprint.pprint(weights._weightStats, indent=4)

    hout = self.accumulator.identity()
    # fill every histogram whose fields are all available in df
    for histname, h in hout.items():
        if not isinstance(h, hist.Hist):
            continue
        if not all(k in df or k == 'systematic' for k in h.fields):
            # Cannot fill this histogram due to missing fields
            # is this an error, warning, or ignorable?
            if self._debug:
                print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
            continue
        fields = {k: df[k] for k in h.fields if k in df}
        # a histogram belongs to the region whose name appears in its name
        region = [r for r in regions.keys() if r in histname.split('_')]
        if 'nminus1' in histname:
            # N-1 plot: apply all of the region's cuts except the named one
            _, sel, region = histname.split('_')
            cut = regions[region] - {sel}
            weight = weights.weight() * selection.all(*cut)
            h.fill(**fields, weight=weight)
        elif len(region) == 1:
            region = region[0]
            weight = weights.weight()
            cut = selection.all(*regions[region])
            h.fill(systematic="", **fields, weight=weight*cut)
            if 'systematic' in h.fields:
                if self._debug:
                    print("Filling systematics for %s" % histname)
                systs = set(weights.variations)
                systs.update(shiftSystematics)
                for syst in systs:
                    if self._debug:
                        print("  Filling systematic %s" % syst)
                    # NOTE(review): fields_syst aliases fields, so shifted
                    # columns persist across iterations of this loop —
                    # confirm this is intended (a copy may be safer).
                    fields_syst = fields
                    for val in shiftedQuantities:
                        if val+'_'+syst in df:
                            fields_syst[val] = df[val+'_'+syst]
                            if self._debug:
                                print("    Replacing field %s with %s" % (val, val+'_'+syst))
                    if syst in weights.variations:
                        weight_syst = weights.weight(syst)
                        if self._debug:
                            print("    Using modified weight")
                    else:
                        weight_syst = weight
                    if syst in set(shiftSystematics):
                        # rebuild the cut with shifted versions where defined
                        cut_syst = set()
                        for sel in regions[region]:
                            if sel in shiftedSelections and sel+syst in selection.names:
                                cut_syst.add(sel+syst)
                                if self._debug:
                                    print("    Replacing cut %s with systematic-shifted %s" % (sel, sel+syst))
                            else:
                                cut_syst.add(sel)
                        cut_syst = selection.all(*cut_syst)
                    else:
                        cut_syst = cut
                    h.fill(systematic=syst, **fields_syst, weight=weight_syst*cut_syst)
        elif len(region) > 1:
            raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
        else:
            raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))
    if not isRealData:
        if 'skim_sumw' in df:
            # hacky way to only accumulate file-level information once
            if df['skim_sumw'] is not None:
                hout['sumw'][dataset] += df['skim_sumw']
        else:
            # NOTE(review): else attached to "'skim_sumw' in df" here —
            # confirm against the original (source formatting was lost).
            hout['sumw'][dataset] += np.sum(df['scale1fb'])
    return hout
def process(self, df):
    """Process one chunk of events for the monojet analysis.

    Builds the physics-object candidates, registers all event selections
    in a PackedSelection, computes (MC-only) scale-factor weights, and
    fills the per-region output histograms and cutflows.

    :param df: per-chunk dataframe of NanoAOD-style columns. Columns such
        as 'dataset', 'MET_pt', 'MET_phi', 'event' are read; derived
        quantities ('recoil_pt', 'MT_mu', ...) are written back into it.
    :returns: the filled accumulator (histograms, cutflows, sumw, ...).
    """
    # Empty chunk: nothing to do, hand back a blank accumulator.
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)

    dataset = df['dataset']

    # Dataset flavor flags, derived from the dataset name.
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt -- only available in MC.
    if not df['is_data']:
        gen_v_pt = df['LHE_Vpt']

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    ak4, ak8, muons, electrons, taus, photons, hlt = setup_candidates(df, cfg)

    # Muons: tight working point on top of the loose pre-selection.
    is_tight_muon = muons.tightId \
                    & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                    & (muons.pt > cfg.MUON.CUTS.TIGHT.PT) \
                    & (np.abs(muons.eta) < cfg.MUON.CUTS.TIGHT.ETA)

    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

    # Transverse mass; the (counts == 1) factor zeroes events that do not
    # have exactly one muon before taking the per-event max.
    df['MT_mu'] = ((muons.counts == 1) * mt(muons.pt, muons.phi, df['MET_pt'], df['MET_phi'])).max()

    # Electrons: tight working point.
    is_tight_electron = electrons.tightId \
                        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                        & (np.abs(electrons.eta) < cfg.ELECTRON.CUTS.TIGHT.ETA)

    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

    df['MT_el'] = ((electrons.counts == 1) * mt(electrons.pt, electrons.phi, df['MET_pt'], df['MET_phi'])).max()

    # ak4
    jet_acceptance = np.abs(ak4.eta) < 2.4

    # B tagged ak4
    btag_cut = cfg.BTAG.CUTS[cfg.BTAG.algo][cfg.BTAG.wp]
    jet_btag_val = getattr(ak4, cfg.BTAG.algo)
    jet_btagged = jet_btag_val > btag_cut
    bjets = ak4[
        jet_acceptance \
        & jet_btagged \
        & (ak4.pt > 20)
    ]

    # Recoil: MET with visible leptons / photons added back.
    df['recoil_pt'], df['recoil_phi'] = recoil(df['MET_pt'], df['MET_phi'], electrons, muons, photons)
    df["dPFCalo"] = (df['MET_pt'] - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4, df['MET_phi'], njet=4, ptmin=30)

    selection = processor.PackedSelection()
    selection.add('inclusive', np.ones(df.size) == 1)

    # Triggers
    if cfg.RUN.SYNC:
        # Synchronization mode: all trigger/filter bits pass by construction.
        pass_all = np.ones(df.size) == 1
        selection.add('filt_met', pass_all)
        selection.add('trig_met', pass_all)
        selection.add('trig_ele', pass_all)
        selection.add('trig_mu', pass_all)
    else:
        selection.add('filt_met', df['Flag_METFilters'])
        selection.add('trig_met', combine_masks(df, cfg.TRIGGERS.MET))

        # Trigger overlap: in data, backup electron triggers are used
        # exclusively on the SinglePhoton stream to avoid double counting
        # events that also fired the primary single-electron triggers.
        if df['is_data']:
            if "SinglePhoton" in dataset:
                trig_ele = combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE_BACKUP) \
                           & (~combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE))
            else:
                trig_ele = combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE)
        else:
            # MC: simple OR of primary and backup electron triggers.
            trig_ele = combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE_BACKUP) \
                       | combine_masks(df, cfg.TRIGGERS.ELECTRON.SINGLE)
        selection.add('trig_ele', trig_ele)
        selection.add('trig_mu', combine_masks(df, cfg.TRIGGERS.MUON.SINGLE))

    # Registered in both sync and non-sync mode so that any region
    # referencing it never hits a missing selection name.
    selection.add('trig_ht_for_g_eff', combine_masks(df, cfg.TRIGGERS.HT.GAMMAEFF))

    # Trigger objects: HLT muons (id == 13); the bit masks 8 and 16 on the
    # 'filter' word pick out what this code treats as single- and
    # double-muon trigger objects respectively.  Parentheses added for
    # clarity; Python's '&' already binds tighter than '=='.
    hlt_muons = hlt[hlt.id == 13]
    hlt_single_muons = hlt_muons[(hlt_muons.filter & 8) == 8]
    hlt_double_muons = hlt_muons[(hlt_muons.filter & 16) == 16]
    selection.add('one_hlt_muon', hlt_single_muons.counts >= 1)
    # A double-muon object counts for two legs.
    selection.add('two_hlt_muons', (hlt_single_muons.counts + 2 * hlt_double_muons.counts) >= 2)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr', df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo', np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

    # AK4 Jet
    leadak4_index = ak4.pt.argmax()
    leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                     & (np.abs(ak4.eta[leadak4_index]) < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
    selection.add('leadak4_pt_eta', leadak4_pt_eta)
    selection.add('leadak4_id', (ak4.tightId[leadak4_index] \
                                 & (ak4.chf[leadak4_index] > cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                 & (ak4.nhf[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

    # AK8 Jet
    leadak8_index = ak8.pt.argmax()
    leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                     & (np.abs(ak8.eta[leadak8_index]) < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
    selection.add('leadak8_pt_eta', leadak8_pt_eta)
    selection.add('leadak8_id', (ak8.tightId[leadak8_index]).any())

    # Mono-V selection
    selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
    selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                   & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())
    # Events failing any part of the V-tag form the mono-jet-like complement.
    selection.add('veto_vtag',
                  ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass"))

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', is_tight_muon.any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                  & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    # FIX: 'two_electrons' was previously add()-ed twice with the same
    # mask; each add consumes a bit in the PackedSelection and all() only
    # ever resolves the first one, so it is registered exactly once here.
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', is_tight_electron.any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
                                      & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    selection.add('trig_photon', combine_masks(df, cfg.TRIGGERS.PHOTON.SINGLE))
    leadphoton_index = photons.pt.argmax()
    is_tight_photon = photons.mediumId \
                      & (photons.pt > cfg.PHOTON.CUTS.TIGHT.PT) \
                      & (np.abs(photons.eta) < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', is_tight_photon.any())

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if not df['is_data']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset)

    # Weights
    evaluator = monojet_evaluator(cfg)
    all_weights = {}
    if df['is_data']:
        weight = np.ones(df.size)
    else:
        weight = df['Generator_weight']

        # Muon ID and Isolation for tight and loose WP
        # Function of pT, eta (Order!)
        all_weights["muon_id_tight"] = evaluator['muon_id_tight'](muons[is_tight_muon].pt, muons[is_tight_muon].eta).prod()
        all_weights["muon_iso_tight"] = evaluator['muon_iso_tight'](muons[is_tight_muon].pt, muons[is_tight_muon].eta).prod()
        all_weights["muon_id_loose"] = evaluator['muon_id_loose'](muons[~is_tight_muon].pt, muons[~is_tight_muon].eta).prod()
        all_weights["muon_iso_loose"] = evaluator['muon_iso_loose'](muons[~is_tight_muon].pt, muons[~is_tight_muon].eta).prod()

        # Electron ID and reco
        # Function of eta, pT (Other way round relative to muons!)
        all_weights["ele_reco"] = evaluator['ele_reco'](electrons.eta, electrons.pt).prod()
        all_weights["ele_id_tight"] = evaluator['ele_id_tight'](electrons[is_tight_electron].eta, electrons[is_tight_electron].pt).prod()
        all_weights["ele_id_loose"] = evaluator['ele_id_loose'](electrons[~is_tight_electron].eta, electrons[~is_tight_electron].pt).prod()

        # Photon ID and electron veto
        all_weights["photon_id_tight"] = evaluator['photon_id_tight'](photons[is_tight_photon].eta, photons[is_tight_photon].pt).prod()

        # CSEV not split only by EE/EB for now
        csev_sf_index = 0.5 * photons.barrel + 2.5 * ~photons.barrel
        all_weights["photon_csev"] = evaluator['photon_csev'](csev_sf_index).prod()

        all_weights["pileup"] = evaluator['pileup'](df['Pileup_nTrueInt'])

        # QCD+EW NLO corrections for LO V+jets samples; unity otherwise.
        if df['is_lo_w']:
            all_weights["theory"] = evaluator["qcd_ew_nlo_w"](gen_v_pt)
        elif df['is_lo_z']:
            all_weights["theory"] = evaluator["qcd_ew_nlo_z"](gen_v_pt)
        else:
            all_weights["theory"] = np.ones(df.size)

        # Total per-event weight is the product of all individual weights.
        for iw in all_weights.values():
            weight = weight * iw

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [df['MET_pt'][mask]]
            output['kinematics']['met_phi'] += [df['MET_phi'][mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]
            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta]
            output['kinematics']['leadbtag'] += [jet_btag_val[jet_acceptance & (ak4.pt > 20)][mask].max()]
            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [muons[is_tight_muon].counts[mask]]
            output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt]
            output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta]
            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [electrons[is_tight_electron].counts[mask]]
            output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt]
            output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta]
            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [photons[is_tight_photon].counts[mask]]
            output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt]
            output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']

    regions = monojet_regions()
    for region, cuts in regions.items():
        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(*cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=weight[mask])

        fill_mult('ak8_mult', ak8)
        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[is_tight_electron])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[is_tight_muon])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights (empty dict in data, so this is a no-op there)
        for wname, wvalue in all_weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, weight[mask])
        ezfill('ak4eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

        # Leading ak4
        # FIX: reuse the lead-jet index computed once before the loop
        # instead of recomputing argmax in every region iteration.
        w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], weight[mask])
        ezfill('ak4eta0', jeteta=ak4[leadak4_index].eta[mask].flatten(), weight=w_leadak4)
        ezfill('ak4pt0', jetpt=ak4[leadak4_index].pt[mask].flatten(), weight=w_leadak4)

        # All ak8
        w_allak8 = weight_shape(ak8.eta[mask], weight[mask])
        ezfill('ak8eta', jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
        ezfill('ak8pt', jetpt=ak8[mask].pt.flatten(), weight=w_allak8)
        ezfill('ak8mass', mass=ak8[mask].mass.flatten(), weight=w_allak8)

        # Leading ak8
        # FIX: same loop-invariant hoist as for the lead ak4 jet.
        w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], weight[mask])
        ezfill('ak8eta0', jeteta=ak8[leadak8_index].eta[mask].flatten(), weight=w_leadak8)
        ezfill('ak8pt0', jetpt=ak8[leadak8_index].pt[mask].flatten(), weight=w_leadak8)
        ezfill('ak8mass0', mass=ak8[leadak8_index].mass[mask].flatten(), weight=w_leadak8)

        # B tag discriminator
        # NOTE(review): upper-case cfg.BTAG.ALGO here vs cfg.BTAG.algo
        # above -- kept verbatim; presumably the config lookup is
        # case-insensitive. TODO confirm.
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], weight[mask])
        ezfill('ak4btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=weight[mask])
        ezfill('met', met=df["MET_pt"][mask], weight=weight[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask], weight=weight[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], weight=weight[mask])

        # Muons
        w_allmu = weight_shape(muons.pt[mask], weight[mask])
        ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
        ezfill('muon_mt', mt=df['MT_mu'][mask], weight=weight[mask])
        ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu)

        # Dimuon
        w_dimu = weight_shape(dimuons.pt[mask], weight[mask])
        ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu)
        ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu)
        ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu)

        # Electrons
        w_allel = weight_shape(electrons.pt[mask], weight[mask])
        ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel)
        ezfill('electron_mt', mt=df['MT_el'][mask], weight=weight[mask])
        ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel)

        # Dielectron
        w_diel = weight_shape(dielectrons.pt[mask], weight[mask])
        ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel)
        ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel)
        ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel)

        # Photon
        w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask], weight[mask])
        ezfill('photonpt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon)
        ezfill('photoneta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)
        ezfill('photonphi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon)
    return output
def process(self, df): if not df.size: return self.accumulator.identity() self._configure(df) dataset = df['dataset'] df['is_lo_w'] = is_lo_w(dataset) df['is_lo_z'] = is_lo_z(dataset) df['is_lo_g'] = is_lo_g(dataset) df['is_nlo_z'] = is_nlo_z(dataset) df['is_nlo_w'] = is_nlo_w(dataset) df['has_v_jet'] = has_v_jet(dataset) df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] df['is_data'] = is_data(dataset) gen_v_pt = None if not df['is_data']: gen = setup_gen_candidates(df) if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']: dressed = setup_dressed_gen_candidates(df) fill_gen_v_info(df, gen, dressed) gen_v_pt = df['gen_v_pt_combined'] elif df['is_lo_g']: gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max() # Candidates # Already pre-filtered! # All leptons are at least loose # Check out setup_candidates for filtering details met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg) # Muons df['is_tight_muon'] = muons.tightId \ & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \ & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \ & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA) dimuons = muons.distincts() dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge'] df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max() # Electrons df['is_tight_electron'] = electrons.tightId \ & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \ & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA) dielectrons = electrons.distincts() dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge'] df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max() # ak4 leadak4_index=ak4.pt.argmax() elejet_pairs = ak4[:,:1].cross(electrons) df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min() muonjet_pairs = ak4[:,:1].cross(muons) df['dRMuonJet'] = 
np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min() # Photons # Angular distance leading photon - leading jet phojet_pairs = ak4[:,:1].cross(photons[:,:1]) df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min() # Recoil df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons) df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"] df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4) df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4) selection = processor.PackedSelection() # Triggers pass_all = np.ones(df.size)==1 selection.add('inclusive', pass_all) selection = trigger_selection(selection, df, cfg) selection.add('mu_pt_trig_safe', muons.pt.max() > 30) # Common selection selection.add('veto_ele', electrons.counts==0) selection.add('veto_muo', muons.counts==0) selection.add('veto_photon', photons.counts==0) selection.add('veto_tau', taus.counts==0) selection.add('veto_b', bjets.counts==0) selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO) selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL) if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC): selection.add('hemveto', df['hemveto']) else: selection.add('hemveto', np.ones(df.size)==1) # AK4 Jet leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \ & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any() selection.add('leadak4_pt_eta', leadak4_pt_eta) selection.add('leadak4_id',(ak4.tightId[leadak4_index] \ & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \ & 
(ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any()) # AK8 Jet leadak8_index=ak8.pt.argmax() leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \ & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any() selection.add('leadak8_pt_eta', leadak8_pt_eta) selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any()) # Mono-V selection selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any()) selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \ & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any()) selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD) & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE) & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any()) selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any()) selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass")) selection.add('only_one_ak8', ak8.counts==1) # Dimuon CR leadmuon_index=muons.pt.argmax() selection.add('at_least_one_tight_mu', df['is_tight_muon'].any()) selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \ & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any()) selection.add('dimuon_charge', (dimuon_charge==0).any()) selection.add('two_muons', muons.counts==2) # Single muon CR selection.add('one_muon', muons.counts==1) selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT) # Diele CR leadelectron_index=electrons.pt.argmax() selection.add('one_electron', electrons.counts==1) selection.add('two_electrons', electrons.counts==2) 
selection.add('at_least_one_tight_el', df['is_tight_electron'].any()) selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \ & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any()) selection.add('dielectron_charge', (dielectron_charge==0).any()) selection.add('two_electrons', electrons.counts==2) # Single Ele CR selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET) selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT) # Photon CR leadphoton_index=photons.pt.argmax() df['is_tight_photon'] = photons.mediumId \ & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA) selection.add('one_photon', photons.counts==1) selection.add('at_least_one_tight_photon', df['is_tight_photon'].any()) selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT) selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG) # Fill histograms output = self.accumulator.identity() # Gen if gen_v_pt is not None: output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight']) if 'LHE_HT' in df: output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT']) # Weights evaluator = evaluator_from_config(cfg) weights = processor.Weights(size=df.size, storeIndividual=True) if not df['is_data']: weights.add('gen', df['Generator_weight']) try: weights.add('prefire', df['PrefireWeight']) except KeyError: weights.add('prefire', np.ones(df.size)) weights = candidate_weights(weights, df, evaluator, muons, electrons, photons) weights = pileup_weights(weights, df, evaluator, cfg) if not (gen_v_pt is None): weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt) # Save per-event values for synchronization if cfg.RUN.KINEMATICS.SAVE: for event in cfg.RUN.KINEMATICS.EVENTS: mask = df['event'] == event if not mask.any(): continue output['kinematics']['event'] += [event] output['kinematics']['met'] += [met_pt[mask].flatten()] output['kinematics']['met_phi'] += 
[met_phi[mask].flatten()] output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()] output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()] output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()] output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()] output['kinematics']['leadbtag'] += [ak4.pt.max()<0][mask] output['kinematics']['nLooseMu'] += [muons.counts[mask]] output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()] output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()] output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()] output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()] output['kinematics']['nLooseEl'] += [electrons.counts[mask]] output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()] output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()] output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()] output['kinematics']['nLooseGam'] += [photons.counts[mask]] output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()] output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()] output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()] # Sum of all weights to use for normalization # TODO: Deal with systematic variations output['nevents'][dataset] += df.size if not df['is_data']: output['sumw'][dataset] += df['genEventSumw'] output['sumw2'][dataset] += df['genEventSumw2'] output['sumw_pileup'][dataset] += weights.partial_weight(include=['pileup']).sum() regions = monojet_regions(cfg) for region, cuts in regions.items(): region_weights = copy.deepcopy(weights) if not df['is_data']: if re.match(r'cr_(\d+)e.*', region): region_weights.add('trigger', np.ones(df.size)) elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region): 
region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt'])) elif re.match(r'cr_g.*', region): region_weights.add('trigger', np.ones(df.size)) if not df['is_data']: genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)] leadak8 = ak8[ak8.pt.argmax()] leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8) matched_leadak8 = leadak8[leadak8_matched_mask] unmatched_leadak8 = leadak8[~leadak8_matched_mask] for wp in ['loose','loosemd','tight','tightmd']: if re.match(r'.*_{wp}_v.*', region): if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() else: matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \ * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod() region_weights.add('wtag_{wp}', matched_weights) # Blinding if(self._blind and df['is_data'] and region.startswith('sr')): continue # Cutflow plot for signal and control regions if any(x in region for x in ["sr", "cr", "tr"]): output['cutflow_' + region]['all']+=df.size for icut, cutname in enumerate(cuts): output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum() mask = selection.all(*cuts) if cfg.RUN.SAVE.TREE: def fill_tree(variable, values): treeacc = processor.column_accumulator(values) name = f'tree_{region}_{variable}' if dataset in output[name].keys(): output[name][dataset] += treeacc else: output[name][dataset] = treeacc if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']: fill_tree('recoil',df['recoil_pt'][mask].flatten()) fill_tree('weight',region_weights.weight()[mask].flatten()) if gen_v_pt is not None: fill_tree('gen_v_pt',gen_v_pt[mask].flatten()) else: fill_tree('gen_v_pt', -1 * np.ones(sum(mask))) # Save the event numbers of events passing this selection if cfg.RUN.SAVE.PASSING: output['selected_events'][region] += list(df['event'][mask]) # Multiplicities def fill_mult(name, candidates): output[name].fill( 
dataset=dataset, region=region, multiplicity=candidates[mask].counts, weight=region_weights.weight()[mask] ) fill_mult('ak8_mult', ak8) fill_mult('ak4_mult', ak4) fill_mult('bjet_mult',bjets) fill_mult('loose_ele_mult',electrons) fill_mult('tight_ele_mult',electrons[df['is_tight_electron']]) fill_mult('loose_muo_mult',muons) fill_mult('tight_muo_mult',muons[df['is_tight_muon']]) fill_mult('tau_mult',taus) fill_mult('photon_mult',photons) def ezfill(name, **kwargs): """Helper function to make filling easier.""" output[name].fill( dataset=dataset, region=region, **kwargs ) # Monitor weights for wname, wvalue in region_weights._weights.items(): ezfill("weights", weight_type=wname, weight_value=wvalue[mask]) # All ak4 # This is a workaround to create a weight array of the right dimension w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask]) ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets) ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets) # Leading ak4 w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask]) ezfill('ak4_eta0', jeteta=ak4[leadak4_index].eta[mask].flatten(), weight=w_leadak4) ezfill('ak4_phi0', jetphi=ak4[leadak4_index].phi[mask].flatten(), weight=w_leadak4) ezfill('ak4_pt0', jetpt=ak4[leadak4_index].pt[mask].flatten(), weight=w_leadak4) ezfill('ak4_ptraw0', jetpt=ak4[leadak4_index].ptraw[mask].flatten(), weight=w_leadak4) ezfill('ak4_chf0', frac=ak4[leadak4_index].chf[mask].flatten(), weight=w_leadak4) ezfill('ak4_nhf0', frac=ak4[leadak4_index].nhf[mask].flatten(), weight=w_leadak4) ezfill('drelejet', dr=df['dREleJet'][mask], weight=region_weights.weight()[mask]) ezfill('drmuonjet', dr=df['dRMuonJet'][mask], weight=region_weights.weight()[mask]) ezfill('drphotonjet', dr=df['dRPhotonJet'][mask], 
weight=region_weights.weight()[mask]) # AK8 jets if region=='inclusive' or region.endswith('v'): # All w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta', jeteta=ak8[mask].eta.flatten(), weight=w_allak8) ezfill('ak8_phi', jetphi=ak8[mask].phi.flatten(), weight=w_allak8) ezfill('ak8_pt', jetpt=ak8[mask].pt.flatten(), weight=w_allak8) ezfill('ak8_mass', mass=ak8[mask].mass.flatten(), weight=w_allak8) # Leading w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta0', jeteta=ak8[leadak8_index].eta[mask].flatten(), weight=w_leadak8) ezfill('ak8_phi0', jetphi=ak8[leadak8_index].phi[mask].flatten(), weight=w_leadak8) ezfill('ak8_pt0', jetpt=ak8[leadak8_index].pt[mask].flatten(), weight=w_leadak8 ) ezfill('ak8_mass0', mass=ak8[leadak8_index].mass[mask].flatten(), weight=w_leadak8) ezfill('ak8_tau210', tau21=ak8[leadak8_index].tau21[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcd0', tagger=ak8[leadak8_index].wvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcdmd0', tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcd0', tagger=ak8[leadak8_index].zvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcdmd0', tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(), weight=w_leadak8) # histogram with only gen-matched lead ak8 pt if not df['is_data']: w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(), weight=w_matchedleadak8 ) # Dimuon specifically for deepak8 mistag rate measurement if 'inclusive_v' in region: ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_pt0', 
wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) # MET ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=region_weights.weight()[mask] ) ezfill('met', met=met_pt[mask], weight=region_weights.weight()[mask] ) ezfill('met_phi', phi=met_phi[mask], weight=region_weights.weight()[mask] ) ezfill('recoil', recoil=df["recoil_pt"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_nopog', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask]) ezfill('recoil_nopref', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['prefire'])[mask]) ezfill('recoil_nopu', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('recoil_notrg', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['trigger'])[mask]) ezfill('ak4_pt0_over_recoil', ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask], weight=region_weights.weight()[mask]) ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], 
weight=region_weights.weight()[mask] ) ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=region_weights.weight()[mask] ) if 'noveto' in region: continue # Muons if '_1m_' in region or '_2m_' in region: w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask]) ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu ) ezfill('muon_mt', mt=df['MT_mu'][mask], weight=region_weights.weight()[mask]) ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu) ezfill('muon_dxy', dxy=muons.dxy[mask].flatten(), weight=w_allmu) ezfill('muon_dz', dz=muons.dz[mask].flatten(), weight=w_allmu) # Leading muon w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask]) ezfill('muon_pt0', pt=muons[leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta0', eta=muons[leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi0', phi=muons[leadmuon_index].phi[mask].flatten(), weight=w_leadmu) ezfill('muon_dxy0', dxy=muons[leadmuon_index].dxy[mask].flatten(), weight=w_leadmu) ezfill('muon_dz0', dz=muons[leadmuon_index].dz[mask].flatten(), weight=w_leadmu) # Dimuon if '_2m_' in region: w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask]) ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu) ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu) ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu ) ezfill('dimuon_dr', dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu ) ezfill('muon_pt1', pt=muons[~leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta1', eta=muons[~leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi1', phi=muons[~leadmuon_index].phi[mask].flatten(), weight=w_leadmu) # Electrons if '_1e_' in region or 
'_2e_' in region: w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask]) ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel) ezfill('electron_mt', mt=df['MT_el'][mask], weight=region_weights.weight()[mask]) ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel) ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_dz', dz=electrons.dz[mask].flatten(), weight=w_allel) ezfill('electron_dxy', dxy=electrons.dxy[mask].flatten(), weight=w_allel) w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_pt0', pt=electrons[leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta0', eta=electrons[leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi0', phi=electrons[leadelectron_index].phi[mask].flatten(), weight=w_leadel) w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_tightid1', id=electrons[~leadelectron_index].tightId[mask].flatten(), weight=w_trailel) # Dielectron if '_2e_' in region: w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask]) ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel) ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel) ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel) ezfill('dielectron_dr', dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel ) ezfill('electron_pt1', pt=electrons[~leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta1', eta=electrons[~leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi1', phi=electrons[~leadelectron_index].phi[mask].flatten(), weight=w_leadel) # Photon if '_g_' in 
region: w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask]); ezfill('photon_pt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) ezfill('photon_phi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask]) # PV ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask]) ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask]) ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask]) ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask]) ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) return output
def process(self, events):
    """Fake-photon template selection for the Z(ee)+photon analysis.

    Applies trigger, dielectron, photon (medium ID with the Sieie cut
    removed), gen-matching and OSSF Z-candidate selections, then fills
    cutflow, kinematic and pt/|eta|-binned sieie histograms.

    Parameters
    ----------
    events : NanoEvents array for one chunk of a dataset.

    Returns
    -------
    The filled accumulator.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = sample_name  # events.metadata['dataset']

    # Data or MC: data has no generator-weight branch
    isData = 'genWeight' not in events.fields

    # Stop processing if there is no event remaining
    if len(events) == 0:
        return out

    # Golden JSON (certified good-run list); only needed for data
    if (self._year == "2018") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD"
    if (self._year == "2017") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

    # <----- Get Scale factors ------>#
    if not isData:
        # Egamma reco/ID scale-factor lookup functions
        get_ele_reco_above20_sf = self._corrections['get_ele_reco_above20_sf'][self._year]
        get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][self._year]
        get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][self._year]
        # DoubleEG trigger efficiency lookups
        # 2016, 2017 are not applied yet
        if self._year == "2018":
            get_ele_trig_leg1_SF = self._corrections['get_ele_trig_leg1_SF'][self._year]
            get_ele_trig_leg1_data_Eff = self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
            get_ele_trig_leg1_mc_Eff = self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]
            get_ele_trig_leg2_SF = self._corrections['get_ele_trig_leg2_SF'][self._year]
            get_ele_trig_leg2_data_Eff = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
            get_ele_trig_leg2_mc_Eff = self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]
        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]

    selection = processor.PackedSelection()

    # Cut flow
    cut0 = np.zeros(len(events))

    # <----- Helper functions ------>#

    def sort_by_pt(ele, pho, jet):
        """Sort each collection by descending pt within every event."""
        ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
        pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
        jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]
        return ele, pho, jet

    # Lorentz vectors
    from coffea.nanoevents.methods import vector
    ak.behavior.update(vector.behavior)

    def TLorentz_vector(vec):
        """Re-zip a Cartesian four-vector as a coffea LorentzVector."""
        vec = ak.zip({
            "x": vec.x,
            "y": vec.y,
            "z": vec.z,
            "t": vec.t
        }, with_name="LorentzVector")
        return vec

    def TLorentz_vector_cylinder(vec):
        """Re-zip a four-vector in cylindrical (pt, eta, phi, mass) form."""
        vec = ak.zip(
            {
                "pt": vec.pt,
                "eta": vec.eta,
                "phi": vec.phi,
                "mass": vec.mass,
            },
            with_name="PtEtaPhiMLorentzVector",
        )
        return vec

    # Cut-based ID modification
    @numba.njit
    def PhotonVID(vid, idBit):
        """Collapse the 7x2-bit vidNestedWPBitmap into a 7-bit mask of the
        cuts passed at working point `idBit` (1=loose, 2=medium, 3=tight)."""
        rBit = 0
        for x in range(0, 7):
            rBit |= (1 << x) if ((vid >> (x * 2)) & 0b11 >= idBit) else 0
        return rBit

    # Inverse Sieie and upper limit
    @numba.njit
    def make_fake_obj_mask(Pho, builder):
        """Build a per-photon boolean mask: medium ID with Sieie removed.

        Collapsed-bit layout (LSB first):
        |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT|
        1. Turn off a cut (e.g. Sieie):   mask with |1|1|1|0|1|1|1|
        2. Inverse a cut  (e.g. Sieie):   require == |1|1|1|0|1|1|1|
        """
        for eventIdx, pho in enumerate(Pho):  # --Event Loop
            builder.begin_list()
            if len(pho) < 1:
                # FIX: original `continue`d without closing the list,
                # leaving the ArrayBuilder unbalanced for empty events.
                # (Callers currently require >=1 photon, so behavior on
                # real inputs is unchanged.)
                builder.end_list()
                continue
            for phoIdx, _ in enumerate(pho):  # --Photon Loop
                vid = Pho[eventIdx][phoIdx].vidNestedWPBitmap
                vid_cuts1 = PhotonVID(vid, 1)  # Loose photon (kept for reference)
                vid_cuts2 = PhotonVID(vid, 2)  # Medium photon
                vid_cuts3 = PhotonVID(vid, 3)  # Tight photon (kept for reference)
                #if (vid_cuts2 & 0b1111111 == 0b1111111): # Cut applied
                #if (vid_cuts2 & 0b1111111 == 0b1110111): # Inverse Sieie
                if (vid_cuts2 & 0b1110111 == 0b1110111):  # Without Sieie
                    builder.boolean(True)
                else:
                    builder.boolean(False)
            builder.end_list()
        return builder

    # <----- Selection ------>#
    Initial_events = events

    # Good Run ( Golden Json files )
    from coffea import lumi_tools
    if isData:
        lumi_mask_builder = lumi_tools.LumiMask(injson)
        lumimask = ak.Array(
            lumi_mask_builder.__call__(events.run, events.luminosityBlock))
        events = events[lumimask]
        #print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events)))

    # Stop processing if there is no event remaining
    if len(events) == 0:
        return out

    ##----------- Cut flow1: Passing Triggers
    # double lepton trigger: OR of all configured paths present in this file
    is_double_ele_trigger = True
    if not is_double_ele_trigger:
        # FIX: np.bool alias was removed in NumPy >= 1.24; use builtin bool
        double_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]

    # single lepton trigger (computed but not applied below)
    is_single_ele_trigger = True
    if not is_single_ele_trigger:
        single_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

    events.Electron, events.Photon, events.Jet = sort_by_pt(
        events.Electron, events.Photon, events.Jet)

    # Good Primary vertex
    # FIX: take the unweighted copy BEFORE PU reweighting; the original
    # assigned nPV_nw after `nPV = nPV * pu`, so "no weight" was weighted.
    nPV_nw = events.PV.npvsGood
    nPV = events.PV.npvsGood
    if not isData:
        nPV = nPV * pu

    # Apply cut1
    events = events[double_ele_triggers_arr]
    if not isData:
        pu = pu[double_ele_triggers_arr]
    cut1 = np.ones(len(events))

    # Set Particles
    Electron = events.Electron
    Muon = events.Muon
    Photon = events.Photon
    MET = events.MET
    Jet = events.Jet

    # Stop processing if there is no event remaining
    if len(Electron) == 0:
        return out

    # --Gen Photon for dR
    # NOTE(review): GenPart is accessed unconditionally, so this processor
    # appears to be MC-only — confirm before running it on data.
    genparts = events.GenPart
    pdgID_mask = (genparts.pdgId == 22)
    # mask2: isPrompt | fromHardProcess | isLastCopy
    # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py
    mask2 = (1 << 0) | (1 << 8) | (1 << 13)
    status_mask = ((genparts.statusFlags & mask2) == mask2)
    gen_photons = genparts[pdgID_mask & status_mask]
    assert (ak.all(ak.num(gen_photons) == 1))  # Raise error if len(gen_photon) != 1

    # --Muon ( only used to calculate dR )
    MuSelmask = (Muon.pt >= 10) & (abs(Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15)
    Muon = Muon[MuSelmask]

    ##----------- Cut flow2: Electron Selection (cut-based medium, barrel | endcap)
    EleSelmask = ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) &
                  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1)) | \
                 ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) &
                  (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) &
                  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2))
    Electron = Electron[EleSelmask]

    # apply cut 2: at least two selected electrons
    Tri_electron_mask = ak.num(Electron) >= 2
    Electron = Electron[Tri_electron_mask]
    Photon = Photon[Tri_electron_mask]
    Jet = Jet[Tri_electron_mask]
    MET = MET[Tri_electron_mask]
    Muon = Muon[Tri_electron_mask]
    if not isData:
        pu = pu[Tri_electron_mask]
    events = events[Tri_electron_mask]
    gen_photons = gen_photons[Tri_electron_mask]

    # Stop processing if there is no event remaining
    if len(Electron) == 0:
        return out
    cut2 = np.ones(len(Photon)) * 2

    ##----------- Cut flow3: Photon Selection
    # Basic photon selection: inside ECAL barrel/endcap, outside the gap
    isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5))
    Pixel_seed_mask = ~Photon.pixelSeed
    PT_mask = Photon.pt >= 20
    # dR cut with selected Muon and Electrons
    dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5, axis=-1)  # default metric table: delta_r
    dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)
    PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
    Photon = Photon[PhoSelmask]

    # Apply cut 3: at least one selected photon
    A_photon_mask = ak.num(Photon) > 0
    Electron = Electron[A_photon_mask]
    Photon = Photon[A_photon_mask]
    Jet = Jet[A_photon_mask]
    Muon = Muon[A_photon_mask]
    MET = MET[A_photon_mask]
    if not isData:
        pu = pu[A_photon_mask]
    events = events[A_photon_mask]
    gen_photons = gen_photons[A_photon_mask]

    # Fake-template photon ID: medium ID with the Sieie cut removed
    Photon_template_mask = make_fake_obj_mask(Photon, ak.ArrayBuilder()).snapshot()
    Photon = Photon[Photon_template_mask]

    # Re-apply "at least one photon" after the template mask
    A_photon_mask = ak.num(Photon) > 0
    Electron = Electron[A_photon_mask]
    Photon = Photon[A_photon_mask]
    Jet = Jet[A_photon_mask]
    Muon = Muon[A_photon_mask]
    MET = MET[A_photon_mask]
    if not isData:
        pu = pu[A_photon_mask]
    events = events[A_photon_mask]
    gen_photons = gen_photons[A_photon_mask]

    # Stop processing if there is no event remaining
    if len(Electron) == 0:
        return out
    cut3 = np.ones(len(Photon)) * 3

    ## -- Additional photon selection: Photon gen-matching
    # Choose Photons that dR(genPhoton,Photon) <= 0.1
    gen_match_photon_mask = ak.all(Photon.metric_table(gen_photons) <= 0.1, axis=-1)
    # Apply cut
    Photon = Photon[gen_match_photon_mask]
    gen_match_photon_evt_mask = ak.num(Photon) >= 1
    Electron = Electron[gen_match_photon_evt_mask]
    Photon = Photon[gen_match_photon_evt_mask]
    Jet = Jet[gen_match_photon_evt_mask]
    MET = MET[gen_match_photon_evt_mask]
    gen_photons = gen_photons[gen_match_photon_evt_mask]
    if not isData:
        pu = pu[gen_match_photon_evt_mask]
    events = events[gen_match_photon_evt_mask]

    ##----------- Cut flow4: Select 2 OSSF electrons from Z
    @numba.njit
    def find_2lep(events_leptons, builder):
        """Record per-event index tuples of opposite-charge electron pairs
        (pairs for 2-lepton events, triples otherwise)."""
        for leptons in events_leptons:
            builder.begin_list()
            nlep = len(leptons)
            for i0 in range(nlep):
                for i1 in range(i0 + 1, nlep):
                    if leptons[i0].charge + leptons[i1].charge != 0:
                        continue
                    if nlep == 2:
                        builder.begin_tuple(2)
                        builder.index(0).integer(i0)
                        builder.index(1).integer(i1)
                        builder.end_tuple()
                    else:
                        for i2 in range(nlep):
                            if len({i0, i1, i2}) < 3:
                                continue
                            builder.begin_tuple(3)
                            builder.index(0).integer(i0)
                            builder.index(1).integer(i1)
                            builder.index(2).integer(i2)
                            builder.end_tuple()
            builder.end_list()
        return builder

    ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot()

    # OSSF cut: keep events with at least one opposite-sign pair
    ossf_mask = ak.num(ossf_idx) >= 1
    ossf_idx = ossf_idx[ossf_mask]
    Electron = Electron[ossf_mask]
    Photon = Photon[ossf_mask]
    Jet = Jet[ossf_mask]
    MET = MET[ossf_mask]
    events = events[ossf_mask]
    if not isData:
        pu = pu[ossf_mask]

    # Build dielectron candidates from the recorded index pairs
    Double_electron = [Electron[ossf_idx[idx]] for idx in "01"]
    from coffea.nanoevents.methods import vector
    ak.behavior.update(vector.behavior)
    Diele = ak.zip({
        "lep1": Double_electron[0],
        "lep2": Double_electron[1],
        "p4": TLorentz_vector(Double_electron[0] + Double_electron[1])
    })
    # Keep the pair whose invariant mass is closest to the Z mass
    bestZ_idx = ak.singletons(ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1))
    Diele = Diele[bestZ_idx]

    # Stop processing if there is no event remaining
    if len(Electron) == 0:
        return out
    cut4 = np.ones(len(Electron)) * 4

    leading_ele = Diele.lep1
    subleading_ele = Diele.lep2

    def make_leading_pair(target, base):
        """Pick, per event, the `target` entry at the highest-pt index of `base`."""
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -- Scale Factor for each electron
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        """Per-event DoubleEG trigger weight = data efficiency / MC efficiency,
        summing both leg assignments and subtracting the overlap term."""
        per_ev_MC = \
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg2_mc_Eff(eta2, pt2) + \
            get_ele_trig_leg1_mc_Eff(eta2, pt2) * get_ele_trig_leg2_mc_Eff(eta1, pt1) - \
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
        per_ev_data = \
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) * get_ele_trig_leg2_SF(eta2, pt2) + \
            get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) * get_ele_trig_leg2_SF(eta1, pt1) - \
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2)
        return per_ev_data / per_ev_MC

    if not isData:
        ## -------------< Egamma ID and Reco Scale factor > -----------------##
        # FIX: store the evaluated SF under a new name instead of shadowing
        # the lookup function `get_pho_medium_id_sf` with its result.
        pho_medium_id_sf = get_pho_medium_id_sf(
            ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))
        ele_reco_sf = get_ele_reco_above20_sf(
            ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
            ak.flatten(leading_ele.pt)) * get_ele_reco_above20_sf(
            ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
            ak.flatten(subleading_ele.pt))
        ele_medium_id_sf = get_ele_medium_id_sf(
            ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
            ak.flatten(leading_ele.pt)) * get_ele_medium_id_sf(
            ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
            ak.flatten(subleading_ele.pt))
        ## -------------< Double Electron Trigger Scale factor > -----------------##
        eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
        eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
        pt1 = ak.flatten(leading_ele.pt)
        pt2 = ak.flatten(subleading_ele.pt)
        # -- 2017,2016 are not applied yet
        if self._year == '2018':
            ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

    ##----------- Cut flow5: Event selection
    # Mee cut
    Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4
    # Electron PT cuts
    Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20))
    # MET cuts
    MET_mask = MET.pt > 20

    # --------Mask -------#
    Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask
    Diele_sel = Diele[Event_sel_mask]
    leading_pho_sel = leading_pho[Event_sel_mask]
    Jet_sel = Jet[Event_sel_mask]
    MET_sel = MET[Event_sel_mask]

    # Photon EE and EB
    isEE_mask = leading_pho.isScEtaEE
    isEB_mask = leading_pho.isScEtaEB
    Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
    Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

    # Stop processing if there is no event remaining
    if len(leading_pho_sel) == 0:
        return out
    cut5 = np.ones(len(Diele)) * 5

    # -------------------- Flatten variables ---------------------------#
    # -- Ele1 --#
    Ele1_PT = ak.flatten(Diele_sel.lep1.pt)
    Ele1_Eta = ak.flatten(Diele_sel.lep1.eta)
    Ele1_Phi = ak.flatten(Diele_sel.lep1.phi)
    # -- Ele2 --#
    Ele2_PT = ak.flatten(Diele_sel.lep2.pt)
    Ele2_Eta = ak.flatten(Diele_sel.lep2.eta)
    Ele2_Phi = ak.flatten(Diele_sel.lep2.phi)
    # -- Pho -- #
    Pho_PT = ak.flatten(leading_pho_sel.pt)
    Pho_Eta = ak.flatten(leading_pho_sel.eta)
    Pho_Phi = ak.flatten(leading_pho_sel.phi)
    # -- Pho EB --#
    Pho_EB_PT = ak.flatten(Pho_EB.pt)
    Pho_EB_Eta = ak.flatten(Pho_EB.eta)
    Pho_EB_Phi = ak.flatten(Pho_EB.phi)
    # FIX: these two previously read from Pho_EE (copy-paste bug)
    Pho_EB_Isochg = ak.flatten(Pho_EB.pfRelIso03_chg)
    Pho_EB_Sieie = ak.flatten(Pho_EB.sieie)
    # -- Pho EE --#
    Pho_EE_PT = ak.flatten(Pho_EE.pt)
    Pho_EE_Eta = ak.flatten(Pho_EE.eta)
    Pho_EE_Phi = ak.flatten(Pho_EE.phi)
    Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
    Pho_EE_Sieie = ak.flatten(Pho_EE.sieie)
    # --Kinematics --#
    Diele_mass = ak.flatten(Diele_sel.p4.mass)
    leading_ele, subleading_ele = ak.flatten(
        TLorentz_vector_cylinder(Diele_sel.lep1)), ak.flatten(
        TLorentz_vector_cylinder(Diele_sel.lep2))
    dR_e1pho = ak.flatten(leading_ele.delta_r(leading_pho_sel))  # dR pho,ele1
    dR_e2pho = ak.flatten(subleading_ele.delta_r(leading_pho_sel))  # dR pho,ele2
    dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel))  # dR pho, lead jet
    MET_PT = ak.to_numpy(MET_sel.pt)

    # -------------------- Sieie bins---------------------------#
    def make_bins(pt, eta, sieie, bin_range_str):
        """Return (sieie values, mask) for one (pt, |eta|) bin label."""
        bin_dict = {
            'PT_1_eta_1': (pt > 20) & (pt < 30) & (eta < 1),
            'PT_1_eta_2': (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5),
            'PT_1_eta_3': (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2),
            'PT_1_eta_4': (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5),
            'PT_2_eta_1': (pt > 30) & (pt < 40) & (eta < 1),
            'PT_2_eta_2': (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5),
            'PT_2_eta_3': (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2),
            'PT_2_eta_4': (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5),
            'PT_3_eta_1': (pt > 40) & (pt < 50) & (eta < 1),
            'PT_3_eta_2': (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5),
            'PT_3_eta_3': (pt > 40) & (pt < 50) & (eta > 1.5) & (eta < 2),
            'PT_3_eta_4': (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5),
            'PT_4_eta_1': (pt > 50) & (eta < 1),
            'PT_4_eta_2': (pt > 50) & (eta > 1) & (eta < 1.5),
            'PT_4_eta_3': (pt > 50) & (eta > 1.5) & (eta < 2),
            'PT_4_eta_4': (pt > 50) & (eta > 2) & (eta < 2.5)
        }
        binmask = bin_dict[bin_range_str]
        return ak.to_numpy(sieie[binmask]), binmask

    bin_name_list = [
        'PT_1_eta_1', 'PT_1_eta_2', 'PT_1_eta_3', 'PT_1_eta_4',
        'PT_2_eta_1', 'PT_2_eta_2', 'PT_2_eta_3', 'PT_2_eta_4',
        'PT_3_eta_1', 'PT_3_eta_2', 'PT_3_eta_3', 'PT_3_eta_4',
        'PT_4_eta_1', 'PT_4_eta_2', 'PT_4_eta_3', 'PT_4_eta_4'
    ]
    binned_sieie_hist = {}
    binmask_dict = {}
    for name in bin_name_list:
        binned_sieie_hist[name], _ = make_bins(
            ak.flatten(leading_pho_sel.pt),
            ak.flatten(abs(leading_pho_sel.eta)),
            ak.flatten(leading_pho_sel.sieie), name)
        _, binmask_dict[name] = make_bins(ak.flatten(leading_pho.pt),
                                          ak.flatten(abs(leading_pho.eta)),
                                          ak.flatten(leading_pho.sieie), name)
    print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4'])

    # --- Apply weight and hist
    weights = processor.Weights(len(cut4))

    # --- skim cut-weight
    def skim_weight(arr):
        """Drop None entries and zero weights, returning a flat numpy array."""
        mask1 = ~ak.is_none(arr)
        subarr = arr[mask1]
        mask2 = subarr != 0
        return ak.to_numpy(subarr[mask2])

    cuts = Event_sel_mask
    cuts_pho_EE = ak.flatten(isEE_mask)
    cuts_pho_EB = ak.flatten(isEB_mask)

    print(
        "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ".
        format(len(Initial_events), len(cut1), len(cut2), len(cut3),
               len(cut4), len(cut5)))

    # Weight and SF here
    if not isData:
        weights.add('pileup', pu)
        weights.add('ele_id', ele_medium_id_sf)
        weights.add('pho_id', pho_medium_id_sf)
        weights.add('ele_reco', ele_reco_sf)
        # 2016,2017 are not applied yet
        if self._year == "2018":
            weights.add('ele_trigger', ele_trig_weight)

    # ---------------------------- Fill hist --------------------------------------#
    # Initial events
    out["sumw"][dataset] += len(Initial_events)
    # Cut flow loop
    for cut in [cut0, cut1, cut2, cut3, cut4, cut5]:
        out["cutflow"].fill(dataset=dataset, cutflow=cut)
    # Primary vertex
    out['nPV'].fill(dataset=dataset, nPV=nPV)
    out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw)
    # -- met -- #
    out["met"].fill(dataset=dataset, met=MET_PT,
                    weight=skim_weight(weights.weight() * cuts))
    # --mass -- #
    out["mass"].fill(dataset=dataset, mass=Diele_mass,
                     weight=skim_weight(weights.weight() * cuts))
    # -- Ele1 -- #
    out["ele1pt"].fill(dataset=dataset, ele1pt=Ele1_PT,
                       weight=skim_weight(weights.weight() * cuts))
    out["ele1eta"].fill(dataset=dataset, ele1eta=Ele1_Eta,
                        weight=skim_weight(weights.weight() * cuts))
    out["ele1phi"].fill(dataset=dataset, ele1phi=Ele1_Phi,
                        weight=skim_weight(weights.weight() * cuts))
    # --Ele2 --#
    out["ele2pt"].fill(dataset=dataset, ele2pt=Ele2_PT,
                       weight=skim_weight(weights.weight() * cuts))
    out["ele2eta"].fill(dataset=dataset, ele2eta=Ele2_Eta,
                        weight=skim_weight(weights.weight() * cuts))
    out["ele2phi"].fill(dataset=dataset, ele2phi=Ele2_Phi,
                        weight=skim_weight(weights.weight() * cuts))
    # -- Photon -- #
    out["phopt"].fill(dataset=dataset, phopt=Pho_PT,
                      weight=skim_weight(weights.weight() * cuts))
    out["phoeta"].fill(dataset=dataset, phoeta=Pho_Eta,
                       weight=skim_weight(weights.weight() * cuts))
    out["phophi"].fill(dataset=dataset, phophi=Pho_Phi,
                       weight=skim_weight(weights.weight() * cuts))
    # -- Binned sieie hist -- #
    # FIX: the original 16 copy-pasted guards tested `len(arr > 0)` (paren
    # misplaced — always the full array length); the loop tests `len(arr) > 0`
    # and fills each per-bin histogram under its own axis name.
    for name in bin_name_list:
        if len(binned_sieie_hist[name]) > 0:
            out[name].fill(dataset=dataset, **{name: binned_sieie_hist[name]})
    return out
def process(self, events):
    """Boosted-jet (Hbb-style) analysis: build the leading AK8 candidate,
    define signal / muon-control selections, attach MC corrections, and
    fill template and N-1 histograms.

    Parameters
    ----------
    events : NanoEvents-style array for one chunk of a dataset.

    Returns
    -------
    The filled accumulator.
    """
    dataset = events.metadata['dataset']
    # Data has no generator-weight branch
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()

    # Hadronic trigger: OR of configured paths for data; all-pass for MC
    if isRealData:
        trigger = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            trigger = trigger | events.HLT[t]
    else:
        trigger = np.ones(events.size, dtype='bool')
    selection.add('trigger', trigger)

    # Muon trigger (same pattern) for the muon control region
    if isRealData:
        trigger = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger = trigger | events.HLT[t]
    else:
        trigger = np.ones(events.size, dtype='bool')
    selection.add('muontrigger', trigger)

    try:
        fatjets = events.FatJet
    except AttributeError:
        # early pancakes
        fatjets = events.CustomAK8Puppi

    # Derived fat-jet quantities: corrected softdrop mass, QCD "rho",
    # DDT-shifted N2, and the fully mass-scaled softdrop mass
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

    # Leading AK8 jet passing baseline kinematics (at most one per event)
    candidatejet = fatjets[
        # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
        (fatjets.pt > 200)
        & (abs(fatjets.eta) < 2.5)
        # & fatjets.isLoose  # not always available
    ][:, 0:1]
    selection.add('minjetkin',
                  ((candidatejet.pt >= 450)
                   & (candidatejet.msdcorr >= 47.)
                   & (abs(candidatejet.eta) < 2.5)).any())
    selection.add('jetacceptance',
                  ((candidatejet.msdcorr >= 47.)
                   & (candidatejet.pt < 1200)
                   & (candidatejet.msdcorr < 201.)).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('ddbpass', (candidatejet.btagDDBvL >= 0.89).any())

    # AK4 jets used for the b-tag veto / b-tag requirement
    jets = events.Jet[(events.Jet.pt > 30.)
                      & (abs(events.Jet.eta) < 2.5)
                      & events.Jet.isTight]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    # Veto: no medium b-tag in the hemisphere opposite the AK8 candidate
    ak4_opposite = jets[(dphi > np.pi / 2).all()]
    selection.add(
        'antiak4btagMediumOppHem',
        ak4_opposite.btagDeepB.max() < BTagEfficiency.btagWPs[self._year]['medium'])
    # Requirement: a medium b-tag away (dphi > 0.8) from the AK8 candidate
    ak4_away = jets[(dphi > 0.8).all()]
    selection.add(
        'ak4btagMedium08',
        ak4_away.btagDeepB.max() > BTagEfficiency.btagWPs[self._year]['medium'])
    selection.add('met', events.MET.pt < 140.)

    # Lepton counting for the 0-lepton signal and 1-muon control selections
    goodmuon = ((events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.4)
                & (events.Muon.pfRelIso04_all < 0.25)
                & (events.Muon.looseId).astype(bool))
    nmuons = goodmuon.sum()
    leadingmuon = events.Muon[goodmuon][:, 0:1]
    muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)
    nelectrons = (
        (events.Electron.pt > 10)
        & (abs(events.Electron.eta) < 2.5)
        & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()
    ntaus = ((events.Tau.pt > 20)
             & (events.Tau.idDecayMode).astype(bool)
             # bacon iso looser than Nano selection
             ).sum()
    selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('onemuon', (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('muonkin', ((leadingmuon.pt > 55.)
                              & (abs(leadingmuon.eta) < 2.1)).all())
    selection.add('muonDphiAK8',
                  (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2 * np.pi / 3).all().all())

    # MC-only corrections and gen-level flavor matching of the candidate jet
    if isRealData:
        genflavor = candidatejet.pt.zeros_like()
    else:
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
            1, clip=True).fillna(-1).flatten()
        add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt,
                             self._year)
        output['btagWeight'].fill(dataset=dataset,
                                  val=self._btagSF.addBtagWeight(
                                      weights, ak4_away))
        logger.debug("Weight statistics: %r" % weights._weightStats)

    # Boson-matched jets take the extra msd scale factor; unmatched keep msdcorr
    msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
        genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

    # Region name -> list of PackedSelection cut names
    regions = {
        'signal': [
            'trigger',
            'minjetkin',
        ],  #'noleptons','jetacceptance', 'noleptons','jetid',],#'jetid', 'noleptons',],# 'n2ddt','antiak4btagMediumOppHem'],#, 'met',],
        'muoncontrol': [
            'muontrigger',
            'minjetkin',
            'jetid',
            'muonDphiAK8',
            'muonkin',
            'ak4btagMedium08',
            'onemuon',
        ],  # 'muonkin', 'muonDphiAK8'],
        'noselection': [],
    }

    # Cutflow bookkeeping (currently only debug logging; fills commented out)
    for region, cuts in regions.items():
        allcuts = set()
        logger.debug(
            f"Filling cutflow with: {dataset}, {region}, {genflavor}, {weights.weight()}"
        )
        #output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor, cut=0, weight=weights.weight())
        #for i, cut in enumerate(cuts + ['ddbpass']):
        #    allcuts.add(cut)
        #    cut = selection.all(*allcuts)
        #    output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor[cut], cut=i + 1, weight=weights.weight()[cut])

    # Named systematic variations (currently unused; see commented loop below)
    systematics = [
        None,
        'jet_triggerUp',
        'jet_triggerDown',
        'btagWeightUp',
        'btagWeightDown',
        'btagEffStatUp',
        'btagEffStatDown',
    ]

    def normalize(val, cut):
        # Pad/clip to one entry per event so histograms receive flat arrays
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # Fill the template (and, with wmod, gen-response) histograms for
        # `region`, optionally under a named systematic or an explicit
        # per-event weight-modifier array.
        selections = regions[region]
        cut = selection.all(*selections)
        sname = 'nominal' if systematic is None else systematic
        if wmod is None:
            weight = weights.weight(modifier=systematic)[cut]
        else:
            weight = weights.weight()[cut] * wmod[cut]
        output['templates'].fill(
            dataset=dataset,
            region=region,
            #systematic=sname,
            #genflavor=genflavor[cut],
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(msd_matched, cut),
            #ddb=normalize(candidatejet.btagDDBvL, cut),
            weight=weight,
        )
        if wmod is not None:
            output['genresponse_noweight'].fill(
                dataset=dataset,
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=events.genWeight[cut] * wmod[cut],
            )
            output['genresponse'].fill(
                dataset=dataset,
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=weight,
            )

    for region in regions:
        # N-1 histogram for the n2ddt cut (all region cuts except 'n2ddt')
        cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
        output['nminus1_n2ddt'].fill(
            dataset=dataset,
            region=region,
            n2ddt=normalize(candidatejet.n2ddt, cut),
            weight=weights.weight()[cut],
        )
        #for systematic in systematics:
        fill(region)  #, systematic)
        if 'GluGluHToBB' in dataset:
            # LHE scale and per-column LHE weight variations for ggH samples
            for i in range(9):
                fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:, i])
            for c in events.LHEWeight.columns[1:]:
                fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])
    return output
def process(self, events):
    """Electron-channel Z->ee selection.

    Applies the double-electron trigger, electron ID, opposite-sign and
    Z-mass-window requirements, then fills the cutflow, nPV and
    per-object kinematic histograms.

    Parameters
    ----------
    events : NanoEvents chunk (awkward1 API).

    Returns
    -------
    The filled accumulator.

    Fix: ``dtype=np.bool`` replaced with ``dtype=bool`` — the ``np.bool``
    alias was deprecated in NumPy 1.20 and removed in 1.24, so the old
    spelling raises AttributeError on modern NumPy; ``bool`` is
    equivalent on all versions.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    # NOTE(review): `setname` is not defined in this method — presumably a
    # module-level global set by the driver script; confirm. The
    # conventional alternative is kept as a comment.
    dataset = setname  # events.metadata['dataset']
    # MC samples carry a genWeight branch; data does not.
    isData = 'genWeight' not in events.fields
    selection = processor.PackedSelection()

    # Cut flow step 0: all input events
    cut0 = np.zeros(len(events))

    # --- Selection helpers ---------------------------------------------

    def flat_dim(arr):
        """Flatten one jagged dimension and drop None entries -> numpy."""
        sub_arr = ak.flatten(arr)
        mask = ~ak.is_none(sub_arr)
        return ak.to_numpy(sub_arr[mask])

    def drop_na(arr):
        """Drop None entries from an awkward array."""
        mask = ~ak.is_none(arr)
        return arr[mask]

    def drop_na_np(arr):
        """Drop NaN entries from a numpy array."""
        mask = ~np.isnan(arr)
        return arr[mask]

    # --- Triggers -------------------------------------------------------
    # OR of all double-electron HLT paths that exist in this file.
    is_double_ele_trigger = True
    if not is_double_ele_trigger:
        double_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]

    # OR of all single-electron HLT paths (computed but not applied below).
    is_single_ele_trigger = True
    if not is_single_ele_trigger:
        single_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

    Initial_events = events
    print("#### Initial events: ", Initial_events)
    #events = events[single_ele_triggers_arr | double_ele_triggers_arr]
    events = events[double_ele_triggers_arr]

    ##----------- Cut flow1: Passing Triggers
    cut1 = np.ones(len(events))
    print("#### cut1: ", len(cut1))

    # --- Particle identification ---------------------------------------
    Electron = events.Electron

    def Electron_selection(ele):
        # pT > 25 GeV, |eta| < 2.5, cutBased > 2 (medium or tighter)
        return (ele.pt > 25) & (np.abs(ele.eta) < 2.5) & (ele.cutBased > 2)

    # Electron channel: at least two selected electrons
    Electron_mask = Electron_selection(Electron)
    Ele_channel_mask = ak.num(Electron[Electron_mask]) > 1
    Ele_channel_events = events[Ele_channel_mask]

    ##----------- Cut flow2: Electron channel
    cut2 = np.ones(len(Ele_channel_events)) * 2
    print("#### cut2: ", len(cut2))

    # --- Scale-factor lookups (MC only) --------------------------------
    if not isData:
        # PU weight with lookup table <-- On developing -->
        #get_pu_weight = self._corrections['get_pu_weight'][self._year]
        #pu = get_pu_weight(events.Pileup.nTrueInt)
        get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year]
        get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year]
        get_ele_trig_leg1_SF = self._corrections['get_ele_trig_leg1_SF'][self._year]
        get_ele_trig_leg1_data_Eff = self._corrections['get_ele_trig_leg1_data_Eff'][self._year]
        get_ele_trig_leg1_mc_Eff = self._corrections['get_ele_trig_leg1_mc_Eff'][self._year]
        get_ele_trig_leg2_SF = self._corrections['get_ele_trig_leg2_SF'][self._year]
        get_ele_trig_leg2_data_Eff = self._corrections['get_ele_trig_leg2_data_Eff'][self._year]
        get_ele_trig_leg2_mc_Eff = self._corrections['get_ele_trig_leg2_mc_Eff'][self._year]
        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(Ele_channel_events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]
        nPV = Ele_channel_events.PV.npvsGood
    else:
        nPV = Ele_channel_events.PV.npvsGood

    # --- Electron pairs -------------------------------------------------
    Ele = Ele_channel_events.Electron
    Electron_mask = Electron_selection(Ele)
    Ele_sel = Ele[Electron_mask]

    # All unique electron pairs per event; diele is the summed 4-vector.
    ele_pairs = ak.combinations(Ele_sel, 2, axis=1)
    ele_left, ele_right = ak.unzip(ele_pairs)
    diele = ele_left + ele_right

    # Opposite-sign pairs (summed charge == 0)
    os_mask = diele.charge == 0
    os_diele = diele[os_mask]
    os_ele_left = ele_left[os_mask]
    os_ele_right = ele_right[os_mask]
    os_event_mask = ak.num(os_diele) > 0
    Ele_os_channel_events = Ele_channel_events[os_event_mask]
    #selection.add('ossf',os_event_mask)

    # Helper function: High PT argmax
    def make_leading_pair(target, base):
        """Pick `target` at the per-event index where `base.pt` is maximal."""
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    # -- Only Leading pair --
    leading_diele = make_leading_pair(diele, diele)
    leading_ele = make_leading_pair(ele_left, diele)
    subleading_ele = make_leading_pair(ele_right, diele)

    # -- Scale Factor for each electron
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        """Per-event trigger weight = data efficiency / MC efficiency,
        combining both trigger legs via inclusion-exclusion."""
        per_ev_MC = \
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg2_mc_Eff(eta2, pt2) + \
            get_ele_trig_leg1_mc_Eff(eta2, pt2) * get_ele_trig_leg2_mc_Eff(eta1, pt1) - \
            get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
        per_ev_data = \
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) * get_ele_trig_leg2_SF(eta2, pt2) + \
            get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) * get_ele_trig_leg2_SF(eta1, pt1) - \
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2)
        return per_ev_data / per_ev_MC

    if not isData:
        # ID and reco scale factors: product over the two leading electrons,
        # evaluated at supercluster eta (deltaEtaSC + eta) and pt.
        ele_loose_id_sf = get_ele_loose_id_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_loose_id_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt))
        #print("Ele ID SC---->",ele_loose_id_sf)
        ele_reco_sf = get_ele_reco_sf(ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_reco_sf(ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt))
        #print("Ele RECO SC---->",ele_reco_sf)
        eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
        eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
        pt1 = ak.flatten(leading_ele.pt)
        pt2 = ak.flatten(subleading_ele.pt)
        ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)
        print("#### Test print trigger weight ####")
        print(ele_trig_weight)

    # --OS and Leading pair --
    leading_os_diele = make_leading_pair(os_diele, os_diele)
    leading_os_ele = make_leading_pair(os_ele_left, os_diele)
    subleading_os_ele = make_leading_pair(os_ele_right, os_diele)

    ##----------- Cut flow3: OSSF
    cut3 = np.ones(len(flat_dim(leading_os_diele))) * 3
    print("#### cut3: ", len(cut3))

    # Helper function: Zmass window
    def makeZmass_window_mask(dielecs, start=60, end=120):
        """Z-mass window mask: start <= m(ee) <= end (GeV)."""
        mask = (dielecs.mass >= start) & (dielecs.mass <= end)
        return mask

    # -- OS and Leading pair --
    Zmass_mask_os = makeZmass_window_mask(leading_os_diele)
    leading_os_Zwindow_ele = leading_os_ele[Zmass_mask_os]
    subleading_os_Zwindow_ele = subleading_os_ele[Zmass_mask_os]
    leading_os_Zwindow_diele = leading_os_diele[Zmass_mask_os]

    # Event-level mask: OS and Z-window on the leading pair.
    Zmass_event_mask = makeZmass_window_mask(leading_diele)
    Zmass_os_event_mask = ak.flatten(os_event_mask * Zmass_event_mask)
    Ele_Zmass_os_events = Ele_channel_events[Zmass_os_event_mask]

    ##----------- Cut flow4: Zmass
    cut4 = np.ones(len(flat_dim(leading_os_Zwindow_diele))) * 4
    print("#### cut4: ", len(cut4))

    ## << Selection method -- Need validation >>
    #print("a--->",len(Ele_channel_events))
    #print("b--->",len(Ele_os_channel_events))
    #print("b2--->",len(cut3))
    #print("c--->",len(Ele_Zmass_os_events))
    #print("c2--->",len(cut4))

    # Flattened kinematics for histogramming
    ele1PT = flat_dim(leading_os_Zwindow_ele.pt)
    ele1Eta = flat_dim(leading_os_Zwindow_ele.eta)
    ele1Phi = flat_dim(leading_os_Zwindow_ele.phi)
    ele2PT = flat_dim(subleading_os_Zwindow_ele.pt)
    ele2Eta = flat_dim(subleading_os_Zwindow_ele.eta)
    ele2Phi = flat_dim(subleading_os_Zwindow_ele.phi)
    Mee = flat_dim(leading_os_Zwindow_diele.mass)
    charge = flat_dim(leading_os_Zwindow_diele.charge)

    # --- Apply weight and hist ------------------------------------------
    # Weights are sized to the electron-channel event count (cut2 stage).
    weights = processor.Weights(len(cut2))

    # --- skim cut-weight
    def skim_weight(arr):
        """Drop None and zero entries so weights align with filled values."""
        mask1 = ~ak.is_none(arr)
        subarr = arr[mask1]
        mask2 = subarr != 0
        return ak.to_numpy(subarr[mask2])

    cuts = ak.flatten(Zmass_mask_os)
    if not isData:
        weights.add('pileup', pu)
        weights.add('ele_id', ele_loose_id_sf)
        weights.add('ele_reco', ele_reco_sf)
        #weights.add('ele_trigger',ele_trig_weight)

    # Initial events
    out["sumw"][dataset] += len(Initial_events)

    # Cut flow loop
    for cut in [cut0, cut1, cut2, cut3, cut4]:
        out["cutflow"].fill(
            dataset=dataset,
            cutflow=cut
        )

    # Primary vertex
    out['nPV'].fill(
        dataset=dataset,
        nPV=nPV,
        weight=weights.weight()
    )
    out['nPV_nw'].fill(
        dataset=dataset,
        nPV_nw=nPV
    )

    # Physics variables passing Zwindow
    out["mass"].fill(
        dataset=dataset,
        mass=Mee,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele1pt"].fill(
        dataset=dataset,
        ele1pt=ele1PT,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele1eta"].fill(
        dataset=dataset,
        ele1eta=ele1Eta,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele1phi"].fill(
        dataset=dataset,
        ele1phi=ele1Phi,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele2pt"].fill(
        dataset=dataset,
        ele2pt=ele2PT,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele2eta"].fill(
        dataset=dataset,
        ele2eta=ele2Eta,
        weight=skim_weight(weights.weight() * cuts)
    )
    out["ele2phi"].fill(
        dataset=dataset,
        ele2phi=ele2Phi,
        weight=skim_weight(weights.weight() * cuts)
    )
    return out
def process(self, events):
    """Boosted-Hbb style event selection (awkward0 / coffea-0.6 API).

    Builds the candidate AK8 jet with derived columns (corrected softdrop
    mass, rho, DDT-transformed N2), registers all event selections, fills
    a cumulative cutflow, and attaches MC event weights.

    Returns the filled accumulator.

    Fixes:
      * 'jetkin' acceptance cut now uses |eta| < 2.4; the original compared
        signed eta < 2.4, which silently accepted every negative-eta jet
        (the 'muonkin' cut below and the sibling processor in this file
        both use np.abs for the same acceptance).
      * 'muonDphiAK8' now takes |delta_phi|, matching the identical cut in
        the sibling processor; delta_phi is signed in (-pi, pi], so the
        unsigned comparison dropped half of the back-to-back topologies.
    """
    dataset = events.metadata['dataset']
    # Data files carry no generator-weight branch.
    isRealData = 'genWeight' not in events.columns
    output = self.accumulator.identity()
    selection = processor.PackedSelection()

    # AND of all HLT paths configured for this year
    trigger = np.ones(events.size, dtype='bool')
    for t in self._triggers[self._year]:
        trigger = trigger & events.HLT[t]
    selection.add('trigger', trigger)

    # Candidate AK8 jet = leading fatjet, with derived columns added
    fatjets = events.FatJet
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    candidatejet = fatjets[:, 0:1]
    selection.add('jetkin', ((candidatejet.pt > 450)
                             & (np.abs(candidatejet.eta) < 2.4)
                             & (candidatejet.msdcorr > 40.)).any())
    selection.add('jetid', (candidatejet.jetId & 2).any())  # tight id
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())

    jets = events.Jet[(events.Jet.pt > 30.)
                      & (events.Jet.jetId & 2)  # tight id
                      ]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dphi = ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1)
    # b-tag veto in the hemisphere opposite the candidate jet
    ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
    selection.add(
        'antiak4btagMediumOppHem',
        ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
    # b-tag requirement away from the candidate jet (muon control region)
    ak4_away = jets[(np.abs(dphi) > 0.8).all()]
    selection.add(
        'ak4btagMedium08',
        ak4_away.btagDeepB.max() > self._btagWPs['med'][self._year])
    selection.add('met', events.MET.pt < 140.)

    # Loose-lepton counting for the lepton veto / one-muon control region
    goodmuon = ((events.Muon.pt > 10)
                & (np.abs(events.Muon.eta) < 2.4)
                & (events.Muon.pfRelIso04_all < 0.25)
                & (events.Muon.looseId).astype(bool))
    nmuons = goodmuon.sum()
    leadingmuon = events.Muon[goodmuon][:, 0:1]
    muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

    nelectrons = (
        (events.Electron.pt > 10)
        & (np.abs(events.Electron.eta) < 2.5)
        & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()
    ntaus = ((events.Tau.pt > 20)
             & (events.Tau.idDecayMode).astype(bool)
             # bacon iso looser than Nano selection
             ).sum()
    selection.add('noleptons',
                  (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('onemuon',
                  (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('muonkin', ((leadingmuon.pt > 55.)
                              & (np.abs(leadingmuon.eta) < 2.1)).all())
    selection.add('muonDphiAK8', (np.abs(muon_ak8_pair.i0.delta_phi(
        muon_ak8_pair.i1)) > 2 * np.pi / 3).all().all())

    # Cumulative cutflow: each entry counts events passing all cuts so far
    cutflow = [
        'jetkin', 'trigger', 'jetid', 'n2ddt', 'antiak4btagMediumOppHem',
        'met', 'noleptons'
    ]
    allcuts = set()
    output['cutflow']['none'] += len(events)
    for cut in cutflow:
        allcuts.add(cut)
        output['cutflow'][cut] += selection.all(*allcuts).sum()

    weights = processor.Weights(len(events))
    if not isRealData:
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        # Hard-process bosons (pdgId 21..37) drive the V+jets NLO k-factor
        bosons = events.GenPart[(np.abs(events.GenPart.pdgId) >= 21)
                                & (np.abs(events.GenPart.pdgId) <= 37)
                                & events.GenPart.hasFlags(
                                    ['isHardProcess', 'isLastCopy'])]
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        # Closest generated boson in a combined (dR^2 + relative-pt^2) metric
        ak8_boson_pair = candidatejet.cross(bosons, nested=True)
        dR2 = ak8_boson_pair.i0.delta_r2(ak8_boson_pair.i1)
        dPt2 = ((ak8_boson_pair.i0.pt - ak8_boson_pair.i1.pt) /
                (ak8_boson_pair.i0.pt + ak8_boson_pair.i1.pt))**2
        # NOTE(review): matchedBoson is computed but never used in this
        # method — presumably groundwork for matchedBosonFlavor; confirm.
        matchedBoson = ak8_boson_pair.i1[(dR2 + dPt2).argmin()].flatten(axis=1)
    return output
def process(self, events):
    """BTagMu (muon-tagged fatjet) selection.

    Applies JECs, the BTagMu trigger OR, soft-muon-in-jet requirements,
    classifies the leading fatjet by hadron flavour (MC), and fills the
    per-selection fatjet/event histograms.

    Returns the filled accumulator.

    Fixes:
      * Muon eta cut was written ``abs(events.Muon.eta < 2.4)`` — abs() of
        a *boolean*, which is truthy for every muon with eta < 2.4 of any
        sign; the intended acceptance is ``abs(eta) < 2.4``.
      * ``nmusj1/2`` used ``ak.num`` on the dR mask, which counts all
        muons regardless of the mask; ``ak.sum`` over the boolean counts
        only muons actually within dR < 0.4.
      * ``str.strip("HLT_")`` strips any leading/trailing H/L/T/_ chars
        (a latent bug for paths whose name starts with those letters);
        replaced with an explicit prefix removal, identical for all
        trigger names currently configured.
      * ``abs(hadronFlavour == 5)`` wrapped the comparison in a no-op
        abs(); parenthesization now states the intent directly.
    """
    output = self.accumulator.identity()
    dataset = events.metadata['dataset']
    # Data files carry no generator-weight branch.
    isRealData = 'genWeight' not in events.fields
    if not isRealData:
        output['sumw'][dataset] += sum(events.genWeight)
        JECversion = JECversions[str(self.year)]['MC']
    else:
        output['nbtagmu'][dataset] += ak.count(events.event)
        # The run-period key is encoded in the dataset name after 'BTagMu'
        JECversion = JECversions[str(
            self.year)]['Data'][dataset.split('BTagMu')[1]]

    ############
    # Some corrections
    weights = processor.Weights(len(events))
    if not isRealData:
        weights.add('genWeight', events.genWeight)
        weights.add(
            'pileup_weight',
            self.puReweight(self.puFile, self.nTrueFile,
                            dataset)(events.Pileup.nPU))
    events.FatJet = self.applyJEC(events.FatJet,
                                  events.fixedGridRhoFastjetAll,
                                  events.caches[0], 'AK8PFPuppi',
                                  isRealData, JECversion)

    cuts = processor.PackedSelection()

    def _hlt_name(trig):
        """Remove a literal 'HLT_' prefix (str.strip would also eat any
        leading H/L/T/_ characters of the path name itself)."""
        return trig[len('HLT_'):] if trig.startswith('HLT_') else trig

    ############
    # Trigger selection
    # NOTE(review): both branches mutate self.triggers in place, leaking
    # state between processed chunks — confirm that is intended.
    if self.year == 2016:
        if 'BTagMu_AK4Jet300_Mu5' not in events.HLT.fields:
            self.triggers = [
                trigger.replace('AK4', '') for trigger in self.triggers
            ]
    elif self.year == 2018:
        for (i, trigger) in enumerate(self.triggers):
            if _hlt_name(trigger) not in events.HLT.fields:
                self.triggers[i] = trigger + "_noalgo"
    # OR of all configured trigger paths
    trig_arrs = [events.HLT[_hlt_name(_trig)] for _trig in self.triggers]
    req_trig = np.zeros(len(events), dtype='bool')
    for t in trig_arrs:
        req_trig = req_trig | t
    cuts.add('trigger', ak.to_numpy(req_trig))

    ############
    # Basic cuts
    ## Muon cuts
    # muon twiki: https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideMuonIdRun2
    # NOTE(review): the inverted tightId / isolation cuts select soft,
    # non-isolated muons (muon-in-jet enrichment) — presumably intended;
    # confirm against the BTV selection.
    events.Muon = events.Muon[(events.Muon.pt > 5)
                              & (abs(events.Muon.eta) < 2.4)
                              & (events.Muon.tightId != 1)
                              & (events.Muon.pfRelIso04_all > 0.15)]
    events.Muon = ak.pad_none(events.Muon, 2, axis=1)

    ## Jet cuts (not used)
    events.Jet = events.Jet[(events.Jet.pt > 25)
                            & (abs(events.Jet.eta) <= 2.5)]
    #req_jets = (ak.count(events.Jet.pt, axis=1) >= 2)

    ## FatJet cuts: kinematics, id, and at least two subjets
    events.FatJet = events.FatJet[
        (events.FatJet.pt > self._mask_fatjets['basic']['pt_cut'])
        & (abs(events.FatJet.eta) <= self._mask_fatjets['basic']['eta_cut'])
        & (events.FatJet.jetId > self._mask_fatjets['basic']['jetId_cut'])
        & (ak.count(events.FatJet.subjets.pt, axis=2) >= 2)]

    ## subjet sel to crosscheck
    #print(events['FatJetSVs'])

    ## Event level variables
    eventVariables = {}
    eventVariables['nfatjet'] = ak.num(events.FatJet)

    ## Leading jet variables
    leadfatjet = ak.firsts(events.FatJet)
    leadfatjet['tau21'] = leadfatjet.tau2 / leadfatjet.tau1
    subjet1 = ak.pad_none(leadfatjet.subjets, 2)[:, 0]
    subjet2 = ak.pad_none(leadfatjet.subjets, 2)[:, 1]
    leadfatjet['nsv1'] = get_nsv(subjet1, events.SV)
    leadfatjet['nsv2'] = get_nsv(subjet2, events.SV)
    # Count selected muons within dR < 0.4 of each subjet
    leadfatjet['nmusj1'] = ak.sum(subjet1.delta_r(events.Muon) < 0.4, axis=1)
    leadfatjet['nmusj2'] = ak.sum(subjet2.delta_r(events.Muon) < 0.4, axis=1)
    # Require a muon associated to each subjet
    fatjet_mutag = (leadfatjet.nmusj1 >= 1) & (leadfatjet.nmusj2 >= 1)
    cuts.add('fatjet_mutag', ak.to_numpy(fatjet_mutag))

    # Pass/fail working-point selections for each double-x tagger
    # (values exactly at the WP cut fall in neither category)
    for DDX in self._mask_DDX.keys():
        for wp, cut in self._mask_DDX[DDX].items():
            DDX_pass = (leadfatjet[f'btag{DDX}vLV2'] > cut)
            DDX_fail = (leadfatjet[f'btag{DDX}vLV2'] < cut)
            cuts.add(f'{DDX}_pass{wp}wp', ak.to_numpy(DDX_pass))
            cuts.add(f'{DDX}_fail{wp}wp', ak.to_numpy(DDX_fail))

    # Hadron-flavour classification of the leading fatjet (MC only);
    # bb/cc take priority, then single b/c, then light.
    flavors = {}
    if not isRealData:
        flavors['b'] = (leadfatjet.hadronFlavour == 5)
        flavors['c'] = (leadfatjet.hadronFlavour == 4)
        flavors['l'] = (leadfatjet.hadronFlavour < 4)
        flavors['bb'] = (leadfatjet.hadronFlavour == 5) & (
            leadfatjet.nBHadrons >= 2)  #& (leadfatjet.nCHadrons == 0)
        flavors['cc'] = (leadfatjet.hadronFlavour == 4) & (
            leadfatjet.nBHadrons == 0) & (leadfatjet.nCHadrons >= 2)
        #flavors['ll'] = abs(leadfatjet.hadronFlavour < 4) & (leadfatjet.nBHadrons == 0) & (leadfatjet.nCHadrons == 0)
        flavors['b'] = flavors['b'] & ~flavors['bb']
        flavors['c'] = flavors['c'] & ~flavors['cc']
        flavors['l'] = flavors['l'] & ~flavors['bb'] & ~flavors[
            'cc'] & ~flavors['b'] & ~flavors['c']
        #flavors['others'] = ~flavors['l'] & ~flavors['bb'] & ~flavors['cc'] & ~flavors['b'] & ~flavors['c']
    else:
        flavors['Data'] = np.ones(len(events), dtype='bool')

    # Named fatjet selections (kinematics + id + tau21)
    for selname, cut in self._mask_fatjets.items():
        sel = (leadfatjet.pt > cut['pt_cut']) & \
              (leadfatjet.msoftdrop > cut['mass_cut']) & \
              (abs(leadfatjet.eta) < cut['eta_cut']) & \
              (leadfatjet.jetId >= cut['jetId_cut']) & \
              (leadfatjet.tau21 < cut['tau21_cut'])
        #(leadfatjet.Hbb > cut['Hbb'])
        cuts.add(selname, ak.to_numpy(sel))

    # Region definitions: sets of named cuts
    selection = {}
    selection['basic'] = {'trigger', 'basic'}
    selection['pt350msd50'] = {'trigger', 'fatjet_mutag', 'pt350msd50'}
    selection['msd100tau06'] = {'trigger', 'fatjet_mutag', 'msd100tau06'}
    selection['pt400msd100tau06'] = {
        'trigger', 'fatjet_mutag', 'pt400msd100tau06'
    }
    # Derived pass/fail regions per tagger working point
    for mask_f in self._final_mask:
        for DDX in self._mask_DDX.keys():
            for wp, cut in self._mask_DDX[DDX].items():
                selection[f'{mask_f}{DDX}pass{wp}wp'] = selection[
                    mask_f].copy()
                selection[f'{mask_f}{DDX}pass{wp}wp'].add(
                    f'{DDX}_pass{wp}wp')
                selection[f'{mask_f}{DDX}fail{wp}wp'] = selection[
                    mask_f].copy()
                selection[f'{mask_f}{DDX}fail{wp}wp'].add(
                    f'{DDX}_fail{wp}wp')

    # Fill every histogram whose name encodes one of the regions above.
    # NOTE(review): sel[0] raises IndexError if a histogram name matches
    # no region — assumed to hold by construction of the accumulator.
    for histname, h in output.items():
        sel = [r for r in selection.keys() if r in histname.split('_')]
        if ((histname in self.fatjet_hists) | ('hist2d_fatjet' in histname)):
            for flav, mask in flavors.items():
                weight = weights.weight() * cuts.all(
                    *selection[sel[0]]) * ak.to_numpy(mask)
                fields = {
                    k: ak.fill_none(leadfatjet[k], -9999)
                    for k in h.fields if k in dir(leadfatjet)
                }
                h.fill(dataset=dataset, flavor=flav, **fields, weight=weight)
        if histname in self.event_hists:
            for flav, mask in flavors.items():
                weight = weights.weight() * cuts.all(
                    *selection[sel[0]]) * ak.to_numpy(mask)
                fields = {
                    k: ak.fill_none(eventVariables[k], -9999)
                    for k in h.fields if k in eventVariables.keys()
                }
                h.fill(dataset=dataset, flavor=flav, **fields, weight=weight)
    return output
def process(self, events): # Initialize accumulator out = self.accumulator.identity() dataset = sample_name # events.metadata['dataset'] # Data or MC isData = "genWeight" not in events.fields isFake = self._isFake # Stop processing if there is no event remain if len(events) == 0: return out # Golden Json file if (self._year == "2018") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD" if (self._year == "2017") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt" # <----- Get Scale factors ------># if not isData: # Egamma reco ID get_ele_reco_above20_sf = self._corrections["get_ele_reco_above20_sf"][ self._year ] get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][self._year] get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][self._year] # DoubleEG trigger # 2016, 2017 are not applied yet if self._year == "2018": get_ele_trig_leg1_SF = self._corrections["get_ele_trig_leg1_SF"][ self._year ] get_ele_trig_leg1_data_Eff = self._corrections[ "get_ele_trig_leg1_data_Eff" ][self._year] get_ele_trig_leg1_mc_Eff = self._corrections[ "get_ele_trig_leg1_mc_Eff" ][self._year] get_ele_trig_leg2_SF = self._corrections["get_ele_trig_leg2_SF"][ self._year ] get_ele_trig_leg2_data_Eff = self._corrections[ "get_ele_trig_leg2_data_Eff" ][self._year] get_ele_trig_leg2_mc_Eff = self._corrections[ "get_ele_trig_leg2_mc_Eff" ][self._year] # PU weight with custom made npy and multi-indexing pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64") pu = self._puweight_arr[pu_weight_idx] print("## pu_idx: ",len(pu_weight_idx),pu_weight_idx) print("## pu_arr: ",len(self._puweight_arr),self._puweight_arr) print("## pu:",len(pu),pu) selection = processor.PackedSelection() # Cut flow cut0 = np.zeros(len(events)) out["cutflow"].fill(dataset=dataset, cutflow=cut0) # <----- 
Helper functions ------># # Sort by PT helper function def sort_by_pt(ele, pho, jet): ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)] pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)] jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)] return ele, pho, jet # Lorentz vectors from coffea.nanoevents.methods import vector ak.behavior.update(vector.behavior) def TLorentz_vector(vec): vec = ak.zip( {"x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t}, with_name="LorentzVector", ) return vec def TLorentz_vector_cylinder(vec): vec = ak.zip( { "pt": vec.pt, "eta": vec.eta, "phi": vec.phi, "mass": vec.mass, }, with_name="PtEtaPhiMLorentzVector", ) return vec # <----- Selection ------># Initial_events = events # Good Run ( Golden Json files ) from coffea import lumi_tools if isData: lumi_mask_builder = lumi_tools.LumiMask(injson) lumimask = ak.Array( lumi_mask_builder.__call__(events.run, events.luminosityBlock) ) events = events[lumimask] # print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events))) # Stop processing if there is no event remain if len(events) == 0: return out ##----------- Cut flow1: Passing Triggers # double lepton trigger is_double_ele_trigger = True if not is_double_ele_trigger: double_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._doubleelectron_triggers[self._year]: if path not in events.HLT.fields: continue double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path] # single lepton trigger is_single_ele_trigger = True if not is_single_ele_trigger: single_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._singleelectron_triggers[self._year]: if path not in events.HLT.fields: continue single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path] events.Electron, events.Photon, events.Jet = sort_by_pt( 
events.Electron, events.Photon, events.Jet ) # Good Primary vertex nPV = events.PV.npvsGood nPV_nw = events.PV.npvsGood if not isData: nPV = nPV * pu print(pu) # Apply cut1 events = events[double_ele_triggers_arr] if not isData: pu = pu[double_ele_triggers_arr] # Stop processing if there is no event remain if len(events) == 0: return out cut1 = np.ones(len(events)) out["cutflow"].fill(dataset=dataset, cutflow=cut1) # Set Particles Electron = events.Electron Muon = events.Muon Photon = events.Photon MET = events.MET Jet = events.Jet # --Muon ( only used to calculate dR ) MuSelmask = ( (Muon.pt >= 10) & (abs(Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15) ) Muon = Muon[MuSelmask] # --Loose Muon ( For Loose Muon veto ) LoooseMuSelmask = ( (Muon.pt > 20) & (abs(Muon.eta) < 2.4) & (Muon.isPFcand) & (Muon.isGlobal | Muon.isTracker) & (Muon.pfRelIso03_all < 0.25) ) # Reference: VBS Zgamma+2jets VetoMuon = Muon[LoooseMuSelmask] ##----------- Cut flow2: Electron Selection EleSelmask = ( (Electron.pt >= 10) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1) ) | ( (Electron.pt >= 10) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2) ) Electron = Electron[EleSelmask] # Event with 3 Electrons # apply cut 2 Tri_electron_mask = ak.num(Electron) == 3 Electron = Electron[Tri_electron_mask] Photon = Photon[Tri_electron_mask] Jet = Jet[Tri_electron_mask] MET = MET[Tri_electron_mask] Muon = Muon[Tri_electron_mask] VetoMuon = VetoMuon[Tri_electron_mask] if not isData: pu = pu[Tri_electron_mask] events = events[Tri_electron_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut2 = np.ones(len(Photon)) * 2 out["cutflow"].fill(dataset=dataset, cutflow=cut2) ##----------- Cut flow3: 4th lepton veto (Loose Muon) # 
Veto 4th Loose muon # apply cut 3 fourth_lepton_veto = ak.num(VetoMuon) < 1 Electron = Electron[fourth_lepton_veto] Photon = Photon[fourth_lepton_veto] Jet = Jet[fourth_lepton_veto] MET = MET[fourth_lepton_veto] Muon = Muon[fourth_lepton_veto] if not isData: pu = pu[fourth_lepton_veto] events = events[fourth_lepton_veto] # Stop processing if there is no event remain if len(Electron) == 0: return out cut3 = np.ones(len(Photon)) * 3 out["cutflow"].fill(dataset=dataset, cutflow=cut3) ##----------- Cut flow4: Photon Selection # Basic photon selection isgap_mask = (abs(Photon.eta) < 1.442) | ( (abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5) ) Pixel_seed_mask = ~Photon.pixelSeed if (dataset == "ZZ") and (self._year == "2017"): PT_ID_mask = (Photon.pt >= 20) & ( Photon.cutBasedBitmap >= 3 ) # 2^0(Loose) + 2^1(Medium) + 2^2(Tights) else: PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1) # dR cut with selected Muon and Electrons dr_pho_ele_mask = ak.all( Photon.metric_table(Electron) >= 0.5, axis=-1 ) # default metric table: delta_r dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1) # genPartFlav cut """ if dataset == "WZG": isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11) PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask elif dataset == "WZ": isPrompt = (Photon.genPartFlav == 1) PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask else: PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask """ PhoSelmask = ( PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask ) Photon = Photon[PhoSelmask] # Apply cut 4 A_photon_mask = ak.num(Photon) > 0 Electron = Electron[A_photon_mask] Photon = Photon[A_photon_mask] Jet = Jet[A_photon_mask] Muon = Muon[A_photon_mask] MET = MET[A_photon_mask] if not isData: pu = pu[A_photon_mask] events = events[A_photon_mask] # Stop processing if there is 
no event remain if len(Electron) == 0: return out cut4 = np.ones(len(Photon)) * 4 out["cutflow"].fill(dataset=dataset, cutflow=cut4) ##----------- Cut flow5: OSSF # OSSF index maker @numba.njit def find_3lep(events_leptons, builder): for leptons in events_leptons: builder.begin_list() nlep = len(leptons) for i0 in range(nlep): for i1 in range(i0 + 1, nlep): if leptons[i0].charge + leptons[i1].charge != 0: continue for i2 in range(nlep): if len({i0, i1, i2}) < 3: continue builder.begin_tuple(3) builder.index(0).integer(i0) builder.index(1).integer(i1) builder.index(2).integer(i2) builder.end_tuple() builder.end_list() return builder eee_triplet_idx = find_3lep(Electron, ak.ArrayBuilder()).snapshot() ossf_mask = ak.num(eee_triplet_idx) == 2 # Apply cut 5 eee_triplet_idx = eee_triplet_idx[ossf_mask] Electron = Electron[ossf_mask] Photon = Photon[ossf_mask] Jet = Jet[ossf_mask] MET = MET[ossf_mask] if not isData: pu = pu[ossf_mask] events = events[ossf_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5 out["cutflow"].fill(dataset=dataset, cutflow=cut5) # Define Electron Triplet Triple_electron = [Electron[eee_triplet_idx[idx]] for idx in "012"] Triple_eee = ak.zip( { "lep1": Triple_electron[0], "lep2": Triple_electron[1], "lep3": Triple_electron[2], "p4": TLorentz_vector(Triple_electron[0] + Triple_electron[1]), } ) # Ele pair selector --> Close to Z mass bestZ_idx = ak.singletons(ak.argmin(abs(Triple_eee.p4.mass - 91.1876), axis=1)) Triple_eee = Triple_eee[bestZ_idx] leading_ele = Triple_eee.lep1 subleading_ele = Triple_eee.lep2 third_ele = Triple_eee.lep3 def make_leading_pair(target, base): return target[ak.argmax(base.pt, axis=1, keepdims=True)] leading_pho = make_leading_pair(Photon, Photon) # -- Scale Factor for each electron # Trigger weight helper function def Trigger_Weight(eta1, pt1, eta2, pt2): per_ev_MC = ( get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg2_mc_Eff(eta2, 
pt2) + get_ele_trig_leg1_mc_Eff(eta2, pt2) * get_ele_trig_leg2_mc_Eff(eta1, pt1) - get_ele_trig_leg1_mc_Eff(eta1, pt1) * get_ele_trig_leg1_mc_Eff(eta2, pt2) ) per_ev_data = ( get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) * get_ele_trig_leg2_SF(eta2, pt2) + get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) * get_ele_trig_leg2_SF(eta1, pt1) - get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2) ) return per_ev_data / per_ev_MC if not isData: ## -------------< Egamma ID and Reco Scale factor > -----------------## get_pho_medium_id_sf = get_pho_medium_id_sf( ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt) ) ele_reco_sf = ( get_ele_reco_above20_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt), ) * get_ele_reco_above20_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt), ) * get_ele_reco_above20_sf( ak.flatten(third_ele.deltaEtaSC + third_ele.eta), ak.flatten(third_ele.pt), ) ) ele_medium_id_sf = ( get_ele_medium_id_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt), ) * get_ele_medium_id_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt), ) * get_ele_medium_id_sf( ak.flatten(third_ele.deltaEtaSC + third_ele.eta), ak.flatten(third_ele.pt), ) ) ## -------------< Double Electron Trigger Scale factor > -----------------## eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta) eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta) pt1 = ak.flatten(leading_ele.pt) pt2 = ak.flatten(subleading_ele.pt) # -- 2017,2016 are not applied yet if self._year == "2018": ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2) ##----------- Cut flow6: Event selection # Mee cut diele = Triple_eee.p4 
Mee_cut_mask = ak.firsts(diele.mass) > 4 # Z mass window # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) < 15 # SR, CR_ZZA, CR_Z+jets, CR_Conversion # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 5 # CR_t-enriched # zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 15 # CR_Conversion # M(eee) cut SR, CR_ZZA, CR_Z+jets, CR_t enriched # eee = Triple_eee.lep1 + Triple_eee.lep2 + Triple_eee.lep3 # Meee_cut_mask = ak.firsts(eee.mass > 100) # Meee_cut_mask = ak.firsts(eee.mass <= 100) # b-Jet veto cut #SR, CR_ZZA, CR_Z+jets, CR_Conversion # bjet_mask = (Jet.btagCSVV2 > 0.4184) & (Jet.pt > 30) # bjet_veto_mask = ak.num(Jet[bjet_mask]) == 0 # bjet_veto_mask = ak.num(Jet[bjet_mask]) > 0 # CR_t-enriched # Electron PT cuts Elept_mask = ak.firsts( (leading_ele.pt >= 25) & (subleading_ele.pt >= 10) & (third_ele.pt >= 25) ) # MET cuts MET_mask = MET > 20 # Baseline # MET_mask = MET.pt > 30 # SR, CR-ZZE, CR-t-entirched # MET_mask = MET.pt <= 30 # CR-Z+jets. CR-Conversion # Mask # Event_sel_mask = Elept_mask & MET_mask & bjet_veto_mask & Mee_cut_mask & zmass_window_mask & Meee_cut_mask # SR,CR Event_sel_mask = Elept_mask & MET_mask & Mee_cut_mask # SR,CR # Apply cut6 Triple_eee_sel = Triple_eee[Event_sel_mask] leading_pho_sel = leading_pho[Event_sel_mask] MET_sel = MET[Event_sel_mask] events = events[Event_sel_mask] # Photon EE and EB isEE_mask = leading_pho.isScEtaEE isEB_mask = leading_pho.isScEtaEB Pho_EE = leading_pho[isEE_mask & Event_sel_mask] Pho_EB = leading_pho[isEB_mask & Event_sel_mask] # Stop processing if there is no event remain if len(leading_pho_sel) == 0: return out cut6 = np.ones(ak.sum(ak.num(leading_pho_sel) > 0)) * 6 out["cutflow"].fill(dataset=dataset, cutflow=cut6) ## -------------------- Prepare making hist --------------# # Photon phoPT = ak.flatten(leading_pho_sel.pt) phoEta = ak.flatten(leading_pho_sel.eta) phoPhi = ak.flatten(leading_pho_sel.phi) # Photon EE if len(Pho_EE.pt) != 0: Pho_EE_PT = ak.flatten(Pho_EE.pt) 
Pho_EE_Eta = ak.flatten(Pho_EE.eta) Pho_EE_Phi = ak.flatten(Pho_EE.phi) Pho_EE_sieie = ak.flatten(Pho_EE.sieie) Pho_EE_hoe = ak.flatten(Pho_EE.hoe) Pho_EE_Iso_charge = ak.flatten(Pho_EE.pfRelIso03_chg) # Photon EB if len(Pho_EB.pt) != 0: Pho_EB_PT = ak.flatten(Pho_EB.pt) Pho_EB_Eta = ak.flatten(Pho_EB.eta) Pho_EB_Phi = ak.flatten(Pho_EB.phi) Pho_EB_sieie = ak.flatten(Pho_EB.sieie) Pho_EB_hoe = ak.flatten(Pho_EB.hoe) Pho_EB_Iso_charge = ak.flatten(Pho_EB.pfRelIso03_chg) # Electrons ele1PT = ak.flatten(Triple_eee_sel.lep1.pt) ele1Eta = ak.flatten(Triple_eee_sel.lep1.eta) ele1Phi = ak.flatten(Triple_eee_sel.lep1.phi) ele2PT = ak.flatten(Triple_eee_sel.lep2.pt) ele2Eta = ak.flatten(Triple_eee_sel.lep2.eta) ele2Phi = ak.flatten(Triple_eee_sel.lep2.phi) ele3PT = ak.flatten(Triple_eee_sel.lep3.pt) ele3Eta = ak.flatten(Triple_eee_sel.lep3.eta) ele3Phi = ak.flatten(Triple_eee_sel.lep3.phi) charge = ak.flatten(Triple_eee.lep1.charge + Triple_eee.lep2.charge) # MET met = ak.to_numpy(MET_sel) # M(eea) M(ee) diele = Triple_eee_sel.p4 eeg_vec = diele + leading_pho_sel Meea = ak.flatten(eeg_vec.mass) Mee = ak.flatten(Triple_eee_sel.p4.mass) # --- Apply weight and hist if isFake: weights = processor.Weights(len(cut6)) else: weights = processor.Weights(len(cut5)) # -------------------- Sieie bins---------------------------# def make_bins(pt, eta, bin_range_str): bin_dict = { "PT_1_eta_1": (pt > 20) & (pt < 30) & (eta < 1), "PT_1_eta_2": (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5), "PT_1_eta_3": (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2), "PT_1_eta_4": (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5), "PT_2_eta_1": (pt > 30) & (pt < 40) & (eta < 1), "PT_2_eta_2": (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5), "PT_2_eta_3": (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2), "PT_2_eta_4": (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5), "PT_3_eta_1": (pt > 40) & (pt < 50) & (eta < 1), "PT_3_eta_2": (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5), "PT_3_eta_3": (pt > 40) & (pt 
< 50) & (eta > 1.5) & (eta < 2), "PT_3_eta_4": (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5), "PT_4_eta_1": (pt > 50) & (eta < 1), "PT_4_eta_2": (pt > 50) & (eta > 1) & (eta < 1.5), "PT_4_eta_3": (pt > 50) & (eta > 1.5) & (eta < 2), "PT_4_eta_4": (pt > 50) & (eta > 2) & (eta < 2.5), } binmask = bin_dict[bin_range_str] return binmask bin_name_list = [ "PT_1_eta_1", "PT_1_eta_2", "PT_1_eta_3", "PT_1_eta_4", "PT_2_eta_1", "PT_2_eta_2", "PT_2_eta_3", "PT_2_eta_4", "PT_3_eta_1", "PT_3_eta_2", "PT_3_eta_3", "PT_3_eta_4", "PT_4_eta_1", "PT_4_eta_2", "PT_4_eta_3", "PT_4_eta_4", ] ## -- Fake-fraction Lookup table --## if isFake: # Make Bin-range mask binned_pteta_mask = {} for name in bin_name_list: binned_pteta_mask[name] = make_bins( ak.flatten(leading_pho_sel.pt), ak.flatten(abs(leading_pho_sel.eta)), name, ) # Read Fake fraction --> Mapping bin name to int() in_dict = np.load('Fitting_v2/results_210517.npy',allow_pickle="True")[()] idx=0 fake_dict ={} for i,j in in_dict.items(): fake_dict[idx] = j idx+=1 # Reconstruct Fake_weight fw= 0 for i,j in binned_pteta_mask.items(): fw = fw + j*fake_dict[bin_name_list.index(i)] # Process 0 weight to 1 @numba.njit def zero_one(x): if x == 0: x = 1 return x vec_zero_one = np.vectorize(zero_one) fw = vec_zero_one(fw) # --- skim cut-weight if not isFake: def skim_weight(arr): mask1 = ~ak.is_none(arr) subarr = arr[mask1] mask2 = subarr != 0 return ak.to_numpy(subarr[mask2]) else: def skim_weight(arr): return arr if not isFake: cuts = Event_sel_mask cuts_pho_EE = ak.flatten(isEE_mask) cuts_pho_EB = ak.flatten(isEB_mask) if isFake: cuts = np.ones(len(Event_sel_mask)) cuts_pho_EE = ak.flatten(isEE_mask & Event_sel_mask) cuts_pho_EB = ak.flatten(isEB_mask & Event_sel_mask) if isFake: weights.add("fake_fraction", fw) # Weight and SF here if not (isData | isFake): weights.add("pileup", pu) weights.add("ele_id", ele_medium_id_sf) weights.add("pho_id", get_pho_medium_id_sf) weights.add("ele_reco", ele_reco_sf) # 2016,2017 are not applied 
yet if self._year == "2018": weights.add("ele_trigger", ele_trig_weight) # ---------------------------- Fill hist --------------------------------------# # Initial events out["sumw"][dataset] += len(Initial_events) print("cut1: {0},cut2: {1},cut3: {2},cut4: {3},cut5: {4},cut6: {5},cut7: {6}".format(len(cut0), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5),len(cut6))) ## Cut flow loop #for cut in [cut0, cut1, cut2, cut3, cut4, cut5,cut6]: # out["cutflow"].fill(dataset=dataset, cutflow=cut) # Primary vertex out["nPV"].fill( dataset=dataset, nPV=nPV, ) out["nPV_nw"].fill(dataset=dataset, nPV_nw=nPV_nw) # Fill hist # -- met -- # out["met"].fill( dataset=dataset, met=met, weight=skim_weight(weights.weight() * cuts) ) # --mass -- # out["mass"].fill( dataset=dataset, mass=Mee, weight=skim_weight(weights.weight() * cuts) ) out["mass_eea"].fill( dataset=dataset, mass_eea=Meea, weight=skim_weight(weights.weight() * cuts) ) # -- Electron -- # out["ele1pt"].fill( dataset=dataset, ele1pt=ele1PT, weight=skim_weight(weights.weight() * cuts) ) out["ele1eta"].fill( dataset=dataset, ele1eta=ele1Eta, weight=skim_weight(weights.weight() * cuts), ) out["ele1phi"].fill( dataset=dataset, ele1phi=ele1Phi, weight=skim_weight(weights.weight() * cuts), ) out["ele2pt"].fill( dataset=dataset, ele2pt=ele2PT, weight=skim_weight(weights.weight() * cuts) ) out["ele2eta"].fill( dataset=dataset, ele2eta=ele2Eta, weight=skim_weight(weights.weight() * cuts), ) out["ele2phi"].fill( dataset=dataset, ele2phi=ele2Phi, weight=skim_weight(weights.weight() * cuts), ) out["ele3pt"].fill( dataset=dataset, ele3pt=ele3PT, weight=skim_weight(weights.weight() * cuts) ) # -- Photon -- # out["phopt"].fill( dataset=dataset, phopt=phoPT, weight=skim_weight(weights.weight() * cuts) ) out["phoeta"].fill( dataset=dataset, phoeta=phoEta, weight=skim_weight(weights.weight() * cuts) ) out["phophi"].fill( dataset=dataset, phophi=phoPhi, weight=skim_weight(weights.weight() * cuts) ) if len(Pho_EE.pt) != 0: 
out["pho_EE_pt"].fill( dataset=dataset, pho_EE_pt=Pho_EE_PT, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) out["pho_EE_eta"].fill( dataset=dataset, pho_EE_eta=Pho_EE_Eta, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) out["pho_EE_phi"].fill( dataset=dataset, pho_EE_phi=Pho_EE_Phi, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) out["pho_EE_hoe"].fill( dataset=dataset, pho_EE_hoe=Pho_EE_hoe, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) out["pho_EE_sieie"].fill( dataset=dataset, pho_EE_sieie=Pho_EE_sieie, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) out["pho_EE_Iso_chg"].fill( dataset=dataset, pho_EE_Iso_chg=Pho_EE_Iso_charge, weight=skim_weight(weights.weight() * cuts * cuts_pho_EE), ) if len(Pho_EB.pt) != 0: out["pho_EB_pt"].fill( dataset=dataset, pho_EB_pt=Pho_EB_PT, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) out["pho_EB_eta"].fill( dataset=dataset, pho_EB_eta=Pho_EB_Eta, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) out["pho_EB_phi"].fill( dataset=dataset, pho_EB_phi=Pho_EB_Phi, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) out["pho_EB_hoe"].fill( dataset=dataset, pho_EB_hoe=Pho_EB_hoe, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) out["pho_EB_sieie"].fill( dataset=dataset, pho_EB_sieie=Pho_EB_sieie, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) out["pho_EB_Iso_chg"].fill( dataset=dataset, pho_EB_Iso_chg=Pho_EB_Iso_charge, weight=skim_weight(weights.weight() * cuts * cuts_pho_EB), ) return out
def process(self, events):
    """Select boosted HWW/Hbb candidate events and fill the cutflow histogram.

    Builds good muons/electrons, AK4 jets, AK8 fat jets and a lepton-subtracted
    fat-jet collection from NanoAOD-style ``events``, defines per-channel
    (e / mu) selections, and fills ``output['cutflow']`` cumulatively.

    Parameters
    ----------
    events : NanoEvents-like array with Muon/Electron/Jet/FatJet/FatJetLS/MET
        collections and HLT/Flag branches.

    Returns
    -------
    The processor accumulator with 'sumw' and 'cutflow' filled.
    """
    # get meta infos
    dataset = events.metadata["dataset"]
    # data files carry no generator weight branch
    isRealData = not hasattr(events, "genWeight")
    n_events = len(events)
    selection = processor.PackedSelection()
    weights = processor.Weights(n_events)
    output = self.accumulator.identity()

    # sum of generator weights (MC only)
    if not isRealData:
        output['sumw'][dataset] += awkward1.sum(events.genWeight)

    # trigger: OR of all configured HLT paths per channel; a path missing
    # from this file is warned about and skipped, not fatal
    triggers = {}
    for channel in ["e", "mu"]:
        trigger = np.zeros(len(events), dtype='bool')
        for t in self._trigger[channel]:
            try:
                trigger = trigger | events.HLT[t]
            except Exception:  # narrowed from bare `except:` so Ctrl-C still works
                warnings.warn("Missing trigger %s" % t, RuntimeWarning)
        triggers[channel] = trigger

    # met filter: logical AND of the recommended event-quality flags
    met_filters = ["goodVertices",
                   "globalSuperTightHalo2016Filter",
                   "HBHENoiseFilter",
                   "HBHENoiseIsoFilter",
                   "EcalDeadCellTriggerPrimitiveFilter",
                   "BadPFMuonFilter",
                   ]
    met_filters_mask = np.ones(len(events), dtype='bool')
    for t in met_filters:
        met_filters_mask = met_filters_mask & events.Flag[t]
    selection.add("met_filter", awkward1.to_numpy(met_filters_mask))

    # load objects
    muons = events.Muon
    electrons = events.Electron
    jets = events.Jet
    fatjets = events.FatJet
    subjets = events.SubJet
    fatjetsLS = events.FatJetLS
    met = events.MET

    # muons: medium ID, mini-isolation, kinematics and impact-parameter cuts
    goodmuon = (
        (muons.mediumId)
        & (muons.miniPFRelIso_all <= 0.2)
        & (muons.pt >= 27)
        & (abs(muons.eta) <= 2.4)
        & (abs(muons.dz) < 0.1)
        & (abs(muons.dxy) < 0.05)
        & (muons.sip3d < 4)
    )
    good_muons = muons[goodmuon]
    ngood_muons = awkward1.sum(goodmuon, axis=1)

    # electrons: MVA WP90 (noIso), barrel-only eta, impact-parameter cuts
    goodelectron = (
        (electrons.mvaFall17V2noIso_WP90)
        & (electrons.pt >= 30)
        & (abs(electrons.eta) <= 1.479)
        & (abs(electrons.dz) < 0.1)
        & (abs(electrons.dxy) < 0.05)
        & (electrons.sip3d < 4)
    )
    good_electrons = electrons[goodelectron]
    ngood_electrons = awkward1.sum(goodelectron, axis=1)

    # good leptons, sorted by pt
    # NOTE(review): awkward1.argsort defaults to ascending order, so
    # firsts() below picks the LOWEST-pt lepton — confirm this is intended
    good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
    good_leptons = good_leptons[awkward1.argsort(good_leptons.pt)]

    # lepton candidate: first lepton of each event (None when empty)
    candidatelep = awkward1.firsts(good_leptons)

    # lepton channel selection: exactly one lepton of the channel flavor
    selection.add("ch_e",
                  awkward1.to_numpy((triggers["e"]) & (ngood_electrons == 1) & (ngood_muons == 0)))
    # not sure if need to require 0 muons or 0 electrons in the next line
    selection.add("ch_mu",
                  awkward1.to_numpy((triggers["mu"]) & (ngood_electrons == 0) & (ngood_muons == 1)))

    # jets: HT from jets above 30 GeV; tight-ID central good jets
    ht = awkward1.sum(jets[jets.pt > 30].pt, axis=1)
    selection.add("ht_400", awkward1.to_numpy(ht >= 400))
    goodjet = (
        (jets.isTight)
        & (jets.pt > 30)
        & (abs(jets.eta) <= 2.5)
    )
    good_jets = jets[goodjet]

    # fat jets
    jID = "isTight"
    # TODO: add mass correction
    # a way to get the first two subjets
    # cart = awkward1.cartesian([fatjets, subjets], nested=True)
    # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
    # sj1 = subjets[idxes[:,:,0]]
    # sj2 = subjets[idxes[:,:,1]]
    good_fatjet = (
        (getattr(fatjets, jID))
        & (abs(fatjets.eta) <= 2.4)
        & (fatjets.pt > 50)
        & (fatjets.msoftdrop > 30)
        & (fatjets.msoftdrop < 210)
        #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2) # TODO: require 2 subjets?
        # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
        & (awkward1.all(fatjets.subjets.pt >= 20))
        & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4))
    )
    good_fatjets = fatjets[good_fatjet]

    # hbb candidate: high-pt fat jet well separated from the lepton
    mask_hbb = (
        (good_fatjets.pt > 200)
        & (good_fatjets.delta_r(candidatelep) > 2.0)
    )
    candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

    # b-tag
    #& (good_fatjets.particleNetMD_Xbb > 0.9)
    selection.add('hbb_btag', awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8))  # score would be larger for tight category (0.97)

    # No AK4 b-tagged jets away from bb jet
    jets_HbbV = jets[good_jets.delta_r(candidateHbb) >= 1.2]
    selection.add('hbb_vetobtagaway',
                  awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False)
                                    > BTagEfficiency.btagWPs[self._year]['medium']))

    # fat jets Lepton Subtracted
    # wjj candidate: closest lepton-subtracted fat jet beyond dR 1.2
    mask_wjj = (
        (fatjetsLS.pt > 50)
        & (fatjetsLS.delta_r(candidatelep) > 1.2)
        # need to add 2 subjets w pt > 20 & eta<2.4
        # need to add ID?
    )
    candidateWjj = awkward1.firsts(
        fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep), axis=1, keepdims=True)])
    # add t2/t1 <= 0.75 (0.45 HP)
    selection.add('hww_mass', awkward1.to_numpy(candidateWjj.mass >= 10))

    print('met ', met)  # debug output left in place intentionally
    # wjjlnu info
    #HSolverLiInfo hwwInfoLi;
    # qqSDmass = candidateWjj.msoftdrop
    # hwwLi = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
    #neutrino = hwwInfoLi.neutrino;
    #wlnu = hwwInfoLi.wlnu;
    #wqq = hwwInfoLi.wqqjet;
    #hWW = hwwInfoLi.hWW;
    #wwDM = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
    # add dlvqq <= 11 (2.5 HP)

    # in the meantime let's add the mass
    '''
    mm = (candidatejet - candidatelep).mass2
    jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
    joffshell = jmass < 62.5
    massassumption = 80.*joffshell + (125 - 80.)*~joffshell
    x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
    met_eta = (
        (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
        + (x > 1)*(
            candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
        )
    )
    met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
    if met.size > 0:
        met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))

    # hh system
    candidateHH = candidateWjj + met_p4 + candidateHbb
    selection.add('hh_mass', candidateHH.mass >= 700)
    selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)
    '''
    channels = {"e": ["met_filter", "ch_e", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"],  #,"hh_mass","hh_centrality"],
                "mu": ["met_filter", "ch_mu", "ht_400", "hbb_btag", "hbb_vetobtagaway", "hww_mass"]  #,"hh_mass","hh_centrality"],
                }

    # need to add gen info
    if not isRealData:
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)

    # cumulative cutflow: bin 0 = all events, bin i+1 = after the first i cuts
    for channel, cuts in channels.items():
        allcuts = set()
        output['cutflow'].fill(dataset=dataset, channel=channel, cut=0,
                               weight=weights.weight())
        for i, cutname in enumerate(cuts):
            allcuts.add(cutname)
            # renamed from `cut` to avoid shadowing the loop variable
            cutmask = selection.all(*allcuts)
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1,
                                   weight=weights.weight()[cutmask])
    return output
def process(self, df):
    """Build lepton/jet objects from flat NanoAOD columns and fill the
    dilepton invariant-mass histogram per channel (em/mm/ee) and level
    (dilepton/2jets).

    Parameters
    ----------
    df : column-accessor dataframe with NanoAOD branches (MET_*, Electron_*,
        Muon_*, Jet_*) plus 'dataset' and, for MC, 'genWeight'.

    Returns
    -------
    The accumulator with 'dummy' and 'invmass' histograms filled.
    """
    # Dataset parameters
    dataset = df['dataset']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']  # sum of generator weights for normalization
    isData = self._samples[dataset]['isData']

    ### Recover objects, selection, functions and others...
    # Objects
    isTightMuon = self._objects['isTightMuon']
    isTightElectron = self._objects['isTightElectron']
    isGoodJet = self._objects['isGoodJet']
    # Corrections
    # NOTE(review): these SF lookups are fetched but never applied below
    GetMuonIsoSF = self._corrections['getMuonIso']
    GetMuonIDSF = self._corrections['getMuonID']
    # Selection
    # NOTE(review): fetched but unused in this method
    passNJets = self._selection['passNJets']
    passMETcut = self._selection['passMETcut']
    # Functions
    pow2 = self._functions['pow2']

    # Initialize objects (project helper builds candidate arrays from columns)
    met = Initialize({
        'pt': df['MET_pt'],
        'eta': 0,
        'phi': df['MET_phi'],
        'mass': 0
    })
    e = Initialize({
        'pt': df['Electron_pt'],
        'eta': df['Electron_eta'],
        'phi': df['Electron_phi'],
        'mass': df['Electron_mass']
    })
    mu = Initialize({
        'pt': df['Muon_pt'],
        'eta': df['Muon_eta'],
        'phi': df['Muon_phi'],
        'mass': df['Muon_mass']
    })
    j = Initialize({
        'pt': df['Jet_pt'],
        'eta': df['Jet_eta'],
        'phi': df['Jet_phi'],
        'mass': df['Jet_mass']
    })

    # Electron selection: attach extra branches (zero-filled when missing),
    # tag tight electrons, keep the highest-pt one if tight
    for key in self._e:
        e[key] = e.pt.zeros_like()
        if self._e[key] in df:
            e[key] = df[self._e[key]]
    e['istight'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, year)
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.istight.astype(np.bool)]
    nElec = e.counts

    # Muon selection (same pattern as electrons)
    for key in self._mu:
        mu[key] = mu.pt.zeros_like()
        if self._mu[key] in df:
            mu[key] = df[self._mu[key]]
    mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, year)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.istight.astype(np.bool)]
    nMuon = mu.counts

    # Jet selection: good jets cleaned against leptons within dR 0.4
    j['deepcsv'] = df['Jet_btagDeepB']
    j['deepflv'] = df['Jet_btagDeepFlavB']
    for key in self._jet:
        j[key] = j.pt.zeros_like()
        if self._jet[key] in df:
            j[key] = df[self._jet[key]]
    j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
    j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(np.bool)
    j0 = j[j.pt.argmax()]
    j0 = j0[j0.isclean.astype(np.bool)]
    nJets = j.counts

    # Dilepton pair: sum all distinct same-flavor pairs, keep highest-pt pair;
    # the guard falls back to the single leading lepton when no pairs exist
    ele_pairs = e.distincts()
    diele = leading_e
    leading_diele = leading_e
    if ele_pairs.i0.content.size > 0:
        diele = ele_pairs.i0 + ele_pairs.i1
        leading_diele = diele[diele.pt.argmax()]
    mu_pairs = mu.distincts()
    dimu = leading_mu
    leading_dimu = leading_mu
    if mu_pairs.i0.content.size > 0:
        dimu = mu_pairs.i0 + mu_pairs.i1
        leading_dimu = dimu[dimu.pt.argmax()]
    # invariant mass of the leading dimuon pair (this is what gets histogrammed)
    mmumu = leading_dimu.mass

    # Triggers
    # MET filters

    # Weights: cross-section normalization times generator weight (1 for data)
    genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
    weights = processor.Weights(df.size)
    weights.add('norm', xsec / sow * genw)

    # Selections and cuts
    selections = processor.PackedSelection()
    channels = ['em', 'mm', 'ee']
    selections.add('em', (nElec == 1) & (nMuon == 1))
    selections.add('ee', (nElec >= 2))
    selections.add('mm', (nMuon >= 2))
    levels = ['dilepton', '2jets']
    # NOTE(review): the last term subsumes the first two; kept as written
    selections.add('dilepton', (nElec >= 2) | (nMuon >= 2) | ((nElec + nMuon) >= 2))
    selections.add('2jets', (nJets >= 2))

    # Variables
    # Fill Histos
    hout = self.accumulator.identity()
    hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)
    for ch in channels:
        for lev in levels:
            weight = weights.weight()
            cuts = [ch] + [lev]
            cut = selections.all(*cuts)
            invmass_flat = mmumu[cut].flatten()
            # NaN masses are zero-weighted instead of dropped so the jagged
            # structure of value and weight arrays stays aligned
            weights_flat = (~np.isnan(mmumu[cut]) * weight[cut]).flatten()
            hout['invmass'].fill(
                sample=dataset,
                channel=ch,
                level=lev,
                invmass=invmass_flat,
                weight=weights_flat)
            #*selections.all(*{'mm'})
            #flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
            #flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}
    #hout['invmass'].fill(sample=dataset, channel='mm', level="dilepton", invmass=mmumu, weight=np.ones_like(df['MET_pt']))#weight=weights.weight())#*selections.all(*{'mm'})
    return hout
def process(self, events):
    """Fill b-tagging scale-factor templates from a muon-enriched AK15 sample.

    Selects AK15 fat jets whose two subjets are both matched to soft global
    muons, classifies MC jets by generator flavor (bb/b/cc/c/other), and
    fills per-flavor btagJP and SV-mass templates plus a cutflow.

    Returns the accumulator with 'sumw', 'cutflow', per-variable histograms,
    'jptemplate' and 'svtemplate' filled.
    """
    dataset = events.metadata['dataset']
    # data files carry no generator weight branch
    isData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    hout = self.accumulator.identity()

    ###
    # Getting ids from .coffea files
    ###
    get_msd_weight = self._corrections['get_msd_weight']
    isLooseMuon = self._ids['isLooseMuon']
    isTightMuon = self._ids['isTightMuon']
    isGoodFatJet = self._ids['isGoodFatJet']
    match = self._common['match']

    ###
    # Initialize physics objects
    ###
    mu = events.Muon
    leading_mu = mu[mu.pt.argmax()]
    fj = events.AK15Puppi
    # soft-drop four-vector = sum of the subjets
    fj['sd'] = fj.subjets.sum()
    fj['isgood'] = isGoodFatJet(fj.sd.pt, fj.sd.eta, fj.jetId)
    fj['T'] = TVector2Array.from_polar(fj.pt, fj.phi)
    # raw soft-drop mass from uncorrected subjets, then the msd correction
    # weight is broadcast back to jagged structure via the fat-jet offsets
    fj['msd_raw'] = (fj.subjets * (1 - fj.subjets.rawFactor)).sum().mass
    fj['msd_corr'] = fj.msd_raw * awkward.JaggedArray.fromoffsets(
        fj.array.offsets,
        np.maximum(
            1e-5,
            get_msd_weight(fj.sd.pt.flatten(), fj.sd.eta.flatten())))
    # ParticleNet-style discriminant: ZHbb vs. QCD
    probQCD = fj.probQCDbb + fj.probQCDcc + fj.probQCDb + fj.probQCDc + fj.probQCDothers
    probZHbb = fj.probZbb + fj.probHbb
    fj['ZHbbvsQCD'] = probZHbb / (probZHbb + probQCD)
    fj['tau21'] = fj.tau2 / fj.tau1
    SV = events.SV

    ###
    # Calculating weights
    ###
    if not isData:
        # gen-flavor tagging: count hard-process b/c quarks within dR 1.5
        gen = events.GenPart
        gen['isb'] = (abs(gen.pdgId) == 5) & gen.hasFlags(
            ['fromHardProcess', 'isLastCopy'])
        jetgenb = fj.sd.cross(gen[gen.isb], nested=True)
        bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum() == 1) & (gen[gen.isb].counts > 0)
        fj['isb'] = bmatch
        bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum() == 2) & (gen[gen.isb].counts > 0)
        fj['isbb'] = bmatch
        gen['isc'] = (abs(gen.pdgId) == 4) & gen.hasFlags(
            ['fromHardProcess', 'isLastCopy'])
        jetgenc = fj.sd.cross(gen[gen.isc], nested=True)
        cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum() == 1) & (gen[gen.isc].counts > 0)
        fj['isc'] = cmatch
        cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum() == 2) & (gen[gen.isc].counts > 0)
        fj['iscc'] = cmatch

    ##### axis=1 option to remove boundaries between fat-jets #####
    ##### copy (match jaggedness and shape of array) the contents of crossed array into the fat-jet subjets #####
    ##### we're not use copy since it keeps the original array type #####
    ##### fj.subjets is a TLorentzVectorArray #####
    mu = mu[mu.isGlobal]  ## Use a global muon for QCD events
    # per-subjet flag: exactly one soft global muon inside the subjet cone
    jetmu = fj.subjets.flatten(axis=1).cross(mu, nested=True)
    mask = (mu.counts > 0) & ((jetmu.i0.delta_r(jetmu.i1) < 0.4) & ((jetmu.i1.pt / jetmu.i0.pt) < 0.7) & (jetmu.i1.pt > 7)).sum() == 1
    ##### Three steps to match the jaggedness of the mask array to the fj.subjets array #####
    ##### Using the offset function to copy contents not the type of the array #####
    step1 = fj.subjets.flatten()
    step2 = awkward.JaggedArray.fromoffsets(step1.offsets, mask.content)
    step2 = step2.pad(1).fillna(
        0)  ##### Fill None for empty arrays and convert None to False
    step3 = awkward.JaggedArray.fromoffsets(fj.subjets.offsets, step2)
    ##### fatjet with two subjets matched with muons
    fj['withmu'] = step3.sum() == 2

    ###
    # Selections
    ###
    #### trigger selection ####
    triggers = np.zeros(events.size, dtype=np.bool)
    for path in self._btagmu_triggers[self._year]:
        if path not in events.HLT.columns:
            continue
        triggers = triggers | events.HLT[path]
    selection.add('btagmu_triggers', triggers)

    #### MET filters ####
    met_filters = np.ones(events.size, dtype=np.bool)
    if isData:
        met_filters = met_filters & events.Flag[
            'eeBadScFilter']  #this filter is recommended for data only
    for flag in AnalysisProcessor.met_filter_flags[self._year]:
        met_filters = met_filters & events.Flag[flag]
    selection.add('met_filters', met_filters)

    #### ak15 jet selection ####
    # leading = highest soft-drop pt, then require good ID and muon matching
    leading_fj = fj[fj.sd.pt.argmax()]
    leading_fj = leading_fj[leading_fj.isgood.astype(np.bool)]
    leading_fj = leading_fj[leading_fj.withmu.astype(np.bool)]

    #### SV selection for matched with leading ak15 jet ####
    SV['ismatched'] = match(SV, leading_fj, 1.5)
    #leading_SV = SV[SV.pt.argmax()]
    leading_SV = SV[SV.dxySig.argmax()]
    leading_SV = leading_SV[leading_SV.ismatched.astype(np.bool)]

    #fj_good = fj[fj.isgood.astype(np.bool)]
    #fj_withmu = fj_good[fj_good.withmu.astype(np.bool)]
    #fj_nwithmu = fj_withmu.counts
    selection.add('fj_pt', (leading_fj.sd.pt.max() > 250))
    selection.add(
        'fj_mass',
        (leading_fj.msd_corr.sum() > 50))  ## optionally also <130
    #selection.add('fj_tau21', (leading_fj.tau21.sum() < 0.3) )
    #selection.add('fjCoupledMu', (fj_nwithmu > 0) )
    print('Selections')
    print(selection.names, '\n')

    variables = {
        'ZHbbvsQCD': leading_fj.ZHbbvsQCD,
        'btagJP': leading_fj.btagJP,
        'tau21': leading_fj.tau21,
        'fjmass': leading_fj.msd_corr,
        'fj1pt': leading_fj.sd.pt,
        #'svmass': leading_SV.mass,
        'svmass': np.log(leading_SV.mass),
        'svdxysig': leading_SV.dxySig
    }

    def fill(dataset, gentype, weight, cut):
        # Fill each per-variable histogram after `cut`; NaN entries are
        # zero-weighted rather than dropped so arrays stay aligned.
        flat_variables = {
            k: v[cut].flatten()
            for k, v in variables.items()
        }
        flat_gentype = {
            k: (~np.isnan(v[cut]) * gentype[cut]).flatten()
            for k, v in variables.items()
        }
        flat_weight = {
            k: (~np.isnan(v[cut]) * weight[cut]).flatten()
            for k, v in variables.items()
        }
        #print('variables:', flat_variables)
        for histname, h in hout.items():
            if not isinstance(h, hist.Hist):
                continue
            if histname not in variables:
                continue
            elif histname == 'sumw':
                continue
            elif histname == 'jptemplate' or histname == 'svtemplate':
                continue
            else:
                flat_variable = {histname: flat_variables[histname]}
                h.fill(dataset=dataset,
                       gentype=flat_gentype[histname],
                       **flat_variable,
                       weight=flat_weight[histname])

    isFilled = False
    if isData:
        if not isFilled:
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
            isFilled = True
        cut = selection.all(*selection.names)
        vcut = np.zeros(events.size, dtype=np.int)
        hout['cutflow'].fill(dataset=dataset,
                             cutname='nocut',
                             cut=vcut,
                             weight=np.ones(events.size))
        allcuts = set()
        ### cutflow fill: bin i+1 holds events passing the first i+1 cuts
        for i, icut in enumerate(selection.names):
            allcuts.add(icut)
            jcut = selection.all(*allcuts)
            vcut = (i + 1) * jcut
            hout['cutflow'].fill(dataset=dataset,
                                 cutname=str(icut),
                                 cut=vcut,
                                 weight=jcut)
        ##### template for bb SF #####
        ##### btagjp template #####
        hout['jptemplate'].fill(dataset=dataset,
                                gentype=np.zeros(events.size, dtype=np.int),
                                btagJP=leading_fj.btagJP.sum(),
                                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                weight=np.ones(events.size) * cut)
        ##### sv mass template #####
        hout['svtemplate'].fill(
            dataset=dataset,
            gentype=np.zeros(events.size, dtype=np.int),
            #svmass=leading_SV.mass.sum(),
            svmass=np.log(leading_SV.mass.sum()),
            ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
            weight=np.ones(events.size) * cut)
        fill(dataset, np.zeros(events.size, dtype=np.int),
             np.ones(events.size), cut)
    else:
        weights = processor.Weights(len(events))
        # exclusive gen-flavor categories for the leading fat jet
        wgentype = {
            'bb': (leading_fj.isbb).sum(),
            'b': (~leading_fj.isbb & leading_fj.isb).sum(),
            'cc': (~leading_fj.isbb & ~leading_fj.isb & leading_fj.iscc).sum(),
            'c': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                  & leading_fj.isc).sum(),
            'other': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                      & ~leading_fj.isc).sum(),
        }
        vgentype = np.zeros(events.size, dtype=np.int)
        for gentype in self._gentype_map.keys():
            vgentype += self._gentype_map[gentype] * wgentype[gentype]
        if not isFilled:
            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())
            isFilled = True
        cut = selection.all(*selection.names)
        if 'QCD' in dataset:
            vcut = np.zeros(events.size, dtype=np.int)
            hout['cutflow'].fill(dataset=dataset,
                                 cutname='nocut',
                                 cut=vcut,
                                 weight=weights.weight())
            allcuts = set()
            ### cutflow fill
            for i, icut in enumerate(selection.names):
                allcuts.add(icut)
                jcut = selection.all(*allcuts)
                vcut = (i + 1) * jcut
                hout['cutflow'].fill(dataset=dataset,
                                     cutname=str(icut),
                                     cut=vcut,
                                     weight=weights.weight() * jcut)
            ### other variables
            fill(dataset, vgentype, weights.weight(), cut)
            ##### template for bb SF #####
            ##### btagjp template #####
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=vgentype,
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=weights.weight() * cut)
            ##### sv mass template #####
            hout['svtemplate'].fill(
                dataset=dataset,
                gentype=vgentype,
                #svmass=leading_SV.mass.sum(),
                svmass=np.log(leading_SV.mass.sum()),
                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                weight=np.ones(events.size) * cut)
        else:
            # non-QCD MC: fill with an all-true cut
            fill(dataset, vgentype, weights.weight(),
                 np.ones(events.size, dtype=np.int))
            ##### template for bb SF #####
            ##### btagjp template #####
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=vgentype,
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=weights.weight())
            ##### sv mass template #####
            # NOTE(review): this branch otherwise applies NO selection cut
            # (see fill/jptemplate above), yet the weight here multiplies by
            # `cut`, and drops the genWeight that jptemplate keeps — looks
            # inconsistent; confirm intended
            hout['svtemplate'].fill(
                dataset=dataset,
                gentype=vgentype,
                #svmass=leading_SV.mass.sum(),
                svmass=np.log(leading_SV.mass.sum()),
                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                weight=np.ones(events.size) * cut)
    return hout
def process(self, events):
    """Select Z->ee / Z->mumu candidate events and fill mass/MET/pileup plots.

    Applies lumi mask, trigger, vertex, and dilepton requirements; runs the
    Rochester muon momentum correction; builds opposite-charge same-flavor
    dilepton candidates; computes per-leg electron/muon scale factors; fills
    the 'massWindow' histograms.

    Returns the accumulator with 'cutflow', 'sumw' and the *_zmass/_met/_pileup
    histograms filled.
    """
    logging.debug('starting process')
    output = self.accumulator.identity()

    dataset = events.metadata['dataset']
    # data vs. MC decided purely by dataset name
    self._isData = dataset in [
        'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma',
        'MuonEG'
    ]

    selection = processor.PackedSelection()

    # TODO: instead of cutflow, use processor.PackedSelection
    output['cutflow']['all events'] += events.size

    logging.debug('applying lumi mask')
    if self._isData:
        lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
        events['passLumiMask'] = lumiMask(np.array(events.run),
                                          np.array(events.luminosityBlock))
    else:
        # MC: every event passes
        events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
    passLumiMask = events.passLumiMask
    selection.add('lumiMask', passLumiMask)

    logging.debug('adding trigger')
    self._add_trigger(events)

    passHLT = events.passHLT
    selection.add('trigger', passHLT)
    output['cutflow']['pass trigger'] += passHLT.sum()

    # if no trigger: fast return
    if passHLT.sum() == 0:
        return output

    # require one good vertex
    logging.debug('checking vertices')
    passGoodVertex = (events.PV.npvsGood > 0)
    output['cutflow']['good vertex'] += passGoodVertex.sum()
    selection.add('goodVertex', passGoodVertex)

    # run rochester muon momentum corrections
    rochester = self._rochester
    _muon_offsets = events.Muon.pt.offsets
    _charge = events.Muon.charge
    _pt = events.Muon.pt
    _eta = events.Muon.eta
    _phi = events.Muon.phi
    if self._isData:
        _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
        # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
    else:
        # for default if gen present
        _gpt = events.Muon.matched_gen.pt
        # for backup w/o gen
        _nl = events.Muon.nTrackerLayers
        # NOTE(review): np.random.rand is unseeded — smearing is not
        # reproducible run-to-run; confirm that is acceptable
        _u = JaggedArray.fromoffsets(_muon_offsets,
                                     np.random.rand(*_pt.flatten().shape))
        _hasgen = (_gpt.fillna(-1) > 0)
        # gen-matched muons: spread correction; unmatched: smear correction
        _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                       _eta[_hasgen], _phi[_hasgen],
                                       _gpt[_hasgen])
        _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                     _eta[~_hasgen], _phi[~_hasgen],
                                     _nl[~_hasgen], _u[~_hasgen])
        _k = np.ones_like(_pt.flatten())
        _k[_hasgen.flatten()] = _kspread.flatten()
        _k[~_hasgen.flatten()] = _ksmear.flatten()
        _k = JaggedArray.fromoffsets(_muon_offsets, _k)
        # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen], _pt[_hasgen], _eta[_hasgen], _phi[_hasgen],
        #                                        _gpt[_hasgen])
        # _kErrsmear = rochester.kSmearMCerror(_charge[~_hasgen], _pt[~_hasgen], _eta[~_hasgen], _phi[~_hasgen],
        #                                      _nl[~_hasgen], _u[~_hasgen])
        # _kErr = np.ones_like(_pt.flatten())
        # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
        # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
        # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)
    # only correct muons below 200 GeV
    # NOTE(review): assumes _pt.flatten() returns a fresh array that is safe
    # to mutate in place — confirm for this awkward version
    mask = _pt.flatten() < 200
    rochester_pt = _pt.flatten()
    rochester_pt[mask] = (_k * _pt).flatten()[mask]
    events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets, rochester_pt)

    logging.debug('adding muon id')
    self._add_muon_id(events.Muon)
    logging.debug('adding electron id')
    self._add_electron_id(events.Electron)

    logging.debug('selecting muons')
    muonId = (events.Muon.passId > 0)
    muons = events.Muon[muonId]

    logging.debug('selecting electrons')
    electronId = (events.Electron.passId > 0)
    electrons = events.Electron[electronId]

    passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
    output['cutflow']['two leptons'] += passTwoLeptons.sum()
    selection.add('twoLeptons', passTwoLeptons)

    # build cands
    # remake z to have same columns
    # pt eta phi mass charge pdgId
    logging.debug('rebuilding leptons')

    def rebuild(leptons):
        # Re-wrap a lepton collection with a fixed column set so electrons
        # and muons can be concatenated into one candidate array.
        return JaggedCandidateArray.candidatesfromoffsets(
            leptons.offsets,
            pt=leptons.pt.flatten(),
            eta=leptons.eta.flatten(),
            phi=leptons.phi.flatten(),
            mass=leptons.mass.flatten(),
            charge=leptons.charge.flatten(),
            pdgId=leptons.pdgId.flatten(),
            # needed for electron SF
            etaSC=leptons.etaSC.flatten()
            if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
        )

    newMuons = rebuild(muons)
    newElectrons = rebuild(electrons)

    logging.debug('building 2 leptons')
    ee_cands = newElectrons.choose(2)
    mm_cands = newMuons.choose(2)

    # combine them
    z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

    def bestcombination(zcands):
        # Keep only opposite-charge pairs, then the first candidate per event.
        good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
        # this keeps the first z cand in each event
        # should instead sort the best first
        # TODO: select best
        zcands = zcands[good_charge][:, :1]
        return zcands

    logging.debug('selecting best combinations')
    z_cands = bestcombination(z_cands)

    # z1 / z2 index the higher- and lower-pt leg of the candidate
    z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
    z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
    z1[(z_cands['0']['p4'].pt.flatten() < z_cands['1']['p4'].pt.flatten())] = 1
    z2[(z_cands['0']['p4'].pt.flatten() < z_cands['1']['p4'].pt.flatten())] = 0
    z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
    z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)

    passZCand = (z_cands.counts > 0)
    output['cutflow']['z cand'] += passZCand.sum()
    selection.add('zCand', passZCand)

    passMassWindow = (passZCand & z_cands[(
        (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120))].counts > 0)
    output['cutflow']['mass window'] += passMassWindow.sum()
    selection.add('massWindow', passMassWindow)

    # im sure there is a better way, but for now just do this
    def get_lepton_values(zl, key):
        # Pull per-leg values ('pt', 'eta', ...) out of z_cands using the
        # z1/z2 leg-index arrays, returning a jagged array aligned with zl.
        val = np.zeros_like(zl.flatten(), dtype=float)
        if len(val) == 0:
            return JaggedArray.fromoffsets(zl.offsets, val)
        for i in range(2):
            mask = (i == zl.flatten())
            if key == 'pt':
                val[mask] = z_cands[passZCand][str(
                    i)].flatten()[mask]['p4'].pt
            elif key == 'eta':
                val[mask] = z_cands[passZCand][str(
                    i)].flatten()[mask]['p4'].eta
            elif key == 'phi':
                val[mask] = z_cands[passZCand][str(
                    i)].flatten()[mask]['p4'].phi
            elif key == 'mass':
                val[mask] = z_cands[passZCand][str(
                    i)].flatten()[mask]['p4'].mass
            else:
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask][key]
        return JaggedArray.fromoffsets(zl.offsets, val)

    z1pt = get_lepton_values(z1, 'pt')
    z2pt = get_lepton_values(z2, 'pt')
    # NOTE(review): `.counts > 0` counts candidate entries, not entries
    # passing the pt comparison — the 30/20 GeV thresholds appear to have
    # no effect; was `.sum() > 0` (or `.any()`) intended? confirm
    passPt = ((z1pt > 30) & (z2pt > 20)).counts > 0
    output['cutflow']['pt threshold'] += passPt.sum()
    selection.add('ptThreshold', passPt)

    # channel masks from the pdgIds of the two legs
    chanSels = {}
    z1pdg = get_lepton_values(z1, 'pdgId')
    z2pdg = get_lepton_values(z2, 'pdgId')
    for chan in ['ee', 'mm']:
        if chan == 'ee':
            pdgIds = (11, 11)
        if chan == 'mm':
            pdgIds = (13, 13)
        chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                          & (abs(z2pdg) == pdgIds[1]))

    weights = processor.Weights(events.run.size)
    if self._isData:
        output['sumw'][dataset] = 0  # always set to 0 for data
    else:
        output['sumw'][dataset] += events.genWeight.sum()
        weights.add('genWeight', events.genWeight)
        weights.add(
            'pileupWeight',
            self._corrections['pileupWeight'](events.Pileup.nPU),
            self._corrections['pileupWeightUp'](events.Pileup.nPU),
            self._corrections['pileupWeightDown'](events.Pileup.nPU),
        )
    zls = [z1, z2]

    # electron sf: reco and MVA90 ID scale factors per leg, ee channel only
    for ei, zl in enumerate(zls):
        ei = str(ei)
        eta = get_lepton_values(zl, 'etaSC')
        pt = get_lepton_values(zl, 'pt')
        electronRecoSF = self._corrections['electron_reco'](eta, pt)
        electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
        electronSF = np.ones_like(electronRecoSF.prod())
        if ei in ['0', '1']:
            chans = ['ee']
        else:
            chans = []
        for chan in chans:
            # turns empty arrays into 0's, nonempty int 1's
            chanSel = (chanSels[chan].ones_like().sum() > 0)
            electronSF[chanSel] *= electronRecoSF[chanSel].prod()
            electronSF[chanSel] *= electronIdSF[chanSel].prod()
        weights.add('electronSF' + ei, electronSF)

    # muon SF: ID and isolation scale factors per leg, mm channel only
    # (argument order/abs(eta) convention differs between 2016 and later)
    for mi, zl in enumerate(zls):
        mi = str(mi)
        eta = get_lepton_values(zl, 'eta')
        pt = get_lepton_values(zl, 'pt')
        if self._year == '2016':
            idSF = self._corrections['muon_id_MediumID'](eta, pt)
            isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                eta, pt)
        else:
            idSF = self._corrections['muon_id_MediumPromptID'](
                pt, abs(eta))
            isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                pt, abs(eta))
        muonSF = np.ones_like(idSF.prod())
        if mi in ['0', '1']:
            chans = ['mm']
        else:
            chans = []
        for chan in chans:
            # turns empty arrays into 0's, nonempty int 1's
            chanSel = (chanSels[chan].ones_like().sum() > 0)
            muonSF[chanSel] *= idSF[chanSel].prod()
            muonSF[chanSel] *= isoSF[chanSel].prod()
        weights.add('muonSF' + mi, muonSF)

    logging.debug('filling')
    for sel in self._selections:
        if sel == 'massWindow':
            cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                'twoLeptons', 'zCand', 'massWindow',
                                'ptThreshold')
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                # zero out events of the other channel via the weight
                weight = chanSel.astype(float) * weights.weight()
                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )
    return output
def process(self, events):
    """Photon-purity measurement: fill sieie-vs-pt histograms for clean photons.

    Builds loose muon/electron collections to cross-clean photons and AK4
    jets, then — after MET filters, the single-photon trigger OR,
    ``MET < 60`` and at least one clean jet — fills the ``count`` histogram
    for three photon categories (medium, medium without the sieie cut, and
    medium without sieie with inverted isolation), plus the per-dataset sum
    of generator weights.
    """
    dataset = events.metadata['dataset']
    # Real data carries no generator-weight branch.
    isData = 'genWeight' not in events.columns
    # NOTE(review): `selection` is created but every selection.add below is
    # commented out; the event mask is built by hand instead.
    selection = processor.PackedSelection()
    hout = self.accumulator.identity()

    # Object-ID helpers configured on the processor instance.
    match = self._common['match']
    isLooseElectron = self._ids['isLooseElectron']
    isLooseMuon = self._ids['isLooseMuon']
    isLoosePhoton = self._ids['isLoosePhoton']
    isTightPhoton = self._ids['isTightPhoton']  # NOTE(review): unused below
    isGoodJet = self._ids['isGoodJet']  # NOTE(review): unused below

    #### Select loose muon and electron to select clean photon
    # NOTE(review): .astype(np.bool) — np.bool is removed in NumPy >= 1.24;
    # works only with the pinned older NumPy this codebase targets.
    mu = events.Muon
    mu['isloose'] = isLooseMuon(mu.pt, mu.eta, mu.pfRelIso04_all, mu.looseId,
                                self._year)
    mu_loose = mu[mu.isloose.astype(np.bool)]
    e = events.Electron
    # Electrons are "clean" when not dR-matched to a loose muon within 0.3.
    e['isclean'] = ~match(e, mu_loose, 0.3)
    e['isloose'] = isLooseElectron(e.pt, e.eta + e.deltaEtaSC, e.dxy, e.dz,
                                   e.cutBased, self._year)
    e_clean = e[e.isclean.astype(np.bool)]
    e_loose = e_clean[e_clean.isloose.astype(np.bool)]

    #### Consider clean and tight photon for purity measurement
    pho = events.Photon
    pho['isclean'] = ~match(pho, mu_loose, 0.5) & ~match(pho, e_loose, 0.5)
    # 2016 stores the cut-based photon ID as a plain level ('cutBased');
    # later years store it as a bitmap ('cutBasedBitmap').
    _id = 'cutBasedBitmap'
    if self._year == '2016':
        _id = 'cutBased'

    def isPurityPhoton(pt, medium_id):
        """pt > 200 photon passing the medium cut-based ID (year-dependent encoding)."""
        # Initial value is immediately overwritten by both branches below.
        mask = ~(pt == np.nan)
        if self._year == '2016':
            # Levels: >= 2 means medium or tighter.
            mask = (pt > 200) & (medium_id >= 2)
        else:
            # Bitmap: bit 1 set means the medium working point passed.
            mask = (pt > 200) & ((medium_id & 2) == 2)
        return mask

    pho['isloose'] = isLoosePhoton(pho.pt, pho.eta, pho[_id],
                                   self._year) & (pho.electronVeto)
    pho['ispurity'] = isPurityPhoton(
        pho.pt, pho[_id]) & (pho.isScEtaEB) & (pho.electronVeto)
    pho_clean = pho[pho.isclean.astype(np.bool)]
    pho_loose = pho_clean[pho_clean.isloose.astype(np.bool)]
    pho_purity = pho_clean[pho_clean.ispurity.astype(np.bool)]
    # Sideband categories: medium ID with the sieie cut dropped, and with the
    # sieie cut dropped AND the isolation inverted (helpers defined elsewhere
    # in this module).
    pho_nosieie = pho_clean[(pho_clean.pt > 200)
                            & (pho_clean.isScEtaEB)
                            & (pho_clean.electronVeto)
                            & medium_id_no_sieie(pho_clean)]
    pho_nosieie_inv_iso = pho_clean[(pho_clean.pt > 200)
                                    & (pho_clean.isScEtaEB)
                                    & (pho_clean.electronVeto)
                                    & medium_id_no_sieie_inv_iso(pho_clean)]

    #### Consider AK4 jet
    def isPurityJet(pt, eta, jet_id):
        """Central (|eta| < 2.4) jet with pt > 30 passing tight jet ID (bit 1)."""
        mask = (pt > 30) & (abs(eta) < 2.4) & ((jet_id & 2) == 2)
        return mask

    j = events.Jet
    #30 GeV cut on jet pT, we need to check later
    #j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId, j.neHEF, j.neEmEF, j.chHEF, j.chEmEF)
    j['ispurity'] = isPurityJet(j.pt, j.eta, j.jetId)
    j['isclean'] = ~match(j, e_loose, 0.4) & ~match(
        j, mu_loose, 0.4) & ~match(j, pho_loose, 0.4)
    j_purity = j[j.ispurity.astype(np.bool)]
    j_clean = j_purity[j_purity.isclean.astype(np.bool)]
    j_nclean = j_clean.counts
    met = events.MET

    #### Genweights
    weights = processor.Weights(len(events), storeIndividual=True)
    if isData:
        # Unit weight for data.
        weights.add('genw', np.ones(events.size))
    else:
        weights.add('genw', events.genWeight)

    #### MET filter & single photon trigger
    met_filters = np.ones(events.size, dtype=np.bool)
    if isData:
        # eeBadSc filter is applied on data only.
        met_filters = met_filters & events.Flag['eeBadScFilter']
    for flag in PhotonPurity.met_filter_flags[self._year]:
        met_filters = met_filters & events.Flag[flag]
    #selection.add('met_filters',met_filters)
    triggers = np.zeros(events.size, dtype=np.bool)
    for path in self._singlephoton_triggers[self._year]:
        # Skip trigger paths absent from this input file.
        if path not in events.HLT.columns:
            continue
        triggers = triggers | events.HLT[path]
    #selection.add('singlephoton_triggers', triggers)
    #selection.add('jet_cut', (j_nclean>0))
    #selection.add('met60', (met.pt<60))

    # Final event mask: filters AND trigger AND low MET AND >=1 clean jet.
    event_mask = met_filters & triggers & (met.pt < 60) & (j_nclean > 0)

    # Fill the sieie/pt histogram per photon category; weight_shape
    # broadcasts the per-event weight to the per-photon (jagged) shape.
    hout['count'].fill(dataset=dataset,
                       cat='medium',
                       sieie=pho_purity.sieie[event_mask].flatten(),
                       pt=pho_purity.pt[event_mask].flatten(),
                       weight=weight_shape(pho_purity.sieie[event_mask],
                                           weights.weight()[event_mask]))
    hout['count'].fill(dataset=dataset,
                       cat='medium_nosieie',
                       sieie=pho_nosieie.sieie[event_mask].flatten(),
                       pt=pho_nosieie.pt[event_mask].flatten(),
                       weight=weight_shape(pho_nosieie.sieie[event_mask],
                                           weights.weight()[event_mask]))
    hout['count'].fill(
        dataset=dataset,
        cat='medium_nosieie_invertiso',
        sieie=pho_nosieie_inv_iso.sieie[event_mask].flatten(),
        pt=pho_nosieie_inv_iso.pt[event_mask].flatten(),
        weight=weight_shape(pho_nosieie_inv_iso.sieie[event_mask],
                            weights.weight()[event_mask]))

    # Sum of generator weights (1 for data) for later normalisation.
    if isData:
        hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
    else:
        hout['sumw'].fill(dataset=dataset,
                          sumw=1,
                          weight=events.genWeight.sum())
    return hout
def process(self, events):
    """Multi-lepton analysis: same-sign dilepton and trilepton channels.

    Selects MVA-ID electrons and muons, classifies events into same-sign
    2-lepton (ee/em/mm, on/off Z) and 3-lepton (eem/mme/eee/mmm, on/off Z)
    channels, and fills per-channel, per-jet-multiplicity histograms of
    kinematic variables, normalised to xsec/sumOfWeights for MC.
    """
    # Dataset parameters
    dataset = events.metadata['dataset']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']
    isData = self._samples[dataset]['isData']
    # Strip era suffixes from data dataset names (e.g. 'SingleMuon_X' -> 'SingleMuon').
    datasets = [
        'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
        'DoubleElectron'
    ]
    for d in datasets:
        if d in dataset:
            dataset = dataset.split('_')[0]

    ### Recover objects, selection, functions and others...
    # Objects
    # NOTE(review): several of these lookups (isTightMuon, isTightElectron,
    # GetMuonIsoSF, GetMuonIDSF, passNJets, passMETcut, pow2, IsClosestToZ,
    # GetGoodTriplets) are bound here but never used below.
    isTightMuon = self._objects['isTightMuonPOG']
    isTightElectron = self._objects['isTightElectronPOG']
    isGoodJet = self._objects['isGoodJet']
    isClean = self._objects['isClean']
    isMuonMVA = self._objects[
        'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
    isElecMVA = self._objects[
        'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)

    # Corrections
    GetMuonIsoSF = self._corrections['getMuonIso']
    GetMuonIDSF = self._corrections['getMuonID']

    # Selection
    passNJets = self._selection['passNJets']
    passMETcut = self._selection['passMETcut']
    passTrigger = self._selection['passTrigger']

    # Functions
    pow2 = self._functions['pow2']
    IsClosestToZ = self._functions['IsClosestToZ']
    GetGoodTriplets = self._functions['GetGoodTriplets']

    # Initialize objects
    met = events.MET
    e = events.Electron
    mu = events.Muon
    j = events.Jet

    # Electron selection
    #e['isGood'] = e.pt.zeros_like()
    e['isGood'] = isElecMVA(e.pt,
                            e.eta,
                            e.dxy,
                            e.dz,
                            e.miniPFRelIso_all,
                            e.sip3d,
                            e.mvaTTH,
                            e.mvaFall17V2Iso,
                            e.lostHits,
                            e.convVeto,
                            e.tightCharge,
                            minpt=10)
    # NOTE(review): leading_e is computed but unused below.
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.isGood.astype(np.bool)]

    # Muon selection
    mu['isGood'] = isMuonMVA(mu.pt,
                             mu.eta,
                             mu.dxy,
                             mu.dz,
                             mu.miniPFRelIso_all,
                             mu.sip3d,
                             mu.mvaTTH,
                             mu.mediumPromptId,
                             mu.tightCharge,
                             minpt=10)
    # NOTE(review): leading_mu is computed but unused below.
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

    # Keep only the good leptons from here on.
    e = e[e.isGood.astype(np.bool)]
    mu = mu[mu.isGood.astype(np.bool)]
    nElec = e.counts
    nMuon = mu.counts
    # NOTE(review): twoLeps/threeLeps/twoElec/twoMuon are unused below.
    twoLeps = (nElec + nMuon) == 2
    threeLeps = (nElec + nMuon) == 3
    twoElec = (nElec == 2)
    twoMuon = (nMuon == 2)
    # Leading (highest-pt) electron and muon per event.
    e0 = e[e.pt.argmax()]
    m0 = mu[mu.pt.argmax()]

    # Jet selection
    j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId)
    j['isclean'] = isClean(j, e, mu)
    goodJets = j[(j.isclean) & (j.isgood)]
    njets = goodJets.counts
    ht = goodJets.pt.sum()
    j0 = goodJets[goodJets.pt.argmax()]
    # DeepJet medium working point (value hard-coded).
    nbtags = goodJets[goodJets.btagDeepFlavB > 0.2770].counts

    ##################################################################
    ### 2 same-sign leptons
    ##################################################################
    # emu
    singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
    singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
    em = singe.cross(singm)
    emSSmask = (em.i0.charge * em.i1.charge > 0)
    emSS = em[emSSmask]
    nemSS = len(emSS.flatten())

    # ee and mumu
    # pt>-1 to preserve jagged dimensions
    ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
    mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]
    eepairs = ee.distincts()
    eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
    # On-Z: pair mass within 15 GeV of 91.
    eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
    eeoffZmask = (eeonZmask == 0)
    mmpairs = mm.distincts()
    mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
    mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
    mmoffZmask = (mmonZmask == 0)
    eeSSonZ = eepairs[eeSSmask & eeonZmask]
    eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
    mmSSonZ = mmpairs[mmSSmask & mmonZmask]
    mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
    neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
    nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())
    #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

    # Cuts
    # Reduce the per-pair jagged masks to one boolean per event
    # ("any pair passed") for use in PackedSelection.
    eeSSmask = (eeSSmask[eeSSmask].counts > 0)
    mmSSmask = (mmSSmask[mmSSmask].counts > 0)
    eeonZmask = (eeonZmask[eeonZmask].counts > 0)
    eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
    mmonZmask = (mmonZmask[mmonZmask].counts > 0)
    mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
    emSSmask = (emSSmask[emSSmask].counts > 0)

    # njets

    ##################################################################
    ### 3 leptons
    ##################################################################
    # eem
    muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
    elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
    ee_eem = elec_eem.distincts()
    # Opposite-sign ee pair on/off the Z peak.
    ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
    ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
    ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
    ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)
    eepair_eem = (ee_eem.i0 + ee_eem.i1)
    trilep_eem = eepair_eem.cross(muon_eem)
    trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

    # mme
    muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
    elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
    mm_mme = muon_mme.distincts()
    mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
    mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
    mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
    mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)
    mmpair_mme = (mm_mme.i0 + mm_mme.i1)
    trilep_mme = mmpair_mme.cross(elec_mme)
    trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
    mZ_mme = mmpair_mme.mass
    mZ_eem = eepair_eem.mass
    m3l_eem = trilep_eem.mass
    m3l_mme = trilep_mme.mass

    ### eee and mmm
    eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
    mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
    # Create pairs
    ee_pairs = eee.argchoose(2)
    mm_pairs = mmm.argchoose(2)
    # Select pairs that are SFOS.
    eeSFOS_pairs = ee_pairs[
        (np.abs(eee[ee_pairs.i0].pdgId) == np.abs(eee[ee_pairs.i1].pdgId))
        & (eee[ee_pairs.i0].charge != eee[ee_pairs.i1].charge)]
    mmSFOS_pairs = mm_pairs[
        (np.abs(mmm[mm_pairs.i0].pdgId) == np.abs(mmm[mm_pairs.i1].pdgId))
        & (mmm[mm_pairs.i0].charge != mmm[mm_pairs.i1].charge)]
    # Find the pair with mass closest to Z.
    eeOSSFmask = eeSFOS_pairs[np.abs((eee[eeSFOS_pairs.i0] +
                                      eee[eeSFOS_pairs.i1]).mass -
                                     91.2).argmin()]
    onZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                        91.2) < 15
    mmOSSFmask = mmSFOS_pairs[np.abs((mmm[mmSFOS_pairs.i0] +
                                      mmm[mmSFOS_pairs.i1]).mass -
                                     91.2).argmin()]
    onZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                        91.2) < 15
    offZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                         91.2) > 15
    offZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                         91.2) > 15

    # Create masks
    eeeOnZmask = onZmask_ee[onZmask_ee].counts > 0
    eeeOffZmask = offZmask_ee[offZmask_ee].counts > 0
    mmmOnZmask = onZmask_mm[onZmask_mm].counts > 0
    mmmOffZmask = offZmask_mm[offZmask_mm].counts > 0

    # Leptons from Z
    # NOTE(review): eZ0/eZ1/mZ0/mZ1 are computed but unused below.
    eZ0 = eee[eeOSSFmask.i0]
    eZ1 = eee[eeOSSFmask.i1]
    mZ0 = mmm[mmOSSFmask.i0]
    mZ1 = mmm[mmOSSFmask.i1]

    # Leptons from W
    eW = eee[~eeOSSFmask.i0 | ~eeOSSFmask.i1]
    mW = mmm[~mmOSSFmask.i0 | ~mmOSSFmask.i1]
    eZ = eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]
    triElec = eZ + eW
    mZ = mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]
    triMuon = mZ + mW
    mZ_eee = eZ.mass
    m3l_eee = triElec.mass
    mZ_mmm = mZ.mass
    m3l_mmm = triMuon.mass

    # Triggers
    #passTrigger = lambda events, n, m, o : np.ones_like(events['MET_pt'], dtype=np.bool) # XXX
    trig_eeSS = passTrigger(events, 'ee', isData, dataset)
    trig_mmSS = passTrigger(events, 'mm', isData, dataset)
    trig_emSS = passTrigger(events, 'em', isData, dataset)
    trig_eee = passTrigger(events, 'eee', isData, dataset)
    trig_mmm = passTrigger(events, 'mmm', isData, dataset)
    trig_eem = passTrigger(events, 'eem', isData, dataset)
    trig_mme = passTrigger(events, 'mme', isData, dataset)

    # MET filters

    # Weights
    # MC events are normalised to xsec / sum-of-weights; data gets weight 1.
    genw = np.ones_like(
        events['MET_pt']) if isData else events['genWeight']
    weights = processor.Weights(events.size)
    weights.add('norm', genw if isData else (xsec / sow) * genw)

    # Selections and cuts
    selections = processor.PackedSelection()
    channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
    selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('emSS', (emSSmask) & (trig_emSS))

    channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
    selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
    selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
    selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
    selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

    channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
    selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
    selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
    selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
    selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

    # Jet-multiplicity / b-tag levels, applied on top of each channel.
    levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
    selections.add('base', (nElec + nMuon >= 2))
    selections.add('2jets', (njets >= 2))
    selections.add('4jets', (njets >= 4))
    selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
    selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

    # Variables
    invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
    invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
    invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
    invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
    invMass_emSS = (emSS.i0 + emSS.i1).mass

    varnames = {}
    varnames['met'] = met.pt
    varnames['ht'] = ht
    varnames['njets'] = njets
    varnames['nbtags'] = nbtags
    # Per-channel dictionaries: value arrays differ per channel.
    varnames['invmass'] = {
        'eeSSonZ': invMass_eeSSonZ,
        'eeSSoffZ': invMass_eeSSoffZ,
        'mmSSonZ': invMass_mmSSonZ,
        'mmSSoffZ': invMass_mmSSoffZ,
        'emSS': invMass_emSS,
        'eemSSonZ': mZ_eem,
        'eemSSoffZ': mZ_eem,
        'mmeSSonZ': mZ_mme,
        'mmeSSoffZ': mZ_mme,
        'eeeSSonZ': mZ_eee,
        'eeeSSoffZ': mZ_eee,
        'mmmSSonZ': mZ_mmm,
        'mmmSSoffZ': mZ_mmm,
    }
    varnames['m3l'] = {
        'eemSSonZ': m3l_eem,
        'eemSSoffZ': m3l_eem,
        'mmeSSonZ': m3l_mme,
        'mmeSSoffZ': m3l_mme,
        'eeeSSonZ': m3l_eee,
        'eeeSSoffZ': m3l_eee,
        'mmmSSonZ': m3l_mmm,
        'mmmSSoffZ': m3l_mmm,
    }
    varnames['e0pt'] = e0.pt
    varnames['e0eta'] = e0.eta
    varnames['m0pt'] = m0.pt
    varnames['m0eta'] = m0.eta
    varnames['j0pt'] = j0.pt
    varnames['j0eta'] = j0.eta
    # NOTE(review): np.int is removed in NumPy >= 1.24 — pinned-NumPy only.
    varnames['counts'] = np.ones_like(events.MET.pt, dtype=np.int)

    # Fill Histos
    hout = self.accumulator.identity()
    hout['dummy'].fill(sample=dataset, dummy=1, weight=events.size)
    for var, v in varnames.items():
        for ch in channels2LSS + channels3L:
            for lev in levels:
                weight = weights.weight()
                cuts = [ch] + [lev]
                cut = selections.all(*cuts)
                weights_flat = weight[cut].flatten()
                weights_ones = np.ones_like(weights_flat, dtype=np.int)
                # NOTE(review): for per-object variables (e0pt, j0pt, ...)
                # `values = v[cut].flatten()` can have fewer entries than
                # `weights_flat` when an event has no such object — verify
                # lengths match for every channel/variable combination.
                if var == 'invmass':
                    if ch in ['eeeSSoffZ', 'mmmSSoffZ']:
                        continue
                    elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                        continue  #values = v[ch]
                    else:
                        values = v[ch][cut].flatten()
                    hout['invmass'].fill(sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         invmass=values,
                                         weight=weights_flat)
                elif var == 'm3l':
                    # m3l only makes sense for the trilepton channels.
                    if ch in [
                            'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                            'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                            'mmmSSonZ'
                    ]:
                        continue
                    values = v[ch][cut].flatten()
                    hout['m3l'].fill(sample=dataset,
                                     channel=ch,
                                     cut=lev,
                                     m3l=values,
                                     weight=weights_flat)
                else:
                    values = v[cut].flatten()
                    if var == 'ht':
                        hout[var].fill(ht=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'met':
                        hout[var].fill(met=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'njets':
                        hout[var].fill(njets=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'nbtags':
                        hout[var].fill(nbtags=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'counts':
                        # Raw event counts: unit weights.
                        hout[var].fill(counts=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_ones)
                    elif var == 'e0pt':
                        # Skip channels with no electrons.
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        hout[var].fill(e0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0pt':
                        # Skip channels with no muons.
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                'eeeSSonZ'
                        ]:
                            continue
                        hout[var].fill(m0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'e0eta':
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        hout[var].fill(e0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0eta':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                'eeeSSonZ'
                        ]:
                            continue
                        hout[var].fill(m0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0pt':
                        # Jet variables are only filled at jet-requiring levels.
                        if lev == 'base':
                            continue
                        hout[var].fill(j0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0eta':
                        if lev == 'base':
                            continue
                        hout[var].fill(j0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
    return hout
def process(self, df):
    """Select lepton + fat-jet candidate events and fill region histograms.

    Pre-selects events with at least one candidate lepton (muon or electron,
    per ``self._channel``) and one good fat jet, picks the fat jet closest in
    dR to the lepton, reconstructs the MET pseudorapidity under a W/H mass
    assumption, attaches generator-flavour labels for MC, and fills a
    cumulative cutflow plus per-region histograms.

    Fixes relative to the previous revision:
    * the ``x >= 1`` branch of the MET-eta formula evaluated
      ``np.arccosh(candidatelep.eta)`` instead of ``np.arccosh(x)`` (and used
      a strict ``x > 1``, leaving ``x == 1`` uncovered); it now matches the
      sibling implementation elsewhere in this file,
      ``np.arccosh(np.maximum(1., x))`` guarded by ``x >= 1`` — the
      ``maximum`` keeps the argument inside arccosh's [1, inf) domain.
    * removed a duplicated ``self.accumulator.identity()`` call and the dead
      ``good = False`` initialiser.
    * bare ``except:`` narrowed to ``except Exception:`` so that
      KeyboardInterrupt/SystemExit propagate.
    """
    dataset = df.metadata['dataset']
    # Real data carries no generator-weight branch.
    isRealData = 'genWeight' not in df.columns
    output = self.accumulator.identity()
    selection = processor.PackedSelection()

    # --- Event pre-selection: >=1 candidate lepton and >=1 good fat jet ---
    goodMuon = ((df.Muon.pt > 27.) & (np.abs(df.Muon.eta) < 2.4))
    nmuons = goodMuon.sum()
    goodElectron = ((df.Electron.pt > 30.) & (np.abs(df.Electron.eta) < 2.5))
    nelectrons = goodElectron.sum()
    df.FatJet['msdcorr'] = corrected_msoftdrop(df.FatJet)
    goodFatJet = ((df.FatJet.pt > 300.) & (np.abs(df.FatJet.eta) < 2.4)
                  & (df.FatJet.msdcorr > 10.) & (df.FatJet.isTight))
    nfatjets = goodFatJet.sum()
    if self._channel == 'muon':
        good = ((nmuons >= 1) & (nfatjets >= 1))
    else:
        good = ((nelectrons >= 1) & (nfatjets >= 1))
    events = df[good]
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()

    # --- Trigger: OR of all configured HLT paths ---
    trigger = np.zeros(df.size, dtype='bool')
    for t in self._triggers[self._year + '_' + self._trigger]:
        try:
            trigger = trigger | df.HLT[t]
        except Exception:
            # Some paths are absent in some run periods; warn and move on.
            warnings.warn("Missing trigger %s" % t, RuntimeWarning)
    selection.add('trigger', trigger[good])

    # --- Candidate lepton: leading muon or electron per channel ---
    candidatemuon = events.Muon[:, 0:1]
    nmuons = events.Muon.counts
    candidateelectron = events.Electron[:, 0:1]
    nelectrons = events.Electron.counts
    if self._channel == 'muon':
        candidatelep = candidatemuon
        selection.add('nootherlepton', (nelectrons == 0))
    else:
        candidatelep = candidateelectron
        selection.add('nootherlepton', (nmuons == 0))
    # Impact-parameter requirement on the candidate lepton.
    selection.add('iplepton', ((np.abs(candidatelep.dz) < 0.1)
                               & (np.abs(candidatelep.dxy) < 0.05)).any())

    # --- Candidate fat jet: the one closest in dR to the lepton ---
    ak8_lep_pair = candidatelep.cross(events.FatJet)
    ak8_lep_dR = ak8_lep_pair.i0.delta_r(ak8_lep_pair.i1)
    candidatejet = events.FatJet[ak8_lep_dR.argmin()]
    leadingjet = events.FatJet[:, 0:1]
    ak8_lep_dR_closest = candidatelep.delta_r(candidatejet)
    selection.add('jetkin', (candidatejet.pt > self._fjetptMIN).any())
    selection.add('jetmsd', (candidatejet.msdcorr > 20).any())
    selection.add('LSF3medium', (candidatejet.lsf3 > 0.7).any())
    selection.add('LSF3tight', (candidatejet.lsf3 > 0.78).any())
    selection.add('lepnearjet', (ak8_lep_dR.min() < 1.5))
    selection.add('lepinjet', (ak8_lep_dR.min() < 0.8))

    # --- AK4 jets: b-tag vetoes opposite / away from the fat jet ---
    jets = events.Jet[(events.Jet.pt > 30.)
                      & (abs(events.Jet.eta) < 2.5)
                      & (events.Jet.isTight)]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    ak4_ak8_dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    ak4_opposite = jets[(ak4_ak8_dphi > np.pi / 2).all()]
    ak4_away = jets[(ak4_ak8_dphi > 0.8).all()]
    selection.add(
        'antiak4btagMediumOppHem',
        ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
    selection.add(
        'ak4btagMedium08',
        ak4_away.btagDeepB.max() < self._btagWPs['med'][self._year])

    # --- MET eta under a W/H mass assumption ---
    met = events.MET
    mm = (candidatejet - candidatelep).mass2
    jmass = (mm > 0) * np.sqrt(np.maximum(
        0, mm)) + (mm < 0) * candidatejet.mass
    # If the (jet - lepton) system is light, the leptonic W is on-shell
    # (80 GeV); otherwise it takes the remainder of the Higgs mass.
    joffshell = jmass < 62.5
    massassumption = 80. * joffshell + (125 - 80.) * ~joffshell
    x = massassumption**2 / (2 * candidatelep.pt * met.pt) + np.cos(
        candidatelep.phi - met.phi)
    # For x < 1 invert cosh(dEta) = x directly via arcsinh; for x >= 1 the
    # solution saturates at the lepton eta minus arccosh(x) (the maximum
    # keeps the argument inside arccosh's domain).
    met_eta = ((x < 1) * np.arcsinh(x * np.sinh(candidatelep.eta)) +
               (x >= 1) * (candidatelep.eta - np.sign(candidatelep.eta) *
                           np.arccosh(np.maximum(1., x))))
    # Placeholder p4 so that an empty chunk still yields a defined object.
    met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),
                                                np.array([0.]),
                                                np.array([0.]),
                                                np.array([0.]))
    if met.size > 0:
        met_p4 = TLorentzVectorArray.from_ptetaphim(
            met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        hmass = (candidatejet + met_p4).mass
    else:
        hmass = candidatejet.pt.zeros_like()

    # --- Weights and generator-flavour labels (MC only) ---
    weights = processor.Weights(len(events), storeIndividual=True)
    if isRealData:
        genflavor = candidatejet.pt.zeros_like()
    else:
        try:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year)
        except Exception:
            # Best-effort: some samples lack the weight branches.
            print('no gen weight')
        if 'TTTo' in dataset:
            genW, genW_idx = getParticles(
                events, 24, ['fromHardProcess', 'isLastCopy'])
            genb, genb_idx = getParticles(
                events, 5, ['fromHardProcess', 'isLastCopy'])
            genflavorW = matchedParticleFlavor(candidatelep, genW, 'child',
                                               0.4)
            genflavorb = matchedParticleFlavor(candidatelep, genb, 'mom',
                                               0.4)
            genflavor = getFlavor(genflavorW, genflavorb)
        elif (('hww_2017' in dataset) or ('GluGluHToWW' in dataset)):
            genH, genH_idx = getParticles(
                events, 25, ['fromHardProcess', 'isLastCopy'])
            genW, genW_idx = getParticles(
                events, 24, ['fromHardProcess', 'isLastCopy'])
            genE, genE_idx = getParticles(
                events, 11, ['fromHardProcess', 'isFirstCopy'], 1)
            genM, genM_idx = getParticles(
                events, 13, ['fromHardProcess', 'isFirstCopy'], 1)
            genT, genT_idx = getParticles(
                events, 15, ['fromHardProcess', 'isFirstCopy'], 1)
            genQ, genQ_idx = getParticles(
                events, [0, 5], ['fromHardProcess', 'isFirstCopy'])
            # Classify the H->WW decay mode from generator multiplicities.
            ishWW_qqelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 1) & (genM.counts == 0) & (genT.counts == 0)
            ishWW_qqmuv = (genH.counts == 1) & (genW.counts == 2) & (
                genM.counts == 1) & (genE.counts == 0) & (genT.counts == 0)
            ishWW_qqtauv = (genH.counts == 1) & (genW.counts == 2) & (
                genT.counts == 1) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_qqqq = (genH.counts == 1) & (genW.counts == 2) & (
                genQ.counts == 4) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_muvelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 1) & (genM.counts == 1)
            ishWW_elevelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 2) & (genM.counts == 0)
            ishWW_tauvtauv = (genH.counts == 1) & (genW.counts == 2) & (
                genT.counts == 2) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_muvmuv = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 0) & (genM.counts == 2)
            # Only the semileptonic qq-e-nu / qq-mu-nu modes get labels (8/9).
            genflavor = ((ishWW_qqelev) * 8 + (ishWW_qqmuv) * 9)
        else:
            genflavor = candidatejet.pt.zeros_like()

    # --- Cumulative cutflow ---
    cutflow = [
        'trigger', 'jetkin', 'jetmsd', 'lepnearjet', 'lepinjet',
        'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton',
        'LSF3medium', 'LSF3tight'
    ]
    allcuts = set()
    output['cutflow']['none'] += len(events)
    for cut in cutflow:
        allcuts.add(cut)
        output['cutflow'][cut] += selection.all(*allcuts).sum()

    # --- Region definitions: sets of selection names ---
    regions = {}
    regions['presel'] = {'trigger', 'jetkin', 'jetmsd', 'lepinjet'}
    regions['antibtag'] = {
        'trigger', 'jetkin', 'jetmsd', 'antiak4btagMediumOppHem'
    }
    regions['noinjet'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepnearjet',
        'antiak4btagMediumOppHem'
    }
    regions['nolsf'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet',
        'antiak4btagMediumOppHem'
    }
    regions['lsf'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight'
    }
    regions['bopp'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
        'antiak4btagMediumOppHem'
    }
    regions['lep'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
        'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton'
    }

    # --- Fill histograms for each configured region ---
    for region in self._regions:
        selections = regions[region]
        cut = selection.all(*selections)
        weight = weights.weight()[cut]

        def normalize(val):
            """Reduce to one flat value per selected event (pad/fill when jagged)."""
            try:
                return val[cut].pad(1, clip=True).fillna(0).flatten()
            except Exception:
                try:
                    return val[cut].flatten()
                except Exception:
                    return val[cut]

        output['%s_fmmjetprop' % region].fill(
            fjet_pt=normalize(candidatejet.pt),
            lep_pt=normalize(candidatelep.pt),
            fjet_lsf3=normalize(candidatejet.lsf3),
            genflavor=normalize(genflavor),
            dataset=dataset,
            weight=weight)
        output['%s_fmmjetprop2' % region].fill(
            fjet_mmass=normalize(jmass),
            fjet_lsf3=normalize(candidatejet.lsf3),
            genflavor=normalize(genflavor),
            dataset=dataset,
            weight=weight)
    return output
def process(self, df):
    """Boosted H->tautau(mu) candidate selection and histogram filling.

    Builds events with >=1 muon and >=1 custom AK8 fat jet, selects the fat
    jet closest in dR to the leading muon, derives jet/muon/MET columns back
    onto ``df``, fills a cumulative cutflow, and then fills every histogram
    in the accumulator whose fields are available in ``df``.
    """
    dataset = df['dataset']
    # Real data carries no generator-weight column.
    isRealData = 'genWeight' not in df
    isSignal = 'htautau' in dataset  # NOTE(review): computed but unused below
    output = self.accumulator.identity()

    # select at least one jet and one muon ( this is Pre-Selection! )
    events = buildevents(df, fatjet='CustomAK8Puppi')
    good = (
        (events.muons.counts >= 1)
        & (events.fatjets.counts >= 1)
        )
    events = events[good]
    selection = processor.PackedSelection()

    # trigger: AND of all configured paths (all must fire).
    trigger = np.ones(df.size, dtype='bool')
    for t in self._triggers[self._year+'_'+self._trigger]:
        trigger &= df[t]
    selection.add('trigger', trigger[good])

    # muon selection
    goodmuon = (
        (events.muons.p4.pt > 10)
        & (np.abs(events.muons.p4.eta) < 2.4)
        & (events.muons.sip3d < 4)
        & (np.abs(events.muons.dz) < 0.1)
        & (np.abs(events.muons.dxy) < 0.05)
        & (events.muons.mvaId == 2)
        )
    nmuons = goodmuon.sum()
    leadingmuon = events.muons[goodmuon][:, 0:1]
    # fatjet closest to lepton
    # NOTE(review): this assignment immediately overwrites the quality-
    # filtered leadingmuon above — everything below uses the raw leading
    # muon regardless of `goodmuon`; confirm this is intended.
    leadingmuon = events.muons[:, 0]
    mujet_dR = leadingmuon.p4.delta_r(events.fatjets.p4)
    mu_in_cone = mujet_dR.min() < 0.8
    # this I am not sure we have to put as a selection...
    mujet_bestidx = mujet_dR.argmin()
    leadingjet_mu = events.fatjets[mujet_bestidx]
    # NOTE(review): the eta cut below is one-sided (no np.abs) — confirm.
    selection.add('jetkin', (
        (leadingjet_mu.p4.pt > 300)
        & (leadingjet_mu.p4.eta < 2.4)
        & (leadingjet_mu.msoftdrop > 10.)
        ).any())
    selection.add('jetid', (leadingjet_mu.jetId & 2).any())  # tight id
    # lepton inside jet?
    selection.add('muinside', mu_in_cone.astype(bool))
    selection.add('LSF3muinside', (leadingjet_mu.electronIdx3SJ == 0).any())
    selection.add('LSF3medium', (leadingjet_mu.lsf3>0.78).any())

    # veto b-tag in opposite side
    jets = events.jets[
        (events.jets.p4.pt > 30.)
        & (events.jets.jetId & 2)  # tight id
    ]
    ak4_ak8_pair = jets.cross(leadingjet_mu, nested=True)
    dphi = ak4_ak8_pair.i0.p4.delta_phi(ak4_ak8_pair.i1.p4)
    ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
    selection.add('antiak4btagMediumOppHem',
                  ak4_opposite.deepcsvb.max() < self._btagWPs['med'][self._year])
    # b-tag in same side
    #subjets = events.subjets[:, leadingjet_mu.subJetIdx1]

    # final lepton selection
    nelectrons = (
        (events.electrons.p4.pt > 10)
        & (np.abs(events.electrons.p4.eta) < 2.5)
        & (events.electrons.cutBased & (1 << 2)).astype(bool)  # 2017V2 loose
        ).sum()
    selection.add('onemuon', (nmuons == 1) & (nelectrons == 0))
    # should we veto taus?
    selection.add('muonkin', (
        (leadingmuon.p4.pt > 27.)
        & (np.abs(leadingmuon.p4.eta) < 2.4)
        ))

    # building variables
    leadingjet_mu = leadingjet_mu.flatten()
    mm = (leadingjet_mu.p4 - leadingmuon.p4).mass2
    jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*leadingjet_mu.p4.mass  # (jet - lep).M
    met = events.met
    # W mass assumption: light (jet - lep) system => on-shell leptonic W
    # (80 GeV); otherwise the remainder of the Higgs mass.
    joffshell = jmass < 62.5
    massassumption = 80.*joffshell + (125 - 80.)*~joffshell
    x = massassumption**2/(2*leadingmuon.p4.pt*met.rho) + np.cos(leadingmuon.p4.phi - met.phi)
    # Solve for the MET eta; the maximum keeps arccosh's argument >= 1.
    met_eta = (
        (x < 1)*np.arcsinh(x*np.sinh(leadingmuon.p4.eta))
        + (x >= 1)*(
            leadingmuon.p4.eta - np.sign(leadingmuon.p4.eta)*np.arccosh(np.maximum(1., x))
            )
        )
    met_p4 = TLorentzVectorArray.from_ptetaphim(met.rho, met_eta, met.phi, np.zeros(met.size))

    # filling missing columns
    df['jet_pt'] = leadingjet_mu.p4.pt
    df['jet_lsf3'] = leadingjet_mu.lsf3
    df['jet_mmass'] = jmass
    df['jet_hmass'] = (met_p4 + leadingjet_mu.p4).mass
    df['jet_oppbtag'] = ak4_opposite.deepcsvb.max()
    df['muon_pt'] = leadingmuon.p4.pt
    df['muon_miso'] = leadingmuon.miniPFRelIso_all
    df['met_pt'] = met.rho
    df['met_eta'] = met_eta

    # fill cutflow
    # NOTE(review): 'LSF3muinside' appears twice (and `allcuts` is a set, so
    # the duplicate is a no-op) — likely a typo for 'LSF3medium', which is
    # defined above but never used; confirm intent.
    cutflow = ['trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem',
               'onemuon', 'muonkin', 'muinside', 'LSF3muinside','LSF3muinside']
    allcuts = set()
    output['cutflow']['none'] += len(events)
    # Cumulative counts: each entry adds its cut on top of the previous ones.
    for cut in cutflow:
        allcuts.add(cut)
        output['cutflow'][cut] += selection.all(*allcuts).sum()

    weights = processor.Weights(len(events))
    if not isRealData:
        weights.add('genweight', events.genWeight)

    regions = {}
    regions['presel'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem', 'onemuon', 'muonkin'}
    # NOTE(review): same duplicated 'LSF3muinside' as in the cutflow above.
    regions['muinjet'] = {'trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem',
                          'onemuon', 'muonkin', 'muinside', 'LSF3muinside','LSF3muinside'}

    # Fill every histogram whose region appears in its name and whose fields
    # are all available in df.
    for histname, h in output.items():
        if not isinstance(h, hist.Hist):
            continue
        if not all(k in df or k == 'systematic' for k in h.fields):
            print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
            continue
        fields = {k: df[k] for k in h.fields if k in df}
        region = [r for r in regions.keys() if r in histname.split('_')]
        if len(region) == 1:
            region = region[0]
            cut = selection.all(*regions[region])
            # NOTE(review): the boolean selection itself is used as the fill
            # weight (1 pass / 0 fail); the `weights` object built above
            # (genweight) is never applied — confirm this is intended.
            h.fill(**fields, weight=cut)
        elif len(region) > 1:
            raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
        else:
            raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))
    return output
def process(self, events):
    """Per-chunk coffea processor step for a boosted-jet analysis.

    Builds event selections (triggers, leading fat-jet kinematics, muon
    control-region cuts, lepton vetoes), per-event weights, and cutflows for
    the 'signal', 'ttbar_muoncontrol' and 'vselection' regions, then fills
    the accumulator histograms for every region.

    Parameters
    ----------
    events : NanoEvents-style collection with FatJet/Muon/Jet/MET/HLT/GRU/IN
        branches (project type — exact schema not visible here).

    Returns
    -------
    The filled accumulator (``self.accumulator.identity()`` structure).
    """
    dataset = events.metadata['dataset']
    print('process dataset', dataset)
    # Data chunks carry no 'genWeight' branch; used to gate MC-only weights.
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if (len(events) == 0):
        return output
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()
    # trigger paths
    if isRealData:
        trigger_fatjet = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            try:
                trigger_fatjet = trigger_fatjet | events.HLT[t]
            # NOTE(review): bare except — silently skips any missing/failed
            # trigger branch, not just a missing-attribute error.
            except:
                print('trigger %s not available' % t)
                continue
        trigger_muon = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger_muon = trigger_muon | events.HLT[t]
    else:
        # MC: treat all events as passing both trigger groups.
        trigger_fatjet = np.ones(events.size, dtype='bool')
        trigger_muon = np.ones(events.size, dtype='bool')
    selection.add('fatjet_trigger', trigger_fatjet)
    selection.add('muon_trigger', trigger_muon)
    #jet corrected kinematics
    gru = events.GRU
    IN = events.IN
    fatjets = events.FatJet
    # Attach derived per-jet columns: corrected softdrop mass, rho, and
    # DDT-shifted tagger scores (shift/n2ddt_shift are project helpers).
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rhocorr'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['gruddt'] = gru.v25 - shift(
        fatjets, algo='gruddt', year=self._year)
    fatjets['gru'] = gru.v25
    fatjets['in_v3'] = IN.v3
    fatjets['in_v3_ddt'] = IN.v3 - shift(
        fatjets, algo='inddt', year=self._year)
    fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(
        fatjets, algo='inddt90pctl', year=self._year)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    fatjets["genMatchFull"] = genmatch(events, dataset)
    #else: fatjets["genMatchFull"] = fatjets.pt.zeros_like() #np.zeros(events.size, dtype='bool')
    # Leading fat jet and up-to-five leading muons as candidates.
    candidatejet = fatjets[:, :1]
    candidatemuon = events.Muon[:, :5]
    # run model on PFCands associated to FatJet (FatJetPFCands)
    #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
    #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())
    selection.add('pt', (candidatejet.pt > 525).any())
    selection.add('msdcorr', (candidatejet.msdcorr > 40).any())
    # basic jet selection; gen-matching is only required for W/Z+jets samples
    goodjet_sel = ((candidatejet.pt > 525)
                   & (abs(candidatejet.eta) < 2.5)
                   & (candidatejet.msoftdrop > 40.)
                   & (candidatejet.rhocorr > -5.5)
                   & (candidatejet.rhocorr < -2)
                   & (candidatejet.genMatchFull if
                      ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset)
                      else (1 == 1))).any()
    # Looser kinematics for the 'vselection' (W-enriched) region.
    vselection_goodjet_sel = ((candidatejet.pt > 200)
                              & (abs(candidatejet.eta) < 2.5)
                              & (candidatejet.msoftdrop > 40.)).any()
    #& (candidatejet.genMatchFull if ('TTTo' in dataset) else (1==1))).any()
    #& (candidatejet.rhocorr > -5.5)
    #& (candidatejet.rhocorr < -2)).any()
    selection.add('vselection_jetkin', vselection_goodjet_sel)
    #goodmuon sel for muon CR (lep vetos below)
    goodmuon_sel = ((candidatemuon.pt > 55)
                    & (abs(candidatemuon.eta) < 2.1)
                    & (candidatemuon.looseId).astype(bool)
                    & (candidatemuon.pfRelIso04_all < 0.15)).any()
    vselection_goodmuon_sel = ((candidatemuon.pt > 53)
                               & (abs(candidatemuon.eta) < 2.1)
                               & (candidatemuon.tightId).astype(bool))
    #& (candidatemuon.pfRelIso04_all < 0.15))
    vselection_goodmuon_sel_loose = ((candidatemuon.pt > 20)
                                     & (candidatemuon.looseId).astype(bool)
                                     & (abs(candidatemuon.eta) < 2.4))
    selection.add('vselection_muonkin', vselection_goodmuon_sel.any())
    # Exactly-one-muon requirements via per-event sums of the boolean masks.
    selection.add('vselection_onetightmuon', vselection_goodmuon_sel.sum() == 1)
    selection.add('vselection_oneloosemuon', vselection_goodmuon_sel_loose.sum() == 1)
    # From here on only the leading muon is used.
    candidatemuon = candidatemuon[:, 0:1]
    selection.add('muonkin', goodmuon_sel)
    selection.add('jetkin', goodjet_sel)
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('met', events.MET.pt > 40.)
    # Angular separation between the candidate muon and the fat jet.
    muon_ak8_pair = candidatemuon.cross(candidatejet, nested=True)
    selection.add('muonDphiAK8', (abs(
        muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2 * np.pi / 3).all().all())
    selection.add('vselection_muonDphiAK8', (abs(
        muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 1).all().all())
    #ak4 puppi jet for CR
    jets = events.Jet[((events.Jet.pt > 50.)
                       & (abs(events.Jet.eta) < 2.5))][:, :10]
    # only consider first 4 jets to be consistent with old framework
    # NOTE(review): comment says 4 but the slice above keeps 10 — confirm.
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
    dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    # b-tagged AK4 jet away from the fat jet (ttbar CR handle) ...
    ak4_away = jets[(dr > 0.8).all()]
    selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
    # ... and a b-tag veto in the opposite hemisphere.
    ak4_opposite = jets[(dphi > np.pi / 2).all()]
    selection.add('antiak4btagMediumOppHem', ak4_opposite.btagCSVV2.max() < 0.8838)
    # Build a leptonic-W candidate from the muon and MET (MET given eta=0,
    # mass=0; wrapped per-event into singleton jagged arrays).
    mu_p4 = TLorentzVectorArray.from_ptetaphim(
        candidatemuon.pt.fillna(0), candidatemuon.eta.fillna(0),
        candidatemuon.phi.fillna(0), candidatemuon.mass.fillna(0))
    met_p4 = TLorentzVectorArray.from_ptetaphim(
        awkward.JaggedArray.fromiter([[v] for v in events.MET.pt]),
        awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]),
        awkward.JaggedArray.fromiter([[v] for v in events.MET.phi]),
        awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]))
    met_candidatemuon_pair = met_p4.cross(mu_p4)
    Wleptoniccandidate = met_candidatemuon_pair.i0 + met_candidatemuon_pair.i1
    selection.add('Wleptonic_candidate', (Wleptoniccandidate.pt > 200).any())
    # Softer AK4 collection for the vselection region.
    vselection_jets = events.Jet[((events.Jet.pt > 30.)
                                  & (abs(events.Jet.eta) < 2.4))]
    vselection_ak4_ak8_pair = vselection_jets.cross(candidatejet, nested=True)
    muon_ak4_pair = vselection_jets.cross(candidatemuon, nested=True)
    dr_ak8 = abs(
        vselection_ak4_ak8_pair.i0.delta_r(vselection_ak4_ak8_pair.i1))
    dr_muon = abs(muon_ak4_pair.i0.delta_r(muon_ak4_pair.i1))
    ak4_away = vselection_jets[(dr_ak8 > 0.8).all()]
    selection.add('vselection_ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
    # NOTE(review): 'ak4_away' is rebound here and the cut named
    # 'vselection_muonDphiAK4' applies a b-tag threshold to jets separated
    # from the muon by dR > 0.3 — name and content look inconsistent; confirm.
    ak4_away = vselection_jets[(dr_muon > 0.3).all()]
    selection.add('vselection_muonDphiAK4', ak4_away.btagCSVV2.max() > 0.8838)
    # Lepton multiplicities for vetoes.
    nelectrons = ((
        (events.Electron.pt > 10.)
        & (abs(events.Electron.eta) < 2.5)
        #& (events.Electron.cutBased >= events.Electron.LOOSE))
        #& (events.Electron.cutBased_Fall17_V1 >= 1))
        & (events.Electron.cutBased >= 2))).sum()
    nmuons = (((events.Muon.pt > 10)
               & (abs(events.Muon.eta) < 2.1)
               #& (events.Muon.pfRelIso04_all < 0.4)
               & (events.Muon.looseId).astype(bool))).sum()
    ntaus = (((events.Tau.pt > 20.)
              #& (events.Tau.idMVAnewDM2017v2 >=4))
              & (events.Tau.idDecayMode).astype(bool)
              & (events.Tau.rawIso < 5)
              & (abs(events.Tau.eta) < 2.3))).sum()
    selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))
    #weights.add('metfilter', events.Flag.METFilters)
    if isRealData:
        # Data: dummy gen flavor of -1 for every event.
        genflavor = candidatejet.pt.zeros_like().pad(
            1, clip=True).fillna(-1).flatten()
    if not isRealData:
        # MC-only event weights and gen-level boson matching.
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) #signal region only
        #add_singleMuTriggerWeight(weights, abs(candidatemuon.eta), candidatemuon.pt, self._year)
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
            1, clip=True).fillna(-1).flatten()
    #b-tag weights
    # Region name -> list of selection-cut names required for that region.
    regions = {
        'signal': [
            'fatjet_trigger',
            'jetkin',
            'noleptons',
            'jetid',
            'antiak4btagMediumOppHem',
        ],
        'ttbar_muoncontrol': [
            'muon_trigger',
            'pt',
            'msdcorr',
            'jetid',
            'jetkin',
            'muonkin',
            'muonDphiAK8',
            'ak4btagMedium08',
            'noelectron_notau',
        ],
        'vselection': [
            'muon_trigger',
            'vselection_jetkin',
            'vselection_muonkin',
            'vselection_onetightmuon',
            'vselection_oneloosemuon',
            'vselection_muonDphiAK8',
            'vselection_ak4btagMedium08',
            'vselection_muonDphiAK4',
            'Wleptonic_candidate',
            'met'
        ],
        'noselection': [],
        #'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
    }
    # Weighted N-1-style cutflows: cuts accumulate into a set, so each step
    # records the yield after all cuts so far.
    allcuts_signal = set()
    output['cutflow_signal'][dataset]['none'] += float(
        weights.weight().sum())
    allcuts_ttbar_muoncontrol = set()
    output['cutflow_ttbar_muoncontrol'][dataset]['none'] += float(
        weights.weight().sum())
    allcuts_vselection = set()
    output['cutflow_vselection'][dataset]['none'] += float(
        weights.weight().sum())
    for cut in regions['signal']:
        allcuts_signal.add(cut)
        output['cutflow_signal'][dataset][cut] += float(
            weights.weight()[selection.all(*allcuts_signal)].sum())
    for cut in regions['ttbar_muoncontrol']:
        allcuts_ttbar_muoncontrol.add(cut)
        output['cutflow_ttbar_muoncontrol'][dataset][cut] += float(
            weights.weight()[selection.all(
                *allcuts_ttbar_muoncontrol)].sum())
    for cut in regions['vselection']:
        allcuts_vselection.add(cut)
        output['cutflow_vselection'][dataset][cut] += float(
            weights.weight()[selection.all(*allcuts_vselection)].sum())

    def normalize(val, cut):
        # Select events passing `cut`, keep exactly one entry per event
        # (pad/clip to length 1, missing -> 0) and flatten to a plain array.
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # Fill every output histogram for one region.
        # NOTE(review): `systematic` and `wmod` are accepted but unused here.
        print('filling %s' % region)
        selections = regions[region]
        cut = selection.all(*selections)
        weight = weights.weight()[cut]
        # NOTE(review): the trailing commas after the next three fill()
        # calls make each statement a 1-tuple — harmless but unintended.
        output['templates'].fill(
            dataset=dataset,
            region=region,
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            n2ddt=normalize(candidatejet.n2ddt, cut),
            gruddt=normalize(candidatejet.gruddt, cut),
            in_v3_ddt=normalize(candidatejet.in_v3_ddt_90pctl, cut),
            weight=weight,
        ),
        output['event'].fill(
            dataset=dataset,
            region=region,
            MET=events.MET.pt[cut],
            nJet=fatjets.counts[cut],
            nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
            weight=weight,
        ),
        output['muon'].fill(
            dataset=dataset,
            region=region,
            mu_pt=normalize(candidatemuon.pt, cut),
            mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all, cut),
            weight=weight,
        ),
        output['deepAK8'].fill(
            dataset=dataset,
            region=region,
            deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
            deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            genflavor=genflavor[cut],
            weight=weight,
        ),
        output['in_v3'].fill(
            dataset=dataset,
            region=region,
            genflavor=genflavor[cut],
            in_v3=normalize(candidatejet.in_v3, cut),
            n2=normalize(candidatejet.n2b1, cut),
            gru=normalize(candidatejet.gru, cut),
            weight=weight,
        )

    for region in regions:
        fill(region)
    return output
def process(self, df):
    """Per-chunk processor for an AK15 fat-jet + MET (mono-X style) analysis.

    Builds electron/muon/tau/photon/AK15/AK4 object collections from flat
    NanoAOD-style columns in ``df``, derives recoil and dilepton systems,
    computes per-region weights (NLO k-factors, pileup, MET filters,
    trigger), defines event-category selections, and fills histograms for
    every (region, jet-selection) combination the dataset participates in.

    Parameters
    ----------
    df : dict-like dataframe of flat arrays keyed by branch name
        (project type — exact schema not visible here).

    Returns
    -------
    The filled accumulator (``self.accumulator.identity()`` structure).

    NOTE(review): uses the deprecated ``np.bool`` alias throughout (removed
    in NumPy 1.24) — consider migrating to the builtin ``bool``.
    """
    dataset = df['dataset']
    # Map this dataset to the analysis regions (keys of self._samples) whose
    # sample substrings match the dataset name.
    selected_regions = {}
    if not dataset in selected_regions: selected_regions[dataset] = []
    for selection,v in self._samples.items():
        for i in range (0,len(v)):
            if v[i] not in dataset: continue
            selected_regions[dataset].append(selection)

    ###
    #Getting corrections, ids, triggers, ecc, from .coffea files
    ###
    met_trigger_paths = self._triggers['met_trigger_paths']
    singleele_trigger_paths = self._triggers['singleele_trigger_paths']
    singlepho_trigger_paths = self._triggers['singlepho_trigger_paths']
    get_msd_weight = self._corrections['get_msd_weight']
    get_ttbar_weight = self._corrections['get_ttbar_weight']
    get_nlo_weight = self._corrections['get_nlo_weight']
    get_adhoc_weight = self._corrections['get_adhoc_weight']
    get_pu_weight = self._corrections['get_pu_weight']
    get_met_trig_weight = self._corrections['get_met_trig_weight']
    get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight']
    get_ele_trig_weight = self._corrections['get_ele_trig_weight']
    get_pho_trig_weight = self._corrections['get_pho_trig_weight']
    get_ecal_bad_calib = self._corrections['get_ecal_bad_calib']
    isLooseElectron = self._ids['isLooseElectron']
    isTightElectron = self._ids['isTightElectron']
    isLooseMuon = self._ids['isLooseMuon']
    isTightMuon = self._ids['isTightMuon']
    isLooseTau = self._ids['isLooseTau']
    isLoosePhoton = self._ids['isLoosePhoton']
    isTightPhoton = self._ids['isTightPhoton']
    isGoodJet = self._ids['isGoodJet']
    isGoodFatJet = self._ids['isGoodFatJet']
    isHEMJet = self._ids['isHEMJet']
    met_filter_flags = self._metfilters['met_filter_flags']

    ###
    #Initialize global quantities (MET ecc.)
    ###
    met = Initialize({'pt':df['MET_pt'],
                      'eta':0,
                      'phi':df['MET_phi'],
                      'mass':0})
    calomet = Initialize({'pt':df['CaloMET_pt'],
                          'eta':0,
                          'phi':df['CaloMET_phi'],
                          'mass':0})

    ###
    #Initialize physics objects
    ###
    #Define first and empty object that will use as protection against arrays with size 0
    #Will use MET to set the correct size for the arrays
    #Not used at the moment
    #empty_jagged = awkward.JaggedArray.fromcounts(np.ones_like(met.pt, dtype=int),np.zeros_like(met.pt))
    #empty_obj = Initialize({'pt':empty_jagged,
    #                        'eta':empty_jagged,
    #                        'phi':empty_jagged,
    #                        'mass':empty_jagged})

    # Electrons: attach year-dependent ID columns (zero-filled if absent),
    # flag loose/tight, and keep leading tight electron.
    e = Initialize({'pt':df['Electron_pt'],
                    'eta':df['Electron_eta'],
                    'phi':df['Electron_phi'],
                    'mass':df['Electron_mass']})
    for key in self._e_id[self._year]:
        if self._e_id[self._year][key] in df:
            e[key] = df[self._e_id[self._year][key]]
        else:
            e[key] = e.pt.zeros_like()
    e['isloose'] = isLooseElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.loose_id,self._year)
    e['istight'] = isTightElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.tight_id,self._year)
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.istight.astype(np.bool)]
    e_loose = e[e.isloose.astype(np.bool)]
    e_tight = e[e.istight.astype(np.bool)]
    e_ntot = e.counts
    e_nloose = e_loose.counts
    e_ntight = e_tight.counts

    # Muons: same pattern as electrons (loose uses the 'med_id' column here).
    mu = Initialize({'pt':df['Muon_pt'],
                     'eta':df['Muon_eta'],
                     'phi':df['Muon_phi'],
                     'mass':df['Muon_mass']})
    for key in self._mu_id[self._year]:
        if self._mu_id[self._year][key] in df:
            mu[key] = df[self._mu_id[self._year][key]]
        else:
            mu[key] = mu.pt.zeros_like()
    mu['isloose'] = isLooseMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.med_id,self._year)
    mu['istight'] = isTightMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.tight_id,self._year)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.istight.astype(np.bool)]
    mu_loose=mu[mu.isloose.astype(np.bool)]
    mu_tight=mu[mu.istight.astype(np.bool)]
    mu_ntot = mu.counts
    mu_nloose = mu_loose.counts
    mu_ntight = mu_tight.counts

    # Taus: cleaned against loose muons/electrons (dR 0.3) before ID.
    tau = Initialize({'pt':df['Tau_pt'],
                      'eta':df['Tau_eta'],
                      'phi':df['Tau_phi'],
                      'mass':df['Tau_mass']})
    for key in self._tau_id[self._year]:
        if self._tau_id[self._year][key] in df:
            tau[key] = df[self._tau_id[self._year][key]]
        else:
            tau[key] = tau.pt.zeros_like()
    tau['isclean'] =~tau.match(mu_loose,0.3)&~tau.match(e_loose,0.3)
    tau['isloose']=isLooseTau(tau.pt,tau.eta,tau.decayMode,tau.id,self._year)&tau.isclean.astype(np.bool)
    tau_loose=tau[tau.isloose.astype(np.bool)]
    tau_ntot=tau.counts
    tau_nloose=tau_loose.counts

    # Photons: cleaned against loose electrons (dR 0.4).
    pho = Initialize({'pt':df['Photon_pt'],
                      'eta':df['Photon_eta'],
                      'phi':df['Photon_phi'],
                      'mass':df['Photon_mass']})
    for key in self._pho_id[self._year]:
        if self._pho_id[self._year][key] in df:
            pho[key] = df[self._pho_id[self._year][key]]
        else:
            pho[key] = pho.pt.zeros_like()
    pho['isclean'] =~pho.match(e_loose,0.4)
    pho['isloose']=isLoosePhoton(pho.pt,pho.eta,pho.loose_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)
    pho['istight']=isTightPhoton(pho.pt,pho.eta,pho.tight_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)
    leading_pho = pho[pho.pt.argmax()]
    leading_pho = leading_pho[leading_pho.istight.astype(np.bool)]
    pho_loose=pho[pho.isloose.astype(np.bool)]
    pho_tight=pho[pho.istight.astype(np.bool)]
    pho_ntot=pho.counts
    pho_nloose=pho_loose.counts
    pho_ntight=pho_tight.counts

    # AK15 Puppi fat jets: ID, lepton/photon cleaning (dR 1.5), DeepAK15
    # probability columns, and combined tagger discriminants.
    fj = Initialize({'pt':df['AK15Puppi_pt'],
                     'eta':df['AK15Puppi_eta'],
                     'phi':df['AK15Puppi_phi'],
                     'mass':df['AK15Puppi_mass']})
    fj['msd'] = df['AK15Puppi_msoftdrop']
    for key in self._fj_id[self._year]:
        if self._fj_id[self._year][key] in df:
            fj[key] = df[self._fj_id[self._year][key]]
        else:
            fj[key] = fj.pt.zeros_like()
    fj['isgood'] = isGoodFatJet(fj.pt, fj.eta, fj.id)
    fj['isclean'] =~fj.match(pho_loose,1.5)&~fj.match(mu_loose,1.5)&~fj.match(e_loose,1.5)&fj.isgood.astype(np.bool)
    for key in self._deep[self._year]:
        if self._deep[self._year][key] in df:
            fj[key] = df[self._deep[self._year][key]]
        else:
            fj[key] = fj.pt.zeros_like()
    fj['probQCD'] = fj.probQCDbb+fj.probQCDcc+fj.probQCDb+fj.probQCDc+fj.probQCDothers
    fj['TvsQCD'] = (fj.probTbcq + fj.probTbqq) / (fj.probTbcq + fj.probTbqq + fj.probQCD)
    fj['ZHbbvsQCD'] = (fj.probZbb + fj.probHbb) / (fj.probZbb+ fj.probHbb+ fj.probQCD)
    fj['VvsQCD'] = (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb) / (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb+fj.probQCD)
    leading_fj = fj[fj.pt.argmax()]
    leading_fj = leading_fj[leading_fj.isclean.astype(np.bool)]
    # Per-event scalar: corrected softdrop mass of the leading clean fat jet
    # (.sum() collapses the singleton jagged dimension).
    leading_fj_msd_corr = leading_fj.msd.sum()*get_msd_weight(leading_fj.pt.sum(),leading_fj.eta.sum())
    fj_good = fj[fj.isgood.astype(np.bool)]
    fj_clean=fj[fj.isclean.astype(np.bool)]
    fj_ntot=fj.counts
    fj_ngood=fj_good.counts
    fj_nclean=fj_clean.counts

    # AK4 jets: ID, HEM flag, cleaning, isolation from clean fat jets, and
    # DeepCSV/DeepFlavour b-tag working points.
    j = Initialize({'pt':df['Jet_pt'],
                    'eta':df['Jet_eta'],
                    'phi':df['Jet_phi'],
                    'mass':df['Jet_mass']})
    #https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
    j['deepcsv'] = df['Jet_btagDeepB']
    j['deepflv'] = df['Jet_btagDeepFlavB']
    for key in self._j_id[self._year]:
        if self._j_id[self._year][key] in df:
            j[key] = df[self._j_id[self._year][key]]
        else:
            j[key] = j.pt.zeros_like()
    j['isgood'] = isGoodJet(j.pt, j.eta, j.id, j.nhf, j.nef, j.chf, j.cef)
    j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi)
    j['isclean'] = ~j.match(e_loose,0.4)&~j.match(mu_loose,0.4)&~j.match(pho_loose,0.4)&j.isgood.astype(np.bool)
    #j['isclean'] = ~j.match(e_tight,0.4)&~j.match(mu_tight,0.4)&~j.match(pho_tight,0.4)&j.isgood
    j['isiso'] = ~(j.match(fj_clean,1.5))&j.isclean.astype(np.bool)
    j['isdcsvL'] = (j.deepcsv>0.1241)&j.isiso.astype(np.bool)
    j['isdflvL'] = (j.deepflv>0.0494)&j.isiso.astype(np.bool)
    j['isdcsvM'] = (j.deepcsv>0.4184)&j.isiso.astype(np.bool)
    j['isdflvM'] = (j.deepflv>0.2770)&j.isiso.astype(np.bool)
    j['isdcsvT'] = (j.deepcsv>0.7527)&j.isiso.astype(np.bool)
    j['isdflvT'] = (j.deepflv>0.7264)&j.isiso.astype(np.bool)
    leading_j = j[j.pt.argmax()]
    leading_j = leading_j[leading_j.isclean.astype(np.bool)]
    j_good = j[j.isgood.astype(np.bool)]
    j_clean = j[j.isclean.astype(np.bool)]
    j_iso = j[j.isiso.astype(np.bool)]
    j_dcsvL = j[j.isdcsvL]
    j_dflvL = j[j.isdflvL]
    j_dcsvM = j[j.isdcsvM]
    j_dflvM = j[j.isdflvM]
    j_dcsvT = j[j.isdcsvT]
    j_dflvT = j[j.isdflvT]
    j_HEM = j[j.isHEM.astype(np.bool)]
    j_ntot=j.counts
    j_ngood=j_good.counts
    j_nclean=j_clean.counts
    j_niso=j_iso.counts
    j_ndcsvL=j_dcsvL.counts
    j_ndflvL=j_dflvL.counts
    j_ndcsvM=j_dcsvM.counts
    j_ndflvM=j_dflvM.counts
    j_ndcsvT=j_dcsvT.counts
    j_ndflvT=j_dflvT.counts
    j_nHEM = j_HEM.counts

    ###
    #Calculating derivatives
    ###
    # Dilepton systems; fall back to the leading lepton when no pairs exist
    # anywhere in the chunk (guards against empty jagged content).
    ele_pairs = e_loose.distincts()
    diele = leading_e
    leading_diele = leading_e
    if ele_pairs.i0.content.size>0:
        diele = ele_pairs.i0+ele_pairs.i1
        leading_diele = diele[diele.pt.argmax()]
    mu_pairs = mu_loose.distincts()
    dimu = leading_mu
    leading_dimu = leading_mu
    if mu_pairs.i0.content.size>0:
        dimu = mu_pairs.i0+mu_pairs.i1
        leading_dimu = dimu[dimu.pt.argmax()]
    # Recoil proxy per event category: MET plus the "invisible-ified" leptons.
    u={}
    u["iszeroL"] = met
    u["isoneM"] = met+leading_mu.sum()
    u["isoneE"] = met+leading_e.sum()
    u["istwoM"] = met+leading_dimu.sum()
    u["istwoE"] = met+leading_diele.sum()
    u["isoneA"] = met+leading_pho.sum()
    # Lepton system and leading-lepton four-vectors per category.
    lepSys={}
    lepSys["iszeroL"] = met
    lepSys["isoneM"] = leading_mu.sum()
    lepSys["isoneE"] = leading_e.sum()
    lepSys["istwoM"] = leading_dimu.sum()
    lepSys["istwoE"] = leading_diele.sum()
    lepSys["isoneA"] = leading_pho.sum()
    leadlepton={}
    leadlepton["iszeroL"] = met
    leadlepton["isoneM"] = leading_mu.sum()
    leadlepton["isoneE"] = leading_e.sum()
    leadlepton["istwoM"] = leading_mu.sum()
    leadlepton["istwoE"] = leading_e.sum()
    leadlepton["isoneA"] = leading_pho.sum()

    ###
    #Calculating weights
    ###
    ###
    # For MC, retrieve the LHE weights, to take into account NLO destructive interference, and their sum
    ###
    genw = np.ones_like(df['MET_pt'])
    sumw = 1.
    wnlo = np.ones_like(df['MET_pt'])
    adhocw = np.ones_like(df['MET_pt'])
    # xsec == -1 marks data; everything below is MC-only.
    if self._xsec[dataset] != -1:
        genw = df['genWeight']
        sumw = genw.sum()
        if 'TTJets' in dataset or 'WJets' in dataset or 'DY' in dataset or 'ZJets' in dataset:
            # Keep only "last copy" gen particles (statusFlags bit 13 unset
            # here — NOTE(review): bit 13 *set* is the usual isLastCopy
            # convention; confirm the ==0 test is intended).
            gen_flags = df['GenPart_statusFlags']
            LastCopy = (gen_flags&(1 << 13))==0
            #genLastCopy = Initialize({'pt':df['GenPart_pt'][LastCopy],
            #                          'eta':df['GenPart_eta'][LastCopy],
            #                          'phi':df['GenPart_phi'][LastCopy],
            #                          'mass':df['GenPart_mass'][LastCopy],
            #                          'pdgid':df['GenPart_pdgId'][LastCopy]})
            gen_pt = df['GenPart_pt'][LastCopy]
            gen_pdgid = df['GenPart_pdgId'][LastCopy]
            #genTops = genLastCopy[abs(genLastCopy.pdgid)==6]
            #genWs = genLastCopy[abs(genLastCopy.pdgid)==24]
            #genZs = genLastCopy[abs(genLastCopy.pdgid)==23]
            #genAs = genLastCopy[abs(genLastCopy.pdgid)==22]
            #genHs = genLastCopy[abs(genLastCopy.pdgid)==25]
            genTops = gen_pt[abs(gen_pdgid)==6]
            genWs = gen_pt[abs(gen_pdgid)==24]
            genZs = gen_pt[abs(gen_pdgid)==23]
            genAs = gen_pt[abs(gen_pdgid)==22]
            genHs = gen_pt[abs(gen_pdgid)==25]
            isTT = (genTops.counts==2)
            isW = (genTops.counts==0)&(genWs.counts==1)&(genZs.counts==0)&(genAs.counts==0)&(genHs.counts==0)
            isZ = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==1)&(genAs.counts==0)&(genHs.counts==0)
            isA = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==0)&(genAs.counts==1)&(genHs.counts==0)
            # NLO/adhoc k-factors keyed on the boson (or top pair) pT.
            if('TTJets' in dataset):
                wnlo = np.sqrt(get_ttbar_weight(genTops[0].sum()) * get_ttbar_weight(genTops[1].sum()))
            elif('WJets' in dataset):
                wnlo = get_nlo_weight[self._year]['w'](genWs[0].sum())
                if self._year != '2016':
                    adhocw = get_adhoc_weight['w'](genWs[0].sum())
            elif('DY' in dataset or 'ZJets' in dataset):
                wnlo = get_nlo_weight[self._year]['z'](genZs[0].sum())
                if self._year != '2016':
                    adhocw = get_adhoc_weight['z'](genZs[0].sum())
            # NOTE(review): this branch is unreachable — 'GJets' is not in
            # the enclosing dataset condition above; confirm intent.
            elif('GJets' in dataset):
                wnlo = get_nlo_weight[self._year]['a'](genAs[0].sum())

    ###
    # Calculate PU weight and systematic variations
    ###
    nvtx = df['PV_npvs']
    pu = get_pu_weight[self._year]['cen'](nvtx)
    puUp = get_pu_weight[self._year]['up'](nvtx)
    puDown = get_pu_weight[self._year]['down'](nvtx)

    ###
    #Importing the MET filters per year from metfilters.py and constructing the filter boolean
    ###
    met_filters = {}
    for flag in met_filter_flags[self._year]:
        if flag in df:
            met_filters[flag] = df[flag]

    ###
    #Importing the trigger paths per year from trigger.py and constructing the trigger boolean
    ###
    pass_trig = {}
    # OR together all available paths in each trigger group.
    met_trigger = {}
    for path in met_trigger_paths[self._year]:
        if path in df:
            met_trigger[path] = df[path]
    passMetTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in met_trigger:
        passMetTrig |= met_trigger[path]
    singleele_trigger = {}
    for path in singleele_trigger_paths[self._year]:
        if path in df:
            singleele_trigger[path] = df[path]
    passSingleEleTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in singleele_trigger:
        passSingleEleTrig |= singleele_trigger[path]
    singlepho_trigger = {}
    for path in singlepho_trigger_paths[self._year]:
        if path in df:
            singlepho_trigger[path] = df[path]
    passSinglePhoTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in singlepho_trigger:
        passSinglePhoTrig |= singlepho_trigger[path]
    pass_trig['iszeroL'] = passMetTrig
    pass_trig['isoneM'] = passMetTrig
    pass_trig['istwoM'] = passMetTrig
    pass_trig['isoneE'] = passSingleEleTrig
    pass_trig['istwoE'] = passSingleEleTrig
    pass_trig['isoneA'] =passSinglePhoTrig

    ###
    # Trigger efficiency weight
    ###
    trig = {}
    trig['iszeroL'] = get_met_trig_weight[self._year](u["iszeroL"].pt)
    trig['isoneM'] = get_met_trig_weight[self._year](u["isoneM"].pt)
    trig['istwoM'] = get_met_zmm_trig_weight[self._year](u["istwoM"].pt)
    trig['isoneE'] = get_ele_trig_weight[self._year](leading_e.eta.sum(), leading_e.pt.sum())
    trig['istwoE'] = trig['isoneE']
    if ele_pairs.i0.content.size>0:
        # Dielectron trigger efficiency: 1 - P(both legs fail).
        eff1 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i0.eta.sum(),ele_pairs[diele.pt.argmax()].i0.pt.sum())
        eff2 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i1.eta.sum(),ele_pairs[diele.pt.argmax()].i1.pt.sum())
        trig['istwoE'] = 1 - (1-eff1)*(1-eff2)
    trig['isoneA'] = get_pho_trig_weight[self._year](leading_pho.pt.sum())

    ###
    #Event selection
    ###
    # Event categories by lepton/photon content, b-tag multiplicity, fat-jet
    # tagger score, and corrected softdrop-mass bins.
    selections = processor.PackedSelection()
    selections.add('iszeroL', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0))
    selections.add('isoneM', (e_nloose==0)&(mu_ntight==1)&(tau_nloose==0)&(pho_nloose==0))
    selections.add('isoneE', (e_ntight==1)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(met.pt>50))
    selections.add('istwoM', (e_nloose==0) &
                   (mu_ntight>=1) &
                   (mu_nloose==2) &
                   (tau_nloose==0)&(pho_nloose==0)&(leading_dimu.mass.sum()>60) &
                   (leading_dimu.mass.sum()<120))
    selections.add('istwoE', (e_ntight>=1) &
                   (e_nloose==2)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(leading_diele.mass.sum()>60)&(leading_diele.mass.sum()<120))
    selections.add('isoneA', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_ntight==1))
    selections.add('noextrab', (j_ndflvL==0))
    selections.add('extrab', (j_ndflvL>0))
    selections.add('ismonohs', (leading_fj.ZHbbvsQCD.sum()>0.65))
    selections.add('ismonojet', ~(leading_fj.ZHbbvsQCD.sum()>0.65))
    selections.add('mass0', (leading_fj_msd_corr<30))
    selections.add('mass1', (leading_fj_msd_corr>=30)&(leading_fj_msd_corr<60))
    selections.add('mass2', (leading_fj_msd_corr>=60)&(leading_fj_msd_corr<80))
    selections.add('mass3', (leading_fj_msd_corr>=80)&(leading_fj_msd_corr<120))
    selections.add('mass4', (leading_fj_msd_corr>=120))
    selections.add('noHEMj', (j_nHEM==0))

    ###
    #Adding weights and selections
    ###
    # One Weights instance per region k, plus the region's recoil-dependent
    # 'baggy' cut and all (mass-bin x extrab x tagger) cut-set combinations.
    weights = {}
    regions = {}
    for k in selected_regions[dataset]:
        weights[k] = processor.Weights(df.size)
        weights[k].add('nlo',wnlo)
        weights[k].add('adhoc',adhocw)
        weights[k].add('genw',genw)
        weights[k].add('pileup',pu,puUp,puDown)
        weights[k].add('passMetFilters',np.prod([met_filters[key] for key in met_filters], axis=0))
        weights[k].add('trig', trig[k])
        weights[k].add('pass_trig', pass_trig[k])
        selections.add(k+'baggy', (fj_nclean>0)&(fj_clean.pt.max()>160)&(abs(u[k].delta_phi(j_clean)).min()>0.8)&(u[k].pt>250))
        regions[k+'_baggy'] = {k,k+'baggy','noHEMj','noextrab'}
        regions[k+'_mass0'] = {k,k+'baggy','mass0','noHEMj','noextrab'}
        regions[k+'_mass1'] = {k,k+'baggy','mass1','noHEMj','noextrab'}
        regions[k+'_mass2'] = {k,k+'baggy','mass2','noHEMj','noextrab'}
        regions[k+'_mass3'] = {k,k+'baggy','mass3','noHEMj','noextrab'}
        regions[k+'_mass4'] = {k,k+'baggy','mass4','noHEMj','noextrab'}
        regions[k+'_baggy_extrab'] = {k,k+'baggy','noHEMj','extrab'}
        regions[k+'_mass0_extrab'] = {k,k+'baggy','mass0','noHEMj','extrab'}
        regions[k+'_mass1_extrab'] = {k,k+'baggy','mass1','noHEMj','extrab'}
        regions[k+'_mass2_extrab'] = {k,k+'baggy','mass2','noHEMj','extrab'}
        regions[k+'_mass3_extrab'] = {k,k+'baggy','mass3','noHEMj','extrab'}
        regions[k+'_mass4_extrab'] = {k,k+'baggy','mass4','noHEMj','extrab'}
        regions[k+'_baggy_ismonohs'] = {k,k+'baggy','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass0_ismonohs'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass1_ismonohs'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass2_ismonohs'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass3_ismonohs'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass4_ismonohs'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonohs'}
        regions[k+'_baggy_extrab_ismonohs'] = {k,k+'baggy','noHEMj','extrab','ismonohs'}
        regions[k+'_mass0_extrab_ismonohs'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonohs'}
        regions[k+'_mass1_extrab_ismonohs'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonohs'}
        regions[k+'_mass2_extrab_ismonohs'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonohs'}
        regions[k+'_mass3_extrab_ismonohs'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonohs'}
        regions[k+'_mass4_extrab_ismonohs'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonohs'}
        regions[k+'_baggy_ismonojet'] = {k,k+'baggy','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass0_ismonojet'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass1_ismonojet'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass2_ismonojet'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass3_ismonojet'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass4_ismonojet'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonojet'}
        regions[k+'_baggy_extrab_ismonojet'] = {k,k+'baggy','noHEMj','extrab','ismonojet'}
        regions[k+'_mass0_extrab_ismonojet'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonojet'}
        regions[k+'_mass1_extrab_ismonojet'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonojet'}
        regions[k+'_mass2_extrab_ismonojet'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonojet'}
        regions[k+'_mass3_extrab_ismonojet'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonojet'}
        regions[k+'_mass4_extrab_ismonojet'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonojet'}

    # Per-event quantities to histogram (jagged; flattened at fill time).
    variables = {}
    variables['j1pt'] = leading_j.pt
    variables['j1eta'] = leading_j.eta
    variables['j1phi'] = leading_j.phi
    variables['fj1pt'] = leading_fj.pt
    variables['fj1eta'] = leading_fj.eta
    variables['fj1phi'] = leading_fj.phi
    variables['e1pt'] = leading_e.pt
    variables['e1phi'] = leading_e.phi
    variables['e1eta'] = leading_e.eta
    variables['dielemass'] = leading_diele.mass
    variables['mu1pt'] = leading_mu.pt
    variables['mu1phi'] = leading_mu.phi
    variables['mu1eta'] = leading_mu.eta
    variables['dimumass'] = leading_dimu.mass
    variables['njets'] = j_nclean
    variables['ndcsvL'] = j_ndcsvL
    variables['ndflvL'] = j_ndflvL
    variables['ndcsvM'] = j_ndcsvM
    variables['ndflvM'] = j_ndflvM
    variables['ndcsvT'] = j_ndcsvT
    variables['ndflvT'] = j_ndflvT
    variables['nfjtot'] = fj_ntot
    variables['nfjgood'] = fj_ngood
    variables['nfjclean'] = fj_nclean
    variables['TvsQCD'] = leading_fj.TvsQCD
    variables['ZHbbvsQCD'] = leading_fj.ZHbbvsQCD
    variables['VvsQCD'] = leading_fj.VvsQCD
    variables['probTbcq'] = leading_fj.probTbcq
    variables['probTbqq'] = leading_fj.probTbqq
    variables['probTbc'] = leading_fj.probTbc
    variables['probTbq'] = leading_fj.probTbq
    variables['probWcq'] = leading_fj.probWcq
    variables['probWqq'] = leading_fj.probWqq
    variables['probZbb'] = leading_fj.probZbb
    variables['probZcc'] = leading_fj.probZcc
    variables['probZqq'] = leading_fj.probZqq
    variables['probHbb'] = leading_fj.probHbb
    variables['probHcc'] = leading_fj.probHcc
    variables['probHqqqq'] = leading_fj.probHqqqq
    variables['probQCDbb'] = leading_fj.probQCDbb
    variables['probQCDcc'] = leading_fj.probQCDcc
    variables['probQCDb'] = leading_fj.probQCDb
    variables['probQCDc'] = leading_fj.probQCDc
    variables['probQCDothers'] = leading_fj.probQCDothers

    hout = self.accumulator.identity()
    hout['sumw'].fill(dataset=dataset, sumw=1, weight=sumw)
    # Fill every histogram for every (region r, jet-selection s) pair.
    i = 0
    while i < len(selected_regions[dataset]):
        r = selected_regions[dataset][i]
        weight = weights[r].weight()
        for s in ['baggy','mass0','mass1','mass2','mass3','mass4',
                  'baggy_extrab','mass0_extrab','mass1_extrab','mass2_extrab','mass3_extrab','mass4_extrab',
                  'baggy_ismonohs','mass0_ismonohs','mass1_ismonohs','mass2_ismonohs','mass3_ismonohs','mass4_ismonohs',
                  'baggy_extrab_ismonohs','mass0_extrab_ismonohs','mass1_extrab_ismonohs','mass2_extrab_ismonohs','mass3_extrab_ismonohs','mass4_extrab_ismonohs',
                  'baggy_ismonojet','mass0_ismonojet','mass1_ismonojet','mass2_ismonojet','mass3_ismonojet','mass4_ismonojet',
                  'baggy_extrab_ismonojet','mass0_extrab_ismonojet','mass1_extrab_ismonojet','mass2_extrab_ismonojet','mass3_extrab_ismonojet','mass4_extrab_ismonojet']:
            cut = selections.all(*regions[r+'_'+s])
            flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
            # NaN entries are given zero weight instead of being dropped.
            flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}
            for histname, h in hout.items():
                if not isinstance(h, hist.Hist):
                    continue
                elif histname == 'sumw':
                    continue
                # Event-level histograms: fill all events, zeroing failing
                # ones via weight*cut rather than slicing.
                elif histname == 'fjmass':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           fjmass=leading_fj_msd_corr,
                           weight=weight*cut)
                elif histname == 'recoil':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           recoil=u[r].pt,
                           weight=weight*cut)
                elif histname == 'CaloMinusPfOverRecoil':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           CaloMinusPfOverRecoil= abs(calomet.pt - met.pt) / u[r].pt,
                           weight=weight*cut)
                elif histname == 'mindphi':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           mindphi=abs(u[r].delta_phi(j_clean)).min(),
                           weight=weight*cut)
                elif histname == 'diledphi':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           diledphi=abs(lepSys[r].delta_phi(j_clean)).min(),
                           weight=weight*cut)
                elif histname == 'ledphi':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           ledphi=abs(leadlepton[r].delta_phi(j_clean)).min(),
                           weight=weight*cut)
                elif histname == 'recoilVSmindphi':
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           recoil=u[r].pt,
                           mindphi=abs(u[r].delta_phi(j_clean)).min(),
                           weight=weight*cut)
                # Everything else: object-level variable of the same name.
                else:
                    flat_variable = {histname: flat_variables[histname]}
                    h.fill(dataset=dataset,
                           region=r,
                           jet_selection=s,
                           **flat_variable,
                           weight=flat_weights[histname])
        i += 1
    return hout
def process(self, df):
    """Run the event selection for one chunk and fill the output histograms.

    Categorizes events by lepton flavor (Muon/Electron), lepton quality
    (Loose/Tight), number of DeepCSV b-tags (0/1/2/3+) and jet multiplicity
    (3 jets / 4+ jets); applies a golden-JSON lumimask for data or
    lepton/b-tag scale-factor weights for MC; then fills histograms for
    every surviving category.

    NOTE(review): this method reads several module-level globals that are
    defined outside this view (args, btaggers, wps_to_use, corrections,
    lepSF_correction, MTcut, Nominal_ttJets, proj_dir, objsel, MCWeights,
    make_vars) — confirm they exist at import time.

    Parameters
    ----------
    df : coffea event chunk; provides 'dataset', 'Jet', 'Muon', 'Electron',
        'MET', run/lumi numbers, pileup info, etc.

    Returns
    -------
    The filled accumulator (same structure as ``self.accumulator.identity()``).
    """
    np.random.seed( 10 )  # sets seed so values from random distributions are reproducible (JER corrections)
    output = self.accumulator.identity()
    self.sample_name = df.dataset

    ## make event weights
    # data or MC distinction made internally
    evt_weights = MCWeights.get_event_weights(df, year=args.year, corrections=self.corrections, BTagSFs=btaggers)

    ## initialize selections and regions
    selection = processor.PackedSelection()
    # regions[lepton][lepton quality][n b-tags][jet multiplicity] -> set of
    # selection names that must ALL pass for events in that category.
    # The sets are mutated below ('lumimask' for data, 'keep_ttbar' for
    # nominal ttbar MC), which is why plain set literals are used.
    regions = {
        'Muon': {
            'Loose': {
                'zero_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_0'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_0'},
                },
                'one_btag': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_1'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_1'},
                },
                'two_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_2'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_2'},
                },
                'threePlus_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_MU', 'DeepCSV_3p'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_MU', 'DeepCSV_3p'},
                },
            },
            'Tight': {
                'zero_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_0'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_0'},
                },
                'one_btag': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_1'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_1'},
                },
                'two_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_2'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_2'},
                },
                'threePlus_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_MU', 'DeepCSV_3p'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_MU', 'DeepCSV_3p'},
                },
            },
        },
        'Electron': {
            'Loose': {
                'zero_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_0'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_0'},
                },
                'one_btag': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_1'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_1'},
                },
                'two_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_2'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_2'},
                },
                'threePlus_btags': {
                    '3Jets': {'objselection', 'jets_3', 'loose_EL', 'DeepCSV_3p'},
                    '4PJets': {'objselection', 'jets_4p', 'loose_EL', 'DeepCSV_3p'},
                },
            },
            'Tight': {
                'zero_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_0'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_0'},
                },
                'one_btag': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_1'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_1'},
                },
                'two_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_2'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_2'},
                },
                'threePlus_btags': {
                    '3Jets': {'objselection', 'jets_3', 'tight_EL', 'DeepCSV_3p'},
                    '4PJets': {'objselection', 'jets_4p', 'tight_EL', 'DeepCSV_3p'},
                },
            },
        },
    }

    ## object selection
    objsel_evts = objsel.select(df, year=args.year, corrections=self.corrections, accumulator=output)
    output['cutflow']['nEvts passing jet and lepton obj selection'] += objsel_evts.sum()
    selection.add('jets_3', df['Jet'].counts == 3)
    selection.add('jets_4p', df['Jet'].counts > 3)
    selection.add('objselection', objsel_evts)
    #selection.add('DeepJet_pass', df['Jet']['DeepJet'+wps_to_use[0]].sum() >= 2)
    #selection.add('DeepCSV_pass', df['Jet']['DeepCSV'+wps_to_use[0]].sum() >= 2)
    # exclusive b-tag multiplicity categories based on the first DeepCSV working point
    selection.add('DeepCSV_0', df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 0)
    selection.add('DeepCSV_1', df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 1)
    selection.add('DeepCSV_2', df['Jet']['DeepCSV' + wps_to_use[0]].sum() == 2)
    selection.add('DeepCSV_3p', df['Jet']['DeepCSV' + wps_to_use[0]].sum() >= 3)

    #set_trace()
    # sort jets by btag value, needed when making permutations
    df['Jet'] = df['Jet'][df['Jet']['btagDeepB'].argsort(ascending=False)] if btaggers[0] == 'DeepCSV' else df['Jet'][df['Jet']['btagDeepFlavB'].argsort(ascending=False)]

    # data samples are named 'data_Single<Muon|Electron>...'
    self.isData = self.sample_name.startswith('data_Single')
    if self.isData:
        isSE_Data = self.sample_name.startswith('data_SingleElectron')
        isSM_Data = self.sample_name.startswith('data_SingleMuon')
        runs = df.run
        lumis = df.luminosityBlock
        Golden_Json_LumiMask = lumi_tools.LumiMask('%s/inputs/data/LumiMasks/%s_GoldenJson.txt' % (proj_dir, args.year))
        LumiMask = Golden_Json_LumiMask.__call__(runs, lumis)  ## returns array of valid events
        selection.add('lumimask', LumiMask)

        ## object selection and add different selections
        # only keep the regions matching the dataset's trigger stream
        if isSM_Data:
            del regions['Electron']
            ## muons
            selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() == 1)  # one muon passing TIGHT criteria
            selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() == 1)  # one muon passing LOOSE criteria
            #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
        if isSE_Data:
            del regions['Muon']
            ## electrons
            selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() == 1)  # one electron passing TIGHT criteria
            selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() == 1)  # one electron passing LOOSE criteria
            #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

        # every remaining data region must also pass the lumimask
        for lepton in regions.keys():
            for lepcat in regions[lepton].keys():
                for btagregion in regions[lepton][lepcat].keys():
                    for jmult in regions[lepton][lepcat][btagregion].keys():
                        regions[lepton][lepcat][btagregion][jmult].update({'lumimask'})

    if not self.isData:
        ## add different selections
        ## muons
        selection.add('tight_MU', df['Muon']['TIGHTMU'].sum() == 1)  # one muon passing TIGHT criteria
        selection.add('loose_MU', df['Muon']['LOOSEMU'].sum() == 1)  # one muon passing LOOSE criteria
        #selection.add('loose_or_tight_MU', (df['Muon']['LOOSEMU'] | df['Muon']['TIGHTMU']).sum() == 1) # one muon passing LOOSE or TIGHT criteria
        ## electrons
        selection.add('tight_EL', df['Electron']['TIGHTEL'].sum() == 1)  # one electron passing TIGHT criteria
        selection.add('loose_EL', df['Electron']['LOOSEEL'].sum() == 1)  # one electron passing LOOSE criteria
        #selection.add('loose_or_tight_EL', (df['Electron']['LOOSEEL'] | df['Electron']['TIGHTEL']).sum() == 1) # one electron passing LOOSE or TIGHT criteria

        #set_trace()
        ### apply lepton SFs to MC (only applicable to tight leptons)
        # NOTE(review): reads module-level 'corrections', not self.corrections — confirm intended
        if 'LeptonSF' in corrections.keys():
            tight_mu_cut = selection.require(objselection=True, tight_MU=True)  # find events passing muon object selection with one tight muon
            tight_muons = df['Muon'][tight_mu_cut][(df['Muon'][tight_mu_cut]['TIGHTMU'] == True)]
            evt_weights._weights['Muon_SF'][tight_mu_cut] = MCWeights.get_lepton_sf(year=args.year, lepton='Muons', corrections=lepSF_correction, pt=tight_muons.pt.flatten(), eta=tight_muons.eta.flatten())
            tight_el_cut = selection.require(objselection=True, tight_EL=True)  # find events passing electron object selection with one tight electron
            tight_electrons = df['Electron'][tight_el_cut][(df['Electron'][tight_el_cut]['TIGHTEL'] == True)]
            # electrons use supercluster eta for the SF lookup
            evt_weights._weights['Electron_SF'][tight_el_cut] = MCWeights.get_lepton_sf(year=args.year, lepton='Electrons', corrections=lepSF_correction, pt=tight_electrons.pt.flatten(), eta=tight_electrons.etaSC.flatten())

        ## apply btagging SFs to MC
        if corrections['BTagSF'] == True:
            #set_trace()
            threeJets_cut = selection.require(objselection=True, jets_3=True)
            #deepjet_3j_wts = self.corrections['BTag_Constructors']['DeepJet']['3Jets'].get_scale_factor(jets=df['Jet'][threeJets_cut], passing_cut='DeepJet'+wps_to_use[0])
            #evt_weights._weights['DeepJet'][threeJets_cut] = deepjet_3j_wts['central'].prod()
            deepcsv_3j_wts = self.corrections['BTag_Constructors']['DeepCSV']['3Jets'].get_scale_factor(jets=df['Jet'][threeJets_cut], passing_cut='DeepCSV' + wps_to_use[0])
            evt_weights._weights['DeepCSV'][threeJets_cut] = deepcsv_3j_wts['central'].prod()

            fourplusJets_cut = selection.require(objselection=True, jets_4p=True)
            #deepjet_4pj_wts = self.corrections['BTag_Constructors']['DeepJet']['4PJets'].get_scale_factor(jets=df['Jet'][fourplusJets_cut], passing_cut='DeepJet'+wps_to_use[0])
            #evt_weights._weights['DeepJet'][fourplusJets_cut] = deepjet_4pj_wts['central'].prod()
            deepcsv_4pj_wts = self.corrections['BTag_Constructors']['DeepCSV']['4PJets'].get_scale_factor(jets=df['Jet'][fourplusJets_cut], passing_cut='DeepCSV' + wps_to_use[0])
            evt_weights._weights['DeepCSV'][fourplusJets_cut] = deepcsv_4pj_wts['central'].prod()

        # don't use ttbar events with indices % 10 == 0, 1, 2
        # NOTE(review): placement inside 'if not self.isData' inferred from the
        # collapsed source; harmless either way since data sample names never
        # appear in Nominal_ttJets — confirm against the original file.
        if self.sample_name in Nominal_ttJets:
            events = df.event
            selection.add('keep_ttbar', ~np.stack([((events % 10) == idx) for idx in [0, 1, 2]], axis=1).any(axis=1))
            for lepton in regions.keys():
                for lepcat in regions[lepton].keys():
                    for btagregion in regions[lepton][lepcat].keys():
                        for jmult in regions[lepton][lepcat][btagregion].keys():
                            sel = regions[lepton][lepcat][btagregion][jmult]
                            sel.update({'keep_ttbar'})

    #set_trace()
    ## fill hists for each region
    for lepton in regions.keys():
        # when weighting a lepton category, exclude the other flavor's SF
        lepSF_to_exclude = 'Electron_SF' if lepton == 'Muon' else 'Muon_SF'
        btagSF_to_exclude = 'DeepCSV'
        for lepcat in regions[lepton].keys():
            for btagregion in regions[lepton][lepcat].keys():
                for jmult in regions[lepton][lepcat][btagregion].keys():
                    cut = selection.all(*regions[lepton][lepcat][btagregion][jmult])
                    #set_trace()
                    if cut.sum() > 0:
                        ltype = 'MU' if lepton == 'Muon' else 'EL'
                        # pick the per-object lepton mask matching the region's quality requirement
                        if 'loose_or_tight_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = ((df[lepton][cut]['TIGHT%s' % ltype] == True) | (df[lepton][cut]['LOOSE%s' % ltype] == True))
                        elif 'tight_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = (df[lepton][cut]['TIGHT%s' % ltype] == True)
                        elif 'loose_%s' % ltype in regions[lepton][lepcat][btagregion][jmult]:
                            lep_mask = (df[lepton][cut]['LOOSE%s' % ltype] == True)
                        else:
                            raise ValueError("Not sure what lepton type to choose for event")

                        ## calculate MT
                        MT = make_vars.MT(df[lepton][cut][lep_mask], df['MET'][cut])
                        MTHigh = (MT >= MTcut).flatten()  # per-event transverse-mass requirement

                        jets = df['Jet'][cut][MTHigh]
                        leptons = df[lepton][cut][lep_mask][MTHigh]
                        evt_weights_to_use = evt_weights.weight()
                        # for data all per-event SF weights default to 1
                        lepSF = np.ones(MTHigh.sum()) if self.isData else evt_weights._weights['%s_SF' % lepton][cut][MTHigh].flatten()
                        pu_weight = np.ones(MTHigh.sum()) if self.isData else evt_weights._weights['pileup_weight'][cut][MTHigh].flatten()

                        # fill once with and once without the b-tag SF applied
                        for btag_applied in [True, False]:
                            btagSF = np.ones(MTHigh.sum()) if (self.isData or btag_applied == False) else evt_weights._weights[btaggers[0]][cut][MTHigh].flatten()
                            if not self.isData:
                                SFs_to_exclude = [lepSF_to_exclude] if btag_applied else [lepSF_to_exclude, btagSF_to_exclude]
                                evt_weights_to_use = evt_weights.partial_weight(exclude=SFs_to_exclude)
                            tot_weight = evt_weights_to_use[cut][MTHigh].flatten()
                            output['BTagSF'].fill(dataset=self.sample_name, btagging=str(btag_applied), jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=btagSF)
                            output['EvtWeight'].fill(dataset=self.sample_name, btagging=str(btag_applied), jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=tot_weight)
                            output = self.fill_hists(acc=output, btagging_applied=str(btag_applied), jetmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, jets=jets, leptons=leptons, MT=MT[MTHigh].flatten(), evt_weights=tot_weight)

                        #set_trace()
                        # pileup-related monitoring histograms are MC-only
                        if not self.isData:
                            output['nTrueInt_puweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, pu=df['Pileup_nTrueInt'][cut][MTHigh], weight=pu_weight)
                            output['nTrueInt_noweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, pu=df['Pileup_nTrueInt'][cut][MTHigh])
                            output['rho_puweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, rho=df['fixedGridRhoFastjetAll'][cut][MTHigh], weight=pu_weight)
                            output['rho_noweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, rho=df['fixedGridRhoFastjetAll'][cut][MTHigh])
                            output['nvtx_puweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, vtx=df['PV_npvs'][cut][MTHigh], weight=pu_weight)
                            output['nvtx_noweight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, vtx=df['PV_npvs'][cut][MTHigh])

                        # NOTE(review): placement outside the isData check inferred —
                        # lepSF/pu_weight are defined as ones for data, which only
                        # makes sense if these fills run unconditionally; confirm.
                        output['LepSF'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=lepSF)
                        output['PileupWeight'].fill(dataset=self.sample_name, jmult=jmult, leptype=lepton, lepcat=lepcat, btag=btagregion, sf=pu_weight)

    return output
def process(self, df): if not df.size: return self.accumulator.identity() self._configure(df) dataset = df['dataset'] df['is_lo_w'] = is_lo_w(dataset) df['is_lo_z'] = is_lo_z(dataset) df['is_lo_znunu'] = is_lo_znunu(dataset) df['is_lo_w_ewk'] = is_lo_w_ewk(dataset) df['is_lo_z_ewk'] = is_lo_z_ewk(dataset) df['is_lo_g'] = is_lo_g(dataset) df['is_nlo_z'] = is_nlo_z(dataset) df['is_nlo_w'] = is_nlo_w(dataset) df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[ 'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[ 'is_lo_w_ewk'] | df['is_lo_z_ewk'] df['is_data'] = is_data(dataset) gen_v_pt = None if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[ 'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']: gen = setup_gen_candidates(df) dressed = setup_dressed_gen_candidates(df) fill_gen_v_info(df, gen, dressed) gen_v_pt = df['gen_v_pt_combined'] elif df['is_lo_g']: gen = setup_gen_candidates(df) all_gen_photons = gen[(gen.pdg == 22)] prompt_mask = (all_gen_photons.status == 1) & (all_gen_photons.flag & 1 == 1) stat1_mask = (all_gen_photons.status == 1) gen_photons = all_gen_photons[prompt_mask | (~prompt_mask.any()) & stat1_mask] gen_photon = gen_photons[gen_photons.pt.argmax()] gen_v_pt = gen_photon.pt.max() # Generator-level leading dijet mass if df['has_lhe_v_pt']: genjets = setup_lhe_cleaned_genjets(df) digenjet = genjets[:, :2].distincts() df['mjj_gen'] = digenjet.mass.max() df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0) # Candidates # Already pre-filtered! 
# All leptons are at least loose # Check out setup_candidates for filtering details met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates( df, cfg) # Remove jets in accordance with the noise recipe if df['year'] == 2017: ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) | (ak4.abseta > 3.139)] bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) | (bjets.abseta > 3.139)] # Filtering ak4 jets according to pileup ID ak4 = ak4[ak4.puid] # Muons df['is_tight_muon'] = muons.tightId \ & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \ & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \ & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA) dimuons = muons.distincts() dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge'] df['MT_mu'] = ((muons.counts == 1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max() # Electrons df['is_tight_electron'] = electrons.tightId \ & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \ & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA) dielectrons = electrons.distincts() dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge'] df['MT_el'] = ((electrons.counts == 1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max() # ak4 leadak4_index = ak4.pt.argmax() elejet_pairs = ak4[:, :1].cross(electrons) df['dREleJet'] = np.hypot( elejet_pairs.i0.eta - elejet_pairs.i1.eta, dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min() muonjet_pairs = ak4[:, :1].cross(muons) df['dRMuonJet'] = np.hypot( muonjet_pairs.i0.eta - muonjet_pairs.i1.eta, dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min() # Recoil df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons, muons, photons) df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"] df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=5.0) df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=5.0) 
selection = processor.PackedSelection() # Triggers pass_all = np.ones(df.size) == 1 selection.add('inclusive', pass_all) selection = trigger_selection(selection, df, cfg) selection.add('mu_pt_trig_safe', muons.pt.max() > 30) # Common selection selection.add('veto_ele', electrons.counts == 0) selection.add('veto_muo', muons.counts == 0) selection.add('veto_photon', photons.counts == 0) selection.add('veto_tau', taus.counts == 0) selection.add('at_least_one_tau', taus.counts > 0) selection.add('veto_b', bjets.counts == 0) selection.add('mindphijr', df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('mindphijm', df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('dpfcalo_sr', np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO) selection.add('dpfcalo_cr', np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO) selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL) selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL) # AK4 dijet diak4 = ak4[:, :2].distincts() leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & ( np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA) trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & ( np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA) hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any() has_track0 = np.abs(diak4.i0.eta) <= 2.5 has_track1 = np.abs(diak4.i1.eta) <= 2.5 leadak4_id = diak4.i0.tightId & (has_track0 * ( (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) & (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0) trailak4_id = has_track1 * ( (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) & (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1 df['mjj'] = diak4.mass.max() df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max()) df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max() leading_jet_in_horn = ((diak4.i0.abseta < 3.2) & (diak4.i0.abseta > 2.8)).any() trailing_jet_in_horn = 
((diak4.i1.abseta < 3.2) & (diak4.i1.abseta > 2.8)).any() selection.add('hornveto', (df['dPFTkSR'] < 0.8) | ~(leading_jet_in_horn | trailing_jet_in_horn)) if df['year'] == 2018: if df['is_data']: metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) & (df['run'] > 319077)) else: metphihem_mask = pass_all selection.add("metphihemextveto", metphihem_mask) selection.add('no_el_in_hem', electrons[electrons_in_hem(electrons)].counts == 0) else: selection.add("metphihemextveto", pass_all) selection.add('no_el_in_hem', pass_all) selection.add('two_jets', diak4.counts > 0) selection.add('leadak4_pt_eta', leadak4_pt_eta.any()) selection.add('trailak4_pt_eta', trailak4_pt_eta.any()) selection.add('hemisphere', hemisphere) selection.add('leadak4_id', leadak4_id.any()) selection.add('trailak4_id', trailak4_id.any()) selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS) selection.add( 'dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI) selection.add( 'detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA) # Cleaning cuts for signal region max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef) selection.add('max_neEmEF', (max_neEmEF < 0.7).any()) vec_b = calculate_vecB(ak4, met_pt, met_phi) vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi']) no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta > 2.5).any() no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta < 3.0).any() at_least_one_jet_in_hf = (diak4.i0.abseta > 3.0).any() | (diak4.i1.abseta > 3.0).any() at_least_one_jet_in_trk = (diak4.i0.abseta < 2.5).any() | (diak4.i1.abseta < 2.5).any() # Categorized cleaning cuts eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) & (vec_dphi < 1.0)) | ( (no_jet_in_trk & at_least_one_jet_in_hf) & (vec_b < 0.2)) selection.add('eemitigation', eemitigation) # HF-HF veto in SR both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0) selection.add('veto_hfhf', ~both_jets_in_hf.any()) # 
Divide into three categories for trigger study if cfg.RUN.TRIGGER_STUDY: two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs( diak4.i1.eta) <= 2.4) two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs( diak4.i1.eta) > 2.4) one_jet_forward_one_jet_central = (~two_central_jets) & ( ~two_forward_jets) selection.add('two_central_jets', two_central_jets.any()) selection.add('two_forward_jets', two_forward_jets.any()) selection.add('one_jet_forward_one_jet_central', one_jet_forward_one_jet_central.any()) # Dimuon CR leadmuon_index = muons.pt.argmax() selection.add('at_least_one_tight_mu', df['is_tight_muon'].any()) selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \ & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any()) selection.add('dimuon_charge', (dimuon_charge == 0).any()) selection.add('two_muons', muons.counts == 2) # Single muon CR selection.add('one_muon', muons.counts == 1) selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT) # Diele CR leadelectron_index = electrons.pt.argmax() selection.add('one_electron', electrons.counts == 1) selection.add('two_electrons', electrons.counts == 2) selection.add('at_least_one_tight_el', df['is_tight_electron'].any()) selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \ & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any()) selection.add('dielectron_charge', (dielectron_charge == 0).any()) # Single Ele CR selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET) selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT) # Photon CR leadphoton_index = photons.pt.argmax() df['is_tight_photon'] = photons.mediumId & photons.barrel selection.add('one_photon', photons.counts == 1) selection.add('at_least_one_tight_photon', df['is_tight_photon'].any()) selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT) selection.add('photon_pt_trig', photons.pt.max() > 
cfg.PHOTON.CUTS.TIGHT.PTTRIG) # Fill histograms output = self.accumulator.identity() # Gen if df['has_lhe_v_pt']: output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset) if 'LHE_Njets' in df: output['lhe_njets'].fill(dataset=dataset, multiplicity=df['LHE_Njets']) if 'LHE_HT' in df: output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT']) if 'LHE_HTIncoming' in df: output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming']) # Weights evaluator = evaluator_from_config(cfg) weights = processor.Weights(size=df.size, storeIndividual=True) if not df['is_data']: weights.add('gen', df['Generator_weight']) try: weights.add('prefire', df['PrefireWeight']) except KeyError: weights.add('prefire', np.ones(df.size)) weights = candidate_weights(weights, df, evaluator, muons, electrons, photons, cfg) weights = pileup_weights(weights, df, evaluator, cfg) weights = ak4_em_frac_weights(weights, diak4, evaluator) if not (gen_v_pt is None): weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt, df['mjj_gen']) # Save per-event values for synchronization if cfg.RUN.KINEMATICS.SAVE: for event in cfg.RUN.KINEMATICS.EVENTS: mask = df['event'] == event if not mask.any(): continue output['kinematics']['event'] += [event] output['kinematics']['met'] += [met_pt[mask]] output['kinematics']['met_phi'] += [met_phi[mask]] output['kinematics']['recoil'] += [df['recoil_pt'][mask]] output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]] output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt] output['kinematics']['ak4eta0'] += [ ak4[leadak4_index][mask].eta ] output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask] output['kinematics']['nLooseMu'] += [muons.counts[mask]] output['kinematics']['nTightMu'] += [ muons[df['is_tight_muon']].counts[mask] ] output['kinematics']['mupt0'] += [ muons[leadmuon_index][mask].pt ] output['kinematics']['mueta0'] += [ muons[leadmuon_index][mask].eta ] output['kinematics']['nLooseEl'] += [electrons.counts[mask]] 
output['kinematics']['nTightEl'] += [ electrons[df['is_tight_electron']].counts[mask] ] output['kinematics']['elpt0'] += [ electrons[leadelectron_index][mask].pt ] output['kinematics']['eleta0'] += [ electrons[leadelectron_index][mask].eta ] output['kinematics']['nLooseGam'] += [photons.counts[mask]] output['kinematics']['nTightGam'] += [ photons[df['is_tight_photon']].counts[mask] ] output['kinematics']['gpt0'] += [ photons[leadphoton_index][mask].pt ] output['kinematics']['geta0'] += [ photons[leadphoton_index][mask].eta ] # Sum of all weights to use for normalization # TODO: Deal with systematic variations output['nevents'][dataset] += df.size if not df['is_data']: output['sumw'][dataset] += df['genEventSumw'] output['sumw2'][dataset] += df['genEventSumw2'] output['sumw_pileup'][dataset] += weights._weights['pileup'].sum() regions = vbfhinv_regions(cfg) # Get veto weights (only for MC) if not df['is_data']: veto_weights = get_veto_weights(df, cfg, evaluator, electrons, muons, taus) for region, cuts in regions.items(): exclude = [None] region_weights = copy.deepcopy(weights) if not df['is_data']: ### Trigger weights if re.match(r'cr_(\d+)e.*', region): p_pass_data = 1 - (1 - evaluator["trigger_electron_eff_data"] (electrons.etasc, electrons.pt)).prod() p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"] (electrons.etasc, electrons.pt)).prod() trigger_weight = p_pass_data / p_pass_mc trigger_weight[np.isnan(trigger_weight)] = 1 region_weights.add('trigger', trigger_weight) elif re.match(r'cr_(\d+)m.*', region) or re.match( 'sr_.*', region): region_weights.add( 'trigger_met', evaluator["trigger_met"](df['recoil_pt'])) elif re.match(r'cr_g.*', region): photon_trigger_sf(region_weights, photons, df) # Veto weights if re.match('.*no_veto.*', region): exclude = [ "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight", "muon_id_loose", "muon_iso_loose", "ele_reco", "ele_id_tight", "ele_id_loose", "tau_id" ] region_weights.add( "veto", 
veto_weights.partial_weight(include=["nominal"])) # HEM-veto weights for signal region MC if re.match('^sr_vbf.*', region) and df['year'] == 2018: # Events that lie in the HEM-veto region events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6) # Weight is the "good lumi fraction" for 2018 weight = 21.1 / 59.7 hem_weight = np.where(events_to_weight_mask, weight, 1.0) region_weights.add("hem_weight", hem_weight) # This is the default weight for this region rweight = region_weights.partial_weight(exclude=exclude) # Blinding if (self._blind and df['is_data'] and region.startswith('sr')): continue # Cutflow plot for signal and control regions if any(x in region for x in ["sr", "cr", "tr"]): output['cutflow_' + region][dataset]['all'] += df.size for icut, cutname in enumerate(cuts): output['cutflow_' + region][dataset][cutname] += selection.all( *cuts[:icut + 1]).sum() mask = selection.all(*cuts) if cfg.RUN.SAVE.TREE: if region in ['cr_1e_vbf', 'cr_1m_vbf']: output['tree_int64'][region][ "event"] += processor.column_accumulator( df["event"][mask]) output['tree_float16'][region][ "gen_v_pt"] += processor.column_accumulator( np.float16(gen_v_pt[mask])) output['tree_float16'][region][ "gen_mjj"] += processor.column_accumulator( np.float16(df['mjj_gen'][mask])) output['tree_float16'][region][ "recoil_pt"] += processor.column_accumulator( np.float16(df["recoil_pt"][mask])) output['tree_float16'][region][ "recoil_phi"] += processor.column_accumulator( np.float16(df["recoil_phi"][mask])) output['tree_float16'][region][ "mjj"] += processor.column_accumulator( np.float16(df["mjj"][mask])) output['tree_float16'][region][ "leadak4_pt"] += processor.column_accumulator( np.float16(diak4.i0.pt[mask])) output['tree_float16'][region][ "leadak4_eta"] += processor.column_accumulator( np.float16(diak4.i0.eta[mask])) output['tree_float16'][region][ "leadak4_phi"] += processor.column_accumulator( np.float16(diak4.i0.phi[mask])) output['tree_float16'][region][ "trailak4_pt"] += 
processor.column_accumulator( np.float16(diak4.i1.pt[mask])) output['tree_float16'][region][ "trailak4_eta"] += processor.column_accumulator( np.float16(diak4.i1.eta[mask])) output['tree_float16'][region][ "trailak4_phi"] += processor.column_accumulator( np.float16(diak4.i1.phi[mask])) output['tree_float16'][region][ "minDPhiJetRecoil"] += processor.column_accumulator( np.float16(df["minDPhiJetRecoil"][mask])) if '_1e_' in region: output['tree_float16'][region][ "leadlep_pt"] += processor.column_accumulator( np.float16(electrons.pt.max()[mask])) output['tree_float16'][region][ "leadlep_eta"] += processor.column_accumulator( np.float16(electrons[ electrons.pt.argmax()].eta.max()[mask])) output['tree_float16'][region][ "leadlep_phi"] += processor.column_accumulator( np.float16(electrons[ electrons.pt.argmax()].phi.max()[mask])) elif '_1m_' in region: output['tree_float16'][region][ "leadlep_pt"] += processor.column_accumulator( np.float16(muons.pt.max()[mask])) output['tree_float16'][region][ "leadlep_eta"] += processor.column_accumulator( np.float16( muons[muons.pt.argmax()].eta.max()[mask])) output['tree_float16'][region][ "leadlep_phi"] += processor.column_accumulator( np.float16( muons[muons.pt.argmax()].phi.max()[mask])) for name, w in region_weights._weights.items(): output['tree_float16'][region][ f"weight_{name}"] += processor.column_accumulator( np.float16(w[mask])) output['tree_float16'][region][ f"weight_total"] += processor.column_accumulator( np.float16(rweight[mask])) if region == 'inclusive': output['tree_int64'][region][ "event"] += processor.column_accumulator( df["event"][mask]) for name in selection.names: output['tree_bool'][region][ name] += processor.column_accumulator( np.bool_(selection.all(*[name])[mask])) # Save the event numbers of events passing this selection # Save the event numbers of events passing this selection if cfg.RUN.SAVE.PASSING: output['selected_events'][region] += list(df['event'][mask]) # Multiplicities def fill_mult(name, 
candidates): output[name].fill(dataset=dataset, region=region, multiplicity=candidates[mask].counts, weight=rweight[mask]) fill_mult('ak4_mult', ak4[ak4.pt > 30]) fill_mult('bjet_mult', bjets) fill_mult('loose_ele_mult', electrons) fill_mult('tight_ele_mult', electrons[df['is_tight_electron']]) fill_mult('loose_muo_mult', muons) fill_mult('tight_muo_mult', muons[df['is_tight_muon']]) fill_mult('tau_mult', taus) fill_mult('photon_mult', photons) def ezfill(name, **kwargs): """Helper function to make filling easier.""" output[name].fill(dataset=dataset, region=region, **kwargs) # Monitor weights for wname, wvalue in region_weights._weights.items(): ezfill("weights", weight_type=wname, weight_value=wvalue[mask]) ezfill("weights_wide", weight_type=wname, weight_value=wvalue[mask]) # All ak4 # This is a workaround to create a weight array of the right dimension w_alljets = weight_shape(ak4[mask].eta, rweight[mask]) w_alljets_nopref = weight_shape( ak4[mask].eta, region_weights.partial_weight(exclude=exclude + ['prefire'])[mask]) ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets) ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets) ezfill('ak4_eta_nopref', jeteta=ak4[mask].eta.flatten(), weight=w_alljets_nopref) ezfill('ak4_phi_nopref', jetphi=ak4[mask].phi.flatten(), weight=w_alljets_nopref) ezfill('ak4_pt_nopref', jetpt=ak4[mask].pt.flatten(), weight=w_alljets_nopref) # Leading ak4 w_diak4 = weight_shape(diak4.pt[mask], rweight[mask]) ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(), weight=w_diak4) ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(), weight=w_diak4) ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(), weight=w_diak4) ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(), weight=w_diak4) ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(), weight=w_diak4) ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(), weight=w_diak4) 
ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(), weight=w_diak4) # Trailing ak4 ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(), weight=w_diak4) ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(), weight=w_diak4) ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(), weight=w_diak4) ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(), weight=w_diak4) ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(), weight=w_diak4) ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(), weight=w_diak4) ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(), weight=w_diak4) # B tag discriminator btag = getattr(ak4, cfg.BTAG.ALGO) w_btag = weight_shape(btag[mask], rweight[mask]) ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag) # MET ezfill('dpfcalo_cr', dpfcalo=df["dPFCaloCR"][mask], weight=rweight[mask]) ezfill('dpfcalo_sr', dpfcalo=df["dPFCaloSR"][mask], weight=rweight[mask]) ezfill('met', met=met_pt[mask], weight=rweight[mask]) ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask]) ezfill('recoil', recoil=df["recoil_pt"][mask], weight=rweight[mask]) ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=rweight[mask]) ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], weight=rweight[mask]) ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=rweight[mask]) ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask]) ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask]) ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask]) if gen_v_pt is not None: ezfill('gen_vpt', vpt=gen_v_pt[mask], weight=df['Generator_weight'][mask]) ezfill('gen_mjj', mjj=df['mjj_gen'][mask], weight=df['Generator_weight'][mask]) # Photon CR data-driven QCD estimate if df['is_data'] and re.match("cr_g.*", region) and re.match( "(SinglePhoton|EGamma).*", dataset): w_imp = photon_impurity_weights( photons[leadphoton_index].pt.max()[mask], df["year"]) output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset), region=region, 
mjj=df["mjj"][mask], weight=rweight[mask] * w_imp) output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset), region=region, recoil=df["recoil_pt"][mask], weight=rweight[mask] * w_imp) # Uncertainty variations if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']: theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')] for unc in theory_uncs: reweight = evaluator[unc](gen_v_pt) w = (region_weights.weight() * reweight)[mask] ezfill('mjj_unc', mjj=df['mjj'][mask], uncertainty=unc, weight=w) # Two dimensional ezfill('recoil_mjj', recoil=df["recoil_pt"][mask], mjj=df["mjj"][mask], weight=rweight[mask]) # Muons if '_1m_' in region or '_2m_' in region or 'no_veto' in region: w_allmu = weight_shape(muons.pt[mask], rweight[mask]) ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu) ezfill('muon_pt_abseta', pt=muons.pt[mask].flatten(), abseta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask]) ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu) # Dimuon if '_2m_' in region: w_dimu = weight_shape(dimuons.pt[mask], rweight[mask]) ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(), weight=w_dimu) ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(), weight=w_dimu) ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(), weight=w_dimu) ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(), weight=w_dimu) ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(), weight=w_dimu) ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(), weight=w_dimu) ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu) ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu) ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu) # Electrons if '_1e_' in region or '_2e_' in region or 'no_veto' in region: w_allel = weight_shape(electrons.pt[mask], rweight[mask]) ezfill('electron_pt', 
pt=electrons.pt[mask].flatten(), weight=w_allel) ezfill('electron_pt_eta', pt=electrons.pt[mask].flatten(), eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_mt', mt=df['MT_el'][mask], weight=rweight[mask]) ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel) # Dielectron if '_2e_' in region: w_diel = weight_shape(dielectrons.pt[mask], rweight[mask]) ezfill('electron_pt0', pt=dielectrons.i0.pt[mask].flatten(), weight=w_diel) ezfill('electron_pt1', pt=dielectrons.i1.pt[mask].flatten(), weight=w_diel) ezfill('electron_eta0', eta=dielectrons.i0.eta[mask].flatten(), weight=w_diel) ezfill('electron_eta1', eta=dielectrons.i1.eta[mask].flatten(), weight=w_diel) ezfill('electron_phi0', phi=dielectrons.i0.phi[mask].flatten(), weight=w_diel) ezfill('electron_phi1', phi=dielectrons.i1.phi[mask].flatten(), weight=w_diel) ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel) ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel) ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel) # Photon if '_g_' in region: w_leading_photon = weight_shape( photons[leadphoton_index].pt[mask], rweight[mask]) ezfill('photon_pt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) ezfill('photon_phi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon) ezfill('photon_pt0_recoil', pt=photons[leadphoton_index].pt[mask].flatten(), recoil=df['recoil_pt'][mask & (leadphoton_index.counts > 0)], weight=w_leading_photon) ezfill('photon_eta_phi', eta=photons[leadphoton_index].eta[mask].flatten(), phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon) # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask]) # Tau if 'no_veto' in region: 
w_all_taus = weight_shape(taus.pt[mask], rweight[mask]) ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus) # PV ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask]) ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=rweight[mask]) ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask]) ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask]) ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=exclude)[mask]) ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=exclude)[mask]) ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask]) ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=exclude + ['pileup'])[mask]) return output
def process(self, df):
    """Run the full VBF H(inv) event selection on one chunk and fill histograms.

    Builds physics-object collections, defines the per-event selection bits
    (PackedSelection), computes event weights for MC, then loops over the
    analysis regions and fills the output accumulator.

    :param df: coffea dataframe for one chunk of events
    :returns: filled accumulator (histograms, cutflows, sums of weights)
    """
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']

    # Dataset classification flags (LO/NLO V+jets, EWK V, photon, data)
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
        'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
            'is_lo_w_ewk'] | df['is_lo_z_ewk']
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt (used for theory reweighting)
    gen_v_pt = None
    if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
            'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_dress']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        # Final-state (status 1) photon with highest pt
        gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]
    bjets = bjets[bjets.puid]

    # Muons
    df['is_tight_muon'] = muons.tightId \
        & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
        & (muons.pt > cfg.MUON.CUTS.TIGHT.PT) \
        & (muons.abseta < cfg.MUON.CUTS.TIGHT.ETA)
    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
    # MT only meaningful for exactly one muon; multiplying by the counts==1
    # flag zeroes it otherwise, .max() reduces the jagged axis
    df['MT_mu'] = ((muons.counts == 1) *
                   mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = electrons.tightId \
        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
        & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)
    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
    df['MT_el'] = ((electrons.counts == 1) *
                   mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4
    leadak4_index = ak4.pt.argmax()

    # Minimum dR between the leading jet and loose leptons
    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                               muons, photons)
    # PF-vs-Calo MET agreement, normalized to the recoil
    df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                              df['recoil_phi'],
                                              njet=4,
                                              ptmin=30,
                                              etamax=4.7)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                           met_phi,
                                           njet=4,
                                           ptmin=30,
                                           etamax=4.7)
    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr',
                  df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo',
                  np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

    # HEM mitigation only applies to 2018 data-taking conditions
    if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
            and not cfg.RUN.SYNC):
        selection.add('hemveto', df['hemveto'])
    else:
        selection.add('hemveto', np.ones(df.size) == 1)

    # AK4 dijet
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    # Energy-fraction ID cuts only apply within tracker coverage (|eta|<=2.5)
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1
    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()
    # diak4 has one pair iff the event has >= 2 jets
    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj',
                  df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add(
        'dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add(
        'detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    # NOTE: 'two_electrons' was previously registered twice with the same
    # expression; the duplicate has been removed (duplicate names waste a
    # selection bit and are rejected by newer coffea versions).
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
                & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId \
        & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano",
                                    dataset=dataset)
    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights (MC only)
    evaluator = evaluator_from_config(cfg)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            # Samples without a prefire weight get unity
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons,
                                    electrons, photons)
        weights = pileup_weights(weights, df, evaluator, cfg)
        if gen_v_pt is not None:
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                         df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]
            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [
                ak4[leadak4_index][mask].eta
            ]
            # FIX: the mask must be applied to the array, not to the
            # enclosing Python list (indexing a list with a boolean array
            # raises TypeError).
            # NOTE(review): 'ak4.pt.max() < 0' is always False for physical
            # jets — looks like a placeholder for a btag quantity; confirm.
            output['kinematics']['leadbtag'] += [(ak4.pt.max() < 0)[mask]]
            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [
                muons[df['is_tight_muon']].counts[mask]
            ]
            output['kinematics']['mupt0'] += [
                muons[leadmuon_index][mask].pt
            ]
            output['kinematics']['mueta0'] += [
                muons[leadmuon_index][mask].eta
            ]
            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[mask]
            ]
            output['kinematics']['elpt0'] += [
                electrons[leadelectron_index][mask].pt
            ]
            output['kinematics']['eleta0'] += [
                electrons[leadelectron_index][mask].eta
            ]
            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [
                photons[df['is_tight_photon']].counts[mask]
            ]
            output['kinematics']['gpt0'] += [
                photons[leadphoton_index][mask].pt
            ]
            output['kinematics']['geta0'] += [
                photons[leadphoton_index][mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)
    for region, cuts in regions.items():
        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(
                    *cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=weights.weight()[mask])

        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide", weight_type=wname,
                   weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            weights.partial_weight(exclude=['prefire'])[mask])
        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)
        ezfill('ak4_eta_nopref',
               jeteta=ak4[mask].eta.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref',
               jetphi=ak4[mask].phi.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref',
               jetpt=ak4[mask].pt.flatten(),
               weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(),
               weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(),
               weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], weights.weight()[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask],
               weight=weights.weight()[mask])
        ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask],
               weight=weights.weight()[mask])
        ezfill('recoil_phi', phi=df["recoil_phi"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijj', dphi=df["dphijj"][mask],
               weight=weights.weight()[mask])
        ezfill('detajj', deta=df["detajj"][mask],
               weight=weights.weight()[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

        # Two dimensional
        ezfill('recoil_mjj',
               recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask],
               weight=weights.weight()[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region:
            w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask],
                   weight=weights.weight()[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(),
                   weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask],
                                      weights.weight()[mask])
                ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region:
            w_allel = weight_shape(electrons.pt[mask],
                                   weights.weight()[mask])
            ezfill('electron_pt', pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask],
                   weight=weights.weight()[mask])
            ezfill('electron_eta', eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_phi', phi=electrons.phi[mask].flatten(),
                   weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask],
                                      weights.weight()[mask])
                ezfill('electron_pt0',
                       pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1',
                       pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0',
                       eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1',
                       eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0',
                       phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1',
                       phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt',
                       pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta',
                       eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask],
                weights.weight()[mask])
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask &
                                          (leadphoton_index.counts > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

            # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask],
               weight=weights.weight()[mask])
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask],
               weight=weights.weight()[mask])
        ezfill('npv_nopu',
               nvtx=df['PV_npvs'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('npvgood_nopu',
               nvtx=df['PV_npvsGood'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_all',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=weights.weight()[mask])
        ezfill('rho_central',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=weights.weight()[mask])
        ezfill('rho_all_nopu',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
    return output
def process(self, df): # Dataset parameters dataset = df['dataset'] year = self._samples[dataset]['year'] xsec = self._samples[dataset]['xsec'] sow = self._samples[dataset]['nSumOfWeights'] isData = self._samples[dataset]['isData'] datasets = [ 'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon', 'DoubleElectron' ] for d in datasets: if d in dataset: dataset = dataset.split('_')[0] ### Recover objects, selection, functions and others... # Objects isTightMuon = self._objects['isTightMuonPOG'] isTightElectron = self._objects['isTightElectronPOG'] isGoodJet = self._objects['isGoodJet'] isMuonMVA = self._objects[ 'isMuonMVA'] #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15) isElecMVA = self._objects[ 'isElecMVA'] #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15) # Corrections GetMuonIsoSF = self._corrections['getMuonIso'] GetMuonIDSF = self._corrections['getMuonID'] # Selection passNJets = self._selection['passNJets'] passMETcut = self._selection['passMETcut'] passTrigger = self._selection['passTrigger'] # Functions pow2 = self._functions['pow2'] IsClosestToZ = self._functions['IsClosestToZ'] GetGoodTriplets = self._functions['GetGoodTriplets'] # Initialize objects met = Initialize({ 'pt': df['MET_pt'], 'eta': 0, 'phi': df['MET_phi'], 'mass': 0 }) e = Initialize({ 'pt': df['Electron_pt'], 'eta': df['Electron_eta'], 'phi': df['Electron_phi'], 'mass': df['Electron_mass'] }) mu = Initialize({ 'pt': df['Muon_pt'], 'eta': df['Muon_eta'], 'phi': df['Muon_phi'], 'mass': df['Muon_mass'] }) j = Initialize({ 'pt': df['Jet_pt'], 'eta': df['Jet_eta'], 'phi': df['Jet_phi'], 'mass': df['Jet_mass'] }) # Electron selection for key in self._e: e[key] = e.pt.zeros_like() if self._e[key] in df: e[key] = df[self._e[key]] #e['isGood'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, e.tightChrage, year) e['isGood'] = isElecMVA(e.pt, e.eta, e.dxy, e.dz, 
e.miniIso, e.sip3d, e.mvaTTH, e.elecMVA, e.lostHits, e.convVeto, e.tightCharge, minpt=10) leading_e = e[e.pt.argmax()] leading_e = leading_e[leading_e.isGood.astype(np.bool)] # Muon selection for key in self._mu: mu[key] = mu.pt.zeros_like() if self._mu[key] in df: mu[key] = df[self._mu[key]] #mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, mu.tightCharge, year) mu['isGood'] = isMuonMVA(mu.pt, mu.eta, mu.dxy, mu.dz, mu.miniIso, mu.sip3d, mu.mvaTTH, mu.mediumPrompt, mu.tightCharge, minpt=10) leading_mu = mu[mu.pt.argmax()] leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)] e = e[e.isGood.astype(np.bool)] mu = mu[mu.isGood.astype(np.bool)] nElec = e.counts nMuon = mu.counts twoLeps = (nElec + nMuon) == 2 threeLeps = (nElec + nMuon) == 3 twoElec = (nElec == 2) twoMuon = (nMuon == 2) e0 = e[e.pt.argmax()] m0 = mu[mu.pt.argmax()] # Jet selection j['deepjet'] = df['Jet_btagDeepFlavB'] for key in self._jet: j[key] = j.pt.zeros_like() if self._jet[key] in df: j[key] = df[self._jet[key]] j['isgood'] = isGoodJet(j.pt, j.eta, j.id) j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype( np.bool) #goodJets = j[(j['isgood'])&(j['isclean'])] #j0 = goodJets[goodJets.pt.argmax()] #nJets = goodJets.counts ################################################################## ### 2 same-sign leptons ################################################################## # emu singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)] singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)] em = singe.cross(singm) emSSmask = (em.i0.charge * em.i1.charge > 0) emSS = em[emSSmask] nemSS = len(emSS.flatten()) # ee and mumu # pt>-1 to preserve jagged dimensions ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)] mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)] eepairs = ee.distincts() eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0) eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15) eeoffZmask = (eeonZmask == 0) mmpairs = 
mm.distincts() mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0) mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15) mmoffZmask = (mmonZmask == 0) eeSSonZ = eepairs[eeSSmask & eeonZmask] eeSSoffZ = eepairs[eeSSmask & eeoffZmask] mmSSonZ = mmpairs[mmSSmask & mmonZmask] mmSSoffZ = mmpairs[mmSSmask & mmoffZmask] neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten()) nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten()) #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS)) # Cuts eeSSmask = (eeSSmask[eeSSmask].counts > 0) mmSSmask = (mmSSmask[mmSSmask].counts > 0) eeonZmask = (eeonZmask[eeonZmask].counts > 0) eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0) mmonZmask = (mmonZmask[mmonZmask].counts > 0) mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0) emSSmask = (emSSmask[emSSmask].counts > 0) # njets goodJets = j[(j.isclean) & (j.isgood)] njets = goodJets.counts ht = goodJets.pt.sum() j0 = goodJets[goodJets.pt.argmax()] # nbtags nbtags = goodJets[goodJets.deepjet > 0.2770].counts ################################################################## ### 3 leptons ################################################################## # eem muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)] elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)] ee_eem = elec_eem.distincts() ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs( (ee_eem.i0 + ee_eem.i1).mass - 91) < 15) ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs( (ee_eem.i0 + ee_eem.i1).mass - 91) > 15) ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0) ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0) eepair_eem = (ee_eem.i0 + ee_eem.i1) trilep_eem = eepair_eem.cross(muon_eem) trilep_eem = (trilep_eem.i0 + trilep_eem.i1) # mme muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)] elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)] mm_mme = muon_mme.distincts() mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & 
(np.abs( (mm_mme.i0 + mm_mme.i1).mass - 91) < 15) mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs( (mm_mme.i0 + mm_mme.i1).mass - 91) > 15) mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0) mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0) mmpair_mme = (mm_mme.i0 + mm_mme.i1) trilep_mme = mmpair_mme.cross(elec_mme) trilep_mme = (trilep_mme.i0 + trilep_mme.i1) mZ_mme = mmpair_mme.mass mZ_eem = eepair_eem.mass m3l_eem = trilep_eem.mass m3l_mme = trilep_mme.mass ### eee and mmm eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)] mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)] # Create pairs eee_groups = eee.distincts() mmm_groups = mmm.distincts() # Calculate the invariant mass of the pairs invMass_eee = ((eee_groups.i0 + eee_groups.i1).mass) invMass_mmm = ((mmm_groups.i0 + mmm_groups.i1).mass) # OS pairs isOSeee = ((eee_groups.i0.charge != eee_groups.i1.charge)) isOSmmm = ((mmm_groups.i0.charge != mmm_groups.i1.charge)) # Get the ones with a mass closest to the Z mass (and in a range of thr) clos_eee = IsClosestToZ(invMass_eee, thr=15) clos_mmm = IsClosestToZ(invMass_mmm, thr=15) # Finally, the mask for eee/mmm with/without OS onZ pair eeeOnZmask = (clos_eee) & (isOSeee) eeeOffZmask = (eeeOnZmask == 0) mmmOnZmask = (clos_mmm) & (isOSmmm) mmmOffZmask = (mmmOnZmask == 0) eeeOnZmask = (eeeOnZmask[eeeOnZmask].counts > 0) eeeOffZmask = (eeeOffZmask[eeeOffZmask].counts > 0) mmmOnZmask = (mmmOnZmask[mmmOnZmask].counts > 0) mmmOffZmask = (mmmOffZmask[mmmOffZmask].counts > 0) # Get Z and W invariant masses goodPairs_eee = eee_groups[(clos_eee) & (isOSeee)] eZ0 = goodPairs_eee.i0[goodPairs_eee.counts > 0].regular( ) #[(goodPairs_eee.counts>0)].regular() eZ1 = goodPairs_eee.i1[goodPairs_eee.counts > 0].regular( ) #[(goodPairs_eee.counts>0)].regular() goodPairs_mmm = mmm_groups[(clos_mmm) & (isOSmmm)] mZ0 = goodPairs_mmm.i0[goodPairs_mmm.counts > 0].regular( ) #[(goodPairs_eee.counts>0)].regular() mZ1 = goodPairs_mmm.i1[goodPairs_mmm.counts 
> 0].regular( ) #[(goodPairs_eee.counts>0)].regular() eee_reg = eee[(eeeOnZmask)].regular() eW = np.append(eee_reg, eZ0, axis=1) eW = np.append(eW, eZ1, axis=1) eWmask = np.apply_along_axis( lambda a: [list(a).count(x) == 1 for x in a], 1, eW) eW = eW[eWmask] mmm_reg = mmm[(mmmOnZmask)].regular() mW = np.append(mmm_reg, mZ0, axis=1) mW = np.append(mW, mZ1, axis=1) mWmask = np.apply_along_axis( lambda a: [list(a).count(x) == 1 for x in a], 1, mW) mW = mW[mWmask] eZ = [x + y for x, y in zip(eZ0, eZ1)] triElec = [x + y for x, y in zip(eZ, eW)] mZ_eee = [t[0].mass for t in eZ] m3l_eee = [t[0].mass for t in triElec] mZ = [x + y for x, y in zip(mZ0, mZ1)] triMuon = [x + y for x, y in zip(mZ, mW)] mZ_mmm = [t[0].mass for t in mZ] m3l_mmm = [t[0].mass for t in triMuon] # Triggers #passTrigger = lambda df, n, m, o : np.ones_like(df['MET_pt'], dtype=np.bool) # XXX trig_eeSS = passTrigger(df, 'ee', isData, dataset) trig_mmSS = passTrigger(df, 'mm', isData, dataset) trig_emSS = passTrigger(df, 'em', isData, dataset) trig_eee = passTrigger(df, 'eee', isData, dataset) trig_mmm = passTrigger(df, 'mmm', isData, dataset) trig_eem = passTrigger(df, 'eem', isData, dataset) trig_mme = passTrigger(df, 'mme', isData, dataset) # MET filters # Weights genw = np.ones_like(df['MET_pt']) if isData else df['genWeight'] weights = processor.Weights(df.size) weights.add('norm', genw if isData else (xsec / sow) * genw) # Selections and cuts selections = processor.PackedSelection() channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS'] selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS)) selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS)) selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS)) selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS)) selections.add('emSS', (emSSmask) & (trig_emSS)) channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ'] selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem)) 
selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem)) selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme)) selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme)) channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ'] selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee)) selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee)) selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm)) selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm)) levels = ['base', '2jets', '4jets', '4j1b', '4j2b'] selections.add('base', (nElec + nMuon >= 2)) selections.add('2jets', (njets >= 2)) selections.add('4jets', (njets >= 4)) selections.add('4j1b', (njets >= 4) & (nbtags >= 1)) selections.add('4j2b', (njets >= 4) & (nbtags >= 2)) # Variables invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass invMass_emSS = (emSS.i0 + emSS.i1).mass varnames = {} varnames['met'] = met.pt varnames['ht'] = ht varnames['njets'] = njets varnames['nbtags'] = nbtags varnames['invmass'] = { 'eeSSonZ': invMass_eeSSonZ, 'eeSSoffZ': invMass_eeSSoffZ, 'mmSSonZ': invMass_mmSSonZ, 'mmSSoffZ': invMass_mmSSoffZ, 'emSS': invMass_emSS, 'eemSSonZ': mZ_eem, 'eemSSoffZ': mZ_eem, 'mmeSSonZ': mZ_mme, 'mmeSSoffZ': mZ_mme, 'eeeSSonZ': mZ_eee, 'eeeSSoffZ': mZ_eee, 'mmmSSonZ': mZ_mmm, 'mmmSSoffZ': mZ_mmm, } varnames['m3l'] = { 'eemSSonZ': m3l_eem, 'eemSSoffZ': m3l_eem, 'mmeSSonZ': m3l_mme, 'mmeSSoffZ': m3l_mme, 'eeeSSonZ': m3l_eee, 'eeeSSoffZ': m3l_eee, 'mmmSSonZ': m3l_mmm, 'mmmSSoffZ': m3l_mmm, } varnames['e0pt'] = e0.pt varnames['e0eta'] = e0.eta varnames['m0pt'] = m0.pt varnames['m0eta'] = m0.eta varnames['j0pt'] = j0.pt varnames['j0eta'] = j0.eta varnames['counts'] = np.ones_like(df['MET_pt'], dtype=np.int) # Fill Histos hout = self.accumulator.identity() hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size) for var, v in varnames.items(): for ch in 
channels2LSS + channels3L: for lev in levels: weight = weights.weight() cuts = [ch] + [lev] cut = selections.all(*cuts) weights_flat = weight[cut].flatten() weights_ones = np.ones_like(weights_flat, dtype=np.int) if var == 'invmass': if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue elif ch in ['eeeSSonZ', 'mmmSSonZ']: continue #values = v[ch] else: values = v[ch][cut].flatten() hout['invmass'].fill(sample=dataset, channel=ch, cut=lev, invmass=values, weight=weights_flat) elif var == 'm3l': if ch in [ 'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ', 'mmmSSonZ' ]: continue values = v[ch][cut].flatten() hout['m3l'].fill(sample=dataset, channel=ch, cut=lev, m3l=values, weight=weights_flat) else: values = v[cut].flatten() if var == 'ht': hout[var].fill(ht=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'met': hout[var].fill(met=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'njets': hout[var].fill(njets=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'nbtags': hout[var].fill(nbtags=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'counts': hout[var].fill(counts=values, sample=dataset, channel=ch, cut=lev, weight=weights_ones) elif var == 'e0pt': if ch in [ 'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ' ]: continue hout[var].fill(e0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'm0pt': if ch in [ 'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ' ]: continue hout[var].fill(m0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'e0eta': if ch in [ 'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ' ]: continue hout[var].fill(e0eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'm0eta': if ch in [ 'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ' ]: continue hout[var].fill(m0eta=values, sample=dataset, channel=ch, cut=lev, 
weight=weights_flat) elif var == 'j0pt': if lev == 'base': continue hout[var].fill(j0pt=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) elif var == 'j0eta': if lev == 'base': continue hout[var].fill(j0eta=values, sample=dataset, channel=ch, cut=lev, weight=weights_flat) return hout
def process(self, events): dataset = events.metadata['dataset'] selected_regions = [] for region, samples in self._samples.items(): for sample in samples: if sample not in dataset: continue selected_regions.append(region) isData = 'genWeight' not in events.columns selection = processor.PackedSelection() weights = {} hout = self.accumulator.identity() ### #Getting corrections, ids from .coffea files ### Sunil Need to check why we need corrections #get_msd_weight = self._corrections['get_msd_weight'] get_ttbar_weight = self._corrections['get_ttbar_weight'] get_nlo_weight = self._corrections['get_nlo_weight'][self._year] get_nnlo_weight = self._corrections['get_nnlo_weight'] get_nnlo_nlo_weight = self._corrections['get_nnlo_nlo_weight'] get_adhoc_weight = self._corrections['get_adhoc_weight'] get_pu_weight = self._corrections['get_pu_weight'][self._year] get_met_trig_weight = self._corrections['get_met_trig_weight'][self._year] get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight'][self._year] get_ele_trig_weight = self._corrections['get_ele_trig_weight'][self._year] get_pho_trig_weight = self._corrections['get_pho_trig_weight'][self._year] get_ele_loose_id_sf = self._corrections['get_ele_loose_id_sf'][self._year] get_ele_tight_id_sf = self._corrections['get_ele_tight_id_sf'][self._year] get_ele_loose_id_eff = self._corrections['get_ele_loose_id_eff'][self._year] get_ele_tight_id_eff = self._corrections['get_ele_tight_id_eff'][self._year] get_pho_tight_id_sf = self._corrections['get_pho_tight_id_sf'][self._year] get_mu_tight_id_sf = self._corrections['get_mu_tight_id_sf'][self._year] get_mu_loose_id_sf = self._corrections['get_mu_loose_id_sf'][self._year] get_ele_reco_sf = self._corrections['get_ele_reco_sf'][self._year] get_mu_tight_iso_sf = self._corrections['get_mu_tight_iso_sf'][self._year] get_mu_loose_iso_sf = self._corrections['get_mu_loose_iso_sf'][self._year] get_ecal_bad_calib = self._corrections['get_ecal_bad_calib'] get_deepflav_weight = 
self._corrections['get_btag_weight']['deepflav'][self._year] Jetevaluator = self._corrections['Jetevaluator'] isLooseElectron = self._ids['isLooseElectron'] isTightElectron = self._ids['isTightElectron'] isLooseMuon = self._ids['isLooseMuon'] isTightMuon = self._ids['isTightMuon'] isLooseTau = self._ids['isLooseTau'] isLoosePhoton = self._ids['isLoosePhoton'] isTightPhoton = self._ids['isTightPhoton'] isGoodJet = self._ids['isGoodJet'] #isGoodFatJet = self._ids['isGoodFatJet'] isHEMJet = self._ids['isHEMJet'] match = self._common['match'] deepflavWPs = self._common['btagWPs']['deepflav'][self._year] deepcsvWPs = self._common['btagWPs']['deepcsv'][self._year] ### # Derive jet corrector for JEC/JER ### JECcorrector = FactorizedJetCorrector(**{name: Jetevaluator[name] for name in self._jec[self._year]}) JECuncertainties = JetCorrectionUncertainty(**{name:Jetevaluator[name] for name in self._junc[self._year]}) JER = JetResolution(**{name:Jetevaluator[name] for name in self._jr[self._year]}) JERsf = JetResolutionScaleFactor(**{name:Jetevaluator[name] for name in self._jersf[self._year]}) Jet_transformer = JetTransformer(jec=JECcorrector,junc=JECuncertainties, jer = JER, jersf = JERsf) ### #Initialize global quantities (MET ecc.) ### met = events.MET met['T'] = TVector2Array.from_polar(met.pt, met.phi) met['p4'] = TLorentzVectorArray.from_ptetaphim(met.pt, 0., met.phi, 0.) 
calomet = events.CaloMET ### #Initialize physics objects ### e = events.Electron e['isloose'] = isLooseElectron(e.pt,e.eta,e.dxy,e.dz,e.cutBased,self._year) e['istight'] = isTightElectron(e.pt,e.eta,e.dxy,e.dz,e.cutBased,self._year) e['T'] = TVector2Array.from_polar(e.pt, e.phi) #e['p4'] = TLorentzVectorArray.from_ptetaphim(e.pt, e.eta, e.phi, e.mass) e_loose = e[e.isloose.astype(np.bool)] e_tight = e[e.istight.astype(np.bool)] e_ntot = e.counts e_nloose = e_loose.counts e_ntight = e_tight.counts leading_e = e[e.pt.argmax()] leading_e = leading_e[leading_e.istight.astype(np.bool)] mu = events.Muon mu['isloose'] = isLooseMuon(mu.pt,mu.eta,mu.pfRelIso04_all,mu.looseId,self._year) mu['istight'] = isTightMuon(mu.pt,mu.eta,mu.pfRelIso04_all,mu.tightId,self._year) mu['T'] = TVector2Array.from_polar(mu.pt, mu.phi) #mu['p4'] = TLorentzVectorArray.from_ptetaphim(mu.pt, mu.eta, mu.phi, mu.mass) mu_loose=mu[mu.isloose.astype(np.bool)] mu_tight=mu[mu.istight.astype(np.bool)] mu_ntot = mu.counts mu_nloose = mu_loose.counts mu_ntight = mu_tight.counts leading_mu = mu[mu.pt.argmax()] leading_mu = leading_mu[leading_mu.istight.astype(np.bool)] tau = events.Tau tau['isclean']=~match(tau,mu_loose,0.5)&~match(tau,e_loose,0.5) tau['isloose']=isLooseTau(tau.pt,tau.eta,tau.idDecayMode,tau.idMVAoldDM2017v2,self._year) tau_clean=tau[tau.isclean.astype(np.bool)] tau_loose=tau_clean[tau_clean.isloose.astype(np.bool)] tau_ntot=tau.counts tau_nloose=tau_loose.counts pho = events.Photon pho['isclean']=~match(pho,mu_loose,0.5)&~match(pho,e_loose,0.5) _id = 'cutBasedBitmap' if self._year=='2016': _id = 'cutBased' pho['isloose']=isLoosePhoton(pho.pt,pho.eta,pho[_id],self._year) pho['istight']=isTightPhoton(pho.pt,pho.eta,pho[_id],self._year) pho['T'] = TVector2Array.from_polar(pho.pt, pho.phi) #pho['p4'] = TLorentzVectorArray.from_ptetaphim(pho.pt, pho.eta, pho.phi, pho.mass) pho_clean=pho[pho.isclean.astype(np.bool)] pho_loose=pho_clean[pho_clean.isloose.astype(np.bool)] 
pho_tight=pho_clean[pho_clean.istight.astype(np.bool)] pho_ntot=pho.counts pho_nloose=pho_loose.counts pho_ntight=pho_tight.counts leading_pho = pho[pho.pt.argmax()] leading_pho = leading_pho[leading_pho.isclean.astype(np.bool)] leading_pho = leading_pho[leading_pho.istight.astype(np.bool)] j = events.Jet j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId, j.neHEF, j.neEmEF, j.chHEF, j.chEmEF) j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi) j['isclean'] = ~match(j,e_loose,0.4)&~match(j,mu_loose,0.4)&~match(j,pho_loose,0.4) #j['isiso'] = ~match(j,fj_clean,1.5) # What is this ????? j['isdcsvL'] = (j.btagDeepB>deepcsvWPs['loose']) j['isdflvL'] = (j.btagDeepFlavB>deepflavWPs['loose']) j['T'] = TVector2Array.from_polar(j.pt, j.phi) j['p4'] = TLorentzVectorArray.from_ptetaphim(j.pt, j.eta, j.phi, j.mass) j['ptRaw'] =j.pt * (1-j.rawFactor) j['massRaw'] = j.mass * (1-j.rawFactor) j['rho'] = j.pt.ones_like()*events.fixedGridRhoFastjetAll.array j_good = j[j.isgood.astype(np.bool)] j_clean = j_good[j_good.isclean.astype(np.bool)] # USe this instead of j_iso Sunil #j_iso = j_clean[j_clean.isiso.astype(np.bool)] j_iso = j_clean[j_clean.astype(np.bool)] #Sunil changed j_dcsvL = j_iso[j_iso.isdcsvL.astype(np.bool)] j_dflvL = j_iso[j_iso.isdflvL.astype(np.bool)] j_HEM = j[j.isHEM.astype(np.bool)] j_ntot=j.counts j_ngood=j_good.counts j_nclean=j_clean.counts j_niso=j_iso.counts j_ndcsvL=j_dcsvL.counts j_ndflvL=j_dflvL.counts j_nHEM = j_HEM.counts leading_j = j[j.pt.argmax()] leading_j = leading_j[leading_j.isgood.astype(np.bool)] leading_j = leading_j[leading_j.isclean.astype(np.bool)] ### #Calculating derivatives ### ele_pairs = e_loose.distincts() diele = ele_pairs.i0+ele_pairs.i1 diele['T'] = TVector2Array.from_polar(diele.pt, diele.phi) leading_ele_pair = ele_pairs[diele.pt.argmax()] leading_diele = diele[diele.pt.argmax()] mu_pairs = mu_loose.distincts() dimu = mu_pairs.i0+mu_pairs.i1 dimu['T'] = TVector2Array.from_polar(dimu.pt, dimu.phi) leading_mu_pair = mu_pairs[dimu.pt.argmax()] 
leading_dimu = dimu[dimu.pt.argmax()] ### # Calculate recoil ### HT, LT, dPhi, mT_{W}, MT_misET um = met.T+leading_mu.T.sum() ue = met.T+leading_e.T.sum() umm = met.T+leading_dimu.T.sum() uee = met.T+leading_diele.T.sum() ua = met.T+leading_pho.T.sum() #Need help from Matteo u = {} u['sr']=met.T u['wecr']=ue u['tecr']=ue u['wmcr']=um u['tmcr']=um u['zecr']=uee u['zmcr']=umm u['gcr']=ua ### #Calculating weights ### if not isData: ### # JEC/JER ### #j['ptGenJet'] = j.matched_gen.pt #Jet_transformer.transform(j) gen = events.GenPart #Need to understand this part Sunil gen['isb'] = (abs(gen.pdgId)==5)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) gen['isc'] = (abs(gen.pdgId)==4)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) gen['isTop'] = (abs(gen.pdgId)==6)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) gen['isW'] = (abs(gen.pdgId)==24)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) gen['isZ'] = (abs(gen.pdgId)==23)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) gen['isA'] = (abs(gen.pdgId)==22)&gen.hasFlags(['fromHardProcess', 'isLastCopy']) genTops = gen[gen.isTop] genWs = gen[gen.isW] genZs = gen[gen.isZ] genAs = gen[gen.isA] nlo = np.ones(events.size) nnlo = np.ones(events.size) nnlo_nlo = np.ones(events.size) adhoc = np.ones(events.size) if('TTJets' in dataset): nlo = np.sqrt(get_ttbar_weight(genTops[:,0].pt.sum()) * get_ttbar_weight(genTops[:,1].pt.sum())) #elif('GJets' in dataset): # nlo = get_nlo_weight['a'](genAs.pt.max()) elif('WJets' in dataset): #nlo = get_nlo_weight['w'](genWs.pt.max()) #if self._year != '2016': adhoc = get_adhoc_weight['w'](genWs.pt.max()) #nnlo = get_nnlo_weight['w'](genWs.pt.max()) nnlo_nlo = get_nnlo_nlo_weight['w'](genWs.pt.max())*(genWs.pt.max()>100).astype(np.int) + (genWs.pt.max()<=100).astype(np.int) elif('DY' in dataset): #nlo = get_nlo_weight['z'](genZs.pt.max()) #if self._year != '2016': adhoc = get_adhoc_weight['z'](genZs.pt.max()) #nnlo = get_nnlo_weight['dy'](genZs.pt.max()) nnlo_nlo = 
get_nnlo_nlo_weight['dy'](genZs.pt.max())*(genZs.pt.max()>100).astype(np.int) + (genZs.pt.max()<=100).astype(np.int) elif('ZJets' in dataset): #nlo = get_nlo_weight['z'](genZs.pt.max()) #if self._year != '2016': adhoc = get_adhoc_weight['z'](genZs.pt.max()) #nnlo = get_nnlo_weight['z'](genZs.pt.max()) nnlo_nlo = get_nnlo_nlo_weight['z'](genZs.pt.max())*(genZs.pt.max()>100).astype(np.int) + (genZs.pt.max()<=100).astype(np.int) ### # Calculate PU weight and systematic variations ### pu = get_pu_weight['cen'](events.PV.npvs) #puUp = get_pu_weight['up'](events.PV.npvs) #puDown = get_pu_weight['down'](events.PV.npvs) ### # Trigger efficiency weight ### ele1_trig_weight = get_ele_trig_weight(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) ele2_trig_weight = get_ele_trig_weight(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) # Need Help from Matteo trig = {} trig['sre'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum()) trig['srm'] = #Need be fixed in Util first trig['ttbare'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum()) trig['ttbarm'] = #Need be fixed in Util first trig['wjete'] = get_ele_trig_weight(leading_e.eta.sum(), leading_e.pt.sum()) trig['wjetm'] = #Need be fixed in Util first trig['dilepe'] = 1 - (1-ele1_trig_weight)*(1-ele2_trig_weight) #trig['dilepm'] = Need be fixed in Util first # For muon ID weights, SFs are given as a function of abs(eta), but in 2016 ## mueta = abs(leading_mu.eta.sum()) mu1eta=abs(leading_mu_pair.i0.eta.sum()) mu2eta=abs(leading_mu_pair.i1.eta.sum()) if self._year=='2016': mueta=leading_mu.eta.sum() mu1eta=leading_mu_pair.i0.eta.sum() mu2eta=leading_mu_pair.i1.eta.sum() ### # Calculating electron and muon ID SF and efficiencies (when provided) ### mu1Tsf = get_mu_tight_id_sf(mu1eta,leading_mu_pair.i0.pt.sum()) mu2Tsf = get_mu_tight_id_sf(mu2eta,leading_mu_pair.i1.pt.sum()) mu1Lsf = get_mu_loose_id_sf(mu1eta,leading_mu_pair.i0.pt.sum()) mu2Lsf = 
get_mu_loose_id_sf(mu2eta,leading_mu_pair.i1.pt.sum()) e1Tsf = get_ele_tight_id_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) e2Tsf = get_ele_tight_id_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) e1Lsf = get_ele_loose_id_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) e2Lsf = get_ele_loose_id_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) e1Teff= get_ele_tight_id_eff(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) e2Teff= get_ele_tight_id_eff(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) e1Leff= get_ele_loose_id_eff(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) e2Leff= get_ele_loose_id_eff(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) # Need Help from Matteo ids={} ids['sre'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum()) ids['srm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum()) ids['ttbare'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum()) ids['ttbarm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum()) ids['wjete'] = get_ele_tight_id_sf(leading_e.eta.sum(),leading_e.pt.sum()) ids['wjetm'] = get_mu_tight_id_sf(mueta,leading_mu.pt.sum()) ids['dilepe'] = e1Lsf*e2Lsf ids['dilepm'] = mu1Lsf*mu2Lsf ### # Reconstruction weights for electrons ### e1sf_reco = get_ele_reco_sf(leading_ele_pair.i0.eta.sum(),leading_ele_pair.i0.pt.sum()) e2sf_reco = get_ele_reco_sf(leading_ele_pair.i1.eta.sum(),leading_ele_pair.i1.pt.sum()) # Need Help from Matteo reco = {} reco['sre'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum()) reco['srm'] = np.ones(events.size) reco['ttbare'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum()) reco['ttbarm'] = np.ones(events.size) reco['wjete'] = get_ele_reco_sf(leading_e.eta.sum(),leading_e.pt.sum()) reco['wjetm'] = np.ones(events.size) reco['dilepe'] = e1sf_reco * e2sf_reco reco['dilepm'] = np.ones(events.size) ### # Isolation weights for muons ### mu1Tsf_iso = 
get_mu_tight_iso_sf(mu1eta,leading_mu_pair.i0.pt.sum()) mu2Tsf_iso = get_mu_tight_iso_sf(mu2eta,leading_mu_pair.i1.pt.sum()) mu1Lsf_iso = get_mu_loose_iso_sf(mu1eta,leading_mu_pair.i0.pt.sum()) mu2Lsf_iso = get_mu_loose_iso_sf(mu2eta,leading_mu_pair.i1.pt.sum()) # Need Help from Matteo isolation = {} isolation['sre'] = np.ones(events.size) isolation['srm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum()) isolation['ttbare'] = np.ones(events.size) isolation['ttbarm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum()) isolation['wjete'] = np.ones(events.size) isolation['wjetm'] = get_mu_tight_iso_sf(mueta,leading_mu.pt.sum()) isolation['dilepe'] = np.ones(events.size) isolation['dilepm'] = mu1Lsf_iso*mu2Lsf_iso ### # AK4 b-tagging weights ### btag = {} btagUp = {} btagDown = {} # Need Help from Matteo btag['sr'], btagUp['sr'], btagDown['sr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') btag['wmcr'], btagUp['wmcr'], btagDown['wmcr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') btag['tmcr'], btagUp['tmcr'], btagDown['tmcr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'-1') btag['wecr'], btagUp['wecr'], btagDown['wecr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') btag['tecr'], btagUp['tecr'], btagDown['tecr'] = get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'-1') btag['zmcr'], btagUp['zmcr'], btagDown['zmcr'] = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') btag['zecr'], btagUp['zecr'], btagDown['zecr'] = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') btag['gcr'], btagUp['gcr'], btagDown['gcr'] = np.ones(events.size), np.ones(events.size), np.ones(events.size)#get_deepflav_weight['loose'](j_iso.pt,j_iso.eta,j_iso.hadronFlavour,'0') for r in selected_regions: 
weights[r] = processor.Weights(len(events)) weights[r].add('genw',events.genWeight) weights[r].add('nlo',nlo) #weights[r].add('adhoc',adhoc) #weights[r].add('nnlo',nnlo) weights[r].add('nnlo_nlo',nnlo_nlo) weights[r].add('pileup',pu)#,puUp,puDown) weights[r].add('trig', trig[r]) weights[r].add('ids', ids[r]) weights[r].add('reco', reco[r]) weights[r].add('isolation', isolation[r]) weights[r].add('btag',btag[r], btagUp[r], btagDown[r]) #leading_fj = fj[fj.pt.argmax()] #leading_fj = leading_fj[leading_fj.isgood.astype(np.bool)] #leading_fj = leading_fj[leading_fj.isclean.astype(np.bool)] ### #Importing the MET filters per year from metfilters.py and constructing the filter boolean ### met_filters = np.ones(events.size, dtype=np.bool) for flag in AnalysisProcessor.met_filter_flags[self._year]: met_filters = met_filters & events.Flag[flag] selection.add('met_filters',met_filters) triggers = np.zeros(events.size, dtype=np.bool) for path in self._met_triggers[self._year]: if path not in events.HLT.columns: continue triggers = triggers | events.HLT[path] selection.add('met_triggers', triggers) triggers = np.zeros(events.size, dtype=np.bool) for path in self._singleelectron_triggers[self._year]: if path not in events.HLT.columns: continue triggers = triggers | events.HLT[path] selection.add('singleelectron_triggers', triggers) triggers = np.zeros(events.size, dtype=np.bool) for path in self._singlemuon_triggers[self._year]: if path not in events.HLT.columns: continue triggers = triggers | events.HLT[path] selection.add('singlemuon_triggers', triggers) triggers = np.zeros(events.size, dtype=np.bool) for path in self._singlephoton_triggers[self._year]: if path not in events.HLT.columns: continue triggers = triggers | events.HLT[path] selection.add('singlephoton_triggers', triggers) noHEMj = np.ones(events.size, dtype=np.bool) if self._year=='2018': noHEMj = (j_nHEM==0) selection.add('iszeroL', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)