def process(self, df):
    """Coffea processor event loop.

    Builds per-event weights, selects gen jets that are NOT dR-matched to a
    dark photon (pid == 32), and fills the 'njets' histogram with the number
    of such jets whose pT exceeds the event's highest dark-photon pT.

    Parameters
    ----------
    df : LazyDataFrame-like chunk of events (columnar access by branch name).

    Returns
    -------
    The accumulator with 'njets' filled for this chunk.
    """
    output = self.accumulator.identity()
    # Empty chunk: nothing to fill.
    if df.size == 0: return output
    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # MC-only weights: generator weight and pileup reweighting
        # (self.pucorrs presumably yields central/up/down — TODO confirm).
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # Event kept if ANY configured trigger path fired.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################

    # Gen-level jets; four-vector components are stored as flat .content
    # arrays keyed by the per-event counts column 'genjet_p4'.
    genjets = JaggedCandidateArray.candidatesfromcounts(
        df['genjet_p4'],
        px=df['genjet_p4.fCoordinates.fX'].content,
        py=df['genjet_p4.fCoordinates.fY'].content,
        pz=df['genjet_p4.fCoordinates.fZ'].content,
        energy=df['genjet_p4.fCoordinates.fT'].content,
    )
    # Gen particles, carrying the PDG id so dark photons can be selected.
    genparticles = JaggedCandidateArray.candidatesfromcounts(
        df['gen_p4'],
        px=df['gen_p4.fCoordinates.fX'].content,
        py=df['gen_p4.fCoordinates.fY'].content,
        pz=df['gen_p4.fCoordinates.fZ'].content,
        energy=df['gen_p4.fCoordinates.fT'].content,
        pid=df['gen_pid'].content,
    )
    # pid == 32 is used here as the dark-photon id.
    darkphotons = genparticles[genparticles.pid == 32]
    # Highest dark-photon pT in each event (jagged max per event).
    dpptmax = darkphotons.pt.max()
    # Veto gen jets within dR < 0.4 of any dark photon; keep the rest.
    mask_ = genjets.match(darkphotons, deltaRCut=0.4)
    genjets = genjets[~mask_]
    output['njets'].fill(
        dataset=dataset,
        cnt=genjets[genjets.pt > dpptmax].counts,
        weight=weight,
    )
    return output
def process(self, events):
    """Coffea (NanoEvents) processor for the b-tag-muon-enriched region.

    Builds AK15 fat-jet quantities (soft-drop mass, ZHbb discriminant,
    tau21), gen-level b/c matching flags for MC, a muon-in-subjet
    requirement, then applies trigger/MET-filter/jet selections and fills
    cutflow, per-variable histograms, and the btagJP / SV-mass templates
    used for the bb scale factor.

    Returns the accumulator ('hout') for this chunk.
    """
    dataset = events.metadata['dataset']
    # Data chunks have no 'genWeight' branch.
    isData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    hout = self.accumulator.identity()

    ###
    #Getting ids from .coffea files
    ###
    get_msd_weight = self._corrections['get_msd_weight']
    isLooseMuon = self._ids['isLooseMuon']
    isTightMuon = self._ids['isTightMuon']
    isGoodFatJet = self._ids['isGoodFatJet']
    match = self._common['match']

    ###
    #Initialize physics objects
    ###
    mu = events.Muon
    leading_mu = mu[mu.pt.argmax()]
    fj = events.AK15Puppi
    # Soft-drop candidate: four-vector sum of the subjets.
    fj['sd'] = fj.subjets.sum()
    fj['isgood'] = isGoodFatJet(fj.sd.pt, fj.sd.eta, fj.jetId)
    fj['T'] = TVector2Array.from_polar(fj.pt, fj.phi)
    # Raw soft-drop mass from uncorrected (rawFactor-undone) subjets.
    fj['msd_raw'] = (fj.subjets * (1 - fj.subjets.rawFactor)).sum().mass
    # Corrected msd: multiply by the msd weight, floored at 1e-5, rewrapped
    # to the fat-jet jaggedness via fromoffsets.
    fj['msd_corr'] = fj.msd_raw * awkward.JaggedArray.fromoffsets(
        fj.array.offsets,
        np.maximum(
            1e-5,
            get_msd_weight(fj.sd.pt.flatten(), fj.sd.eta.flatten())))
    # Tagger discriminant: Z/H->bb vs QCD.
    probQCD = fj.probQCDbb + fj.probQCDcc + fj.probQCDb + fj.probQCDc + fj.probQCDothers
    probZHbb = fj.probZbb + fj.probHbb
    fj['ZHbbvsQCD'] = probZHbb / (probZHbb + probQCD)
    fj['tau21'] = fj.tau2 / fj.tau1
    SV = events.SV

    ###
    # Calculating weights
    ###
    if not isData:
        # Gen-level flavour matching: flag fat jets with exactly 1 (isb/isc)
        # or exactly 2 (isbb/iscc) hard-process b/c quarks within dR < 1.5.
        gen = events.GenPart
        gen['isb'] = (abs(gen.pdgId) == 5) & gen.hasFlags(
            ['fromHardProcess', 'isLastCopy'])
        jetgenb = fj.sd.cross(gen[gen.isb], nested=True)
        bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum() ==
                  1) & (gen[gen.isb].counts > 0)
        fj['isb'] = bmatch
        bmatch = ((jetgenb.i0.delta_r(jetgenb.i1) < 1.5).sum() ==
                  2) & (gen[gen.isb].counts > 0)
        fj['isbb'] = bmatch
        gen['isc'] = (abs(gen.pdgId) == 4) & gen.hasFlags(
            ['fromHardProcess', 'isLastCopy'])
        jetgenc = fj.sd.cross(gen[gen.isc], nested=True)
        cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum() ==
                  1) & (gen[gen.isc].counts > 0)
        fj['isc'] = cmatch
        cmatch = ((jetgenc.i0.delta_r(jetgenc.i1) < 1.5).sum() ==
                  2) & (gen[gen.isc].counts > 0)
        fj['iscc'] = cmatch

    ##### axis=1 option to remove boundaries between fat-jets #####
    ##### copy (match jaggedness and shape of array) the contents of crossed array into the fat-jet subjets #####
    ##### we're not use copy since it keeps the original array type #####
    ##### fj.subjets is a TLorentzVectorArray #####
    mu = mu[mu.isGlobal]  ## Use a global muon for QCD events
    jetmu = fj.subjets.flatten(axis=1).cross(mu, nested=True)
    # NOTE(review): `&` binds tighter than `==` in Python, so this evaluates
    # as ((mu.counts > 0) & (...).sum()) == 1, not
    # (mu.counts > 0) & ((...).sum() == 1) — confirm this is the intent.
    mask = (mu.counts > 0) & ((jetmu.i0.delta_r(jetmu.i1) < 0.4) &
                              ((jetmu.i1.pt / jetmu.i0.pt) < 0.7) &
                              (jetmu.i1.pt > 7)).sum() == 1
    ##### Three steps to match the jaggedness of the mask array to the fj.subjets array #####
    ##### Using the offset function to copy contents not the type of the array #####
    step1 = fj.subjets.flatten()
    step2 = awkward.JaggedArray.fromoffsets(step1.offsets, mask.content)
    step2 = step2.pad(1).fillna(
        0)  ##### Fill None for empty arrays and convert None to False
    step3 = awkward.JaggedArray.fromoffsets(fj.subjets.offsets, step2)
    ##### fatjet with two subjets matched with muons
    fj['withmu'] = step3.sum() == 2

    ###
    # Selections
    ###
    #### trigger selection ####
    # NOTE(review): np.bool / np.int are deprecated aliases removed in
    # NumPy >= 1.24; replace with bool / int (or np.bool_ / np.int_) when
    # the pinned NumPy version allows.
    triggers = np.zeros(events.size, dtype=np.bool)
    for path in self._btagmu_triggers[self._year]:
        # Skip trigger paths absent from this dataset's branches.
        if path not in events.HLT.columns: continue
        triggers = triggers | events.HLT[path]
    selection.add('btagmu_triggers', triggers)

    #### MET filters ####
    met_filters = np.ones(events.size, dtype=np.bool)
    if isData:
        met_filters = met_filters & events.Flag[
            'eeBadScFilter']  #this filter is recommended for data only
    for flag in AnalysisProcessor.met_filter_flags[self._year]:
        met_filters = met_filters & events.Flag[flag]
    selection.add('met_filters', met_filters)

    #### ak15 jet selection ####
    # Leading fat jet by soft-drop pT, then required to be 'good' and to
    # have two muon-matched subjets.
    leading_fj = fj[fj.sd.pt.argmax()]
    leading_fj = leading_fj[leading_fj.isgood.astype(np.bool)]
    leading_fj = leading_fj[leading_fj.withmu.astype(np.bool)]

    #### SV selection for matched with leading ak15 jet ####
    SV['ismatched'] = match(SV, leading_fj, 1.5)
    #leading_SV = SV[SV.pt.argmax()]
    leading_SV = SV[SV.dxySig.argmax()]
    leading_SV = leading_SV[leading_SV.ismatched.astype(np.bool)]

    #fj_good = fj[fj.isgood.astype(np.bool)]
    #fj_withmu = fj_good[fj_good.withmu.astype(np.bool)]
    #fj_nwithmu = fj_withmu.counts
    selection.add('fj_pt', (leading_fj.sd.pt.max() > 250))
    selection.add(
        'fj_mass', (leading_fj.msd_corr.sum() > 50))  ## optionally also <130
    #selection.add('fj_tau21', (leading_fj.tau21.sum() < 0.3) )
    #selection.add('fjCoupledMu', (fj_nwithmu > 0) )
    print('Selections')
    print(selection.names, '\n')

    # Per-event (jagged) quantities to histogram; keys must match hist names.
    variables = {
        'ZHbbvsQCD': leading_fj.ZHbbvsQCD,
        'btagJP': leading_fj.btagJP,
        'tau21': leading_fj.tau21,
        'fjmass': leading_fj.msd_corr,
        'fj1pt': leading_fj.sd.pt,
        #'svmass': leading_SV.mass,
        'svmass': np.log(leading_SV.mass),
        'svdxysig': leading_SV.dxySig
    }

    def fill(dataset, gentype, weight, cut):
        # Flatten each variable under the event cut; NaN entries zero out
        # the corresponding gentype/weight via the ~isnan multiplication.
        flat_variables = {
            k: v[cut].flatten()
            for k, v in variables.items()
        }
        flat_gentype = {
            k: (~np.isnan(v[cut]) * gentype[cut]).flatten()
            for k, v in variables.items()
        }
        flat_weight = {
            k: (~np.isnan(v[cut]) * weight[cut]).flatten()
            for k, v in variables.items()
        }
        #print('variables:', flat_variables)
        for histname, h in hout.items():
            if not isinstance(h, hist.Hist): continue
            if histname not in variables: continue
            elif histname == 'sumw': continue
            elif histname == 'jptemplate' or histname == 'svtemplate': continue
            else:
                flat_variable = {histname: flat_variables[histname]}
                h.fill(dataset=dataset,
                       gentype=flat_gentype[histname],
                       **flat_variable,
                       weight=flat_weight[histname])

    isFilled = False
    if isData:
        if not isFilled:
            # Data: sumw is just an event counter.
            hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
            isFilled = True
        cut = selection.all(*selection.names)
        vcut = np.zeros(events.size, dtype=np.int)
        hout['cutflow'].fill(dataset=dataset,
                             cutname='nocut',
                             cut=vcut,
                             weight=np.ones(events.size))
        allcuts = set()
        ### cutflow fill
        # Cumulative cutflow: bin index i+1 holds events passing the first
        # i+1 selections.
        for i, icut in enumerate(selection.names):
            allcuts.add(icut)
            jcut = selection.all(*allcuts)
            vcut = (i + 1) * jcut
            hout['cutflow'].fill(dataset=dataset,
                                 cutname=str(icut),
                                 cut=vcut,
                                 weight=jcut)
        ##### template for bb SF #####
        ##### btagjp template #####
        hout['jptemplate'].fill(dataset=dataset,
                                gentype=np.zeros(events.size, dtype=np.int),
                                btagJP=leading_fj.btagJP.sum(),
                                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                weight=np.ones(events.size) * cut)
        ##### sv mass template #####
        hout['svtemplate'].fill(
            dataset=dataset,
            gentype=np.zeros(events.size, dtype=np.int),
            #svmass=leading_SV.mass.sum(),
            svmass=np.log(leading_SV.mass.sum()),
            ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
            weight=np.ones(events.size) * cut)
        fill(dataset, np.zeros(events.size, dtype=np.int),
             np.ones(events.size), cut)
    else:
        weights = processor.Weights(len(events))
        # One-hot-ish per-event flavour flags (exclusive, priority bb > b >
        # cc > c > other), combined below into an integer gentype code.
        wgentype = {
            'bb': (leading_fj.isbb).sum(),
            'b': (~leading_fj.isbb & leading_fj.isb).sum(),
            'cc': (~leading_fj.isbb & ~leading_fj.isb & leading_fj.iscc).sum(),
            'c': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                  & leading_fj.isc).sum(),
            'other': (~leading_fj.isbb & ~leading_fj.isb & ~leading_fj.iscc
                      & ~leading_fj.isc).sum(),
        }
        vgentype = np.zeros(events.size, dtype=np.int)
        for gentype in self._gentype_map.keys():
            vgentype += self._gentype_map[gentype] * wgentype[gentype]
        if not isFilled:
            # MC: sumw accumulates the generator-weight sum for normalisation.
            hout['sumw'].fill(dataset=dataset,
                              sumw=1,
                              weight=events.genWeight.sum())
            isFilled = True
        cut = selection.all(*selection.names)
        if 'QCD' in dataset:
            vcut = np.zeros(events.size, dtype=np.int)
            hout['cutflow'].fill(dataset=dataset,
                                 cutname='nocut',
                                 cut=vcut,
                                 weight=weights.weight())
            allcuts = set()
            ### cutflow fill
            for i, icut in enumerate(selection.names):
                allcuts.add(icut)
                jcut = selection.all(*allcuts)
                vcut = (i + 1) * jcut
                hout['cutflow'].fill(dataset=dataset,
                                     cutname=str(icut),
                                     cut=vcut,
                                     weight=weights.weight() * jcut)
            ### other variables
            fill(dataset, vgentype, weights.weight(), cut)
            ##### template for bb SF #####
            ##### btagjp template #####
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=vgentype,
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=weights.weight() * cut)
            ##### sv mass template #####
            # NOTE(review): svtemplate uses np.ones(events.size) * cut here
            # while jptemplate above uses weights.weight() * cut — confirm
            # the unweighted fill is intentional and not a copy-paste slip.
            hout['svtemplate'].fill(
                dataset=dataset,
                gentype=vgentype,
                #svmass=leading_SV.mass.sum(),
                svmass=np.log(leading_SV.mass.sum()),
                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                weight=np.ones(events.size) * cut)
        else:
            # Non-QCD MC: fill with all events (no selection cut applied).
            fill(dataset, vgentype, weights.weight(),
                 np.ones(events.size, dtype=np.int))
            ##### template for bb SF #####
            ##### btagjp template #####
            hout['jptemplate'].fill(dataset=dataset,
                                    gentype=vgentype,
                                    btagJP=leading_fj.btagJP.sum(),
                                    ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                                    weight=weights.weight())
            ##### sv mass template #####
            # NOTE(review): this branch applies `cut` to svtemplate but not
            # to jptemplate above — confirm the asymmetry is intended.
            hout['svtemplate'].fill(
                dataset=dataset,
                gentype=vgentype,
                #svmass=leading_SV.mass.sum(),
                svmass=np.log(leading_SV.mass.sum()),
                ZHbbvsQCD=leading_fj.ZHbbvsQCD.sum(),
                weight=np.ones(events.size) * cut)
    return hout
def process(self, df):
    """Coffea processor event loop for the leptonjet analysis.

    Builds AK4 jets and 'leptonjets' from PF candidates, classifies
    leptonjets (egamma / PF-muon / DSA), requires >= 2 leptonjets, splits
    events into 2mu2e and 4mu channels, and fills the per-channel
    histograms with min(pfiso) vs a channel-specific value.

    Returns the accumulator for this chunk.

    NOTE(review): unlike sibling processors, there is no
    `if df.size == 0: return output` guard here — confirm empty chunks
    cannot reach this method.
    """
    output = self.accumulator.identity()
    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # MC-only weights: generator weight and pileup reweighting.
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # Event kept if ANY configured trigger path fired.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    weight = wgts.weight()
    ########################

    # AK4 CHS jets with jet id; selected at pT > 30, |eta| < 2.4.
    ak4jets = JaggedCandidateArray.candidatesfromcounts(
        df['akjet_ak4PFJetsCHS_p4'],
        px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
        py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
        pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
        energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
        jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
    )
    ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 30)
                      & (np.abs(ak4jets.eta) < 2.4)]

    # Leptonjets with isolation / track-distance attributes.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
        sumtkpt=df['pfjet_tkPtSum05'].content,
        pfiso=df['pfjet_pfIsolationNoPU05'].content,
        mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
    )
    # Classify each leptonjet from its PF-candidate types:
    # type 3 = PF muon, type 8 = DSA muon (per the checks below).
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma, 2 = PF-muon, 3 = DSA (categories are exclusive).
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label, ndsa=ndsa)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
    # Net charge of the leptonjet's constituents.
    ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=ljdaucharge)
    leptonjets.add_attributes(
        isneutral=(leptonjets.iseltype
                   | (leptonjets.ismutype & (leptonjets.qsum == 0))))
    # NOTE(review): 'mucharged' also includes iseltype jets (mirrors the
    # isneutral definition with qsum != 0) — confirm the name matches intent.
    leptonjets.add_attributes(
        mucharged=(leptonjets.iseltype
                   | (leptonjets.ismutype & (leptonjets.qsum != 0))))
    # Cosmic-ray veto via DSA-muon subset flags, re-indexed per leptonjet.
    ljdsamuSubset = fromNestNestIndexArray(
        df['dsamuon_isSubsetFilteredCosmic1Leg'],
        awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
    leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
    leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30)
                            & (leptonjets.mintkdist < 50)]

    ## __ twoleptonjets__
    twoleptonjets = leptonjets.counts >= 2
    dileptonjets = leptonjets[twoleptonjets]
    ak4jets = ak4jets[twoleptonjets]
    wgt = weight[twoleptonjets]
    if dileptonjets.size == 0: return output

    # Leading (lj0) and subleading (lj1) leptonjets by pT.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]
    # Keep only AK4 jets harder than the leading leptonjet.
    ak4jets = ak4jets[ak4jets.pt > (lj0.pt.flatten())]
    ak4jetCounts = (ak4jets.counts > 0).astype(int)
    # Minimum pfiso of the two leading leptonjets (0 if equal, by construction).
    minpfiso = ((lj0.pfiso > lj1.pfiso).astype(int) * lj1.pfiso +
                (lj0.pfiso < lj1.pfiso).astype(int) * lj0.pfiso).flatten()
    # 1 = both neutral, 2 = both 'mucharged', 0 = otherwise.
    ljneutrality = (
        (lj0.isneutral & lj1.isneutral).astype(int) * 1 +
        (lj0.mucharged & lj1.mucharged).astype(int) * 2).flatten()

    ## channel def ##
    #### 2mu2e
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = singleMuljEvents & muljInLeading2Events
    output['chan-2mu2e'].fill(dataset=dataset,
                              iso=minpfiso[channel_2mu2e],
                              val=ak4jetCounts[channel_2mu2e],
                              weight=wgt[channel_2mu2e])
    #### 4mu
    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = doubleMuljEvents & muljIsLeading2Events
    output['chan-4mu'].fill(dataset=dataset,
                            iso=minpfiso[channel_4mu],
                            val=ljneutrality[channel_4mu],
                            weight=wgt[channel_4mu])
    ###########
    return output
def process(self, df):
    """Coffea processor event loop (leptonjet DSA-count / muon-timing study).

    Builds AK4 jets (with DeepCSV tight tag) and leptonjets, assigns an
    integer channel code (1 = 2mu2e, 2 = 4mu), applies the SR cut list
    (dphi, jet multiplicity, b-veto, pT thresholds; dphi inverted for the
    'CR' region), and fills the 'ndsa' and 'mutiming' histograms for
    muon-type leptonjets.

    Returns the accumulator for this chunk.
    """
    output = self.accumulator.identity()
    # Empty chunk: nothing to fill.
    if df.size==0: return output
    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type!='data':
        # MC-only weights: generator weight and pileup reweighting.
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # Event kept if ANY configured trigger path fired.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################

    # AK4 CHS jets, carrying hadronic energy fraction and DeepCSV bitmap.
    ak4jets = JaggedCandidateArray.candidatesfromcounts(
        df['akjet_ak4PFJetsCHS_p4'],
        px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
        py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
        pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
        energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
        hadfrac=df['akjet_ak4PFJetsCHS_hadronEnergyFraction'].content,
        jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
        deepcsv=df['hftagscore_DeepCSV_b'].content,
    )
    # Bit 2 of the DeepCSV score word marks the tight working point.
    deepcsv_tight = np.bitwise_and(ak4jets.deepcsv, 1<<2)==(1<<2)
    ak4jets.add_attributes(deepcsvTight=deepcsv_tight)
    ak4jets=ak4jets[ak4jets.jetid&(ak4jets.pt>20)&(np.abs(ak4jets.eta)<2.5)]

    # Leptonjets with the PF-candidate count attached.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
        ncands=df['pfjet_pfcands_n'].content,
    )
    # Classify from PF-candidate types: 3 = PF muon, 8 = DSA muon.
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype==3).sum()
    ndsa = (ljdautype==8).sum()
    isegammajet = (npfmu==0)&(ndsa==0)
    ispfmujet = (npfmu>=2)&(ndsa==0)
    isdsajet = ndsa>0
    # label: 1 = egamma, 2 = PF-muon, 3 = DSA (categories are exclusive).
    label = isegammajet.astype(int)*1+ispfmujet.astype(int)*2+isdsajet.astype(int)*3
    leptonjets.add_attributes(label=label, ndsa=ndsa)
    nmu = ((ljdautype==3)|(ljdautype==8)).sum()
    leptonjets.add_attributes(ismutype=(nmu>=2), iseltype=(nmu==0))
    # Mean muon time over the leptonjet's candidates.
    leptonjets.add_attributes(muontiming=awkward.fromiter(df['pfjet_pfcand_muonTime']).mean())

    ## __ twoleptonjets__
    twoleptonjets = leptonjets.counts>=2
    dileptonjets = leptonjets[twoleptonjets]
    ak4jets = ak4jets[twoleptonjets]
    wgt = weight[twoleptonjets]
    if dileptonjets.size==0: return output

    # Leading (lj0) and subleading (lj1) leptonjets by pT.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    ## channel def ##
    # Integer channel code: 1 = 2mu2e, 2 = 4mu, 0 = neither.
    singleMuljEvents = dileptonjets.ismutype.sum()==1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents&muljInLeading2Events).astype(int)*1
    doubleMuljEvents = dileptonjets.ismutype.sum()==2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents&muljIsLeading2Events).astype(int)*2
    channel_ = channel_2mu2e + channel_4mu
    ###########

    # Event-level cut list; all entries must be per-event boolean arrays.
    cuts = [
        np.ones_like(wgt).astype(bool),  # all
        (np.abs(lj0.p4.delta_phi(lj1.p4))>np.pi/2).flatten(),  # dphi > pi/2
        ak4jets.counts<4,  # N(jets) < 4
        ak4jets[(ak4jets.pt>30)&(np.abs(ak4jets.eta)<2.4)&ak4jets.deepcsvTight].counts==0,  # N(tightB)==0
        (~channel_2mu2e.astype(bool)) | (channel_2mu2e.astype(bool)&(((lj0.iseltype)&(lj0.pt>40)) | ((lj1.iseltype)&(lj1.pt>40))).flatten() ),  # EGMpt0>40
        ( (lj0.ismutype&(lj0.pt>40)) | ((~lj0.ismutype)&(lj1.ismutype&(lj1.pt>40))) ).flatten(),  # Mupt0>40
        ( (~(channel_==2)) | (channel_==2)&((lj1.pt>30).flatten()) ),  # Mupt1>30
    ]
    # Control region: invert the dphi cut.
    if self.region == 'CR':
        cuts[1] = ~cuts[1]
    totcut = np.logical_and.reduce(cuts)

    dileptonjets = dileptonjets[totcut]
    wgt = wgt[totcut]
    channel_ = channel_[totcut]

    # Muon-type leptonjets only; broadcast event-level weight/channel to
    # the per-leptonjet jaggedness via ones_like.
    ljmu = dileptonjets[dileptonjets.ismutype]
    ljmuones = ljmu.pt.ones_like()
    output['ndsa'].fill(dataset=dataset, cnt=ljmu.ndsa.flatten(),
                        weight=(wgt*ljmuones).flatten(),
                        channel=(channel_*ljmuones).flatten())
    output['mutiming'].fill(dataset=dataset, t=ljmu.muontiming.flatten(),
                            weight=(wgt*ljmuones).flatten(),
                            channel=(channel_*ljmuones).flatten())
    return output
def process(self, df):
    """Coffea processor for same-sign dilepton and trilepton channels.

    Selects MVA-id electrons/muons and clean jets, builds the 2-lepton
    same-sign (ee/emu/mumu, on/off-Z) and 3-lepton (eem/mme/eee/mmm)
    channel masks, reconstructs Z and 3-lepton invariant masses, applies
    triggers, and fills per-channel / per-jet-level histograms.

    Returns the accumulator ('hout') for this chunk.
    """
    # Dataset parameters
    dataset = df['dataset']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']
    isData = self._samples[dataset]['isData']
    # Collapse era-split data names (e.g. 'SingleMuon_X') to the base name.
    datasets = [
        'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
        'DoubleElectron'
    ]
    for d in datasets:
        if d in dataset: dataset = dataset.split('_')[0]

    ### Recover objects, selection, functions and others...
    # Objects
    isTightMuon = self._objects['isTightMuonPOG']
    isTightElectron = self._objects['isTightElectronPOG']
    isGoodJet = self._objects['isGoodJet']
    isMuonMVA = self._objects[
        'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
    isElecMVA = self._objects[
        'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)
    # Corrections
    GetMuonIsoSF = self._corrections['getMuonIso']
    GetMuonIDSF = self._corrections['getMuonID']
    # Selection
    passNJets = self._selection['passNJets']
    passMETcut = self._selection['passMETcut']
    passTrigger = self._selection['passTrigger']
    # Functions
    pow2 = self._functions['pow2']
    IsClosestToZ = self._functions['IsClosestToZ']
    GetGoodTriplets = self._functions['GetGoodTriplets']

    # Initialize objects (four-vector collections from NanoAOD branches).
    met = Initialize({
        'pt': df['MET_pt'],
        'eta': 0,
        'phi': df['MET_phi'],
        'mass': 0
    })
    e = Initialize({
        'pt': df['Electron_pt'],
        'eta': df['Electron_eta'],
        'phi': df['Electron_phi'],
        'mass': df['Electron_mass']
    })
    mu = Initialize({
        'pt': df['Muon_pt'],
        'eta': df['Muon_eta'],
        'phi': df['Muon_phi'],
        'mass': df['Muon_mass']
    })
    j = Initialize({
        'pt': df['Jet_pt'],
        'eta': df['Jet_eta'],
        'phi': df['Jet_phi'],
        'mass': df['Jet_mass']
    })

    # Electron selection: attach each configured branch (zeros when the
    # branch is absent from this dataset), then apply the MVA id.
    for key in self._e:
        e[key] = e.pt.zeros_like()
        if self._e[key] in df: e[key] = df[self._e[key]]
    #e['isGood'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, e.tightChrage, year)
    e['isGood'] = isElecMVA(e.pt,
                            e.eta,
                            e.dxy,
                            e.dz,
                            e.miniIso,
                            e.sip3d,
                            e.mvaTTH,
                            e.elecMVA,
                            e.lostHits,
                            e.convVeto,
                            e.tightCharge,
                            minpt=10)
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.isGood.astype(np.bool)]

    # Muon selection (same pattern as electrons).
    # NOTE(review): np.bool / np.int are deprecated aliases removed in
    # NumPy >= 1.24; replace with bool / int when the pinned version allows.
    for key in self._mu:
        mu[key] = mu.pt.zeros_like()
        if self._mu[key] in df: mu[key] = df[self._mu[key]]
    #mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso, mu.tight_id, mu.tightCharge, year)
    mu['isGood'] = isMuonMVA(mu.pt,
                             mu.eta,
                             mu.dxy,
                             mu.dz,
                             mu.miniIso,
                             mu.sip3d,
                             mu.mvaTTH,
                             mu.mediumPrompt,
                             mu.tightCharge,
                             minpt=10)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

    # Keep only good leptons; multiplicity masks used by all channel defs.
    e = e[e.isGood.astype(np.bool)]
    mu = mu[mu.isGood.astype(np.bool)]
    nElec = e.counts
    nMuon = mu.counts
    twoLeps = (nElec + nMuon) == 2
    threeLeps = (nElec + nMuon) == 3
    twoElec = (nElec == 2)
    twoMuon = (nMuon == 2)
    e0 = e[e.pt.argmax()]
    m0 = mu[mu.pt.argmax()]

    # Jet selection
    # NOTE(review): 'deepjet' is set before the self._jet loop below; if
    # 'deepjet' is also a key of self._jet it would be overwritten — confirm.
    j['deepjet'] = df['Jet_btagDeepFlavB']
    for key in self._jet:
        j[key] = j.pt.zeros_like()
        if self._jet[key] in df: j[key] = df[self._jet[key]]
    j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
    # Clean jets: not within dR < 0.4 of a good electron or muon.
    j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(
        np.bool)
    #goodJets = j[(j['isgood'])&(j['isclean'])]
    #j0 = goodJets[goodJets.pt.argmax()]
    #nJets = goodJets.counts

    ##################################################################
    ### 2 same-sign leptons
    ##################################################################
    # emu
    singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
    singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
    em = singe.cross(singm)
    emSSmask = (em.i0.charge * em.i1.charge > 0)
    emSS = em[emSSmask]
    nemSS = len(emSS.flatten())

    # ee and mumu
    # pt>-1 to preserve jagged dimensions
    ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
    mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]
    eepairs = ee.distincts()
    eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
    # On-Z window: |m(ll) - 91| < 15 GeV.
    eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
    eeoffZmask = (eeonZmask == 0)
    mmpairs = mm.distincts()
    mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
    mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
    mmoffZmask = (mmonZmask == 0)
    eeSSonZ = eepairs[eeSSmask & eeonZmask]
    eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
    mmSSonZ = mmpairs[mmSSmask & mmonZmask]
    mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
    neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
    nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())
    #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

    # Cuts
    # Reduce jagged pair masks to per-event booleans ("has such a pair").
    eeSSmask = (eeSSmask[eeSSmask].counts > 0)
    mmSSmask = (mmSSmask[mmSSmask].counts > 0)
    eeonZmask = (eeonZmask[eeonZmask].counts > 0)
    eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
    mmonZmask = (mmonZmask[mmonZmask].counts > 0)
    mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
    emSSmask = (emSSmask[emSSmask].counts > 0)

    # njets
    goodJets = j[(j.isclean) & (j.isgood)]
    njets = goodJets.counts
    ht = goodJets.pt.sum()
    j0 = goodJets[goodJets.pt.argmax()]
    # nbtags (DeepJet threshold 0.2770)
    nbtags = goodJets[goodJets.deepjet > 0.2770].counts

    ##################################################################
    ### 3 leptons
    ##################################################################
    # eem: OS ee pair (on/off Z) plus one muon.
    muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
    elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]
    ee_eem = elec_eem.distincts()
    ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
    ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
    ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
    ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)
    eepair_eem = (ee_eem.i0 + ee_eem.i1)
    trilep_eem = eepair_eem.cross(muon_eem)
    trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

    # mme: OS mm pair (on/off Z) plus one electron.
    muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
    elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]
    mm_mme = muon_mme.distincts()
    mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
    mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
    mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
    mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)
    mmpair_mme = (mm_mme.i0 + mm_mme.i1)
    trilep_mme = mmpair_mme.cross(elec_mme)
    trilep_mme = (trilep_mme.i0 + trilep_mme.i1)
    mZ_mme = mmpair_mme.mass
    mZ_eem = eepair_eem.mass
    m3l_eem = trilep_eem.mass
    m3l_mme = trilep_mme.mass

    ### eee and mmm
    eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
    mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]
    # Create pairs
    eee_groups = eee.distincts()
    mmm_groups = mmm.distincts()
    # Calculate the invariant mass of the pairs
    invMass_eee = ((eee_groups.i0 + eee_groups.i1).mass)
    invMass_mmm = ((mmm_groups.i0 + mmm_groups.i1).mass)
    # OS pairs
    isOSeee = ((eee_groups.i0.charge != eee_groups.i1.charge))
    isOSmmm = ((mmm_groups.i0.charge != mmm_groups.i1.charge))
    # Get the ones with a mass closest to the Z mass (and in a range of thr)
    clos_eee = IsClosestToZ(invMass_eee, thr=15)
    clos_mmm = IsClosestToZ(invMass_mmm, thr=15)
    # Finally, the mask for eee/mmm with/without OS onZ pair
    eeeOnZmask = (clos_eee) & (isOSeee)
    eeeOffZmask = (eeeOnZmask == 0)
    mmmOnZmask = (clos_mmm) & (isOSmmm)
    mmmOffZmask = (mmmOnZmask == 0)
    eeeOnZmask = (eeeOnZmask[eeeOnZmask].counts > 0)
    eeeOffZmask = (eeeOffZmask[eeeOffZmask].counts > 0)
    mmmOnZmask = (mmmOnZmask[mmmOnZmask].counts > 0)
    mmmOffZmask = (mmmOffZmask[mmmOffZmask].counts > 0)

    # Get Z and W invariant masses
    # Z-candidate legs (regularized to rectangular arrays); the third,
    # unpaired lepton is identified below as the one appearing exactly once
    # in the concatenated [all leptons, Z-leg0, Z-leg1] row.
    goodPairs_eee = eee_groups[(clos_eee) & (isOSeee)]
    eZ0 = goodPairs_eee.i0[goodPairs_eee.counts > 0].regular(
    )  #[(goodPairs_eee.counts>0)].regular()
    eZ1 = goodPairs_eee.i1[goodPairs_eee.counts > 0].regular(
    )  #[(goodPairs_eee.counts>0)].regular()
    goodPairs_mmm = mmm_groups[(clos_mmm) & (isOSmmm)]
    mZ0 = goodPairs_mmm.i0[goodPairs_mmm.counts > 0].regular(
    )  #[(goodPairs_eee.counts>0)].regular()
    mZ1 = goodPairs_mmm.i1[goodPairs_mmm.counts > 0].regular(
    )  #[(goodPairs_eee.counts>0)].regular()
    eee_reg = eee[(eeeOnZmask)].regular()
    eW = np.append(eee_reg, eZ0, axis=1)
    eW = np.append(eW, eZ1, axis=1)
    eWmask = np.apply_along_axis(
        lambda a: [list(a).count(x) == 1 for x in a], 1, eW)
    eW = eW[eWmask]
    mmm_reg = mmm[(mmmOnZmask)].regular()
    mW = np.append(mmm_reg, mZ0, axis=1)
    mW = np.append(mW, mZ1, axis=1)
    mWmask = np.apply_along_axis(
        lambda a: [list(a).count(x) == 1 for x in a], 1, mW)
    mW = mW[mWmask]
    # Z-candidate and 3-lepton four-vector sums (python lists of vectors).
    eZ = [x + y for x, y in zip(eZ0, eZ1)]
    triElec = [x + y for x, y in zip(eZ, eW)]
    mZ_eee = [t[0].mass for t in eZ]
    m3l_eee = [t[0].mass for t in triElec]
    mZ = [x + y for x, y in zip(mZ0, mZ1)]
    triMuon = [x + y for x, y in zip(mZ, mW)]
    mZ_mmm = [t[0].mass for t in mZ]
    m3l_mmm = [t[0].mass for t in triMuon]

    # Triggers
    #passTrigger = lambda df, n, m, o : np.ones_like(df['MET_pt'], dtype=np.bool) # XXX
    trig_eeSS = passTrigger(df, 'ee', isData, dataset)
    trig_mmSS = passTrigger(df, 'mm', isData, dataset)
    trig_emSS = passTrigger(df, 'em', isData, dataset)
    trig_eee = passTrigger(df, 'eee', isData, dataset)
    trig_mmm = passTrigger(df, 'mmm', isData, dataset)
    trig_eem = passTrigger(df, 'eem', isData, dataset)
    trig_mme = passTrigger(df, 'mme', isData, dataset)

    # MET filters

    # Weights: generator weight normalised by xsec / sum-of-weights for MC.
    genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
    weights = processor.Weights(df.size)
    weights.add('norm', genw if isData else (xsec / sow) * genw)

    # Selections and cuts
    selections = processor.PackedSelection()
    channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
    selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('emSS', (emSSmask) & (trig_emSS))
    channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
    selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
    selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
    selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
    selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))
    channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
    selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
    selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
    selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
    selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))
    levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
    selections.add('base', (nElec + nMuon >= 2))
    selections.add('2jets', (njets >= 2))
    selections.add('4jets', (njets >= 4))
    selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
    selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

    # Variables
    invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
    invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
    invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
    invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
    invMass_emSS = (emSS.i0 + emSS.i1).mass

    # varnames maps histogram name -> values; 'invmass' and 'm3l' are
    # per-channel dicts, the rest are per-event arrays.
    varnames = {}
    varnames['met'] = met.pt
    varnames['ht'] = ht
    varnames['njets'] = njets
    varnames['nbtags'] = nbtags
    varnames['invmass'] = {
        'eeSSonZ': invMass_eeSSonZ,
        'eeSSoffZ': invMass_eeSSoffZ,
        'mmSSonZ': invMass_mmSSonZ,
        'mmSSoffZ': invMass_mmSSoffZ,
        'emSS': invMass_emSS,
        'eemSSonZ': mZ_eem,
        'eemSSoffZ': mZ_eem,
        'mmeSSonZ': mZ_mme,
        'mmeSSoffZ': mZ_mme,
        'eeeSSonZ': mZ_eee,
        'eeeSSoffZ': mZ_eee,
        'mmmSSonZ': mZ_mmm,
        'mmmSSoffZ': mZ_mmm,
    }
    varnames['m3l'] = {
        'eemSSonZ': m3l_eem,
        'eemSSoffZ': m3l_eem,
        'mmeSSonZ': m3l_mme,
        'mmeSSoffZ': m3l_mme,
        'eeeSSonZ': m3l_eee,
        'eeeSSoffZ': m3l_eee,
        'mmmSSonZ': m3l_mmm,
        'mmmSSoffZ': m3l_mmm,
    }
    varnames['e0pt'] = e0.pt
    varnames['e0eta'] = e0.eta
    varnames['m0pt'] = m0.pt
    varnames['m0eta'] = m0.eta
    varnames['j0pt'] = j0.pt
    varnames['j0eta'] = j0.eta
    varnames['counts'] = np.ones_like(df['MET_pt'], dtype=np.int)

    # Fill Histos
    hout = self.accumulator.identity()
    hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)

    for var, v in varnames.items():
        for ch in channels2LSS + channels3L:
            for lev in levels:
                weight = weights.weight()
                cuts = [ch] + [lev]
                cut = selections.all(*cuts)
                weights_flat = weight[cut].flatten()
                weights_ones = np.ones_like(weights_flat, dtype=np.int)
                if var == 'invmass':
                    # eee/mmm invariant masses are skipped here (their
                    # values are plain lists, see the continue branches).
                    if ch in ['eeeSSoffZ', 'mmmSSoffZ']: continue
                    elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                        continue  #values = v[ch]
                    else:
                        values = v[ch][cut].flatten()
                    hout['invmass'].fill(sample=dataset,
                                         channel=ch,
                                         cut=lev,
                                         invmass=values,
                                         weight=weights_flat)
                elif var == 'm3l':
                    # m3l is only defined for the eem/mme trilepton channels.
                    if ch in [
                            'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                            'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                            'mmmSSonZ'
                    ]: continue
                    values = v[ch][cut].flatten()
                    hout['m3l'].fill(sample=dataset,
                                     channel=ch,
                                     cut=lev,
                                     m3l=values,
                                     weight=weights_flat)
                else:
                    values = v[cut].flatten()
                    if var == 'ht':
                        hout[var].fill(ht=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'met':
                        hout[var].fill(met=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'njets':
                        hout[var].fill(njets=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'nbtags':
                        hout[var].fill(nbtags=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'counts':
                        # Raw event counts: unit weights on purpose.
                        hout[var].fill(counts=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_ones)
                    elif var == 'e0pt':
                        # No electron in muon-only channels.
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ'
                        ]: continue
                        hout[var].fill(e0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0pt':
                        # No muon in electron-only channels.
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ'
                        ]: continue
                        hout[var].fill(m0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'e0eta':
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ', 'mmmSSonZ'
                        ]: continue
                        hout[var].fill(e0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0eta':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ', 'eeeSSonZ'
                        ]: continue
                        hout[var].fill(m0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0pt':
                        # Jet variables undefined at 'base' (no jet cut).
                        if lev == 'base': continue
                        hout[var].fill(j0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0eta':
                        if lev == 'base': continue
                        hout[var].fill(j0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
    return hout
def process(self, df):
    """Process one chunk of NanoAOD events for the ttgamma measurement.

    Builds jagged candidate arrays for muons, electrons, jets, photons and
    (for MC) generator particles; applies sample-overlap removal and object
    selections; computes event weights; and fills the accumulator.

    Parameters
    ----------
    df : coffea LazyDataFrame
        One chunk of NanoAOD columns.

    Returns
    -------
    accumulator
        Histogram/counter accumulator for this chunk (``EventCount`` is the
        number of events processed).
    """
    output = self.accumulator.identity()

    datasetFull = df['dataset']
    # strip the year suffix so cross-section lookups use the bare sample name
    dataset = datasetFull.replace('_2016', '')
    isData = 'Data' in dataset

    ################################
    # DEFINE JAGGED CANDIDATE ARRAYS
    ################################

    # load muon objects
    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        phi=df['Muon_phi'],
        mass=df['Muon_mass'],
        charge=df['Muon_charge'],
        relIso=df['Muon_pfRelIso04_all'],
        tightId=df['Muon_tightId'],
        isPFcand=df['Muon_isPFcand'],
        isTracker=df['Muon_isTracker'],
        isGlobal=df['Muon_isGlobal'],
    )

    # load electron objects
    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        phi=df['Electron_phi'],
        mass=df['Electron_mass'],
        charge=df['Electron_charge'],
        cutBased=df['Electron_cutBased'],
        d0=df['Electron_dxy'],
        dz=df['Electron_dz'],
    )

    # load jet object; for data, MC-only branches are filled with dummies so
    # the array layout is identical in both cases
    jets = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df['Jet_pt'],
        eta=df['Jet_eta'],
        phi=df['Jet_phi'],
        mass=df['Jet_mass'],
        jetId=df['Jet_jetId'],
        btag=df['Jet_btagDeepB'],
        area=df['Jet_area'],
        ptRaw=df['Jet_pt'] * (1 - df['Jet_rawFactor']),
        massRaw=df['Jet_mass'] * (1 - df['Jet_rawFactor']),
        hadFlav=df['Jet_hadronFlavour']
        if not isData else np.ones_like(df['Jet_jetId']),
        genJetIdx=df['Jet_genJetIdx']
        if not isData else np.ones_like(df['Jet_jetId']),
        ptGenJet=np.zeros_like(df['Jet_pt']),
    )

    # load photon objects; chIso is converted from relative to absolute
    # isolation by multiplying with the photon pt
    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        phi=df['Photon_phi'],
        mass=np.zeros_like(df['Photon_pt']),
        isEE=df['Photon_isScEtaEE'],
        isEB=df['Photon_isScEtaEB'],
        photonId=df['Photon_cutBased'],
        passEleVeto=df['Photon_electronVeto'],
        pixelSeed=df['Photon_pixelSeed'],
        sieie=df['Photon_sieie'],
        chIso=df['Photon_pfRelIso03_chg'] * df['Photon_pt'],
        vidCuts=df['Photon_vidNestedWPBitmap'],
        genFlav=df['Photon_genPartFlav']
        if not isData else np.ones_like(df['Photon_electronVeto']),
        genIdx=df['Photon_genPartIdx']
        if not isData else np.ones_like(df['Photon_electronVeto']),
    )

    rho = df['fixedGridRhoFastjetAll']

    if not isData:
        # load gen parton objects
        genPart = JaggedCandidateArray.candidatesfromcounts(
            df['nGenPart'],
            pt=df['GenPart_pt'],
            eta=df['GenPart_eta'],
            phi=df['GenPart_phi'],
            mass=df['GenPart_mass'],
            pdgid=df['GenPart_pdgId'],
            motherIdx=df['GenPart_genPartIdxMother'],
            status=df['GenPart_status'],
            statusFlags=df['GenPart_statusFlags'],
        )
        genmotherIdx = genPart.motherIdx
        genpdgid = genPart.pdgid

    #################
    # OVERLAP REMOVAL
    #################
    # Overlap removal between related samples
    # TTGamma and TTbar
    # WGamma and WJets
    # ZGamma and ZJets
    # We need to remove events from TTbar which are already counted in the
    # phase space in which the TTGamma sample is produced:
    # photon with pT > 10 GeV, eta < 5, and at least dR > 0.1 from other gen objects
    doOverlapRemoval = False
    if 'TTbar' in dataset:
        doOverlapRemoval = True
        overlapPt = 10.
        overlapEta = 5.
        overlapDR = 0.1
    if re.search("^W[1234]jets$", dataset):
        doOverlapRemoval = True
        overlapPt = 10.
        overlapEta = 2.5
        overlapDR = 0.05
    if 'DYjetsM' in dataset:
        doOverlapRemoval = True
        overlapPt = 15.
        overlapEta = 2.6
        overlapDR = 0.05

    if doOverlapRemoval:
        # potential overlap photons are only those passing the kinematic cuts
        overlapPhoSelect = ((genPart.pt >= overlapPt) &
                            (abs(genPart.eta) < overlapEta) &
                            (genPart.pdgid == 22) &
                            (genPart.status == 1))
        OverlapPhotons = genPart[overlapPhoSelect]

        # if the overlap photon is actually from a non-prompt decay, it's not
        # part of the phase space of the separate sample
        idx = OverlapPhotons.motherIdx
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts,
                                    genpdgid.stops, genmotherIdx.content,
                                    genmotherIdx.starts, genmotherIdx.stops)

        # stable (or pre-hadronization, status 71) gen particles, excluding neutrinos
        finalGen = genPart[(
            (genPart.status == 1) | (genPart.status == 71)) & ~(
            (abs(genPart.pdgid) == 12) | (abs(genPart.pdgid) == 14) |
            (abs(genPart.pdgid) == 16))]

        genPairs = OverlapPhotons['p4'].cross(finalGen['p4'], nested=True)
        # remove the case where the cross product is the gen photon with itself
        genPairs = genPairs[~(genPairs.i0 == genPairs.i1)]
        # find closest gen particle to overlap photons
        dRPairs = genPairs.i0.delta_r(genPairs.i1)

        # the event is overlapping with the separate sample if there is an
        # overlap photon passing the dR cut and not coming from hadronic activity
        isOverlap = ((dRPairs.min() > overlapDR) & (maxParent < 37)).any()
        passOverlapRemoval = ~isOverlap
    else:
        passOverlapRemoval = np.ones_like(df['event']) == 1

    ##################
    # OBJECT SELECTION
    ##################
    # PART 1A Uncomment to add in object selection
    # 1. ADD SELECTION
    # select tight muons
    # tight muons should have a pt of at least 30 GeV, |eta| < 2.4, pass the
    # tight muon ID cut (tightId variable), and have a relative isolation of
    # less than 0.15
    # NOTE: column is named 'tightId' (see the muon constructor above)
    muonSelectTight = ((muons.pt > 30) &
                       (abs(muons.eta) < 2.4) &
                       (muons.tightId) &
                       (muons.relIso < 0.15))

    # select loose muons (explicitly excluding tight ones)
    muonSelectLoose = ((muons.pt > 15) &
                       (abs(muons.eta) < 2.4) &
                       ((muons.isPFcand) & (muons.isTracker | muons.isGlobal)) &
                       (muons.relIso < 0.25) &
                       np.invert(muonSelectTight))

    # ECAL barrel/endcap transition-region veto
    eleEtaGap = (abs(electrons.eta) < 1.4442) | (abs(electrons.eta) > 1.566)
    # impact-parameter cuts are looser in the endcap (|eta| > 1.479)
    elePassD0 = ((abs(electrons.eta) < 1.479) & (abs(electrons.d0) < 0.05) |
                 (abs(electrons.eta) > 1.479) & (abs(electrons.d0) < 0.1))
    elePassDZ = ((abs(electrons.eta) < 1.479) & (abs(electrons.dz) < 0.1) |
                 (abs(electrons.eta) > 1.479) & (abs(electrons.dz) < 0.2))

    # 1. ADD SELECTION
    # select tight electrons
    # tight electrons should have a pt of at least 35 GeV, |eta| < 2.1, pass
    # the cut-based electron id (cutBased variable in NanoAOD >= 4), and pass
    # the etaGap, D0, and DZ cuts defined above
    # FIX: 'elePassDZeleEtaGap' was a fused identifier; the intended mask is elePassDZ
    electronSelectTight = ((electrons.pt > 35) &
                           (abs(electrons.eta) < 2.1) &
                           (electrons.cutBased >= 4) &
                           eleEtaGap &
                           elePassD0 &
                           elePassDZ)

    # select loose electrons (explicitly excluding tight ones)
    electronSelectLoose = ((electrons.pt > 15) &
                           (abs(electrons.eta) < 2.4) &
                           (electrons.cutBased >= 1) &
                           eleEtaGap &
                           elePassD0 &
                           elePassDZ &
                           np.invert(electronSelectTight))

    # 1. ADD SELECTION
    # Object selection
    # select the subset of muons passing the muonSelectTight and muonSelectLoose cuts
    tightMuon = muons[muonSelectTight]
    looseMuon = muons[muonSelectLoose]

    # 1. ADD SELECTION
    # Object selection
    # select the subset of electrons passing the electronSelectTight and
    # electronSelectLoose cuts
    # FIX: 'electros' was an undefined name (typo for 'electrons')
    tightElectron = electrons[electronSelectTight]
    looseElectron = electrons[electronSelectLoose]

    #### Calculate deltaR between photon and nearest muon
    ####### make combination pairs
    phoMu = photons['p4'].cross(tightMuon['p4'], nested=True)
    ####### check delta R of each combination; if min is > 0.4 it is okay, or
    ####### if there are no tight muons it passes
    dRphomu = (phoMu.i0.delta_r(phoMu.i1) > 0.4).all() | (tightMuon.counts == 0)
    phoEle = photons['p4'].cross(tightElectron['p4'], nested=True)
    dRphoele = ((phoEle.i0.delta_r(phoEle.i1)).min() > 0.4) | (tightElectron.counts == 0)

    # photon selection (no ID requirement used here)
    photonSelect = ((photons.pt > 20) &
                    (abs(photons.eta) < 1.4442) &
                    (photons.isEE | photons.isEB) &
                    (photons.passEleVeto) &
                    np.invert(photons.pixelSeed) &
                    dRphomu &
                    dRphoele)

    # split out the ID requirement, enabling Iso and SIEIE to be inverted for
    # control regions
    photonID = photons.photonId >= 2

    # parse VID cuts, define loose photons (photons without chIso cut);
    # each VID cut occupies 2 bits, value >= 2 means the medium WP is passed
    photon_MinPtCut = (photons.vidCuts >> 0 & 3) >= 2
    photon_PhoSCEtaMultiRangeCut = (photons.vidCuts >> 2 & 3) >= 2
    photon_PhoSingleTowerHadOverEmCut = (photons.vidCuts >> 4 & 3) >= 2
    photon_PhoFull5x5SigmaIEtaIEtaCut = (photons.vidCuts >> 6 & 3) >= 2
    photon_ChIsoCut = (photons.vidCuts >> 8 & 3) >= 2
    photon_NeuIsoCut = (photons.vidCuts >> 10 & 3) >= 2
    photon_PhoIsoCut = (photons.vidCuts >> 12 & 3) >= 2

    # photons passing all ID requirements, without the charged hadron
    # isolation cut applied
    photonID_NoChIso = (photon_MinPtCut &
                        photon_PhoSCEtaMultiRangeCut &
                        photon_PhoSingleTowerHadOverEmCut &
                        photon_PhoFull5x5SigmaIEtaIEtaCut &
                        photon_NeuIsoCut &
                        photon_PhoIsoCut)

    # 1. ADD SELECTION
    # Object selection
    # select tightPhotons, the subset of photons passing the photonSelect cut
    # and the photonID cut
    tightPhotons = photons[photonSelect & photonID]
    # select loosePhotons, the subset of photons passing the photonSelect cut
    # and all photonID cuts without the charged hadron isolation cut applied
    loosePhotons = photons[photonSelect & photonID_NoChIso]

    # update jet kinematics based on jet energy systematic uncertainties
    if not isData:
        genJet = JaggedCandidateArray.candidatesfromcounts(
            df['nGenJet'],
            pt=df['GenJet_pt'],
            eta=df['GenJet_eta'],
            phi=df['GenJet_phi'],
            mass=df['GenJet_mass'],
        )
        # fixes a bug in genJet indices, skimmed after genJet matching
        jets.genJetIdx[jets.genJetIdx >= genJet.counts] = -1

        jets['ptGenJet'][jets.genJetIdx > -1] = genJet[jets.genJetIdx[
            jets.genJetIdx > -1]].pt
        jets['rho'] = jets.pt.ones_like() * rho

        # adds additional columns to the jets array, containing the jet pt
        # with JEC and JER variations
        #   pt_jer_up,  mass_jer_up,  pt_jer_down, mass_jer_down
        #   pt_jes_up,  mass_jes_up,  pt_jes_down, mass_jes_down
        Jet_transformer.transform(jets)

        # 4. ADD SYSTEMATICS
        # If processing a jet systematic (based on value of self.jetSyst
        # variable) update the jet pt and mass to reflect the jet systematic
        # uncertainty variations
        # Use the function updateJetP4(jets, pt=NEWPT, mass=NEWMASS) to update
        # the pt and mass

    ## check dR jet,lepton & jet,photon
    jetMu = jets['p4'].cross(tightMuon['p4'], nested=True)
    dRjetmu = ((jetMu.i0.delta_r(jetMu.i1)).min() > 0.4) | (tightMuon.counts == 0)

    jetEle = jets['p4'].cross(tightElectron['p4'], nested=True)
    dRjetele = ((jetEle.i0.delta_r(jetEle.i1)).min() > 0.4) | (tightElectron.counts == 0)

    jetPho = jets['p4'].cross(tightPhotons['p4'], nested=True)
    dRjetpho = ((jetPho.i0.delta_r(jetPho.i1)).min() > 0.1) | (tightPhotons.counts == 0)

    # 1. ADD SELECTION
    # select good jets
    # jets should have a pt of at least 30 GeV, |eta| < 2.4, pass the medium
    # jet id (bit-wise selected from the jetId variable), and pass the delta R
    # cuts defined above (dRjetmu, dRjetele, dRjetpho)
    jetSelect = ((jets.pt > 30) &
                 (abs(jets.eta) < 2.4) &
                 ((jets.jetId >> 1 & 1) == 1) &
                 dRjetmu &
                 dRjetele &
                 dRjetpho)

    # 1. ADD SELECTION
    # select the subset of jets passing the jetSelect cuts
    tightJets = jets[jetSelect]

    # find jets passing DeepCSV medium working point
    bTagWP = 0.6321  # 2016 DeepCSV working point

    # 1. ADD SELECTION
    # select the subset of tightJets which pass the Deep CSV tagger
    # FIX: the mask must be built from tightJets, not the unfiltered jets
    # array (the jaggedness of the two arrays differs after selection)
    bTaggedJets = tightJets[tightJets.btag > bTagWP]

    #####################
    # EVENT SELECTION
    #####################
    ### PART 1B: Uncomment to add event selection
    """
    # 1. ADD SELECTION
    ## apply triggers
    # muon events should be triggered by either the HLT_IsoMu24 or HLT_IsoTkMu24 triggers
    # electron events should be triggered by HLT_Ele27_WPTight_Gsf trigger
    # HINT: trigger values can be accessed with the variable df['TRIGGERNAME'],
    # the bitwise or operator can be used to select multiple triggers df['TRIGGER1'] | df['TRIGGER2']
    # FIX: use bitwise OR (|), not AND (&) -- events pass if EITHER trigger fires
    muTrigger = df['HLT_IsoMu24'] | df['HLT_IsoTkMu24']
    eleTrigger = df['HLT_Ele27_WPTight_Gsf']

    # 1. ADD SELECTION
    # Event selection
    #oneMuon, should be true if there is exactly one tight muon in the event
    # (hint, the .counts method returns the number of objects in each row of a jagged array)
    oneMuon = ?
    #muVeto, should be true if there are no tight muons in the event
    muVeto = ?

    # 1. ADD SELECTION
    # Event selection
    #oneEle should be true if there is exactly one tight electron in the event
    oneEle = ?
    #eleVeto should be true if there are no tight electrons in the event
    eleVeto = ?

    # 1. ADD SELECTION
    # Event selection
    #looseMuonSel and looseElectronSel should be true if there are 0 loose muons or electrons in the event
    looseMuonSel = ?
    looseElectronSel = ?

    # 1. ADD SELECTION
    # muon selection, requires events to pass: muon trigger
    #                                          overlap removal
    #                                          have exactly one muon
    #                                          have no electrons
    #                                          have no loose muons
    #                                          have no loose electrons
    muon_eventSelection = ?

    # electron selection, requires events to pass: electron trigger
    #                                              overlap removal
    #                                              have exactly one electron
    #                                              have no muons
    #                                              have no loose muons
    #                                              have no loose electrons
    electron_eventSelection = ?

    #create a selection object
    selection = processor.PackedSelection()

    # 1. ADD SELECTION
    #add selection 'eleSel', for events passing the electron event selection,
    #and muSel for those passing the muon event selection
    #  ex: selection.add('testSelection', array_of_booleans)
    selection.add('eleSel', ???)
    selection.add('muSel', ???)

    #add two jet selection criteria
    #   First, 'jetSel' which selects events with at least 4 tightJets and at least one bTaggedJets
    selection.add('jetSel', ???)
    #   Second, 'jetSel_3j0t' which selects events with at least 3 tightJets and exactly zero bTaggedJets
    selection.add('jetSel_3j0t', ???)

    # add selection for events with exactly 0 tight photons
    selection.add('zeroPho', ?)
    # add selection for events with exactly 1 tight photon
    selection.add('onePho', ?)
    # add selection for events with exactly 1 loose photon
    selection.add('loosePho', ?)
    """

    ##################
    # EVENT VARIABLES
    ##################
    # PART 2A: Uncomment to begin implementing event variables
    # 2. DEFINE VARIABLES
    ## Define M3, mass of 3-jet pair with highest pT
    # find all possible combinations of 3 tight jets in the events
    # (hint: using the .p4.choose() method of jagged arrays to do combinations
    # of the TLorentzVectors)
    # FIX: 'tightjets' was an undefined name (typo for 'tightJets')
    triJet = tightJets.p4.choose(3)
    triJetPt = (triJet.i0 + triJet.i1 + triJet.i2).pt
    triJetMass = (triJet.i0 + triJet.i1 + triJet.i2).mass
    # define the M3 variable, the triJetMass of the combination with the
    # highest triJetPt value (hint: using the .argmax() method)
    M3 = triJetMass[triJetPt.argmax()]

    leadingPhoton = tightPhotons[:, :1]
    leadingPhotonLoose = loosePhotons[:, :1]

    # 2. DEFINE VARIABLES
    # define egammaMass, mass of combinations of tightElectron and
    # leadingPhoton (hint: using the .cross() method)
    # FIX: 'LeadingPhoton' was an undefined name, and .cross() of a p4 with a
    # mass column is not an invariant mass -- sum the pair four-vectors instead
    egammaPairs = tightElectron.p4.cross(leadingPhoton.p4)
    egammaMass = (egammaPairs.i0 + egammaPairs.i1).mass
    # define mugammaMass, mass of combinations of tightMuon and leadingPhoton
    mugammaPairs = tightMuon.p4.cross(leadingPhoton.p4)
    mugammaMass = (mugammaPairs.i0 + mugammaPairs.i1).mass

    ###################
    # PHOTON CATEGORIES
    ###################
    # Define photon category for each event (defaults to 1 until PART 2B is
    # implemented)
    phoCategory = np.ones(df.size)
    phoCategoryLoose = np.ones(df.size)

    # PART 2B: Uncomment to begin implementing photon categorization
    """
    if not isData:
        #### Photon categories, using genIdx branch of the leading photon in the event
        idx = leadingPhoton.genIdx

        # look through gen particle history, finding the highest PDG ID
        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)

        # reco photons matched to a generated photon
        matchedPho = (genpdgid[idx]==22).any()
        # reco photons really generated as electrons
        matchedEle = (abs(genpdgid[idx])==11).any()
        # if the gen photon has a PDG ID > 25 in it's history, it has a hadronic parent
        hadronicParent = maxParent>25

        # 2. DEFINE VARIABLES
        # define the photon categories for tight photon events
        # a genuine photon is a reconstructed photon which is matched to a generator level photon, and does not have a hadronic parent
        isGenPho = ?
        # a hadronic photon is a reconstructed photon which is matched to a generator level photon, but has a hadronic parent
        isHadPho = ?
        # a misidentified electron is a reconstructed photon which is
        isMisIDele = ?
        # a hadronic/fake photon is a reconstructed photon that does not fall within any of the above categories
        isHadFake = ?

        #define integer definition for the photon category axis
        phoCategory = 1*isGenPho + 2*isMisIDele + 3*isHadPho + 4*isHadFake

        # do photon matching for loose photons as well
        # look through parentage to find if any hadrons in genPhoton parent history
        idx = leadingPhotonLoose.genIdx

        # reco photons matched to a generated photon
        matchedPhoLoose = (genpdgid[idx]==22).any()
        # reco photons really generated as electrons
        matchedEleLoose = (abs(genpdgid[idx])==11).any()

        maxParent = maxHistoryPDGID(idx.content, idx.starts, idx.stops,
                                    genpdgid.content, genpdgid.starts, genpdgid.stops,
                                    genmotherIdx.content, genmotherIdx.starts, genmotherIdx.stops)
        hadronicParent = maxParent>25

        # 2. DEFINE VARIABLES
        # a genuine photon is a reconstructed photon which is matched to a generator level photon, and does not have a hadronic parent
        isGenPhoLoose = ?
        # a hadronic photon is a reconstructed photon which is matched to a generator level photon, but has a hadronic parent
        isHadPhoLoose = ?
        # a misidentified electron is a reconstructed photon which is
        isMisIDeleLoose = ?
        # a hadronic/fake photon is a reconstructed photon that does not fall within any of the above categories
        isHadFakeLoose = ?

        #define integer definition for the photon category axis
        phoCategoryLoose = 1*isGenPhoLoose + 2*isMisIDeleLoose + 3*isHadPhoLoose + 4*isHadFakeLoose
    """

    ################
    # EVENT WEIGHTS
    ################
    # create a processor Weights object, with the same length as the number of
    # events in the chunk
    weights = processor.Weights(len(df['event']))

    if not isData:
        # normalize MC to cross-section times luminosity over generated yield
        lumiWeight = np.ones(df.size)
        nMCevents = self.mcEventYields[datasetFull]
        xsec = crossSections[dataset]
        luminosity = 35860.0  # 2016 integrated luminosity in /pb
        lumiWeight *= xsec * luminosity / nMCevents
        weights.add('lumiWeight', lumiWeight)

    # PART 4: Uncomment to add weights and systematics
    """
    nPUTrue = df['Pileup_nTrueInt']

    # 4. SYSTEMATICS
    # calculate pileup weights and variations
    # use the puLookup, puLookup_Up, and puLookup_Down lookup functions to find the nominal and up/down systematic weights
    # the puLookup function is called with the full dataset name (datasetFull) and the number of true interactions
    puWeight = ?
    puWeight_Up = ?
    puWeight_Down = ?
    # add the puWeight and it's uncertainties to the weights container
    weights.add('puWeight',weight=?, weightUp=?, weightDown=?)

    eleID = self.ele_id_sf(tightElectron.eta, tightElectron.pt)
    eleIDerr = self.ele_id_err(tightElectron.eta, tightElectron.pt)
    eleRECO = self.ele_reco_sf(tightElectron.eta, tightElectron.pt)
    eleRECOerr = self.ele_reco_err(tightElectron.eta, tightElectron.pt)

    eleSF = (eleID*eleRECO).prod()
    eleSF_up = ((eleID + eleIDerr) * (eleRECO + eleRECOerr)).prod()
    eleSF_down = ((eleID - eleIDerr) * (eleRECO - eleRECOerr)).prod()

    # 4. SYSTEMATICS
    # add electron efficiency weights to the weight container
    weights.add('eleEffWeight',weight=?, weightUp=?, weightDown=?)

    muID = self.mu_id_sf(tightMuon.eta, tightMuon.pt)
    muIDerr = self.mu_id_err(tightMuon.eta, tightMuon.pt)
    muIso = self.mu_iso_sf(tightMuon.eta, tightMuon.pt)
    muIsoerr = self.mu_iso_err(tightMuon.eta, tightMuon.pt)
    muTrig = self.mu_iso_sf(abs(tightMuon.eta), tightMuon.pt)
    muTrigerr = self.mu_iso_err(abs(tightMuon.eta), tightMuon.pt)

    muSF = (muID*muIso*muTrig).prod()
    muSF_up = ((muID + muIDerr) * (muIso + muIsoerr) * (muTrig + muTrigerr)).prod()
    muSF_down = ((muID - muIDerr) * (muIso - muIsoerr) * (muTrig - muTrigerr)).prod()

    # 4. SYSTEMATICS
    # add muon efficiency weights to the weight container
    weights.add('muEffWeight',weight=?, weightUp=?, weightDown=?)

    #btag key name
    #name / working Point / type / systematic / jetType
    # ... / 0-loose 1-medium 2-tight / comb,mujets,iterativefit / central,up,down / 0-b 1-c 2-udcsg

    bJetSF_b = self.evaluator['btag2016DeepCSV_1_comb_central_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
    bJetSF_c = self.evaluator['btag2016DeepCSV_1_comb_central_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
    bJetSF_udcsg = self.evaluator['btag2016DeepCSV_1_incl_central_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

    bJetSF_b_up = self.evaluator['btag2016DeepCSV_1_comb_up_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
    bJetSF_c_up = self.evaluator['btag2016DeepCSV_1_comb_up_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
    bJetSF_udcsg_up = self.evaluator['btag2016DeepCSV_1_incl_up_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

    bJetSF_b_down = self.evaluator['btag2016DeepCSV_1_comb_down_0'](tightJets[tightJets.hadFlav==5].eta, tightJets[tightJets.hadFlav==5].pt, tightJets[tightJets.hadFlav==5].btag)
    bJetSF_c_down = self.evaluator['btag2016DeepCSV_1_comb_down_1'](tightJets[tightJets.hadFlav==4].eta, tightJets[tightJets.hadFlav==4].pt, tightJets[tightJets.hadFlav==4].btag)
    bJetSF_udcsg_down = self.evaluator['btag2016DeepCSV_1_incl_down_2'](tightJets[tightJets.hadFlav==0].eta, tightJets[tightJets.hadFlav==0].pt, tightJets[tightJets.hadFlav==0].btag)

    bJetSF = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
    bJetSF.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
    bJetSF.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
    bJetSF.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

    bJetSF_heavy_up = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
    bJetSF_heavy_up.content[(tightJets.hadFlav==5).content] = bJetSF_b_up.content
    bJetSF_heavy_up.content[(tightJets.hadFlav==4).content] = bJetSF_c_up.content
    bJetSF_heavy_up.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

    bJetSF_heavy_down = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
    bJetSF_heavy_down.content[(tightJets.hadFlav==5).content] = bJetSF_b_down.content
    bJetSF_heavy_down.content[(tightJets.hadFlav==4).content] = bJetSF_c_down.content
    bJetSF_heavy_down.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg.content

    bJetSF_light_up = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
    bJetSF_light_up.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
    bJetSF_light_up.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
    bJetSF_light_up.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg_up.content

    bJetSF_light_down = JaggedArray(content = np.ones_like(tightJets.pt.content,dtype=np.float64), starts = tightJets.starts, stops = tightJets.stops)
    bJetSF_light_down.content[(tightJets.hadFlav==5).content] = bJetSF_b.content
    bJetSF_light_down.content[(tightJets.hadFlav==4).content] = bJetSF_c.content
    bJetSF_light_down.content[(tightJets.hadFlav==0).content] = bJetSF_udcsg_down.content

    ## mc efficiency lookup, data efficiency is eff* scale factor
    btagEfficiencies = taggingEffLookup(datasetFull,tightJets.hadFlav,tightJets.pt,tightJets.eta)
    btagEfficienciesData = btagEfficiencies*bJetSF

    btagEfficienciesData_b_up   = btagEfficiencies*bJetSF_heavy_up
    btagEfficienciesData_b_down = btagEfficiencies*bJetSF_heavy_down
    btagEfficienciesData_l_up   = btagEfficiencies*bJetSF_light_up
    btagEfficienciesData_l_down = btagEfficiencies*bJetSF_light_down

    ##probability is the product of all efficiencies of tagged jets, times product of 1-eff for all untagged jets
    ## https://twiki.cern.ch/twiki/bin/view/CMS/BTagSFMethods#1a_Event_reweighting_using_scale
    pMC   = btagEfficiencies[btagged].prod() * (1.-btagEfficiencies[np.invert(btagged)]).prod()
    pData = btagEfficienciesData[btagged].prod() * (1.-btagEfficienciesData[np.invert(btagged)]).prod()
    pData_b_up = btagEfficienciesData_b_up[btagged].prod() * (1.-btagEfficienciesData_b_up[np.invert(btagged)]).prod()
    pData_b_down = btagEfficienciesData_b_down[btagged].prod() * (1.-btagEfficienciesData_b_down[np.invert(btagged)]).prod()
    pData_l_up = btagEfficienciesData_l_up[btagged].prod() * (1.-btagEfficienciesData_l_up[np.invert(btagged)]).prod()
    pData_l_down = btagEfficienciesData_l_down[btagged].prod() * (1.-btagEfficienciesData_l_down[np.invert(btagged)]).prod()

    pMC[pMC==0]=1. #avoid 0/0 error
    btagWeight = pData/pMC

    pData[pData==0] = 1. #avoid divide by 0 error
    btagWeight_b_up = pData_b_up/pData
    btagWeight_b_down = pData_b_down/pData
    btagWeight_l_up = pData_l_up/pData
    btagWeight_l_down = pData_l_down/pData

    weights.add('btagWeight',btagWeight)

    weights.add('btagWeight_heavy',weight=np.ones_like(btagWeight), weightUp=btagWeight_b_up, weightDown=btagWeight_b_down)
    weights.add('btagWeight_light',weight=np.ones_like(btagWeight), weightUp=btagWeight_l_up, weightDown=btagWeight_l_down)

    #in some samples, generator systematics are not available, in those case the systematic weights of 1. are used
    try:
        generatorWeight = df['Generator_weight']
        generatorWeight.shape = (generatorWeight.size,1)

        LHEWeight_originalXWGTUP = df['LHEWeight_originalXWGTUP']
        LHEWeight_originalXWGTUP.shape = (LHEWeight_originalXWGTUP.size,1)

        nPSWeights = df['nPSWeight']
        PSWeights = df['PSWeight']
        PSWeights.shape = (nPSWeights.size,int(nPSWeights.mean()))
        if nPSWeights.mean()==1:
            hasWeights=False

        nLHEScaleWeights = df['nLHEScaleWeight']
        LHEScaleWeights = df['LHEScaleWeight']
        LHEScaleWeights.shape = (nLHEScaleWeights.size,int(nLHEScaleWeights.mean()))

        nLHEPdfWeights = df['nLHEPdfWeight']
        LHEPdfWeights = df['LHEPdfWeight']
        LHEPdfWeights.shape = (nLHEPdfWeights.size,int(nLHEPdfWeights.mean()))

        #PDF Uncertainty weights
        #avoid errors from 0/0 division
        if (LHEPdfWeights[:,:1]==0).any():
            LHEPdfWeights[:,0][LHEPdfWeights[:,0]==0] = 1.
        LHEPdfVariation = LHEPdfWeights / LHEPdfWeights[:,:1]
        weights.add('PDF', weight=np.ones(df.size),
                    weightUp=LHEPdfVariation.max(axis=1),
                    weightDown=LHEPdfVariation.min(axis=1))

        #Q2 Uncertainty weights
        if nLHEScaleWeights.mean()==9:
            scaleWeightSelector=[0,1,3,5,7,8]
        elif nLHEScaleWeights.mean()==44:
            scaleWeightSelector=[0,5,15,24,34,39]
        else:
            scaleWeightSelector=[]
        LHEScaleVariation = LHEScaleWeights[:,scaleWeightSelector]
        weights.add('Q2Scale', weight=np.ones(df.size),
                    weightUp=LHEScaleVariation.max(axis=1),
                    weightDown=LHEScaleVariation.min(axis=1))

        #ISR / FSR uncertainty weights
        if not (generatorWeight==LHEWeight_originalXWGTUP).all():
            PSWeights = PSWeights * LHEWeight_originalXWGTUP / generatorWeight
        weights.add('ISR',weight=np.ones(df.size), weightUp=PSWeights[:,2], weightDown=PSWeights[:,0])
        weights.add('FSR',weight=np.ones(df.size), weightUp=PSWeights[:,3], weightDown=PSWeights[:,1])
    except:
        weights.add('ISR',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
        weights.add('FSR',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
        weights.add('PDF',    weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
        weights.add('Q2Scale',weight=np.ones(df.size),weightUp=np.ones(df.size),weightDown=np.ones(df.size))
    """

    ###################
    # FILL HISTOGRAMS
    ###################
    # PART 3: Uncomment to add histograms
    """
    #list of systematics
    # FIX: 'nowegiht' typo corrected -- the special case below checks syst=='noweight'
    systList = ['noweight','nominal']

    # PART 4: SYSTEMATICS
    # uncomment the full list after systematics have been implemented
    #systList = ['noweight','nominal','puWeightUp','puWeightDown','muEffWeightUp','muEffWeightDown','eleEffWeightUp','eleEffWeightDown','btagWeight_lightUp','btagWeight_lightDown','btagWeight_heavyUp','btagWeight_heavyDown', 'ISRUp', 'ISRDown', 'FSRUp', 'FSRDown', 'PDFUp', 'PDFDown', 'Q2ScaleUp', 'Q2ScaleDown']

    if not self.jetSyst=='nominal':
        systList=[self.jetSyst]

    if isData:
        systList = ['noweight']

    for syst in systList:
        #find the event weight to be used when filling the histograms
        weightSyst = syst
        #in the case of 'nominal', or the jet energy systematics, no weight systematic variation is used (weightSyst=None)
        if syst in ['nominal','JERUp','JERDown','JESUp','JESDown']:
            weightSyst=None

        if syst=='noweight':
            evtWeight = np.ones(df.size)
        else:
            # call weights.weight() with the name of the systematic to be varied
            evtWeight = weights.weight(weightSyst)

        #loop over both electron and muon selections
        for lepton in ['electron','muon']:
            if lepton=='electron':
                lepSel='eleSel'
            if lepton=='muon':
                lepSel='muSel'

            # 3. GET HISTOGRAM EVENT SELECTION
            # use the selection.all() method to select events passing the lepton selection, 4-jet 1-tag jet selection, and either the one-photon or loose-photon selections
            #  ex: selection.all( *('LIST', 'OF', 'SELECTION', 'CUTS') )
            phosel = selection.all( *(???) )
            phoselLoose = selection.all( *(???) )

            # 3. FILL HISTOGRAMS
            # fill photon_pt and photon_eta, using the tightPhotons array, from events passing the phosel selection
            output['photon_pt'].fill(dataset=dataset, pt=?, category=?, lepFlavor=lepton, systematic=syst, weight=?)
            output['photon_eta'].fill(dataset=dataset, pt=?, category=?, lepFlavor=lepton, systematic=syst, weight=?)

            # fill photon_chIso histogram, using the loosePhotons array (photons passing all cuts, except the charged hadron isolation cuts)
            output['photon_chIso'].fill(dataset=dataset, chIso=?, category=?, lepFlavor=lepton, systematic=syst, weight=?)

            # fill M3 histogram, for events passing the phosel selection
            output['M3'].fill(dataset=dataset, M3=?, category=?, lepFlavor=lepton, systematic=syst, weight=?)

        # 3. GET HISTOGRAM EVENT SELECTION
        # use the selection.all() method to select events passing the eleSel or muSel selection, 3-jet 0-btag selection, and have exactly one photon
        phosel_3j0t_e = selection.all( *('eleSel', ???) )
        phosel_3j0t_mu = selection.all( *('muSel', ???) )

        # 3. FILL HISTOGRAMS
        # fill photon_lepton_mass_3j0t histogram, using the egammaMass array, for events passing the phosel_3j0t_e
        # FIX: missing comma after 'category=?' in the electron fill call
        output['photon_lepton_mass_3j0t'].fill(dataset=dataset, mass=?, category=?, lepFlavor='electron', systematic=syst, weight=?)
        output['photon_lepton_mass_3j0t'].fill(dataset=dataset, mass=?, category=?, lepFlavor='muon', systematic=syst, weight=?)
    """

    output['EventCount'] = len(df['event'])

    return output
def process(self, df):
    """Run the full VBF H(inv) event selection on one chunk of events.

    Derives per-event physics columns on ``df``, builds a PackedSelection of
    all signal/control-region cuts, computes event weights, and fills the
    accumulator histograms once per analysis region.

    :param df: coffea dataframe-like chunk; columns are read and new derived
        columns are written back onto it (e.g. ``recoil_pt``, ``mjj``).
    :returns: the filled accumulator (``self.accumulator.identity()`` shape).
    """
    # Empty chunk: nothing to fill, return an empty accumulator.
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']

    # Classify the dataset by name so later branches know which
    # generator-level corrections / columns apply.
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_znunu'] = is_lo_znunu(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
        'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
            'is_lo_w_ewk'] | df['is_lo_z_ewk']
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt (V or photon), used for theory reweighting.
    gen_v_pt = None
    if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
            'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_combined']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        all_gen_photons = gen[(gen.pdg == 22)]
        # "Prompt" photons: status-1 with the isPrompt status-flag bit set.
        prompt_mask = (all_gen_photons.status == 1) & (all_gen_photons.flag
                                                       & 1 == 1)
        stat1_mask = (all_gen_photons.status == 1)
        # Prefer prompt photons; fall back to any status-1 photon in events
        # that have no prompt candidate at all.
        gen_photons = all_gen_photons[prompt_mask
                                      | (~prompt_mask.any()) & stat1_mask]
        gen_photon = gen_photons[gen_photons.pt.argmax()]
        gen_v_pt = gen_photon.pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()
        # Clamp negative sentinel values (no dijet) to zero.
        df['mjj_gen'] = np.where(df['mjj_gen'] > 0, df['mjj_gen'], 0)

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Remove jets in accordance with the noise recipe
    # (2017 EE-noise region: keep only jets outside 2.65<|eta|<3.139
    # unless their raw pt exceeds 50 GeV).
    if df['year'] == 2017:
        ak4 = ak4[(ak4.ptraw > 50) | (ak4.abseta < 2.65) |
                  (ak4.abseta > 3.139)]
        bjets = bjets[(bjets.ptraw > 50) | (bjets.abseta < 2.65) |
                      (bjets.abseta > 3.139)]

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]

    # Muons: tight flag per muon, dimuon pairs, and transverse mass
    # (MT only meaningful for exactly-one-muon events, hence the
    # (counts == 1) multiplier).
    df['is_tight_muon'] = muons.tightId \
        & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
        & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
        & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)
    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
    df['MT_mu'] = ((muons.counts == 1) *
                   mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons: same pattern as muons above.
    df['is_tight_electron'] = electrons.tightId \
        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
        & (electrons.absetasc < cfg.ELECTRON.CUTS.TIGHT.ETA)
    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
    df['MT_el'] = ((electrons.counts == 1) *
                   mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4: lepton-vs-leading-jet separation in (eta, phi).
    leadak4_index = ak4.pt.argmax()
    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil: MET with leptons/photons added back, plus MET-quality ratios
    # comparing PF MET against Calo and Track MET.
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                               muons, photons)
    df["dPFCaloSR"] = (met_pt - df["CaloMET_pt"]) / met_pt
    df["dPFCaloCR"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["dPFTkSR"] = (met_pt - df["TkMET_pt"]) / met_pt
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                              df['recoil_phi'],
                                              njet=4,
                                              ptmin=30,
                                              etamax=5.0)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                           met_phi,
                                           njet=4,
                                           ptmin=30,
                                           etamax=5.0)

    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('at_least_one_tau', taus.counts > 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr',
                  df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    # NOTE(review): 'mindphijm' also uses the MINDPHIJR threshold; confirm
    # there is no separate MINDPHIJM config value intended here.
    selection.add('mindphijm',
                  df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo_sr',
                  np.abs(df['dPFCaloSR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('dpfcalo_cr',
                  np.abs(df['dPFCaloCR']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)
    # met_sr deliberately reuses the recoil threshold for raw MET —
    # presumably so SR-with-MET matches the recoil cut; verify intent.
    selection.add('met_sr', met_pt > cfg.SELECTION.SIGNAL.RECOIL)

    # AK4 dijet: all cuts below are formulated on the leading distinct pair.
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    # VBF topology: jets in opposite hemispheres (eta product < 0).
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    # Jets within tracker acceptance get the chf/nhf quality requirement;
    # jets outside it are accepted on jet ID alone (the "+ ~has_track" term).
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1
    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

    # HF "horn" region 2.8 < |eta| < 3.2: noisy; vetoed below unless the
    # PF-vs-track MET agreement is good.
    leading_jet_in_horn = ((diak4.i0.abseta < 3.2) &
                           (diak4.i0.abseta > 2.8)).any()
    trailing_jet_in_horn = ((diak4.i1.abseta < 3.2) &
                            (diak4.i1.abseta > 2.8)).any()
    selection.add('hornveto', (df['dPFTkSR'] < 0.8) |
                  ~(leading_jet_in_horn | trailing_jet_in_horn))

    # 2018 HEM failure mitigation: in data after run 319077, drop events
    # whose MET points into the dead HEM phi sector; MC is unaffected here
    # (it is reweighted later instead).
    if df['year'] == 2018:
        if df['is_data']:
            metphihem_mask = ~((met_phi > -1.8) & (met_phi < -0.6) &
                               (df['run'] > 319077))
        else:
            metphihem_mask = pass_all
        selection.add("metphihemextveto", metphihem_mask)
        selection.add('no_el_in_hem',
                      electrons[electrons_in_hem(electrons)].counts == 0)
    else:
        selection.add("metphihemextveto", pass_all)
        selection.add('no_el_in_hem', pass_all)

    # diak4.counts > 0 means at least one distinct pair exists, i.e. >= 2 jets.
    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj',
                  df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add(
        'dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add(
        'detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Cleaning cuts for signal region
    max_neEmEF = np.maximum(diak4.i0.nef, diak4.i1.nef)
    selection.add('max_neEmEF', (max_neEmEF < 0.7).any())
    vec_b = calculate_vecB(ak4, met_pt, met_phi)
    vec_dphi = calculate_vecDPhi(ak4, met_pt, met_phi, df['TkMET_phi'])
    no_jet_in_trk = (diak4.i0.abseta > 2.5).any() & (diak4.i1.abseta
                                                     > 2.5).any()
    no_jet_in_hf = (diak4.i0.abseta < 3.0).any() & (diak4.i1.abseta
                                                    < 3.0).any()
    at_least_one_jet_in_hf = (diak4.i0.abseta > 3.0).any() | (
        diak4.i1.abseta > 3.0).any()
    at_least_one_jet_in_trk = (diak4.i0.abseta < 2.5).any() | (
        diak4.i1.abseta < 2.5).any()

    # Categorized cleaning cuts: vec_dphi-based cut when a jet is in the
    # tracker / not in HF, vec_b-based cut for the HF-only topology.
    eemitigation = ((no_jet_in_hf | at_least_one_jet_in_trk) &
                    (vec_dphi < 1.0)) | (
                        (no_jet_in_trk & at_least_one_jet_in_hf) &
                        (vec_b < 0.2))
    selection.add('eemitigation', eemitigation)

    # HF-HF veto in SR
    both_jets_in_hf = (diak4.i0.abseta > 3.0) & (diak4.i1.abseta > 3.0)
    selection.add('veto_hfhf', ~both_jets_in_hf.any())

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
        & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
        & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId & photons.barrel
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen-level diagnostic histograms (only when LHE info is available).
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano",
                                    dataset=dataset)
    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights (MC only). storeIndividual=True keeps each component so that
    # partial_weight(exclude=...) can be used per region below.
    evaluator = evaluator_from_config(cfg)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        # Prefire weight column only exists for some years/campaigns;
        # default to unity when it is missing.
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons,
                                    electrons, photons, cfg)
        weights = pileup_weights(weights, df, evaluator, cfg)
        weights = ak4_em_frac_weights(weights, diak4, evaluator)
        if not (gen_v_pt is None):
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                         df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]
            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [
                ak4[leadak4_index][mask].eta
            ]
            # NOTE(review): this indexes a Python *list* literal with an
            # array mask — likely intended as (ak4.pt.max() < 0)[mask] or a
            # btag quantity; confirm against the synchronization ntuples.
            output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]
            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [
                muons[df['is_tight_muon']].counts[mask]
            ]
            output['kinematics']['mupt0'] += [
                muons[leadmuon_index][mask].pt
            ]
            output['kinematics']['mueta0'] += [
                muons[leadmuon_index][mask].eta
            ]
            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[mask]
            ]
            output['kinematics']['elpt0'] += [
                electrons[leadelectron_index][mask].pt
            ]
            output['kinematics']['eleta0'] += [
                electrons[leadelectron_index][mask].eta
            ]
            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [
                photons[df['is_tight_photon']].counts[mask]
            ]
            output['kinematics']['gpt0'] += [
                photons[leadphoton_index][mask].pt
            ]
            output['kinematics']['geta0'] += [
                photons[leadphoton_index][mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)

    # Get veto weights (only for MC)
    if not df['is_data']:
        veto_weights = get_veto_weights(df, cfg, evaluator, electrons,
                                        muons, taus)

    # Main region loop: each region gets its own weight copy (so trigger /
    # veto / HEM weights can be added per region without leaking) and one
    # pass of histogram fills.
    for region, cuts in regions.items():
        exclude = [None]
        region_weights = copy.deepcopy(weights)

        if not df['is_data']:
            ### Trigger weights
            if re.match(r'cr_(\d+)e.*', region):
                # Electron-trigger efficiency: P(at least one ele fires)
                # in data over the same in MC.
                p_pass_data = 1 - (1 - evaluator["trigger_electron_eff_data"]
                                   (electrons.etasc, electrons.pt)).prod()
                p_pass_mc = 1 - (1 - evaluator["trigger_electron_eff_mc"]
                                 (electrons.etasc, electrons.pt)).prod()
                trigger_weight = p_pass_data / p_pass_mc
                trigger_weight[np.isnan(trigger_weight)] = 1
                region_weights.add('trigger', trigger_weight)
            elif re.match(r'cr_(\d+)m.*', region) or re.match(
                    'sr_.*', region):
                region_weights.add(
                    'trigger_met', evaluator["trigger_met"](df['recoil_pt']))
            elif re.match(r'cr_g.*', region):
                photon_trigger_sf(region_weights, photons, df)

            # Veto weights: in no-veto regions, replace the individual
            # lepton/tau scale factors with the combined veto weight.
            if re.match('.*no_veto.*', region):
                exclude = [
                    "muon_id_iso_tight", "muon_id_tight", "muon_iso_tight",
                    "muon_id_loose", "muon_iso_loose", "ele_reco",
                    "ele_id_tight", "ele_id_loose", "tau_id"
                ]
                region_weights.add(
                    "veto", veto_weights.partial_weight(include=["nominal"]))

            # HEM-veto weights for signal region MC
            if re.match('^sr_vbf.*', region) and df['year'] == 2018:
                # Events that lie in the HEM-veto region
                events_to_weight_mask = (met_phi > -1.8) & (met_phi < -0.6)
                # Weight is the "good lumi fraction" for 2018
                weight = 21.1 / 59.7
                hem_weight = np.where(events_to_weight_mask, weight, 1.0)
                region_weights.add("hem_weight", hem_weight)

        # This is the default weight for this region
        rweight = region_weights.partial_weight(exclude=exclude)

        # Blinding: skip data in signal regions entirely.
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region][dataset]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' +
                       region][dataset][cutname] += selection.all(
                           *cuts[:icut + 1]).sum()

        # Event mask for everything filled below in this region.
        mask = selection.all(*cuts)

        if cfg.RUN.SAVE.TREE:
            if region in ['cr_1e_vbf', 'cr_1m_vbf']:
                output['tree_int64'][region][
                    "event"] += processor.column_accumulator(
                        df["event"][mask])
                output['tree_float16'][region][
                    "gen_v_pt"] += processor.column_accumulator(
                        np.float16(gen_v_pt[mask]))
                output['tree_float16'][region][
                    "gen_mjj"] += processor.column_accumulator(
                        np.float16(df['mjj_gen'][mask]))
                output['tree_float16'][region][
                    "recoil_pt"] += processor.column_accumulator(
                        np.float16(df["recoil_pt"][mask]))
                output['tree_float16'][region][
                    "recoil_phi"] += processor.column_accumulator(
                        np.float16(df["recoil_phi"][mask]))
                output['tree_float16'][region][
                    "mjj"] += processor.column_accumulator(
                        np.float16(df["mjj"][mask]))
                output['tree_float16'][region][
                    "leadak4_pt"] += processor.column_accumulator(
                        np.float16(diak4.i0.pt[mask]))
                output['tree_float16'][region][
                    "leadak4_eta"] += processor.column_accumulator(
                        np.float16(diak4.i0.eta[mask]))
                output['tree_float16'][region][
                    "leadak4_phi"] += processor.column_accumulator(
                        np.float16(diak4.i0.phi[mask]))
                output['tree_float16'][region][
                    "trailak4_pt"] += processor.column_accumulator(
                        np.float16(diak4.i1.pt[mask]))
                output['tree_float16'][region][
                    "trailak4_eta"] += processor.column_accumulator(
                        np.float16(diak4.i1.eta[mask]))
                output['tree_float16'][region][
                    "trailak4_phi"] += processor.column_accumulator(
                        np.float16(diak4.i1.phi[mask]))
                output['tree_float16'][region][
                    "minDPhiJetRecoil"] += processor.column_accumulator(
                        np.float16(df["minDPhiJetRecoil"][mask]))
                if '_1e_' in region:
                    output['tree_float16'][region][
                        "leadlep_pt"] += processor.column_accumulator(
                            np.float16(electrons.pt.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_eta"] += processor.column_accumulator(
                            np.float16(electrons[
                                electrons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_phi"] += processor.column_accumulator(
                            np.float16(electrons[
                                electrons.pt.argmax()].phi.max()[mask]))
                elif '_1m_' in region:
                    output['tree_float16'][region][
                        "leadlep_pt"] += processor.column_accumulator(
                            np.float16(muons.pt.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_eta"] += processor.column_accumulator(
                            np.float16(
                                muons[muons.pt.argmax()].eta.max()[mask]))
                    output['tree_float16'][region][
                        "leadlep_phi"] += processor.column_accumulator(
                            np.float16(
                                muons[muons.pt.argmax()].phi.max()[mask]))
                # Every individual weight component plus the total.
                for name, w in region_weights._weights.items():
                    output['tree_float16'][region][
                        f"weight_{name}"] += processor.column_accumulator(
                            np.float16(w[mask]))
                output['tree_float16'][region][
                    f"weight_total"] += processor.column_accumulator(
                        np.float16(rweight[mask]))
            if region == 'inclusive':
                output['tree_int64'][region][
                    "event"] += processor.column_accumulator(
                        df["event"][mask])
                for name in selection.names:
                    output['tree_bool'][region][
                        name] += processor.column_accumulator(
                            np.bool_(selection.all(*[name])[mask]))

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            # Fill a per-event object-count histogram for this region.
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=rweight[mask])

        fill_mult('ak4_mult', ak4[ak4.pt > 30])
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in region_weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide",
                   weight_type=wname,
                   weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, rweight[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            region_weights.partial_weight(exclude=exclude +
                                          ['prefire'])[mask])
        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)
        ezfill('ak4_eta_nopref',
               jeteta=ak4[mask].eta.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref',
               jetphi=ak4[mask].phi.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref',
               jetpt=ak4[mask].pt.flatten(),
               weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], rweight[mask])
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw0',
               jetpt=diak4.i0.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst0',
               nconst=diak4.i0.nconst[mask].flatten(),
               weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw1',
               jetpt=diak4.i1.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_nconst1',
               nconst=diak4.i1.nconst[mask].flatten(),
               weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], rweight[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo_cr', dpfcalo=df["dPFCaloCR"][mask],
               weight=rweight[mask])
        ezfill('dpfcalo_sr', dpfcalo=df["dPFCaloSR"][mask],
               weight=rweight[mask])
        ezfill('met', met=met_pt[mask], weight=rweight[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=rweight[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask], weight=rweight[mask])
        ezfill('recoil_phi', phi=df["recoil_phi"][mask],
               weight=rweight[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask],
               weight=rweight[mask])
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask],
               weight=rweight[mask])
        ezfill('dphijj', dphi=df["dphijj"][mask], weight=rweight[mask])
        ezfill('detajj', deta=df["detajj"][mask], weight=rweight[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=rweight[mask])

        # Gen-level fills use only the generator weight, not the full SF set.
        if gen_v_pt is not None:
            ezfill('gen_vpt',
                   vpt=gen_v_pt[mask],
                   weight=df['Generator_weight'][mask])
            ezfill('gen_mjj',
                   mjj=df['mjj_gen'][mask],
                   weight=df['Generator_weight'][mask])

        # Photon CR data-driven QCD estimate
        if df['is_data'] and re.match("cr_g.*", region) and re.match(
                "(SinglePhoton|EGamma).*", dataset):
            w_imp = photon_impurity_weights(
                photons[leadphoton_index].pt.max()[mask], df["year"])
            output['mjj'].fill(dataset=data_driven_qcd_dataset(dataset),
                               region=region,
                               mjj=df["mjj"][mask],
                               weight=rweight[mask] * w_imp)
            output['recoil'].fill(dataset=data_driven_qcd_dataset(dataset),
                                  region=region,
                                  recoil=df["recoil_pt"][mask],
                                  weight=rweight[mask] * w_imp)

        # Uncertainty variations (theory reweighting of Z samples).
        if df['is_lo_z'] or df['is_nlo_z'] or df['is_lo_z_ewk']:
            theory_uncs = [x for x in cfg.SF.keys() if x.startswith('unc')]
            for unc in theory_uncs:
                reweight = evaluator[unc](gen_v_pt)
                w = (region_weights.weight() * reweight)[mask]
                ezfill('mjj_unc',
                       mjj=df['mjj'][mask],
                       uncertainty=unc,
                       weight=w)

        # Two dimensional
        ezfill('recoil_mjj',
               recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask],
               weight=rweight[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region or 'no_veto' in region:
            w_allmu = weight_shape(muons.pt[mask], rweight[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_pt_abseta',
                   pt=muons.pt[mask].flatten(),
                   abseta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=rweight[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(),
                   weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(),
                   weight=w_allmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], rweight[mask])
            ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta0',
                   eta=dimuons.i0.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_eta1',
                   eta=dimuons.i1.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi0',
                   phi=dimuons.i0.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('muon_phi1',
                   phi=dimuons.i1.phi[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(),
                   weight=w_dimu)
            ezfill('dimuon_mass',
                   dilepton_mass=dimuons.mass[mask].flatten(),
                   weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region or 'no_veto' in region:
            w_allel = weight_shape(electrons.pt[mask], rweight[mask])
            ezfill('electron_pt',
                   pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_pt_eta',
                   pt=electrons.pt[mask].flatten(),
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask],
                   weight=rweight[mask])
            ezfill('electron_eta',
                   eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_phi',
                   phi=electrons.phi[mask].flatten(),
                   weight=w_allel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], rweight[mask])
            ezfill('electron_pt0',
                   pt=dielectrons.i0.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_pt1',
                   pt=dielectrons.i1.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta0',
                   eta=dielectrons.i0.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_eta1',
                   eta=dielectrons.i1.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi0',
                   phi=dielectrons.i0.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('electron_phi1',
                   phi=dielectrons.i1.phi[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_pt',
                   pt=dielectrons.pt[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_eta',
                   eta=dielectrons.eta[mask].flatten(),
                   weight=w_diel)
            ezfill('dielectron_mass',
                   dilepton_mass=dielectrons.mass[mask].flatten(),
                   weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask], rweight[mask])
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask &
                                          (leadphoton_index.counts > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)

            # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], rweight[mask])

        # Tau
        if 'no_veto' in region:
            w_all_taus = weight_shape(taus.pt[mask], rweight[mask])
            ezfill("tau_pt", pt=taus.pt[mask].flatten(), weight=w_all_taus)

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask], weight=rweight[mask])
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask],
               weight=rweight[mask])
        ezfill('npv_nopu',
               nvtx=df['PV_npvs'][mask],
               weight=region_weights.partial_weight(exclude=exclude +
                                                    ['pileup'])[mask])
        ezfill('npvgood_nopu',
               nvtx=df['PV_npvsGood'][mask],
               weight=region_weights.partial_weight(exclude=exclude +
                                                    ['pileup'])[mask])
        ezfill('rho_all',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=region_weights.partial_weight(exclude=exclude)[mask])
        ezfill('rho_central',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=region_weights.partial_weight(exclude=exclude)[mask])
        ezfill('rho_all_nopu',
               rho=df['fixedGridRhoFastjetAll'][mask],
               weight=region_weights.partial_weight(exclude=exclude +
                                                    ['pileup'])[mask])
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=region_weights.partial_weight(exclude=exclude +
                                                    ['pileup'])[mask])
    return output
def process(self, df): dataset = df['dataset'] if self._debug: print("Processing dataframe from", dataset) isRealData = dataset in ["JetHT", "SingleMuon", "data_obs_mu", "data_obs_jet"] self.build_leading_ak8_variables(df) self.build_subleading_ak8_variables(df) self.build_ak4_variables(df) self.build_met_systematics(df) df['muon_dphi'] = np.abs(deltaphi(df['vmuoLoose0_phi'], df['AK8Puppijet0_phi'])) selection = processor.PackedSelection() if isRealData: # Only take jet triggers from JetHT, single muon triggers from SingleMuon dataset # necessary but not sufficient condition to prevent double-counting # (this plus mutually exclusive offline selections are sufficient) selection.add('trigger', (df['triggerBits'] & self._corrections[f'{self._year}_triggerMask']).astype('bool') & (dataset=="JetHT")) selection.add('mutrigger', ((df['triggerBits']&1) & df['passJson']).astype('bool') & (dataset=="SingleMuon")) if self._debug: print("Trigger pass/all", selection.all('trigger').sum(), df.size) print("Muon trigger pass/all", selection.all('mutrigger').sum(), df.size) else: selection.add('trigger', np.ones(df.size, dtype='bool')) selection.add('mutrigger', np.ones(df.size, dtype='bool')) btagLooseWPs = { '2016': 0.6321, '2017': 0.4941, '2018': 0.4184, } selection.add('noLeptons', (df['neleLoose']==0) & (df['nmuLoose']==0) & (df['ntau']==0)) selection.add('oneMuon', (df['neleLoose']==0) & (df['nmuLoose']==1) & (df['ntau']==0)) selection.add('muonAcceptance', (df['vmuoLoose0_pt'] > 55.) 
& (np.abs(df['vmuoLoose0_eta']) < 2.1)) selection.add('muonDphiAK8', df['muon_dphi'] > 2*np.pi/3) selection.add('ak4btagMediumDR08', df['ak4_leadingDeepCSV_dR08'] > btagLooseWPs[self._year]) # at least one passes medium cut selection.add('antiak4btagMediumOppHem', df['opposite_ak4_leadingDeepCSV'] < btagLooseWPs[self._year]) # none pass selection.add('tightVjet', df['AK8Puppijet0_isTightVJet'] != 0) selection.add('n2ddtPass', df['ak8jet_n2ddt'] < 0) selection.add('jetMass', df['AK8Puppijet0_msd'] > 40.) selection.add('deepcvb', df['AK8Puppijet0_deepdoublecvb'] > 0.2) selection.add('jetKinematics', df['AK8Puppijet0_pt'] > 450.) selection.add('jetKinematicsMuonCR', df['AK8Puppijet0_pt'] > 400.) selection.add('pfmet', df['pfmet'] < 140.) regions = {} regions['noselection'] = {} regions['preselection'] = {'trigger', 'noLeptons'} regions['signalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem'} regions['muoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8'} regions['hCCsignalregion'] = {'trigger', 'noLeptons', 'jetKinematics', 'pfmet', 'n2ddtPass', 'tightVjet', 'antiak4btagMediumOppHem', 'deepcvb'} regions['hCCmuoncontrol'] = {'mutrigger', 'oneMuon', 'muonAcceptance', 'jetKinematicsMuonCR', 'n2ddtPass', 'tightVjet', 'ak4btagMediumDR08', 'muonDphiAK8', 'deepcvb'} shiftSystematics = ['JESUp', 'JESDown', 'JERUp', 'JERDown'] shiftedQuantities = {'AK8Puppijet0_pt', 'pfmet'} shiftedSelections = {'jetKinematics', 'jetKinematicsMuonCR', 'pfmet'} for syst in shiftSystematics: selection.add('jetKinematics'+syst, df['AK8Puppijet0_pt_'+syst] > 450) selection.add('jetKinematicsMuonCR'+syst, df['AK8Puppijet0_pt_'+syst] > 400.) selection.add('pfmet'+syst, df['pfmet_'+syst] < 140.) 
# mass shift applied only to V-matched data # https://github.com/kakwok/ZPrimePlusJet/blob/PerBinEff/fitting/PbbJet/buildRhalphabetHbb.py#L30 if not isRealData: shiftSystematics.append('matchedUp') shiftedQuantities.add('AK8Puppijet0_msd') msdshifts = {'2016': 1.001, '2017': 0.979, '2018': 0.970} df['AK8Puppijet0_msd_matchedUp'] = msdshifts[self._year] * df['AK8Puppijet0_msd'] weights = processor.Weights(df.size) if not isRealData: # SumWeights is sum(scale1fb), so we need to use full value here weights.add('genweight', df['scale1fb']) if not self._skipPileup: if self._year == '2017' and dataset in self._corrections['2017_pileupweight_dataset']: weights.add('pileupweight', self._corrections['2017_pileupweight_dataset'][dataset](df['npu']), self._corrections['2017_pileupweight_dataset_puUp'][dataset](df['npu']), self._corrections['2017_pileupweight_dataset_puDown'][dataset](df['npu']), ) elif self._year != '2017': weights.add('pileupweight', self._corrections[f'{self._year}_pileupweight'](df['npu']), self._corrections[f'{self._year}_pileupweight_puUp'](df['npu']), self._corrections[f'{self._year}_pileupweight_puDown'](df['npu']), ) # TODO unc. 
if self._year == '2017' and 'ZJetsToQQ_HT' in dataset: nlo_over_lo_qcd = self._corrections['2017_Z_nlo_qcd'](df['genVPt']) nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt']) weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk) elif self._year == '2017' and 'WJetsToQQ_HT' in dataset: nlo_over_lo_qcd = self._corrections['2017_W_nlo_qcd'](df['genVPt']) nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt']) weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk) elif self._year == '2016' and 'DYJetsToQQ' in dataset: nlo_over_lo_qcd = self._corrections['2016_Z_nlo_qcd'](df['genVPt']) nlo_over_lo_ewk = self._corrections['Z_nlo_over_lo_ewk'](df['genVPt']) weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk) elif self._year == '2016' and 'WJetsToQQ' in dataset: nlo_over_lo_qcd = self._corrections['2016_W_nlo_qcd'](df['genVPt']) nlo_over_lo_ewk = self._corrections['W_nlo_over_lo_ewk'](df['genVPt']) weights.add('kfactor', nlo_over_lo_qcd * nlo_over_lo_ewk) if not isRealData: # handle weight systematics for signal region def regionMask(w): if self._skipTrigger: return np.ones(df.size) return np.where(selection.all('noLeptons'), w, 1.) 
weights.add('trigweight', regionMask(self._corrections[f'{self._year}_trigweight_msd_pt'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])), regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightUp'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])), regionMask(self._corrections[f'{self._year}_trigweight_msd_pt_trigweightDown'](df['AK8Puppijet0_msd_raw'], df['AK8Puppijet0_pt'])), ) vmatch = (np.abs(deltaphi(df['AK8Puppijet0_phi'], df['genVPhi'])) < 0.8) & (np.abs(df['AK8Puppijet0_pt']-df['genVPt'])/df['genVPt'] < 0.5) & (np.abs(df['AK8Puppijet0_msd']-df['genVMass'])/df['genVMass'] < 0.3) weights.add('matched', np.ones(df.size, dtype='f'), vmatch.astype('f'), 1.-vmatch) # handle weight systematics for muon CR def regionMask(w): if self._skipTrigger: return np.ones(df.size) return np.where(selection.all('oneMuon'), w, 1.) mu_abseta = np.abs(df['vmuoLoose0_eta']) weights.add('mutrigweight', regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta'](df['vmuoLoose0_pt'], mu_abseta)), regionMask(self._corrections[f'{self._year}_mutrigweight_pt_abseta_mutrigweightShift'](df['vmuoLoose0_pt'], mu_abseta)), shift=True ) weights.add('muidweight', regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])), regionMask(self._corrections[f'{self._year}_muidweight_abseta_pt_muidweightShift'](mu_abseta, df['vmuoLoose0_pt'])), shift=True ) weights.add('muisoweight', regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt'](mu_abseta, df['vmuoLoose0_pt'])), regionMask(self._corrections[f'{self._year}_muisoweight_abseta_pt_muisoweightShift'](mu_abseta, df['vmuoLoose0_pt'])), shift=True ) if self._debug: print("Weight statistics:") pprint.pprint(weights._weightStats, indent=4) hout = self.accumulator.identity() for histname, h in hout.items(): if not isinstance(h, hist.Hist): continue if not all(k in df or k == 'systematic' for k in h.fields): # Cannot fill this histogram due to missing fields # is this 
an error, warning, or ignorable? if self._debug: print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h)) continue fields = {k: df[k] for k in h.fields if k in df} region = [r for r in regions.keys() if r in histname.split('_')] if 'nminus1' in histname: _, sel, region = histname.split('_') cut = regions[region] - {sel} weight = weights.weight() * selection.all(*cut) h.fill(**fields, weight=weight) elif len(region) == 1: region = region[0] weight = weights.weight() cut = selection.all(*regions[region]) h.fill(systematic="", **fields, weight=weight*cut) if 'systematic' in h.fields: if self._debug: print("Filling systematics for %s" % histname) systs = set(weights.variations) systs.update(shiftSystematics) for syst in systs: if self._debug: print(" Filling systematic %s" % syst) fields_syst = fields for val in shiftedQuantities: if val+'_'+syst in df: fields_syst[val] = df[val+'_'+syst] if self._debug: print(" Replacing field %s with %s" % (val, val+'_'+syst)) if syst in weights.variations: weight_syst = weights.weight(syst) if self._debug: print(" Using modified weight") else: weight_syst = weight if syst in set(shiftSystematics): cut_syst = set() for sel in regions[region]: if sel in shiftedSelections and sel+syst in selection.names: cut_syst.add(sel+syst) if self._debug: print(" Replacing cut %s with systematic-shifted %s" % (sel, sel+syst)) else: cut_syst.add(sel) cut_syst = selection.all(*cut_syst) else: cut_syst = cut h.fill(systematic=syst, **fields_syst, weight=weight_syst*cut_syst) elif len(region) > 1: raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region)) else: raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, )) if not isRealData: if 'skim_sumw' in df: # hacky way to only accumulate file-level information once if df['skim_sumw'] is not None: hout['sumw'][dataset] += df['skim_sumw'] else: hout['sumw'][dataset] += np.sum(df['scale1fb']) return hout
def process(self, events):
    """Event processor for a semileptonic HH->bbWW-style selection.

    Builds per-channel ("e"/"mu") trigger masks, MET-filter and object
    selections (muons, electrons, AK4 jets, AK15/AK8 fat jets, lepton-
    subtracted fat jets), picks an Hbb and a Wjj fat-jet candidate, and
    fills a per-channel cutflow histogram into the accumulator.

    Parameters: `events` — a coffea NanoEvents-style array (project type).
    Returns: the filled accumulator (`output`).
    """
    # get meta infos
    dataset = events.metadata["dataset"]
    # data is identified by the absence of the generator weight branch
    isRealData = not hasattr(events, "genWeight")
    n_events = len(events)
    selection = processor.PackedSelection()
    weights = processor.Weights(n_events)
    output = self.accumulator.identity()

    # weights
    if not isRealData:
        output['sumw'][dataset] += awkward1.sum(events.genWeight)

    # trigger: OR of all configured HLT paths per channel; missing paths
    # are tolerated (warn and skip) since menus differ across eras
    triggers = {}
    for channel in ["e","mu"]:
        trigger = np.zeros(len(events), dtype='bool')
        for t in self._trigger[channel]:
            try:
                trigger = trigger | events.HLT[t]
            except:
                warnings.warn("Missing trigger %s" % t, RuntimeWarning)
        triggers[channel] = trigger

    # met filter: AND of the standard recommended flags
    met_filters = ["goodVertices",
                   "globalSuperTightHalo2016Filter",
                   "HBHENoiseFilter",
                   "HBHENoiseIsoFilter",
                   "EcalDeadCellTriggerPrimitiveFilter",
                   "BadPFMuonFilter",
                   ]
    met_filters_mask = np.ones(len(events), dtype='bool')
    for t in met_filters:
        met_filters_mask = met_filters_mask & events.Flag[t]
    selection.add("met_filter", awkward1.to_numpy(met_filters_mask))

    # load objects
    muons = events.Muon
    electrons = events.Electron
    jets = events.Jet
    fatjets = events.FatJet
    subjets = events.SubJet
    fatjetsLS = events.FatJetLS
    met = events.MET

    # muons: medium ID + mini-isolation + kinematics + IP/significance cuts
    goodmuon = (
        (muons.mediumId)
        & (muons.miniPFRelIso_all <= 0.2)
        & (muons.pt >= 27)
        & (abs(muons.eta) <= 2.4)
        & (abs(muons.dz) < 0.1)
        & (abs(muons.dxy) < 0.05)
        & (muons.sip3d < 4)
    )
    good_muons = muons[goodmuon]
    ngood_muons = awkward1.sum(goodmuon, axis=1)

    # electrons: MVA WP90 (noIso) + barrel-only eta + IP/significance cuts
    goodelectron = (
        (electrons.mvaFall17V2noIso_WP90)
        & (electrons.pt >= 30)
        & (abs(electrons.eta) <= 1.479)
        & (abs(electrons.dz) < 0.1)
        & (abs(electrons.dxy) < 0.05)
        & (electrons.sip3d < 4)
    )
    good_electrons = electrons[goodelectron]
    ngood_electrons = awkward1.sum(goodelectron, axis=1)

    # good leptons, pt-sorted across both flavors
    # NOTE(review): awkward1.argsort default is ascending — this appears to
    # put the *softest* lepton first, so `firsts` below would take the
    # lowest-pt lepton; confirm whether ascending=False was intended.
    good_leptons = awkward1.concatenate([good_muons, good_electrons], axis=1)
    good_leptons = good_leptons[awkward1.argsort(good_leptons.pt)]

    # lepton candidate (first entry per event; None for empty events)
    candidatelep = awkward1.firsts(good_leptons)

    # lepton channel selection: exactly one lepton of the channel flavor
    selection.add("ch_e", awkward1.to_numpy((triggers["e"]) & (ngood_electrons==1) & (ngood_muons==0)))
    # not sure if need to require 0 muons or 0 electrons in the next line
    selection.add("ch_mu", awkward1.to_numpy((triggers["mu"]) & (ngood_electrons==0) & (ngood_muons==1)))

    # jets: HT from jets above 30 GeV, plus tight-ID central good jets
    ht = awkward1.sum(jets[jets.pt > 30].pt,axis=1)
    selection.add("ht_400", awkward1.to_numpy(ht>=400))
    goodjet = (
        (jets.isTight)
        & (jets.pt > 30)
        & (abs(jets.eta) <= 2.5)
    )
    good_jets = jets[goodjet]

    # fat jets
    jID = "isTight"
    # TODO: add mass correction
    # a way to get the first two subjets
    # cart = awkward1.cartesian([fatjets, subjets], nested=True)
    # idxes = awkward1.pad_none(awkward1.argsort(cart['0'].delta_r(cart['1'])), 2, axis=2)
    # sj1 = subjets[idxes[:,:,0]]
    # sj2 = subjets[idxes[:,:,1]]
    good_fatjet = (
        (getattr(fatjets, jID))
        & (abs(fatjets.eta) <= 2.4)
        & (fatjets.pt > 50)
        & (fatjets.msoftdrop > 30)
        & (fatjets.msoftdrop < 210)
        #& (fatjets.pt.copy(content=fatjets.subjets.content.counts) == 2)  # TODO: require 2 subjets?
        # this can probably be done w FatJet_subJetIdx1 or FatJet_subJetIdx2
        & (awkward1.all(fatjets.subjets.pt >= 20))
        & (awkward1.all(abs(fatjets.subjets.eta) <= 2.4))
    )
    good_fatjets = fatjets[good_fatjet]

    # hbb candidate: high-pt good fat jet well separated from the lepton
    mask_hbb = (
        (good_fatjets.pt > 200)
        & (good_fatjets.delta_r(candidatelep) > 2.0)
    )
    candidateHbb = awkward1.firsts(good_fatjets[mask_hbb])

    # b-tag
    #& (good_fatjets.particleNetMD_Xbb > 0.9)
    selection.add('hbb_btag', awkward1.to_numpy(candidateHbb.deepTagMD_ZHbbvsQCD >= 0.8))  # score would be larger for tight category (0.97)
    # No AK4 b-tagged jets away from bb jet
    # NOTE(review): the mask is computed from `good_jets` but applied to the
    # unfiltered `jets` collection — shapes differ unless all jets pass
    # `goodjet`; presumably `good_jets[...]` was intended. TODO confirm.
    jets_HbbV = jets[good_jets.delta_r(candidateHbb) >= 1.2]
    # NOTE(review): as written this selection is True when a b-tagged jet
    # *exists* away from the Hbb candidate, which contradicts the "No AK4
    # b-tagged jets" comment above — verify whether `<` (a true veto) was
    # intended.
    selection.add('hbb_vetobtagaway', awkward1.to_numpy(awkward1.max(jets_HbbV.btagDeepB, axis=1, mask_identity=False) > BTagEfficiency.btagWPs[self._year]['medium']))

    # fat jets Lepton Subtracted
    # wjj candidate
    mask_wjj = (
        (fatjetsLS.pt > 50)
        & (fatjetsLS.delta_r(candidatelep) > 1.2)
        # need to add 2 subjets w pt > 20 & eta<2.4
        # need to add ID?
    )
    # take the lepton-subtracted fat jet closest to the lepton
    candidateWjj = awkward1.firsts(fatjetsLS[mask_wjj][awkward1.argmin(fatjetsLS[mask_wjj].delta_r(candidatelep),axis=1,keepdims=True)])
    # add t2/t1 <= 0.75 (0.45 HP)
    selection.add('hww_mass', awkward1.to_numpy(candidateWjj.mass >= 10))

    print('met ',met)  # NOTE(review): leftover debug print — consider removing
    # wjjlnu info
    #HSolverLiInfo  hwwInfoLi;
    # qqSDmass = candidateWjj.msoftdrop
    # hwwLi = hSolverLi->minimize(candidatelep.p4(), met.p4(), wjjcand.p4(), qqSDmass, hwwInfoLi)
    #neutrino = hwwInfoLi.neutrino;
    #wlnu = hwwInfoLi.wlnu;
    #wqq = hwwInfoLi.wqqjet;
    #hWW = hwwInfoLi.hWW;
    #wwDM = PhysicsUtilities::deltaR( wlnu,wqq) * hWW.pt()/2.0;
    # add dlvqq <= 11 (2.5 HP)

    # in the meantime let's add the mass
    '''
    mm = (candidatejet - candidatelep).mass2
    jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*candidatejet.mass
    joffshell = jmass < 62.5
    massassumption = 80.*joffshell + (125 - 80.)*~joffshell
    x = massassumption**2/(2*candidatelep.pt*met.pt) + np.cos(candidatelep.phi - met.phi)
    met_eta = (
        (x < 1)*np.arcsinh(x*np.sinh(candidatelep.eta))
        + (x > 1)*(
            candidatelep.eta - np.sign(candidatelep.eta)*np.arccosh(candidatelep.eta)
            )
        )
    met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]),np.array([0.]),np.array([0.]),np.array([0.]))
    if met.size > 0:
        met_p4 = TLorentzVectorArray.from_ptetaphim(met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))

    # hh system
    candidateHH = candidateWjj + met_p4 + candidateHbb
    selection.add('hh_mass', candidateHH.mass >= 700)
    selection.add('hh_centrality', candidateHH.pt/candidateHH.mass >= 0.3)
    '''

    # ordered cut lists per channel used for the cutflow below
    channels = {"e": ["met_filter","ch_e","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"], #,"hh_mass","hh_centrality"],
                "mu": ["met_filter","ch_mu","ht_400","hbb_btag","hbb_vetobtagaway","hww_mass"] #,"hh_mass","hh_centrality"],
                }

    # need to add gen info
    if not isRealData:
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)

    # cutflow: bin 0 is "all events"; bin i+1 is the cumulative selection
    # after the first i+1 cuts of the channel's list
    for channel, cuts in channels.items():
        allcuts = set()
        output['cutflow'].fill(dataset=dataset, channel=channel, cut=0, weight=weights.weight())
        for i, cut in enumerate(cuts):
            allcuts.add(cut)
            # NOTE: `cut` is rebound here from the cut *name* to the event mask
            cut = selection.all(*allcuts)
            output['cutflow'].fill(dataset=dataset, channel=channel, cut=i + 1, weight=weights.weight()[cut])

    return output
def process(self, df):
    """Leptonjet channel processor: builds event weights, AK4 jets and
    "leptonjet" candidates from flat branches, classifies events into the
    2mu2e / 4mu channels, and fills a cumulative-cut count histogram.

    Parameters: `df` — a coffea LazyDataFrame of flat branches (project type).
    Returns: the filled accumulator (`output`).
    """
    output = self.accumulator.identity()
    if df.size == 0:
        return output
    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # generator weight and pileup reweighting apply to MC only
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        # self.pucorrs is expected to yield (central, up, down) arrays
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # event-level masks folded in as 0/1 weights
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################

    # AK4 CHS jets rebuilt from cartesian four-vector branches
    ak4jets = JaggedCandidateArray.candidatesfromcounts(
        df['akjet_ak4PFJetsCHS_p4'],
        px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
        py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
        pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
        energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
        jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
        deepcsv=df['hftagscore_DeepCSV_b'].content,
    )
    # bit 2 of the DeepCSV score word encodes the tight working point
    deepcsv_tight = np.bitwise_and(ak4jets.deepcsv, 1 << 2) == (1 << 2)
    ak4jets.add_attributes(deepcsvTight=deepcsv_tight)
    ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 20) & (np.abs(ak4jets.eta) < 2.5)]

    # leptonjet candidates from PF-jet four-vectors + vertex lxy
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
        lxy=df['pfjet_klmvtx_lxy'].content,
    )
    # daughter PF-candidate type codes: 3 = PF muon, 8 = DSA muon
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma-type, 2 = PF-muon-type, 3 = DSA-type
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label, ndsa=ndsa)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
    # net charge of the daughters; mu-type jets must be neutral
    ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=ljdaucharge)
    leptonjets.add_attributes(
        isneutral=(leptonjets.iseltype | (leptonjets.ismutype & (leptonjets.qsum == 0))))
    leptonjets.add_attributes(
        displaced=((np.abs(leptonjets.lxy) >= 5) | (np.isnan(leptonjets.lxy) & leptonjets.ismutype)))  # non-vertex treated as displaced too
    # cosmic-muon cleaning via the DSA-muon subset flags
    ljdsamuSubset = fromNestNestIndexArray(
        df['dsamuon_isSubsetFilteredCosmic1Leg'],
        awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
    leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
    leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30)]

    ## __twoleptonjets__ AND >=1 displaced
    twoleptonjets = (leptonjets.counts >= 2) & (
        leptonjets.ismutype.sum() >= 1) & (leptonjets.displaced.sum() >= 1)
    dileptonjets = leptonjets[twoleptonjets]
    ak4jets = ak4jets[twoleptonjets]
    wgt = weight[twoleptonjets]

    if dileptonjets.size == 0:
        return output
    # leading and sub-leading leptonjet by pt
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    ## channel def ##
    # channel encoding: 1 = 2mu2e, 2 = 4mu, 0 = neither
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1

    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

    channel_ = channel_2mu2e + channel_4mu
    ###########

    # ordered analysis cuts; applied cumulatively below
    cuts = [
        ((lj0.isneutral) & (lj1.isneutral)).flatten(),  # both 'neutral'
        (np.abs(lj0.p4.delta_phi(lj1.p4)) > np.pi / 2).flatten(),  # dphi > pi/2
        (~channel_2mu2e.astype(bool)) | (channel_2mu2e.astype(bool) & (((lj0.iseltype) & (lj0.pt > 60)) | ((lj1.iseltype) & (lj1.pt > 60))).flatten()),  # EGMpt0>60
        ak4jets.counts < 3,  # N(jets) < 4
        ak4jets[(ak4jets.pt > 30) & (np.abs(ak4jets.eta) < 2.4) & ak4jets.deepcsvTight].counts == 0,  # N(tightB)==0
    ]
    # control region / sideband definitions invert individual cuts
    if self.region == 'CR':
        cuts[1] = ~cuts[1]
    if self.enforceNeutral == False:
        cuts[0] = ~cuts[0]

    # fill one bin per cumulative-cut stage (bin i = cuts[0..i] all pass)
    for i, c in enumerate(itertools.accumulate(cuts, np.logical_and)):
        output['count'].fill(dataset=dataset,
                             cnt=np.ones_like(wgt[c]) * i,
                             weight=wgt[c],
                             channel=channel_[c])
    return output
def process(self, df):
    """VBF H(inv)-style processor: tags the dataset flavor, builds physics
    objects and a PackedSelection of signal/control-region cuts, constructs
    per-event weights, and fills all histograms region by region.

    Parameters: `df` — a coffea LazyDataFrame of flat NanoAOD branches
    (project type; also abused as a per-event scratch dict).
    Returns: the filled accumulator (`output`).
    """
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']
    # dataset-flavor flags derived from the dataset name
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
        'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
            'is_lo_w_ewk'] | df['is_lo_z_ewk']
    df['is_data'] = is_data(dataset)

    # generator-level V pt, used later for theory reweighting
    gen_v_pt = None
    if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
            'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_dress']
    elif df['is_lo_g']:
        # photon samples: leading stable gen photon stands in for the V
        gen = setup_gen_candidates(df)
        gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]
    bjets = bjets[bjets.puid]

    # Muons
    df['is_tight_muon'] = muons.tightId \
                          & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                          & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                          & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)
    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
    # transverse mass is only meaningful for exactly one muon; the
    # (counts == 1) factor zeroes it otherwise
    df['MT_mu'] = ((muons.counts == 1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = electrons.tightId \
                              & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                              & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)
    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
    df['MT_el'] = ((electrons.counts == 1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4: distances between the leading jet and leptons
    leadak4_index = ak4.pt.argmax()
    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil: MET with leptons/photons added back
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons, muons, photons)
    df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=4.7)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=4.7)

    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr', df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo', np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

    # HEM-failure mitigation applies to 2018 only (and not in sync mode)
    if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC):
        selection.add('hemveto', df['hemveto'])
    else:
        selection.add('hemveto', np.ones(df.size) == 1)

    # AK4 dijet: leading two jets per event
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    # jets in opposite detector hemispheres
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    # CHF/NHF quality cuts only apply inside tracker acceptance (|eta|<=2.5)
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1

    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()

    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj', df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add(
        'dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add(
        'detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                  & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
                                      & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())
    # NOTE(review): 'two_electrons' is added a second time with the same
    # mask — redundant; PackedSelection may warn/raise on duplicate names.
    selection.add('two_electrons', electrons.counts == 2)

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId \
                            & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset)

    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset, multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights
    evaluator = evaluator_from_config(cfg)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        # prefire weight only exists for some eras; default to 1
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons, electrons, photons)
        weights = pileup_weights(weights, df, evaluator, cfg)
        if not (gen_v_pt is None):
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt, df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]
            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [
                ak4[leadak4_index][mask].eta
            ]
            # NOTE(review): `[ak4.pt.max() < 0][mask]` indexes a one-element
            # Python *list* with an array mask — this looks broken (TypeError
            # when this branch runs); presumably a placeholder. Verify.
            output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]
            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [
                muons[df['is_tight_muon']].counts[mask]
            ]
            output['kinematics']['mupt0'] += [
                muons[leadmuon_index][mask].pt
            ]
            output['kinematics']['mueta0'] += [
                muons[leadmuon_index][mask].eta
            ]
            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[mask]
            ]
            output['kinematics']['elpt0'] += [
                electrons[leadelectron_index][mask].pt
            ]
            output['kinematics']['eleta0'] += [
                electrons[leadelectron_index][mask].eta
            ]
            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [
                photons[df['is_tight_photon']].counts[mask]
            ]
            output['kinematics']['gpt0'] += [
                photons[leadphoton_index][mask].pt
            ]
            output['kinematics']['geta0'] += [
                photons[leadphoton_index][mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)
    for region, cuts in regions.items():
        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(
                    *cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=weights.weight()[mask])

        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide", weight_type=wname, weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            weights.partial_weight(exclude=['prefire'])[mask])

        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)

        ezfill('ak4_eta_nopref', jeteta=ak4[mask].eta.flatten(), weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref', jetphi=ak4[mask].phi.flatten(), weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref', jetpt=ak4[mask].pt.flatten(), weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(), weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(), weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(), weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(), weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(), weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(), weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(), weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(), weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], weights.weight()[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=weights.weight()[mask])
        ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask], weight=weights.weight()[mask])
        ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=weights.weight()[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], weight=weights.weight()[mask])
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=weights.weight()[mask])
        ezfill('dphijj', dphi=df["dphijj"][mask], weight=weights.weight()[mask])
        ezfill('detajj', deta=df["detajj"][mask], weight=weights.weight()[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

        # Two dimensional
        ezfill('recoil_mjj', recoil=df["recoil_pt"][mask], mjj=df["mjj"][mask], weight=weights.weight()[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region:
            w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask], weight=weights.weight()[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], weights.weight()[mask])
            ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(), weight=w_dimu)
            ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(), weight=w_dimu)
            ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(), weight=w_dimu)
            ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(), weight=w_dimu)
            ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(), weight=w_dimu)
            ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu)
            ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region:
            w_allel = weight_shape(electrons.pt[mask], weights.weight()[mask])
            ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask], weight=weights.weight()[mask])
            ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel)
            ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], weights.weight()[mask])
            ezfill('electron_pt0', pt=dielectrons.i0.pt[mask].flatten(), weight=w_diel)
            ezfill('electron_pt1', pt=dielectrons.i1.pt[mask].flatten(), weight=w_diel)
            ezfill('electron_eta0', eta=dielectrons.i0.eta[mask].flatten(), weight=w_diel)
            ezfill('electron_eta1', eta=dielectrons.i1.eta[mask].flatten(), weight=w_diel)
            ezfill('electron_phi0', phi=dielectrons.i0.phi[mask].flatten(), weight=w_diel)
            ezfill('electron_phi1', phi=dielectrons.i1.phi[mask].flatten(), weight=w_diel)
            ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel)
            ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel)
            ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask],
                weights.weight()[mask])
            ezfill('photon_pt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon)
            ezfill('photon_eta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)
            ezfill('photon_phi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon)
            ezfill('photon_pt0_recoil', pt=photons[leadphoton_index].pt[mask].flatten(), recoil=df['recoil_pt'][mask & (leadphoton_index.counts > 0)], weight=w_leading_photon)
            ezfill('photon_eta_phi', eta=photons[leadphoton_index].eta[mask].flatten(), phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon)

        # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask], weight=weights.weight()[mask])
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=weights.weight()[mask])
        ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=weights.weight()[mask])
        ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=weights.weight()[mask])
        ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=weights.partial_weight(exclude=['pileup'])[mask])
    return output
def process(self, events):
    """Process one chunk of events for the GRU/IN fat-jet tagger study.

    Builds corrected AK8 fat-jet quantities (corrected softdrop mass, rho,
    DDT-shifted GRU/IN tagger scores, N2DDT), registers the event selections
    for the 'signal', 'ttbar_muoncontrol', 'vselection' and 'noselection'
    regions, accumulates per-region cutflows, and fills the output
    histograms.  Returns the accumulator.
    """
    dataset = events.metadata['dataset']
    print('process dataset', dataset)
    # Data chunks have no generator-weight branch.
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if (len(events) == 0):
        return output
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()
    # trigger paths: OR of the configured HLT paths for data; MC accepts all.
    if isRealData:
        trigger_fatjet = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            try:
                trigger_fatjet = trigger_fatjet | events.HLT[t]
            # NOTE(review): bare except hides everything, not just a missing
            # HLT branch — consider catching the specific exception type.
            except:
                print('trigger %s not available' % t)
                continue
        trigger_muon = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger_muon = trigger_muon | events.HLT[t]
    else:
        trigger_fatjet = np.ones(events.size, dtype='bool')
        trigger_muon = np.ones(events.size, dtype='bool')
    selection.add('fatjet_trigger', trigger_fatjet)
    selection.add('muon_trigger', trigger_muon)

    # jet corrected kinematics: attach derived columns to the FatJet table.
    gru = events.GRU
    IN = events.IN
    fatjets = events.FatJet
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rhocorr'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    # DDT-shifted tagger scores (shift() maps are year-dependent).
    fatjets['gruddt'] = gru.v25 - shift(fatjets, algo='gruddt', year=self._year)
    fatjets['gru'] = gru.v25
    fatjets['in_v3'] = IN.v3
    fatjets['in_v3_ddt'] = IN.v3 - shift(fatjets, algo='inddt', year=self._year)
    fatjets['in_v3_ddt_90pctl'] = IN.v3 - shift(fatjets, algo='inddt90pctl', year=self._year)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    fatjets["genMatchFull"] = genmatch(events, dataset)
    #else: fatjets["genMatchFull"] = fatjets.pt.zeros_like() #np.zeros(events.size, dtype='bool')
    # Leading fat jet only; up to five leading muons kept as candidates.
    candidatejet = fatjets[:, :1]
    candidatemuon = events.Muon[:, :5]
    # run model on PFCands associated to FatJet (FatJetPFCands)
    #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
    #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())
    selection.add('pt', (candidatejet.pt > 525).any())
    selection.add('msdcorr', (candidatejet.msdcorr > 40).any())
    # basic jet selection; genMatchFull is only required for W/Z+jets MC.
    goodjet_sel = ((candidatejet.pt > 525)
                   & (abs(candidatejet.eta) < 2.5)
                   & (candidatejet.msoftdrop > 40.)
                   & (candidatejet.rhocorr > -5.5)
                   & (candidatejet.rhocorr < -2)
                   & (candidatejet.genMatchFull if
                      ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset)
                      else (1 == 1))).any()
    # Looser jet kinematics for the V-selection region.
    vselection_goodjet_sel = ((candidatejet.pt > 200)
                              & (abs(candidatejet.eta) < 2.5)
                              & (candidatejet.msoftdrop > 40.)).any()
    #& (candidatejet.genMatchFull if ('TTTo' in dataset) else (1==1))).any()
    #& (candidatejet.rhocorr > -5.5) #& (candidatejet.rhocorr < -2)).any()
    selection.add('vselection_jetkin', vselection_goodjet_sel)
    # goodmuon sel for muon CR (lep vetos below)
    goodmuon_sel = ((candidatemuon.pt > 55)
                    & (abs(candidatemuon.eta) < 2.1)
                    & (candidatemuon.looseId).astype(bool)
                    & (candidatemuon.pfRelIso04_all < 0.15)).any()
    vselection_goodmuon_sel = ((candidatemuon.pt > 53)
                               & (abs(candidatemuon.eta) < 2.1)
                               & (candidatemuon.tightId).astype(bool))
    #& (candidatemuon.pfRelIso04_all < 0.15))
    vselection_goodmuon_sel_loose = ((candidatemuon.pt > 20)
                                     & (candidatemuon.looseId).astype(bool)
                                     & (abs(candidatemuon.eta) < 2.4))
    selection.add('vselection_muonkin', vselection_goodmuon_sel.any())
    # Exactly one tight AND exactly one loose muon among the kept candidates.
    selection.add('vselection_onetightmuon', vselection_goodmuon_sel.sum() == 1)
    selection.add('vselection_oneloosemuon', vselection_goodmuon_sel_loose.sum() == 1)
    # From here on only the leading muon is used.
    candidatemuon = candidatemuon[:, 0:1]
    selection.add('muonkin', goodmuon_sel)
    selection.add('jetkin', goodjet_sel)
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('met', events.MET.pt > 40.)
    # Angular separation between the candidate muon and the candidate jet.
    muon_ak8_pair = candidatemuon.cross(candidatejet, nested=True)
    selection.add('muonDphiAK8',
                  (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2 * np.pi / 3).all().all())
    selection.add('vselection_muonDphiAK8',
                  (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 1).all().all())
    # ak4 puppi jet for CR
    jets = events.Jet[((events.Jet.pt > 50.)
                       & (abs(events.Jet.eta) < 2.5))][:, :10]
    # only consider first 4 jets to be consistent with old framework
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
    dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    # Require a b-tagged AK4 jet away from the AK8 (ttbar CR) ...
    ak4_away = jets[(dr > 0.8).all()]
    selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
    # ... and veto b-tags in the opposite hemisphere (signal region).
    ak4_opposite = jets[(dphi > np.pi / 2).all()]
    selection.add('antiak4btagMediumOppHem', ak4_opposite.btagCSVV2.max() < 0.8838)
    # Leptonic-W candidate from the muon + MET (MET promoted to a jagged p4).
    mu_p4 = TLorentzVectorArray.from_ptetaphim(
        candidatemuon.pt.fillna(0), candidatemuon.eta.fillna(0),
        candidatemuon.phi.fillna(0), candidatemuon.mass.fillna(0))
    met_p4 = TLorentzVectorArray.from_ptetaphim(
        awkward.JaggedArray.fromiter([[v] for v in events.MET.pt]),
        awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]),
        awkward.JaggedArray.fromiter([[v] for v in events.MET.phi]),
        awkward.JaggedArray.fromiter([[v] for v in np.zeros(events.size)]))
    met_candidatemuon_pair = met_p4.cross(mu_p4)
    Wleptoniccandidate = met_candidatemuon_pair.i0 + met_candidatemuon_pair.i1
    selection.add('Wleptonic_candidate', (Wleptoniccandidate.pt > 200).any())
    # Softer AK4 collection used only by the V-selection region.
    vselection_jets = events.Jet[((events.Jet.pt > 30.)
                                  & (abs(events.Jet.eta) < 2.4))]
    vselection_ak4_ak8_pair = vselection_jets.cross(candidatejet, nested=True)
    muon_ak4_pair = vselection_jets.cross(candidatemuon, nested=True)
    dr_ak8 = abs(vselection_ak4_ak8_pair.i0.delta_r(vselection_ak4_ak8_pair.i1))
    dr_muon = abs(muon_ak4_pair.i0.delta_r(muon_ak4_pair.i1))
    ak4_away = vselection_jets[(dr_ak8 > 0.8).all()]
    selection.add('vselection_ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)
    # NOTE(review): despite the 'muonDphiAK4' name this cut re-applies a
    # b-tag threshold on jets away from the muon — confirm intent.
    ak4_away = vselection_jets[(dr_muon > 0.3).all()]
    selection.add('vselection_muonDphiAK4', ak4_away.btagCSVV2.max() > 0.8838)
    # Lepton-veto counts.
    nelectrons = ((
        (events.Electron.pt > 10.)
        & (abs(events.Electron.eta) < 2.5)
        #& (events.Electron.cutBased >= events.Electron.LOOSE))
        #& (events.Electron.cutBased_Fall17_V1 >= 1))
        & (events.Electron.cutBased >= 2))).sum()
    nmuons = (((events.Muon.pt > 10)
               & (abs(events.Muon.eta) < 2.1)
               #& (events.Muon.pfRelIso04_all < 0.4)
               & (events.Muon.looseId).astype(bool))).sum()
    ntaus = (((events.Tau.pt > 20.)
              #& (events.Tau.idMVAnewDM2017v2 >=4))
              & (events.Tau.idDecayMode).astype(bool)
              & (events.Tau.rawIso < 5)
              & (abs(events.Tau.eta) < 2.3))).sum()
    selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))
    #weights.add('metfilter', events.Flag.METFilters)
    # Gen flavor defaults to -1 for data; MC adds event weights + boson match.
    if isRealData:
        genflavor = candidatejet.pt.zeros_like().pad(1, clip=True).fillna(-1).flatten()
    if not isRealData:
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) #signal region only
        #add_singleMuTriggerWeight(weights, abs(candidatemuon.eta), candidatemuon.pt, self._year)
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        genflavor = matchedBosonFlavor(candidatejet, bosons).pad(1, clip=True).fillna(-1).flatten()
        #b-tag weights
    # Region name -> ordered list of selection names (order sets the cutflow).
    regions = {
        'signal': [
            'fatjet_trigger',
            'jetkin',
            'noleptons',
            'jetid',
            'antiak4btagMediumOppHem',
        ],
        'ttbar_muoncontrol': [
            'muon_trigger',
            'pt',
            'msdcorr',
            'jetid',
            'jetkin',
            'muonkin',
            'muonDphiAK8',
            'ak4btagMedium08',
            'noelectron_notau',
        ],
        'vselection': [
            'muon_trigger',
            'vselection_jetkin',
            'vselection_muonkin',
            'vselection_onetightmuon',
            'vselection_oneloosemuon',
            'vselection_muonDphiAK8',
            'vselection_ak4btagMedium08',
            'vselection_muonDphiAK4',
            'Wleptonic_candidate',
            'met'
        ],
        'noselection': [],
        #'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
    }
    # Weighted cutflows: cuts are applied cumulatively in region order.
    allcuts_signal = set()
    output['cutflow_signal'][dataset]['none'] += float(weights.weight().sum())
    allcuts_ttbar_muoncontrol = set()
    output['cutflow_ttbar_muoncontrol'][dataset]['none'] += float(weights.weight().sum())
    allcuts_vselection = set()
    output['cutflow_vselection'][dataset]['none'] += float(weights.weight().sum())
    for cut in regions['signal']:
        allcuts_signal.add(cut)
        output['cutflow_signal'][dataset][cut] += float(
            weights.weight()[selection.all(*allcuts_signal)].sum())
    for cut in regions['ttbar_muoncontrol']:
        allcuts_ttbar_muoncontrol.add(cut)
        output['cutflow_ttbar_muoncontrol'][dataset][cut] += float(
            weights.weight()[selection.all(*allcuts_ttbar_muoncontrol)].sum())
    for cut in regions['vselection']:
        allcuts_vselection.add(cut)
        output['cutflow_vselection'][dataset][cut] += float(
            weights.weight()[selection.all(*allcuts_vselection)].sum())

    def normalize(val, cut):
        # Apply the event cut, then force exactly one value per event
        # (pad/clip to length 1, missing -> 0) and flatten to a plain array.
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # Fill every output histogram for one region.
        # NOTE(review): 'systematic' and 'wmod' are accepted but unused.
        print('filling %s' % region)
        selections = regions[region]
        cut = selection.all(*selections)
        weight = weights.weight()[cut]
        # NOTE(review): the trailing commas after the .fill(...) calls below
        # build throwaway 1-tuples; harmless but presumably unintended.
        # NOTE(review): the 'in_v3_ddt' axis is filled from the *90pctl*
        # column — confirm that is the intended variable.
        output['templates'].fill(
            dataset=dataset,
            region=region,
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            n2ddt=normalize(candidatejet.n2ddt, cut),
            gruddt=normalize(candidatejet.gruddt, cut),
            in_v3_ddt=normalize(candidatejet.in_v3_ddt_90pctl, cut),
            weight=weight,
        ),
        output['event'].fill(
            dataset=dataset,
            region=region,
            MET=events.MET.pt[cut],
            nJet=fatjets.counts[cut],
            nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
            weight=weight,
        ),
        output['muon'].fill(
            dataset=dataset,
            region=region,
            mu_pt=normalize(candidatemuon.pt, cut),
            mu_pfRelIso04_all=normalize(candidatemuon.pfRelIso04_all, cut),
            weight=weight,
        ),
        output['deepAK8'].fill(
            dataset=dataset,
            region=region,
            deepTagMDWqq=normalize(candidatejet.deepTagMDWqq, cut),
            deepTagMDZqq=normalize(candidatejet.deepTagMDZqq, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            genflavor=genflavor[cut],
            weight=weight,
        ),
        output['in_v3'].fill(
            dataset=dataset,
            region=region,
            genflavor=genflavor[cut],
            in_v3=normalize(candidatejet.in_v3, cut),
            n2=normalize(candidatejet.n2b1, cut),
            gru=normalize(candidatejet.gru, cut),
            weight=weight,
        )

    for region in regions:
        fill(region)
    return output
def process(self, df):
    """Process one chunk for the lepton-in-jet (H->WW-style) analysis.

    Pre-selects events with at least one lepton (channel-dependent) and one
    good fat jet, picks the fat jet closest to the leading lepton, builds the
    mass-assumption MET-eta and candidate masses, assigns a gen flavor for
    MC, accumulates a cutflow and fills the per-region histograms.
    """
    dataset = df.metadata['dataset']
    # Data chunks have no generator-weight branch.
    isRealData = 'genWeight' not in df.columns
    output = self.accumulator.identity()
    selection = processor.PackedSelection()
    # NOTE(review): duplicate of the identity() call above — redundant.
    output = self.accumulator.identity()
    good = False
    # Pre-selection object counts (before the trigger / event slimming).
    goodMuon = ((df.Muon.pt > 27.) & (np.abs(df.Muon.eta) < 2.4))
    nmuons = goodMuon.sum()
    goodElectron = ((df.Electron.pt > 30.) & (np.abs(df.Electron.eta) < 2.5))
    nelectrons = goodElectron.sum()
    df.FatJet['msdcorr'] = corrected_msoftdrop(df.FatJet)
    goodFatJet = ((df.FatJet.pt > 300.)
                  & (np.abs(df.FatJet.eta) < 2.4)
                  & (df.FatJet.msdcorr > 10.)
                  & (df.FatJet.isTight))
    nfatjets = goodFatJet.sum()
    # Channel decides which lepton flavor is required in the pre-selection.
    if self._channel == 'muon':
        good = ((nmuons >= 1) & (nfatjets >= 1))
    else:
        good = ((nelectrons >= 1) & (nfatjets >= 1))
    # Slim the chunk to pre-selected events; later selections use 'events'.
    events = df[good]
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()
    # trigger: OR of the configured paths; missing paths only warn.
    trigger = np.zeros(df.size, dtype='bool')
    for t in self._triggers[self._year + '_' + self._trigger]:
        try:
            trigger = trigger | df.HLT[t]
        except:
            warnings.warn("Missing trigger %s" % t, RuntimeWarning)
    selection.add('trigger', trigger[good])
    # Muons
    candidatemuon = events.Muon[:, 0:1]
    nmuons = events.Muon.counts
    # Electrons
    candidateelectron = events.Electron[:, 0:1]
    nelectrons = events.Electron.counts
    # The candidate lepton follows the channel; veto the other flavor.
    if self._channel == 'muon':
        candidatelep = candidatemuon
        selection.add('nootherlepton', (nelectrons == 0))
    else:
        candidatelep = candidateelectron
        selection.add('nootherlepton', (nmuons == 0))
    # Impact-parameter requirement on the candidate lepton.
    selection.add('iplepton', ((np.abs(candidatelep.dz) < 0.1)
                               & (np.abs(candidatelep.dxy) < 0.05)).any())
    # FatJets: candidate jet = fat jet closest in dR to the lepton.
    ak8_lep_pair = candidatelep.cross(events.FatJet)
    ak8_lep_dR = ak8_lep_pair.i0.delta_r(ak8_lep_pair.i1)
    candidatejet = events.FatJet[ak8_lep_dR.argmin()]
    # NOTE(review): 'leadingjet' and 'ak8_lep_dR_closest' are computed but
    # never used below.
    leadingjet = events.FatJet[:, 0:1]
    ak8_lep_dR_closest = candidatelep.delta_r(candidatejet)
    selection.add('jetkin', (candidatejet.pt > self._fjetptMIN).any())
    selection.add('jetmsd', (candidatejet.msdcorr > 20).any())
    selection.add('LSF3medium', (candidatejet.lsf3 > 0.7).any())
    selection.add('LSF3tight', (candidatejet.lsf3 > 0.78).any())
    selection.add('lepnearjet', (ak8_lep_dR.min() < 1.5))
    selection.add('lepinjet', (ak8_lep_dR.min() < 0.8))
    # FatJet substracted Lepton
    # sj1_sj2_btagDeepB_pair = candidatejet.LSsubJet1btagDeepB.cross(candidatejet.LSsubJet2btagDeepB)
    # fls_btagDeepB_max = max(sj1_sj2_btagDeepB_pair.i0,sj1_sj2_btagDeepB_pair.i1)
    # Jets (AK4) for b-tag vetoes relative to the candidate fat jet.
    jets = events.Jet[(events.Jet.pt > 30.)
                      & (abs(events.Jet.eta) < 2.5)
                      & (events.Jet.isTight)]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    ak4_ak8_dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    ak4_opposite = jets[(ak4_ak8_dphi > np.pi / 2).all()]
    ak4_away = jets[(ak4_ak8_dphi > 0.8).all()]
    selection.add(
        'antiak4btagMediumOppHem',
        ak4_opposite.btagDeepB.max() < self._btagWPs['med'][self._year])
    # NOTE(review): despite the name, this also *vetoes* ('<') a medium
    # b-tag; a sibling processor in this file uses '>' here — confirm intent.
    selection.add(
        'ak4btagMedium08',
        ak4_away.btagDeepB.max() < self._btagWPs['med'][self._year])
    # MET
    met = events.MET
    # MET eta with mass assumption: reconstruct the neutrino eta assuming
    # the (jet - lep) system is an on/off-shell W (80 GeV vs 125-80 GeV).
    mm = (candidatejet - candidatelep).mass2
    jmass = (mm > 0) * np.sqrt(np.maximum(0, mm)) + (mm < 0) * candidatejet.mass
    joffshell = jmass < 62.5
    massassumption = 80. * joffshell + (125 - 80.) * ~joffshell
    x = massassumption**2 / (2 * candidatelep.pt * met.pt) + np.cos(candidatelep.phi - met.phi)
    # NOTE(review): the x > 1 branch takes arccosh(candidatelep.eta); the
    # analogous code elsewhere in this file uses arccosh(max(1., x)).
    # Also, at exactly x == 1 neither branch contributes — confirm.
    met_eta = ((x < 1) * np.arcsinh(x * np.sinh(candidatelep.eta)) +
               (x > 1) * (candidatelep.eta -
                          np.sign(candidatelep.eta) * np.arccosh(candidatelep.eta)))
    # Placeholder p4 so met_p4 is always defined even for empty chunks.
    met_p4 = TLorentzVectorArray.from_ptetaphim(np.array([0.]), np.array([0.]),
                                                np.array([0.]), np.array([0.]))
    if met.size > 0:
        met_p4 = TLorentzVectorArray.from_ptetaphim(
            met.pt, met_eta.fillna(0.), met.phi, np.zeros(met.size))
        # Higgs-candidate mass from jet + reconstructed neutrino.
        hmass = (candidatejet + met_p4).mass
    else:
        hmass = candidatejet.pt.zeros_like()
    # weights
    weights = processor.Weights(len(events), storeIndividual=True)
    if isRealData:
        genflavor = candidatejet.pt.zeros_like()
    else:
        try:
            weights.add('genweight', events.genWeight)
            add_pileup_weight(weights, events.Pileup.nPU, self._year)
            #print("Weight statistics: %r" % weights._weightStats)
        # NOTE(review): bare except swallows any weighting failure.
        except:
            print('no gen weight')
        if 'TTTo' in dataset:
            # ttbar: flavor from W / b matching to the candidate lepton.
            genW, genW_idx = getParticles(events, 24,
                                          ['fromHardProcess', 'isLastCopy'])
            genb, genb_idx = getParticles(events, 5,
                                          ['fromHardProcess', 'isLastCopy'])
            genflavorW = matchedParticleFlavor(candidatelep, genW, 'child', 0.4)
            genflavorb = matchedParticleFlavor(candidatelep, genb, 'mom', 0.4)
            genflavor = getFlavor(genflavorW, genflavorb)
        elif (('hww_2017' in dataset) or ('GluGluHToWW' in dataset)):
            # H->WW signal: classify the generated decay channel.
            genH, genH_idx = getParticles(events, 25,
                                          ['fromHardProcess', 'isLastCopy'])
            genW, genW_idx = getParticles(events, 24,
                                          ['fromHardProcess', 'isLastCopy'])
            genE, genE_idx = getParticles(events, 11,
                                          ['fromHardProcess', 'isFirstCopy'], 1)
            genM, genM_idx = getParticles(events, 13,
                                          ['fromHardProcess', 'isFirstCopy'], 1)
            genT, genT_idx = getParticles(events, 15,
                                          ['fromHardProcess', 'isFirstCopy'], 1)
            genQ, genQ_idx = getParticles(events, [0, 5],
                                          ['fromHardProcess', 'isFirstCopy'])
            ishWW_qqelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 1) & (genM.counts == 0) & (genT.counts == 0)
            ishWW_qqmuv = (genH.counts == 1) & (genW.counts == 2) & (
                genM.counts == 1) & (genE.counts == 0) & (genT.counts == 0)
            ishWW_qqtauv = (genH.counts == 1) & (genW.counts == 2) & (
                genT.counts == 1) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_qqqq = (genH.counts == 1) & (genW.counts == 2) & (
                genQ.counts == 4) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_muvelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 1) & (genM.counts == 1)
            ishWW_elevelev = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 2) & (genM.counts == 0)
            ishWW_tauvtauv = (genH.counts == 1) & (genW.counts == 2) & (
                genT.counts == 2) & (genM.counts == 0) & (genE.counts == 0)
            ishWW_muvmuv = (genH.counts == 1) & (genW.counts == 2) & (
                genE.counts == 0) & (genM.counts == 2)
            # NOTE(review): only qqelev (8) and qqmuv (9) enter genflavor;
            # the other ishWW_* categories are computed but unused.
            genflavor = ((ishWW_qqelev) * 8 + (ishWW_qqmuv) * 9)
        else:
            genflavor = candidatejet.pt.zeros_like()
    # fill cutflow (cumulative, unweighted event counts).
    cutflow = [
        'trigger', 'jetkin', 'jetmsd', 'lepnearjet', 'lepinjet',
        'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton', 'LSF3medium',
        'LSF3tight'
    ]
    allcuts = set()
    output['cutflow']['none'] += len(events)
    for cut in cutflow:
        allcuts.add(cut)
        output['cutflow'][cut] += selection.all(*allcuts).sum()
    # Region name -> set of selections applied together.
    regions = {}
    regions['presel'] = {'trigger', 'jetkin', 'jetmsd', 'lepinjet'}
    regions['antibtag'] = {
        'trigger', 'jetkin', 'jetmsd', 'antiak4btagMediumOppHem'
    }
    regions['noinjet'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepnearjet', 'antiak4btagMediumOppHem'
    }
    regions['nolsf'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'antiak4btagMediumOppHem'
    }  #,'nootherlepton'}
    regions['lsf'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight'
    }
    regions['bopp'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
        'antiak4btagMediumOppHem'
    }
    regions['lep'] = {
        'trigger', 'jetkin', 'jetmsd', 'lepinjet', 'LSF3tight',
        'antiak4btagMediumOppHem', 'nootherlepton', 'iplepton'
    }
    for region in self._regions:
        selections = regions[region]
        cut = selection.all(*selections)
        weight = weights.weight()[cut]

        def normalize(val):
            # Best-effort flattening of jagged/flat/scalar-per-event arrays
            # after applying the region cut.
            try:
                return val[cut].pad(1, clip=True).fillna(0).flatten()
            except:
                try:
                    return val[cut].flatten()
                except:
                    return val[cut]

        # (original disabled fills kept out of the active path:
        #  %s_fjetprop, %s_fjetextraprop, %s_jetprop histograms)
        output['%s_fmmjetprop' % region].fill(
            fjet_pt=normalize(candidatejet.pt),
            #fjet_mmass = normalize(jmass),
            #fjet_hmass = normalize(hmass),
            lep_pt=normalize(candidatelep.pt),
            fjet_lsf3=normalize(candidatejet.lsf3),
            genflavor=normalize(genflavor),
            dataset=dataset,
            weight=weight)
        output['%s_fmmjetprop2' % region].fill(
            fjet_mmass=normalize(jmass),
            fjet_lsf3=normalize(candidatejet.lsf3),
            genflavor=normalize(genflavor),
            dataset=dataset,
            weight=weight)
        # (further disabled fills in the original: %s_flsjetprop,
        #  %s_metprop, %s_weight, and per-channel muon/electron property
        #  histograms — all commented out upstream.)
    return output
def process(self, events):
    """Photon-purity measurement: fill sieie-vs-pt counts for medium,
    medium-without-sieie, and medium-with-inverted-isolation photon
    categories, for events passing MET filters + single-photon triggers
    with MET < 60 and at least one clean jet.  Returns the accumulator.
    """
    dataset = events.metadata['dataset']
    # Data chunks have no generator-weight branch.
    isData = 'genWeight' not in events.columns
    # NOTE(review): 'selection' is created but never used in this method.
    selection = processor.PackedSelection()
    hout = self.accumulator.identity()
    # Helpers loaded from the .coffea correction/ID files.
    match = self._common['match']
    isLooseElectron = self._ids['isLooseElectron']
    isLooseMuon = self._ids['isLooseMuon']
    isLoosePhoton = self._ids['isLoosePhoton']
    isTightPhoton = self._ids['isTightPhoton']
    isGoodJet = self._ids['isGoodJet']
    #### Select loose muon and electron to select clean photon
    # NOTE(review): np.bool is removed in NumPy >= 1.24; these .astype(np.bool)
    # calls will need np.bool_ / bool on a modern NumPy.
    mu = events.Muon
    mu['isloose'] = isLooseMuon(mu.pt, mu.eta, mu.pfRelIso04_all, mu.looseId,
                                self._year)
    mu_loose = mu[mu.isloose.astype(np.bool)]
    e = events.Electron
    # Electrons are "clean" if not dR-matched to a loose muon within 0.3.
    e['isclean'] = ~match(e, mu_loose, 0.3)
    e['isloose'] = isLooseElectron(e.pt, e.eta + e.deltaEtaSC, e.dxy, e.dz,
                                   e.cutBased, self._year)
    e_clean = e[e.isclean.astype(np.bool)]
    e_loose = e_clean[e_clean.isloose.astype(np.bool)]
    #### Consider clean and tight photon for purity measurement
    pho = events.Photon
    pho['isclean'] = ~match(pho, mu_loose, 0.5) & ~match(pho, e_loose, 0.5)
    # 2016 nanoAOD stores the cut-based photon ID under a different branch.
    _id = 'cutBasedBitmap'
    if self._year == '2016':
        _id = 'cutBased'

    def isPurityPhoton(pt, medium_id):
        # Medium-ID photon with pt > 200; the bitmap encoding differs in 2016.
        # NOTE(review): the initializer below is dead code — NaN never
        # compares equal, so it is an all-True mask that is always
        # overwritten by one of the branches.
        mask = ~(pt == np.nan)
        if self._year == '2016':
            mask = (pt > 200) & (medium_id >= 2)
        else:
            mask = (pt > 200) & ((medium_id & 2) == 2)
        return mask

    pho['isloose'] = isLoosePhoton(pho.pt, pho.eta, pho[_id],
                                   self._year) & (pho.electronVeto)
    pho['ispurity'] = isPurityPhoton(pho.pt, pho[_id]) & (pho.isScEtaEB) & (
        pho.electronVeto)
    pho_clean = pho[pho.isclean.astype(np.bool)]
    pho_loose = pho_clean[pho_clean.isloose.astype(np.bool)]
    pho_purity = pho_clean[pho_clean.ispurity.astype(np.bool)]
    # Sideband categories: medium ID without the sieie cut, and with the
    # isolation requirement inverted (helpers defined elsewhere in the file).
    pho_nosieie = pho_clean[(pho_clean.pt > 200) & (pho_clean.isScEtaEB) &
                            (pho_clean.electronVeto) &
                            medium_id_no_sieie(pho_clean)]
    pho_nosieie_inv_iso = pho_clean[(pho_clean.pt > 200) &
                                    (pho_clean.isScEtaEB) &
                                    (pho_clean.electronVeto) &
                                    medium_id_no_sieie_inv_iso(pho_clean)]

    #### Consider AK4 jet
    def isPurityJet(pt, eta, jet_id):
        # Tight-ID jet (bit 1 of jetId) with pt > 30 inside the tracker.
        mask = (pt > 30) & (abs(eta) < 2.4) & ((jet_id & 2) == 2)
        return mask

    j = events.Jet
    #30 GeV cut on jet pT, we need to check later
    #j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId, j.neHEF, j.neEmEF, j.chHEF, j.chEmEF)
    j['ispurity'] = isPurityJet(j.pt, j.eta, j.jetId)
    # Jets are "clean" if away from all loose leptons and loose photons.
    j['isclean'] = ~match(j, e_loose, 0.4) & ~match(
        j, mu_loose, 0.4) & ~match(j, pho_loose, 0.4)
    j_purity = j[j.ispurity.astype(np.bool)]
    j_clean = j_purity[j_purity.isclean.astype(np.bool)]
    j_nclean = j_clean.counts
    met = events.MET
    #### Genweights
    weights = processor.Weights(len(events), storeIndividual=True)
    if isData:
        weights.add('genw', np.ones(events.size))
    else:
        weights.add('genw', events.genWeight)
    #### MET filter & single photon trigger
    met_filters = np.ones(events.size, dtype=np.bool)
    if isData:
        # eeBadScFilter is only applied to data.
        met_filters = met_filters & events.Flag['eeBadScFilter']
    for flag in PhotonPurity.met_filter_flags[self._year]:
        met_filters = met_filters & events.Flag[flag]
    #selection.add('met_filters',met_filters)
    triggers = np.zeros(events.size, dtype=np.bool)
    for path in self._singlephoton_triggers[self._year]:
        if path not in events.HLT.columns:
            continue
        triggers = triggers | events.HLT[path]
    #selection.add('singlephoton_triggers', triggers)
    #selection.add('jet_cut', (j_nclean>0))
    #selection.add('met60', (met.pt<60))
    # Single combined event mask instead of the PackedSelection above.
    event_mask = met_filters & triggers & (met.pt < 60) & (j_nclean > 0)
    # Fill one 2D (sieie, pt) histogram per photon category.
    hout['count'].fill(dataset=dataset,
                       cat='medium',
                       sieie=pho_purity.sieie[event_mask].flatten(),
                       pt=pho_purity.pt[event_mask].flatten(),
                       weight=weight_shape(pho_purity.sieie[event_mask],
                                           weights.weight()[event_mask]))
    hout['count'].fill(dataset=dataset,
                       cat='medium_nosieie',
                       sieie=pho_nosieie.sieie[event_mask].flatten(),
                       pt=pho_nosieie.pt[event_mask].flatten(),
                       weight=weight_shape(pho_nosieie.sieie[event_mask],
                                           weights.weight()[event_mask]))
    hout['count'].fill(
        dataset=dataset,
        cat='medium_nosieie_invertiso',
        sieie=pho_nosieie_inv_iso.sieie[event_mask].flatten(),
        pt=pho_nosieie_inv_iso.pt[event_mask].flatten(),
        weight=weight_shape(pho_nosieie_inv_iso.sieie[event_mask],
                            weights.weight()[event_mask]))
    if isData:
        hout['sumw'].fill(dataset=dataset, sumw=1, weight=1)
    else:
        hout['sumw'].fill(dataset=dataset,
                          sumw=1,
                          weight=events.genWeight.sum())
    return hout
def process(self, df):
    """Lepton-jet analysis: build PF lepton-jets, classify events into the
    2mu2e / 4mu channels from the two leading lepton-jets, and fill the
    isolation and sum-track-pt histograms.  Returns the accumulator.
    """
    output = self.accumulator.identity()
    dataset = df['dataset']
    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # MC-only weights: generator weight and pileup reweighting.
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # Event-level masks applied as 0/1 weights.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################
    # Lepton-jet candidates with per-jet isolation/track attributes.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
        sumtkpt=df['pfjet_tkPtSum05'].content,
        pfiso=df['pfjet_pfIsolationNoPU05'].content,
        isodbeta=df['pfjet_pfiso'].content,
        mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
    )
    # Classify each lepton-jet by its PF-candidate content:
    # type 3 = PF muon, type 8 = DSA muon.
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma-type, 2 = PF-muon-type, 3 = DSA-type.
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
    # Charge sum of the constituents; egamma jets always count as neutral.
    ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=ljdaucharge)
    leptonjets.add_attributes(
        isneutral=(leptonjets.iseltype |
                   (leptonjets.ismutype & (leptonjets.qsum == 0))))
    leptonjets.add_attributes(
        mucharged=(leptonjets.iseltype |
                   (leptonjets.ismutype & (leptonjets.qsum != 0))))
    # Drop lepton-jets containing a cosmic-tagged DSA muon.
    ljdsamuSubset = fromNestNestIndexArray(
        df['dsamuon_isSubsetFilteredCosmic1Leg'],
        awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
    leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
    leptonjets = leptonjets[(leptonjets.nocosmic) & (leptonjets.pt > 30) &
                            (leptonjets.mintkdist < 50)]
    ## __ twoleptonjets__
    # Keep only events with >= 2 surviving lepton-jets.
    twoleptonjets = leptonjets.counts >= 2
    dileptonjets = leptonjets[twoleptonjets]
    wgt = weight[twoleptonjets]
    if dileptonjets.size == 0:
        return output
    # lj0 = highest-pt lepton-jet; lj1 = second-highest-pt lepton-jet.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]
    ## channel def ##
    # channel 1 (2mu2e): exactly one mu-type jet, and it is among the two
    # leading; channel 2 (4mu): exactly two mu-type jets, both leading.
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1
    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2
    channel_ = channel_2mu2e + channel_4mu
    ###########
    # isControl = (np.abs(lj0.p4.delta_phi(lj1.p4))<np.pi/2).flatten()
    # if self.data_type!='data':
    #     dileptonjets = dileptonjets[isControl]
    #     channel_ = channel_[isControl]
    #     wgt = wgt[isControl]
    # Both leading jets neutral, or (if bothNeutral is False) both charged.
    mask_ = (lj0.isneutral & lj1.isneutral).flatten()
    if self.bothNeutral is False:
        mask_ = (lj0.mucharged & lj1.mucharged).flatten()
    # mask_ = ((channel_==2)&((~lj0.isneutral&(~lj1.isneutral)).flatten())) | ((channel_==1)&mask_)
    channel_ = channel_[mask_]
    wgt = wgt[mask_]
    dileptonjets = dileptonjets[mask_]
    # Element-wise min/max of the two leading jets' pfiso.
    # NOTE(review): when lj0.pfiso == lj1.pfiso exactly, both indicator
    # terms are zero and min/max come out 0 — confirm this corner is OK.
    minpfiso = (lj0.pfiso > lj1.pfiso).astype(int) * lj1.pfiso + (
        lj0.pfiso < lj1.pfiso).astype(int) * lj0.pfiso
    output['minpfiso'].fill(dataset=dataset,
                            iso=minpfiso[mask_].flatten(),
                            channel=channel_,
                            weight=wgt)
    maxpfiso = (lj0.pfiso > lj1.pfiso).astype(int) * lj0.pfiso + (
        lj0.pfiso < lj1.pfiso).astype(int) * lj1.pfiso
    output['maxpfiso'].fill(dataset=dataset,
                            iso=maxpfiso[mask_].flatten(),
                            channel=channel_,
                            weight=wgt)
    output['lj0pfiso'].fill(dataset=dataset,
                            iso=lj0.pfiso[mask_].flatten(),
                            channel=channel_,
                            weight=wgt)
    # Broadcast the per-event channel/weight to per-lepton-jet entries.
    ljones = dileptonjets.pt.ones_like()
    output['sumpt'].fill(dataset=dataset,
                         sumpt=dileptonjets.sumtkpt.flatten(),
                         channel=(channel_ * ljones).flatten(),
                         weight=(wgt * ljones).flatten())
    output['pfiso'].fill(dataset=dataset,
                         iso=dileptonjets.pfiso.flatten(),
                         channel=(channel_ * ljones).flatten(),
                         weight=(wgt * ljones).flatten())
    output['isodbeta'].fill(dataset=dataset,
                            iso=dileptonjets.isodbeta.flatten(),
                            channel=(channel_ * ljones).flatten(),
                            weight=(wgt * ljones).flatten())
    return output
def process(self, df):
    """Boosted H->tautau-style muon-in-jet analysis on a flat dataframe.

    Pre-selects events with >= 1 muon and >= 1 fat jet, selects the fat jet
    closest to the leading muon, derives the mass-assumption MET eta and
    candidate masses, writes derived columns back into df, accumulates a
    cutflow, and fills all histograms whose fields are present in df.
    """
    dataset = df['dataset']
    # Data chunks have no generator-weight column.
    isRealData = 'genWeight' not in df
    # NOTE(review): 'isSignal' is computed but never used below.
    isSignal = 'htautau' in dataset
    output = self.accumulator.identity()
    # select at least one jet and one muon ( this is Pre-Selection! )
    events = buildevents(df, fatjet='CustomAK8Puppi')
    good = (
        (events.muons.counts >= 1)
        & (events.fatjets.counts >= 1)
    )
    events = events[good]
    selection = processor.PackedSelection()
    # trigger
    # NOTE(review): starting from ones and AND-ing requires *all* configured
    # paths to fire; the sibling processors in this file OR the paths
    # starting from zeros — confirm this is intended.
    trigger = np.ones(df.size, dtype='bool')
    for t in self._triggers[self._year+'_'+self._trigger]:
        trigger &= df[t]
    selection.add('trigger', trigger[good])
    # muon selection
    goodmuon = (
        (events.muons.p4.pt > 10)
        & (np.abs(events.muons.p4.eta) < 2.4)
        & (events.muons.sip3d < 4)
        & (np.abs(events.muons.dz) < 0.1)
        & (np.abs(events.muons.dxy) < 0.05)
        & (events.muons.mvaId == 2)
    )
    nmuons = goodmuon.sum()
    # NOTE(review): this good-muon slice is immediately overwritten below by
    # the plain leading muon — the line is dead; confirm which was intended.
    leadingmuon = events.muons[goodmuon][:, 0:1]
    # fatjet closest to lepton
    leadingmuon = events.muons[:, 0]
    mujet_dR = leadingmuon.p4.delta_r(events.fatjets.p4)
    mu_in_cone = mujet_dR.min() < 0.8  # this I am not sure we have to put as a selection...
    mujet_bestidx = mujet_dR.argmin()
    leadingjet_mu = events.fatjets[mujet_bestidx]
    selection.add('jetkin', (
        (leadingjet_mu.p4.pt > 300)
        & (leadingjet_mu.p4.eta < 2.4)
        & (leadingjet_mu.msoftdrop > 10.)
    ).any())
    selection.add('jetid', (leadingjet_mu.jetId & 2).any())  # tight id
    # lepton inside jet?
    selection.add('muinside', mu_in_cone.astype(bool))
    selection.add('LSF3muinside', (leadingjet_mu.electronIdx3SJ == 0).any())
    selection.add('LSF3medium', (leadingjet_mu.lsf3>0.78).any())
    # veto b-tag in opposite side
    jets = events.jets[
        (events.jets.p4.pt > 30.)
        & (events.jets.jetId & 2)  # tight id
    ]
    ak4_ak8_pair = jets.cross(leadingjet_mu, nested=True)
    dphi = ak4_ak8_pair.i0.p4.delta_phi(ak4_ak8_pair.i1.p4)
    ak4_opposite = jets[(np.abs(dphi) > np.pi / 2).all()]
    selection.add('antiak4btagMediumOppHem',
                  ak4_opposite.deepcsvb.max() < self._btagWPs['med'][self._year])
    # b-tag in same side
    #subjets = events.subjets[:, leadingjet_mu.subJetIdx1]
    # final lepton selection
    nelectrons = (
        (events.electrons.p4.pt > 10)
        & (np.abs(events.electrons.p4.eta) < 2.5)
        & (events.electrons.cutBased & (1 << 2)).astype(bool)  # 2017V2 loose
    ).sum()
    selection.add('onemuon', (nmuons == 1) & (nelectrons == 0))
    # should we veto taus?
    selection.add('muonkin', (
        (leadingmuon.p4.pt > 27.)
        & (np.abs(leadingmuon.p4.eta) < 2.4)
    ))
    # building variables
    leadingjet_mu = leadingjet_mu.flatten()
    mm = (leadingjet_mu.p4 - leadingmuon.p4).mass2
    jmass = (mm>0)*np.sqrt(np.maximum(0, mm)) + (mm<0)*leadingjet_mu.p4.mass  # (jet - lep).M
    met = events.met
    # Neutrino-eta reconstruction under an on/off-shell W mass assumption
    # (80 GeV off-shell branch, 125-80 GeV otherwise).
    joffshell = jmass < 62.5
    massassumption = 80.*joffshell + (125 - 80.)*~joffshell
    x = massassumption**2/(2*leadingmuon.p4.pt*met.rho) + np.cos(leadingmuon.p4.phi - met.phi)
    met_eta = (
        (x < 1)*np.arcsinh(x*np.sinh(leadingmuon.p4.eta))
        + (x >= 1)*(
            leadingmuon.p4.eta - np.sign(leadingmuon.p4.eta)*np.arccosh(np.maximum(1., x))
        )
    )
    met_p4 = TLorentzVectorArray.from_ptetaphim(met.rho, met_eta, met.phi,
                                                np.zeros(met.size))
    # filling missing columns: write derived quantities back into df so the
    # generic histogram-filling loop below can pick them up by field name.
    df['jet_pt'] = leadingjet_mu.p4.pt
    df['jet_lsf3'] = leadingjet_mu.lsf3
    df['jet_mmass'] = jmass
    df['jet_hmass'] = (met_p4 + leadingjet_mu.p4).mass
    df['jet_oppbtag'] = ak4_opposite.deepcsvb.max()
    df['muon_pt'] = leadingmuon.p4.pt
    df['muon_miso'] = leadingmuon.miniPFRelIso_all
    df['met_pt'] = met.rho
    df['met_eta'] = met_eta
    # fill cutflow (cumulative event counts).
    # NOTE(review): 'LSF3muinside' appears twice, so its cutflow row is
    # incremented twice (double-counted); 'LSF3medium' is registered above
    # but never used — possibly the intended second entry.
    cutflow = ['trigger', 'jetkin', 'jetid', 'antiak4btagMediumOppHem',
               'onemuon', 'muonkin', 'muinside', 'LSF3muinside', 'LSF3muinside']
    allcuts = set()
    output['cutflow']['none'] += len(events)
    for cut in cutflow:
        allcuts.add(cut)
        output['cutflow'][cut] += selection.all(*allcuts).sum()
    # NOTE(review): 'weights' is built but never used when filling below.
    weights = processor.Weights(len(events))
    if not isRealData:
        weights.add('genweight', events.genWeight)
    regions = {}
    regions['presel'] = {'trigger', 'jetkin', 'jetid',
                         'antiak4btagMediumOppHem', 'onemuon', 'muonkin'}
    regions['muinjet'] = {'trigger', 'jetkin', 'jetid',
                          'antiak4btagMediumOppHem', 'onemuon', 'muonkin',
                          'muinside', 'LSF3muinside', 'LSF3muinside'}
    # Generic fill loop: each histogram is matched to a region by name and
    # filled from df columns named after its axes.
    for histname, h in output.items():
        if not isinstance(h, hist.Hist):
            continue
        if not all(k in df or k == 'systematic' for k in h.fields):
            print("Missing fields %r from %r" % (set(h.fields) - set(df.keys()), h))
            continue
        fields = {k: df[k] for k in h.fields if k in df}
        region = [r for r in regions.keys() if r in histname.split('_')]
        if len(region) == 1:
            region = region[0]
            cut = selection.all(*regions[region])
            # NOTE(review): the boolean cut is passed as the fill *weight*
            # (0/1 per event), which masks failing events but ignores the
            # gen weights registered above — confirm intent.
            h.fill(**fields, weight=cut)
        elif len(region) > 1:
            raise ValueError("Histogram '%s' has a name matching multiple region definitions: %r" % (histname, region))
        else:
            raise ValueError("Histogram '%s' does not fall into any region definitions." % (histname, ))
    return output
def process(self, df):
    """Process one chunk of NanoAOD events for the mono-X analysis.

    Builds physics objects (electrons, muons, taus, photons, AK15 fat
    jets, AK4 jets), derives recoil/lepton-system quantities, computes
    MC/trigger/pileup weights, applies the packed event selections per
    analysis region, and fills the histogram accumulator.

    Parameters
    ----------
    df : coffea LazyDataFrame-like mapping of NanoAOD branch name -> array.

    Returns
    -------
    The filled accumulator (``hout``).
    """
    dataset = df['dataset']
    # Map this dataset onto the analysis regions declared in self._samples:
    # a region key is kept when one of its sample substrings occurs in the
    # dataset name.
    # NOTE(review): selected_regions is freshly created here, so the
    # membership test below is always True — it is effectively dead code.
    selected_regions = {}
    if not dataset in selected_regions: selected_regions[dataset] = []
    for selection, v in self._samples.items():
        for i in range(0, len(v)):
            if v[i] not in dataset: continue
            selected_regions[dataset].append(selection)
    ###
    # Getting corrections, ids, triggers, etc. from .coffea files
    ###
    met_trigger_paths = self._triggers['met_trigger_paths']
    singleele_trigger_paths = self._triggers['singleele_trigger_paths']
    singlepho_trigger_paths = self._triggers['singlepho_trigger_paths']
    get_msd_weight = self._corrections['get_msd_weight']
    get_ttbar_weight = self._corrections['get_ttbar_weight']
    get_nlo_weight = self._corrections['get_nlo_weight']
    get_adhoc_weight = self._corrections['get_adhoc_weight']
    get_pu_weight = self._corrections['get_pu_weight']
    get_met_trig_weight = self._corrections['get_met_trig_weight']
    get_met_zmm_trig_weight = self._corrections['get_met_zmm_trig_weight']
    get_ele_trig_weight = self._corrections['get_ele_trig_weight']
    get_pho_trig_weight = self._corrections['get_pho_trig_weight']
    get_ecal_bad_calib = self._corrections['get_ecal_bad_calib']  # fetched but unused below
    isLooseElectron = self._ids['isLooseElectron']
    isTightElectron = self._ids['isTightElectron']
    isLooseMuon = self._ids['isLooseMuon']
    isTightMuon = self._ids['isTightMuon']
    isLooseTau = self._ids['isLooseTau']
    isLoosePhoton = self._ids['isLoosePhoton']
    isTightPhoton = self._ids['isTightPhoton']
    isGoodJet = self._ids['isGoodJet']
    isGoodFatJet = self._ids['isGoodFatJet']
    isHEMJet = self._ids['isHEMJet']
    met_filter_flags = self._metfilters['met_filter_flags']
    ###
    # Initialize global quantities (MET etc.)
    ###
    met = Initialize({'pt':df['MET_pt'],
                      'eta':0,
                      'phi':df['MET_phi'],
                      'mass':0})
    calomet = Initialize({'pt':df['CaloMET_pt'],
                          'eta':0,
                          'phi':df['CaloMET_phi'],
                          'mass':0})
    ###
    # Initialize physics objects
    ###
    # Define first an empty object that will be used as protection against
    # arrays with size 0. Will use MET to set the correct size for the
    # arrays. Not used at the moment.
    #empty_jagged = awkward.JaggedArray.fromcounts(np.ones_like(met.pt, dtype=int),np.zeros_like(met.pt))
    #empty_obj = Initialize({'pt':empty_jagged,
    #                        'eta':empty_jagged,
    #                        'phi':empty_jagged,
    #                        'mass':empty_jagged})

    # --- Electrons: attach year-dependent id branches (zero-filled when
    # absent), tag loose/tight, and extract the leading tight electron.
    e = Initialize({'pt':df['Electron_pt'],
                    'eta':df['Electron_eta'],
                    'phi':df['Electron_phi'],
                    'mass':df['Electron_mass']})
    for key in self._e_id[self._year]:
        if self._e_id[self._year][key] in df:
            e[key] = df[self._e_id[self._year][key]]
        else:
            e[key] = e.pt.zeros_like()
    e['isloose'] = isLooseElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.loose_id,self._year)
    e['istight'] = isTightElectron(e.pt,e.eta,e.dxy,e.dz,e.iso,e.tight_id,self._year)
    # NOTE(review): np.bool is a deprecated alias removed in NumPy >= 1.24;
    # this code assumes an older NumPy.
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.istight.astype(np.bool)]
    e_loose = e[e.isloose.astype(np.bool)]
    e_tight = e[e.istight.astype(np.bool)]
    e_ntot = e.counts
    e_nloose = e_loose.counts
    e_ntight = e_tight.counts

    # --- Muons: same pattern as electrons.
    mu = Initialize({'pt':df['Muon_pt'],
                     'eta':df['Muon_eta'],
                     'phi':df['Muon_phi'],
                     'mass':df['Muon_mass']})
    for key in self._mu_id[self._year]:
        if self._mu_id[self._year][key] in df:
            mu[key] = df[self._mu_id[self._year][key]]
        else:
            mu[key] = mu.pt.zeros_like()
    mu['isloose'] = isLooseMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.med_id,self._year)
    mu['istight'] = isTightMuon(mu.pt,mu.eta,mu.dxy,mu.dz,mu.iso,mu.tight_id,self._year)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.istight.astype(np.bool)]
    mu_loose=mu[mu.isloose.astype(np.bool)]
    mu_tight=mu[mu.istight.astype(np.bool)]
    mu_ntot = mu.counts
    mu_nloose = mu_loose.counts
    mu_ntight = mu_tight.counts

    # --- Taus: cleaned against loose muons/electrons within dR 0.3.
    tau = Initialize({'pt':df['Tau_pt'],
                      'eta':df['Tau_eta'],
                      'phi':df['Tau_phi'],
                      'mass':df['Tau_mass']})
    for key in self._tau_id[self._year]:
        if self._tau_id[self._year][key] in df:
            tau[key] = df[self._tau_id[self._year][key]]
        else:
            tau[key] = tau.pt.zeros_like()
    tau['isclean'] =~tau.match(mu_loose,0.3)&~tau.match(e_loose,0.3)
    tau['isloose']=isLooseTau(tau.pt,tau.eta,tau.decayMode,tau.id,self._year)&tau.isclean.astype(np.bool)
    tau_loose=tau[tau.isloose.astype(np.bool)]
    tau_ntot=tau.counts
    tau_nloose=tau_loose.counts

    # --- Photons: cleaned against loose electrons within dR 0.4.
    pho = Initialize({'pt':df['Photon_pt'],
                      'eta':df['Photon_eta'],
                      'phi':df['Photon_phi'],
                      'mass':df['Photon_mass']})
    for key in self._pho_id[self._year]:
        if self._pho_id[self._year][key] in df:
            pho[key] = df[self._pho_id[self._year][key]]
        else:
            pho[key] = pho.pt.zeros_like()
    pho['isclean'] =~pho.match(e_loose,0.4)
    pho['isloose']=isLoosePhoton(pho.pt,pho.eta,pho.loose_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)
    pho['istight']=isTightPhoton(pho.pt,pho.eta,pho.tight_id,pho.eleveto,self._year)&pho.isclean.astype(np.bool)
    leading_pho = pho[pho.pt.argmax()]
    leading_pho = leading_pho[leading_pho.istight.astype(np.bool)]
    pho_loose=pho[pho.isloose.astype(np.bool)]
    pho_tight=pho[pho.istight.astype(np.bool)]
    pho_ntot=pho.counts
    pho_nloose=pho_loose.counts
    pho_ntight=pho_tight.counts

    # --- AK15 fat jets: good-id, cleaned against leptons/photons within
    # dR 1.5, plus DeepAK15 tagger discriminants.
    fj = Initialize({'pt':df['AK15Puppi_pt'],
                     'eta':df['AK15Puppi_eta'],
                     'phi':df['AK15Puppi_phi'],
                     'mass':df['AK15Puppi_mass']})
    fj['msd'] = df['AK15Puppi_msoftdrop']
    for key in self._fj_id[self._year]:
        if self._fj_id[self._year][key] in df:
            fj[key] = df[self._fj_id[self._year][key]]
        else:
            fj[key] = fj.pt.zeros_like()
    fj['isgood'] = isGoodFatJet(fj.pt, fj.eta, fj.id)
    fj['isclean'] =~fj.match(pho_loose,1.5)&~fj.match(mu_loose,1.5)&~fj.match(e_loose,1.5)&fj.isgood.astype(np.bool)
    for key in self._deep[self._year]:
        if self._deep[self._year][key] in df:
            fj[key] = df[self._deep[self._year][key]]
        else:
            fj[key] = fj.pt.zeros_like()
    # Tagger ratios: probability of signal category over signal + QCD.
    fj['probQCD'] = fj.probQCDbb+fj.probQCDcc+fj.probQCDb+fj.probQCDc+fj.probQCDothers
    fj['TvsQCD'] = (fj.probTbcq + fj.probTbqq) / (fj.probTbcq + fj.probTbqq + fj.probQCD)
    fj['ZHbbvsQCD'] = (fj.probZbb + fj.probHbb) / (fj.probZbb+ fj.probHbb+ fj.probQCD)
    fj['VvsQCD'] = (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb) / (fj.probWcq+fj.probWqq+fj.probZcc+fj.probZqq+fj.probZbb+fj.probQCD)
    leading_fj = fj[fj.pt.argmax()]
    leading_fj = leading_fj[leading_fj.isclean.astype(np.bool)]
    # Corrected soft-drop mass of the leading clean fat jet (per event,
    # via .sum() over the at-most-one-element jagged array).
    leading_fj_msd_corr = leading_fj.msd.sum()*get_msd_weight(leading_fj.pt.sum(),leading_fj.eta.sum())
    fj_good = fj[fj.isgood.astype(np.bool)]
    fj_clean=fj[fj.isclean.astype(np.bool)]
    fj_ntot=fj.counts
    fj_ngood=fj_good.counts
    fj_nclean=fj_clean.counts

    # --- AK4 jets: id/cleaning plus DeepCSV/DeepFlavour b-tag working
    # points and HEM-region flagging.
    j = Initialize({'pt':df['Jet_pt'],
                    'eta':df['Jet_eta'],
                    'phi':df['Jet_phi'],
                    'mass':df['Jet_mass']})
    #https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X
    j['deepcsv'] = df['Jet_btagDeepB']
    j['deepflv'] = df['Jet_btagDeepFlavB']
    for key in self._j_id[self._year]:
        if self._j_id[self._year][key] in df:
            j[key] = df[self._j_id[self._year][key]]
        else:
            j[key] = j.pt.zeros_like()
    j['isgood'] = isGoodJet(j.pt, j.eta, j.id, j.nhf, j.nef, j.chf, j.cef)
    j['isHEM'] = isHEMJet(j.pt, j.eta, j.phi)
    j['isclean'] = ~j.match(e_loose,0.4)&~j.match(mu_loose,0.4)&~j.match(pho_loose,0.4)&j.isgood.astype(np.bool)
    #j['isclean'] = ~j.match(e_tight,0.4)&~j.match(mu_tight,0.4)&~j.match(pho_tight,0.4)&j.isgood
    # Isolated = clean AND away (dR > 1.5) from every clean fat jet.
    j['isiso'] = ~(j.match(fj_clean,1.5))&j.isclean.astype(np.bool)
    # B-tag working points (loose/medium/tight) for both discriminants;
    # thresholds are the 2018 102X recommendations (see twiki above).
    j['isdcsvL'] = (j.deepcsv>0.1241)&j.isiso.astype(np.bool)
    j['isdflvL'] = (j.deepflv>0.0494)&j.isiso.astype(np.bool)
    j['isdcsvM'] = (j.deepcsv>0.4184)&j.isiso.astype(np.bool)
    j['isdflvM'] = (j.deepflv>0.2770)&j.isiso.astype(np.bool)
    j['isdcsvT'] = (j.deepcsv>0.7527)&j.isiso.astype(np.bool)
    j['isdflvT'] = (j.deepflv>0.7264)&j.isiso.astype(np.bool)
    leading_j = j[j.pt.argmax()]
    leading_j = leading_j[leading_j.isclean.astype(np.bool)]
    j_good = j[j.isgood.astype(np.bool)]
    j_clean = j[j.isclean.astype(np.bool)]
    j_iso = j[j.isiso.astype(np.bool)]
    j_dcsvL = j[j.isdcsvL]
    j_dflvL = j[j.isdflvL]
    j_dcsvM = j[j.isdcsvM]
    j_dflvM = j[j.isdflvM]
    j_dcsvT = j[j.isdcsvT]
    j_dflvT = j[j.isdflvT]
    j_HEM = j[j.isHEM.astype(np.bool)]
    j_ntot=j.counts
    j_ngood=j_good.counts
    j_nclean=j_clean.counts
    j_niso=j_iso.counts
    j_ndcsvL=j_dcsvL.counts
    j_ndflvL=j_dflvL.counts
    j_ndcsvM=j_dcsvM.counts
    j_ndflvM=j_dflvM.counts
    j_ndcsvT=j_dcsvT.counts
    j_ndflvT=j_dflvT.counts
    j_nHEM = j_HEM.counts
    ###
    # Calculating derivatives
    ###
    # Dilepton candidates from all distinct loose-lepton pairs; fall back
    # to the leading lepton when no pair exists in the whole chunk.
    ele_pairs = e_loose.distincts()
    diele = leading_e
    leading_diele = leading_e
    if ele_pairs.i0.content.size>0:
        diele = ele_pairs.i0+ele_pairs.i1
        leading_diele = diele[diele.pt.argmax()]
    mu_pairs = mu_loose.distincts()
    dimu = leading_mu
    leading_dimu = leading_mu
    if mu_pairs.i0.content.size>0:
        dimu = mu_pairs.i0+mu_pairs.i1
        leading_dimu = dimu[dimu.pt.argmax()]
    # u: hadronic recoil proxy per region (MET + visible lepton/photon system).
    u={}
    u["iszeroL"] = met
    u["isoneM"] = met+leading_mu.sum()
    u["isoneE"] = met+leading_e.sum()
    u["istwoM"] = met+leading_dimu.sum()
    u["istwoE"] = met+leading_diele.sum()
    u["isoneA"] = met+leading_pho.sum()
    # lepSys: the full visible lepton/photon system per region.
    lepSys={}
    lepSys["iszeroL"] = met
    lepSys["isoneM"] = leading_mu.sum()
    lepSys["isoneE"] = leading_e.sum()
    lepSys["istwoM"] = leading_dimu.sum()
    lepSys["istwoE"] = leading_diele.sum()
    lepSys["isoneA"] = leading_pho.sum()
    # leadlepton: the single leading lepton per region (also for dilepton
    # regions, where only the leading lepton is used).
    leadlepton={}
    leadlepton["iszeroL"] = met
    leadlepton["isoneM"] = leading_mu.sum()
    leadlepton["isoneE"] = leading_e.sum()
    leadlepton["istwoM"] = leading_mu.sum()
    leadlepton["istwoE"] = leading_e.sum()
    leadlepton["isoneA"] = leading_pho.sum()
    ###
    # Calculating weights
    ###
    ###
    # For MC, retrieve the LHE weights, to take into account NLO
    # destructive interference, and their sum
    ###
    genw = np.ones_like(df['MET_pt'])
    sumw = 1.
    wnlo = np.ones_like(df['MET_pt'])
    adhocw = np.ones_like(df['MET_pt'])
    # xsec == -1 marks data; everything else is MC.
    if self._xsec[dataset] != -1:
        genw = df['genWeight']
        sumw = genw.sum()
        if 'TTJets' in dataset or 'WJets' in dataset or 'DY' in dataset or 'ZJets' in dataset:
            gen_flags = df['GenPart_statusFlags']
            # NOTE(review): bit 13 is isLastCopy; the == 0 test selects
            # particles WITHOUT that flag — verify this is intended and
            # not an inverted mask.
            LastCopy = (gen_flags&(1 << 13))==0
            #genLastCopy = Initialize({'pt':df['GenPart_pt'][LastCopy],
            #                          'eta':df['GenPart_eta'][LastCopy],
            #                          'phi':df['GenPart_phi'][LastCopy],
            #                          'mass':df['GenPart_mass'][LastCopy],
            #                          'pdgid':df['GenPart_pdgId'][LastCopy]})
            gen_pt = df['GenPart_pt'][LastCopy]
            gen_pdgid = df['GenPart_pdgId'][LastCopy]
            #genTops = genLastCopy[abs(genLastCopy.pdgid)==6]
            #genWs = genLastCopy[abs(genLastCopy.pdgid)==24]
            #genZs = genLastCopy[abs(genLastCopy.pdgid)==23]
            #genAs = genLastCopy[abs(genLastCopy.pdgid)==22]
            #genHs = genLastCopy[abs(genLastCopy.pdgid)==25]
            genTops = gen_pt[abs(gen_pdgid)==6]
            genWs = gen_pt[abs(gen_pdgid)==24]
            genZs = gen_pt[abs(gen_pdgid)==23]
            genAs = gen_pt[abs(gen_pdgid)==22]
            genHs = gen_pt[abs(gen_pdgid)==25]
            # Event-category flags (currently only used implicitly via the
            # dataset-name branches below).
            isTT = (genTops.counts==2)
            isW = (genTops.counts==0)&(genWs.counts==1)&(genZs.counts==0)&(genAs.counts==0)&(genHs.counts==0)
            isZ = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==1)&(genAs.counts==0)&(genHs.counts==0)
            isA = (genTops.counts==0)&(genWs.counts==0)&(genZs.counts==0)&(genAs.counts==1)&(genHs.counts==0)
            # NLO k-factor (and pre-2017 ad-hoc correction) from the boson pt.
            if('TTJets' in dataset):
                wnlo = np.sqrt(get_ttbar_weight(genTops[0].sum()) * get_ttbar_weight(genTops[1].sum()))
            elif('WJets' in dataset):
                wnlo = get_nlo_weight[self._year]['w'](genWs[0].sum())
                if self._year != '2016': adhocw = get_adhoc_weight['w'](genWs[0].sum())
            elif('DY' in dataset or 'ZJets' in dataset):
                wnlo = get_nlo_weight[self._year]['z'](genZs[0].sum())
                if self._year != '2016': adhocw = get_adhoc_weight['z'](genZs[0].sum())
            elif('GJets' in dataset):
                wnlo = get_nlo_weight[self._year]['a'](genAs[0].sum())
    ###
    # Calculate PU weight and systematic variations
    ###
    nvtx = df['PV_npvs']
    pu = get_pu_weight[self._year]['cen'](nvtx)
    puUp = get_pu_weight[self._year]['up'](nvtx)
    puDown = get_pu_weight[self._year]['down'](nvtx)
    ###
    # Importing the MET filters per year from metfilters.py and
    # constructing the filter boolean
    ###
    met_filters = {}
    for flag in met_filter_flags[self._year]:
        if flag in df:
            met_filters[flag] = df[flag]
    ###
    # Importing the trigger paths per year from trigger.py and
    # constructing the trigger boolean
    ###
    pass_trig = {}
    met_trigger = {}
    for path in met_trigger_paths[self._year]:
        if path in df:
            met_trigger[path] = df[path]
    passMetTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in met_trigger:
        passMetTrig |= met_trigger[path]
    singleele_trigger = {}
    for path in singleele_trigger_paths[self._year]:
        if path in df:
            singleele_trigger[path] = df[path]
    passSingleEleTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in singleele_trigger:
        passSingleEleTrig |= singleele_trigger[path]
    singlepho_trigger = {}
    for path in singlepho_trigger_paths[self._year]:
        if path in df:
            singlepho_trigger[path] = df[path]
    passSinglePhoTrig = np.zeros_like(df['MET_pt'], dtype=np.bool)
    for path in singlepho_trigger:
        passSinglePhoTrig |= singlepho_trigger[path]
    pass_trig['iszeroL'] = passMetTrig
    pass_trig['isoneM'] = passMetTrig
    pass_trig['istwoM'] = passMetTrig
    pass_trig['isoneE'] = passSingleEleTrig
    pass_trig['istwoE'] = passSingleEleTrig
    pass_trig['isoneA'] =passSinglePhoTrig
    ###
    # Trigger efficiency weight
    ###
    trig = {}
    trig['iszeroL'] = get_met_trig_weight[self._year](u["iszeroL"].pt)
    trig['isoneM'] = get_met_trig_weight[self._year](u["isoneM"].pt)
    trig['istwoM'] = get_met_zmm_trig_weight[self._year](u["istwoM"].pt)
    trig['isoneE'] = get_ele_trig_weight[self._year](leading_e.eta.sum(), leading_e.pt.sum())
    trig['istwoE'] = trig['isoneE']
    # When a dielectron pair exists, use the per-leg efficiencies of the
    # leading pair instead: P(at least one leg fires) = 1 - (1-e1)(1-e2).
    if ele_pairs.i0.content.size>0:
        eff1 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i0.eta.sum(),ele_pairs[diele.pt.argmax()].i0.pt.sum())
        eff2 = get_ele_trig_weight[self._year](ele_pairs[diele.pt.argmax()].i1.eta.sum(),ele_pairs[diele.pt.argmax()].i1.pt.sum())
        trig['istwoE'] = 1 - (1-eff1)*(1-eff2)
    trig['isoneA'] = get_pho_trig_weight[self._year](leading_pho.pt.sum())
    ###
    # Event selection
    ###
    selections = processor.PackedSelection()
    selections.add('iszeroL', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0))
    selections.add('isoneM', (e_nloose==0)&(mu_ntight==1)&(tau_nloose==0)&(pho_nloose==0))
    selections.add('isoneE', (e_ntight==1)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(met.pt>50))
    selections.add('istwoM', (e_nloose==0) & (mu_ntight>=1) & (mu_nloose==2) & (tau_nloose==0)&(pho_nloose==0)&(leading_dimu.mass.sum()>60) & (leading_dimu.mass.sum()<120))
    selections.add('istwoE', (e_ntight>=1) & (e_nloose==2)&(mu_nloose==0)&(tau_nloose==0)&(pho_nloose==0)&(leading_diele.mass.sum()>60)&(leading_diele.mass.sum()<120))
    selections.add('isoneA', (e_nloose==0)&(mu_nloose==0)&(tau_nloose==0)&(pho_ntight==1))
    selections.add('noextrab', (j_ndflvL==0))
    selections.add('extrab', (j_ndflvL>0))
    # mono-Hs vs mono-jet split on the ZHbb tagger of the leading fat jet.
    selections.add('ismonohs', (leading_fj.ZHbbvsQCD.sum()>0.65))
    selections.add('ismonojet', ~(leading_fj.ZHbbvsQCD.sum()>0.65))
    # Corrected soft-drop mass bins.
    selections.add('mass0', (leading_fj_msd_corr<30))
    selections.add('mass1', (leading_fj_msd_corr>=30)&(leading_fj_msd_corr<60))
    selections.add('mass2', (leading_fj_msd_corr>=60)&(leading_fj_msd_corr<80))
    selections.add('mass3', (leading_fj_msd_corr>=80)&(leading_fj_msd_corr<120))
    selections.add('mass4', (leading_fj_msd_corr>=120))
    selections.add('noHEMj', (j_nHEM==0))
    ###
    # Adding weights and selections
    ###
    weights = {}
    regions = {}
    for k in selected_regions[dataset]:
        # One Weights container per region; trigger efficiency and the
        # pass-trigger/MET-filter booleans enter as multiplicative weights.
        weights[k] = processor.Weights(df.size)
        weights[k].add('nlo',wnlo)
        weights[k].add('adhoc',adhocw)
        weights[k].add('genw',genw)
        weights[k].add('pileup',pu,puUp,puDown)
        weights[k].add('passMetFilters',np.prod([met_filters[key] for key in met_filters], axis=0))
        weights[k].add('trig', trig[k])
        weights[k].add('pass_trig', pass_trig[k])
        # Region-dependent "baggy" cut: at least one clean fat jet with
        # pt > 160, recoil > 250 and min dphi(recoil, AK4) > 0.8.
        selections.add(k+'baggy', (fj_nclean>0)&(fj_clean.pt.max()>160)&(abs(u[k].delta_phi(j_clean)).min()>0.8)&(u[k].pt>250))
        # Region name -> set of cut names, for every combination of
        # {inclusive, 5 mass bins} x {noextrab, extrab} x
        # {inclusive, ismonohs, ismonojet}.
        regions[k+'_baggy'] = {k,k+'baggy','noHEMj','noextrab'}
        regions[k+'_mass0'] = {k,k+'baggy','mass0','noHEMj','noextrab'}
        regions[k+'_mass1'] = {k,k+'baggy','mass1','noHEMj','noextrab'}
        regions[k+'_mass2'] = {k,k+'baggy','mass2','noHEMj','noextrab'}
        regions[k+'_mass3'] = {k,k+'baggy','mass3','noHEMj','noextrab'}
        regions[k+'_mass4'] = {k,k+'baggy','mass4','noHEMj','noextrab'}
        regions[k+'_baggy_extrab'] = {k,k+'baggy','noHEMj','extrab'}
        regions[k+'_mass0_extrab'] = {k,k+'baggy','mass0','noHEMj','extrab'}
        regions[k+'_mass1_extrab'] = {k,k+'baggy','mass1','noHEMj','extrab'}
        regions[k+'_mass2_extrab'] = {k,k+'baggy','mass2','noHEMj','extrab'}
        regions[k+'_mass3_extrab'] = {k,k+'baggy','mass3','noHEMj','extrab'}
        regions[k+'_mass4_extrab'] = {k,k+'baggy','mass4','noHEMj','extrab'}
        regions[k+'_baggy_ismonohs'] = {k,k+'baggy','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass0_ismonohs'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass1_ismonohs'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass2_ismonohs'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass3_ismonohs'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonohs'}
        regions[k+'_mass4_ismonohs'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonohs'}
        regions[k+'_baggy_extrab_ismonohs'] = {k,k+'baggy','noHEMj','extrab','ismonohs'}
        regions[k+'_mass0_extrab_ismonohs'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonohs'}
        regions[k+'_mass1_extrab_ismonohs'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonohs'}
        regions[k+'_mass2_extrab_ismonohs'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonohs'}
        regions[k+'_mass3_extrab_ismonohs'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonohs'}
        regions[k+'_mass4_extrab_ismonohs'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonohs'}
        regions[k+'_baggy_ismonojet'] = {k,k+'baggy','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass0_ismonojet'] = {k,k+'baggy','mass0','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass1_ismonojet'] = {k,k+'baggy','mass1','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass2_ismonojet'] = {k,k+'baggy','mass2','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass3_ismonojet'] = {k,k+'baggy','mass3','noHEMj','noextrab','ismonojet'}
        regions[k+'_mass4_ismonojet'] = {k,k+'baggy','mass4','noHEMj','noextrab','ismonojet'}
        regions[k+'_baggy_extrab_ismonojet'] = {k,k+'baggy','noHEMj','extrab','ismonojet'}
        regions[k+'_mass0_extrab_ismonojet'] = {k,k+'baggy','mass0','noHEMj','extrab','ismonojet'}
        regions[k+'_mass1_extrab_ismonojet'] = {k,k+'baggy','mass1','noHEMj','extrab','ismonojet'}
        regions[k+'_mass2_extrab_ismonojet'] = {k,k+'baggy','mass2','noHEMj','extrab','ismonojet'}
        regions[k+'_mass3_extrab_ismonojet'] = {k,k+'baggy','mass3','noHEMj','extrab','ismonojet'}
        regions[k+'_mass4_extrab_ismonojet'] = {k,k+'baggy','mass4','noHEMj','extrab','ismonojet'}
    # Per-event (or per-leading-object) quantities to histogram.
    variables = {}
    variables['j1pt'] = leading_j.pt
    variables['j1eta'] = leading_j.eta
    variables['j1phi'] = leading_j.phi
    variables['fj1pt'] = leading_fj.pt
    variables['fj1eta'] = leading_fj.eta
    variables['fj1phi'] = leading_fj.phi
    variables['e1pt'] = leading_e.pt
    variables['e1phi'] = leading_e.phi
    variables['e1eta'] = leading_e.eta
    variables['dielemass'] = leading_diele.mass
    variables['mu1pt'] = leading_mu.pt
    variables['mu1phi'] = leading_mu.phi
    variables['mu1eta'] = leading_mu.eta
    variables['dimumass'] = leading_dimu.mass
    variables['njets'] = j_nclean
    variables['ndcsvL'] = j_ndcsvL
    variables['ndflvL'] = j_ndflvL
    variables['ndcsvM'] = j_ndcsvM
    variables['ndflvM'] = j_ndflvM
    variables['ndcsvT'] = j_ndcsvT
    variables['ndflvT'] = j_ndflvT
    variables['nfjtot'] = fj_ntot
    variables['nfjgood'] = fj_ngood
    variables['nfjclean'] = fj_nclean
    variables['TvsQCD'] = leading_fj.TvsQCD
    variables['ZHbbvsQCD'] = leading_fj.ZHbbvsQCD
    variables['VvsQCD'] = leading_fj.VvsQCD
    variables['probTbcq'] = leading_fj.probTbcq
    variables['probTbqq'] = leading_fj.probTbqq
    variables['probTbc'] = leading_fj.probTbc
    variables['probTbq'] = leading_fj.probTbq
    variables['probWcq'] = leading_fj.probWcq
    variables['probWqq'] = leading_fj.probWqq
    variables['probZbb'] = leading_fj.probZbb
    variables['probZcc'] = leading_fj.probZcc
    variables['probZqq'] = leading_fj.probZqq
    variables['probHbb'] = leading_fj.probHbb
    variables['probHcc'] = leading_fj.probHcc
    variables['probHqqqq'] = leading_fj.probHqqqq
    variables['probQCDbb'] = leading_fj.probQCDbb
    variables['probQCDcc'] = leading_fj.probQCDcc
    variables['probQCDb'] = leading_fj.probQCDb
    variables['probQCDc'] = leading_fj.probQCDc
    variables['probQCDothers'] = leading_fj.probQCDothers
    ###
    # Fill histograms
    ###
    hout = self.accumulator.identity()
    hout['sumw'].fill(dataset=dataset, sumw=1, weight=sumw)
    i = 0
    while i < len(selected_regions[dataset]):
        r = selected_regions[dataset][i]
        weight = weights[r].weight()
        for s in ['baggy','mass0','mass1','mass2','mass3','mass4',
                  'baggy_extrab','mass0_extrab','mass1_extrab','mass2_extrab','mass3_extrab','mass4_extrab',
                  'baggy_ismonohs','mass0_ismonohs','mass1_ismonohs','mass2_ismonohs','mass3_ismonohs','mass4_ismonohs',
                  'baggy_extrab_ismonohs','mass0_extrab_ismonohs','mass1_extrab_ismonohs','mass2_extrab_ismonohs','mass3_extrab_ismonohs','mass4_extrab_ismonohs',
                  'baggy_ismonojet','mass0_ismonojet','mass1_ismonojet','mass2_ismonojet','mass3_ismonojet','mass4_ismonojet',
                  'baggy_extrab_ismonojet','mass0_extrab_ismonojet','mass1_extrab_ismonojet','mass2_extrab_ismonojet','mass3_extrab_ismonojet','mass4_extrab_ismonojet']:
            cut = selections.all(*regions[r+'_'+s])
            # Flatten jagged per-object variables; NaN entries are zeroed
            # out through the weight (~np.isnan -> 0/1 mask).
            flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
            flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}
            for histname, h in hout.items():
                if not isinstance(h, hist.Hist):
                    continue
                elif histname == 'sumw':
                    continue
                # Event-level histograms keep the full event weight and
                # fold the selection in as weight*cut (0 outside the cut).
                elif histname == 'fjmass':
                    h.fill(dataset=dataset, region=r, jet_selection=s, fjmass=leading_fj_msd_corr, weight=weight*cut)
                elif histname == 'recoil':
                    h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, weight=weight*cut)
                elif histname == 'CaloMinusPfOverRecoil':
                    h.fill(dataset=dataset, region=r, jet_selection=s, CaloMinusPfOverRecoil= abs(calomet.pt - met.pt) / u[r].pt, weight=weight*cut)
                elif histname == 'mindphi':
                    h.fill(dataset=dataset, region=r, jet_selection=s, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                elif histname == 'diledphi':
                    h.fill(dataset=dataset, region=r, jet_selection=s, diledphi=abs(lepSys[r].delta_phi(j_clean)).min(), weight=weight*cut)
                elif histname == 'ledphi':
                    h.fill(dataset=dataset, region=r, jet_selection=s, ledphi=abs(leadlepton[r].delta_phi(j_clean)).min(), weight=weight*cut)
                elif histname == 'recoilVSmindphi':
                    h.fill(dataset=dataset, region=r, jet_selection=s, recoil=u[r].pt, mindphi=abs(u[r].delta_phi(j_clean)).min(), weight=weight*cut)
                else:
                    flat_variable = {histname: flat_variables[histname]}
                    h.fill(dataset=dataset, region=r, jet_selection=s, **flat_variable, weight=flat_weights[histname])
        i += 1
    return hout
def process(self, df):
    """Fill lepton-jet pf-isolation histograms for one chunk of events.

    Builds AK4 jets and lepton-jet candidates, classifies each event as
    4mu (two muon-type leptonjets leading) or 2mu2e (exactly one, in the
    leading two), optionally restricts to the |dphi| < pi/2 control
    region, and fills the 'ljpfiso' histogram for three isolation
    definitions (all05, nopu05, dbeta).

    Fixes relative to the previous revision:
    * The gen-weight guard compared ``len(dataset) != 1`` — the length of
      the dataset *name string* — so the MC weight was effectively applied
      unconditionally, including on data. It now uses
      ``self.data_type != 'data'``, consistent with the other processors
      in this file.
    * In the dphi-control branch every per-event array was re-masked by
      ``isControl`` except ``ak4jets``, while ``ak4jets.counts`` is later
      indexed with the masked ``channel_`` — a length mismatch. ``ak4jets``
      is now masked as well.
    """
    output = self.accumulator.identity()
    if df.size == 0: return output

    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    # Gen weight only applies to simulation (was: len(dataset) != 1,
    # which tested the dataset-name length and was almost always true).
    if self.data_type != 'data':
        wgts.add('genw', df['weight'])

    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...other weights go here
    weight = wgts.weight()
    ########################

    # AK4 CHS jets with basic kinematic + jet-id requirements.
    ak4jets = JaggedCandidateArray.candidatesfromcounts(
        df['akjet_ak4PFJetsCHS_p4'],
        px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'],
        py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'],
        pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'],
        energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'],
        jetid=df['akjet_ak4PFJetsCHS_jetid'],
    )
    ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 20) & (np.abs(ak4jets.eta) < 2.5)]

    # Lepton-jet candidates with the three isolation definitions attached.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'],
        py=df['pfjet_p4.fCoordinates.fY'],
        pz=df['pfjet_p4.fCoordinates.fZ'],
        energy=df['pfjet_p4.fCoordinates.fT'],
        pfisoAll05=df['pfjet_pfIsolation05'],
        pfisoNopu05=df['pfjet_pfIsolationNoPU05'],
        pfisoDbeta=df['pfjet_pfiso'],
        ncands=df['pfjet_pfcands_n'],
    )
    # Classify leptonjets by daughter content: type 3 = pf muon,
    # type 8 = displaced standalone (DSA) muon.
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma-type, 2 = pf-muon-type, 3 = dsa-type.
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))

    ## __ twoleptonjets__
    twoleptonjets = leptonjets.counts >= 2
    dileptonjets = leptonjets[twoleptonjets]
    ak4jets = ak4jets[twoleptonjets]
    wgt = weight[twoleptonjets]

    if dileptonjets.size == 0: return output
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    ## channel def ##
    # channel: 1 = 2mu2e (one muon-type lj in the leading two),
    #          2 = 4mu   (both leading ljs muon-type), 0 = neither.
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1

    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

    channel_ = channel_2mu2e + channel_4mu
    ###########

    # Control region: leading two leptonjets NOT back-to-back.
    isControl = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()

    ## __isControl__
    if self.dphi_control:
        leptonjets_ = dileptonjets[isControl]
        wgt = wgt[isControl]
        lj0 = lj0[isControl]
        lj1 = lj1[isControl]
        channel_ = channel_[isControl]
        # Keep ak4jets aligned with the other per-event arrays
        # (previously it was left unmasked, so ak4jets.counts[channel_==2]
        # below indexed with a mask of the wrong length).
        ak4jets = ak4jets[isControl]
    else:
        leptonjets_ = dileptonjets
    if leptonjets_.size == 0: return output

    ## 4mu: fill leading/subleading leptonjet isolation per definition.
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=lj0[channel_ == 2].pfisoAll05.flatten(),
                           lj1iso=lj1[channel_ == 2].pfisoAll05.flatten(),
                           weight=wgt[channel_ == 2],
                           channel='4mu',
                           isotype='all05',
                           njet=ak4jets.counts[channel_ == 2])
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=lj0[channel_ == 2].pfisoNopu05.flatten(),
                           lj1iso=lj1[channel_ == 2].pfisoNopu05.flatten(),
                           weight=wgt[channel_ == 2],
                           channel='4mu',
                           isotype='nopu05',
                           njet=ak4jets.counts[channel_ == 2])
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=lj0[channel_ == 2].pfisoDbeta.flatten(),
                           lj1iso=lj1[channel_ == 2].pfisoDbeta.flatten(),
                           weight=wgt[channel_ == 2],
                           channel='4mu',
                           isotype='dbeta',
                           njet=ak4jets.counts[channel_ == 2])

    ## 2mu2e: lj0iso = leading egamma-type lj, lj1iso = leading mu-type lj.
    leptonjets_2mu2e = leptonjets_[channel_ == 1]
    egm_2mu2e = leptonjets_2mu2e[leptonjets_2mu2e.iseltype]
    egm_2mu2e = egm_2mu2e[egm_2mu2e.pt.argmax()]
    mu_2mu2e = leptonjets_2mu2e[leptonjets_2mu2e.ismutype]
    mu_2mu2e = mu_2mu2e[mu_2mu2e.pt.argmax()]
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=egm_2mu2e.pfisoAll05.flatten(),
                           lj1iso=mu_2mu2e.pfisoAll05.flatten(),
                           weight=wgt[channel_ == 1],
                           channel='2mu2e',
                           isotype='all05',
                           njet=ak4jets.counts[channel_ == 1])
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=egm_2mu2e.pfisoNopu05.flatten(),
                           lj1iso=mu_2mu2e.pfisoNopu05.flatten(),
                           weight=wgt[channel_ == 1],
                           channel='2mu2e',
                           isotype='nopu05',
                           njet=ak4jets.counts[channel_ == 1])
    output['ljpfiso'].fill(dataset=dataset,
                           lj0iso=egm_2mu2e.pfisoDbeta.flatten(),
                           lj1iso=mu_2mu2e.pfisoDbeta.flatten(),
                           weight=wgt[channel_ == 1],
                           channel='2mu2e',
                           isotype='dbeta',
                           njet=ak4jets.counts[channel_ == 1])

    return output
def process(self, df):
    """Fill lepton-jet kinematic histograms (pt, mass, vertex displacement,
    charge-neutrality, pair mass/dphi) for one chunk of events, split into
    4mu and 2mu2e channels.

    Parameters
    ----------
    df : coffea LazyDataFrame-like mapping of branch name -> array.

    Returns
    -------
    The filled accumulator (``output``).
    """
    output = self.accumulator.identity()
    if df.size == 0: return output

    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    # Gen and pileup weights only apply to simulation.
    if self.data_type != 'data':
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        # self.pucorrs provides (central, up, down) pileup corrections.
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))

    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...other weights go here
    weight = wgts.weight()
    ########################

    # Lepton-jet candidates; (vx, vy, vz) is the Kalman-fit vertex.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'],
        py=df['pfjet_p4.fCoordinates.fY'],
        pz=df['pfjet_p4.fCoordinates.fZ'],
        energy=df['pfjet_p4.fCoordinates.fT'],
        vx=df['pfjet_klmvtx.fCoordinates.fX'],
        vy=df['pfjet_klmvtx.fCoordinates.fY'],
        vz=df['pfjet_klmvtx.fCoordinates.fZ'],
    )
    # Transverse vertex displacement.
    leptonjets.add_attributes(vxy=np.hypot(leptonjets.vx, leptonjets.vy))
    # Classify by daughter content: type 3 = pf muon, type 8 = DSA muon.
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma-type, 2 = pf-muon-type, 3 = dsa-type.
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
    # qsum: summed charge of the daughters; a muon-type leptonjet is
    # "neutral" when its daughters' charges cancel.
    ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=ljdaucharge)
    leptonjets.add_attributes(
        isneutral=(leptonjets.iseltype | (leptonjets.ismutype & (leptonjets.qsum == 0))))

    ## __ twoleptonjets__
    # Require >= 2 leptonjets with at least one muon-type.
    twoleptonjets = (leptonjets.counts >= 2) & (leptonjets.ismutype.sum() >= 1)
    dileptonjets = leptonjets[twoleptonjets]
    wgt = weight[twoleptonjets]

    if dileptonjets.size == 0: return output
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    ## channel def ##
    # channel: 1 = 2mu2e (one muon-type lj in the leading two),
    #          2 = 4mu   (both leading ljs muon-type), 0 = neither.
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1

    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

    channel_ = channel_2mu2e + channel_4mu
    ###########

    output['lj0pt'].fill(dataset=dataset,
                         pt=lj0.pt.flatten(),
                         channel=channel_,
                         weight=wgt)
    output['lj1pt'].fill(dataset=dataset,
                         pt=lj1.pt.flatten(),
                         channel=channel_,
                         weight=wgt)

    # Muon-type leptonjet histograms: broadcast the per-event channel and
    # weight to per-leptonjet via multiplication with a ones-like jagged
    # array, then flatten.
    mulj = dileptonjets[dileptonjets.ismutype]
    muljones = mulj.pt.ones_like()
    output['muljmass'].fill(dataset=dataset,
                            ljmass=mulj.mass.flatten(),
                            channel=(channel_ * muljones).flatten(),
                            weight=(wgt * muljones).flatten())
    output['muljvxy'].fill(dataset=dataset,
                           vxy=mulj.vxy.flatten(),
                           channel=(channel_ * muljones).flatten(),
                           weight=(wgt * muljones).flatten())
    # NOTE(review): the 'muljqsum' histogram is filled with the boolean
    # isneutral flag, not the raw charge sum — confirm the axis semantics.
    output['muljqsum'].fill(dataset=dataset,
                            qsum=mulj.isneutral.flatten(),
                            channel=(channel_ * muljones).flatten(),
                            weight=(wgt * muljones).flatten())

    output['ljpairmass'].fill(dataset=dataset,
                              pairmass=(lj0.p4 + lj1.p4).mass.flatten(),
                              channel=channel_,
                              weight=wgt)
    output['ljpairdphi'].fill(dataset=dataset,
                              dphi=(np.abs(lj0.p4.delta_phi(
                                  lj1.p4))).flatten(),
                              channel=channel_,
                              weight=wgt)

    return output
def process(self, df):
    """Dilepton analysis: build e/mu/jet objects from flat NanoAOD columns,
    form same-flavor pairs, and fill the invariant-mass histogram per
    channel ('em', 'mm', 'ee') and level ('dilepton', '2jets').

    Fix vs. original: `astype(np.bool)` -> `astype(bool)`; the `np.bool`
    alias was deprecated in NumPy 1.20 and removed in 1.24, so the old
    spelling raises AttributeError on modern NumPy.

    Parameters
    ----------
    df : coffea LazyDataFrame
        Flat ntuple columns (Electron_*, Muon_*, Jet_*, MET_*).

    Returns
    -------
    accumulator
        Histogram accumulator with 'dummy' and 'invmass' filled.
    """
    # Dataset parameters
    dataset = df['dataset']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']
    isData = self._samples[dataset]['isData']

    ### Recover objects, selection, functions and others...
    # Objects
    isTightMuon = self._objects['isTightMuon']
    isTightElectron = self._objects['isTightElectron']
    isGoodJet = self._objects['isGoodJet']
    # Corrections
    GetMuonIsoSF = self._corrections['getMuonIso']
    GetMuonIDSF = self._corrections['getMuonID']
    # Selection
    passNJets = self._selection['passNJets']
    passMETcut = self._selection['passMETcut']
    # Functions
    pow2 = self._functions['pow2']

    # Initialize objects (MET is built as a massless object at eta = 0)
    met = Initialize({
        'pt': df['MET_pt'],
        'eta': 0,
        'phi': df['MET_phi'],
        'mass': 0
    })
    e = Initialize({
        'pt': df['Electron_pt'],
        'eta': df['Electron_eta'],
        'phi': df['Electron_phi'],
        'mass': df['Electron_mass']
    })
    mu = Initialize({
        'pt': df['Muon_pt'],
        'eta': df['Muon_eta'],
        'phi': df['Muon_phi'],
        'mass': df['Muon_mass']
    })
    j = Initialize({
        'pt': df['Jet_pt'],
        'eta': df['Jet_eta'],
        'phi': df['Jet_phi'],
        'mass': df['Jet_mass']
    })

    # Electron selection: attach extra branches (zero-filled when the column
    # is absent from this ntuple), then keep tight leading electrons.
    for key in self._e:
        e[key] = e.pt.zeros_like()
        if self._e[key] in df: e[key] = df[self._e[key]]
    e['istight'] = isTightElectron(e.pt, e.eta, e.dxy, e.dz, e.id, year)
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.istight.astype(bool)]
    nElec = e.counts

    # Muon selection (same pattern as electrons)
    for key in self._mu:
        mu[key] = mu.pt.zeros_like()
        if self._mu[key] in df: mu[key] = df[self._mu[key]]
    mu['istight'] = isTightMuon(mu.pt, mu.eta, mu.dxy, mu.dz, mu.iso,
                                mu.tight_id, year)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.istight.astype(bool)]
    nMuon = mu.counts

    # Jet selection: good id + cleaned against any electron/muon within dR 0.4
    j['deepcsv'] = df['Jet_btagDeepB']
    j['deepflv'] = df['Jet_btagDeepFlavB']
    for key in self._jet:
        j[key] = j.pt.zeros_like()
        if self._jet[key] in df: j[key] = df[self._jet[key]]
    j['isgood'] = isGoodJet(j.pt, j.eta, j.id)
    j['isclean'] = ~j.match(e, 0.4) & ~j.match(mu, 0.4) & j.isgood.astype(bool)
    j0 = j[j.pt.argmax()]
    j0 = j0[j0.isclean.astype(bool)]
    nJets = j.counts

    # Dilepton pair: sum all distinct same-flavor pairs and keep the
    # highest-pt pair; fall back to the leading lepton when no pair exists.
    ele_pairs = e.distincts()
    diele = leading_e
    leading_diele = leading_e
    if ele_pairs.i0.content.size > 0:
        diele = ele_pairs.i0 + ele_pairs.i1
        leading_diele = diele[diele.pt.argmax()]

    mu_pairs = mu.distincts()
    dimu = leading_mu
    leading_dimu = leading_mu
    if mu_pairs.i0.content.size > 0:
        dimu = mu_pairs.i0 + mu_pairs.i1
        leading_dimu = dimu[dimu.pt.argmax()]
    mmumu = leading_dimu.mass

    # Triggers

    # MET filters

    # Weights: xsec/sum-of-weights normalization (unit genWeight for data)
    genw = np.ones_like(df['MET_pt']) if isData else df['genWeight']
    weights = processor.Weights(df.size)
    weights.add('norm', xsec / sow * genw)

    # Selections and cuts
    selections = processor.PackedSelection()
    channels = ['em', 'mm', 'ee']
    selections.add('em', (nElec == 1) & (nMuon == 1))
    selections.add('ee', (nElec >= 2))
    selections.add('mm', (nMuon >= 2))
    levels = ['dilepton', '2jets']
    # NOTE: first two clauses are subsumed by the third; kept for clarity
    selections.add('dilepton', (nElec >= 2) | (nMuon >= 2) |
                   ((nElec + nMuon) >= 2))
    selections.add('2jets', (nJets >= 2))

    # Variables

    # Fill Histos
    hout = self.accumulator.identity()
    hout['dummy'].fill(sample=dataset, dummy=1, weight=df.size)
    for ch in channels:
        for lev in levels:
            weight = weights.weight()
            cuts = [ch] + [lev]
            cut = selections.all(*cuts)
            invmass_flat = mmumu[cut].flatten()
            # NaN masses (no dimuon pair) get zero weight instead of NaN
            weights_flat = (~np.isnan(mmumu[cut]) * weight[cut]).flatten()
            hout['invmass'].fill(
                sample=dataset,
                channel=ch,
                level=lev,
                invmass=invmass_flat,
                weight=weights_flat)  #*selections.all(*{'mm'})
            #flat_variables = {k: v[cut].flatten() for k, v in variables.items()}
            #flat_weights = {k: (~np.isnan(v[cut])*weight[cut]).flatten() for k, v in variables.items()}

    #hout['invmass'].fill(sample=dataset, channel='mm', level="dilepton", invmass=mmumu, weight=np.ones_like(df['MET_pt']))#weight=weights.weight())#*selections.all(*{'mm'})
    return hout
def process(self, events):
    """WZG-style analysis: select 2e + 1mu + >= 1 photon events, split them
    into a signal region and three control regions, and fill kinematic
    histograms with data / MC / fake-photon weights.

    Fixes vs. original:
    * `dtype=np.bool` -> `dtype=bool` (4 sites): the `np.bool` alias was
      deprecated in NumPy 1.20 and removed in 1.24.
    * the "mu_iso" weight was filled with `get_mu_tight_id_sf` (copy-paste);
      it now uses `get_mu_tight_iso_sf`, which was already computed.

    Parameters
    ----------
    events : coffea NanoEvents array.

    Returns
    -------
    accumulator
        Histogram accumulator; returned early (empty) when a cut leaves
        no events.
    """
    # Initialize accumulator
    out = self.accumulator.identity()
    dataset = sample_name
    # events.metadata['dataset']

    # Data or MC
    isData = "genWeight" not in events.fields
    isFake = self._isFake

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    # Golden Json file (data only; path selected by run year)
    if (self._year == "2018") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"

    if (self._year == "2017") and isData:
        injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

    # <----- Get Scale factors ------>#
    if not isData:
        # Egamma reco ID
        get_ele_reco_above20_sf = self._corrections[
            "get_ele_reco_above20_sf"][self._year]
        get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][
            self._year]
        get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][
            self._year]

        # DoubleEG trigger
        # 2016, 2017 are not applied yet
        if self._year == "2018":
            get_ele_trig_leg1_SF = self._corrections[
                "get_ele_trig_leg1_SF"][self._year]
            get_ele_trig_leg1_data_Eff = self._corrections[
                "get_ele_trig_leg1_data_Eff"][self._year]
            get_ele_trig_leg1_mc_Eff = self._corrections[
                "get_ele_trig_leg1_mc_Eff"][self._year]
            get_ele_trig_leg2_SF = self._corrections[
                "get_ele_trig_leg2_SF"][self._year]
            get_ele_trig_leg2_data_Eff = self._corrections[
                "get_ele_trig_leg2_data_Eff"][self._year]
            get_ele_trig_leg2_mc_Eff = self._corrections[
                "get_ele_trig_leg2_mc_Eff"][self._year]

        # Muon ID, Iso
        get_mu_tight_id_sf = self._corrections["get_mu_tight_id_sf"][
            self._year]
        get_mu_tight_iso_sf = self._corrections["get_mu_tight_iso_sf"][
            self._year]

        # PU weight with custom made npy and multi-indexing
        pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
        pu = self._puweight_arr[pu_weight_idx]

    # <----- Helper functions ------>#

    # Sort by PT helper function
    def sort_by_pt(ele, pho, jet):
        ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
        pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
        jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]
        return ele, pho, jet

    # Lorentz vectors
    from coffea.nanoevents.methods import vector
    ak.behavior.update(vector.behavior)

    def TLorentz_vector(vec):
        # Cartesian four-vector (used for summed electron pairs)
        vec = ak.zip(
            {
                "x": vec.x,
                "y": vec.y,
                "z": vec.z,
                "t": vec.t
            },
            with_name="LorentzVector",
        )
        return vec

    def TLorentz_vector_cylinder(vec):
        # (pt, eta, phi, mass) four-vector
        vec = ak.zip(
            {
                "pt": vec.pt,
                "eta": vec.eta,
                "phi": vec.phi,
                "mass": vec.mass,
            },
            with_name="PtEtaPhiMLorentzVector",
        )
        return vec

    # <----- Selection ------>#

    Initial_events = events

    # Good Run ( Golden Json files )
    from coffea import lumi_tools

    if isData:
        lumi_mask_builder = lumi_tools.LumiMask(injson)
        lumimask = ak.Array(
            lumi_mask_builder.__call__(events.run, events.luminosityBlock))
        events = events[lumimask]
        # print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events)))

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    # Cut flow
    cut0 = np.zeros(len(events))

    ##----------- Cut flow1: Passing Triggers

    # double lepton trigger (OR over all available paths)
    is_double_ele_trigger = True
    if not is_double_ele_trigger:
        double_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._doubleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                path]

    # single lepton trigger (computed but only the double trigger is applied)
    is_single_ele_trigger = True
    if not is_single_ele_trigger:
        single_ele_triggers_arr = np.ones(len(events), dtype=bool)
    else:
        single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
        for path in self._singleelectron_triggers[self._year]:
            if path not in events.HLT.fields:
                continue
            single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                path]

    events.Electron, events.Photon, events.Jet = sort_by_pt(
        events.Electron, events.Photon, events.Jet)

    # Good Primary vertex
    nPV = events.PV.npvsGood
    nPV_nw = events.PV.npvsGood
    if not isData:
        nPV = nPV * pu
        print(pu)

    # Apply cut1
    events = events[double_ele_triggers_arr]
    if not isData:
        pu = pu[double_ele_triggers_arr]

    # Stop processing if there is no event remain
    if len(events) == 0:
        return out

    cut1 = np.ones(len(events))

    # Set Particles
    Electron = events.Electron
    Muon = events.Muon
    Photon = events.Photon
    MET = events.MET
    Jet = events.Jet

    ##----------- Cut flow2: Muon Selection
    MuSelmask = ((Muon.pt >= 10) & (abs(Muon.eta) <= 2.5) & (Muon.tightId) &
                 (Muon.pfRelIso04_all < 0.15))
    Muon = Muon[MuSelmask]

    # Exatly one muon
    Muon_sel_mask = ak.num(Muon) == 1
    Electron = Electron[Muon_sel_mask]
    Photon = Photon[Muon_sel_mask]
    Jet = Jet[Muon_sel_mask]
    MET = MET[Muon_sel_mask]
    Muon = Muon[Muon_sel_mask]
    events = events[Muon_sel_mask]
    if not isData:
        pu = pu[Muon_sel_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut2 = np.ones(len(Photon)) * 2

    ##----------- Cut flow3: Electron Selection
    # barrel (|SC eta| < 1.479) OR endcap (1.479 < |SC eta| <= 2.5),
    # each with medium cut-based id and region-specific dxy/dz cuts
    EleSelmask = ((Electron.pt >= 10) &
                  (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) &
                  (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) &
                  (abs(Electron.dz) < 0.1)) | (
                      (Electron.pt >= 10) &
                      (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) &
                      (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) &
                      (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) &
                      (abs(Electron.dz) < 0.2))

    Electron = Electron[EleSelmask]

    # Exactly two electrons
    ee_mask = ak.num(Electron) == 2
    Electron = Electron[ee_mask]
    Photon = Photon[ee_mask]
    Jet = Jet[ee_mask]
    MET = MET[ee_mask]
    Muon = Muon[ee_mask]
    if not isData:
        pu = pu[ee_mask]
    events = events[ee_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut3 = np.ones(len(Photon)) * 3

    ##----------- Cut flow4: Photon Selection

    # Basic photon selection (ECAL barrel/endcap, excluding the gap)
    isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                              (abs(Photon.eta) < 2.5))
    Pixel_seed_mask = ~Photon.pixelSeed

    if (dataset == "ZZ") and (self._year == "2017"):
        PT_ID_mask = (Photon.pt >= 20) & (
            Photon.cutBasedBitmap >= 3
        )  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
    else:
        PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)

    # dR cut with selected Muon and Electrons
    dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                             axis=-1)  # default metric table: delta_r
    dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

    # genPartFlav cut
    """
    if dataset == "WZG":
        isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    elif dataset == "WZ":
        isPrompt = (Photon.genPartFlav == 1)
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
    else:
        PhoSelmask = PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
    """

    # Add genPartFlav to remove Fake Photon in MC samples ( They are already considered by data driven method )
    if not isData:
        genPartFlav_mask = (Photon.genPartFlav == 1)
        PhoSelmask = (genPartFlav_mask & PT_ID_mask & isgap_mask &
                      Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask)
    else:
        PhoSelmask = (PT_ID_mask & isgap_mask & Pixel_seed_mask &
                      dr_pho_ele_mask & dr_pho_mu_mask)

    Photon = Photon[PhoSelmask]

    # Apply cut 4
    A_photon_mask = ak.num(Photon) > 0
    Electron = Electron[A_photon_mask]
    Photon = Photon[A_photon_mask]
    Jet = Jet[A_photon_mask]
    Muon = Muon[A_photon_mask]
    MET = MET[A_photon_mask]
    if not isData:
        pu = pu[A_photon_mask]
    events = events[A_photon_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    def make_leading_pair(target, base):
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -------------------- Make Fake Photon BKGs---------------------------#
    # Look up a (pt, eta) bin mask by name; bins: pt 20/30/40/50+, |eta|
    # 0/1/1.5/2/2.5
    def make_bins(pt, eta, bin_range_str):
        bin_dict = {
            "PT_1_eta_1": (pt > 20) & (pt < 30) & (eta < 1),
            "PT_1_eta_2": (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5),
            "PT_1_eta_3": (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2),
            "PT_1_eta_4": (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5),
            "PT_2_eta_1": (pt > 30) & (pt < 40) & (eta < 1),
            "PT_2_eta_2": (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5),
            "PT_2_eta_3": (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2),
            "PT_2_eta_4": (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5),
            "PT_3_eta_1": (pt > 40) & (pt < 50) & (eta < 1),
            "PT_3_eta_2": (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5),
            "PT_3_eta_3": (pt > 40) & (pt < 50) & (eta > 1.5) & (eta < 2),
            "PT_3_eta_4": (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5),
            "PT_4_eta_1": (pt > 50) & (eta < 1),
            "PT_4_eta_2": (pt > 50) & (eta > 1) & (eta < 1.5),
            "PT_4_eta_3": (pt > 50) & (eta > 1.5) & (eta < 2),
            "PT_4_eta_4": (pt > 50) & (eta > 2) & (eta < 2.5),
        }
        binmask = bin_dict[bin_range_str]
        return binmask

    bin_name_list = [
        "PT_1_eta_1",
        "PT_1_eta_2",
        "PT_1_eta_3",
        "PT_1_eta_4",
        "PT_2_eta_1",
        "PT_2_eta_2",
        "PT_2_eta_3",
        "PT_2_eta_4",
        "PT_3_eta_1",
        "PT_3_eta_2",
        "PT_3_eta_3",
        "PT_3_eta_4",
        "PT_4_eta_1",
        "PT_4_eta_2",
        "PT_4_eta_3",
        "PT_4_eta_4",
    ]

    ## -- Fake-fraction Lookup table --##
    if isFake:
        # Make Bin-range mask
        binned_pteta_mask = {}
        for name in bin_name_list:
            binned_pteta_mask[name] = make_bins(
                ak.flatten(leading_pho.pt),
                ak.flatten(abs(leading_pho.eta)),
                name,
            )

        # Read Fake fraction --> Mapping bin name to int()
        if self._year == "2018":
            in_dict = np.load("Fitting_2018/Fit_results.npy",
                              allow_pickle="True")[()]
        if self._year == "2017":
            in_dict = np.load("Fitting_2017/Fit_results.npy",
                              allow_pickle="True")[()]

        idx = 0
        fake_dict = {}
        for i, j in in_dict.items():
            fake_dict[idx] = j
            idx += 1

        # Reconstruct Fake_weight: each event gets the fraction of its bin
        fw = 0
        for i, j in binned_pteta_mask.items():
            fw = fw + j * fake_dict[bin_name_list.index(i)]

        # Process 0 weight to 1
        @numba.njit
        def zero_one(x):
            if x == 0:
                x = 1
            return x

        vec_zero_one = np.vectorize(zero_one)
        fw = vec_zero_one(fw)
    else:
        fw = np.ones(len(events))

    cut4 = np.ones(len(Photon)) * 4
    print("Fake fraction weight: ", len(fw), len(cut4), fw)

    ##----------- Cut flow5: OSSF
    ossf_mask = Electron.charge[:, 0] + Electron.charge[:, 1] == 0

    # Apply cut 5
    Electron = Electron[ossf_mask]
    Photon = Photon[ossf_mask]
    fw = fw[ossf_mask]
    Jet = Jet[ossf_mask]
    MET = MET[ossf_mask]
    Muon = Muon[ossf_mask]
    if not isData:
        pu = pu[ossf_mask]
    events = events[ossf_mask]

    # Stop processing if there is no event remain
    if len(Electron) == 0:
        return out

    cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5

    # Define Electron Triplet
    Diele = ak.zip({
        "lep1": Electron[:, 0],
        "lep2": Electron[:, 1],
        "p4": TLorentz_vector(Electron[:, 0] + Electron[:, 1]),
    })

    leading_ele = Diele.lep1
    subleading_ele = Diele.lep2

    def make_leading_pair(target, base):
        return target[ak.argmax(base.pt, axis=1, keepdims=True)]

    leading_pho = make_leading_pair(Photon, Photon)

    # -- Scale Factor for each electron

    # Trigger weight helper function: per-event data/MC efficiency ratio,
    # with inclusion-exclusion over the two trigger legs
    def Trigger_Weight(eta1, pt1, eta2, pt2):
        per_ev_MC = (get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                     get_ele_trig_leg2_mc_Eff(eta2, pt2) +
                     get_ele_trig_leg1_mc_Eff(eta2, pt2) *
                     get_ele_trig_leg2_mc_Eff(eta1, pt1) -
                     get_ele_trig_leg1_mc_Eff(eta1, pt1) *
                     get_ele_trig_leg1_mc_Eff(eta2, pt2))

        per_ev_data = (
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                eta1, pt1) * get_ele_trig_leg2_data_Eff(eta2, pt2) *
            get_ele_trig_leg2_SF(eta2, pt2) +
            get_ele_trig_leg1_data_Eff(eta2, pt2) * get_ele_trig_leg1_SF(
                eta2, pt2) * get_ele_trig_leg2_data_Eff(eta1, pt1) *
            get_ele_trig_leg2_SF(eta1, pt1) -
            get_ele_trig_leg1_data_Eff(eta1, pt1) * get_ele_trig_leg1_SF(
                eta1, pt1) * get_ele_trig_leg1_data_Eff(eta2, pt2) *
            get_ele_trig_leg1_SF(eta2, pt2))

        return per_ev_data / per_ev_MC

    if not isData:
        ## -------------< Egamma ID and Reco Scale factor > -----------------##
        get_pho_medium_id_sf = get_pho_medium_id_sf(
            ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))

        ele_reco_sf = get_ele_reco_above20_sf(
            leading_ele.deltaEtaSC + leading_ele.eta,
            leading_ele.pt,
        ) * get_ele_reco_above20_sf(
            subleading_ele.deltaEtaSC + subleading_ele.eta,
            subleading_ele.pt,
        )

        ele_medium_id_sf = get_ele_medium_id_sf(
            leading_ele.deltaEtaSC + leading_ele.eta,
            leading_ele.pt,
        ) * get_ele_medium_id_sf(
            subleading_ele.deltaEtaSC + subleading_ele.eta,
            subleading_ele.pt,
        )

        ## -------------< Muon ID and Iso Scale factor > -----------------##
        get_mu_tight_id_sf = get_mu_tight_id_sf(ak.flatten(abs(Muon.eta)),
                                                ak.flatten(Muon.pt))
        get_mu_tight_iso_sf = get_mu_tight_iso_sf(
            ak.flatten(abs(Muon.eta)), ak.flatten(Muon.pt))

        ## -------------< Double Electron Trigger Scale factor > -----------------##
        eta1 = leading_ele.deltaEtaSC + leading_ele.eta
        eta2 = subleading_ele.deltaEtaSC + subleading_ele.eta
        pt1 = leading_ele.pt
        pt2 = subleading_ele.pt

        # -- 2017,2016 are not applied yet
        if self._year == "2018":
            ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

    ##----------- Cut flow6: Baseline selection

    # Mee cut
    Mee_cut_mask = Diele.p4.mass > 4

    # Lepton PT cuts
    Leppt_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20) &
                           (Muon.pt >= 25))

    # MET cuts
    MET_mask = MET.pt > 20

    # Baseline
    # Assemble!!
    Baseline_mask = Leppt_mask & MET_mask & Mee_cut_mask  # SR,CR

    # Apply cut6
    Diele_base = Diele[Baseline_mask]
    leading_pho_base = leading_pho[Baseline_mask]
    Jet_base = Jet[Baseline_mask]
    MET_base = MET[Baseline_mask]
    Muon_base = Muon[Baseline_mask]
    events_base = events[Baseline_mask]

    # Photon EE and EB
    isEE_mask = leading_pho.isScEtaEE
    isEB_mask = leading_pho.isScEtaEB
    Pho_EE_base = leading_pho[isEE_mask & Baseline_mask]
    Pho_EB_base = leading_pho[isEB_mask & Baseline_mask]

    # Stop processing if there is no event remain
    if len(leading_pho_base) == 0:
        return out

    cut6 = np.ones(ak.sum(ak.num(leading_pho_base) > 0)) * 6

    base_arr_dict = {
        "Diele_sel": Diele_base,
        "leading_pho_sel": leading_pho_base,
        "Jet_sel": Jet_base,
        "MET_sel": MET_base,
        "Muon_sel": Muon_base,
        "Pho_EE_sel": Pho_EE_base,
        "Pho_EB_sel": Pho_EB_base,
    }

    ##----------- << SR >>
    # on-Z, high MET, b-jet veto, high three-lepton mass
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
    MET_mask = MET.pt > 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    SR_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    SR_mask = Baseline_mask & SR_mask
    Diele_SR = Diele[SR_mask]
    leading_pho_SR = leading_pho[SR_mask]
    Muon_SR = Muon[SR_mask]
    MET_SR = MET[SR_mask]
    Jet_SR = Jet[SR_mask]
    events_SR = events[SR_mask]
    Pho_EE_SR = leading_pho[isEE_mask & SR_mask]
    Pho_EB_SR = leading_pho[isEB_mask & SR_mask]

    SR_arr_dict = {
        "Diele_sel": Diele_SR,
        "leading_pho_sel": leading_pho_SR,
        "Jet_sel": Jet_SR,
        "MET_sel": MET_SR,
        "Muon_sel": Muon_SR,
        "Pho_EE_sel": Pho_EE_SR,
        "Pho_EB_sel": Pho_EB_SR,
    }

    ##----------- << CR-Z+Jets >>
    # on-Z but low MET
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) < 15
    MET_mask = MET.pt <= 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    CR_ZJets_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_ZJets_mask = Baseline_mask & CR_ZJets_mask
    Diele_CR_ZJets = Diele[CR_ZJets_mask]
    leading_pho_CR_ZJets = leading_pho[CR_ZJets_mask]
    Muon_CR_ZJets = Muon[CR_ZJets_mask]
    MET_CR_ZJets = MET[CR_ZJets_mask]
    Jet_CR_ZJets = Jet[CR_ZJets_mask]
    events_CR_ZJets = events[CR_ZJets_mask]
    Pho_EE_CR_ZJets = leading_pho[isEE_mask & CR_ZJets_mask]
    Pho_EB_CR_ZJets = leading_pho[isEB_mask & CR_ZJets_mask]

    CR_ZJets_arr_dict = {
        "Diele_sel": Diele_CR_ZJets,
        "leading_pho_sel": leading_pho_CR_ZJets,
        "Jet_sel": Jet_CR_ZJets,
        "MET_sel": MET_CR_ZJets,
        "Muon_sel": Muon_CR_ZJets,
        "Pho_EE_sel": Pho_EE_CR_ZJets,
        "Pho_EB_sel": Pho_EB_CR_ZJets,
    }

    ##----------- << CR-T-enriched >>
    # off-Z, high MET, with a b-tagged jet (ttbar-enriched)
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 5
    MET_mask = MET.pt > 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) > 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) > 100
    CR_Tenri_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_Tenri_mask = Baseline_mask & CR_Tenri_mask
    Diele_CR_t = Diele[CR_Tenri_mask]
    leading_pho_CR_t = leading_pho[CR_Tenri_mask]
    Muon_CR_t = Muon[CR_Tenri_mask]
    MET_CR_t = MET[CR_Tenri_mask]
    Jet_CR_t = Jet[CR_Tenri_mask]
    events_CR_t = events[CR_Tenri_mask]
    Pho_EE_CR_t = leading_pho[isEE_mask & CR_Tenri_mask]
    Pho_EB_CR_t = leading_pho[isEB_mask & CR_Tenri_mask]

    CR_tEnriched_arr_dict = {
        "Diele_sel": Diele_CR_t,
        "leading_pho_sel": leading_pho_CR_t,
        "Jet_sel": Jet_CR_t,
        "MET_sel": MET_CR_t,
        "Muon_sel": Muon_CR_t,
        "Pho_EE_sel": Pho_EE_CR_t,
        "Pho_EB_sel": Pho_EB_CR_t,
    }

    ##----------- << CR-Conversion >>
    # off-Z, low MET, low three-lepton mass (photon-conversion enriched)
    Zmass_window_mask = abs(Diele.p4.mass - 91.1876) > 15
    MET_mask = MET.pt <= 30
    bjet_veto = ak.firsts(Jet.btagDeepB > 0.7665) == 0
    Mlll_mask = ((Diele.p4 + Muon[:, 0]).mass) <= 100
    CR_conv_mask = Zmass_window_mask & MET_mask & bjet_veto & Mlll_mask
    CR_conv_mask = Baseline_mask & CR_conv_mask
    Diele_CR_conv = Diele[CR_conv_mask]
    leading_pho_CR_conv = leading_pho[CR_conv_mask]
    Muon_CR_conv = Muon[CR_conv_mask]
    MET_CR_conv = MET[CR_conv_mask]
    Jet_CR_conv = Jet[CR_conv_mask]
    events_CR_conv = events[CR_conv_mask]
    Pho_EE_CR_conv = leading_pho[isEE_mask & CR_conv_mask]
    Pho_EB_CR_conv = leading_pho[isEB_mask & CR_conv_mask]

    CR_Conversion_dict = {
        "Diele_sel": Diele_CR_conv,
        "leading_pho_sel": leading_pho_CR_conv,
        "Jet_sel": Jet_CR_conv,
        "MET_sel": MET_CR_conv,
        "Muon_sel": Muon_CR_conv,
        "Pho_EE_sel": Pho_EE_CR_conv,
        "Pho_EB_sel": Pho_EB_CR_conv,
    }

    ## -------------------- Prepare making hist --------------#
    regions = {
        "Baseline": base_arr_dict,
        "Signal": SR_arr_dict,
        "CR_ZJets": CR_ZJets_arr_dict,
        "CR_tEnriched": CR_tEnriched_arr_dict,
        "CR_conversion": CR_Conversion_dict,
    }

    mask_dict = {
        "Baseline": Baseline_mask,
        "Signal": SR_mask,
        "CR_ZJets": CR_ZJets_mask,
        "CR_tEnriched": CR_Tenri_mask,
        "CR_conversion": CR_conv_mask,
    }

    for region, arr_dict in regions.items():
        # Photon
        phoPT = ak.flatten(arr_dict["leading_pho_sel"].pt)
        phoEta = ak.flatten(arr_dict["leading_pho_sel"].eta)
        phoPhi = ak.flatten(arr_dict["leading_pho_sel"].phi)

        # Photon EE
        if len(arr_dict["Pho_EE_sel"].pt) != 0:
            Pho_EE_PT = ak.flatten(arr_dict["Pho_EE_sel"].pt)
            Pho_EE_Eta = ak.flatten(arr_dict["Pho_EE_sel"].eta)
            Pho_EE_Phi = ak.flatten(arr_dict["Pho_EE_sel"].phi)
            Pho_EE_sieie = ak.flatten(arr_dict["Pho_EE_sel"].sieie)
            Pho_EE_Iso_charge = ak.flatten(
                arr_dict["Pho_EE_sel"].pfRelIso03_chg)

        # Photon EB
        if len(arr_dict["Pho_EB_sel"].pt) != 0:
            Pho_EB_PT = ak.flatten(arr_dict["Pho_EB_sel"].pt)
            Pho_EB_Eta = ak.flatten(arr_dict["Pho_EB_sel"].eta)
            Pho_EB_Phi = ak.flatten(arr_dict["Pho_EB_sel"].phi)
            Pho_EB_sieie = ak.flatten(arr_dict["Pho_EB_sel"].sieie)
            Pho_EB_Iso_charge = ak.flatten(
                arr_dict["Pho_EB_sel"].pfRelIso03_chg)

        # Electrons
        ele1PT = arr_dict["Diele_sel"].lep1.pt
        ele1Eta = arr_dict["Diele_sel"].lep1.eta
        ele1Phi = arr_dict["Diele_sel"].lep1.phi
        ele2PT = arr_dict["Diele_sel"].lep2.pt
        ele2Eta = arr_dict["Diele_sel"].lep2.eta
        ele2Phi = arr_dict["Diele_sel"].lep2.phi

        # Muon
        muPT = ak.flatten(arr_dict["Muon_sel"].pt)
        muEta = ak.flatten(arr_dict["Muon_sel"].eta)
        muPhi = ak.flatten(arr_dict["Muon_sel"].phi)

        # MET
        met = ak.to_numpy(arr_dict["MET_sel"].pt)

        # M(eea) M(ee)
        diele = arr_dict["Diele_sel"].p4
        lll_vec = diele + arr_dict["Muon_sel"][:, 0]
        Mlll = lll_vec.mass
        Mee = diele.mass

        # W MT (--> beta)
        MT = np.sqrt(
            2 * arr_dict["Muon_sel"].pt * arr_dict["MET_sel"].pt *
            (1 - np.cos(
                abs(arr_dict["MET_sel"].delta_phi(arr_dict["Muon_sel"])))))
        MT = np.array(ak.firsts(MT))

        # --- Apply weight and hist
        weights = processor.Weights(len(cut5))

        # --- skim cut-weight: drop None entries and zero weights so the
        # weight array aligns with the (already-cut) variable arrays
        def skim_weight(arr):
            mask1 = ~ak.is_none(arr)
            subarr = arr[mask1]
            mask2 = subarr != 0
            return ak.to_numpy(subarr[mask2])

        cuts = mask_dict[region]
        cuts_pho_EE = ak.flatten(isEE_mask)
        cuts_pho_EB = ak.flatten(isEB_mask)

        if isFake:
            weights.add("fake_fraction", fw)

        # Weight and SF here
        if not (isData | isFake):
            weights.add("pileup", pu)
            weights.add("ele_id", ele_medium_id_sf)
            weights.add("ele_reco", ele_reco_sf)
            weights.add("pho_id", get_pho_medium_id_sf)
            weights.add("mu_id", get_mu_tight_id_sf)
            # FIX: use the iso SF here (was a copy-paste of the ID SF)
            weights.add("mu_iso", get_mu_tight_iso_sf)

            # 2016,2017 are not applied yet
            if self._year == "2018":
                weights.add("ele_trigger", ele_trig_weight)

        # ---------------------------- Fill hist --------------------------------------#

        # Initial events
        out["sumw"][dataset] += len(Initial_events)

        print(
            "region: {0} ### cut0: {1},cut1: {2}, cut2: {3},cut3: {4},cut4: {5},cut5: {6},cut6: {7}, cut7: {8}"
            .format(region, len(cut0), len(cut1), len(cut2), len(cut3),
                    len(cut4), len(cut5), len(cut6), len(met)))

        # Fill hist

        # -- met -- #
        out["met"].fill(
            dataset=dataset,
            region=region,
            met=met,
            weight=skim_weight(weights.weight() * cuts),
        )

        # --mass -- #
        out["MT"].fill(
            dataset=dataset,
            region=region,
            MT=MT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mass"].fill(
            dataset=dataset,
            region=region,
            mass=Mee,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mass_lll"].fill(
            dataset=dataset,
            region=region,
            mass_lll=Mlll,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Muon -- #
        out["mupt"].fill(
            dataset=dataset,
            region=region,
            mupt=muPT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["mueta"].fill(
            dataset=dataset,
            region=region,
            mueta=muEta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["muphi"].fill(
            dataset=dataset,
            region=region,
            muphi=muPhi,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Electron -- #
        out["ele1pt"].fill(
            dataset=dataset,
            region=region,
            ele1pt=ele1PT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele1eta"].fill(
            dataset=dataset,
            region=region,
            ele1eta=ele1Eta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele1phi"].fill(
            dataset=dataset,
            region=region,
            ele1phi=ele1Phi,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2pt"].fill(
            dataset=dataset,
            region=region,
            ele2pt=ele2PT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2eta"].fill(
            dataset=dataset,
            region=region,
            ele2eta=ele2Eta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["ele2phi"].fill(
            dataset=dataset,
            region=region,
            ele2phi=ele2Phi,
            weight=skim_weight(weights.weight() * cuts),
        )

        # -- Photon -- #
        out["phopt"].fill(
            dataset=dataset,
            region=region,
            phopt=phoPT,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["phoeta"].fill(
            dataset=dataset,
            region=region,
            phoeta=phoEta,
            weight=skim_weight(weights.weight() * cuts),
        )
        out["phophi"].fill(
            dataset=dataset,
            region=region,
            phophi=phoPhi,
            weight=skim_weight(weights.weight() * cuts),
        )

        if len(arr_dict["Pho_EE_sel"].pt) != 0:
            out["pho_EE_pt"].fill(
                dataset=dataset,
                region=region,
                pho_EE_pt=Pho_EE_PT,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_eta"].fill(
                dataset=dataset,
                region=region,
                pho_EE_eta=Pho_EE_Eta,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_phi"].fill(
                dataset=dataset,
                region=region,
                pho_EE_phi=Pho_EE_Phi,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_sieie"].fill(
                dataset=dataset,
                region=region,
                pho_EE_sieie=Pho_EE_sieie,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )
            out["pho_EE_Iso_chg"].fill(
                dataset=dataset,
                region=region,
                pho_EE_Iso_chg=Pho_EE_Iso_charge,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
            )

        if len(arr_dict["Pho_EB_sel"].pt) != 0:
            out["pho_EB_pt"].fill(
                dataset=dataset,
                region=region,
                pho_EB_pt=Pho_EB_PT,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_eta"].fill(
                dataset=dataset,
                region=region,
                pho_EB_eta=Pho_EB_Eta,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_phi"].fill(
                dataset=dataset,
                region=region,
                pho_EB_phi=Pho_EB_Phi,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_sieie"].fill(
                dataset=dataset,
                region=region,
                pho_EB_sieie=Pho_EB_sieie,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )
            out["pho_EB_Iso_chg"].fill(
                dataset=dataset,
                region=region,
                pho_EB_Iso_chg=Pho_EB_Iso_charge,
                weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
            )

    return out
def process(self, events):
    """Boosted-jet analysis: select a leading AK8 candidate jet, build
    signal / muon-control regions, and fill template histograms with
    per-event weights (and, for ggH samples, LHE weight variations).

    Parameters
    ----------
    events : coffea NanoEvents array (old awkward-0 style columns).

    Returns
    -------
    accumulator
        Histogram accumulator.
    """
    dataset = events.metadata['dataset']
    # data is identified by the absence of the MC-only genWeight branch
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()

    # OR of all hadronic trigger paths (MC passes unconditionally)
    if isRealData:
        trigger = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            trigger = trigger | events.HLT[t]
    else:
        trigger = np.ones(events.size, dtype='bool')
    selection.add('trigger', trigger)

    # same for the muon trigger paths
    if isRealData:
        trigger = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger = trigger | events.HLT[t]
    else:
        trigger = np.ones(events.size, dtype='bool')
    selection.add('muontrigger', trigger)

    try:
        fatjets = events.FatJet
    except AttributeError:
        # early pancakes
        fatjets = events.CustomAK8Puppi

    # derived fat-jet quantities: corrected softdrop mass, QCD rho,
    # DDT-transformed N2, and the scaled softdrop mass
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rho'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    fatjets['msdcorr_full'] = fatjets['msdcorr'] * self._msdSF[self._year]

    # candidate jet = leading (first) fat jet passing loose kinematics
    candidatejet = fatjets[
        # https://github.com/DAZSLE/BaconAnalyzer/blob/master/Analyzer/src/VJetLoader.cc#L269
        (fatjets.pt > 200)
        & (abs(fatjets.eta) < 2.5)
        # & fatjets.isLoose  # not always available
    ][:, 0:1]

    selection.add('minjetkin', ((candidatejet.pt >= 450) &
                                (candidatejet.msdcorr >= 47.) &
                                (abs(candidatejet.eta) < 2.5)).any())
    selection.add('jetacceptance', ((candidatejet.msdcorr >= 47.) &
                                    (candidatejet.pt < 1200) &
                                    (candidatejet.msdcorr < 201.)).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('ddbpass', (candidatejet.btagDDBvL >= 0.89).any())

    jets = events.Jet[(events.Jet.pt > 30.)
                      & (abs(events.Jet.eta) < 2.5)
                      & events.Jet.isTight]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    # pair each AK4 jet with the candidate AK8 jet to compute delta-phi
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dphi = abs(ak4_ak8_pair.i0.delta_phi(ak4_ak8_pair.i1))
    # veto b-tagged AK4 activity opposite the candidate jet
    ak4_opposite = jets[(dphi > np.pi / 2).all()]
    selection.add(
        'antiak4btagMediumOppHem',
        ak4_opposite.btagDeepB.max() <
        BTagEfficiency.btagWPs[self._year]['medium'])
    # require b-tagged AK4 activity away from the candidate (muon CR)
    ak4_away = jets[(dphi > 0.8).all()]
    selection.add(
        'ak4btagMedium08',
        ak4_away.btagDeepB.max() >
        BTagEfficiency.btagWPs[self._year]['medium'])

    selection.add('met', events.MET.pt < 140.)

    # lepton counting for the lepton-veto / one-muon selections
    goodmuon = ((events.Muon.pt > 10)
                & (abs(events.Muon.eta) < 2.4)
                & (events.Muon.pfRelIso04_all < 0.25)
                & (events.Muon.looseId).astype(bool))
    nmuons = goodmuon.sum()
    leadingmuon = events.Muon[goodmuon][:, 0:1]
    muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)

    nelectrons = ((events.Electron.pt > 10)
                  & (abs(events.Electron.eta) < 2.5)
                  & (events.Electron.cutBased >= events.Electron.LOOSE)).sum()

    ntaus = ((events.Tau.pt > 20)
             & (events.Tau.idDecayMode).astype(bool)
             # bacon iso looser than Nano selection
             ).sum()

    selection.add('noleptons',
                  (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('onemuon',
                  (nmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('muonkin', ((leadingmuon.pt > 55.)
                              & (abs(leadingmuon.eta) < 2.1)).all())
    selection.add('muonDphiAK8',
                  (abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) >
                   2 * np.pi / 3).all().all())

    if isRealData:
        genflavor = candidatejet.pt.zeros_like()
    else:
        # MC-only event weights and gen-level boson matching
        weights.add('genweight', events.genWeight)
        add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        # genflavor: per-event flavor of the boson matched to the candidate
        # jet, -1 when no match
        genflavor = matchedBosonFlavor(candidatejet, bosons).pad(
            1, clip=True).fillna(-1).flatten()
        add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt,
                             self._year)

    # addBtagWeight both mutates `weights` and returns the value histogrammed
    output['btagWeight'].fill(dataset=dataset,
                              val=self._btagSF.addBtagWeight(
                                  weights, ak4_away))
    logger.debug("Weight statistics: %r" % weights._weightStats)

    # matched jets use the fully-scaled mass; unmatched keep the raw msdcorr
    msd_matched = candidatejet.msdcorr * self._msdSF[self._year] * (
        genflavor > 0) + candidatejet.msdcorr * (genflavor == 0)

    regions = {
        'signal': [
            'trigger',
            'minjetkin',
        ],  #'noleptons','jetacceptance', 'noleptons','jetid',],#'jetid', 'noleptons',],# 'n2ddt','antiak4btagMediumOppHem'],#, 'met',],
        'muoncontrol': [
            'muontrigger',
            'minjetkin',
            'jetid',
            'muonDphiAK8',
            'muonkin',
            'ak4btagMedium08',
            'onemuon',
        ],  # 'muonkin', 'muonDphiAK8'],
        'noselection': [],
    }

    # NOTE(review): this loop currently only emits the debug line; the
    # cutflow fill is commented out, so `allcuts` is unused.
    for region, cuts in regions.items():
        allcuts = set()
        logger.debug(
            f"Filling cutflow with: {dataset}, {region}, {genflavor}, {weights.weight()}"
        )
        #output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor, cut=0, weight=weights.weight())
        #for i, cut in enumerate(cuts + ['ddbpass']):
        #    allcuts.add(cut)
        #    cut = selection.all(*allcuts)
        #    output['cutflow'].fill(dataset=dataset, region=region, genflavor=genflavor[cut], cut=i + 1, weight=weights.weight()[cut])

    # systematic variations (currently only used by the commented loop below)
    systematics = [
        None,
        'jet_triggerUp',
        'jet_triggerDown',
        'btagWeightUp',
        'btagWeightDown',
        'btagEffStatUp',
        'btagEffStatDown',
    ]

    def normalize(val, cut):
        # pad/clip to exactly one entry per event so flatten() is per-event
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # Fill the template histograms for one region; `wmod` is an optional
        # per-event multiplicative weight (e.g. an LHE variation)
        selections = regions[region]
        cut = selection.all(*selections)
        sname = 'nominal' if systematic is None else systematic
        if wmod is None:
            weight = weights.weight(modifier=systematic)[cut]
        else:
            weight = weights.weight()[cut] * wmod[cut]
        output['templates'].fill(
            dataset=dataset,
            region=region,
            #systematic=sname,
            #genflavor=genflavor[cut],
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(msd_matched, cut),
            #ddb=normalize(candidatejet.btagDDBvL, cut),
            weight=weight,
        )
        if wmod is not None:
            # gen-level response filled both with the raw gen weight and the
            # full event weight
            output['genresponse_noweight'].fill(
                dataset=dataset,
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=events.genWeight[cut] * wmod[cut],
            )
            output['genresponse'].fill(
                dataset=dataset,
                region=region,
                systematic=sname,
                pt=normalize(candidatejet.pt, cut),
                genpt=normalize(genBosonPt, cut),
                weight=weight,
            )

    for region in regions:
        # N-1 plot for n2ddt: apply every region cut except 'n2ddt'
        cut = selection.all(*(set(regions[region]) - {'n2ddt'}))
        output['nminus1_n2ddt'].fill(
            dataset=dataset,
            region=region,
            n2ddt=normalize(candidatejet.n2ddt, cut),
            weight=weights.weight()[cut],
        )
        #for systematic in systematics:
        fill(region)  #, systematic)
        if 'GluGluHToBB' in dataset:
            # LHE scale and per-column LHE weight variations (ggH only)
            for i in range(9):
                fill(region, 'LHEScale_%d' % i, events.LHEScaleWeight[:, i])
            for c in events.LHEWeight.columns[1:]:
                fill(region, 'LHEWeight_%s' % c, events.LHEWeight[c])

    return output
def process(self, events):
    """Process one chunk of events: build fat-jet and muon selections,
    accumulate weighted cutflows for the signal / ttbar-muon-control
    regions, and fill the 'templates' histogram.

    Parameters
    ----------
    events : NanoEvents-like chunk with FatJet/Muon/Jet/Electron/Tau/MET
        collections plus GRU and IN tagger score tables.

    Returns
    -------
    The filled accumulator (``self.accumulator.identity()`` shape).
    """
    #assert(len(np.unique(events.event)) == len((events.event)))
    dataset = events.metadata['dataset']
    print('process dataset', dataset)
    # Real data carries no generator-weight branch.
    isRealData = 'genWeight' not in events.columns
    selection = processor.PackedSelection()
    weights = processor.Weights(len(events))
    output = self.accumulator.identity()
    if len(events) == 0:
        return output
    if not isRealData:
        output['sumw'][dataset] += events.genWeight.sum()

    # ---- trigger paths: OR of all configured HLT bits ----
    if isRealData:
        trigger_fatjet = np.zeros(events.size, dtype='bool')
        for t in self._triggers[self._year]:
            # Fix: was a bare `except:`. A missing HLT path is still
            # skipped (best-effort OR), but KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            try:
                trigger_fatjet = trigger_fatjet | events.HLT[t]
            except Exception:
                print('trigger %s not available' % t)
                continue
        trigger_muon = np.zeros(events.size, dtype='bool')
        for t in self._muontriggers[self._year]:
            trigger_muon = trigger_muon | events.HLT[t]
    else:
        # MC: no trigger requirement applied.
        trigger_fatjet = np.ones(events.size, dtype='bool')
        trigger_muon = np.ones(events.size, dtype='bool')
    selection.add('fatjet_trigger', trigger_fatjet)
    selection.add('muon_trigger', trigger_muon)

    # run model on PFCands associated to FatJet (FatJetPFCands)
    #events.FatJet.array.content["PFCands"] = type(events.FatJetPFCands.array).fromcounts(events.FatJet.nPFConstituents.flatten(), events.FatJetPFCands.flatten())
    #events.FatJet.array.content["twoProngGru"] = run_model(events.FatJet.flatten())
    #else:
    #    events.FatJet["genMatchFull"] = np.ones(len(events))

    # ---- fat jets: corrected softdrop mass, rho, DDT-shifted taggers ----
    fatjets = events.FatJet
    gru = events.GRU
    IN = events.IN
    fatjets['msdcorr'] = corrected_msoftdrop(fatjets)
    fatjets['rhocorr'] = 2 * np.log(fatjets.msdcorr / fatjets.pt)
    fatjets['gruddt'] = gru.v25 - shift(fatjets, algo='gruddt', year=self._year)
    fatjets['in_v3_ddt'] = IN.v3 - shift(fatjets, algo='inddt', year=self._year)
    fatjets['n2ddt'] = fatjets.n2b1 - n2ddt_shift(fatjets, year=self._year)
    #fatjets['count'] = fatjets.count
    # Gen matching is only computed for the V+jets samples.
    if 'WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset:
        fatjets["genMatchFull"] = genmatch(events)
    else:
        fatjets["genMatchFull"] = fatjets.pt.zeros_like()  #np.zeros(events.size, dtype='bool')

    # Leading fat jet passing loose kinematics (at most one per event).
    candidatejet = fatjets[
        (fatjets.pt > 200)
        & (abs(fatjets.eta) < 2.5)
    ][:, 0:1]

    # basic jet selection
    selection.add('minjetkin', (
        (candidatejet.pt >= 450)
        #& (candidatejet.msdcorr >= 40.)
        & (abs(candidatejet.eta) < 2.5)
        & (candidatejet.rhocorr >= -5.5)
        & (candidatejet.rhocorr <= -2)
    ).any())
    selection.add('signal_pt', (
        (candidatejet.pt >= 525)
    ).any())
    selection.add('mass', (candidatejet.msdcorr >= 40.).any())
    selection.add('v_selection_jetkin', (
        (candidatejet.pt >= 200)
        & (candidatejet.rhocorr >= -5.5)
        & (candidatejet.rhocorr <= -2)
    ).any())
    # 'genmatch' is a real requirement only for V+jets; for any other
    # dataset every event with a candidate jet passes.
    selection.add(
        'genmatch',
        candidatejet.genMatchFull.pad(1).fillna(0).flatten()
        if ('WJetsToQQ' in dataset or 'ZJetsToQQ' in dataset)
        else candidatejet.pt.pad(1).fillna(0).flatten().astype(bool))
    #if isRealData:
    #    selection.add('blinding', (
    #        (events.event %10 == 0)
    #    ))
    selection.add('n2ddt', (candidatejet.n2ddt < 0.).any())
    selection.add('jetid', candidatejet.isTight.any())
    selection.add('met', events.MET.pt > 40.)

    # ---- muons ----
    goodmuon = (
        (events.Muon.pt > 10)
        & (abs(events.Muon.eta) < 2.1)
        #& (events.Muon.pfRelIso04_all < 0.4)
        #& (events.Muon.looseId).astype(bool)
    )
    nmuons = goodmuon.sum()
    leadingmuon = events.Muon[goodmuon
        #& (events.Muon.pt > 55)
    ][:, 0:1]
    muon_ak8_pair = leadingmuon.cross(candidatejet, nested=True)
    ngoodmuons = goodmuon[events.Muon.pt > 55].sum()
    selection.add('muonDphiAK8', (
        abs(muon_ak8_pair.i0.delta_phi(muon_ak8_pair.i1)) > 2 * np.pi / 3
    ).all().all())
    selection.add('muonkin', (
        (leadingmuon.pt > 55.)
        & (abs(leadingmuon.eta) < 2.1)
        #& (leadingmuon.looseId).astype(bool)
    ).all())

    #ak4 puppi jet for CR
    jets = events.Jet[
        (events.Jet.pt > 50.)
        & (abs(events.Jet.eta) < 3)
        & (events.Jet.isTight).astype(bool)
    ]
    # only consider first 4 jets to be consistent with old framework
    jets = jets[:, :4]
    ak4_ak8_pair = jets.cross(candidatejet, nested=True)
    dr = abs(ak4_ak8_pair.i0.delta_r(ak4_ak8_pair.i1))
    # AK4 jets well separated from the candidate AK8 jet.
    ak4_away = jets[(dr > 0.8).all()]
    #selection.add('ak4btagMedium08', ak4_away.btagDeepB.max() > 0.4941)
    selection.add('ak4btagMedium08', ak4_away.btagCSVV2.max() > 0.8838)

    #generic lep veto
    nelectrons = (
        (events.Electron.pt > 10.)
        & (abs(events.Electron.eta) < 2.5)
        & (events.Electron.cutBased >= events.Electron.LOOSE)
    ).sum()
    ntaus = (
        (events.Tau.pt > 20.)
        & (events.Tau.idDecayMode).astype(bool)
        # bacon iso looser than Nano selection
    ).sum()
    selection.add('onemuon', (ngoodmuons == 1) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noleptons', (nmuons == 0) & (nelectrons == 0) & (ntaus == 0))
    selection.add('noelectron_notau', (nelectrons == 0) & (ntaus == 0))

    # ---- MC-only event weights ----
    if not isRealData:
        weights.add('genweight', events.genWeight)
        #add_pileup_weight(weights, events.Pileup.nPU, self._year, dataset)
        #add_jetTriggerWeight(weights, candidatejet.msdcorr, candidatejet.pt, self._year) signal region only
        bosons = getBosons(events)
        genBosonPt = bosons.pt.pad(1, clip=True).fillna(0)
        add_VJets_NLOkFactor(weights, genBosonPt, self._year, dataset)
        #b-tag weights

    regions = {
        'signal': ['fatjet_trigger', 'minjetkin', 'signal_pt', 'mass', 'noleptons', 'jetid', 'genmatch'],
        'ttbar_muoncontrol': ['muon_trigger', 'minjetkin', 'jetid', 'mass', 'muonDphiAK8', 'muonkin', 'ak4btagMedium08', 'onemuon',],
        'noselection': [],
        #'vselection_muoncontrol' : ['muon_trigger', 'v_selection_jetkin', 'genmatch', 'jetid', 'ak4btagMedium08', 'muonkin','met'],
    }
    #if isRealData and 'SingleMuon' not in dataset:
    #    regions['signal'].append('blinding')
    '''for region, cuts in regions.items():
        allcuts = set()
        print ('weights', weights.weight().shape)
        print( len(events))
        output['cutflow'].fill(dataset=dataset, region=region, cut=0)#,weight=weights.weight())
        for i, cut in enumerate(cuts):
            allcuts.add(cut)
            cut = selection.all(*allcuts)
            output['cutflow'].fill(dataset=dataset, region=region, cut=i + 1)# weight=weights.weight()[cut])
    '''

    # ---- weighted cumulative cutflows for the two main regions ----
    allcuts_signal = set()
    output['cutflow_signal'][dataset]['none'] += float(weights.weight().sum())
    allcuts_ttbar_muoncontrol = set()
    output['cutflow_ttbar_muoncontrol'][dataset]['none'] += float(weights.weight().sum())
    for cut in regions['signal']:
        allcuts_signal.add(cut)
        output['cutflow_signal'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_signal)].sum())
    for cut in regions['ttbar_muoncontrol']:
        allcuts_ttbar_muoncontrol.add(cut)
        output['cutflow_ttbar_muoncontrol'][dataset][cut] += float(weights.weight()[selection.all(*allcuts_ttbar_muoncontrol)].sum())

    def normalize(val, cut):
        # Pad to exactly one entry per selected event so flattened
        # arrays stay aligned with the event-level weight.
        return val[cut].pad(1, clip=True).fillna(0).flatten()

    def fill(region, systematic=None, wmod=None):
        # NOTE(review): `systematic` and `wmod` are currently unused --
        # only the nominal weight is applied (an unused `sname` local
        # was removed).
        selections = regions[region]
        cut = selection.all(*selections)
        weight = weights.weight()[cut]
        output['templates'].fill(
            dataset=dataset,
            region=region,
            pt=normalize(candidatejet.pt, cut),
            msd=normalize(candidatejet.msdcorr, cut),
            #gruddt=normalize(candidatejet.gruddt, cut),
            #n2=normalize(candidatejet.n2b1, cut),
            #gru=normalize(candidatejet.twoProngGru, cut),
            #rho=normalize(candidatejet.rhocorr, cut),
            in_v3_ddt=normalize(candidatejet.in_v3_ddt, cut),
            #nPFConstituents=normalize(candidatejet.nPFConstituents, cut),
            #nJet=candidatejet.counts[cut],
            #Vmatch=normalize(candidatejet.genMatchFull, cut),
            mu_pt=normalize(leadingmuon.pt, cut),
            mu_pfRelIso04_all=normalize(leadingmuon.pfRelIso04_all, cut),
            weight=weight,
        )

    for region in regions:
        fill(region)
    return output
def process(self, events):
    """Select Z->ee / Z->mumu candidate pairs and fill histograms.

    Applies the golden-JSON lumi mask (data only), trigger and vertex
    requirements, Rochester muon-momentum corrections, rebuilds leptons
    into candidate arrays, picks an opposite-charge dilepton Z
    candidate per event, applies electron/muon scale factors (MC), and
    fills the mass/MET/pileup histograms per channel.
    Returns the filled accumulator.
    """
    logging.debug('starting process')
    output = self.accumulator.identity()
    dataset = events.metadata['dataset']
    # Data vs MC is decided purely from the dataset name here.
    self._isData = dataset in [
        'SingleMuon', 'DoubleMuon', 'SingleElectron', 'DoubleEG', 'EGamma', 'MuonEG'
    ]
    selection = processor.PackedSelection()
    # TODO: instead of cutflow, use processor.PackedSelection
    output['cutflow']['all events'] += events.size
    logging.debug('applying lumi mask')
    if self._isData:
        # Certified-lumi (golden JSON) mask, evaluated per (run, lumiblock).
        lumiMask = lumi_tools.LumiMask(self._corrections['golden'])
        events['passLumiMask'] = lumiMask(np.array(events.run),
                                          np.array(events.luminosityBlock))
    else:
        # MC: every event passes.
        events['passLumiMask'] = np.ones_like(events.run, dtype=bool)
    passLumiMask = events.passLumiMask
    selection.add('lumiMask', passLumiMask)
    logging.debug('adding trigger')
    self._add_trigger(events)
    passHLT = events.passHLT
    selection.add('trigger', passHLT)
    output['cutflow']['pass trigger'] += passHLT.sum()
    # if no trigger: fast return
    if passHLT.sum() == 0:
        return output
    # require one good vertex
    logging.debug('checking vertices')
    passGoodVertex = (events.PV.npvsGood > 0)
    output['cutflow']['good vertex'] += passGoodVertex.sum()
    selection.add('goodVertex', passGoodVertex)
    # run rochester  (muon momentum-scale corrections)
    rochester = self._rochester
    _muon_offsets = events.Muon.pt.offsets
    _charge = events.Muon.charge
    _pt = events.Muon.pt
    _eta = events.Muon.eta
    _phi = events.Muon.phi
    if self._isData:
        _k = rochester.kScaleDT(_charge, _pt, _eta, _phi)
        # _kErr = rochester.kScaleDTerror(_charge, _pt, _eta, _phi)
    else:
        # for default if gen present
        _gpt = events.Muon.matched_gen.pt
        # for backup w/o gen
        _nl = events.Muon.nTrackerLayers
        # NOTE(review): np.random.rand is unseeded, so the no-gen
        # smearing path is not reproducible run-to-run.
        _u = JaggedArray.fromoffsets(_muon_offsets,
                                     np.random.rand(*_pt.flatten().shape))
        _hasgen = (_gpt.fillna(-1) > 0)
        # Spread correction when a matched gen muon exists; otherwise
        # stochastic smearing from tracker-layer count.
        _kspread = rochester.kSpreadMC(_charge[_hasgen], _pt[_hasgen],
                                       _eta[_hasgen], _phi[_hasgen],
                                       _gpt[_hasgen])
        _ksmear = rochester.kSmearMC(_charge[~_hasgen], _pt[~_hasgen],
                                     _eta[~_hasgen], _phi[~_hasgen],
                                     _nl[~_hasgen], _u[~_hasgen])
        _k = np.ones_like(_pt.flatten())
        _k[_hasgen.flatten()] = _kspread.flatten()
        _k[~_hasgen.flatten()] = _ksmear.flatten()
        _k = JaggedArray.fromoffsets(_muon_offsets, _k)
        # _kErrspread = rochester.kSpreadMCerror(_charge[_hasgen], _pt[_hasgen], _eta[_hasgen], _phi[_hasgen],
        #                                        _gpt[_hasgen])
        # _kErrsmear = rochester.kSmearMCerror(_charge[~_hasgen], _pt[~_hasgen], _eta[~_hasgen], _phi[~_hasgen],
        #                                      _nl[~_hasgen], _u[~_hasgen])
        # _kErr = np.ones_like(_pt.flatten())
        # _kErr[_hasgen.flatten()] = _kErrspread.flatten()
        # _kErr[~_hasgen.flatten()] = _kErrsmear.flatten()
        # _kErr = JaggedArray.fromoffsets(_muon_offsets, _kErr)
    # Correction applied only to muons with pt < 200.
    # NOTE(review): `rochester_pt` aliases the flattened pt buffer, so
    # the assignment below may mutate events.Muon.pt in place -- verify.
    mask = _pt.flatten() < 200
    rochester_pt = _pt.flatten()
    rochester_pt[mask] = (_k * _pt).flatten()[mask]
    events.Muon['pt'] = JaggedArray.fromoffsets(_muon_offsets, rochester_pt)
    logging.debug('adding muon id')
    self._add_muon_id(events.Muon)
    logging.debug('adding electron id')
    self._add_electron_id(events.Electron)
    logging.debug('selecting muons')
    muonId = (events.Muon.passId > 0)
    muons = events.Muon[muonId]
    logging.debug('selecting electrons')
    electronId = (events.Electron.passId > 0)
    electrons = events.Electron[electronId]
    # At least one same-flavor pair is possible.
    passTwoLeptons = (muons.counts >= 2) | (electrons.counts >= 2)
    output['cutflow']['two leptons'] += passTwoLeptons.sum()
    selection.add('twoLeptons', passTwoLeptons)
    # build cands
    # remake z to have same columns
    # pt eta phi mass charge pdgId
    logging.debug('rebuilding leptons')

    def rebuild(leptons):
        # Rebuild with a common column set so ee and mm candidates can
        # be concatenated below.
        return JaggedCandidateArray.candidatesfromoffsets(
            leptons.offsets,
            pt=leptons.pt.flatten(),
            eta=leptons.eta.flatten(),
            phi=leptons.phi.flatten(),
            mass=leptons.mass.flatten(),
            charge=leptons.charge.flatten(),
            pdgId=leptons.pdgId.flatten(),
            # needed for electron SF
            etaSC=leptons.etaSC.flatten()
            if hasattr(leptons, 'etaSC') else leptons.eta.flatten(),
        )

    newMuons = rebuild(muons)
    newElectrons = rebuild(electrons)
    logging.debug('building 2 leptons')
    ee_cands = newElectrons.choose(2)
    mm_cands = newMuons.choose(2)
    # combine them
    z_cands = JaggedArray.concatenate([ee_cands, mm_cands], axis=1)

    def bestcombination(zcands):
        # Opposite-charge requirement on the pair.
        good_charge = sum(zcands[str(i)]['charge'] for i in range(2)) == 0
        # this keeps the first z cand in each event
        # should instead sort the best first
        # TODO: select best
        zcands = zcands[good_charge][:, :1]
        return zcands

    logging.debug('selecting best combinations')
    z_cands = bestcombination(z_cands)
    # z1/z2: per-candidate index (0 or 1) of the higher/lower-pt leg.
    z1 = np.zeros_like(z_cands['p4'].pt.flatten(), dtype='i')
    z2 = np.ones_like(z_cands['p4'].pt.flatten(), dtype='i')
    z1[(z_cands['0']['p4'].pt.flatten() < z_cands['1']['p4'].pt.flatten())] = 1
    z2[(z_cands['0']['p4'].pt.flatten() < z_cands['1']['p4'].pt.flatten())] = 0
    z1 = JaggedArray.fromoffsets(z_cands.offsets, z1)
    z2 = JaggedArray.fromoffsets(z_cands.offsets, z2)
    passZCand = (z_cands.counts > 0)
    output['cutflow']['z cand'] += passZCand.sum()
    selection.add('zCand', passZCand)
    # Z mass window 60-120 GeV.
    passMassWindow = (passZCand & z_cands[(
        (z_cands.p4.mass > 60) & (z_cands.p4.mass < 120))].counts > 0)
    output['cutflow']['mass window'] += passMassWindow.sum()
    selection.add('massWindow', passMassWindow)

    # im sure there is a better way, but for now just do this
    def get_lepton_values(zl, key):
        # Gather per-event `key` values of the leg indexed by zl (0/1),
        # returning a jagged array aligned with zl.
        val = np.zeros_like(zl.flatten(), dtype=float)
        if len(val) == 0:
            return JaggedArray.fromoffsets(zl.offsets, val)
        for i in range(2):
            mask = (i == zl.flatten())
            if key == 'pt':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].pt
            elif key == 'eta':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].eta
            elif key == 'phi':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].phi
            elif key == 'mass':
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask]['p4'].mass
            else:
                val[mask] = z_cands[passZCand][str(i)].flatten()[mask][key]
        return JaggedArray.fromoffsets(zl.offsets, val)

    z1pt = get_lepton_values(z1, 'pt')
    z2pt = get_lepton_values(z2, 'pt')
    # Leading leg > 30 GeV, trailing leg > 20 GeV.
    passPt = ((z1pt > 30) & (z2pt > 20)).counts > 0
    output['cutflow']['pt threshold'] += passPt.sum()
    selection.add('ptThreshold', passPt)
    # Channel masks from the pdgId of the two legs (11 = e, 13 = mu).
    chanSels = {}
    z1pdg = get_lepton_values(z1, 'pdgId')
    z2pdg = get_lepton_values(z2, 'pdgId')
    for chan in ['ee', 'mm']:
        if chan == 'ee':
            pdgIds = (11, 11)
        if chan == 'mm':
            pdgIds = (13, 13)
        chanSels[chan] = ((abs(z1pdg) == pdgIds[0])
                          & (abs(z2pdg) == pdgIds[1]))
    weights = processor.Weights(events.run.size)
    if self._isData:
        output['sumw'][dataset] = 0  # always set to 0 for data
    else:
        output['sumw'][dataset] += events.genWeight.sum()
        weights.add('genWeight', events.genWeight)
        # Pileup weight with up/down variations.
        weights.add(
            'pileupWeight',
            self._corrections['pileupWeight'](events.Pileup.nPU),
            self._corrections['pileupWeightUp'](events.Pileup.nPU),
            self._corrections['pileupWeightDown'](events.Pileup.nPU),
        )
    zls = [z1, z2]
    # electron sf  (applied per leg, ee channel only)
    for ei, zl in enumerate(zls):
        ei = str(ei)
        eta = get_lepton_values(zl, 'etaSC')
        pt = get_lepton_values(zl, 'pt')
        electronRecoSF = self._corrections['electron_reco'](eta, pt)
        electronIdSF = self._corrections['electron_id_MVA90'](eta, pt)
        electronSF = np.ones_like(electronRecoSF.prod())
        # NOTE(review): ei is always '0' or '1' here, so this branch
        # always selects ['ee'] -- the else arm is dead as written.
        if ei in ['0', '1']:
            chans = ['ee']
        else:
            chans = []
        for chan in chans:
            # turns empty arrays into 0's, nonempty int 1's
            chanSel = (chanSels[chan].ones_like().sum() > 0)
            electronSF[chanSel] *= electronRecoSF[chanSel].prod()
            electronSF[chanSel] *= electronIdSF[chanSel].prod()
        weights.add('electronSF' + ei, electronSF)
    # muon SF  (applied per leg, mm channel only)
    for mi, zl in enumerate(zls):
        mi = str(mi)
        eta = get_lepton_values(zl, 'eta')
        pt = get_lepton_values(zl, 'pt')
        # 2016 SF maps are binned in (eta, pt); later years in (pt, |eta|).
        if self._year == '2016':
            idSF = self._corrections['muon_id_MediumID'](eta, pt)
            isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                eta, pt)
        else:
            idSF = self._corrections['muon_id_MediumPromptID'](
                pt, abs(eta))
            isoSF = self._corrections['muon_iso_TightRelIso_MediumID'](
                pt, abs(eta))
        muonSF = np.ones_like(idSF.prod())
        # NOTE(review): mi is always '0' or '1' -- else arm dead, as above.
        if mi in ['0', '1']:
            chans = ['mm']
        else:
            chans = []
        for chan in chans:
            # turns empty arrays into 0's, nonempty int 1's
            chanSel = (chanSels[chan].ones_like().sum() > 0)
            muonSF[chanSel] *= idSF[chanSel].prod()
            muonSF[chanSel] *= isoSF[chanSel].prod()
        weights.add('muonSF' + mi, muonSF)
    logging.debug('filling')
    # Fill per-channel histograms for the configured selections.
    for sel in self._selections:
        if sel == 'massWindow':
            cut = selection.all('lumiMask', 'trigger', 'goodVertex',
                                'twoLeptons', 'zCand', 'massWindow',
                                'ptThreshold')
            for chan in ['ee', 'mm']:
                chanSel = chanSels[chan]
                # Zero weight outside the channel, nominal weight inside.
                weight = chanSel.astype(float) * weights.weight()
                output[sel + '_zmass'].fill(
                    dataset=dataset,
                    channel=chan,
                    mass=z_cands[cut].p4.mass.flatten(),
                    weight=weight[cut].flatten(),
                )
                output[sel + '_met'].fill(
                    dataset=dataset,
                    channel=chan,
                    met=events.MET.pt[cut],
                    weight=weight[cut].flatten(),
                )
                output[sel + '_pileup'].fill(
                    dataset=dataset,
                    channel=chan,
                    npvs=events.PV.npvs[cut],
                    weight=weight[cut].flatten(),
                )
    return output
def process(self, df):
    """Leptonjet analysis for one dataframe chunk.

    Builds AK4 jets and leptonjets from flat branches, classifies events
    into the 2mu2e (channel 1) and 4mu (channel 2) channels, and fills
    the 'njets' and 'ntightb' histograms. Returns the accumulator.
    """
    output = self.accumulator.identity()
    if df.size == 0: return output
    dataset = df['dataset']
    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # MC only: generator weight and pileup (nominal/up/down from
        # the three correction functions in self.pucorrs).
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # OR of all trigger path decisions.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################

    # AK4 CHS jets with id/energy-fraction/DeepCSV payload.
    ak4jets = JaggedCandidateArray.candidatesfromcounts(
        df['akjet_ak4PFJetsCHS_p4'],
        px=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fX'].content,
        py=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fY'].content,
        pz=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fZ'].content,
        energy=df['akjet_ak4PFJetsCHS_p4.fCoordinates.fT'].content,
        jetid=df['akjet_ak4PFJetsCHS_jetid'].content,
        muefrac=df['akjet_ak4PFJetsCHS_muonEnergyFraction'].content,
        chaemefrac=df['akjet_ak4PFJetsCHS_chaEmEnergyFraction'].content,
        emefrac=df['akjet_ak4PFJetsCHS_emEnergyFraction'].content,
        hadfrac=df['akjet_ak4PFJetsCHS_hadronEnergyFraction'].content,
        chahadfrac=df['akjet_ak4PFJetsCHS_chaHadEnergyFraction'].content,
        deepcsv=df['hftagscore_DeepCSV_b'].content,
    )
    # Bit 2 of the DeepCSV score word marks the tight working point.
    deepcsv_tight = np.bitwise_and(ak4jets.deepcsv, 1 << 2) == (1 << 2)
    ak4jets.add_attributes(deepcsvTight=deepcsv_tight, )
    ak4jets = ak4jets[ak4jets.jetid & (ak4jets.pt > 30)
                      & (np.abs(ak4jets.eta) < 2.4)]

    # Leptonjets with their Kalman-fitted vertex position.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'].content,
        py=df['pfjet_p4.fCoordinates.fY'].content,
        pz=df['pfjet_p4.fCoordinates.fZ'].content,
        energy=df['pfjet_p4.fCoordinates.fT'].content,
        vx=df['pfjet_klmvtx.fCoordinates.fX'].content,
        vy=df['pfjet_klmvtx.fCoordinates.fY'].content,
        vz=df['pfjet_klmvtx.fCoordinates.fZ'].content,
        mintkdist=df['pfjet_pfcands_minTwoTkDist'].content,
    )
    # Transverse vertex displacement.
    leptonjets.add_attributes(vxy=np.hypot(leptonjets.vx, leptonjets.vy))
    # Daughter-type counts: 3/8 presumably PF muon / DSA muon (inferred
    # from the npfmu/ndsa names -- confirm against the ntuple format).
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # Label encoding: 1 = egamma-type, 2 = PF-muon-type, 3 = DSA-type.
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))
    ljdaucharge = awkward.fromiter(df['pfjet_pfcand_charge']).sum()
    leptonjets.add_attributes(qsum=ljdaucharge)
    # Neutral: electron-type, or muon-type with zero net charge.
    leptonjets.add_attributes(
        isneutral=(leptonjets.iseltype | (leptonjets.ismutype
                                          & (leptonjets.qsum == 0))))
    leptonjets.add_attributes(
        displaced=((leptonjets.vxy >= 5)
                   | (np.isnan(leptonjets.vxy) & leptonjets.ismutype)
                   ))  # non-vertex treated as displaced too
    # Cosmic veto on the DSA-muon daughters of each leptonjet.
    ljdsamuSubset = fromNestNestIndexArray(
        df['dsamuon_isSubsetFilteredCosmic1Leg'],
        awkward.fromiter(df['pfjet_pfcand_dsamuonIdx']))
    leptonjets.add_attributes(nocosmic=(ljdsamuSubset.sum() == 0))
    leptonjets = leptonjets[(leptonjets.isneutral)
                            & (leptonjets.nocosmic)
                            & (leptonjets.pt > 30)
                            & (leptonjets.mintkdist < 50)]
    # mask_ = ak4jets.match(leptonjets, deltaRCut=0.4)
    # ak4jets = ak4jets[~mask_]

    ## __ twoleptonjets__
    # Event selection: >= 2 leptonjets, at least one muon-type.
    twoleptonjets = (leptonjets.counts >= 2) & (leptonjets.ismutype.sum()
                                                >= 1)
    dileptonjets = leptonjets[twoleptonjets]
    ak4jets = ak4jets[twoleptonjets]
    wgt = weight[twoleptonjets]
    if dileptonjets.size == 0: return output
    # Leading and subleading leptonjet by pt.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]
    ## channel def ##
    # channel 1 (2mu2e): exactly one muon-type leptonjet, and it is one
    # of the two leading; channel 2 (4mu): two muon-type leptonjets,
    # both leading. Events matching neither get channel 0.
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1
    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2
    channel_ = channel_2mu2e + channel_4mu
    ###########
    # Keep only AK4 jets harder than the leading leptonjet.
    ak4jets = ak4jets[ak4jets.pt > (lj0.pt.flatten())]
    output['njets'].fill(dataset=dataset,
                         cnt=ak4jets.counts,
                         weight=wgt,
                         channel=channel_)
    # Guard: apply the tight-b filtering only when any jets remain
    # (presumably the attribute selection fails on an empty flatten --
    # confirm; the histogram is filled either way, with zero counts).
    if ak4jets.flatten().size != 0:
        ak4jets = ak4jets[(ak4jets.pt > 30)
                          & (np.abs(ak4jets.eta) < 2.4)
                          & (ak4jets.deepcsvTight)]
    output['ntightb'].fill(dataset=dataset,
                           cnt=ak4jets.counts,
                           weight=wgt,
                           channel=channel_)
    return output
def process(self, events): # Initialize accumulator out = self.accumulator.identity() dataset = sample_name #events.metadata['dataset'] # Data or MC isData = 'genWeight' not in events.fields #Stop processing if there is no event remain if len(events) == 0: return out # Golden Json file if (self._year == "2018") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD" if (self._year == "2017") and isData: injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt" # <----- Get Scale factors ------># if not isData: # Egamma reco ID get_ele_reco_above20_sf = self._corrections[ 'get_ele_reco_above20_sf'][self._year] get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][ self._year] get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][ self._year] # DoubleEG trigger # 2016, 2017 are not applied yet if self._year == "2018": get_ele_trig_leg1_SF = self._corrections[ 'get_ele_trig_leg1_SF'][self._year] get_ele_trig_leg1_data_Eff = self._corrections[ 'get_ele_trig_leg1_data_Eff'][self._year] get_ele_trig_leg1_mc_Eff = self._corrections[ 'get_ele_trig_leg1_mc_Eff'][self._year] get_ele_trig_leg2_SF = self._corrections[ 'get_ele_trig_leg2_SF'][self._year] get_ele_trig_leg2_data_Eff = self._corrections[ 'get_ele_trig_leg2_data_Eff'][self._year] get_ele_trig_leg2_mc_Eff = self._corrections[ 'get_ele_trig_leg2_mc_Eff'][self._year] # PU weight with custom made npy and multi-indexing pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64") pu = self._puweight_arr[pu_weight_idx] selection = processor.PackedSelection() # Cut flow cut0 = np.zeros(len(events)) # <----- Helper functions ------># # Sort by PT helper function def sort_by_pt(ele, pho, jet): ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)] pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)] jet = 
jet[ak.argsort(jet.pt, ascending=False, axis=1)] return ele, pho, jet # Lorentz vectors from coffea.nanoevents.methods import vector ak.behavior.update(vector.behavior) def TLorentz_vector(vec): vec = ak.zip({ "x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t }, with_name="LorentzVector") return vec def TLorentz_vector_cylinder(vec): vec = ak.zip( { "pt": vec.pt, "eta": vec.eta, "phi": vec.phi, "mass": vec.mass, }, with_name="PtEtaPhiMLorentzVector", ) return vec # Cut-based ID modification @numba.njit def PhotonVID(vid, idBit): rBit = 0 for x in range(0, 7): rBit |= (1 << x) if ((vid >> (x * 2)) & 0b11 >= idBit) else 0 return rBit # Inverse Sieie and upper limit @numba.njit def make_fake_obj_mask(Pho, builder): #for eventIdx,pho in enumerate(tqdm(Pho)): # --Event Loop for eventIdx, pho in enumerate(Pho): builder.begin_list() if len(pho) < 1: continue for phoIdx, _ in enumerate(pho): # --Photon Loop vid = Pho[eventIdx][phoIdx].vidNestedWPBitmap vid_cuts1 = PhotonVID(vid, 1) # Loose photon vid_cuts2 = PhotonVID(vid, 2) # Medium photon vid_cuts3 = PhotonVID(vid, 3) # Tight photon # Field name # |0|0|0|0|0|0|0| # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT| # 1. Turn off cut (ex turn off Sieie # |1|1|1|0|1|1|1| = |1|1|1|0|1|1|1| # 2. 
Inverse cut (ex inverse Sieie) # |1|1|1|1|1|1|1| = |1|1|1|0|1|1|1| #if (vid_cuts2 & 0b1111111 == 0b1111111): # Cut applied #if (vid_cuts2 & 0b1111111 == 0b1110111): # Inverse Sieie if (vid_cuts2 & 0b1110111 == 0b1110111): # Without Sieie builder.boolean(True) else: builder.boolean(False) builder.end_list() return builder # <----- Selection ------># Initial_events = events # Good Run ( Golden Json files ) from coffea import lumi_tools if isData: lumi_mask_builder = lumi_tools.LumiMask(injson) lumimask = ak.Array( lumi_mask_builder.__call__(events.run, events.luminosityBlock)) events = events[lumimask] #print("{0}% of files pass good-run conditions".format(len(events)/ len(Initial_events))) # Stop processing if there is no event remain if len(events) == 0: return out ##----------- Cut flow1: Passing Triggers # double lepton trigger is_double_ele_trigger = True if not is_double_ele_trigger: double_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._doubleelectron_triggers[self._year]: if path not in events.HLT.fields: continue double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[ path] # single lepton trigger is_single_ele_trigger = True if not is_single_ele_trigger: single_ele_triggers_arr = np.ones(len(events), dtype=np.bool) else: single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool) for path in self._singleelectron_triggers[self._year]: if path not in events.HLT.fields: continue single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[ path] events.Electron, events.Photon, events.Jet = sort_by_pt( events.Electron, events.Photon, events.Jet) # Good Primary vertex nPV = events.PV.npvsGood if not isData: nPV = nPV * pu nPV_nw = nPV # Apply cut1 events = events[double_ele_triggers_arr] if not isData: pu = pu[double_ele_triggers_arr] cut1 = np.ones(len(events)) # Set Particles Electron = events.Electron Muon = events.Muon Photon = events.Photon MET = 
events.MET Jet = events.Jet # Stop processing if there is no event remain if len(Electron) == 0: return out # --Gen Photon for dR genparts = events.GenPart pdgID_mask = (genparts.pdgId == 22) # mask2: isPrompt | fromHardProcess | isLastCopy mask2 = (1 << 0) | (1 << 8) | (1 << 13) # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py status_mask = ((genparts.statusFlags & mask2) == mask2) gen_photons = genparts[pdgID_mask & status_mask] assert (ak.all(ak.num(gen_photons) == 1) ) # Raise error if len(gen_photon) != 1 # --Muon ( only used to calculate dR ) MuSelmask = (Muon.pt >= 10) & (abs( Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15) Muon = Muon[MuSelmask] ##----------- Cut flow2: Electron Selection EleSelmask = ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.05) & (abs(Electron.dz) < 0.1)) | \ ((Electron.pt >= 20) & (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479) & (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5) & (Electron.cutBased > 2) & (abs(Electron.dxy) < 0.1) & (abs(Electron.dz) < 0.2)) Electron = Electron[EleSelmask] # apply cut 2 Tri_electron_mask = ak.num(Electron) >= 2 Electron = Electron[Tri_electron_mask] Photon = Photon[Tri_electron_mask] Jet = Jet[Tri_electron_mask] MET = MET[Tri_electron_mask] Muon = Muon[Tri_electron_mask] if not isData: pu = pu[Tri_electron_mask] events = events[Tri_electron_mask] gen_photons = gen_photons[Tri_electron_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut2 = np.ones(len(Photon)) * 2 ##----------- Cut flow3: Photon Selection # Basic photon selection isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5)) Pixel_seed_mask = ~Photon.pixelSeed PT_mask = Photon.pt >= 20 # dR cut with selected Muon and Electrons dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5, axis=-1) # default metric table: 
delta_r dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1) PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask Photon = Photon[PhoSelmask] # Apply cut 3 A_photon_mask = ak.num(Photon) > 0 Electron = Electron[A_photon_mask] Photon = Photon[A_photon_mask] Jet = Jet[A_photon_mask] Muon = Muon[A_photon_mask] MET = MET[A_photon_mask] if not isData: pu = pu[A_photon_mask] events = events[A_photon_mask] gen_photons = gen_photons[A_photon_mask] Photon_template_mask = make_fake_obj_mask( Photon, ak.ArrayBuilder()).snapshot() Photon = Photon[Photon_template_mask] # Apply cut 3 A_photon_mask = ak.num(Photon) > 0 Electron = Electron[A_photon_mask] Photon = Photon[A_photon_mask] Jet = Jet[A_photon_mask] Muon = Muon[A_photon_mask] MET = MET[A_photon_mask] if not isData: pu = pu[A_photon_mask] events = events[A_photon_mask] gen_photons = gen_photons[A_photon_mask] # Stop processing if there is no event remain if len(Electron) == 0: return out cut3 = np.ones(len(Photon)) * 3 ## -- Additional photon selection: Photon gen-matching # Choose Photons that dR(genPhoton,Photon) <= 0.1 gen_match_photon_mask = ak.all(Photon.metric_table(gen_photons) <= 0.1, axis=-1) # Apply cut Photon = Photon[gen_match_photon_mask] gen_match_photon_evt_mask = ak.num(Photon) >= 1 Electron = Electron[gen_match_photon_evt_mask] Photon = Photon[gen_match_photon_evt_mask] Jet = Jet[gen_match_photon_evt_mask] MET = MET[gen_match_photon_evt_mask] gen_photons = gen_photons[gen_match_photon_evt_mask] if not isData: pu = pu[gen_match_photon_evt_mask] events = events[gen_match_photon_evt_mask] ##----------- Cut flow4: Select 2 OSSF electrons from Z @numba.njit def find_2lep(events_leptons, builder): for leptons in events_leptons: builder.begin_list() nlep = len(leptons) for i0 in range(nlep): for i1 in range(i0 + 1, nlep): if leptons[i0].charge + leptons[i1].charge != 0: continue if nlep == 2: builder.begin_tuple(2) builder.index(0).integer(i0) 
builder.index(1).integer(i1) builder.end_tuple() else: for i2 in range(nlep): if len({i0, i1, i2}) < 3: continue builder.begin_tuple(3) builder.index(0).integer(i0) builder.index(1).integer(i1) builder.index(2).integer(i2) builder.end_tuple() builder.end_list() return builder ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot() # OSSF cut ossf_mask = ak.num(ossf_idx) >= 1 ossf_idx = ossf_idx[ossf_mask] Electron = Electron[ossf_mask] Photon = Photon[ossf_mask] Jet = Jet[ossf_mask] MET = MET[ossf_mask] events = events[ossf_mask] if not isData: pu = pu[ossf_mask] Double_electron = [Electron[ossf_idx[idx]] for idx in "01"] from coffea.nanoevents.methods import vector ak.behavior.update(vector.behavior) Diele = ak.zip({ "lep1": Double_electron[0], "lep2": Double_electron[1], "p4": TLorentz_vector(Double_electron[0] + Double_electron[1]) }) bestZ_idx = ak.singletons( ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1)) Diele = Diele[bestZ_idx] # Stop processing if there is no event remain if len(Electron) == 0: return out cut4 = np.ones(len(Electron)) * 4 leading_ele = Diele.lep1 subleading_ele = Diele.lep2 def make_leading_pair(target, base): return target[ak.argmax(base.pt, axis=1, keepdims=True)] leading_pho = make_leading_pair(Photon, Photon) # -- Scale Factor for each electron # Trigger weight helper function def Trigger_Weight(eta1, pt1, eta2, pt2): per_ev_MC =\ get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg2_mc_Eff(eta2,pt2) +\ get_ele_trig_leg1_mc_Eff(eta2,pt2) * get_ele_trig_leg2_mc_Eff(eta1,pt1) -\ get_ele_trig_leg1_mc_Eff(eta1,pt1) * get_ele_trig_leg1_mc_Eff(eta2,pt2) per_ev_data =\ get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * get_ele_trig_leg2_data_Eff(eta2,pt2) * get_ele_trig_leg2_SF(eta2,pt2) +\ get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) * get_ele_trig_leg2_data_Eff(eta1,pt1) * get_ele_trig_leg2_SF(eta1,pt1) -\ get_ele_trig_leg1_data_Eff(eta1,pt1) * get_ele_trig_leg1_SF(eta1,pt1) * 
get_ele_trig_leg1_data_Eff(eta2,pt2) * get_ele_trig_leg1_SF(eta2,pt2) return per_ev_data / per_ev_MC if not isData: ## -------------< Egamma ID and Reco Scale factor > -----------------## get_pho_medium_id_sf = get_pho_medium_id_sf( ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt)) ele_reco_sf = get_ele_reco_above20_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_reco_above20_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt)) ele_medium_id_sf = get_ele_medium_id_sf( ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta), ak.flatten(leading_ele.pt)) * get_ele_medium_id_sf( ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta), ak.flatten(subleading_ele.pt)) ## -------------< Double Electron Trigger Scale factor > -----------------## eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta) eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta) pt1 = ak.flatten(leading_ele.pt) pt2 = ak.flatten(subleading_ele.pt) # -- 2017,2016 are not applied yet if self._year == '2018': ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2) ##----------- Cut flow5: Event selection # Mee cut Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4 # Electron PT cuts Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20)) # MET cuts MET_mask = MET.pt > 20 # --------Mask -------# Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask Diele_sel = Diele[Event_sel_mask] leading_pho_sel = leading_pho[Event_sel_mask] Jet_sel = Jet[Event_sel_mask] MET_sel = MET[Event_sel_mask] # Photon EE and EB isEE_mask = leading_pho.isScEtaEE isEB_mask = leading_pho.isScEtaEB Pho_EE = leading_pho[isEE_mask & Event_sel_mask] Pho_EB = leading_pho[isEB_mask & Event_sel_mask] #Stop processing if there is no event remain if len(leading_pho_sel) == 0: return out cut5 = np.ones(len(Diele)) * 5 # -------------------- Flatten variables ---------------------------# # -- Ele1 --# Ele1_PT = 
ak.flatten(Diele_sel.lep1.pt) Ele1_Eta = ak.flatten(Diele_sel.lep1.eta) Ele1_Phi = ak.flatten(Diele_sel.lep1.phi) # -- Ele2 --# Ele2_PT = ak.flatten(Diele_sel.lep2.pt) Ele2_Eta = ak.flatten(Diele_sel.lep2.eta) Ele2_Phi = ak.flatten(Diele_sel.lep2.phi) # -- Pho -- # Pho_PT = ak.flatten(leading_pho_sel.pt) Pho_Eta = ak.flatten(leading_pho_sel.eta) Pho_Phi = ak.flatten(leading_pho_sel.phi) # -- Pho EB --# Pho_EB_PT = ak.flatten(Pho_EB.pt) Pho_EB_Eta = ak.flatten(Pho_EB.eta) Pho_EB_Phi = ak.flatten(Pho_EB.phi) Pho_EB_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg) Pho_EB_Sieie = ak.flatten(Pho_EE.sieie) # -- Pho EE --# Pho_EE_PT = ak.flatten(Pho_EE.pt) Pho_EE_Eta = ak.flatten(Pho_EE.eta) Pho_EE_Phi = ak.flatten(Pho_EE.phi) Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg) Pho_EE_Sieie = ak.flatten(Pho_EE.sieie) # --Kinematics --# Diele_mass = ak.flatten(Diele_sel.p4.mass) leading_ele, subleading_ele = ak.flatten( TLorentz_vector_cylinder(Diele_sel.lep1)), ak.flatten( TLorentz_vector_cylinder(Diele_sel.lep2)) dR_e1pho = ak.flatten( leading_ele.delta_r(leading_pho_sel)) # dR pho,ele1 dR_e2pho = ak.flatten( subleading_ele.delta_r(leading_pho_sel)) # dR pho,ele2 dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel)) MET_PT = ak.to_numpy(MET_sel.pt) # -------------------- Sieie bins---------------------------# def make_bins(pt, eta, sieie, bin_range_str): bin_dict = { 'PT_1_eta_1': (pt > 20) & (pt < 30) & (eta < 1), 'PT_1_eta_2': (pt > 20) & (pt < 30) & (eta > 1) & (eta < 1.5), 'PT_1_eta_3': (pt > 20) & (pt < 30) & (eta > 1.5) & (eta < 2), 'PT_1_eta_4': (pt > 20) & (pt < 30) & (eta > 2) & (eta < 2.5), 'PT_2_eta_1': (pt > 30) & (pt < 40) & (eta < 1), 'PT_2_eta_2': (pt > 30) & (pt < 40) & (eta > 1) & (eta < 1.5), 'PT_2_eta_3': (pt > 30) & (pt < 40) & (eta > 1.5) & (eta < 2), 'PT_2_eta_4': (pt > 30) & (pt < 40) & (eta > 2) & (eta < 2.5), 'PT_3_eta_1': (pt > 40) & (pt < 50) & (eta < 1), 'PT_3_eta_2': (pt > 40) & (pt < 50) & (eta > 1) & (eta < 1.5), 'PT_3_eta_3': (pt > 40) & 
(pt < 50) & (eta > 1.5) & (eta < 2), 'PT_3_eta_4': (pt > 40) & (pt < 50) & (eta > 2) & (eta < 2.5), 'PT_4_eta_1': (pt > 50) & (eta < 1), 'PT_4_eta_2': (pt > 50) & (eta > 1) & (eta < 1.5), 'PT_4_eta_3': (pt > 50) & (eta > 1.5) & (eta < 2), 'PT_4_eta_4': (pt > 50) & (eta > 2) & (eta < 2.5) } binmask = bin_dict[bin_range_str] return ak.to_numpy(sieie[binmask]), binmask bin_name_list = [ 'PT_1_eta_1', 'PT_1_eta_2', 'PT_1_eta_3', 'PT_1_eta_4', 'PT_2_eta_1', 'PT_2_eta_2', 'PT_2_eta_3', 'PT_2_eta_4', 'PT_3_eta_1', 'PT_3_eta_2', 'PT_3_eta_3', 'PT_3_eta_4', 'PT_4_eta_1', 'PT_4_eta_2', 'PT_4_eta_3', 'PT_4_eta_4' ] binned_sieie_hist = {} binmask_dict = {} for name in bin_name_list: binned_sieie_hist[name], _ = make_bins( ak.flatten(leading_pho_sel.pt), ak.flatten(abs(leading_pho_sel.eta)), ak.flatten(leading_pho_sel.sieie), name) _, binmask_dict[name] = make_bins(ak.flatten(leading_pho.pt), ak.flatten(abs(leading_pho.eta)), ak.flatten(leading_pho.sieie), name) print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4']) # --- Apply weight and hist weights = processor.Weights(len(cut4)) # --- skim cut-weight def skim_weight(arr): mask1 = ~ak.is_none(arr) subarr = arr[mask1] mask2 = subarr != 0 return ak.to_numpy(subarr[mask2]) cuts = Event_sel_mask cuts_pho_EE = ak.flatten(isEE_mask) cuts_pho_EB = ak.flatten(isEB_mask) print( "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ". 
format(len(Initial_events), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5))) # Weight and SF here if not isData: weights.add('pileup', pu) weights.add('ele_id', ele_medium_id_sf) weights.add('pho_id', get_pho_medium_id_sf) weights.add('ele_reco', ele_reco_sf) # 2016,2017 are not applied yet if self._year == "2018": weights.add('ele_trigger', ele_trig_weight) # ---------------------------- Fill hist --------------------------------------# # Initial events out["sumw"][dataset] += len(Initial_events) # Cut flow loop for cut in [cut0, cut1, cut2, cut3, cut4, cut5]: out["cutflow"].fill(dataset=dataset, cutflow=cut) # Primary vertex out['nPV'].fill( dataset=dataset, nPV=nPV, ) out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw) # Fill hist # -- met -- # out["met"].fill(dataset=dataset, met=MET_PT, weight=skim_weight(weights.weight() * cuts)) # --mass -- # out["mass"].fill(dataset=dataset, mass=Diele_mass, weight=skim_weight(weights.weight() * cuts)) # -- Ele1 -- # out["ele1pt"].fill(dataset=dataset, ele1pt=Ele1_PT, weight=skim_weight(weights.weight() * cuts)) out["ele1eta"].fill(dataset=dataset, ele1eta=Ele1_Eta, weight=skim_weight(weights.weight() * cuts)) out["ele1phi"].fill(dataset=dataset, ele1phi=Ele1_Phi, weight=skim_weight(weights.weight() * cuts)) # --Ele2 --# out["ele2pt"].fill(dataset=dataset, ele2pt=Ele2_PT, weight=skim_weight(weights.weight() * cuts)) out["ele2eta"].fill(dataset=dataset, ele2eta=Ele2_Eta, weight=skim_weight(weights.weight() * cuts)) out["ele2phi"].fill(dataset=dataset, ele2phi=Ele2_Phi, weight=skim_weight(weights.weight() * cuts)) # -- Photon -- # out["phopt"].fill(dataset=dataset, phopt=Pho_PT, weight=skim_weight(weights.weight() * cuts)) out["phoeta"].fill(dataset=dataset, phoeta=Pho_Eta, weight=skim_weight(weights.weight() * cuts)) out["phophi"].fill(dataset=dataset, phophi=Pho_Phi, weight=skim_weight(weights.weight() * cuts)) # -- Binned sieie hist -- # if len(binned_sieie_hist['PT_1_eta_1'] > 0): 
out['PT_1_eta_1'].fill(dataset=dataset, PT_1_eta_1=binned_sieie_hist['PT_1_eta_1']) if len(binned_sieie_hist['PT_1_eta_2'] > 0): out['PT_1_eta_2'].fill(dataset=dataset, PT_1_eta_2=binned_sieie_hist['PT_1_eta_2']) if len(binned_sieie_hist['PT_1_eta_3'] > 0): out['PT_1_eta_3'].fill(dataset=dataset, PT_1_eta_3=binned_sieie_hist['PT_1_eta_3']) if len(binned_sieie_hist['PT_1_eta_4'] > 0): out['PT_1_eta_4'].fill(dataset=dataset, PT_1_eta_4=binned_sieie_hist['PT_1_eta_4']) if len(binned_sieie_hist['PT_2_eta_1'] > 0): out['PT_2_eta_1'].fill(dataset=dataset, PT_2_eta_1=binned_sieie_hist['PT_2_eta_1']) if len(binned_sieie_hist['PT_2_eta_2'] > 0): out['PT_2_eta_2'].fill(dataset=dataset, PT_2_eta_2=binned_sieie_hist['PT_2_eta_2']) if len(binned_sieie_hist['PT_2_eta_3'] > 0): out['PT_2_eta_3'].fill(dataset=dataset, PT_2_eta_3=binned_sieie_hist['PT_2_eta_3']) if len(binned_sieie_hist['PT_2_eta_4'] > 0): out['PT_2_eta_4'].fill(dataset=dataset, PT_2_eta_4=binned_sieie_hist['PT_2_eta_4']) if len(binned_sieie_hist['PT_3_eta_1'] > 0): out['PT_3_eta_1'].fill(dataset=dataset, PT_3_eta_1=binned_sieie_hist['PT_3_eta_1']) if len(binned_sieie_hist['PT_3_eta_2'] > 0): out['PT_3_eta_2'].fill(dataset=dataset, PT_3_eta_2=binned_sieie_hist['PT_3_eta_2']) if len(binned_sieie_hist['PT_3_eta_3'] > 0): out['PT_3_eta_3'].fill(dataset=dataset, PT_3_eta_3=binned_sieie_hist['PT_3_eta_3']) if len(binned_sieie_hist['PT_3_eta_4'] > 0): out['PT_3_eta_4'].fill(dataset=dataset, PT_3_eta_4=binned_sieie_hist['PT_3_eta_4']) if len(binned_sieie_hist['PT_4_eta_1'] > 0): out['PT_4_eta_1'].fill(dataset=dataset, PT_4_eta_1=binned_sieie_hist['PT_4_eta_1']) if len(binned_sieie_hist['PT_4_eta_2'] > 0): out['PT_4_eta_2'].fill(dataset=dataset, PT_4_eta_2=binned_sieie_hist['PT_4_eta_2']) if len(binned_sieie_hist['PT_4_eta_3'] > 0): out['PT_4_eta_3'].fill(dataset=dataset, PT_4_eta_3=binned_sieie_hist['PT_4_eta_3']) if len(binned_sieie_hist['PT_4_eta_4'] > 0): out['PT_4_eta_4'].fill(dataset=dataset, 
PT_4_eta_4=binned_sieie_hist['PT_4_eta_4']) return out
def process(self, df):
    """Fill isolation-study columns for leptonjets.

    Builds per-event weights, reconstructs "leptonjets" from PF-jet
    branches, classifies each leptonjet (egamma / PF-muon / DSA) and each
    event (2mu2e / 4mu), optionally restricts to the dphi control region,
    and accumulates flat columns of isolation variables, kinematics and
    per-leptonjet weights into the accumulator.

    Parameters
    ----------
    df : coffea LazyDataFrame-like
        Event table with flat branches (``pfjet_*``, trigger bits,
        MET-filter bits, ...).

    Returns
    -------
    The accumulator identity, filled with column_accumulators; returned
    early (empty) when the chunk has no events or no surviving events.
    """
    output = self.accumulator.identity()
    if df.size == 0:
        return output
    dataset = df['dataset']

    ## construct weights ##
    wgts = processor.Weights(df.size)
    if self.data_type != 'data':
        # Generator weight and pileup reweighting only make sense for MC.
        wgts.add('genw', df['weight'])
        npv = df['trueInteractionNum']
        # self.pucorrs yields (central, up, down) pileup weight functions.
        wgts.add('pileup', *(f(npv) for f in self.pucorrs))
    # Event passes if ANY of the configured triggers fired.
    triggermask = np.logical_or.reduce([df[t] for t in Triggers])
    wgts.add('trigger', triggermask)
    cosmicpairmask = df['cosmicveto_result']
    wgts.add('cosmicveto', cosmicpairmask)
    pvmask = df['metfilters_PrimaryVertexFilter']
    wgts.add('primaryvtx', pvmask)
    # ...bla bla, other weights goes here
    weight = wgts.weight()
    ########################

    # Leptonjet candidates with their isolation attributes attached.
    leptonjets = JaggedCandidateArray.candidatesfromcounts(
        df['pfjet_p4'],
        px=df['pfjet_p4.fCoordinates.fX'],
        py=df['pfjet_p4.fCoordinates.fY'],
        pz=df['pfjet_p4.fCoordinates.fZ'],
        energy=df['pfjet_p4.fCoordinates.fT'],
        pfisoAll05=df['pfjet_pfIsolation05'],
        pfisoNopu05=df['pfjet_pfIsolationNoPU05'],
        pfisoDbeta=df['pfjet_pfiso'],
        ncands=df['pfjet_pfcands_n'],
    )

    # Classify leptonjets from their PF-candidate types:
    # type 3 = PF muon, type 8 = DSA muon (assumed encoding — matches the
    # ismutype definition below; confirm against the ntuple producer).
    ljdautype = awkward.fromiter(df['pfjet_pfcand_type'])
    npfmu = (ljdautype == 3).sum()
    ndsa = (ljdautype == 8).sum()
    isegammajet = (npfmu == 0) & (ndsa == 0)
    ispfmujet = (npfmu >= 2) & (ndsa == 0)
    isdsajet = ndsa > 0
    # label: 1 = egamma-type, 2 = PF-muon-type, 3 = DSA-type
    label = isegammajet.astype(int) * 1 + ispfmujet.astype(
        int) * 2 + isdsajet.astype(int) * 3
    leptonjets.add_attributes(label=label)
    nmu = ((ljdautype == 3) | (ljdautype == 8)).sum()
    leptonjets.add_attributes(ismutype=(nmu >= 2), iseltype=(nmu == 0))

    ## __twoleptonjets__
    # Keep only events with at least two leptonjets.
    twoleptonjets = leptonjets.counts >= 2
    dileptonjets = leptonjets[twoleptonjets]
    wgt = weight[twoleptonjets]
    if dileptonjets.size == 0:
        return output
    # Leading (highest-pt) and subleading leptonjet per event.
    lj0 = dileptonjets[dileptonjets.pt.argmax()]
    lj1 = dileptonjets[dileptonjets.pt.argsort()[:, 1:2]]

    ## channel def ##
    # channel 1 (2mu2e): exactly one mu-type leptonjet, and it is among
    # the two leading leptonjets.
    singleMuljEvents = dileptonjets.ismutype.sum() == 1
    muljInLeading2Events = (lj0.ismutype | lj1.ismutype).flatten()
    channel_2mu2e = (singleMuljEvents & muljInLeading2Events).astype(int) * 1

    # channel 2 (4mu): exactly two mu-type leptonjets, both leading.
    doubleMuljEvents = dileptonjets.ismutype.sum() == 2
    muljIsLeading2Events = (lj0.ismutype & lj1.ismutype).flatten()
    channel_4mu = (doubleMuljEvents & muljIsLeading2Events).astype(int) * 2

    # 0 = neither channel; the two categories are mutually exclusive.
    channel_ = channel_2mu2e + channel_4mu
    ###########

    # Control region: the two leading leptonjets are NOT back-to-back.
    isControl = (np.abs(lj0.p4.delta_phi(lj1.p4)) < np.pi / 2).flatten()

    ## __isControl__
    if self.dphi_control:
        dileptonjets = dileptonjets[isControl]
        wgt = wgt[isControl]
        lj0 = lj0[isControl]
        lj1 = lj1[isControl]
        channel_ = channel_[isControl]
    # (no-op else branch removed: outside the control region selection,
    # all arrays are kept as-is)
    if dileptonjets.size == 0:
        return output

    if self.data_type == 'bkg':
        # Scale background MC to its cross-section normalisation.
        wgt *= bkgSCALE[dataset]

    # Per-leptonjet isolation columns (flattened over events).
    output['all05'] += processor.column_accumulator(
        dileptonjets.pfisoAll05.flatten())
    output['nopu05'] += processor.column_accumulator(
        dileptonjets.pfisoNopu05.flatten())
    output['dbeta'] += processor.column_accumulator(
        dileptonjets.pfisoDbeta.flatten())
    # Same isolations divided by the number of PF candidates.
    output['all05w'] += processor.column_accumulator(
        (dileptonjets.pfisoAll05 / dileptonjets.ncands).flatten())
    output['nopu05w'] += processor.column_accumulator(
        (dileptonjets.pfisoNopu05 / dileptonjets.ncands).flatten())
    output['dbetaw'] += processor.column_accumulator(
        (dileptonjets.pfisoDbeta / dileptonjets.ncands).flatten())
    output['pt'] += processor.column_accumulator(dileptonjets.pt.flatten())
    output['eta'] += processor.column_accumulator(
        dileptonjets.eta.flatten())
    # Broadcast the per-event weight/channel to one entry per leptonjet.
    output['wgt'] += processor.column_accumulator(
        (dileptonjets.pt.ones_like() * wgt).flatten())
    # ljtype: 1 = mu-type, 2 = e-type.
    output['ljtype'] += processor.column_accumulator(
        (dileptonjets.ismutype.astype(int) * 1 +
         dileptonjets.iseltype.astype(int) * 2).flatten())
    output['channel'] += processor.column_accumulator(
        (dileptonjets.pt.ones_like() * channel_).flatten())
    return output
def process(self, events):
    """Multilepton analysis: same-sign dilepton and trilepton channels.

    Selects MVA-ID leptons and clean jets, builds 2-same-sign-lepton
    (ee/em/mm, on/off-Z) and 3-lepton (eem/mme/eee/mmm, on/off-Z)
    selections, then fills histograms of kinematic variables per
    (channel, jet-multiplicity level).

    NOTE(review): this uses coffea 0.x JaggedArray semantics
    (``.counts``, ``.cross``, ``.distincts``) and the removed aliases
    ``np.bool`` / ``np.int`` — breaks on NumPy >= 1.24; confirm pinned
    versions.
    """
    # Dataset parameters
    dataset = events.metadata['dataset']
    year = self._samples[dataset]['year']
    xsec = self._samples[dataset]['xsec']
    sow = self._samples[dataset]['nSumOfWeights']
    isData = self._samples[dataset]['isData']
    # Strip era suffix from data dataset names (e.g. "SingleMuon_2018A").
    datasets = [
        'SingleMuon', 'SingleElectron', 'EGamma', 'MuonEG', 'DoubleMuon',
        'DoubleElectron'
    ]
    for d in datasets:
        if d in dataset:
            dataset = dataset.split('_')[0]

    ### Recover objects, selection, functions and others...
    # Objects (several of these lookups are currently unused below:
    # isTightMuon, isTightElectron — kept for parity with other channels)
    isTightMuon = self._objects['isTightMuonPOG']
    isTightElectron = self._objects['isTightElectronPOG']
    isGoodJet = self._objects['isGoodJet']
    isClean = self._objects['isClean']
    isMuonMVA = self._objects[
        'isMuonMVA']  #isMuonMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, mediumPrompt, tightCharge, jetDeepB=0, minpt=15)
    isElecMVA = self._objects[
        'isElecMVA']  #isElecMVA(pt, eta, dxy, dz, miniIso, sip3D, mvaTTH, elecMVA, lostHits, convVeto, tightCharge, jetDeepB=0, minpt=15)

    # Corrections (unused in this method body; presumably applied in a
    # fuller version — verify before deleting)
    GetMuonIsoSF = self._corrections['getMuonIso']
    GetMuonIDSF = self._corrections['getMuonID']

    # Selection helpers (only passTrigger is used below)
    passNJets = self._selection['passNJets']
    passMETcut = self._selection['passMETcut']
    passTrigger = self._selection['passTrigger']

    # Functions (unused in this method body)
    pow2 = self._functions['pow2']
    IsClosestToZ = self._functions['IsClosestToZ']
    GetGoodTriplets = self._functions['GetGoodTriplets']

    # Initialize objects
    met = events.MET
    e = events.Electron
    mu = events.Muon
    j = events.Jet

    # Electron selection: attach MVA-based ID decision per electron.
    #e['isGood'] = e.pt.zeros_like()
    e['isGood'] = isElecMVA(e.pt,
                            e.eta,
                            e.dxy,
                            e.dz,
                            e.miniPFRelIso_all,
                            e.sip3d,
                            e.mvaTTH,
                            e.mvaFall17V2Iso,
                            e.lostHits,
                            e.convVeto,
                            e.tightCharge,
                            minpt=10)
    leading_e = e[e.pt.argmax()]
    leading_e = leading_e[leading_e.isGood.astype(np.bool)]

    # Muon selection: same pattern as electrons.
    mu['isGood'] = isMuonMVA(mu.pt,
                             mu.eta,
                             mu.dxy,
                             mu.dz,
                             mu.miniPFRelIso_all,
                             mu.sip3d,
                             mu.mvaTTH,
                             mu.mediumPromptId,
                             mu.tightCharge,
                             minpt=10)
    leading_mu = mu[mu.pt.argmax()]
    leading_mu = leading_mu[leading_mu.isGood.astype(np.bool)]

    # Keep only good leptons from here on; all nElec/nMuon counts below
    # refer to the selected collections.
    e = e[e.isGood.astype(np.bool)]
    mu = mu[mu.isGood.astype(np.bool)]
    nElec = e.counts
    nMuon = mu.counts

    # Lepton-multiplicity flags (twoLeps/threeLeps/twoElec/twoMuon are
    # computed but not used below).
    twoLeps = (nElec + nMuon) == 2
    threeLeps = (nElec + nMuon) == 3
    twoElec = (nElec == 2)
    twoMuon = (nMuon == 2)
    e0 = e[e.pt.argmax()]
    m0 = mu[mu.pt.argmax()]

    # Jet selection: good ID + cleaned against selected leptons.
    j['isgood'] = isGoodJet(j.pt, j.eta, j.jetId)
    j['isclean'] = isClean(j, e, mu)
    goodJets = j[(j.isclean) & (j.isgood)]
    njets = goodJets.counts
    ht = goodJets.pt.sum()
    j0 = goodJets[goodJets.pt.argmax()]
    # 0.2770 — presumably the DeepJet medium working point; confirm for
    # the campaign year.
    nbtags = goodJets[goodJets.btagDeepFlavB > 0.2770].counts

    ##################################################################
    ### 2 same-sign leptons
    ##################################################################

    # emu
    singe = e[(nElec == 1) & (nMuon == 1) & (e.pt > -1)]
    singm = mu[(nElec == 1) & (nMuon == 1) & (mu.pt > -1)]
    em = singe.cross(singm)
    emSSmask = (em.i0.charge * em.i1.charge > 0)
    emSS = em[emSSmask]
    nemSS = len(emSS.flatten())

    # ee and mumu
    # pt>-1 to preserve jagged dimensions
    ee = e[(nElec == 2) & (nMuon == 0) & (e.pt > -1)]
    mm = mu[(nElec == 0) & (nMuon == 2) & (mu.pt > -1)]

    # Same-sign and Z-window (|m_ll - 91| < 15 GeV) pair masks.
    eepairs = ee.distincts()
    eeSSmask = (eepairs.i0.charge * eepairs.i1.charge > 0)
    eeonZmask = (np.abs((eepairs.i0 + eepairs.i1).mass - 91) < 15)
    eeoffZmask = (eeonZmask == 0)

    mmpairs = mm.distincts()
    mmSSmask = (mmpairs.i0.charge * mmpairs.i1.charge > 0)
    mmonZmask = (np.abs((mmpairs.i0 + mmpairs.i1).mass - 91) < 15)
    mmoffZmask = (mmonZmask == 0)

    eeSSonZ = eepairs[eeSSmask & eeonZmask]
    eeSSoffZ = eepairs[eeSSmask & eeoffZmask]
    mmSSonZ = mmpairs[mmSSmask & mmonZmask]
    mmSSoffZ = mmpairs[mmSSmask & mmoffZmask]
    neeSS = len(eeSSonZ.flatten()) + len(eeSSoffZ.flatten())
    nmmSS = len(mmSSonZ.flatten()) + len(mmSSoffZ.flatten())
    #print('Same-sign events [ee, emu, mumu] = [%i, %i, %i]'%(neeSS, nemSS, nmmSS))

    # Cuts
    # Flatten per-pair masks into per-event booleans ("has at least one
    # passing pair"); the names are deliberately reused.
    eeSSmask = (eeSSmask[eeSSmask].counts > 0)
    mmSSmask = (mmSSmask[mmSSmask].counts > 0)
    eeonZmask = (eeonZmask[eeonZmask].counts > 0)
    eeoffZmask = (eeoffZmask[eeoffZmask].counts > 0)
    mmonZmask = (mmonZmask[mmonZmask].counts > 0)
    mmoffZmask = (mmoffZmask[mmoffZmask].counts > 0)
    emSSmask = (emSSmask[emSSmask].counts > 0)

    # njets

    ##################################################################
    ### 3 leptons
    ##################################################################

    # eem: two electrons forming an OS pair + one muon.
    muon_eem = mu[(nElec == 2) & (nMuon == 1) & (mu.pt > -1)]
    elec_eem = e[(nElec == 2) & (nMuon == 1) & (e.pt > -1)]

    ee_eem = elec_eem.distincts()
    # charge product < 1 selects opposite-sign pairs.
    ee_eemZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) < 15)
    ee_eemOffZmask = (ee_eem.i0.charge * ee_eem.i1.charge < 1) & (np.abs(
        (ee_eem.i0 + ee_eem.i1).mass - 91) > 15)
    ee_eemZmask = (ee_eemZmask[ee_eemZmask].counts > 0)
    ee_eemOffZmask = (ee_eemOffZmask[ee_eemOffZmask].counts > 0)

    eepair_eem = (ee_eem.i0 + ee_eem.i1)
    trilep_eem = eepair_eem.cross(muon_eem)
    trilep_eem = (trilep_eem.i0 + trilep_eem.i1)

    # mme: mirror of eem with muon pair + one electron.
    muon_mme = mu[(nElec == 1) & (nMuon == 2) & (mu.pt > -1)]
    elec_mme = e[(nElec == 1) & (nMuon == 2) & (e.pt > -1)]

    mm_mme = muon_mme.distincts()
    mm_mmeZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) < 15)
    mm_mmeOffZmask = (mm_mme.i0.charge * mm_mme.i1.charge < 1) & (np.abs(
        (mm_mme.i0 + mm_mme.i1).mass - 91) > 15)
    mm_mmeZmask = (mm_mmeZmask[mm_mmeZmask].counts > 0)
    mm_mmeOffZmask = (mm_mmeOffZmask[mm_mmeOffZmask].counts > 0)

    mmpair_mme = (mm_mme.i0 + mm_mme.i1)
    trilep_mme = mmpair_mme.cross(elec_mme)
    trilep_mme = (trilep_mme.i0 + trilep_mme.i1)

    # Z-candidate and trilepton invariant masses per channel.
    mZ_mme = mmpair_mme.mass
    mZ_eem = eepair_eem.mass
    m3l_eem = trilep_eem.mass
    m3l_mme = trilep_mme.mass

    ### eee and mmm
    eee = e[(nElec == 3) & (nMuon == 0) & (e.pt > -1)]
    mmm = mu[(nElec == 0) & (nMuon == 3) & (mu.pt > -1)]

    # Create pairs
    ee_pairs = eee.argchoose(2)
    mm_pairs = mmm.argchoose(2)

    # Select pairs that are SFOS.
    eeSFOS_pairs = ee_pairs[
        (np.abs(eee[ee_pairs.i0].pdgId) == np.abs(eee[ee_pairs.i1].pdgId))
        & (eee[ee_pairs.i0].charge != eee[ee_pairs.i1].charge)]
    mmSFOS_pairs = mm_pairs[
        (np.abs(mmm[mm_pairs.i0].pdgId) == np.abs(mmm[mm_pairs.i1].pdgId))
        & (mmm[mm_pairs.i0].charge != mmm[mm_pairs.i1].charge)]

    # Find the pair with mass closest to Z.
    eeOSSFmask = eeSFOS_pairs[np.abs((eee[eeSFOS_pairs.i0] +
                                      eee[eeSFOS_pairs.i1]).mass -
                                     91.2).argmin()]
    onZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                        91.2) < 15
    mmOSSFmask = mmSFOS_pairs[np.abs((mmm[mmSFOS_pairs.i0] +
                                      mmm[mmSFOS_pairs.i1]).mass -
                                     91.2).argmin()]
    onZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                        91.2) < 15
    offZmask_ee = np.abs((eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]).mass -
                         91.2) > 15
    offZmask_mm = np.abs((mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]).mass -
                         91.2) > 15

    # Create masks (per-event booleans, same reduce pattern as above)
    eeeOnZmask = onZmask_ee[onZmask_ee].counts > 0
    eeeOffZmask = offZmask_ee[offZmask_ee].counts > 0
    mmmOnZmask = onZmask_mm[onZmask_mm].counts > 0
    mmmOffZmask = offZmask_mm[offZmask_mm].counts > 0

    # Leptons from Z (eZ0/eZ1/mZ0/mZ1 are unused below)
    eZ0 = eee[eeOSSFmask.i0]
    eZ1 = eee[eeOSSFmask.i1]
    mZ0 = mmm[mmOSSFmask.i0]
    mZ1 = mmm[mmOSSFmask.i1]

    # Leptons from W (the lepton not in the best OSSF pair)
    eW = eee[~eeOSSFmask.i0 | ~eeOSSFmask.i1]
    mW = mmm[~mmOSSFmask.i0 | ~mmOSSFmask.i1]

    eZ = eee[eeOSSFmask.i0] + eee[eeOSSFmask.i1]
    triElec = eZ + eW
    mZ = mmm[mmOSSFmask.i0] + mmm[mmOSSFmask.i1]
    triMuon = mZ + mW
    mZ_eee = eZ.mass
    m3l_eee = triElec.mass
    mZ_mmm = mZ.mass
    m3l_mmm = triMuon.mass

    # Triggers
    #passTrigger = lambda events, n, m, o : np.ones_like(events['MET_pt'], dtype=np.bool) # XXX
    trig_eeSS = passTrigger(events, 'ee', isData, dataset)
    trig_mmSS = passTrigger(events, 'mm', isData, dataset)
    trig_emSS = passTrigger(events, 'em', isData, dataset)
    trig_eee = passTrigger(events, 'eee', isData, dataset)
    trig_mmm = passTrigger(events, 'mmm', isData, dataset)
    trig_eem = passTrigger(events, 'eem', isData, dataset)
    trig_mme = passTrigger(events, 'mme', isData, dataset)

    # MET filters

    # Weights: data gets unit weight, MC gets genWeight * xsec/sumOfWeights.
    genw = np.ones_like(
        events['MET_pt']) if isData else events['genWeight']
    weights = processor.Weights(events.size)
    weights.add('norm', genw if isData else (xsec / sow) * genw)

    # Selections and cuts
    selections = processor.PackedSelection()

    channels2LSS = ['eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ', 'emSS']
    selections.add('eeSSonZ', (eeonZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('eeSSoffZ', (eeoffZmask) & (eeSSmask) & (trig_eeSS))
    selections.add('mmSSonZ', (mmonZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('mmSSoffZ', (mmoffZmask) & (mmSSmask) & (trig_mmSS))
    selections.add('emSS', (emSSmask) & (trig_emSS))

    channels3L = ['eemSSonZ', 'eemSSoffZ', 'mmeSSonZ', 'mmeSSoffZ']
    selections.add('eemSSonZ', (ee_eemZmask) & (trig_eem))
    selections.add('eemSSoffZ', (ee_eemOffZmask) & (trig_eem))
    selections.add('mmeSSonZ', (mm_mmeZmask) & (trig_mme))
    selections.add('mmeSSoffZ', (mm_mmeOffZmask) & (trig_mme))

    channels3L += ['eeeSSonZ', 'eeeSSoffZ', 'mmmSSonZ', 'mmmSSoffZ']
    selections.add('eeeSSonZ', (eeeOnZmask) & (trig_eee))
    selections.add('eeeSSoffZ', (eeeOffZmask) & (trig_eee))
    selections.add('mmmSSonZ', (mmmOnZmask) & (trig_mmm))
    selections.add('mmmSSoffZ', (mmmOffZmask) & (trig_mmm))

    # Jet-multiplicity / b-tag levels.
    levels = ['base', '2jets', '4jets', '4j1b', '4j2b']
    selections.add('base', (nElec + nMuon >= 2))
    selections.add('2jets', (njets >= 2))
    selections.add('4jets', (njets >= 4))
    selections.add('4j1b', (njets >= 4) & (nbtags >= 1))
    selections.add('4j2b', (njets >= 4) & (nbtags >= 2))

    # Variables
    invMass_eeSSonZ = (eeSSonZ.i0 + eeSSonZ.i1).mass
    invMass_eeSSoffZ = (eeSSoffZ.i0 + eeSSoffZ.i1).mass
    invMass_mmSSonZ = (mmSSonZ.i0 + mmSSonZ.i1).mass
    invMass_mmSSoffZ = (mmSSoffZ.i0 + mmSSoffZ.i1).mass
    invMass_emSS = (emSS.i0 + emSS.i1).mass

    # Map histogram names to arrays; invmass/m3l are per-channel dicts.
    varnames = {}
    varnames['met'] = met.pt
    varnames['ht'] = ht
    varnames['njets'] = njets
    varnames['nbtags'] = nbtags
    varnames['invmass'] = {
        'eeSSonZ': invMass_eeSSonZ,
        'eeSSoffZ': invMass_eeSSoffZ,
        'mmSSonZ': invMass_mmSSonZ,
        'mmSSoffZ': invMass_mmSSoffZ,
        'emSS': invMass_emSS,
        'eemSSonZ': mZ_eem,
        'eemSSoffZ': mZ_eem,
        'mmeSSonZ': mZ_mme,
        'mmeSSoffZ': mZ_mme,
        'eeeSSonZ': mZ_eee,
        'eeeSSoffZ': mZ_eee,
        'mmmSSonZ': mZ_mmm,
        'mmmSSoffZ': mZ_mmm,
    }
    varnames['m3l'] = {
        'eemSSonZ': m3l_eem,
        'eemSSoffZ': m3l_eem,
        'mmeSSonZ': m3l_mme,
        'mmeSSoffZ': m3l_mme,
        'eeeSSonZ': m3l_eee,
        'eeeSSoffZ': m3l_eee,
        'mmmSSonZ': m3l_mmm,
        'mmmSSoffZ': m3l_mmm,
    }
    varnames['e0pt'] = e0.pt
    varnames['e0eta'] = e0.eta
    varnames['m0pt'] = m0.pt
    varnames['m0eta'] = m0.eta
    varnames['j0pt'] = j0.pt
    varnames['j0eta'] = j0.eta
    varnames['counts'] = np.ones_like(events.MET.pt, dtype=np.int)

    # Fill Histos
    hout = self.accumulator.identity()
    # 'dummy' carries the raw event count of this chunk as its weight.
    hout['dummy'].fill(sample=dataset, dummy=1, weight=events.size)

    for var, v in varnames.items():
        for ch in channels2LSS + channels3L:
            for lev in levels:
                weight = weights.weight()
                cuts = [ch] + [lev]
                cut = selections.all(*cuts)
                weights_flat = weight[cut].flatten()
                weights_ones = np.ones_like(weights_flat, dtype=np.int)
                if var == 'invmass':
                    # eee/mmm channels are skipped for invmass (both
                    # branches continue; the commented assignment hints
                    # at an unfinished path).
                    if ch in ['eeeSSoffZ', 'mmmSSoffZ']:
                        continue
                    elif ch in ['eeeSSonZ', 'mmmSSonZ']:
                        continue
                        #values = v[ch]
                    else:
                        values = v[ch][cut].flatten()
                        hout['invmass'].fill(sample=dataset,
                                             channel=ch,
                                             cut=lev,
                                             invmass=values,
                                             weight=weights_flat)
                elif var == 'm3l':
                    # m3l is only defined for eem/mme channels.
                    if ch in [
                            'eeSSonZ', 'eeSSoffZ', 'mmSSonZ', 'mmSSoffZ',
                            'emSS', 'eeeSSoffZ', 'mmmSSoffZ', 'eeeSSonZ',
                            'mmmSSonZ'
                    ]:
                        continue
                    values = v[ch][cut].flatten()
                    hout['m3l'].fill(sample=dataset,
                                     channel=ch,
                                     cut=lev,
                                     m3l=values,
                                     weight=weights_flat)
                else:
                    values = v[cut].flatten()
                    if var == 'ht':
                        hout[var].fill(ht=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'met':
                        hout[var].fill(met=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'njets':
                        hout[var].fill(njets=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'nbtags':
                        hout[var].fill(nbtags=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'counts':
                        # Raw counts histogram uses unit weights.
                        hout[var].fill(counts=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_ones)
                    elif var == 'e0pt':
                        # No electron in muon-only channels.
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        hout[var].fill(e0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0pt':
                        # No muon in electron-only channels.
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                'eeeSSonZ'
                        ]:
                            continue
                        hout[var].fill(m0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'e0eta':
                        if ch in [
                                'mmSSonZ', 'mmSSoffZ', 'mmmSSoffZ',
                                'mmmSSonZ'
                        ]:
                            continue
                        hout[var].fill(e0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'm0eta':
                        if ch in [
                                'eeSSonZ', 'eeSSoffZ', 'eeeSSoffZ',
                                'eeeSSonZ'
                        ]:
                            continue
                        hout[var].fill(m0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0pt':
                        # Jet variables need at least the 2jets level.
                        if lev == 'base':
                            continue
                        hout[var].fill(j0pt=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
                    elif var == 'j0eta':
                        if lev == 'base':
                            continue
                        hout[var].fill(j0eta=values,
                                       sample=dataset,
                                       channel=ch,
                                       cut=lev,
                                       weight=weights_flat)
    return hout
def get_veto_weights(df, evaluator, electrons, muons, taus, do_variations=False):
    """
    Calculate veto weights for SR W

    The weights are effectively:

        w = product(1-SF)

    where the product runs over veto-able e, mu, tau.

    :param df: event data frame (must provide ``size`` and ``df['dataset']``)
    :param evaluator: lookup of scale-factor functions by name, including
                      ``{name}_error`` entries for the uncertainty
    :param electrons: veto-able electron candidates (needs ``pt``, ``etasc``)
    :param muons: veto-able muon candidates (needs ``pt``, ``abseta``)
    :param taus: veto-able tau candidates (needs ``pt``)
    :param do_variations: also compute the up/down systematic variations
    :return: ``processor.Weights`` with one entry per variation
    """
    veto_weights = processor.Weights(size=df.size, storeIndividual=True)

    variations = ["nominal"]
    if do_variations:
        variations.extend([
            'ele_reco_up','ele_reco_dn',
            'ele_id_up','ele_id_dn',
            'muon_id_up','muon_id_dn',
            'muon_iso_up','muon_iso_dn',
            'tau_id_up','tau_id_dn'
            ])

    for variation in variations:
        # NOTE: the closure reads the loop variable `variation`, but it is
        # only called within the same iteration, so late binding is safe here.
        def varied_weight(sfname, *args):
            '''Helper function to easily get the correct weights for a given variation'''

            # For the nominal variation, just pass through
            if 'nominal' in variation:
                return evaluator[sfname](*args)

            # If this variation is unrelated to the SF at hand,
            # pass through as well
            if not (re.sub('_(up|dn)', '', variation) in sfname):
                return evaluator[sfname](*args)

            # Direction of variation: shift nominal by +/- one sigma
            # using the separately stored "{sfname}_error" lookup.
            sgn = 1 if variation.endswith("up") else -1
            return evaluator[sfname](*args) + sgn * evaluator[f"{sfname}_error"](*args)

        ### Electrons
        # 2017 reco SFs are split at pt = 20 GeV into two lookups.
        if extract_year(df['dataset']) == 2017:
            high_et = electrons.pt>20

            # Low pt SFs
            low_pt_args = (electrons.etasc[~high_et], electrons.pt[~high_et])
            ele_reco_sf_low = varied_weight('ele_reco_pt_lt_20', *low_pt_args)
            ele_id_sf_low = varied_weight("ele_id_loose", *low_pt_args)

            # High pt SFs
            high_pt_args = (electrons.etasc[high_et], electrons.pt[high_et])
            ele_reco_sf_high = varied_weight("ele_reco", *high_pt_args)
            ele_id_sf_high = varied_weight("ele_id_loose", *high_pt_args)

            # Combine
            veto_weight_ele = (1 - ele_reco_sf_low*ele_id_sf_low).prod() * (1-ele_reco_sf_high*ele_id_sf_high).prod()
        else:
            # No split for 2018
            args = (electrons.etasc, electrons.pt)
            ele_reco_sf = varied_weight("ele_reco", *args)
            ele_id_sf = varied_weight("ele_id_loose", *args)

            # Combine
            veto_weight_ele = (1 - ele_id_sf*ele_reco_sf).prod()

        ### Muons
        args = (muons.pt, muons.abseta)
        veto_weight_muo = (1 - varied_weight("muon_id_loose", *args)*varied_weight("muon_iso_loose", *args)).prod()

        ### Taus
        # Taus have their variations saved as separate histograms,
        # so our cool trick from above is replaced by the pedestrian way
        if "tau_id" in variation:
            direction = variation.split("_")[-1]
            tau_sf_name = f"tau_id_{direction}"
        else:
            tau_sf_name = "tau_id"

        veto_weight_tau = (1 - evaluator[tau_sf_name](taus.pt)).prod()

        ### Combine
        total = veto_weight_ele * veto_weight_muo * veto_weight_tau

        # Cap weights just in case
        # NOTE(review): outliers (|w| > 5) are RESET to 1, not clipped to
        # the cap — presumably intentional; confirm.
        total[np.abs(total)>5] = 1

        veto_weights.add(variation, total)

    return veto_weights
def process(self, df):
    """Run the monojet / mono-V selection on one chunk of events.

    Builds physics-object collections via ``setup_candidates``, defines all
    signal- and control-region cuts in a ``processor.PackedSelection``,
    derives event weights (gen, prefire, pileup, lepton/photon SFs, theory,
    per-region trigger and W-tag SFs), and fills the histograms declared in
    ``self.accumulator`` for every region returned by ``monojet_regions(cfg)``.

    :param df: dataframe-like chunk of NanoAOD events (LazyDataFrame style)
    :return: filled accumulator (histograms, cutflows, sumw bookkeeping,
             optional per-event trees and synchronization kinematics)
    """
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']

    # Dataset-level flags (each helper classifies by dataset name)
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_v_jet'] = has_v_jet(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g']
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt (used for theory reweighting on V+jets / photon MC)
    gen_v_pt = None
    if not df['is_data']:
        gen = setup_gen_candidates(df)
        if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']:
            dressed = setup_dressed_gen_candidates(df)
            fill_gen_v_info(df, gen, dressed)
            gen_v_pt = df['gen_v_pt_combined']
        elif df['is_lo_g']:
            # Leading stable (status==1) photon
            gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max()

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg)

    # Muons
    df['is_tight_muon'] = muons.tightId \
                          & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
                          & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \
                          & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA)

    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']

    # Transverse mass is only meaningful for exactly one muon
    df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons
    df['is_tight_electron'] = electrons.tightId \
                              & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
                              & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)

    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']

    df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4
    leadak4_index=ak4.pt.argmax()

    # Angular separations between leading jet and leptons / photons
    elejet_pairs = ak4[:,:1].cross(electrons)
    df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:,:1].cross(muons)
    df['dRMuonJet'] = np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min()

    # Photons
    # Angular distance leading photon - leading jet
    phojet_pairs = ak4[:,:1].cross(photons[:,:1])
    df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min()

    # Recoil
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons)
    df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4)

    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size)==1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts==0)
    selection.add('veto_muo', muons.counts==0)
    selection.add('veto_photon', photons.counts==0)
    selection.add('veto_tau', taus.counts==0)
    selection.add('veto_b', bjets.counts==0)
    selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    # NOTE(review): the jet-MET cut reuses the MINDPHIJR threshold; if a
    # separate MINDPHIJM config key is intended, confirm and update.
    selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL)

    # HEM mitigation only applies to 2018 data-taking conditions
    if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC):
        selection.add('hemveto', df['hemveto'])
    else:
        selection.add('hemveto', np.ones(df.size)==1)

    # AK4 Jet
    leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \
                     & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any()
    selection.add('leadak4_pt_eta', leadak4_pt_eta)

    selection.add('leadak4_id',(ak4.tightId[leadak4_index] \
                                & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \
                                & (ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any())

    # AK8 Jet
    leadak8_index=ak8.pt.argmax()
    leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \
                     & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any()
    selection.add('leadak8_pt_eta', leadak8_pt_eta)

    selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any())

    # Mono-V selection
    selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any())
    selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \
                                   & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any())

    # DeepAK8 W-tagging working points (mass-decorrelated and nominal)
    selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD) & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any())
    selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any())
    selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE) & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any())
    selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any())
    selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass"))
    selection.add('only_one_ak8', ak8.counts==1)

    # Dimuon CR
    leadmuon_index=muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                  & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge==0).any())
    selection.add('two_muons', muons.counts==2)

    # Single muon CR
    selection.add('one_muon', muons.counts==1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index=electrons.pt.argmax()
    selection.add('one_electron', electrons.counts==1)
    # (a second, identical 'two_electrons' add was removed here -- it was a
    # duplicate of this one)
    selection.add('two_electrons', electrons.counts==2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
                                      & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge==0).any())

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index=photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId \
                            & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts==1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if gen_v_pt is not None:
        output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight'])

    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])

    # Weights
    evaluator = evaluator_from_config(cfg)

    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            # Prefire weight is absent for some campaigns; default to unity
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons, electrons, photons)
        weights = pileup_weights(weights, df, evaluator, cfg)
        if not (gen_v_pt is None):
            weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt)

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask].flatten()]
            output['kinematics']['met_phi'] += [met_phi[mask].flatten()]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()]

            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()]
            output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()]
            # FIX: was `[ak4.pt.max()<0][mask]`, which indexes a one-element
            # Python list with an array mask and raises TypeError. The
            # quantity itself (always False) looks like a placeholder --
            # TODO confirm the intended b-tag discriminant.
            output['kinematics']['leadbtag'] += [(ak4.pt.max() < 0)[mask]]

            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()]
            output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()]
            output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()]
            output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()]

            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()]
            output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()]
            output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()]

            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()]
            output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()]
            output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights.partial_weight(include=['pileup']).sum()

    # Gen-V matching of the leading AK8 jet is region-independent, so it is
    # computed once here (hoisted out of the region loop) for use in the
    # W-tag SF weighting and the gen-matched pt histogram below.
    if not df['is_data']:
        genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)]
        leadak8 = ak8[ak8.pt.argmax()]
        leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8)
        matched_leadak8 = leadak8[leadak8_matched_mask]
        unmatched_leadak8 = leadak8[~leadak8_matched_mask]

    regions = monojet_regions(cfg)
    for region, cuts in regions.items():
        # Per-region copy so trigger / W-tag SFs don't leak between regions
        region_weights = copy.deepcopy(weights)

        if not df['is_data']:
            ### Trigger weights
            if re.match(r'cr_(\d+)e.*', region):
                region_weights.add('trigger', np.ones(df.size))
            elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region):
                region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt']))
            elif re.match(r'cr_g.*', region):
                region_weights.add('trigger', np.ones(df.size))

        if not df['is_data']:
            ### W-tag scale factors for mono-V regions
            for wp in ['loose','loosemd','tight','tightmd']:
                # FIX: was the plain raw string r'.*_{wp}_v.*', which matched
                # the literal text "{wp}" and therefore never fired.
                if re.match(rf'.*_{wp}_v.*', region):
                    if (wp == 'tight') or ('nomistag' in region):
                        # no mistag SF available for tight cut
                        matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod()
                    else:
                        matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \
                                          * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod()
                    # FIX: was the literal name 'wtag_{wp}' (missing f prefix),
                    # which would collide across working points.
                    region_weights.add(f'wtag_{wp}', matched_weights)

        # Blinding
        if(self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all']+=df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum()

        mask = selection.all(*cuts)

        if cfg.RUN.SAVE.TREE:
            def fill_tree(variable, values):
                treeacc = processor.column_accumulator(values)
                name = f'tree_{region}_{variable}'
                if dataset in output[name].keys():
                    output[name][dataset] += treeacc
                else:
                    output[name][dataset] = treeacc
            if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']:
                fill_tree('recoil',df['recoil_pt'][mask].flatten())
                fill_tree('weight',region_weights.weight()[mask].flatten())
                if gen_v_pt is not None:
                    fill_tree('gen_v_pt',gen_v_pt[mask].flatten())
                else:
                    fill_tree('gen_v_pt', -1 * np.ones(sum(mask)))

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            output[name].fill(
                dataset=dataset,
                region=region,
                multiplicity=candidates[mask].counts,
                weight=region_weights.weight()[mask]
            )

        fill_mult('ak8_mult', ak8)
        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult',bjets)
        fill_mult('loose_ele_mult',electrons)
        fill_mult('tight_ele_mult',electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult',muons)
        fill_mult('tight_muo_mult',muons[df['is_tight_muon']])
        fill_mult('tau_mult',taus)
        fill_mult('photon_mult',photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(
                dataset=dataset,
                region=region,
                **kwargs
            )

        # Monitor weights
        for wname, wvalue in region_weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask])

        ezfill('ak4_eta',    jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi',    jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_pt',     jetpt=ak4[mask].pt.flatten(),   weight=w_alljets)

        # Leading ak4
        w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask])
        ezfill('ak4_eta0',   jeteta=ak4[leadak4_index].eta[mask].flatten(),    weight=w_leadak4)
        ezfill('ak4_phi0',   jetphi=ak4[leadak4_index].phi[mask].flatten(),    weight=w_leadak4)
        ezfill('ak4_pt0',    jetpt=ak4[leadak4_index].pt[mask].flatten(),      weight=w_leadak4)
        ezfill('ak4_ptraw0', jetpt=ak4[leadak4_index].ptraw[mask].flatten(),   weight=w_leadak4)
        ezfill('ak4_chf0',   frac=ak4[leadak4_index].chf[mask].flatten(),      weight=w_leadak4)
        ezfill('ak4_nhf0',   frac=ak4[leadak4_index].nhf[mask].flatten(),      weight=w_leadak4)

        ezfill('drelejet',    dr=df['dREleJet'][mask],    weight=region_weights.weight()[mask])
        ezfill('drmuonjet',   dr=df['dRMuonJet'][mask],   weight=region_weights.weight()[mask])
        ezfill('drphotonjet', dr=df['dRPhotonJet'][mask], weight=region_weights.weight()[mask])

        # AK8 jets
        if region=='inclusive' or region.endswith('v'):
            # All
            w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask])

            ezfill('ak8_eta',    jeteta=ak8[mask].eta.flatten(), weight=w_allak8)
            ezfill('ak8_phi',    jetphi=ak8[mask].phi.flatten(), weight=w_allak8)
            ezfill('ak8_pt',     jetpt=ak8[mask].pt.flatten(),   weight=w_allak8)
            ezfill('ak8_mass',   mass=ak8[mask].mass.flatten(),  weight=w_allak8)

            # Leading
            w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask])

            ezfill('ak8_eta0',      jeteta=ak8[leadak8_index].eta[mask].flatten(),      weight=w_leadak8)
            ezfill('ak8_phi0',      jetphi=ak8[leadak8_index].phi[mask].flatten(),      weight=w_leadak8)
            ezfill('ak8_pt0',       jetpt=ak8[leadak8_index].pt[mask].flatten(),        weight=w_leadak8 )
            ezfill('ak8_mass0',     mass=ak8[leadak8_index].mass[mask].flatten(),       weight=w_leadak8)
            ezfill('ak8_tau210',    tau21=ak8[leadak8_index].tau21[mask].flatten(),     weight=w_leadak8)
            ezfill('ak8_wvsqcd0',   tagger=ak8[leadak8_index].wvsqcd[mask].flatten(),   weight=w_leadak8)
            ezfill('ak8_wvsqcdmd0', tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(), weight=w_leadak8)
            ezfill('ak8_zvsqcd0',   tagger=ak8[leadak8_index].zvsqcd[mask].flatten(),   weight=w_leadak8)
            ezfill('ak8_zvsqcdmd0', tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(), weight=w_leadak8)

            # histogram with only gen-matched lead ak8 pt
            if not df['is_data']:
                w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask])
                ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(), weight=w_matchedleadak8 )

            # Dimuon specifically for deepak8 mistag rate measurement
            if 'inclusive_v' in region:
                ezfill('ak8_passloose_pt0',    wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE,      jetpt=ak8[leadak8_index].pt[mask].max(),   weight=w_leadak8 )
                ezfill('ak8_passtight_pt0',    wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT,      jetpt=ak8[leadak8_index].pt[mask].max(),   weight=w_leadak8 )
                ezfill('ak8_passloosemd_pt0',  wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD,  jetpt=ak8[leadak8_index].pt[mask].max(),   weight=w_leadak8 )
                ezfill('ak8_passtightmd_pt0',  wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD,  jetpt=ak8[leadak8_index].pt[mask].max(),   weight=w_leadak8 )
                ezfill('ak8_passloose_mass0',   wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE,     mass=ak8[leadak8_index].mass[mask].max(),  weight=w_leadak8 )
                ezfill('ak8_passtight_mass0',   wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT,     mass=ak8[leadak8_index].mass[mask].max(),  weight=w_leadak8 )
                ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(),  weight=w_leadak8 )
                ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(),  weight=w_leadak8 )

        # MET
        ezfill('dpfcalo',    dpfcalo=df["dPFCalo"][mask],      weight=region_weights.weight()[mask] )
        ezfill('met',        met=met_pt[mask],                 weight=region_weights.weight()[mask] )
        ezfill('met_phi',    phi=met_phi[mask],                weight=region_weights.weight()[mask] )
        ezfill('recoil',     recoil=df["recoil_pt"][mask],     weight=region_weights.weight()[mask] )
        ezfill('recoil_phi', phi=df["recoil_phi"][mask],       weight=region_weights.weight()[mask] )

        # Recoil with parts of the weight removed, for SF validation
        ezfill('recoil_nopog',  recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask])
        ezfill('recoil_nopref', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['prefire'])[mask])
        ezfill('recoil_nopu',   recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('recoil_notrg',  recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['trigger'])[mask])

        ezfill('ak4_pt0_over_recoil', ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask], weight=region_weights.weight()[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask],    weight=region_weights.weight()[mask] )
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=region_weights.weight()[mask] )

        # Lepton/photon plots are skipped for veto-free regions
        if 'noveto' in region:
            continue

        # Muons
        if '_1m_' in region or '_2m_' in region:
            w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask])
            ezfill('muon_pt',      pt=muons.pt[mask].flatten(),   weight=w_allmu )
            ezfill('muon_mt',      mt=df['MT_mu'][mask],          weight=region_weights.weight()[mask])
            ezfill('muon_eta',     eta=muons.eta[mask].flatten(), weight=w_allmu)
            ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu)
            ezfill('muon_phi',     phi=muons.phi[mask].flatten(), weight=w_allmu)
            ezfill('muon_dxy',     dxy=muons.dxy[mask].flatten(), weight=w_allmu)
            ezfill('muon_dz',      dz=muons.dz[mask].flatten(),   weight=w_allmu)

            # Leading muon
            w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask])
            ezfill('muon_pt0',  pt=muons[leadmuon_index].pt[mask].flatten(),   weight=w_leadmu )
            ezfill('muon_eta0', eta=muons[leadmuon_index].eta[mask].flatten(), weight=w_leadmu)
            ezfill('muon_phi0', phi=muons[leadmuon_index].phi[mask].flatten(), weight=w_leadmu)
            ezfill('muon_dxy0', dxy=muons[leadmuon_index].dxy[mask].flatten(), weight=w_leadmu)
            ezfill('muon_dz0',  dz=muons[leadmuon_index].dz[mask].flatten(),   weight=w_leadmu)

        # Dimuon
        if '_2m_' in region:
            w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask])
            ezfill('dimuon_pt',   pt=dimuons.pt[mask].flatten(),              weight=w_dimu)
            ezfill('dimuon_eta',  eta=dimuons.eta[mask].flatten(),            weight=w_dimu)
            ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu )
            ezfill('dimuon_dr',   dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu )

            ezfill('muon_pt1',  pt=muons[~leadmuon_index].pt[mask].flatten(),   weight=w_leadmu )
            ezfill('muon_eta1', eta=muons[~leadmuon_index].eta[mask].flatten(), weight=w_leadmu)
            ezfill('muon_phi1', phi=muons[~leadmuon_index].phi[mask].flatten(), weight=w_leadmu)

        # Electrons
        if '_1e_' in region or '_2e_' in region:
            w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask])
            ezfill('electron_pt',      pt=electrons.pt[mask].flatten(),    weight=w_allel)
            ezfill('electron_mt',      mt=df['MT_el'][mask],               weight=region_weights.weight()[mask])
            ezfill('electron_eta',     eta=electrons.eta[mask].flatten(),  weight=w_allel)
            ezfill('electron_phi',     phi=electrons.phi[mask].flatten(),  weight=w_allel)
            ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel)
            ezfill('electron_dz',      dz=electrons.dz[mask].flatten(),    weight=w_allel)
            ezfill('electron_dxy',     dxy=electrons.dxy[mask].flatten(),  weight=w_allel)

            w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask])
            ezfill('electron_pt0',  pt=electrons[leadelectron_index].pt[mask].flatten(),   weight=w_leadel)
            ezfill('electron_eta0', eta=electrons[leadelectron_index].eta[mask].flatten(), weight=w_leadel)
            ezfill('electron_phi0', phi=electrons[leadelectron_index].phi[mask].flatten(), weight=w_leadel)

            w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask])
            ezfill('electron_tightid1', id=electrons[~leadelectron_index].tightId[mask].flatten(), weight=w_trailel)

        # Dielectron
        if '_2e_' in region:
            w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask])
            ezfill('dielectron_pt',   pt=dielectrons.pt[mask].flatten(),              weight=w_diel)
            ezfill('dielectron_eta',  eta=dielectrons.eta[mask].flatten(),            weight=w_diel)
            ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel)
            ezfill('dielectron_dr',   dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel )

            ezfill('electron_pt1',  pt=electrons[~leadelectron_index].pt[mask].flatten(),   weight=w_leadel)
            ezfill('electron_eta1', eta=electrons[~leadelectron_index].eta[mask].flatten(), weight=w_leadel)
            ezfill('electron_phi1', phi=electrons[~leadelectron_index].phi[mask].flatten(), weight=w_leadel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask])

            ezfill('photon_pt0',     pt=photons[leadphoton_index].pt[mask].flatten(),   weight=w_leading_photon)
            ezfill('photon_eta0',    eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)
            ezfill('photon_phi0',    phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon)
            ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon)

            # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask])

        # PV
        ezfill('npv',           nvtx=df['PV_npvs'][mask],     weight=region_weights.weight()[mask])
        ezfill('npvgood',       nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask])
        ezfill('npv_nopu',      nvtx=df['PV_npvs'][mask],     weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('npvgood_nopu',  nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])

        ezfill('rho_all',          rho=df['fixedGridRhoFastjetAll'][mask],     weight=region_weights.weight()[mask])
        ezfill('rho_central',      rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask])
        ezfill('rho_all_nopu',     rho=df['fixedGridRhoFastjetAll'][mask],     weight=region_weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask])

    return output