def process(self, events): output = self.accumulator.identity() # use a very loose preselection to filter the events presel = ak.num(events.Jet)>2 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore cfg = loadConfig() output['totalEvents']['all'] += len(events) output['skimmedEvents']['all'] += len(ev) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): ## Generated leptons gen_lep = ev.GenL leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))] trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))] ## Muons muon = Collections(ev, "Muon", "tightSSTTH").get() vetomuon = Collections(ev, "Muon", "vetoTTH").get() dimuon = choose(muon, 2) SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge)>0, axis=1) leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1)) leading_muon = muon[leading_muon_idx] ## Electrons electron = Collections(ev, "Electron", "tightSSTTH").get() vetoelectron = Collections(ev, "Electron", "vetoTTH").get() dielectron = choose(electron, 2) SSelectron = ak.any((dielectron['0'].charge * dielectron['1'].charge)>0, axis=1) leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1)) leading_electron = electron[leading_electron_idx] ## Merge electrons and muons - this should work better now in ak1 dilepton = cross(muon, electron) SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge)>0, axis=1) lepton = ak.concatenate([muon, electron], axis=1) leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1)) leading_lepton = lepton[leading_lepton_idx] trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1)) trailing_lepton = lepton[trailing_lepton_idx] dilepton_mass = (leading_lepton+trailing_lepton).mass dilepton_pt = (leading_lepton+trailing_lepton).pt dilepton_dR = delta_r(leading_lepton, trailing_lepton) lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.pt, ascending=False)].pdgId, 2, clip=True), 0) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon) n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart) mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) ## Tau and other stuff tau = getTaus(ev) track = getIsoTracks(ev) ## Jets jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom') jet = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta)<2.4)] btag = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2] bl = cross(lepton, high_score_btag) bl_dR = delta_r(bl['0'], bl['1']) min_bl_dR = ak.min(bl_dR, axis=1) ## forward jets j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator jf = cross(j_fwd, jet) mjf = (jf['0']+jf['1']).mass j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd delta_eta = abs(j_fwd2.eta - j_fwd.eta) ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi ## other variables ht = ak.sum(jet.pt, axis=1) st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1) # define the weight weight = Weights( len(ev) ) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): # lumi weight weight.add("weight", ev.weight*cfg['lumi'][self.year]) #weight.add("weight", ev.genWeight*cfg['lumi'][self.year]*mult) # PU weight - not in the babies... weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False) # b-tag SFs weight.add("btag", self.btagSF.Method1a(btag, light)) # lepton SFs weight.add("lepton", self.leptonSF.get(electron, muon)) cutflow = Cutflow(output, ev, weight=weight) sel = Selection( dataset = dataset, events = ev, year = self.year, ele = electron, ele_veto = vetoelectron, mu = muon, mu_veto = vetomuon, jet_all = jet, jet_central = central, jet_btag = btag, jet_fwd = fwd, met = ev.MET, ) BL = sel.dilep_baseline(cutflow=cutflow, SS=True) weight_BL = weight.weight()[BL] if True: # define the inputs to the NN # this is super stupid. there must be a better way. NN_inputs = np.stack([ ak.to_numpy(ak.num(jet[BL])), ak.to_numpy(ak.num(tau[BL])), ak.to_numpy(ak.num(track[BL])), ak.to_numpy(st[BL]), ak.to_numpy(ev.MET[BL].pt), ak.to_numpy(ak.max(mjf[BL], axis=1)), ak.to_numpy(pad_and_flatten(delta_eta[BL])), ak.to_numpy(pad_and_flatten(leading_lepton[BL].pt)), ak.to_numpy(pad_and_flatten(leading_lepton[BL].eta)), ak.to_numpy(pad_and_flatten(trailing_lepton[BL].pt)), ak.to_numpy(pad_and_flatten(trailing_lepton[BL].eta)), ak.to_numpy(pad_and_flatten(dilepton_mass[BL])), ak.to_numpy(pad_and_flatten(dilepton_pt[BL])), ak.to_numpy(pad_and_flatten(j_fwd[BL].pt)), ak.to_numpy(pad_and_flatten(j_fwd[BL].p)), ak.to_numpy(pad_and_flatten(j_fwd[BL].eta)), ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].pt)), ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].pt)), ak.to_numpy(pad_and_flatten(jet[:, 0:1][BL].eta)), ak.to_numpy(pad_and_flatten(jet[:, 1:2][BL].eta)), ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].pt)), ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].pt)), ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1][BL].eta)), ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2][BL].eta)), ak.to_numpy(min_bl_dR[BL]), ak.to_numpy(min_mt_lep_met[BL]), ]) NN_inputs = np.moveaxis(NN_inputs, 0, 1) model, scaler = load_onnx_model('v8') try: NN_inputs_scaled = scaler.transform(NN_inputs) NN_pred = predict_onnx(model, NN_inputs_scaled) best_score = np.argmax(NN_pred, axis=1) except ValueError: #print ("Empty NN_inputs") NN_pred = np.array([]) best_score = np.array([]) NN_inputs_scaled = NN_inputs #k.clear_session() output['node'].fill(dataset=dataset, multiplicity=best_score, weight=weight_BL) output['node0_score_incl'].fill(dataset=dataset, score=NN_pred[:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL) output['node0_score'].fill(dataset=dataset, score=NN_pred[best_score==0][:,0] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==0]) output['node1_score'].fill(dataset=dataset, score=NN_pred[best_score==1][:,1] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==1]) output['node2_score'].fill(dataset=dataset, score=NN_pred[best_score==2][:,2] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==2]) output['node3_score'].fill(dataset=dataset, score=NN_pred[best_score==3][:,3] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==3]) output['node4_score'].fill(dataset=dataset, score=NN_pred[best_score==4][:,4] if np.shape(NN_pred)[0]>0 else np.array([]), weight=weight_BL[best_score==4]) SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0))) SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0))) leading_lepton_BL = leading_lepton[BL] output['lead_lep_SR_pp'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)), weight = weight_BL[SR_sel_pp] ) output['lead_lep_SR_mm'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)), weight = weight_BL[SR_sel_mm] ) del model del scaler del NN_inputs, NN_inputs_scaled, NN_pred # first, make a few super inclusive plots output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL) output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL) output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight_BL) output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL], weight=weight_BL) output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight_BL) output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL) output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight_BL) output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL], weight=weight_BL) output['ST'].fill(dataset=dataset, pt=st[BL], weight=weight_BL) output['HT'].fill(dataset=dataset, pt=ht[BL], weight=weight_BL) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL) output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL) output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL) output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL) output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL) output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL) output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL) output['MET'].fill( dataset = dataset, pt = ev.MET[BL].pt, phi = ev.MET[BL].phi, weight = weight_BL ) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['lead_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)), weight = weight_BL ) output['trail_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)), weight = weight_BL ) output['lead_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_lepton[BL].pt)), eta = ak.to_numpy(ak.flatten(leading_lepton[BL].eta)), phi = ak.to_numpy(ak.flatten(leading_lepton[BL].phi)), weight = weight_BL ) output['trail_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)), eta = ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)), phi = ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)), weight = weight_BL ) output['j1'].fill( dataset = dataset, pt = ak.flatten(jet.pt_nom[:, 0:1][BL]), eta = ak.flatten(jet.eta[:, 0:1][BL]), phi = ak.flatten(jet.phi[:, 0:1][BL]), weight = weight_BL ) output['j2'].fill( dataset = dataset, pt = ak.flatten(jet[:, 1:2][BL].pt_nom), eta = ak.flatten(jet[:, 1:2][BL].eta), phi = ak.flatten(jet[:, 1:2][BL].phi), weight = weight_BL ) output['j3'].fill( dataset = dataset, pt = ak.flatten(jet[:, 2:3][BL].pt_nom), eta = ak.flatten(jet[:, 2:3][BL].eta), phi = ak.flatten(jet[:, 2:3][BL].phi), weight = weight_BL ) output['fwd_jet'].fill( dataset = dataset, pt = ak.flatten(j_fwd[BL].pt), eta = ak.flatten(j_fwd[BL].eta), phi = ak.flatten(j_fwd[BL].phi), weight = weight_BL ) output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(j_fwd[BL].p), weight = weight_BL) return output
def process(self, events): events = events[ ak.num(events.Jet) > 0] #corrects for rare case where there isn't a single jet in event output = self.accumulator.identity() # we can use a very loose preselection to filter the events. nothing is done with this presel, though presel = ak.num(events.Jet) >= 0 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore # cfg = loadConfig() output['totalEvents']['all'] += len(events) output['skimmedEvents']['all'] += len(ev) ### For FCNC, we want electron -> tightTTH electron = Collections(ev, "Electron", "tightFCNC").get() fakeableelectron = Collections(ev, "Electron", "fakeableFCNC").get() muon = Collections(ev, "Muon", "tightFCNC").get() fakeablemuon = Collections(ev, "Muon", "fakeableFCNC").get() ##Jets Jets = events.Jet ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi lepton = fakeablemuon #ak.concatenate([fakeablemuon, fakeableelectron], axis=1) mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) selection = PackedSelection() selection.add('MET<20', (ev.MET.pt < 20)) selection.add('mt<20', min_mt_lep_met < 20) #selection.add('MET<19', (ev.MET.pt<19) ) selection_reqs = ['MET<20', 'mt<20'] #, 'MET<19'] fcnc_reqs_d = {sel: True for sel in selection_reqs} fcnc_selection = selection.require(**fcnc_reqs_d) # define the weight weight = Weights(len(ev)) if not dataset == 'MuonEG': # generator weight weight.add("weight", ev.genWeight) jets = getJets( ev, maxEta=2.4, minPt=25, pt_var='pt' ) #& (ak.num(jets[~match(jets, fakeablemuon, deltaRCut=1.0)])>=1) single_muon_sel = (ak.num(muon) == 1) & (ak.num(fakeablemuon) == 1) | ( ak.num(muon) == 0) & (ak.num(fakeablemuon) == 1) single_electron_sel = (ak.num(electron) == 1) & ( ak.num(fakeableelectron) == 1) | (ak.num(electron) == 0) & (ak.num(fakeableelectron) == 1) fcnc_muon_sel = (ak.num( jets[~match(jets, fakeablemuon, deltaRCut=1.0)]) >= 1) & fcnc_selection & single_muon_sel fcnc_electron_sel = (ak.num( jets[~match(jets, fakeableelectron, deltaRCut=1.0)]) >= 1) & fcnc_selection & single_electron_sel tight_muon_sel = (ak.num(muon) == 1) & fcnc_muon_sel loose_muon_sel = (ak.num(fakeablemuon) == 1) & fcnc_muon_sel tight_electron_sel = (ak.num(electron) == 1) & fcnc_electron_sel loose_electron_sel = (ak.num(fakeableelectron) == 1) & fcnc_electron_sel output['single_mu_fakeable'].fill( dataset=dataset, pt=ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].conePt)), eta=np.abs( ak.to_numpy(ak.flatten(fakeablemuon[loose_muon_sel].eta)))) output['single_mu'].fill( dataset=dataset, pt=ak.to_numpy(ak.flatten(muon[tight_muon_sel].conePt)), eta=np.abs(ak.to_numpy(ak.flatten(muon[tight_muon_sel].eta)))) output['single_e_fakeable'].fill( dataset=dataset, pt=ak.to_numpy( ak.flatten(fakeableelectron[loose_electron_sel].conePt)), eta=np.abs( ak.to_numpy( ak.flatten(fakeableelectron[loose_electron_sel].eta)))) output['single_e'].fill( dataset=dataset, pt=ak.to_numpy(ak.flatten(electron[tight_electron_sel].conePt)), eta=np.abs( ak.to_numpy(ak.flatten(electron[tight_electron_sel].eta)))) if self.debug: #create pandas dataframe for debugging passed_events = ev[tight_muon_sel] passed_muons = muon[tight_muon_sel] event_p = ak.to_pandas(passed_events[["event"]]) event_p["MET_PT"] = passed_events["MET"]["pt"] event_p["mt"] = min_mt_lep_met[tight_muon_sel] event_p["num_tight_mu"] = ak.to_numpy(ak.num(muon)[tight_muon_sel]) event_p["num_loose_mu"] = ak.num(fakeablemuon)[tight_muon_sel] muon_p = ak.to_pandas( ak.flatten(passed_muons)[[ "pt", "conePt", "eta", "dz", "dxy", "ptErrRel", "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso", "jetPtRelv2" ]]) #convert to numpy array for the output events_array = pd.concat([muon_p, event_p], axis=1) events_to_add = [6886009] for e in events_to_add: tmp_event = ev[ev.event == e] added_event = ak.to_pandas(tmp_event[["event"]]) added_event["MET_PT"] = tmp_event["MET"]["pt"] added_event["mt"] = min_mt_lep_met[ev.event == e] added_event["num_tight_mu"] = ak.to_numpy( ak.num(muon)[ev.event == e]) added_event["num_loose_mu"] = ak.to_numpy( ak.num(fakeablemuon)[ev.event == e]) add_muon = ak.to_pandas( ak.flatten(muon[ev.event == e])[[ "pt", "conePt", "eta", "dz", "dxy", "ptErrRel", "miniPFRelIso_all", "jetRelIsoV2", "jetRelIso", "jetPtRelv2" ]]) add_concat = pd.concat([add_muon, added_event], axis=1) events_array = pd.concat([events_array, add_concat], axis=0) output['muons_df'] += processor.column_accumulator( events_array.to_numpy()) return output
def process(self, events): output = self.accumulator.identity() output['total']['all'] += len(events) # use a very loose preselection to filter the events presel = ak.num(events.Jet) > 2 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore cfg = loadConfig() ## Muons muon = Collections(ev, "Muon", "vetoTTH").get() tightmuon = Collections(ev, "Muon", "tightTTH").get() dimuon = choose(muon, 2) SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1) leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1)) leading_muon = muon[leading_muon_idx] ## Electrons electron = Collections(ev, "Electron", "vetoTTH").get() tightelectron = Collections(ev, "Electron", "tightTTH").get() dielectron = choose(electron, 2) SSelectron = ak.any( (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1) leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1)) leading_electron = electron[leading_electron_idx] ## Merge electrons and muons - this should work better now in ak1 dilepton = cross(muon, electron) SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0, axis=1) lepton = ak.concatenate([muon, electron], axis=1) leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1)) leading_lepton = lepton[leading_lepton_idx] trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1)) trailing_lepton = lepton[trailing_lepton_idx] dilepton_mass = (leading_lepton + trailing_lepton).mass dilepton_pt = (leading_lepton + trailing_lepton).pt dilepton_dR = delta_r(leading_lepton, trailing_lepton) mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) ## Jets jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom') jet = jet[ak.argsort( jet.pt_nom, ascending=False )] # need to sort wrt smeared and recorrected jet pt jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match( jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta) < 2.4)] btag = getBTagsDeepFlavB( jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) tau = getTaus(ev) track = getIsoTracks(ev) ## forward jets j_fwd = fwd[ak.singletons(ak.argmax( fwd.p, axis=1))] # highest momentum spectator high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2] bl = cross(lepton, high_score_btag) bl_dR = delta_r(bl['0'], bl['1']) min_bl_dR = ak.min(bl_dR, axis=1) jf = cross(j_fwd, jet) mjf = (jf['0'] + jf['1']).mass j_fwd2 = jf[ak.singletons( ak.argmax(mjf, axis=1) )]['1'] # this is the jet that forms the largest invariant mass with j_fwd delta_eta = ak.fill_none( ak.pad_none(abs(j_fwd2.eta - j_fwd.eta), 1, clip=True), 0) ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi ## other variables ht = ak.sum(jet.pt, axis=1) st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1) ## event selectors filters = getFilters(ev, year=self.year, dataset=dataset) dilep = ((ak.num(tightelectron) + ak.num(tightmuon)) == 2) lep0pt = ((ak.num(electron[(electron.pt > 25)]) + ak.num(muon[(muon.pt > 25)])) > 0) lep1pt = ((ak.num(electron[(electron.pt > 20)]) + ak.num(muon[(muon.pt > 20)])) > 1) lepveto = ((ak.num(electron) + ak.num(muon)) == 2) selection = PackedSelection() selection.add('lepveto', lepveto) selection.add('dilep', dilep) selection.add('filter', (filters)) selection.add('p_T(lep0)>25', lep0pt) selection.add('p_T(lep1)>20', lep1pt) selection.add('SS', (SSlepton | SSelectron | SSmuon)) selection.add('N_jet>3', (ak.num(jet) >= 4)) selection.add('N_central>2', (ak.num(central) >= 3)) selection.add('N_btag>0', (ak.num(btag) >= 1)) selection.add('N_fwd>0', (ak.num(fwd) >= 1)) #ss_reqs = ['lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS'] ss_reqs = [ 'lepveto', 'dilep', 'filter', 'p_T(lep0)>25', 'p_T(lep1)>20', 'SS' ] #bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0', 'N_fwd>0'] bl_reqs = ss_reqs + ['N_jet>3', 'N_central>2', 'N_btag>0'] ss_reqs_d = {sel: True for sel in ss_reqs} ss_selection = selection.require(**ss_reqs_d) bl_reqs_d = {sel: True for sel in bl_reqs} BL = selection.require(**bl_reqs_d) weight = Weights(len(ev)) if not dataset == 'MuonEG': # lumi weight weight.add("weight", ev.weight) # PU weight - not in the babies... weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False) # b-tag SFs weight.add("btag", self.btagSF.Method1a(btag, light)) # lepton SFs weight.add("lepton", self.leptonSF.get(electron, muon)) #cutflow = Cutflow(output, ev, weight=weight) #cutflow_reqs_d = {} #for req in bl_reqs: # cutflow_reqs_d.update({req: True}) # cutflow.addRow( req, selection.require(**cutflow_reqs_d) ) labels = { 'topW_v3': 0, 'TTW': 1, 'TTZ': 2, 'TTH': 3, 'ttbar': 4, 'ttbar1l_MG': 4 } if dataset in labels: label_mult = labels[dataset] else: label_mult = 5 label = np.ones(len(ev[BL])) * label_mult output["n_lep"] += processor.column_accumulator( ak.to_numpy((ak.num(electron) + ak.num(muon))[BL])) output["n_lep_tight"] += processor.column_accumulator( ak.to_numpy((ak.num(tightelectron) + ak.num(tightmuon))[BL])) output["lead_lep_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(leading_lepton[BL].pt, axis=1))) output["lead_lep_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(leading_lepton[BL].eta, axis=1))) output["lead_lep_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(leading_lepton[BL].phi, axis=1))) output["lead_lep_charge"] += processor.column_accumulator( ak.to_numpy(ak.flatten(leading_lepton[BL].charge, axis=1))) output["sublead_lep_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(trailing_lepton[BL].pt, axis=1))) output["sublead_lep_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(trailing_lepton[BL].eta, axis=1))) output["sublead_lep_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(trailing_lepton[BL].phi, axis=1))) output["sublead_lep_charge"] += processor.column_accumulator( ak.to_numpy(ak.flatten(trailing_lepton[BL].charge, axis=1))) output["lead_jet_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 0:1][BL].pt, axis=1))) output["lead_jet_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 0:1][BL].eta, axis=1))) output["lead_jet_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 0:1][BL].phi, axis=1))) output["sublead_jet_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 1:2][BL].pt, axis=1))) output["sublead_jet_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 1:2][BL].eta, axis=1))) output["sublead_jet_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(jet[:, 1:2][BL].phi, axis=1))) output["lead_btag_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].pt, axis=1))) output["lead_btag_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].eta, axis=1))) output["lead_btag_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 0:1][BL].phi, axis=1))) output["sublead_btag_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].pt, axis=1))) output["sublead_btag_eta"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].eta, axis=1))) output["sublead_btag_phi"] += processor.column_accumulator( ak.to_numpy(ak.flatten(high_score_btag[:, 1:2][BL].phi, axis=1))) output["fwd_jet_p"] += processor.column_accumulator( ak.to_numpy( ak.flatten(ak.fill_none(ak.pad_none(j_fwd[BL].p, 1, clip=True), 0), axis=1))) output["fwd_jet_pt"] += processor.column_accumulator( ak.to_numpy( ak.flatten(ak.fill_none( ak.pad_none(j_fwd[BL].pt, 1, clip=True), 0), axis=1))) output["fwd_jet_eta"] += processor.column_accumulator( ak.to_numpy( ak.flatten(ak.fill_none( ak.pad_none(j_fwd[BL].eta, 1, clip=True), 0), axis=1))) output["fwd_jet_phi"] += processor.column_accumulator( ak.to_numpy( ak.flatten(ak.fill_none( ak.pad_none(j_fwd[BL].phi, 1, clip=True), 0), axis=1))) output["mjj_max"] += processor.column_accumulator( ak.to_numpy(ak.fill_none(ak.max(mjf[BL], axis=1), 0))) output["delta_eta_jj"] += processor.column_accumulator( ak.to_numpy(ak.flatten(delta_eta[BL], axis=1))) output["met"] += processor.column_accumulator(ak.to_numpy(met_pt[BL])) output["ht"] += processor.column_accumulator(ak.to_numpy(ht[BL])) output["st"] += processor.column_accumulator(ak.to_numpy(st[BL])) output["n_jet"] += processor.column_accumulator( ak.to_numpy(ak.num(jet[BL]))) output["n_btag"] += processor.column_accumulator( ak.to_numpy(ak.num(btag[BL]))) output["n_fwd"] += processor.column_accumulator( ak.to_numpy(ak.num(fwd[BL]))) output["n_central"] += processor.column_accumulator( ak.to_numpy(ak.num(central[BL]))) output["n_tau"] += processor.column_accumulator( ak.to_numpy(ak.num(tau[BL]))) output["n_track"] += processor.column_accumulator( ak.to_numpy(ak.num(track[BL]))) output["dilepton_pt"] += processor.column_accumulator( ak.to_numpy(ak.flatten(dilepton_pt[BL], axis=1))) output["dilepton_mass"] += processor.column_accumulator( ak.to_numpy(ak.flatten(dilepton_mass[BL], axis=1))) output["min_bl_dR"] += processor.column_accumulator( ak.to_numpy(min_bl_dR[BL])) output["min_mt_lep_met"] += processor.column_accumulator( ak.to_numpy(min_mt_lep_met[BL])) output["label"] += processor.column_accumulator(label) output["weight"] += processor.column_accumulator(weight.weight()[BL]) output["presel"]["all"] += len(ev[ss_selection]) output["sel"]["all"] += len(ev[BL]) return output
def process(self, df): """ Processing function. This is where the actual analysis happens. """ output = self.accumulator.identity() dataset = df["dataset"] cfg = loadConfig() ## MET -> can switch to puppi MET met_pt = df["MET_pt"] met_phi = df["MET_phi"] ## Muons muon = JaggedCandidateArray.candidatesfromcounts( df['nMuon'], pt=df['Muon_pt'].content, eta=df['Muon_eta'].content, phi=df['Muon_phi'].content, mass=df['Muon_mass'].content, miniPFRelIso_all=df['Muon_miniPFRelIso_all'].content, looseId=df['Muon_looseId'].content) muon = muon[(muon.pt > 10) & (abs(muon.eta) < 2.4) & (muon.looseId) & (muon.miniPFRelIso_all < 0.2)] #muon = Collections(df, "Muon", "tightTTH").get() # this needs a fix for DASK electrons = JaggedCandidateArray.candidatesfromcounts( df['nElectron'], pt=df['Electron_pt'].content, eta=df['Electron_eta'].content, phi=df['Electron_phi'].content, mass=df['Electron_mass'].content, pdgid=df['Electron_pdgId'].content, mini_iso=df['Electron_miniPFRelIso_all'].content) ## Electrons electron = JaggedCandidateArray.candidatesfromcounts( df['nElectron'], pt=df['Electron_pt'].content, eta=df['Electron_eta'].content, phi=df['Electron_phi'].content, mass=df['Electron_mass'].content, miniPFRelIso_all=df['Electron_miniPFRelIso_all'].content, cutBased=df['Electron_cutBased'].content) electron = electron[(electron.pt > 10) & (abs(electron.eta) < 2.4) & (electron.miniPFRelIso_all < 0.1) & (electron.cutBased >= 1)] #electron = Collections(df, "Electron", "tightTTH").get() # this needs a fix for DASK ## FatJets fatjet = JaggedCandidateArray.candidatesfromcounts( df['nFatJet'], pt=df['FatJet_pt'].content, eta=df['FatJet_eta'].content, phi=df['FatJet_phi'].content, mass=df['FatJet_mass'].content, msoftdrop=df["FatJet_msoftdrop"].content, deepTagMD_HbbvsQCD=df['FatJet_deepTagMD_HbbvsQCD'].content, deepTagMD_WvsQCD=df['FatJet_deepTagMD_WvsQCD'].content, deepTag_WvsQCD=df['FatJet_deepTag_WvsQCD'].content) leadingFatJets = fatjet[:, :2] difatjet = leadingFatJets.choose(2) dphiDiFatJet = np.arccos(np.cos(difatjet.i0.phi - difatjet.i1.phi)) nfatjets = fatjet.counts htag = fatjet[((fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD > 0.8365))] htag_hard = fatjet[((fatjet.pt > 300) & (fatjet.deepTagMD_HbbvsQCD > 0.8365))] lead_htag = htag[htag.pt.argmax()] wtag = fatjet[((fatjet.pt > 200) & (fatjet.deepTagMD_HbbvsQCD < 0.8365) & (fatjet.deepTag_WvsQCD > 0.918))] wtag_hard = fatjet[((fatjet.pt > 300) & (fatjet.deepTagMD_HbbvsQCD < 0.8365) & (fatjet.deepTag_WvsQCD > 0.918))] lead_wtag = wtag[wtag.pt.argmax()] wh = lead_htag.cross(lead_wtag) #wh_deltaPhi = np.arccos(wh.i0.phi - wh.i1.phi) wh_deltaR = wh.i0.p4.delta_r(wh.i1.p4) ## Jets jet = JaggedCandidateArray.candidatesfromcounts( df['nJet'], pt=df['Jet_pt'].content, eta=df['Jet_eta'].content, phi=df['Jet_phi'].content, mass=df['Jet_mass'].content, jetId=df['Jet_jetId']. content, # https://twiki.cern.ch/twiki/bin/view/CMS/JetID #puId = df['Jet_puId'].content, # https://twiki.cern.ch/twiki/bin/viewauth/CMS/PileupJetID btagDeepB=df['Jet_btagDeepB']. content, # https://twiki.cern.ch/twiki/bin/viewauth/CMS/BtagRecommendation102X #deepJet = df['Jet_'].content # not there yet? ) jet = jet[(jet.pt > 30) & (abs(jet.eta) < 2.4) & (jet.jetId > 0)] jet = jet[(jet.pt > 30) & (jet.jetId > 1) & (abs(jet.eta) < 2.4)] jet = jet[~jet.match( muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~jet.match( electron, deltaRCut=0.4)] # remove jets that overlap with electrons jet = jet[jet.pt.argsort(ascending=False)] # sort the jets btag = jet[(jet.btagDeepB > 0.4184)] light = jet[(jet.btagDeepB < 0.4184)] njets = jet.counts nbjets = btag.counts ## Get the leading b-jets high_score_btag = jet[jet.btagDeepB.argsort(ascending=False)][:, :2] leadingJets = jet[:, :2] dijet = leadingJets.choose(2) dphiDiJet = np.arccos(np.cos(dijet.i0.phi - dijet.i1.phi)) leading_jet = leadingJets[leadingJets.pt.argmax()] subleading_jet = leadingJets[leadingJets.pt.argmin()] leading_b = btag[btag.pt.argmax()] bb = high_score_btag.choose(2) bb_deltaPhi = np.arccos(np.cos(bb.i0.phi - bb.i1.phi)) bb_deltaR = bb.i0.p4.delta_r(bb.i1.p4) mtb = mt(btag.pt, btag.phi, met_pt, met_phi) ## other variables ht = jet.pt.sum() #met_sig = met_pt/np.sqrt(ht) min_dphiJetMet4 = np.arccos(np.cos(jet[:, :4].phi - met_phi)).min() #goodjcut = ((jets.pt>30) & (abs(jets.eta)<2.4) & (jets.jetid>0)) #goodjets = jets[goodjcut] #abs_min_dphi_met_leadjs4 = abs(np.arccos(np.cos(goodjets[:,:4].phi-metphi)).min()) #print(min_dphiJetMet4.shape) #print(self.means['min_dphi_met_j4'].shape) ht_ps = (ht > 0) met_ps = (met_pt > 250) njet_ps = (njets >= 2) bjet_ps = (nbjets >= 1) fatjet_sel = (nfatjets >= 1) e_sel = (electron.counts == 0) m_sel = (muon.counts == 0) #it_sel = (veto_it.counts == 0) #t_sel = (veto_t.counts == 0) l_sel = e_sel & m_sel # & it_sel & t_sel h_sel = (htag.counts > 0) wmc_sel = (wtag.counts > 0) sel = ht_ps & met_ps & njet_ps & bjet_ps & l_sel & fatjet_sel & h_sel #& wmc_sel met_sig = met_pt[sel] / np.sqrt(ht[sel]) nEventsBaseline = len(df['weight'][sel]) signal_label = np.ones(nEventsBaseline) if ( df['dataset'] == 'WH') else np.zeros(nEventsBaseline) #Let's make sure we weight our events properly. wght = df['weight'][sel] * 137 output['met'] += processor.column_accumulator(met_pt[sel].flatten()) output['ht'] += processor.column_accumulator(ht[sel].flatten()) output['lead_jet_pt'] += processor.column_accumulator( leading_jet[sel].pt.flatten()) output['sublead_jet_pt'] += processor.column_accumulator( subleading_jet[sel].pt.flatten()) output['njets'] += processor.column_accumulator(njets[sel].flatten()) output['bjets'] += processor.column_accumulator(nbjets[sel].flatten()) output['nWs'] += processor.column_accumulator( wtag[sel].counts.flatten()) output['nHs'] += processor.column_accumulator( htag[sel].counts.flatten()) output['nFatJets'] += processor.column_accumulator( fatjet[sel].counts.flatten()) output['met_significance'] += processor.column_accumulator( met_sig.flatten()) output['min_dphi_met_j4'] += processor.column_accumulator( min_dphiJetMet4[sel].flatten()) #output['dR_fj1_fj2'] += processor.column_accumulator(dR_fj1_fj2[sel].flatten()) output['signal'] += processor.column_accumulator(signal_label) return output
def process(self, events): output = self.accumulator.identity() # use a very loose preselection to filter the events presel = ak.num(events.Jet)>2 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore cfg = loadConfig() output['totalEvents']['all'] += len(events) output['skimmedEvents']['all'] += len(ev) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): ## Generated leptons gen_lep = ev.GenL leading_gen_lep = gen_lep[ak.singletons(ak.argmax(gen_lep.pt, axis=1))] trailing_gen_lep = gen_lep[ak.singletons(ak.argmin(gen_lep.pt, axis=1))] # Get the leptons. This has changed a couple of times now, but we are using fakeable objects as baseline leptons. # The added p4 instance has the corrected pt (conePt for fakeable) and should be used for any following selection or calculation # Any additional correction (if we choose to do so) should be added here, e.g. Rochester corrections, ... ## Muons mu_v = Collections(ev, "Muon", "vetoTTH", year=year).get() # these include all muons, tight and fakeable mu_t = Collections(ev, "Muon", "tightSSTTH", year=year).get() mu_f = Collections(ev, "Muon", "fakeableSSTTH", year=year).get() muon = ak.concatenate([mu_t, mu_f], axis=1) muon['p4'] = get_four_vec_fromPtEtaPhiM(muon, get_pt(muon), muon.eta, muon.phi, muon.mass, copy=False) #FIXME new ## Electrons el_v = Collections(ev, "Electron", "vetoTTH", year=year).get() el_t = Collections(ev, "Electron", "tightSSTTH", year=year).get() el_f = Collections(ev, "Electron", "fakeableSSTTH", year=year).get() electron = ak.concatenate([el_t, el_f], axis=1) electron['p4'] = get_four_vec_fromPtEtaPhiM(electron, get_pt(electron), electron.eta, electron.phi, electron.mass, copy=False) #FIXME new if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): el_t_p = prompt(el_t) el_t_np = nonprompt(el_t) el_f_p = prompt(el_f) el_f_np = nonprompt(el_f) mu_t_p = prompt(mu_t) mu_t_np = nonprompt(mu_t) mu_f_p = prompt(mu_f) mu_f_np = nonprompt(mu_f) is_flipped = ( (el_t_p.matched_gen.pdgId*(-1) == el_t_p.pdgId) & (abs(el_t_p.pdgId) == 11) ) el_t_p_cc = el_t_p[~is_flipped] # this is tight, prompt, and charge consistent el_t_p_cf = el_t_p[is_flipped] # this is tight, prompt, and charge flipped ## Merge electrons and muons. These are fakeable leptons now lepton = ak.concatenate([muon, electron], axis=1) leading_lepton_idx = ak.singletons(ak.argmax(lepton.p4.pt, axis=1)) leading_lepton = lepton[leading_lepton_idx] trailing_lepton_idx = ak.singletons(ak.argmin(lepton.p4.pt, axis=1)) trailing_lepton = lepton[trailing_lepton_idx] dilepton_mass = (leading_lepton.p4 + trailing_lepton.p4).mass dilepton_pt = (leading_lepton.p4 + trailing_lepton.p4).pt #dilepton_dR = delta_r(leading_lepton, trailing_lepton) dilepton_dR = leading_lepton.p4.delta_r(trailing_lepton.p4) lepton_pdgId_pt_ordered = ak.fill_none(ak.pad_none(lepton[ak.argsort(lepton.p4.pt, ascending=False)].pdgId, 2, clip=True), 0) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): n_nonprompt = getNonPromptFromFlavour(electron) + getNonPromptFromFlavour(muon) n_chargeflip = getChargeFlips(electron, ev.GenPart) + getChargeFlips(muon, ev.GenPart) gp = ev.GenPart gp_e = gp[((abs(gp.pdgId)==11)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))] gp_m = gp[((abs(gp.pdgId)==13)&(gp.status==1)&((gp.statusFlags&(1<<0))==1)&(gp.statusFlags&(1<<8)==256))] n_gen_lep = ak.num(gp_e) + ak.num(gp_m) else: n_gen_lep = np.zeros(len(ev)) LL = (n_gen_lep > 2) # this is the classifier for LL events (should mainly be ttZ/tZ/WZ...) mt_lep_met = mt(lepton.p4.pt, lepton.p4.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) ## Tau and other stuff tau = getTaus(ev) tau = tau[~match(tau, muon, deltaRCut=0.4)] tau = tau[~match(tau, electron, deltaRCut=0.4)] track = getIsoTracks(ev) ## Jets jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom') jet = jet[ak.argsort(jet.pt_nom, ascending=False)] # need to sort wrt smeared and recorrected jet pt jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match(jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta)<2.4)] btag = getBTagsDeepFlavB(jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:,:2] bl = cross(lepton, high_score_btag) bl_dR = delta_r(bl['0'], bl['1']) min_bl_dR = ak.min(bl_dR, axis=1) ## forward jets j_fwd = fwd[ak.singletons(ak.argmax(fwd.p, axis=1))] # highest momentum spectator # try to get either the most forward light jet, or if there's more than one with eta>1.7, the highest pt one most_fwd = light[ak.argsort(abs(light.eta))][:,0:1] #most_fwd = light[ak.singletons(ak.argmax(abs(light.eta)))] best_fwd = ak.concatenate([j_fwd, most_fwd], axis=1)[:,0:1] jf = cross(j_fwd, jet) mjf = (jf['0']+jf['1']).mass j_fwd2 = jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'] # this is the jet that forms the largest invariant mass with j_fwd delta_eta = abs(j_fwd2.eta - j_fwd.eta) ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi ## other variables ht = ak.sum(jet.pt, axis=1) #st = met_pt + ht + ak.sum(get_pt(muon), axis=1) + ak.sum(get_pt(electron), axis=1) st = met_pt + ht + ak.sum(lepton.p4.pt, axis=1) # define the weight weight = Weights( len(ev) ) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): # lumi weight weight.add("weight", ev.weight*cfg['lumi'][self.year]) # PU weight weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False) # b-tag SFs weight.add("btag", self.btagSF.Method1a(btag, light)) # lepton SFs weight.add("lepton", self.leptonSF.get(electron, muon)) cutflow = Cutflow(output, ev, weight=weight) # slightly restructured # calculate everything from loose, require two tights on top # since n_tight == n_loose == 2, the tight and loose leptons are the same in the end # in this selection we'll get events with exactly two fakeable+tight and two loose leptons. sel = Selection( dataset = dataset, events = ev, year = self.year, ele = electron, ele_veto = el_v, mu = muon, mu_veto = mu_v, jet_all = jet, jet_central = central, jet_btag = btag, jet_fwd = fwd, jet_light = light, met = ev.MET, ) baseline = sel.dilep_baseline(cutflow=cutflow, SS=True, omit=['N_fwd>0']) baseline_OS = sel.dilep_baseline(cutflow=cutflow, SS=False, omit=['N_fwd>0']) # this is for charge flip estimation if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): BL = (baseline & ((ak.num(el_t_p_cc)+ak.num(mu_t_p))==2)) # this is the MC baseline for events with two tight prompt leptons BL_incl = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) # this is the MC baseline for events with two tight leptons np_est_sel_mc = (baseline & \ ((((ak.num(el_t_p_cc)+ak.num(mu_t_p))==1) & ((ak.num(el_f_np)+ak.num(mu_f_np))==1)) | (((ak.num(el_t_p_cc)+ak.num(mu_t_p))==0) & ((ak.num(el_f_np)+ak.num(mu_f_np))==2)) )) # no overlap between tight and nonprompt, and veto on additional leptons. this should be enough np_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_np)+ak.num(mu_t_np))>=1) ) # two tight leptons, at least one nonprompt np_est_sel_data = (baseline & ~baseline) # this has to be false cf_est_sel_mc = (baseline_OS & ((ak.num(el_t_p)+ak.num(mu_t_p))==2)) cf_obs_sel_mc = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2) & ((ak.num(el_t_p_cf))>=1) ) # two tight leptons, at least one electron charge flip cf_est_sel_data = (baseline & ~baseline) # this has to be false weight_np_mc = self.nonpromptWeight.get(el_f_np, mu_f_np, meas='TT') weight_cf_mc = self.chargeflipWeight.flip_weight(el_t_p) else: BL = (baseline & ((ak.num(el_t)+ak.num(mu_t))==2)) BL_incl = BL np_est_sel_mc = (baseline & ~baseline) np_obs_sel_mc = (baseline & ~baseline) np_est_sel_data = (baseline & (ak.num(el_t)+ak.num(mu_t)==1) & (ak.num(el_f)+ak.num(mu_f)==1) ) cf_est_sel_mc = (baseline & ~baseline) cf_obs_sel_mc = (baseline & ~baseline) cf_est_sel_data = (baseline_OS & ((ak.num(el_t)+ak.num(mu_t))==2) ) weight_np_mc = np.zeros(len(ev)) weight_cf_mc = np.zeros(len(ev)) #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event])) run_ = ak.to_numpy(ev.run) lumi_ = ak.to_numpy(ev.luminosityBlock) event_ = ak.to_numpy(ev.event) if False: output['%s_run'%dataset] += processor.column_accumulator(run_[BL]) output['%s_lumi'%dataset] += processor.column_accumulator(lumi_[BL]) output['%s_event'%dataset] += processor.column_accumulator(event_[BL]) weight_BL = weight.weight()[BL] # this is just a shortened weight list for the two prompt selection weight_np_data = self.nonpromptWeight.get(el_f, mu_f, meas='data') weight_cf_data = self.chargeflipWeight.flip_weight(el_t) out_sel = (BL | np_est_sel_mc | cf_est_sel_mc) dummy = (np.ones(len(ev))==1) def fill_multiple_np(hist, arrays, add_sel=dummy): #reg_sel = [BL, np_est_sel_mc, np_obs_sel_mc, np_est_sel_data, cf_est_sel_mc, cf_obs_sel_mc, cf_est_sel_data], #print ('len', len(reg_sel[0])) #print ('sel', reg_sel[0]) reg_sel = [BL&add_sel, BL_incl&add_sel, np_est_sel_mc&add_sel, np_obs_sel_mc&add_sel, np_est_sel_data&add_sel, cf_est_sel_mc&add_sel, cf_obs_sel_mc&add_sel, cf_est_sel_data&add_sel], fill_multiple( hist, datasets=[ dataset, # only prompt contribution from process dataset+"_incl", # everything from process (inclusive MC truth) "np_est_mc", # MC based NP estimate "np_obs_mc", # MC based NP observation "np_est_data", "cf_est_mc", "cf_obs_mc", "cf_est_data", ], arrays=arrays, selections=reg_sel[0], # no idea where the additional dimension is coming from... weights=[ weight.weight()[reg_sel[0][0]], weight.weight()[reg_sel[0][1]], weight.weight()[reg_sel[0][2]]*weight_np_mc[reg_sel[0][2]], weight.weight()[reg_sel[0][3]], weight.weight()[reg_sel[0][4]]*weight_np_data[reg_sel[0][4]], weight.weight()[reg_sel[0][5]]*weight_cf_mc[reg_sel[0][5]], weight.weight()[reg_sel[0][6]], weight.weight()[reg_sel[0][7]]*weight_cf_data[reg_sel[0][7]], ], ) if self.evaluate or self.dump: # define the inputs to the NN # this is super stupid. there must be a better way. # used a np.stack which is ok performance wise. pandas data frame seems to be slow and memory inefficient #FIXME no n_b, n_fwd back in v13/v14 of the DNN NN_inputs_d = { 'n_jet': ak.to_numpy(ak.num(jet)), 'n_fwd': ak.to_numpy(ak.num(fwd)), 'n_b': ak.to_numpy(ak.num(btag)), 'n_tau': ak.to_numpy(ak.num(tau)), #'n_track': ak.to_numpy(ak.num(track)), 'st': ak.to_numpy(st), 'met': ak.to_numpy(ev.MET.pt), 'mjj_max': ak.to_numpy(ak.fill_none(ak.max(mjf, axis=1),0)), 'delta_eta_jj': ak.to_numpy(pad_and_flatten(delta_eta)), 'lead_lep_pt': ak.to_numpy(pad_and_flatten(leading_lepton.p4.pt)), 'lead_lep_eta': ak.to_numpy(pad_and_flatten(leading_lepton.p4.eta)), 'sublead_lep_pt': ak.to_numpy(pad_and_flatten(trailing_lepton.p4.pt)), 'sublead_lep_eta': ak.to_numpy(pad_and_flatten(trailing_lepton.p4.eta)), 'dilepton_mass': ak.to_numpy(pad_and_flatten(dilepton_mass)), 'dilepton_pt': ak.to_numpy(pad_and_flatten(dilepton_pt)), 'fwd_jet_pt': ak.to_numpy(pad_and_flatten(best_fwd.pt)), 'fwd_jet_p': ak.to_numpy(pad_and_flatten(best_fwd.p)), 'fwd_jet_eta': ak.to_numpy(pad_and_flatten(best_fwd.eta)), 'lead_jet_pt': ak.to_numpy(pad_and_flatten(jet[:, 0:1].pt)), 'sublead_jet_pt': ak.to_numpy(pad_and_flatten(jet[:, 1:2].pt)), 'lead_jet_eta': ak.to_numpy(pad_and_flatten(jet[:, 0:1].eta)), 'sublead_jet_eta': ak.to_numpy(pad_and_flatten(jet[:, 1:2].eta)), 'lead_btag_pt': ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].pt)), 'sublead_btag_pt': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].pt)), 'lead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 0:1].eta)), 'sublead_btag_eta': ak.to_numpy(pad_and_flatten(high_score_btag[:, 1:2].eta)), 'min_bl_dR': ak.to_numpy(ak.fill_none(min_bl_dR, 0)), 'min_mt_lep_met': ak.to_numpy(ak.fill_none(min_mt_lep_met, 0)), } if self.dump: for k in NN_inputs_d.keys(): output[k] += processor.column_accumulator(NN_inputs_d[k][out_sel]) if self.evaluate: NN_inputs = np.stack( [NN_inputs_d[k] for k in NN_inputs_d.keys()] ) NN_inputs = np.nan_to_num(NN_inputs, 0, posinf=1e5, neginf=-1e5) # events with posinf/neginf/nan will not pass the BL selection anyway NN_inputs = np.moveaxis(NN_inputs, 0, 1) # this is needed for a np.stack (old version) model, scaler = load_onnx_model(self.training) try: NN_inputs_scaled = scaler.transform(NN_inputs) NN_pred = predict_onnx(model, NN_inputs_scaled) best_score = np.argmax(NN_pred, axis=1) except ValueError: print ("Problem with prediction. Showing the shapes here:") print (np.shape(NN_inputs)) print (np.shape(weight_BL)) NN_pred = np.array([]) best_score = np.array([]) NN_inputs_scaled = NN_inputs raise ##k.clear_session() #FIXME below needs to be fixed again with changed NN evaluation. Should work now fill_multiple_np(output['node'], {'multiplicity':best_score}) fill_multiple_np(output['node0_score_incl'], {'score':NN_pred[:,0]}) fill_multiple_np(output['node1_score_incl'], {'score':NN_pred[:,1]}) fill_multiple_np(output['node2_score_incl'], {'score':NN_pred[:,2]}) fill_multiple_np(output['node3_score_incl'], {'score':NN_pred[:,3]}) fill_multiple_np(output['node4_score_incl'], {'score':NN_pred[:,4]}) fill_multiple_np(output['node0_score'], {'score':NN_pred[:,0]}, add_sel=(best_score==0)) fill_multiple_np(output['node1_score'], {'score':NN_pred[:,1]}, add_sel=(best_score==1)) fill_multiple_np(output['node2_score'], {'score':NN_pred[:,2]}, add_sel=(best_score==2)) fill_multiple_np(output['node3_score'], {'score':NN_pred[:,3]}, add_sel=(best_score==3)) fill_multiple_np(output['node4_score'], {'score':NN_pred[:,4]}, add_sel=(best_score==4)) #SR_sel_pp = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId<0))) #SR_sel_mm = ((best_score==0) & ak.flatten((leading_lepton[BL].pdgId>0))) #leading_lepton_BL = leading_lepton[BL] #output['lead_lep_SR_pp'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_pp].pt)), # weight = weight_BL[SR_sel_pp] #) #output['lead_lep_SR_mm'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(leading_lepton_BL[SR_sel_mm].pt)), # weight = weight_BL[SR_sel_mm] #) del model del scaler del NN_inputs, NN_inputs_scaled, NN_pred labels = {'topW_v3': 0, 'TTW':1, 'TTZ': 2, 'TTH': 3, 'ttbar': 4, 'rare':5, 'diboson':6} # these should be all? if dataset in labels: label_mult = labels[dataset] else: label_mult = 7 # data or anything else if self.dump: output['label'] += processor.column_accumulator(np.ones(len(ev[out_sel])) * label_mult) output['SS'] += processor.column_accumulator(ak.to_numpy(BL[out_sel])) output['OS'] += processor.column_accumulator(ak.to_numpy(cf_est_sel_mc[out_sel])) output['AR'] += processor.column_accumulator(ak.to_numpy(np_est_sel_mc[out_sel])) output['LL'] += processor.column_accumulator(ak.to_numpy(LL[out_sel])) output['weight'] += processor.column_accumulator(ak.to_numpy(weight.weight()[out_sel])) output['weight_np'] += processor.column_accumulator(ak.to_numpy(weight_np_mc[out_sel])) output['weight_cf'] += processor.column_accumulator(ak.to_numpy(weight_cf_mc[out_sel])) # first, make a few super inclusive plots output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight_BL) output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight_BL) fill_multiple_np(output['N_jet'], {'multiplicity': ak.num(jet)}) fill_multiple_np(output['N_b'], {'multiplicity': ak.num(btag)}) fill_multiple_np(output['N_central'], {'multiplicity': ak.num(central)}) fill_multiple_np(output['N_ele'], {'multiplicity':ak.num(electron)}) fill_multiple_np(output['N_mu'], {'multiplicity':ak.num(muon)}) fill_multiple_np(output['N_fwd'], {'multiplicity':ak.num(fwd)}) fill_multiple_np(output['ST'], {'ht': st}) fill_multiple_np(output['HT'], {'ht': ht}) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['nLepFromTop'].fill(dataset=dataset, multiplicity=ev[BL].nLepFromTop, weight=weight_BL) output['nLepFromTau'].fill(dataset=dataset, multiplicity=ev.nLepFromTau[BL], weight=weight_BL) output['nLepFromZ'].fill(dataset=dataset, multiplicity=ev.nLepFromZ[BL], weight=weight_BL) output['nLepFromW'].fill(dataset=dataset, multiplicity=ev.nLepFromW[BL], weight=weight_BL) output['nGenTau'].fill(dataset=dataset, multiplicity=ev.nGenTau[BL], weight=weight_BL) output['nGenL'].fill(dataset=dataset, multiplicity=ak.num(ev.GenL[BL], axis=1), weight=weight_BL) output['chargeFlip_vs_nonprompt'].fill(dataset=dataset, n1=n_chargeflip[BL], n2=n_nonprompt[BL], n_ele=ak.num(electron)[BL], weight=weight_BL) fill_multiple_np(output['MET'], {'pt':ev.MET.pt, 'phi':ev.MET.phi}) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): output['lead_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(leading_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(leading_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(leading_gen_lep[BL].phi)), weight = weight_BL ) output['trail_gen_lep'].fill( dataset = dataset, pt = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].pt)), eta = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].eta)), phi = ak.to_numpy(ak.flatten(trailing_gen_lep[BL].phi)), weight = weight_BL ) fill_multiple_np( output['lead_lep'], { 'pt': pad_and_flatten(leading_lepton.p4.pt), 'eta': pad_and_flatten(leading_lepton.eta), 'phi': pad_and_flatten(leading_lepton.phi), }, ) fill_multiple_np( output['trail_lep'], { 'pt': pad_and_flatten(trailing_lepton.p4.pt), 'eta': pad_and_flatten(trailing_lepton.eta), 'phi': pad_and_flatten(trailing_lepton.phi), }, ) output['j1'].fill( dataset = dataset, pt = ak.flatten(jet.pt_nom[:, 0:1][BL]), eta = ak.flatten(jet.eta[:, 0:1][BL]), phi = ak.flatten(jet.phi[:, 0:1][BL]), weight = weight_BL ) output['j2'].fill( dataset = dataset, pt = ak.flatten(jet[:, 1:2][BL].pt_nom), eta = ak.flatten(jet[:, 1:2][BL].eta), phi = ak.flatten(jet[:, 1:2][BL].phi), weight = weight_BL ) output['j3'].fill( dataset = dataset, pt = ak.flatten(jet[:, 2:3][BL].pt_nom), eta = ak.flatten(jet[:, 2:3][BL].eta), phi = ak.flatten(jet[:, 2:3][BL].phi), weight = weight_BL ) fill_multiple_np( output['fwd_jet'], { 'pt': pad_and_flatten(best_fwd.pt), 'eta': pad_and_flatten(best_fwd.eta), 'phi': pad_and_flatten(best_fwd.phi), }, ) #output['fwd_jet'].fill( # dataset = dataset, # pt = ak.flatten(j_fwd[BL].pt), # eta = ak.flatten(j_fwd[BL].eta), # phi = ak.flatten(j_fwd[BL].phi), # weight = weight_BL #) output['high_p_fwd_p'].fill(dataset=dataset, p = ak.flatten(best_fwd[BL].p), weight = weight_BL) return output
def process(self, events): output = self.accumulator.identity() # use a very loose preselection to filter the events presel = ak.num(events.Jet) > 2 ev = events[presel] dataset = ev.metadata['dataset'] # load the config - probably not needed anymore cfg = loadConfig() output['totalEvents']['all'] += len(events) output['skimmedEvents']['all'] += len(ev) ## Muons muon = Collections(ev, "Muon", "tightSSTTH").get() vetomuon = Collections(ev, "Muon", "vetoTTH").get() dimuon = choose(muon, 2) SSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) > 0, axis=1) OSmuon = ak.any((dimuon['0'].charge * dimuon['1'].charge) < 0, axis=1) leading_muon_idx = ak.singletons(ak.argmax(muon.pt, axis=1)) leading_muon = muon[leading_muon_idx] ## Electrons electron = Collections(ev, "Electron", "tightSSTTH").get() vetoelectron = Collections(ev, "Electron", "vetoTTH").get() dielectron = choose(electron, 2) SSelectron = ak.any( (dielectron['0'].charge * dielectron['1'].charge) > 0, axis=1) OSelectron = ak.any( (dielectron['0'].charge * dielectron['1'].charge) < 0, axis=1) leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1)) leading_electron = electron[leading_electron_idx] ## Merge electrons and muons - this should work better now in ak1 lepton = ak.concatenate([muon, electron], axis=1) dilepton = cross(muon, electron) SSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) > 0, axis=1) OSlepton = ak.any((dilepton['0'].charge * dilepton['1'].charge) < 0, axis=1) leading_lepton_idx = ak.singletons(ak.argmax(lepton.pt, axis=1)) leading_lepton = lepton[leading_lepton_idx] trailing_lepton_idx = ak.singletons(ak.argmin(lepton.pt, axis=1)) trailing_lepton = lepton[trailing_lepton_idx] dilepton_mass = (leading_lepton + trailing_lepton).mass dilepton_pt = (leading_lepton + trailing_lepton).pt dilepton_dR = delta_r(leading_lepton, trailing_lepton) ## Jets jet = getJets(ev, minPt=25, maxEta=4.7, pt_var='pt_nom') jet = jet[ak.argsort( jet.pt_nom, ascending=False )] # need to sort wrt smeared and recorrected jet pt jet = jet[~match(jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match( jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta) < 2.4)] btag = getBTagsDeepFlavB( jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) ## forward jets high_p_fwd = fwd[ak.singletons(ak.argmax( fwd.p, axis=1))] # highest momentum spectator high_pt_fwd = fwd[ak.singletons(ak.argmax( fwd.pt_nom, axis=1))] # highest transverse momentum spectator high_eta_fwd = fwd[ak.singletons(ak.argmax(abs( fwd.eta), axis=1))] # most forward spectator ## Get the two leading b-jets in terms of btag score high_score_btag = central[ak.argsort(central.btagDeepFlavB)][:, :2] jf = cross(high_p_fwd, jet) mjf = (jf['0'] + jf['1']).mass deltaEta = abs(high_p_fwd.eta - jf[ak.singletons(ak.argmax(mjf, axis=1))]['1'].eta) deltaEtaMax = ak.max(deltaEta, axis=1) mjf_max = ak.max(mjf, axis=1) jj = choose(jet, 2) mjj_max = ak.max((jj['0'] + jj['1']).mass, axis=1) ## MET -> can switch to puppi MET met_pt = ev.MET.pt met_phi = ev.MET.phi ## other variables ht = ak.sum(jet.pt, axis=1) st = met_pt + ht + ak.sum(muon.pt, axis=1) + ak.sum(electron.pt, axis=1) ht_central = ak.sum(central.pt, axis=1) tau = getTaus(ev) track = getIsoTracks(ev) bl = cross(lepton, high_score_btag) bl_dR = delta_r(bl['0'], bl['1']) min_bl_dR = ak.min(bl_dR, axis=1) mt_lep_met = mt(lepton.pt, lepton.phi, ev.MET.pt, ev.MET.phi) min_mt_lep_met = ak.min(mt_lep_met, axis=1) # define the weight weight = Weights(len(ev)) if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): # lumi weight weight.add("weight", ev.weight * cfg['lumi'][self.year]) # PU weight - not in the babies... weight.add("PU", ev.puWeight, weightUp=ev.puWeightUp, weightDown=ev.puWeightDown, shift=False) # b-tag SFs weight.add("btag", self.btagSF.Method1a(btag, light)) # lepton SFs weight.add("lepton", self.leptonSF.get(electron, muon)) #weight.add("trigger", self.triggerSF.get(electron, muon)) cutflow = Cutflow(output, ev, weight=weight) sel = Selection( dataset=dataset, events=ev, year=self.year, ele=electron, ele_veto=vetoelectron, mu=muon, mu_veto=vetomuon, jet_all=jet, jet_central=central, jet_btag=btag, jet_fwd=fwd, met=ev.MET, ) BL = sel.dilep_baseline(cutflow=cutflow, SS=False) # first, make a few super inclusive plots output['PV_npvs'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvs, weight=weight.weight()[BL]) output['PV_npvsGood'].fill(dataset=dataset, multiplicity=ev.PV[BL].npvsGood, weight=weight.weight()[BL]) output['N_jet'].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL]) output['N_tau'].fill(dataset=dataset, multiplicity=ak.num(tau)[BL], weight=weight.weight()[BL]) output['N_track'].fill(dataset=dataset, multiplicity=ak.num(track)[BL], weight=weight.weight()[BL]) BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0']) output['N_b'].fill(dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb]) output['N_central'].fill(dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL]) output['N_ele'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL]) output['N_mu'].fill(dataset=dataset, multiplicity=ak.num(electron)[BL], weight=weight.weight()[BL]) BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0']) output['N_fwd'].fill(dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd]) output['dilep_pt'].fill(dataset=dataset, pt=ak.flatten(dilepton_pt[BL]), weight=weight.weight()[BL]) output['dilep_mass'].fill(dataset=dataset, mass=ak.flatten(dilepton_mass[BL]), weight=weight.weight()[BL]) output['mjf_max'].fill(dataset=dataset, mass=mjf_max[BL], weight=weight.weight()[BL]) output['deltaEta'].fill(dataset=dataset, eta=ak.flatten(deltaEta[BL]), weight=weight.weight()[BL]) output['min_bl_dR'].fill(dataset=dataset, eta=min_bl_dR[BL], weight=weight.weight()[BL]) output['min_mt_lep_met'].fill(dataset=dataset, pt=min_mt_lep_met[BL], weight=weight.weight()[BL]) output['leading_jet_pt'].fill(dataset=dataset, pt=ak.flatten(jet[:, 0:1][BL].pt), weight=weight.weight()[BL]) output['subleading_jet_pt'].fill(dataset=dataset, pt=ak.flatten(jet[:, 1:2][BL].pt), weight=weight.weight()[BL]) output['leading_jet_eta'].fill(dataset=dataset, eta=ak.flatten(jet[:, 0:1][BL].eta), weight=weight.weight()[BL]) output['subleading_jet_eta'].fill(dataset=dataset, eta=ak.flatten(jet[:, 1:2][BL].eta), weight=weight.weight()[BL]) output['leading_btag_pt'].fill(dataset=dataset, pt=ak.flatten( high_score_btag[:, 0:1][BL].pt), weight=weight.weight()[BL]) output['subleading_btag_pt'].fill(dataset=dataset, pt=ak.flatten( high_score_btag[:, 1:2][BL].pt), weight=weight.weight()[BL]) output['leading_btag_eta'].fill(dataset=dataset, eta=ak.flatten( high_score_btag[:, 0:1][BL].eta), weight=weight.weight()[BL]) output['subleading_btag_eta'].fill( dataset=dataset, eta=ak.flatten(high_score_btag[:, 1:2][BL].eta), weight=weight.weight()[BL]) BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50']) output['MET'].fill(dataset=dataset, pt=ev.MET[BL_minusMET].pt, phi=ev.MET[BL_minusMET].phi, weight=weight.weight()[BL_minusMET]) #output['electron'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(electron[BL].pt)), # eta = ak.to_numpy(ak.flatten(electron[BL].eta)), # phi = ak.to_numpy(ak.flatten(electron[BL].phi)), # weight = weight.weight()[BL] #) # #output['muon'].fill( # dataset = dataset, # pt = ak.to_numpy(ak.flatten(muon[BL].pt)), # eta = ak.to_numpy(ak.flatten(muon[BL].eta)), # phi = ak.to_numpy(ak.flatten(muon[BL].phi)), # weight = weight.weight()[BL] #) output['lead_lep'].fill( dataset=dataset, pt=ak.to_numpy(ak.flatten(leading_lepton[BL].pt)), eta=ak.to_numpy(ak.flatten(leading_lepton[BL].eta)), phi=ak.to_numpy(ak.flatten(leading_lepton[BL].phi)), weight=weight.weight()[BL]) output['trail_lep'].fill( dataset=dataset, pt=ak.to_numpy(ak.flatten(trailing_lepton[BL].pt)), eta=ak.to_numpy(ak.flatten(trailing_lepton[BL].eta)), phi=ak.to_numpy(ak.flatten(trailing_lepton[BL].phi)), weight=weight.weight()[BL]) output['fwd_jet'].fill(dataset=dataset, pt=ak.flatten(high_p_fwd[BL].pt_nom), eta=ak.flatten(high_p_fwd[BL].eta), phi=ak.flatten(high_p_fwd[BL].phi), weight=weight.weight()[BL]) output['b1'].fill(dataset=dataset, pt=ak.flatten(high_score_btag[:, 0:1][BL].pt_nom), eta=ak.flatten(high_score_btag[:, 0:1][BL].eta), phi=ak.flatten(high_score_btag[:, 0:1][BL].phi), weight=weight.weight()[BL]) output['b2'].fill(dataset=dataset, pt=ak.flatten(high_score_btag[:, 1:2][BL].pt_nom), eta=ak.flatten(high_score_btag[:, 1:2][BL].eta), phi=ak.flatten(high_score_btag[:, 1:2][BL].phi), weight=weight.weight()[BL]) output['j1'].fill(dataset=dataset, pt=ak.flatten(jet.pt_nom[:, 0:1][BL]), eta=ak.flatten(jet.eta[:, 0:1][BL]), phi=ak.flatten(jet.phi[:, 0:1][BL]), weight=weight.weight()[BL]) output['j2'].fill(dataset=dataset, pt=ak.flatten(jet[:, 1:2][BL].pt_nom), eta=ak.flatten(jet[:, 1:2][BL].eta), phi=ak.flatten(jet[:, 1:2][BL].phi), weight=weight.weight()[BL]) output['j3'].fill(dataset=dataset, pt=ak.flatten(jet[:, 2:3][BL].pt_nom), eta=ak.flatten(jet[:, 2:3][BL].eta), phi=ak.flatten(jet[:, 2:3][BL].phi), weight=weight.weight()[BL]) if re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): #rle = ak.to_numpy(ak.zip([ev.run, ev.luminosityBlock, ev.event])) run_ = ak.to_numpy(ev.run) lumi_ = ak.to_numpy(ev.luminosityBlock) event_ = ak.to_numpy(ev.event) output['%s_run' % dataset] += processor.column_accumulator( run_[BL]) output['%s_lumi' % dataset] += processor.column_accumulator( lumi_[BL]) output['%s_event' % dataset] += processor.column_accumulator( event_[BL]) # Now, take care of systematic unceratinties if not re.search(re.compile('MuonEG|DoubleMuon|DoubleEG|EGamma'), dataset): alljets = getJets(ev, minPt=0, maxEta=4.7) alljets = alljets[(alljets.jetId > 1)] for var in self.variations: # get the collections that change with the variations jet = getPtEtaPhi(alljets, pt_var=var) jet = jet[(jet.pt > 25)] jet = jet[~match( jet, muon, deltaRCut=0.4)] # remove jets that overlap with muons jet = jet[~match( jet, electron, deltaRCut=0.4)] # remove jets that overlap with electrons central = jet[(abs(jet.eta) < 2.4)] btag = getBTagsDeepFlavB( jet, year=self.year) # should study working point for DeepJet light = getBTagsDeepFlavB(jet, year=self.year, invert=True) fwd = getFwdJet(light) fwd_noPU = getFwdJet(light, puId=False) ## forward jets high_p_fwd = fwd[ak.singletons(ak.argmax( fwd.p, axis=1))] # highest momentum spectator high_pt_fwd = fwd[ak.singletons(ak.argmax( fwd.pt, axis=1))] # highest transverse momentum spectator high_eta_fwd = fwd[ak.singletons( ak.argmax(abs(fwd.eta), axis=1))] # most forward spectator ## Get the two leading b-jets in terms of btag score high_score_btag = central[ak.argsort( central.btagDeepFlavB)][:, :2] met = ev.MET #met['pt'] = getattr(met, var) sel = Selection( dataset=dataset, events=ev, year=self.year, ele=electron, ele_veto=vetoelectron, mu=muon, mu_veto=vetomuon, jet_all=jet, jet_central=central, jet_btag=btag, jet_fwd=fwd, met=met, ) BL = sel.dilep_baseline(SS=False) # get the modified selection -> more difficult #selection.add('N_jet>2_'+var, (ak.num(jet.pt)>=3)) # stupid bug here... #selection.add('N_btag=2_'+var, (ak.num(btag)==2) ) #selection.add('N_central>1_'+var, (ak.num(central)>=2) ) #selection.add('N_fwd>0_'+var, (ak.num(fwd)>=1) ) #selection.add('MET>30_'+var, (getattr(ev.MET, var)>30) ) ### Don't change the selection for now... #bl_reqs = os_reqs + ['N_jet>2_'+var, 'MET>30_'+var, 'N_btag=2_'+var, 'N_central>1_'+var, 'N_fwd>0_'+var] #bl_reqs_d = { sel: True for sel in bl_reqs } #BL = selection.require(**bl_reqs_d) # the OS selection remains unchanged output['N_jet_' + var].fill(dataset=dataset, multiplicity=ak.num(jet)[BL], weight=weight.weight()[BL]) BL_minusFwd = sel.dilep_baseline(SS=False, omit=['N_fwd>0']) output['N_fwd_' + var].fill( dataset=dataset, multiplicity=ak.num(fwd)[BL_minusFwd], weight=weight.weight()[BL_minusFwd]) BL_minusNb = sel.dilep_baseline(SS=False, omit=['N_btag>0']) output['N_b_' + var].fill( dataset=dataset, multiplicity=ak.num(btag)[BL_minusNb], weight=weight.weight()[BL_minusNb]) output['N_central_' + var].fill( dataset=dataset, multiplicity=ak.num(central)[BL], weight=weight.weight()[BL]) # We don't need to redo all plots with variations. E.g., just add uncertainties to the jet plots. output['j1_' + var].fill(dataset=dataset, pt=ak.flatten(jet.pt[:, 0:1][BL]), eta=ak.flatten(jet.eta[:, 0:1][BL]), phi=ak.flatten(jet.phi[:, 0:1][BL]), weight=weight.weight()[BL]) output['b1_' + var].fill( dataset=dataset, pt=ak.flatten(high_score_btag[:, 0:1].pt[:, 0:1][BL]), eta=ak.flatten(high_score_btag[:, 0:1].eta[:, 0:1][BL]), phi=ak.flatten(high_score_btag[:, 0:1].phi[:, 0:1][BL]), weight=weight.weight()[BL]) output['fwd_jet_' + var].fill( dataset=dataset, pt=ak.flatten(high_p_fwd[BL].pt), #p = ak.flatten(high_p_fwd[BL].p), eta=ak.flatten(high_p_fwd[BL].eta), phi=ak.flatten(high_p_fwd[BL].phi), weight=weight.weight()[BL]) BL_minusMET = sel.dilep_baseline(SS=False, omit=['MET>50']) output['MET_' + var].fill( dataset=dataset, #pt = getattr(ev.MET, var)[BL_minusMET], pt=ev.MET[BL_minusMET].pt, phi=ev.MET[BL_minusMET].phi, weight=weight.weight()[BL_minusMET]) return output