def runReco(inf):
    """Evaluate the top and Higgs matching models on every entry of one file.

    The channel is chosen from the file name: a name containing '3l' is
    treated as 3l, otherwise one containing '2lSS' is treated as 2lSS.
    Every tree entry contributes exactly one element to each returned list,
    so the lists stay aligned with the entries of the 'nominal' tree. When a
    matcher returns nothing, a placeholder score of -10 and fixed indices
    are used instead of skipping the entry.

    Parameters
    ----------
    inf : str
        Path of the ROOT file to process.

    Returns
    -------
    tuple or None
        3l:   (events, events3lF, eventsDecay, higgsRecoScores,
               higgsRecoScoresF, topRecoScores)
        2lSS: (events, higgsRecoScores, topRecoScores)
        None when the channel cannot be determined from `inf`, or when the
        tree already has a recoHiggsPt_2lSS / recoHiggsPt_3lS attribute.
    """
    # Set the channel, load in the models and normalization factors
    if '3l' in inf:
        channel = '3l'
        ptDict = ptDictHiggsTop3lS
        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy",
            allow_pickle=True)
        model3lF = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5")
        normFactors3lF = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy",
            allow_pickle=True)
        model3lS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5")
        normFactors3lS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy",
            allow_pickle=True)
    elif '2lSS' in inf:
        channel = '2lSS'
        ptDict = ptDictHiggsTop2lSS
        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top2lSS.h5")
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top2lSS_normFactors.npy",
            allow_pickle=True)
        model2lSS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop2lSS.h5")
        normFactors2lSS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop2lSS_normFactors.npy",
            allow_pickle=True)
    else:
        # `channel` is never assigned on this path, so report the input
        # path instead of formatting an unbound local.
        print(f'Channel of {inf} is invalid. Should be 2lSS or 3l')
        return None

    is3l = channel == '3l'

    # Open the root file
    f = TFile.Open(inf)
    nom = f.Get('nominal')
    if hasattr(nom, "recoHiggsPt_2lSS") or hasattr(nom, "recoHiggsPt_3lS"):
        print(f'{inf} already has score')
        return None

    # initialize output lists
    events = []
    higgsRecoScores = []
    topRecoScores = []
    if is3l:
        events3lF = []
        eventsDecay = []
        higgsRecoScoresF = []

    # Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        # Get the event
        nom.GetEntry(idx)

        # Find the best top combination, top reco score
        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if not topRes:
            # No top candidate: keep the entry with placeholder values.
            topIdx0, topIdx1 = 0, 0
            topScore = np.float32(-10)
        else:
            topIdx0, topIdx1 = topRes['bestComb']
            topScore = topRes['topScore']
        topRecoScores.append(topScore)

        # Find the higgs decay products, higgs reco score for 3lF model
        if is3l:
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            if not res3lF:
                # Same placeholder convention as the 3lS/2lSS fallback
                # below; skipping the entry would misalign the outputs.
                # NOTE(review): lepton index 1 mirrors that fallback -
                # confirm it is the intended default for 3lF.
                higgsTopScoreF = np.float32(-10)
                lepIdxF = 1
            else:
                higgsTopScoreF = res3lF['higgsTopScore']
                lepIdxF = res3lF['bestComb'][0]
            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdxF, higgsTopScoreF,
                                  topIdx0, topIdx1, topScore))
            higgsRecoScoresF.append(higgsTopScoreF)

        # Find the higgs decay products, higgs reco score for 3lS, 2lSS
        # (same final state)
        if is3l:
            res = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                   topIdx0, topIdx1, topScore)
        else:
            res = findBestHiggsTop(nom, '2lSS', model2lSS, normFactors2lSS,
                                   topIdx0, topIdx1, topScore)
        if not res:
            higgsTopScore = np.float32(-10)
            lepIdx, jetIdx0, jetIdx1 = 1, 0, 0
        else:
            higgsTopScore = res['higgsTopScore']
            lepIdx, jetIdx0, jetIdx1 = res['bestComb']

        # add the pt prediction dictionary
        events.append(
            ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore,
                   topIdx0, topIdx1, topScore))
        higgsRecoScores.append(higgsTopScore)

        # add decay mode dicts
        if is3l:
            eventsDecay.append(
                decayDict(nom, higgsTopScoreF, higgsTopScore,
                          topIdx0, topIdx1, topScore))

    if is3l:
        return (events, events3lF, eventsDecay, higgsRecoScores,
                higgsRecoScoresF, topRecoScores)
    return events, higgsRecoScores, topRecoScores
def runReco(inf):
    """Build flat Higgs-matching training tables from one file and write CSVs.

    The channel is chosen from the file name ('3l' or '2lSS'). Only events
    whose truth parents show the Higgs decay products are kept: a lepton
    with parent 25 and, except for 3lF, exactly two jets with parent 25.
    For each kept event the top matcher picks the b-jet pair and
    `higgsTopCombos` supplies a dict of columns that is appended to the
    running table.

    Output files (path relative to the working directory, file name derived
    from the last two components of `inf`):
      2lSS -> csvFiles/higgsTop2lSS/
      3l   -> csvFiles/higgsTop3lS/ and csvFiles/higgsTop3lF/

    Parameters
    ----------
    inf : str
        Path of the ROOT file to process.

    Raises
    ------
    SystemExit
        If the channel cannot be determined from `inf`.
    """
    import os  # local import: only used to create the output directories

    # Set the channel, load in the top model
    if '3l' in inf:
        channel = '3l'
        topModel = load_model("/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
        topNormFactors = np.load("/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy")
    elif '2lSS' in inf:
        channel = '2lSS'
        topModel = load_model("/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top2lSS.h5")
        topNormFactors = np.load("/data_ceph/afwebb/higgs_diff/topMatching/models/top2lSS_normFactors.npy")
    else:
        # `channel` is never assigned on this path, so report the input
        # path instead of formatting an unbound local.
        print(f'Channel of {inf} is invalid. Should be 2lSS or 3l')
        raise SystemExit

    print('loaded')
    is3l = channel == '3l'

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    # Column-name -> list tables. Both must start as dicts: the merge below
    # relies on an empty dict meaning "no event stored yet".
    events = {}
    events3lF = {}

    # Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))
        nom.GetEntry(idx)

        # Check if the Higgs decay products are reconstructed - first leptons
        if is3l:
            if nom.lep_Parent_1 != 25 and nom.lep_Parent_2 != 25:
                continue
            # Decide if the event is 3lF or 3lS from the parent of lepton 0
            subChannel = '3lF' if nom.lep_Parent_0 == 25 else '3lS'
        else:
            if nom.lep_Parent_0 != 25 and nom.lep_Parent_1 != 25:
                continue
            subChannel = channel

        # Check if Higgs jets are reconstructed (not required for 3lF)
        if subChannel != '3lF' and sum(x == 25 for x in nom.jet_parents) != 2:
            continue

        # Find the b-jets from tops. The top model is per base channel, so
        # pass '3l'/'2lSS' rather than the per-event 3lF/3lS label.
        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        # Get all possible combinations
        combos = higgsTopCombos(subChannel, nom, topIdx0, topIdx1, topScore, 1)
        if not combos or len(combos['higgsDicts']) == 0:
            continue

        newCols = combos['higgsDicts']
        table = events3lF if subChannel == '3lF' else events
        if not table:
            # First kept event defines the columns.
            table.update(newCols)
        else:
            for k in table:
                table[k].extend(newCols[k])

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')

    outputs = []
    if is3l:
        outputs.append((events, 'csvFiles/higgsTop3lS/' + outF))
        outputs.append((events3lF, 'csvFiles/higgsTop3lF/' + outF))
    else:
        outputs.append((events, 'csvFiles/higgsTop2lSS/' + outF))

    for table, outPath in outputs:
        df = shuffle(pd.DataFrame.from_dict(table))
        # outF keeps the parent directory of the input, which may not exist
        # yet under csvFiles/.
        os.makedirs(os.path.dirname(outPath), exist_ok=True)
        df.to_csv(outPath, index=False, float_format='%.3f')
n1bCorrect, n2bCorrect, n3bCorrect = 0,0,0 #Loop over each entry, add to events dict for idx in range(nEntries): if idx%1000==0: print(str(idx)+'/'+str(nEntries)) if idx==5000: break nom.GetEntry(idx) if '1b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=1: continue if '2b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=2: continue if '3b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=3: continue topRes = findBestTopKeras(nom, channel, topModel, topNormFactors) if not topRes: continue topMatches, truthBs, topScore = topRes['bestComb'], topRes['truthComb'], topRes['topScore'] #print(topRes['bestComb'], topRes['truthComb'], topRes['topScore']) #topMatches, truthBs = findBestTopKeras(nom, channel, topModel, topNormFactors, 1) if len(truthBs)!=2: continue if topScore>0.3: nGood+=1 if topMatches[0] in truthBs and topMatches[1] in truthBs: nGoodCorrect+=1 if topMatches[0] in truthBs or topMatches[1] in truthBs: nGoodOne+=1 if topScore<0.3:
lepCorrect = 0 oneCorrect = 0 #Loop over each entry, add to events dict for idx in range(nEntries): if idx % 1000 == 0: print(str(idx) + '/' + str(nEntries)) if idx == 5000: break nom.GetEntry(idx) if '3l' in channel: if channel == '3lF' and nom.lep_Parent_0 != 25: continue if channel == '3lS' and nom.lep_Parent_0 == 25: continue topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors) else: topRes = findBestTopKeras(nom, '2lSS', topModel, topNormFactors) if not topRes: continue topIdx0, topIdx1 = topRes['bestComb'] topScore = topRes['topScore'] #Get dict of all possible jet combinations higgsRes = findBestHiggsTop(nom, channel, higgsModel, higgsNormFactors, topIdx0, topIdx1, topScore) if not higgsRes: continue higgsMatches = higgsRes['bestComb'] truthPair = higgsRes['truthComb']
def runReco(inf):
    """Build the 3l decay-mode training table from one file and write a CSV.

    For every event with a top candidate, a lepton 1 or 2 whose truth
    parent is 25, and both a 3lF and a 3lS Higgs-matching result, one
    `decayDict` record is stored. The last argument passed to `decayDict`
    is 0 when lepton 0 also has parent 25 and 1 otherwise.

    The shuffled table is written to csvFiles/<dir>/<file>.csv, where
    <dir>/<file> are the last two components of `inf`.

    Parameters
    ----------
    inf : str
        Path of the ROOT file to process.
    """
    import os  # local import: only used to create the output directory

    # load in the models - not picklable, can't do outside the function
    topModel = load_model(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
    topNormFactors = np.load(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy")

    model3lF = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5")
    normFactors3lF = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy")

    model3lS = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5")
    normFactors3lS = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy")

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    # one decayDict record per kept event
    events = []

    # Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))
        nom.GetEntry(idx)

        # Require lepton 1 or 2 to come from the Higgs. This only reads
        # branches, so it is done before the model evaluations.
        if nom.lep_Parent_1 != 25 and nom.lep_Parent_2 != 25:
            continue

        # Perform top matching. Get top candidates, topScore
        topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        # Perform higgs matching. Get 3lF, 3lS scores
        res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                  topIdx0, topIdx1, topScore)
        res3lS = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                  topIdx0, topIdx1, topScore)
        if not res3lF or not res3lS:
            continue

        # 0 when lepton 0 is also from the Higgs, 1 otherwise
        label = 0 if nom.lep_Parent_0 == 25 else 1
        events.append(
            decayDict(nom, res3lF['higgsTopScore'], res3lS['higgsTopScore'],
                      topIdx0, topIdx1, topScore, label))

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    outPath = 'csvFiles/' + outF
    # outF keeps the parent directory of the input, which may not exist yet
    # under csvFiles/.
    os.makedirs(os.path.dirname(outPath), exist_ok=True)
    dfFlat.to_csv(outPath, index=False)
def runReco(inf):
    """Build Higgs-pT regression inputs from one file and write CSVs.

    The channel is chosen from the file name ('3l' or '2lSS'). For each
    event with a truth particle of pdgId 25 and a top candidate, the Higgs
    matcher for the relevant channel is evaluated and one record, including
    the truth Higgs pT, is stored. 3l events whose lepton 0 has parent 25
    go to the 3lF table; all other events go to the 3lS / 2lSS table.

    Output files (path relative to the working directory, file name derived
    from the last two components of `inf`):
      2lSS -> inputFiles/higgsTop2lSS/
      3l   -> inputFiles/higgsTop3lS/ and inputFiles/higgsTop3lF/

    Parameters
    ----------
    inf : str
        Path of the ROOT file to process.

    Raises
    ------
    SystemExit
        If the channel cannot be determined from `inf`.
    """
    import os  # local import: only used to create the output directories

    # Set the channel, load in the models
    if '3l' in inf:
        channel = '3l'
        ptDict = ptDictHiggsTop3lS
        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy")
        model3lF = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5")
        normFactors3lF = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy")
        model3lS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5")
        normFactors3lS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy")
    elif '2lSS' in inf:
        channel = '2lSS'
        ptDict = ptDictHiggsTop2lSS
        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top2lSS.h5")
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top2lSS_normFactors.npy")
        model2lSS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop2lSS.h5")
        normFactors2lSS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop2lSS_normFactors.npy")
    else:
        # `channel` is never assigned on this path, so report the input
        # path instead of formatting an unbound local.
        print(f'Channel of {inf} is invalid. Should be 2lSS or 3l')
        raise SystemExit

    is3l = channel == '3l'

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    # one record per kept event
    events = []
    events3lF = []

    # Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))
        nom.GetEntry(idx)

        # Get the Higgs pT of the first truth particle with pdgId 25.
        # Looked up fresh for every event so that a value from a previous
        # event can never leak into an event without a truth Higgs.
        higgs_pt = next(
            (nom.m_truth_pt[i]
             for i, pdgId in enumerate(nom.m_truth_pdgId) if pdgId == 25),
            None)
        if not higgs_pt:
            continue

        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        if is3l and nom.lep_Parent_0 == 25:
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            if not res3lF:
                continue
            higgsTopScore = res3lF['higgsTopScore']
            lepIdx = res3lF['bestComb'][0]
            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdx, higgsTopScore,
                                  topIdx0, topIdx1, topScore, higgs_pt))
            continue

        if is3l:
            res = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                   topIdx0, topIdx1, topScore)
        else:
            res = findBestHiggsTop(nom, '2lSS', model2lSS, normFactors2lSS,
                                   topIdx0, topIdx1, topScore)
        if not res:
            continue
        higgsTopScore = res['higgsTopScore']
        lepIdx, jetIdx0, jetIdx1 = res['bestComb']
        events.append(
            ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore,
                   topIdx0, topIdx1, topScore, higgs_pt))

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')

    outputs = []
    if is3l:
        outputs.append((events, 'inputFiles/higgsTop3lS/' + outF))
        outputs.append((events3lF, 'inputFiles/higgsTop3lF/' + outF))
    else:
        outputs.append((events, 'inputFiles/higgsTop2lSS/' + outF))

    for records, outPath in outputs:
        df = shuffle(pd.DataFrame.from_dict(records))
        # outF keeps the parent directory of the input, which may not exist
        # yet under inputFiles/.
        os.makedirs(os.path.dirname(outPath), exist_ok=True)
        df.to_csv(outPath, index=False)