Пример #1
0
def runReco(inf):
    """Run the top and Higgs matching models over every entry of one ROOT file.

    The channel is taken from the file path: a path containing '3l' is
    handled as 3l, otherwise a path containing '2lSS' is handled as 2lSS.

    Args:
        inf: Path of the ROOT file to process. Its 'nominal' tree is read.

    Returns:
        None when the path matches neither channel, or when the tree already
        exposes recoHiggsPt_2lSS / recoHiggsPt_3lS. Otherwise a tuple of
        lists, each holding one element per tree entry:
          3l:   (events, events3lF, eventsDecay, higgsRecoScores,
                 higgsRecoScoresF, topRecoScores)
          2lSS: (events, higgsRecoScores, topRecoScores)
    """
    topDir = "/data_ceph/afwebb/higgs_diff/topMatching/models"
    higgsDir = "/data_ceph/afwebb/higgs_diff/higgsMatching/models"

    #Set the channel from the file path
    if '3l' in inf:
        channel = '3l'
        higgsChannel = '3lS'
        ptDict = ptDictHiggsTop3lS
    elif '2lSS' in inf:
        channel = '2lSS'
        higgsChannel = '2lSS'
        ptDict = ptDictHiggsTop2lSS
    else:
        # `channel` is not bound on this path, so report the offending file
        # name instead (the old message raised UnboundLocalError here).
        print(f'Channel for {inf} is invalid. Should be 2lSS or 3l')
        return

    #Load in the models and normalization factors
    # NOTE: allow_pickle=True lets np.load unpickle arbitrary objects; only
    # point these paths at trusted files.
    topModel = load_model(f"{topDir}/keras_model_top{channel}.h5")
    topNormFactors = np.load(f"{topDir}/top{channel}_normFactors.npy",
                             allow_pickle=True)

    if channel == '3l':
        model3lF = load_model(f"{higgsDir}/keras_model_higgsTop3lF.h5")
        normFactors3lF = np.load(f"{higgsDir}/higgsTop3lF_normFactors.npy",
                                 allow_pickle=True)

    # 3lS and 2lSS are evaluated by the same code path below
    higgsModel = load_model(
        f"{higgsDir}/keras_model_higgsTop{higgsChannel}.h5")
    higgsNormFactors = np.load(
        f"{higgsDir}/higgsTop{higgsChannel}_normFactors.npy",
        allow_pickle=True)

    #Open the root file
    f = TFile.Open(inf)
    nom = f.Get('nominal')
    if hasattr(nom, "recoHiggsPt_2lSS") or hasattr(nom, "recoHiggsPt_3lS"):
        print(f'{inf} already has score')
        return

    #initialize output lists
    events = []
    higgsRecoScores = []
    topRecoScores = []

    if channel == '3l':
        events3lF = []
        eventsDecay = []
        higgsRecoScoresF = []

    #Loop over all entries. Nothing below skips an entry, so every output
    #list stays aligned with the tree.
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        #Get the event
        nom.GetEntry(idx)

        #Find the best top combination, top reco score
        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if topRes:
            topIdx0, topIdx1 = topRes['bestComb']
            topScore = topRes['topScore']
        else:
            # sentinel values when no top combination is found
            topIdx0, topIdx1 = 0, 0
            topScore = np.float32(-10)

        topRecoScores.append(topScore)

        #Find the higgs decay products, higgs reco score for 3lF model
        if channel == '3l':
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            if res3lF:
                higgsTopScoreF = res3lF['higgsTopScore']
                lepIdxF = res3lF['bestComb'][0]
            else:
                # Previously an empty result crashed on the subscript.
                # NOTE(review): sentinel mirrors the 3lS/2lSS fallback below
                # (score -10, lepton index 1) - confirm it suits 3lF.
                higgsTopScoreF = np.float32(-10)
                lepIdxF = 1

            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdxF, higgsTopScoreF, topIdx0,
                                  topIdx1, topScore))
            higgsRecoScoresF.append(higgsTopScoreF)

        #Find the higgs decay products, higgs reco score for 3lS, 2lSS (same final state)
        res = findBestHiggsTop(nom, higgsChannel, higgsModel,
                               higgsNormFactors, topIdx0, topIdx1, topScore)
        if res:
            higgsTopScore = res['higgsTopScore']
            lepIdx, jetIdx0, jetIdx1 = res['bestComb']
        else:
            higgsTopScore = np.float32(-10)
            lepIdx, jetIdx0, jetIdx1 = 1, 0, 0

        #add the pt prediction dictionary
        events.append(
            ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore, topIdx0,
                   topIdx1, topScore))
        higgsRecoScores.append(higgsTopScore)

        #add decay mode dicts
        if channel == '3l':
            eventsDecay.append(
                decayDict(nom, higgsTopScoreF, higgsTopScore, topIdx0, topIdx1,
                          topScore))

    if channel == '3l':
        return events, events3lF, eventsDecay, higgsRecoScores, higgsRecoScoresF, topRecoScores
    return events, higgsRecoScores, topRecoScores
Пример #2
0
def runReco(inf):
    """Build flat Higgs-matching tables from one ROOT file and write them to CSV.

    The channel is taken from the file path ('3l' or '2lSS'). For every tree
    entry passing the truth selection, the candidate combinations returned by
    higgsTopCombos are accumulated column-wise. 2lSS output goes to
    csvFiles/higgsTop2lSS/, 3l output is split between csvFiles/higgsTop3lS/
    and csvFiles/higgsTop3lF/.

    Args:
        inf: Path of the input ROOT file; its 'nominal' tree is read. The last
            two path components (with .root replaced by .csv) name the output.

    Raises:
        SystemExit: if the path contains neither '3l' nor '2lSS'.
    """
    #Set the channel from the file path
    if '3l' in inf:
        channel = '3l'
    elif '2lSS' in inf:
        channel = '2lSS'
    else:
        # `channel` is not bound on this path, so report the file name
        # instead (the old message raised UnboundLocalError before exiting).
        print(f'Channel for {inf} is invalid. Should be 2lSS or 3l')
        # Same exception exit() raises, without relying on the site helper.
        raise SystemExit
    is3l = channel == '3l'

    #Load in the top model
    topDir = "/data_ceph/afwebb/higgs_diff/topMatching/models"
    topModel = load_model(f"{topDir}/keras_model_top{channel}.h5")
    topNormFactors = np.load(f"{topDir}/top{channel}_normFactors.npy")
    print('loaded')

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    #initialize output dicts (column name -> list of values)
    events = {}
    # Must be a dict like `events`: it used to start as a list, so the
    # "first fill" test against {} never fired and no 3lF rows were kept.
    events3lF = {}

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx)+'/'+str(nEntries))

        nom.GetEntry(idx)

        #Check if the Higgs decay products are reconstructed - first leptons
        if is3l:
            if nom.lep_Parent_1 != 25 and nom.lep_Parent_2 != 25:
                continue
            # lepton 0 from the Higgs -> 3lF, otherwise 3lS
            evtChannel = '3lF' if nom.lep_Parent_0 == 25 else '3lS'
        else:
            if nom.lep_Parent_0 != 25 and nom.lep_Parent_1 != 25:
                continue
            evtChannel = '2lSS'

        #Check if Higgs jets are reconstructed (not required for 3lF)
        if evtChannel != '3lF' and sum(x == 25 for x in nom.jet_parents) != 2:
            continue

        #Find the b-jets from tops
        # NOTE(review): for 3l files this passes the '3lF'/'3lS' label, as the
        # original did, although the model loaded above is the '3l' one -
        # confirm findBestTopKeras accepts the sub-channel label.
        topRes = findBestTopKeras(nom, evtChannel, topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        #Get all possible combinations
        combos = higgsTopCombos(evtChannel, nom, topIdx0, topIdx1, topScore, 1)
        if not combos or len(combos['higgsDicts']) == 0:
            continue

        #Append this entry's columns to the table for its channel
        higgsDicts = combos['higgsDicts']
        target = events3lF if evtChannel == '3lF' else events
        if not target:
            target.update(higgsDicts)
        else:
            for k in target:
                target[k].extend(higgsDicts[k])

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    if channel == '2lSS':
        dfFlat.to_csv('csvFiles/higgsTop2lSS/'+outF, index=False, float_format='%.3f')
    else:  # channel == '3l'
        dfFlat.to_csv('csvFiles/higgsTop3lS/'+outF, index=False, float_format='%.3f')
        df3lF = pd.DataFrame.from_dict(events3lF)
        df3lF = shuffle(df3lF)
        df3lF.to_csv('csvFiles/higgsTop3lF/'+outF, index=False, float_format='%.3f')
Пример #3
0
# Per-b-multiplicity counters; they are only initialised in this part of the
# script, nothing visible here updates them.
n1bCorrect, n2bCorrect, n3bCorrect = 0,0,0

# Loop over the tree entries and tally how often the top matching picks the
# truth b-jets. nEntries, nom, channel, topModel, topNormFactors and the
# nGood* counters are defined earlier in the script, outside this excerpt.
for idx in range(nEntries):
    # progress printout every 1000 entries
    if idx%1000==0:
        print(str(idx)+'/'+str(nEntries))
    # only the first 5000 entries are examined
    if idx==5000:
        break

    nom.GetEntry(idx)

    # When the second command-line argument contains '1b'/'2b'/'3b', keep only
    # entries whose nJets_OR_DL1r_70 equals that number.
    if '1b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=1: continue
    if '2b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=2: continue
    if '3b' in sys.argv[2] and nom.nJets_OR_DL1r_70!=3: continue

    # Run the top matching; a falsy result means the entry is skipped
    topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
    if not topRes:
        continue
    topMatches, truthBs, topScore = topRes['bestComb'], topRes['truthComb'], topRes['topScore']
    #print(topRes['bestComb'], topRes['truthComb'], topRes['topScore'])
    #topMatches, truthBs = findBestTopKeras(nom, channel, topModel, topNormFactors, 1)

    # Only score entries where the truth combination has exactly two entries
    if len(truthBs)!=2: continue 

    # Entries scoring above 0.3: count them, count those where both chosen
    # jets are in the truth combination, and those where at least one is.
    if topScore>0.3:
        nGood+=1
        if topMatches[0] in truthBs and topMatches[1] in truthBs:
            nGoodCorrect+=1
        if topMatches[0] in truthBs or topMatches[1] in truthBs:
            nGoodOne+=1
    if topScore<0.3:
Пример #4
0
# Counters; they are only initialised in this part of the script, nothing
# visible here updates them.
lepCorrect = 0
oneCorrect = 0

# Loop over the tree entries and run top + Higgs matching on each.
# nEntries, nom, channel, topModel, topNormFactors, higgsModel and
# higgsNormFactors are defined earlier in the script, outside this excerpt.
for idx in range(nEntries):
    # progress printout every 1000 entries
    if idx % 1000 == 0:
        print(str(idx) + '/' + str(nEntries))
    # only the first 5000 entries are examined
    if idx == 5000:
        break

    nom.GetEntry(idx)

    if '3l' in channel:
        # 3lF keeps entries whose lepton 0 has parent 25; 3lS keeps the rest
        if channel == '3lF' and nom.lep_Parent_0 != 25: continue
        if channel == '3lS' and nom.lep_Parent_0 == 25: continue
        # both 3l sub-channels use the '3l' label for the top matching
        topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors)
    else:
        topRes = findBestTopKeras(nom, '2lSS', topModel, topNormFactors)

    # a falsy top-matching result means the entry is skipped
    if not topRes:
        continue

    topIdx0, topIdx1 = topRes['bestComb']
    topScore = topRes['topScore']
    # Run the Higgs matching for this channel, given the chosen top candidates
    higgsRes = findBestHiggsTop(nom, channel, higgsModel, higgsNormFactors,
                                topIdx0, topIdx1, topScore)

    # a falsy Higgs-matching result means the entry is skipped
    if not higgsRes: continue
    higgsMatches = higgsRes['bestComb']
    truthPair = higgsRes['truthComb']
Пример #5
0
def runReco(inf):
    """Build the 3l decay-mode table from one ROOT file and write it to CSV.

    For every entry of the 'nominal' tree in which lepton 1 or lepton 2 has
    parent 25, the top model and both Higgs models (3lF, 3lS) are evaluated
    and one decayDict row is stored. The last decayDict argument is 0 when
    lepton 0 also has parent 25 and 1 otherwise. The shuffled rows are written
    to 'csvFiles/' plus the last two components of the input path, with .root
    replaced by .csv.

    Args:
        inf: Path of the input ROOT file.
    """
    #load in the models - not picklable, can't do outside the function
    topModel = load_model(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
    topNormFactors = np.load(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy"
    )

    model3lF = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5"
    )
    normFactors3lF = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy"
    )

    model3lS = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5"
    )
    normFactors3lS = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy"
    )

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    #initialize output rows
    events = []

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        nom.GetEntry(idx)

        #Truth selection first: it is cheap, and entries failing it were
        #discarded anyway after all three models had been evaluated.
        if nom.lep_Parent_1 != 25 and nom.lep_Parent_2 != 25:
            continue

        #Perform top matching. Get top candidates, topScore
        topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        #Perform higgs matching. Get 3lF, 3lS scores
        res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                  topIdx0, topIdx1, topScore)
        if not res3lF:
            continue
        res3lS = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                  topIdx0, topIdx1, topScore)
        if not res3lS:
            continue

        #Label: 0 when lepton 0 comes from the Higgs (3lF), 1 otherwise (3lS)
        decayLabel = 0 if nom.lep_Parent_0 == 25 else 1
        events.append(
            decayDict(nom, res3lF['higgsTopScore'], res3lS['higgsTopScore'],
                      topIdx0, topIdx1, topScore, decayLabel))

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    dfFlat.to_csv('csvFiles/' + outF, index=False)
Пример #6
0
def runReco(inf):
    """Build Higgs-pT regression inputs from one ROOT file and write them to CSV.

    The channel is taken from the file path ('3l' or '2lSS'). For each entry
    of the 'nominal' tree with a truth Higgs (pdgId 25) of non-zero pT, the
    top and Higgs matching models are evaluated and one row is stored. 3l
    entries whose lepton 0 has parent 25 go to the 3lF table, all others to
    the 3lS / 2lSS table. Output goes to inputFiles/higgsTop2lSS/, or to
    inputFiles/higgsTop3lS/ and inputFiles/higgsTop3lF/.

    Args:
        inf: Path of the input ROOT file. The last two path components (with
            .root replaced by .csv) name the output file.

    Raises:
        SystemExit: if the path contains neither '3l' nor '2lSS'.
    """
    #Set the channel from the file path
    if '3l' in inf:
        channel = '3l'
        higgsChannel = '3lS'
        ptDict = ptDictHiggsTop3lS
    elif '2lSS' in inf:
        channel = '2lSS'
        higgsChannel = '2lSS'
        ptDict = ptDictHiggsTop2lSS
    else:
        # `channel` is not bound on this path, so report the file name
        # instead (the old message raised UnboundLocalError before exiting).
        print(f'Channel for {inf} is invalid. Should be 2lSS or 3l')
        # Same exception exit() raises, without relying on the site helper.
        raise SystemExit
    is3l = channel == '3l'

    #Load in the models and normalization factors
    topDir = "/data_ceph/afwebb/higgs_diff/topMatching/models"
    higgsDir = "/data_ceph/afwebb/higgs_diff/higgsMatching/models"

    topModel = load_model(f"{topDir}/keras_model_top{channel}.h5")
    topNormFactors = np.load(f"{topDir}/top{channel}_normFactors.npy")

    if is3l:
        model3lF = load_model(f"{higgsDir}/keras_model_higgsTop3lF.h5")
        normFactors3lF = np.load(f"{higgsDir}/higgsTop3lF_normFactors.npy")

    # 3lS and 2lSS are evaluated by the same code path below
    higgsModel = load_model(
        f"{higgsDir}/keras_model_higgsTop{higgsChannel}.h5")
    higgsNormFactors = np.load(
        f"{higgsDir}/higgsTop{higgsChannel}_normFactors.npy")

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    #initialize output rows
    events = []
    events3lF = []

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        nom.GetEntry(idx)

        #Get the Higgs Pt. Reset for every entry: otherwise an entry without a
        #truth Higgs silently reuses the previous entry's value (or fails with
        #UnboundLocalError if it is the first entry).
        higgs_pt = None
        for i, pdgId in enumerate(nom.m_truth_pdgId):
            if pdgId == 25:
                higgs_pt = nom.m_truth_pt[i]
                break
        if not higgs_pt:
            continue

        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        if is3l and nom.lep_Parent_0 == 25:
            #3lF: lepton 0 comes from the Higgs
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            if not res3lF:
                continue
            higgsTopScore = res3lF['higgsTopScore']
            lepIdx = res3lF['bestComb'][0]

            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdx, higgsTopScore, topIdx0, topIdx1,
                                  topScore, higgs_pt))
        else:
            #3lS or 2lSS
            res = findBestHiggsTop(nom, higgsChannel, higgsModel,
                                   higgsNormFactors, topIdx0, topIdx1,
                                   topScore)
            if not res:
                continue
            higgsTopScore = res['higgsTopScore']
            lepIdx, jetIdx0, jetIdx1 = res['bestComb']
            events.append(
                ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore, topIdx0,
                       topIdx1, topScore, higgs_pt))

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    if channel == '2lSS':
        dfFlat.to_csv('inputFiles/higgsTop2lSS/' + outF, index=False)
    elif channel == '3l':
        dfFlat.to_csv('inputFiles/higgsTop3lS/' + outF, index=False)
        df3lF = pd.DataFrame.from_dict(events3lF)
        df3lF = shuffle(df3lF)
        df3lF.to_csv('inputFiles/higgsTop3lF/' + outF, index=False)