Exemplo n.º 1
0
def runReco(inf):
    """Run top and Higgs reconstruction over every entry of a ROOT file.

    The channel is inferred from the input path: a path containing ``'3l'``
    selects the 3l channel (checked first), otherwise a path containing
    ``'2lSS'`` selects the 2lSS channel. The matching Keras models and
    normalization factors are loaded inside the function on every call.

    Args:
        inf: Path of the ROOT file to process. The file is expected to hold
            a tree named ``'nominal'``.

    Returns:
        ``None`` when the channel cannot be inferred from ``inf``, or when
        the tree already exposes ``recoHiggsPt_2lSS`` / ``recoHiggsPt_3lS``.

        For the 3l channel, the tuple ``(events, events3lF, eventsDecay,
        higgsRecoScores, higgsRecoScoresF, topRecoScores)``.

        For the 2lSS channel, the tuple ``(events, higgsRecoScores,
        topRecoScores)``.

        No entry is skipped inside the loop, so every returned list holds
        one item per tree entry, in entry order.
    """
    #Set the channel, load in the models and normalization factors
    if '3l' in inf:
        channel = '3l'
        ptDict = ptDictHiggsTop3lS

        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5"
        )
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy",
            allow_pickle=True)

        model3lF = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5"
        )
        normFactors3lF = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy",
            allow_pickle=True)

        model3lS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5"
        )
        normFactors3lS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy",
            allow_pickle=True)

    elif '2lSS' in inf:
        channel = '2lSS'
        ptDict = ptDictHiggsTop2lSS

        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top2lSS.h5"
        )
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top2lSS_normFactors.npy",
            allow_pickle=True)

        model2lSS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop2lSS.h5"
        )
        normFactors2lSS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop2lSS_normFactors.npy",
            allow_pickle=True)

    else:
        # No channel was assigned on this path, so report the offending
        # input path instead of the (unbound) channel name.
        print(f'Channel for {inf} is invalid. Should be 2lSS or 3l')
        return

    is3l = channel == '3l'

    #Open the root file
    f = TFile.Open(inf)
    nom = f.Get('nominal')
    if hasattr(nom, "recoHiggsPt_2lSS") or hasattr(nom, "recoHiggsPt_3lS"):
        print(f'{inf} already has score')
        return

    #initialize output lists
    events = []
    higgsRecoScores = []
    topRecoScores = []

    if is3l:
        events3lF = []
        eventsDecay = []
        higgsRecoScoresF = []

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        #Get the event
        nom.GetEntry(idx)

        #Find the best top combination, top reco score
        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if topRes:
            topIdx0, topIdx1 = topRes['bestComb']
            topScore = topRes['topScore']
        else:
            # Placeholder values keep the output aligned with the tree entries
            topIdx0, topIdx1 = 0, 0
            topScore = np.float32(-10)

        topRecoScores.append(topScore)  # add the top reco score

        if is3l:
            #Find the higgs decay products, higgs reco score for 3lF model
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            # NOTE(review): unlike the 3lS/2lSS result below, a falsy res3lF
            # has no fallback here and would raise on the subscript - confirm
            # findBestHiggsTop always returns a result for '3lF'.
            higgsTopScoreF = res3lF['higgsTopScore']
            lepIdx = res3lF['bestComb'][0]

            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdx, higgsTopScoreF, topIdx0,
                                  topIdx1, topScore))
            higgsRecoScoresF.append(higgsTopScoreF)

            #Higgs reco for 3lS (same final state as 2lSS)
            res = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                   topIdx0, topIdx1, topScore)
        else:
            res = findBestHiggsTop(nom, '2lSS', model2lSS, normFactors2lSS,
                                   topIdx0, topIdx1, topScore)

        if res:
            higgsTopScore = res['higgsTopScore']
            lepIdx, jetIdx0, jetIdx1 = res['bestComb']
        else:
            # Placeholder values keep the output aligned with the tree entries
            higgsTopScore = np.float32(-10)
            lepIdx, jetIdx0, jetIdx1 = 1, 0, 0

        #add the pt prediction dictionary
        events.append(
            ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore, topIdx0,
                   topIdx1, topScore))
        higgsRecoScores.append(higgsTopScore)

        #add decay mode dicts
        if is3l:
            eventsDecay.append(
                decayDict(nom, higgsTopScoreF, higgsTopScore, topIdx0, topIdx1,
                          topScore))

    if is3l:
        return events, events3lF, eventsDecay, higgsRecoScores, higgsRecoScoresF, topRecoScores
    return events, higgsRecoScores, topRecoScores
Exemplo n.º 2
0
def runReco(inf):
    """Build the 3l decay-mode table for one ROOT file and write it to CSV.

    For every entry of the ``'nominal'`` tree the top matching and both
    Higgs matchings (3lF and 3lS) are evaluated. An entry is kept only when
    all three return a result and ``lep_Parent_1`` or ``lep_Parent_2``
    equals 25. Each kept entry contributes one ``decayDict`` row whose last
    argument is ``0`` when ``lep_Parent_0 == 25`` and ``1`` otherwise.

    The rows are shuffled and written to ``csvFiles/<dir>/<name>.csv``,
    where ``<dir>/<name>`` are the last two components of ``inf`` with
    ``.root`` replaced by ``.csv``.

    Args:
        inf: Path of the ROOT file to process.

    Returns:
        None. The result is the CSV file written to disk.
    """
    #load in the models - not picklable, can't do outside the function
    topModel = load_model(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5")
    topNormFactors = np.load(
        "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy"
    )

    model3lF = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5"
    )
    normFactors3lF = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy"
    )

    model3lS = load_model(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5"
    )
    normFactors3lS = np.load(
        "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy"
    )

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    #one decayDict row per accepted entry
    events = []

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        nom.GetEntry(idx)

        #Perform top matching. Get top candidates, topScore
        topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        #Perform higgs matching. Get 3lF, 3lS scores
        res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                  topIdx0, topIdx1, topScore)
        res3lS = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                  topIdx0, topIdx1, topScore)
        if not res3lF or not res3lS:
            continue

        #Skip entries where neither lepton 1 nor lepton 2 has parent id 25
        if nom.lep_Parent_1 != 25 and nom.lep_Parent_2 != 25:
            continue

        #Label is 0 when lepton 0 also has parent id 25, 1 otherwise
        #(presumably 0 = 3lF decay, 1 = 3lS decay - confirm against decayDict)
        label = 0 if nom.lep_Parent_0 == 25 else 1
        events.append(
            decayDict(nom, res3lF['higgsTopScore'], res3lS['higgsTopScore'],
                      topIdx0, topIdx1, topScore, label))

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    dfFlat.to_csv('csvFiles/' + outF, index=False)
Exemplo n.º 3
0
    nom.GetEntry(idx)

    if '3l' in channel:
        if channel == '3lF' and nom.lep_Parent_0 != 25: continue
        if channel == '3lS' and nom.lep_Parent_0 == 25: continue
        topRes = findBestTopKeras(nom, '3l', topModel, topNormFactors)
    else:
        topRes = findBestTopKeras(nom, '2lSS', topModel, topNormFactors)

    if not topRes:
        continue

    topIdx0, topIdx1 = topRes['bestComb']
    topScore = topRes['topScore']
    #Get dict of all possible jet combinations
    higgsRes = findBestHiggsTop(nom, channel, higgsModel, higgsNormFactors,
                                topIdx0, topIdx1, topScore)

    if not higgsRes: continue
    higgsMatches = higgsRes['bestComb']
    truthPair = higgsRes['truthComb']
    if channel == '3lF' and len(truthPair) != 1:
        continue
    elif channel != '3lF' and len(truthPair) != 3:
        continue

    nEvents += 1
    if higgsMatches == truthPair:
        nCorrect += 1
    if higgsMatches[0] == truthPair[0]:
        lepCorrect += 1
        if channel != '3lF':
Exemplo n.º 4
0
def runReco(inf):
    """Build the Higgs-pt training tables for one ROOT file and write them to CSV.

    The channel is inferred from the input path: a path containing ``'3l'``
    selects the 3l channel (checked first), otherwise a path containing
    ``'2lSS'`` selects the 2lSS channel. For each entry of the ``'nominal'``
    tree the truth Higgs pt is taken from the first truth particle with
    pdgId 25, the top matching is run, and then the Higgs matching is run
    with the 3lF model (3l entries with ``lep_Parent_0 == 25``), the 3lS
    model (other 3l entries) or the 2lSS model. Entries without a truth
    Higgs pt, or for which a matching returns nothing, are skipped.

    Output (rows shuffled, path suffix is the last two components of
    ``inf`` with ``.root`` replaced by ``.csv``):
        * 2lSS: ``inputFiles/higgsTop2lSS/...``
        * 3l:   ``inputFiles/higgsTop3lS/...`` and ``inputFiles/higgsTop3lF/...``

    Args:
        inf: Path of the ROOT file to process.

    Returns:
        None. The results are the CSV files written to disk.

    Raises:
        SystemExit: If neither ``'3l'`` nor ``'2lSS'`` occurs in ``inf``.
    """
    # Local import: sys.exit is the programmatic exit; the bare exit() helper
    # is only injected by the site module.
    import sys

    #Set the channel, load in the models and normalization factors
    if '3l' in inf:
        channel = '3l'
        ptDict = ptDictHiggsTop3lS

        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top3l.h5"
        )
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top3l_normFactors.npy"
        )

        model3lF = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lF.h5"
        )
        normFactors3lF = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lF_normFactors.npy"
        )

        model3lS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop3lS.h5"
        )
        normFactors3lS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop3lS_normFactors.npy"
        )

    elif '2lSS' in inf:
        channel = '2lSS'
        ptDict = ptDictHiggsTop2lSS

        topModel = load_model(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/keras_model_top2lSS.h5"
        )
        topNormFactors = np.load(
            "/data_ceph/afwebb/higgs_diff/topMatching/models/top2lSS_normFactors.npy"
        )

        model2lSS = load_model(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/keras_model_higgsTop2lSS.h5"
        )
        normFactors2lSS = np.load(
            "/data_ceph/afwebb/higgs_diff/higgsMatching/models/higgsTop2lSS_normFactors.npy"
        )

    else:
        # No channel was assigned on this path, so report the offending
        # input path instead of the (unbound) channel name.
        print(f'Channel for {inf} is invalid. Should be 2lSS or 3l')
        sys.exit(1)

    is3l = channel == '3l'

    f = TFile.Open(inf)
    nom = f.Get('nominal')

    #initialize output lists
    events = []
    events3lF = []

    #Loop over all entries
    nEntries = nom.GetEntries()
    for idx in range(nEntries):
        if idx % 10000 == 0:
            print(str(idx) + '/' + str(nEntries))

        nom.GetEntry(idx)

        #Get the Higgs Pt. Reset per entry so that an entry without a truth
        #Higgs cannot inherit the value found for a previous entry.
        higgs_pt = None
        for i, pdgId in enumerate(nom.m_truth_pdgId):
            if pdgId == 25:
                higgs_pt = nom.m_truth_pt[i]
                break
        # Falsy check also skips a Higgs whose pt is exactly 0, as before
        if not higgs_pt:
            continue

        topRes = findBestTopKeras(nom, channel, topModel, topNormFactors)
        if not topRes:
            continue
        topIdx0, topIdx1 = topRes['bestComb']
        topScore = topRes['topScore']

        if is3l and nom.lep_Parent_0 == 25:
            # 3l entry with lepton 0 from the Higgs: use the 3lF model
            res3lF = findBestHiggsTop(nom, '3lF', model3lF, normFactors3lF,
                                      topIdx0, topIdx1, topScore)
            if not res3lF:
                continue
            higgsTopScore = res3lF['higgsTopScore']
            lepIdx = res3lF['bestComb'][0]

            events3lF.append(
                ptDictHiggsTop3lF(nom, lepIdx, higgsTopScore, topIdx0, topIdx1,
                                  topScore, higgs_pt))
            continue

        if is3l:
            res = findBestHiggsTop(nom, '3lS', model3lS, normFactors3lS,
                                   topIdx0, topIdx1, topScore)
        else:
            res = findBestHiggsTop(nom, '2lSS', model2lSS, normFactors2lSS,
                                   topIdx0, topIdx1, topScore)
        if not res:
            continue

        higgsTopScore = res['higgsTopScore']
        lepIdx, jetIdx0, jetIdx1 = res['bestComb']
        events.append(
            ptDict(nom, jetIdx0, jetIdx1, lepIdx, higgsTopScore, topIdx0,
                   topIdx1, topScore, higgs_pt))

    dfFlat = pd.DataFrame.from_dict(events)
    dfFlat = shuffle(dfFlat)

    outF = '/'.join(inf.split("/")[-2:]).replace('.root', '.csv')
    if channel == '2lSS':
        dfFlat.to_csv('inputFiles/higgsTop2lSS/' + outF, index=False)
    elif channel == '3l':
        dfFlat.to_csv('inputFiles/higgsTop3lS/' + outF, index=False)
        df3lF = pd.DataFrame.from_dict(events3lF)
        df3lF = shuffle(df3lF)
        df3lF.to_csv('inputFiles/higgsTop3lF/' + outF, index=False)