示例#1
0
def StitchQCD(QCDdict, normDict=None):
    '''Stitches together histograms in QCD hist groups.

    Histograms that share the same key across the samples in QCDdict are summed.
    The first histogram seen for a key is cloned and all later ones are added to that clone.

    @param QCDdict ({string:HistGroup}): Dictionary of HistGroup objects
    @param normDict ({string:float}): Factors to normalize each sample to where keys must match QCDdict keys.
            Default to None and assume normalization has already been done.
            NOTE: when provided, the histograms stored in QCDdict are scaled in place.
    Returns:
        HistGroup: New HistGroup with histograms in group being the final stitched versions
    '''
    from TIMBER.Analyzer import HistGroup
    # Normalize first if needed (identity check: only None means "skip normalization")
    if normDict is not None:
        for k, factor in normDict.items():
            sample_group = QCDdict[k]  # KeyError here means normDict has a key QCDdict lacks
            for hkey in sample_group.keys():
                sample_group[hkey].Scale(factor)
    # Stitch
    out = HistGroup("QCD")
    for ksample in QCDdict.keys():
        sample_group = QCDdict[ksample]
        for khist in sample_group.keys():
            if khist not in out.keys():
                # First occurrence: clone so the summed histogram is a separate object from the input
                out[khist] = sample_group[khist].Clone()
            else:
                out[khist].Add(sample_group[khist])

    return out
示例#2
0
def select(setname,year):
    '''Book the selection on one dataset and histogram every variable listed in `varnames`.

    @param setname (string): Dataset name; used to build the input file path and the histogram names.
    @param year (string): Year tag; used to build the input file path and the histogram names.
    Returns:
        HistGroup: Group named "<setname>_<year>" holding one 20-bin [0,1] histogram per key of
            `varnames`, each weighted by the `norm` column.
    '''
    # Just use two threads - no need to kill the interactive nodes
    ROOT.ROOT.EnableImplicitMT(2)

    # Initialize TIMBER analyzer
    ana = analyzer('%s/%s_bstar%s.root' %(rootfile_path,setname, year))

    # Normalization weight: unity for data, otherwise computed by the helper
    norm = 1. if ana.isData else helpers.getNormFactor(setname,year,config,ana.genEventCount)

    # Book actions on the RDataFrame
    ana.Cut('filters',ana.GetFlagString(flags))
    ana.Cut('trigger',ana.GetTriggerString(triggers))
    # need to calculate if we have two jets (with Id) that are back-to-back
    ana.Define('jetIdx','hemispherize(FatJet_phi, FatJet_jetId)')

    # Cuts are applied in exactly this order
    ordered_cuts = (
        # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
        ('nFatJets_cut','nFatJet > max(jetIdx[0],jetIdx[1])'),
        # cut on the jetIdx calculation
        ("hemis","(jetIdx[0] != -1)&&(jetIdx[1] != -1)"),
        ('pt_cut','FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400'),
        ('eta_cut','abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4'),
        ('mjet_cut','FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50'),
        ('mtw_cut','analyzer::invariantMass(jetIdx[0],jetIdx[1],FatJet_pt,FatJet_eta,FatJet_phi,FatJet_msoftdrop) > 1200'),
    )
    for cut_name, cut_expr in ordered_cuts:
        ana.Cut(cut_name,cut_expr)

    # Ratio columns; each uses "condition ? <do if true> : <do if false>" so that a
    # non-positive denominator yields -1 instead of a division by zero
    ratio_columns = (
        ('lead_tau32','FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1'),
        ('sublead_tau32','FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1'),
        ('lead_tau21','FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1'),
        ('sublead_tau21','FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1'),
    )
    for column_name, column_expr in ratio_columns:
        ana.Define(column_name,column_expr)
    ana.Define('norm',str(norm))

    # Book a group to save the histograms
    group = HistGroup("%s_%s"%(setname,year))
    for varname in varnames.keys():
        histname = '%s_%s_%s'%(setname,year,varname)
        # Arguments for binning that you would normally pass to a TH1
        model = (histname,histname,20,0,1)
        # Project dataframe into a histogram (hist name/binning tuple, variable to plot from dataframe, weight)
        result = ana.GetActiveNode().DataFrame.Histo1D(model,varname,'norm')
        # Called for its effect only; the return value is discarded and the result handle itself is stored
        result.GetValue()
        group.Add(varname,result) # Add it to our group

    # Return the group
    return group
def select(setname, year):
    '''Book the preselection and the N-1 cut scan for one dataset and return the N-1 histograms.

    @param setname (string): Dataset name; used to build the input file path.
    @param year (string): Year tag; used to build the input file path.
    Returns:
        HistGroup: Group named "nminus1Hists" with one histogram per N-1 node (the 'full' node
            is skipped), keyed by the variable the removed cut acts on and weighted by `norm`.
    '''
    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    a.Define(
        'jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)'
    )  # need to calculate if we have two jets (with Id) that are back-to-back
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'
    )  # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut("hemis",
          "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")  # cut on that calculation
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut(
        'eta_cut',
        'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Define('norm', str(norm))

    #################################
    # Build some variables for jets #
    #################################
    # Wtagging decision logic
    # Returns 0 for no tag, 1 for lead tag, 2 for sublead tag, and 3 for both tag (which is physics-wise equivalent to 2)
    # NOTE(review): tau2/tau1 is not protected against tau1 == 0 here - confirm Wtag tolerates that
    wtag_str = "1*Wtag(FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]],0,{0}, FatJet_msoftdrop[jetIdx[0]],65,105) + 2*Wtag(FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]],0,{0}, FatJet_msoftdrop[jetIdx[1]],65,105)".format(
        cuts['tau21'])

    jets = VarGroup('jets')
    jets.Add('wtag_bit', wtag_str)
    jets.Add(
        'top_bit', '(wtag_bit & 2)? 0: (wtag_bit & 1)? 1: -1'
    )  # (if wtag==3 or 2 (subleading w), top_index=0) else (if wtag==1, top_index=1) else (-1)
    jets.Add('top_index', 'top_bit >= 0 ? jetIdx[top_bit] : -1')
    jets.Add('w_index',
             'top_index == 0 ? jetIdx[1] : top_index == 1 ? jetIdx[0] : -1')
    # Calculate some new columns that we'd like to cut on (that were costly to do before the other filtering)
    jets.Add(
        "lead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_msoftdrop[jetIdx[0]])"
    )
    jets.Add(
        "sublead_vect",
        "hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_msoftdrop[jetIdx[1]])"
    )
    jets.Add("deltaY", "abs(lead_vect.Rapidity()-sublead_vect.Rapidity())")
    jets.Add("mtw", "hardware::invariantMass({lead_vect,sublead_vect})")

    #########
    # N - 1 #
    #########
    plotting_vars = VarGroup(
        'plotting_vars')  # assume leading is top and subleading is W
    plotting_vars.Add("mtop", "FatJet_msoftdrop[jetIdx[0]]")
    plotting_vars.Add("mW", "FatJet_msoftdrop[jetIdx[1]]")
    plotting_vars.Add("tau32", "FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]]")
    plotting_vars.Add(
        "subjet_btag",
        "max(SubJet_btagDeepB[FatJet_subJetIdx1[jetIdx[0]]],SubJet_btagDeepB[FatJet_subJetIdx2[jetIdx[0]]])"
    )
    plotting_vars.Add("tau21", "FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_WvsQCD",
                      "FatJet_deepTagMD_WvsQCD[jetIdx[1]]")
    plotting_vars.Add("lead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[0]]")
    plotting_vars.Add("sublead_jet_deepAK8_MD_TvsQCD",
                      "FatJet_deepTagMD_TvsQCD[jetIdx[1]]")

    N_cuts = CutGroup('Ncuts')  # cuts
    N_cuts.Add("deltaY_cut", "deltaY<1.6")
    N_cuts.Add("mtop_cut", "(mtop > 105.)&&(mtop < 220.)")
    N_cuts.Add("mW_cut", "(mW > 65.)&&(mW < 105.)")
    #N_cuts.Add("tau32_cut",       "(tau32 > 0.0)&&(tau32 < %s)"%(cuts['tau32']))
    #N_cuts.Add("subjet_btag_cut", "(subjet_btag > %s)&&(subjet_btag < 1.)"%(cuts['sjbtag']))
    #N_cuts.Add("tau21_cut",       "(tau21 > 0.0)&&(tau21 < %s)"%(cuts['tau21']))
    N_cuts.Add("lead_jet_deepAK8_MD_WvsQCD_cut",
               "lead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_WvsQCD_cut",
               "sublead_jet_deepAK8_MD_WvsQCD > 0.9")
    N_cuts.Add("lead_jet_deepAK8_MD_TvsQCD_cut",
               "lead_jet_deepAK8_MD_TvsQCD > 0.9")
    N_cuts.Add("sublead_jet_deepAK8_MD_TvsQCD_cut",
               "sublead_jet_deepAK8_MD_TvsQCD > 0.9")

    # Organize N-1 of tagging variables when assuming top is always leading
    nodeToPlot = a.Apply([jets, plotting_vars])
    nminus1Nodes = a.Nminus1(
        N_cuts, nodeToPlot
    )  # constructs N nodes with a different N-1 selection for each
    nminus1Hists = HistGroup('nminus1Hists')
    # [number of bins, lower edge, upper edge] for each plotted variable
    binning = {
        'mtop': [25, 50, 300],
        'mW': [25, 30, 270],
        'tau32': [20, 0, 1],
        'tau21': [20, 0, 1],
        'subjet_btag': [20, 0, 1],
        'deltaY': [20, 0, 2.0],
        'lead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_WvsQCD': [20, 0, 1],
        'lead_jet_deepAK8_MD_TvsQCD': [20, 0, 1],
        'sublead_jet_deepAK8_MD_TvsQCD': [20, 0, 1]
    }
    # Add hists to group
    for nkey in nminus1Nodes.keys():
        if nkey == 'full': continue
        # Recover the plotted variable name from the node key by stripping the cut decorations
        var = nkey.replace('_cut', '').replace('minus_', '')
        nbins, xlow, xhigh = binning[var]
        hist_tuple = (var, var, nbins, xlow, xhigh)
        hist = nminus1Nodes[nkey].DataFrame.Histo1D(hist_tuple, var, 'norm')
        hist.GetValue()
        nminus1Hists.Add(var, hist)
    # Dump the node tree once, after all histograms are booked; booking histograms
    # does not add analyzer nodes, so there is no need to rewrite the file per variable
    a.PrintNodeTree('exercises/nminus1_tree.dot')
    # Return the group
    return nminus1Hists
            outfile = ROOT.TFile.Open(rootfile_name, 'RECREATE')
            outfile.cd()
            histgroup.Do(
                'Write'
            )  # This will call TH1.Write() for all of the histograms in the group
            outfile.Close()
            del histgroup  # Now that they are saved out, drop from memory

        # Open histogram files that we saved
        print('Opening ' + rootfile_name)
        infile = ROOT.TFile.Open(rootfile_name)
        # ... raise exception if we forgot to run with --select!
        if infile == None:
            raise TypeError(rootfile_name)
        # Put histograms back into HistGroups
        histgroups[setname] = HistGroup(setname)
        for key in infile.GetListOfKeys():  # loop over histograms in the file
            keyname = key.GetName()
            inhist = infile.Get(key.GetName())  # get it from the file
            inhist.SetDirectory(
                0
            )  # set the directory so hist is stored in memory and not as reference to TFile (this way it doesn't get tossed by python garbage collection when infile changes)
            histgroups[setname].Add(keyname, inhist)  # add to our group
            if keyname not in varnames:
                varnames.append(keyname)

    # For each variable to plot...
    for varname in varnames:
        if varname == 'deltaY':
            continue  # deltaY optimization requires cuts on mtw to make sense so skipping
        plot_filename = plotdir + '/%s_%s_Nminus1.png' % (varname,
def MakeEfficiency(year):
    '''Build 2D trigger efficiencies for one year and write them to THtrigger2D_<year>.root.

    For the untagged selection and for each tagged region, a denominator histogram is booked
    before `ApplyTrigs()` and a numerator histogram after it, both in (m_javg, mth_trig).
    Each numerator/denominator pair becomes a ROOT.TEfficiency, which is written to the output
    file together with a fitted histogram created from it.

    @param year: Year tag; used in the input snapshot path, passed to THClass and used in
        the output file name.
    '''
    selection = THClass('../dijet_nano_files/THsnapshot_Data_%s.root' % (year),
                        year, 1, 1)
    selection.OpenForSelection('None')
    # NOTE(review): 'm_javg' and 'mth_trig' are not defined in this function; presumably
    # they are provided by the snapshot or by OpenForSelection - confirm.
    hists = HistGroup('out')

    def book(hist_name):
        # Book a 2D histogram on whatever node is active at call time
        hists.Add(
            hist_name,
            selection.a.DataFrame.Histo2D(
                (hist_name, '', 20, 60, 260, 22, 800, 3000), 'm_javg',
                'mth_trig'))

    # Node every region restarts from
    noTag = selection.a.Cut('pretrig', 'HLT_PFJet320==1')

    # Baseline - no tagging
    book('preTagDenominator')
    selection.ApplyTrigs()
    book('preTagNumerator')

    # (region label, tagger, extra ApplyTopPick keyword arguments), processed in this order:
    # DeepAK8 SR, DeepAK8 CR, ParticleNet SR, ParticleNet CR
    tagged_regions = (
        ('DAK8_SR', 'deepTag_TvsQCD', {}),
        ('DAK8_CR', 'deepTag_TvsQCD', {'invert': True}),
        ('PN_SR', 'particleNet_TvsQCD', {}),
        ('PN_CR', 'particleNet_TvsQCD', {'invert': True}),
    )
    for region, tagger, pick_kwargs in tagged_regions:
        selection.a.SetActiveNode(noTag)
        selection.ApplyTopPick(tagger, **pick_kwargs)
        book('postTagDenominator_' + region)
        selection.ApplyTrigs()
        book('preTagNumerator_' + region)

    # Make efficiencies (numerator first, then denominator)
    effs = {
        "Pretag":
        ROOT.TEfficiency(hists['preTagNumerator'], hists['preTagDenominator'])
    }
    for region, _tagger, _pick_kwargs in tagged_regions:
        effs[region] = ROOT.TEfficiency(hists['preTagNumerator_' + region],
                                        hists['postTagDenominator_' + region])

    out = ROOT.TFile.Open('THtrigger2D_%s.root' % year, 'RECREATE')
    out.cd()
    for name, eff in effs.items():
        eff_hist = eff.CreateHistogram()
        eff_hist.SetName(name + '_hist')
        eff_hist.SetTitle(name)
        eff_hist.GetXaxis().SetTitle('m_{j}^{avg} (GeV)')
        eff_hist.GetYaxis().SetTitle('m_{jj} (GeV)')
        eff_hist.GetZaxis().SetTitle('Efficiency')
        eff_hist.SetMinimum(0.6)
        eff_hist.SetMaximum(1.0)
        # Fit function with three parameters and their starting values
        fit_func = ROOT.TF2("eff_func",
                            "1-[0]/10*exp([1]*y/1000)*exp([2]*x/200)", 60, 260,
                            800, 2600)
        fit_func.SetParameter(0, 1)
        fit_func.SetParameter(1, -2)
        fit_func.SetParameter(2, -2)
        eff_hist.Fit(fit_func)
        eff_hist.Write()
        eff.SetName(name)
        eff.Write()
    out.Close()
示例#6
0
def select(setname, year):
    '''Book the selection and plotting variables for one dataset and histogram each key of `varnames`.

    @param setname (string): Dataset name; used to build the input file path and the histogram names.
    @param year (string): Year tag ('16', '17' or '18'); used in the input file path, the
        histogram names and to pick the b-tagging working points.
    Returns:
        HistGroup: Group named "<setname>_<year>" with one histogram per key of `varnames`,
            weighted by the `norm` column.
    Raises:
        ValueError: If a key of `varnames` has no binning defined below.
    '''
    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight
    if not a.isData:
        # For MC we need to apply the xsec * lumi / NumberOfGeneratedEvents weight
        # This function is a helper defined here: https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/helpers.py#L5-L18
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame

    # First - we will cut on the filters we specified above
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))

    # Second - we need to calculate if we have two jets (with Id) that are back-to-back
    # The following function will check for jets in opposite hemispheres (of phi) that also pass a jetId
    # it is defined here: https://github.com/cmantill/BstarToTW_CMSDAS2021/blob/master/bstar.cc#L17-L66
    # so first we *define* jetIdx as the index of these two jets back-to-back - ordered by pT
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')

    # Third - we will perform a selection:
    # by requiring at least two fat-jets (step 1) that are back to back (step 2) and that have a minimum pT of 400 (step 3)
    # some of these functions used below such as max() and Sum() are defined in RDataFrame - see the cheatsheet: https://root.cern/doc/master/classROOT_1_1RDataFrame.html#cheatsheet
    a.Cut(
        'nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])'
    )  # (step 1) If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut(
        "hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)"
    )  # (step 2) we cut on the variable we just defined - so that both jet indices exist and are different than the default value -1
    a.Cut(
        'pt_cut',
        'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')  # (step 3)

    # Now we are ready to define our first variable to plot: lead_jetPt
    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')

    # Soft drop mass of the leading jet
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')

    # More leading-jet variables
    a.Define('lead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[0]]')
    # NOTE(review): this is the whole FatJet_tau2 collection, not FatJet_tau2[jetIdx[0]] - confirm intended
    a.Define('lead_tau2', 'FatJet_tau2')

    a.Define(
        'lead_tau21',
        'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1'
    )  #Don't divide by zero
    a.Define(
        'lead_tau32',
        'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1'
    )
    a.Define('lead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[0]]')
    a.Define('lead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[0]]')

    # Same variables for the subleading jet
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('sublead_jet_pt', 'FatJet_pt[jetIdx[1]]')
    a.Define('sublead_jet_pt_nom', 'FatJet_pt_nom[jetIdx[1]]')

    a.Define(
        'sublead_tau21',
        'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1'
    )
    a.Define(
        'sublead_tau32',
        'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1'
    )
    a.Define('sublead_deepAK8_Wscore', 'FatJet_deepTagMD_WvsQCD[jetIdx[1]]')
    a.Define('sublead_deepAK8_topscore', 'FatJet_deepTagMD_TvsQCD[jetIdx[1]]')

    # Four-vectors of the two jets and their invariant mass
    a.Define(
        'lead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[0]],FatJet_eta[jetIdx[0]],FatJet_phi[jetIdx[0]],FatJet_mass[jetIdx[0]])'
    )
    a.Define(
        'sublead_vector',
        'hardware::TLvector(FatJet_pt[jetIdx[1]],FatJet_eta[jetIdx[1]],FatJet_phi[jetIdx[1]],FatJet_mass[jetIdx[1]])'
    )
    a.Define('invariantMass',
             'hardware::invariantMass({lead_vector,sublead_vector})')

    # To define our second variable, the number of loose b-jets, let's define the b-tagging working points
    # These [loose, medium, tight] working points are for the DeepCSV variable (ranging between 0 and 1) - saved in NanoAOD as Jet_btagDeepB:
    # NOTE: for any other `year` value bcut stays empty and bcut[0] below raises IndexError
    bcut = []
    if year == '16':
        bcut = [0.2217, 0.6321, 0.8953]
    elif year == '17':
        bcut = [0.1522, 0.4941, 0.8001]
    elif year == '18':
        bcut = [0.1241, 0.4184, 0.7571]
    # Then, we use the Sum function of RDataFrame to count the number of AK4Jets with DeepCSV score larger than the loose WP
    a.Define('nbjet_loose',
             'Sum(Jet_btagDeepB > ' + str(bcut[0]) + ')')  # DeepCSV loose WP

    # Finally let's define the normalization weight of the sample as one variable as well
    a.Define('norm', str(norm))

    # A nice functionality of TIMBER is to print all the selections that we have done:

    a.PrintNodeTree(plotdir + '/signal_tree.dot', verbose=True)

    # Now we are ready to save histograms (in a HistGroup)
    out = HistGroup("%s_%s" % (setname, year))
    for varname in varnames.keys():
        histname = '%s_%s_%s' % (setname, year, varname)
        # Arguments for binning that you would normally pass to a TH1 (histname, histname, number of bins, min bin, max bin)
        # The first matching substring wins, so the order of the branches matters
        if "nbjet" in varname:
            hist_tuple = (histname, histname, 10, 0, 10)
        elif "lead_jet" in varname:
            hist_tuple = (histname, histname, 30, 0, 3000)
        elif "lead_softdrop" in varname:
            hist_tuple = (histname, histname, 30, 0, 300)
        elif "lead_tau21" in varname:
            hist_tuple = (histname, histname, 30, 0, 1)
        elif "lead_tau2" in varname:
            hist_tuple = (histname, histname, 30, 0, 1)
        elif "lead_tau32" in varname:
            hist_tuple = (histname, histname, 30, 0, 1)
        elif "lead_deepAK8_Wscore" in varname:
            hist_tuple = (histname, histname, 30, 0, 1)
        elif "lead_deepAK8_topscore" in varname:
            hist_tuple = (histname, histname, 30, 0, 1)
        elif "Mass" in varname:
            hist_tuple = (histname, histname, 50, 0, 5000)
        else:
            # Without this branch an unmatched variable would either fail with an
            # UnboundLocalError or silently reuse the name and binning of the
            # previous variable, producing a mislabelled histogram.
            raise ValueError(
                'No histogram binning defined for variable "%s"' % varname)
        hist = a.GetActiveNode().DataFrame.Histo1D(
            hist_tuple, varname, 'norm'
        )  # Project dataframe into a histogram (hist name/binning tuple, variable to plot from dataframe, weight)
        hist.GetValue(
        )  # Called for its effect only; the result handle (not the returned TH1) is what gets stored
        out.Add(varname, hist)  # Add it to our group

    # Return the group
    return out
def main(args):
    '''Book the jet tagging study histograms for one dataset/era and write them to a ROOT file.

    The histograms are written to rootfiles/THjetstudy_<setname>_<era>.root, the analyzer
    node tree is saved to NodeTree.pdf and the elapsed time is printed.

    @param args: Object providing the attributes `threads`, `setname` and `era`
        (presumably a parsed argparse namespace - confirm against the caller).
    '''
    ROOT.ROOT.EnableImplicitMT(args.threads)
    start = time.time()
    selection = THClass(
        'dijet_nano/%s_%s_snapshot.txt' % (args.setname, args.era),
        int(args.era), 1, 1)
    selection.OpenForSelection('None')

    # Kinematic plots
    jetPlots = HistGroup('jetPlots')
    # Taggers after mass selection
    selection.a.Define(
        'TopMassBools',
        'Dijet_msoftdrop_corrT > 105 && Dijet_msoftdrop_corrT < 210')
    selection.a.Define('DAK8TopScoresInMassWindow',
                       'Dijet_deepTag_TvsQCD[TopMassBools]')
    selection.a.Define('PNTopScoresInMassWindow',
                       'Dijet_particleNet_TvsQCD[TopMassBools]')
    jetPlots.Add(
        'DAK8TopScoresInMassWindow',
        selection.a.DataFrame.Histo1D(
            ('DAK8TopScoresInMassWindow',
             'DeepAK8 top score for jets in top mass window', 50, 0, 1),
            'DAK8TopScoresInMassWindow'))
    jetPlots.Add(
        'PNTopScoresInMassWindow',
        selection.a.DataFrame.Histo1D(
            ('PNTopScoresInMassWindow',
             'ParticleNet top score for jets in top mass window', 50, 0, 1),
            'PNTopScoresInMassWindow'))

    selection.a.Define(
        'HiggsMassBools',
        'Dijet_msoftdrop_corrH > 100 && Dijet_msoftdrop_corrH < 140')
    selection.a.Define('DAK8HiggsScoresInMassWindow',
                       'Dijet_deepTagMD_HbbvsQCD[HiggsMassBools]')
    selection.a.Define('PNHiggsScoresInMassWindow',
                       'Dijet_particleNet_HbbvsQCD[HiggsMassBools]')
    jetPlots.Add(
        'DAK8HiggsScoresInMassWindow',
        selection.a.DataFrame.Histo1D(
            ('DAK8HiggsScoresInMassWindow',
             'DeepAK8 Higgs score for jets in Higgs mass window', 50, 0, 1),
            'DAK8HiggsScoresInMassWindow'))
    jetPlots.Add(
        'PNHiggsScoresInMassWindow',
        selection.a.DataFrame.Histo1D(
            ('PNHiggsScoresInMassWindow',
             'ParticleNet Higgs score for jets in Higgs mass window', 50, 0,
             1), 'PNHiggsScoresInMassWindow'))

    # Mass after tagger selection
    selection.a.Define('TopDAK8Bools', 'Dijet_deepTag_TvsQCD > 0.9')
    selection.a.Define('TopPNBools', 'Dijet_particleNet_TvsQCD > 0.9')
    selection.a.Define('TopMassAfterDAK8Tag',
                       'Dijet_msoftdrop_corrT[TopDAK8Bools]')
    selection.a.Define('TopMassAfterPNTag',
                       'Dijet_msoftdrop_corrT[TopPNBools]')
    jetPlots.Add(
        'TopMassAfterDAK8Tag',
        selection.a.DataFrame.Histo1D(
            ('TopMassAfterDAK8Tag', 'Jet mass after DAK8 top score > 0.9', 25,
             50, 300), 'TopMassAfterDAK8Tag'))
    jetPlots.Add(
        'TopMassAfterPNTag',
        selection.a.DataFrame.Histo1D(
            ('TopMassAfterPNTag', 'Jet mass after PN top score > 0.9', 25, 50,
             300), 'TopMassAfterPNTag'))

    selection.a.Define('HiggsDAK8Bools', 'Dijet_deepTagMD_HbbvsQCD > 0.9')
    selection.a.Define('HiggsPNBools', 'Dijet_particleNet_HbbvsQCD > 0.9')
    selection.a.Define('HiggsMassAfterDAK8Tag',
                       'Dijet_msoftdrop_corrH[HiggsDAK8Bools]')
    selection.a.Define('HiggsMassAfterPNTag',
                       'Dijet_msoftdrop_corrH[HiggsPNBools]')
    jetPlots.Add(
        'HiggsMassAfterDAK8Tag',
        selection.a.DataFrame.Histo1D(
            ('HiggsMassAfterDAK8Tag', 'Jet mass after DAK8 Higgs score > 0.9',
             25, 50, 300), 'HiggsMassAfterDAK8Tag'))
    jetPlots.Add(
        'HiggsMassAfterPNTag',
        selection.a.DataFrame.Histo1D(
            ('HiggsMassAfterPNTag', 'Jet mass after PN Higgs score > 0.9', 25,
             50, 300), 'HiggsMassAfterPNTag'))

    selection.a.Define(
        'GenPart_vect',
        'hardware::TLvector(GenPart_pt, GenPart_eta, GenPart_phi, GenPart_mass)'
    )

    out = ROOT.TFile.Open(
        'rootfiles/THjetstudy_%s_%s.root' % (args.setname, args.era),
        'RECREATE')
    out.cd()
    # Node shared by every branch below; each branch resets the active node to it
    presel = selection.a.GetActiveNode()
    # Assign jets on truth in parallel
    selection.a.SetActiveNode(presel)
    selection.ApplyTopPickViaMatch()
    truthtag = selection.a.Define(
        'MassDiff', 'Top_msoftdrop_corrT - Higgs_msoftdrop_corrH')
    nicenames = {"deepTag": "DAK8^{top}", "particleNet": "PN^{top}"}
    for t in ['deepTag', 'particleNet']:
        selection.a.SetActiveNode(presel)
        top_tagger = '%s_TvsQCD' % t
        # higgs_tagger = '%s_HbbvsQCD'%t
        # Signal region
        selection.ApplyTopPick(tagger=top_tagger, invert=False)

        selection.a.Define('MassDiff',
                           'Top_msoftdrop_corrT - Higgs_msoftdrop_corrH')
        selection.a.Define('NNDiff', 'Top_{0} - Higgs_{0}'.format(top_tagger))
        jetPlots.Add(
            'MassDiffvsNNDiff_%s' % t,
            selection.a.DataFrame.Histo2D(
                ('MassDiffvsNNDiff_%s' % t,
                 '(m_{{t}} - m_{{H}}) vs ({0}_{{t}} - {0}_{{H}})'.format(
                     nicenames[t]), 25, -100, 150, 40, -1, 1), 'MassDiff',
                'NNDiff'))
        # Look at unmatched pieces
        checkpoint = selection.a.GetActiveNode()
        selection.a.Cut(
            'NotGenMatchTop',
            '!MatchToGen(6, Top_vect, GenPart_vect, GenPart_pdgId)')
        selection.a.Cut(
            'NotGenMatchH',
            '!MatchToGen(25, Higgs_vect, GenPart_vect, GenPart_pdgId)')
        jetPlots.Add(
            'MassDiffvsNNDiff_%s_BadMatch' % t,
            selection.a.DataFrame.Histo2D(
                ('MassDiffvsNNDiff_%s_BadMatch' % t,
                 '(m_{{t}} - m_{{H}}) vs ({0}_{{t}} - {0}_{{H}}) - Bad matches'
                 .format(nicenames[t]), 25, -100, 150, 40, -1, 1), 'MassDiff',
                'NNDiff'))
        # Look at matched pieces (restart from the node before the "not matched" cuts)
        selection.a.SetActiveNode(checkpoint)
        selection.a.Cut(
            'GenMatchTop',
            'MatchToGen(6, Top_vect, GenPart_vect, GenPart_pdgId)')
        selection.a.Cut(
            'GenMatchH',
            'MatchToGen(25, Higgs_vect, GenPart_vect, GenPart_pdgId)')
        jetPlots.Add(
            'MassDiffvsNNDiff_%s_GoodMatch' % t,
            selection.a.DataFrame.Histo2D((
                'MassDiffvsNNDiff_%s_GoodMatch' % t,
                '(m_{{t}} - m_{{H}}) vs ({0}_{{t}} - {0}_{{H}}) - Good matches'
                .format(nicenames[t]), 25, -100, 150, 40, -1, 1), 'MassDiff',
                                          'NNDiff'))
        # Assign jets on truth
        selection.a.SetActiveNode(truthtag)
        selection.a.Define('NNDiff_%s' % t,
                           'Top_{0} - Higgs_{0}'.format(top_tagger))
        jetPlots.Add(
            'MassDiffvsNNDiff_%s_TruthMatch' % t,
            selection.a.DataFrame.Histo2D((
                'MassDiffvsNNDiff_%s_TruthMatch' % t,
                '(m_{{t}} - m_{{H}}) vs ({0}_{{t}} - {0}_{{H}}) - Truth matches'
                .format(nicenames[t]), 25, -100, 150, 40, -1, 1), 'MassDiff',
                                          'NNDiff_%s' % t))

    jetPlots.Do('Write')
    # Close the output file explicitly so it is flushed and released
    # instead of relying on interpreter shutdown
    out.Close()
    selection.a.PrintNodeTree('NodeTree.pdf')
    print('%s sec' % (time.time() - start))
def THstudies(args):
    '''Book kinematic and N-1 histograms for one dataset/era and write them to a ROOT file.

    The histograms are written to rootfiles/THstudies_<setname>_<era>[_<variation>].root
    (the variation suffix is omitted when `args.variation` is the string 'None'), the
    analyzer node tree is saved to NodeTree.pdf and the elapsed time is printed.

    @param args: Object providing the attributes `setname`, `era`, `threads`, `trigEff`
        and `variation` (presumably a parsed argparse namespace - confirm against the caller).
    '''
    print('PROCESSING: %s %s' % (args.setname, args.era))
    ROOT.ROOT.EnableImplicitMT(args.threads)
    start = time.time()
    # Base setup
    selection = THClass(
        'dijet_nano/%s_%s_snapshot.txt' % (args.setname, args.era),
        int(args.era), 1, 1)
    selection.OpenForSelection('None')
    selection.a.Define(
        'Dijet_vect',
        'hardware::TLvector(Dijet_pt_corr, Dijet_eta, Dijet_phi, Dijet_msoftdrop_corrT)'
    )
    selection.a.Define('mth', 'hardware::InvariantMass(Dijet_vect)')
    selection.a.Define('m_avg',
                       '(Dijet_msoftdrop_corrT[0]+Dijet_msoftdrop_corrT[1])/2'
                       )  # Use the top version of the corrected mass
    # since it still has JES/JER which both would get anyway
    selection.ApplyTrigs(args.trigEff)
    # Data gets no extra nominal weight; MC gets genWeight times the cross-section scale
    selection.a.MakeWeightCols(
        extraNominal='' if selection.a.isData else 'genWeight*%s' %
        selection.GetXsecScale())

    # Kinematic definitions
    selection.a.Define('pt0', 'Dijet_pt_corr[0]')
    selection.a.Define('pt1', 'Dijet_pt_corr[1]')
    selection.a.Define('HT', 'pt0+pt1')
    selection.a.Define('deltaEta', 'abs(Dijet_eta[0] - Dijet_eta[1])')
    selection.a.Define('deltaPhi',
                       'hardware::DeltaPhi(Dijet_phi[0],Dijet_phi[1])')
    # Keep the node so the N-1 study can restart from the kinematics-only selection
    kinOnly = selection.a.Define(
        'deltaY', 'abs(Dijet_vect[0].Rapidity() - Dijet_vect[1].Rapidity())')

    # Kinematic plots
    kinPlots = HistGroup('kinPlots')
    kinPlots.Add(
        'pt0',
        selection.a.DataFrame.Histo1D(('pt0', 'Lead jet pt', 100, 350, 2350),
                                      'pt0', 'weight__nominal'))
    kinPlots.Add(
        'pt1',
        selection.a.DataFrame.Histo1D(
            ('pt1', 'Sublead jet pt', 100, 350, 2350), 'pt1',
            'weight__nominal'))
    kinPlots.Add(
        'HT',
        selection.a.DataFrame.Histo1D(
            ('HT', 'Sum of pt of two leading jets', 150, 700, 3700), 'HT',
            'weight__nominal'))
    kinPlots.Add(
        'deltaEta',
        selection.a.DataFrame.Histo1D(
            ('deltaEta', '| #Delta #eta |', 48, 0, 4.8), 'deltaEta',
            'weight__nominal'))
    kinPlots.Add(
        'deltaPhi',
        selection.a.DataFrame.Histo1D(
            ('deltaPhi', '| #Delta #phi |', 32, 1, 3.14), 'deltaPhi',
            'weight__nominal'))
    kinPlots.Add(
        'deltaY',
        selection.a.DataFrame.Histo1D(('deltaY', '| #Delta y |', 60, 0, 3),
                                      'deltaY', 'weight__nominal'))

    # Check MC truth to get jet idx assignment
    selection.ApplyTopPickViaMatch()
    kinPlots.Add(
        'tIdx_true',
        selection.a.DataFrame.Histo1D(
            ('tIdx_true', 'Top jet idx based on MC truth', 2, 0, 2), 'tIdx'))
    kinPlots.Add(
        'hIdx_true',
        selection.a.DataFrame.Histo1D(
            ('hIdx_true', 'Higgs jet idx based on MC truth', 2, 0, 2), 'hIdx'))

    # Do N-1 setup before splitting into DAK8 and PN - assume leading top
    #    This is a 50/50 assumption that kills the stats by 50% but
    #    it allows us to make the plots with real world possibility that
    #    there's Higgs and top cross contamination. Also helps to do this without
    #    too much hassle.
    selection.a.SetActiveNode(kinOnly)
    selection.a.ObjectFromCollection('LeadTop', 'Dijet', 0)
    nminus1Node = selection.a.ObjectFromCollection('SubleadHiggs', 'Dijet', 1)

    out = ROOT.TFile.Open(
        'rootfiles/THstudies_%s_%s%s.root' %
        (args.setname, args.era,
         '_' + args.variation if args.variation != 'None' else ''), 'RECREATE')
    out.cd()
    for t in ['deepTag', 'particleNet']:
        top_tagger = '%s_TvsQCD' % t
        higgs_tagger = '%sMD_HbbvsQCD' % t

        # N-1
        selection.a.SetActiveNode(nminus1Node)
        nminusGroup = selection.GetNminus1Group(t)
        nminusNodes = selection.a.Nminus1(nminusGroup)
        for n in nminusNodes.keys():
            # Node keys starting with 'm' are plotted as masses, 'full' is skipped,
            # everything else is plotted as a tagger score
            if n.startswith('m'):
                bins = [25, 50, 300]
                if n.startswith('mH'): var = 'SubleadHiggs_msoftdrop_corrH'
                else: var = 'LeadTop_msoftdrop_corrT'
            elif n == 'full': continue
            else:
                bins = [50, 0, 1]
                if n.endswith('H_cut'): var = 'SubleadHiggs_%s' % higgs_tagger
                else: var = 'LeadTop_%s' % top_tagger
            print('N-1: Plotting %s for node %s' % (var, n))
            kinPlots.Add(
                n + '_nminus1', nminusNodes[n].DataFrame.Histo1D(
                    (n + '_nminus1', n + '_nminus1', bins[0], bins[1],
                     bins[2]), var, 'weight__nominal'))

    kinPlots.Do('Write')
    # Close the output file explicitly so it is flushed and released
    # instead of relying on interpreter shutdown
    out.Close()
    selection.a.PrintNodeTree('NodeTree.pdf', verbose=True)
    print('%s sec' % (time.time() - start))
示例#9
0
def select(setname, year):
    '''Applies the selection to one sample and histograms the variables in `varnames`.

    Opens '<rootfile_path>/<setname>_bstar<year>.root', books the cuts and derived
    columns on a TIMBER analyzer and fills one histogram per key of the
    module-level `varnames`, weighted by the 'norm' column.

    @param setname (str): Sample name. Used in the input file name and in the histogram names.
    @param year (str): Year string used in the input file name. Must be '16', '17' or '18'.
    Returns:
        HistGroup: Group named '<setname>_<year>' with one histogram per variable, keyed by variable name.
    Raises:
        ValueError: If no b-tagging thresholds are defined for `year`.
    '''
    # Thresholds on Jet_btagDeepB per year, ordered loose / medium / tight.
    btag_wps = {
        '16': [0.2217, 0.6321, 0.8953],
        '17': [0.1522, 0.4941, 0.8001],
        '18': [0.1241, 0.4184, 0.7571],
    }
    # Fail early with a clear message instead of an IndexError on an empty list later on.
    if year not in btag_wps:
        raise ValueError('Unsupported year %r: expected one of %s' %
                         (year, sorted(btag_wps)))
    bcut = btag_wps[year]

    ROOT.ROOT.EnableImplicitMT(
        2)  # Just use two threads - no need to kill the interactive nodes

    # Initialize TIMBER analyzer
    file_path = '%s/%s_bstar%s.root' % (rootfile_path, setname, year)
    a = analyzer(file_path)

    # Determine normalization weight (data is left unweighted)
    if not a.isData:
        norm = helpers.getNormFactor(setname, year, config, a.genEventCount)
    else:
        norm = 1.

    # Book actions on the RDataFrame
    a.Cut('filters', a.GetFlagString(flags))
    a.Cut('trigger', a.GetTriggerString(triggers))
    # Need to calculate if we have two jets (with Id) that are back-to-back
    a.Define('jetIdx', 'hemispherize(FatJet_phi, FatJet_jetId)')
    # If we don't do this, we may try to access variables of jets that don't exist! (leads to seg fault)
    a.Cut('nFatJets_cut', 'nFatJet > max(jetIdx[0],jetIdx[1])')
    # Cut on that calculation
    a.Cut("hemis", "(jetIdx[0] != -1)&&(jetIdx[1] != -1)")
    a.Cut('pt_cut', 'FatJet_pt[jetIdx[0]] > 400 && FatJet_pt[jetIdx[1]] > 400')
    a.Cut(
        'eta_cut',
        'abs(FatJet_eta[jetIdx[0]]) < 2.4 && abs(FatJet_eta[jetIdx[1]]) < 2.4')
    a.Cut(
        'mjet_cut',
        'FatJet_msoftdrop[jetIdx[0]] > 50 && FatJet_msoftdrop[jetIdx[1]] > 50')
    # NOTE(review): jetIdx is computed from the FatJet collection but is used to
    # index Jet_* branches in the two vectors below. Confirm that Jet_* (and not
    # FatJet_*) is intended; nFatJets_cut only bounds the index against nFatJet.
    a.Define(
        'lead_vector',
        'hardware::TLvector(Jet_pt[jetIdx[0]],Jet_eta[jetIdx[0]],Jet_phi[jetIdx[0]],Jet_mass[jetIdx[0]])'
    )
    a.Define(
        'sublead_vector',
        'hardware::TLvector(Jet_pt[jetIdx[1]],Jet_eta[jetIdx[1]],Jet_phi[jetIdx[1]],Jet_mass[jetIdx[1]])'
    )
    a.Define('invariantMass',
             'hardware::invariantMass({lead_vector,sublead_vector})')
    a.Cut('mtw_cut', 'invariantMass > 1200')
    a.Define(
        'deltaphi',
        'hardware::DeltaPhi(FatJet_phi[jetIdx[0]],FatJet_phi[jetIdx[1]])')
    # N-subjettiness ratios. Each is written as `condition ? <if true> : <if false>`
    # so that a non-positive denominator yields -1 instead of a division by zero.
    a.Define(
        'lead_tau32',
        'FatJet_tau2[jetIdx[0]] > 0 ? FatJet_tau3[jetIdx[0]]/FatJet_tau2[jetIdx[0]] : -1'
    )  # Guard on tau2, the denominator
    a.Define(
        'sublead_tau32',
        'FatJet_tau2[jetIdx[1]] > 0 ? FatJet_tau3[jetIdx[1]]/FatJet_tau2[jetIdx[1]] : -1'
    )
    a.Define(
        'lead_tau21',
        'FatJet_tau1[jetIdx[0]] > 0 ? FatJet_tau2[jetIdx[0]]/FatJet_tau1[jetIdx[0]] : -1'
    )  # Guard on tau1, the denominator
    a.Define(
        'sublead_tau21',
        'FatJet_tau1[jetIdx[1]] > 0 ? FatJet_tau2[jetIdx[1]]/FatJet_tau1[jetIdx[1]] : -1'
    )
    a.Define('lead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_TvsQCD', 'FatJet_deepTag_TvsQCD[jetIdx[1]]')
    a.Define('lead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[0]]')
    a.Define('sublead_deepAK8_WvsQCD', 'FatJet_deepTag_WvsQCD[jetIdx[1]]')

    a.Define('nbjet_loose',
             'Sum(Jet_btagDeepB > ' + str(bcut[0]) + ')')  # DeepCSV loose WP
    a.Define('nbjet_medium',
             'Sum(Jet_btagDeepB > ' + str(bcut[1]) + ')')  # DeepCSV medium WP
    a.Define('nbjet_tight',
             'Sum(Jet_btagDeepB > ' + str(bcut[2]) + ')')  # DeepCSV tight WP
    a.Define('lead_jetPt', 'FatJet_pt[jetIdx[0]]')
    a.Define('sublead_jetPt', 'FatJet_pt[jetIdx[1]]')
    a.Define('lead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[0]]')
    a.Define('sublead_softdrop_mass', 'FatJet_msoftdrop[jetIdx[1]]')
    a.Define('norm', str(norm))

    # (nbins, xmin, xmax) chosen by the first substring found in the variable
    # name. Order matters: it is the precedence when several substrings match.
    binnings = (
        ('nbjet', (10, 0, 10)),
        ('tau', (20, 0, 1)),
        ('Pt', (30, 400, 1000)),
        ('phi', (30, -3.2, 3.2)),
        ('softdrop_mass', (30, 0, 300)),
    )
    default_binning = (20, 0, 1)

    # Book every histogram before asking for any result. RDataFrame actions are
    # lazy, so requesting a value inside the booking loop would run one event
    # loop per variable instead of a single pass that fills all of them.
    node = a.GetActiveNode()
    booked = []
    for varname in varnames.keys():
        histname = '%s_%s_%s' % (setname, year, varname)
        binning = next((b for key, b in binnings if key in varname),
                       default_binning)
        # Arguments: (hist name, title, binning) tuple, variable to plot, weight
        hist = node.DataFrame.Histo1D((histname, histname) + binning, varname,
                                      'norm')
        booked.append((varname, hist))

    # Book a group to save the histograms
    out = HistGroup("%s_%s" % (setname, year))
    for varname, hist in booked:
        # Forces evaluation while the analyzer is still in scope. The first
        # call runs the event loop; later calls find their histogram filled.
        hist.GetValue()
        out.Add(varname, hist)  # Add it to our group

    # Return the group
    return out