Exemplo n.º 1
0
 def __rdf_from_dataset(self, dataset):
     """Build an ``RDataFrameCutWeight`` over every ntuple of ``dataset``.

     All ntuples must share one ``directory`` (tree name); otherwise a
     ``NameError`` is raised. Each ntuple is added to a main ``TChain`` and
     its friends are grouped into one ``TChain`` per friend tag, which is
     then attached to the main chain as a friend.

     The chains are stored on ``self.tchains`` / ``self.friend_tchains`` so
     that Python keeps references to them after this method returns.
     """
     distinct_trees = {nt.directory for nt in dataset.ntuples}
     if len(distinct_trees) != 1:
         raise NameError(
             'Impossible to create RDataFrame with different tree names')

     main_chain = TChain()
     friends_by_tag = {}
     for nt in dataset.ntuples:
         main_chain.Add('{}/{}'.format(nt.path, nt.directory))
         for fr in nt.friends:
             # One chain per friend tag, created the first time the tag is seen
             if fr.tag not in friends_by_tag:
                 friends_by_tag[fr.tag] = TChain()
             friends_by_tag[fr.tag].Add(
                 '{}/{}'.format(fr.path, fr.directory))

     for friend_chain in friends_by_tag.values():
         main_chain.AddFriend(friend_chain)
         # Hold a reference so the friend chain is not garbage-collected
         self.friend_tchains.append(friend_chain)

     if self.nthreads != 1:
         EnableImplicitMT(self.nthreads)

     # Hold a reference so the main chain is not garbage-collected
     self.tchains.append(main_chain)
     return RDataFrameCutWeight(RDataFrame(main_chain))
Exemplo n.º 2
0
 def get_frame(tree_name, file_name):
     """Open ``tree_name`` from ``file_name`` as an RDataFrame.

     When the module-level ``verbose`` flag is truthy, every column name of
     the frame is printed, one per line, before the frame is returned.
     """
     df = RDataFrame(tree_name, file_name)
     if not verbose:
         return df
     for column in df.GetColumnNames():
         print(column)
     return df
Exemplo n.º 3
0
    def makeDataFrame(self):
        """Chain all double-fake samples and return an RDataFrame with helper columns.

        The samples come from ``createSampleLists`` (double-fake list only)
        after ``setSumWeights``. Each sample contributes the file
        ``<ana_dir>/<dir_name>/<tree_prod_name>/tree.root`` to a ``TChain``
        named ``'tree'``.

        Returns:
            The RDataFrame built on that chain, with the columns ``w``,
            ``ptCone`` and several ``abs_*`` columns defined.

        NOTE(review): the TChain is a local variable here; confirm that the
        returned RDataFrame keeps it alive in the ROOT version in use.
        """
        _, _, samples_doublefake = createSampleLists(
            analysis_dir=self.analysis_dir, server=self.server, channel=self.channel)
        working_samples = setSumWeights(samples_doublefake)
        print('###########################################################')
        print('# measuring doublefakerake...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples:
            print('{:<20}{:<20}'.format(w.name, 'path: ' + w.ana_dir))

        # TChain'ing all data samples together.
        # Fix: the original loop always read working_samples[0], so the first
        # sample's file was added once per sample and the others never.
        chain = TChain('tree')
        for sample in working_samples:
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)

        # Parenthesised chain instead of trailing backslashes, so a blank or
        # edited line cannot silently change where the statement ends.
        dataframe = (
            RDataFrame(chain)
            .Define('w', 'weight * lhe_weight')
            .Define('ptCone', self.ptCone())
            .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
            .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
            .Define('abs_l1_eta', 'abs(l1_eta)')
            .Define('abs_l2_eta', 'abs(l2_eta)')
            .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
            .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
        )

        return dataframe
Exemplo n.º 4
0
def getRDF(pattern, treeName=None, keepObj=None):
    """Build an RDataFrame over every file matching one or more glob patterns.

    Args:
        pattern: a single glob pattern (``str``) or an iterable of patterns.
        treeName: tree to chain; ``'tuple0/DecayTree'`` when ``None``.
        keepObj: optional list-like container. When given, the underlying
            ``TChain`` is appended to it so the caller keeps it in memory.

    Returns:
        The RDataFrame alone when ``keepObj`` is given, otherwise the tuple
        ``(RDataFrame, TChain)`` so the caller can hold the chain itself.
    """
    tree = 'tuple0/DecayTree' if treeName is None else treeName
    if isinstance(pattern, str):
        globs = [pattern]
    else:
        globs = pattern

    from ROOT import TChain, RDataFrame
    import glob

    chain = TChain(tree)
    for expr in globs:
        for matched_file in glob.glob(expr):
            chain.Add(matched_file)

    # Without a container, hand both objects back to the caller
    if keepObj is None:
        return RDataFrame(chain), chain

    keepObj.append(chain)  # keeps the chain object in memory
    return RDataFrame(chain)
Exemplo n.º 5
0
 def __rdf_from_dataset(self, dataset):
     """Wrap ``dataset`` in an ``RDataFrameCutWeight``.

     The chains come from ``rdf_from_dataset_helper``. The friend chains
     replace ``self.friend_tchains`` and the main chain is appended to
     ``self.tchains``, so both stay referenced after this method returns.
     Implicit multi-threading is enabled unless ``self.nthreads`` is 1.
     """
     main_chain, friend_chains = rdf_from_dataset_helper(dataset)
     self.friend_tchains = friend_chains

     if self.nthreads != 1:
         EnableImplicitMT(self.nthreads)

     # Hold a reference so the main chain is not garbage-collected
     self.tchains.append(main_chain)
     return RDataFrameCutWeight(RDataFrame(main_chain))
Exemplo n.º 6
0
 def get_frame(tree_name, file_name):
     """
     Return the RDataFrame for ``tree_name`` read from ``file_name``.

     If the module-level ``verbose`` flag is truthy, the column names of
     the frame are printed first, one per line.
     """
     result = RDataFrame(tree_name, file_name)
     if not verbose:
         return result
     for col in result.GetColumnNames():
         print(col)
     return result
Exemplo n.º 7
0
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every tree marked ``keep`` in ``config`` into ``output_ntp_name``.

    No branch is dropped here; only the choice of trees is applied. For each
    kept tree, the selection from ``config[full_path]['selection']`` is
    applied as a filter when it is non-empty, and the branches listed in
    ``tree_branch_dict[full_path]`` are written with snapshot mode
    ``'UPDATE'``.
    """
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        if not config[full_path]['keep']:
            continue

        source = RDataFrame(tree)
        selection = concat_selections(config[full_path]['selection'])
        selected = source.Filter(selection) if selection else source

        branches = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, branches, snapshot_opts)
 def get_frame(file_name, df_index=0, tree_name="O2mcparticle_001"):
     """
     Return the RDataFrame for ``tree_name`` inside one DF_/TF_ folder of a file.

     The top-level entries of ``file_name`` are listed with ``rootls``; those
     whose name starts with ``DF_`` or ``TF_`` are kept, and the one at
     position ``df_index`` is used. Its name is printed. When the
     module-level ``verbose`` flag is truthy, each ``(index, column)`` pair
     is printed together with the column type.

     Raises ValueError if ``file_name`` is not an existing file.
     """
     if not path.isfile(file_name):
         raise ValueError("Did not find AOD file", file_name)

     listing = run_cmd(f"rootls {file_name}").strip().split()
     folders = [entry for entry in listing
                if entry.startswith(("DF_", "TF_"))]
     df_name = folders[df_index]
     print(df_name)

     frame = RDataFrame(f"{df_name}/{tree_name}", file_name)
     if verbose:
         for indexed in enumerate(frame.GetColumnNames()):
             print(indexed, frame.GetColumnType(indexed[1]))
     return frame
Exemplo n.º 9
0
    def makeRootDataFrameFromTree(self, tree_file_name, tree_name='tree', verbose=False, friend_name='ML', friend_file_name=None):
        """Build an RDataFrame from a tree, optionally with a friend tree attached.

        Args:
            tree_file_name: file holding the main tree; passed to ``self.readTree``.
            tree_name: name of the main tree; passed to ``self.readTree``.
            verbose: passed to ``self.readTree``; when truthy, the resulting
                dataframe and file name are also printed.
            friend_name: alias under which the friend tree is attached.
            friend_file_name: file holding the friend tree ``'tree'``. When
                falsy, no friend is attached.

        Returns:
            The RDataFrame built on the tree returned by ``self.readTree``.

        When a friend is attached, its alignment is checked by counting the
        entries where ``l2_pt`` or ``event`` differ between the two trees.
        Any non-zero count prints an error (no exception is raised).
        """
        ttree = self.readTree(tree_file_name, tree_name, verbose)

        if friend_file_name:
            ttree.AddFriend(friend_name + '=tree', friend_file_name)
            # Validation: a non-zero difference selects an entry, so both
            # counts are 0 exactly when the friend is aligned on these branches.
            validate1 = ttree.GetEntries('l2_pt - %s.l2_pt' % friend_name)
            validate2 = ttree.GetEntries('event - %s.event' % friend_name)

            if validate1 + validate2 != 0:
                # Fix: the original printed undefined names `m, n`, raising
                # NameError exactly when this message was needed.
                print('\n\tERROR: FRIEND TREE NOT ALIGNED, FAKERATE USELESS', validate1, validate2)

        gROOT.cd()
        dataframe = RDataFrame(ttree)

        # Fix: this print used to run before `dataframe` was assigned, so
        # verbose=True raised UnboundLocalError.
        if verbose:
            print('read dataframe', dataframe, 'from file', tree_file_name)

        return dataframe
Exemplo n.º 10
0
    def measureSFR(self, drawPlot=False):
        """Fill the single-fake 2D histograms and optionally draw the result.

        All single-fake samples from ``createSampleLists`` (after
        ``setSumWeights``) are chained into one ``TChain`` named ``'tree'``.
        Two weighted 2D histograms in (``ptCone``, ``abs_hnl_hn_vis_eta``)
        are filled: one with the baseline + ``L_L_uncorrelated`` selection
        and one that additionally requires ``T_T``.

        Args:
            drawPlot: when truthy, draw the histogram on a canvas and drop
                into the debugger via ``set_trace()`` so the canvas stays open.

        Returns:
            None. Saving the histogram to a file is currently disabled.
        """
        _, samples_singlefake, _ = createSampleLists(
            analysis_dir=self.analysis_dir, server=self.server, channel=self.channel)
        working_samples = setSumWeights(samples_singlefake)
        print('###########################################################')
        print('# measuring singlefakerake...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples:
            print('{:<20}{:<20}'.format(w.name, 'path: ' + w.ana_dir))

        # TChain'ing all data samples together.
        # Fix: the original loop always read working_samples[0], so the first
        # sample's file was added once per sample and the others never.
        chain = TChain('tree')
        for sample in working_samples:
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)

        # Parenthesised chain instead of trailing backslashes, so a blank or
        # edited line cannot silently change where the statement ends.
        dataframe = (
            RDataFrame(chain)
            .Define('w', 'weight * lhe_weight')
            .Define('ptCone', self.ptCone())
            .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
            .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
            .Define('abs_l1_eta', 'abs(l1_eta)')
            .Define('abs_l2_eta', 'abs(l2_eta)')
            .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
            .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
        )

        # Alternative ptCone binning with an overflow-like last bin:
        # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
        bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
        bins_eta    = np.array([0., 0.8, 1.2, 2.4])

        selection_baseline = getSelection(self.channel,'MR_SF')

        selection_LL_uncorrelated = '(' + ' & '.join([
            selection_baseline,
            getSelection(self.channel,'L_L_uncorrelated'),
        ]) + ')'
        selection_TT_uncorrelated = '(' + ' & '.join([
            selection_baseline,
            getSelection(self.channel,'L_L_uncorrelated'),
            getSelection(self.channel,'T_T'),
        ]) + ')'

        h_LL_uncorrelated = (
            dataframe
            .Filter(selection_LL_uncorrelated)
            .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        )
        # name the axis, also initiate the dataframe call
        h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        h_TT_uncorrelated = (
            dataframe
            .Filter(selection_TT_uncorrelated)
            .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        )
        # name the axis, also initiate the dataframe call
        h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        # preparing the histo; saving it into a .root file is disabled below
        sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root'
        sfr_hist = h_TT_uncorrelated.Clone()
        # Currently the tight-tight histogram is used as is. To turn it into
        # a ratio, divide by the loose-loose one:
        # sfr_hist.Divide(h_LL_uncorrelated.Clone())
        # sfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

        # draw the histo if required
        if drawPlot:
            can = TCanvas('can', '')
            # sfr_hist.Draw('colzTextE')
            # sfr_hist.Draw('colz')
            sfr_hist.Draw()
            pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
            # pf.showlogopreliminary()
            can.Update()
            set_trace()
# Declare two C++ helpers to the ROOT interpreter so they can be called from
# RDataFrame expression strings:
#   getBin(x, y, histo)    -> histo->FindFixBin(x, y)
#   getWeight(x, y, histo) -> content of the bin found by getBin
gInterpreter.Declare('''
Int_t getBin(Double_t x, Double_t y, TH2D* histo) {
  return histo->FindFixBin(x, y);
}

auto getWeight(Double_t x, Double_t y, TH2D* histo) {
  auto binIdx = getBin(x, y, histo);
  return histo->GetBinContent(binIdx);
}
''')

# Open the histogram file and fetch the histogram on the C++ side. This makes
# `histo` an interpreter-level variable that the expression below refers to.
# The dynamic_cast result is not checked here.
gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

# Define `wjk_alt` from the histogram lookup, then the total weight `wt`
# as the product wpid*wtrk*wjk_alt.
dfInit = RDataFrame(mcTreeN, mcNtpN)
df = dfInit.Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)').Define('wt', 'wpid*wtrk*wjk_alt')

# NOTE: `wjk_occ` is not defined in this script; per the original note it
# comes from the existing ntuple. `wjk_alt` is the column defined above.
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

# 2D histogram model: 20 bins over [1, 200] on x and 20 bins over [0, 450] on y
histoRootMdl = TH2DModel(
    'histoRoot', 'histoRoot',
    20, 1, 200, 20, 0, 450
)
# Histogram of (b_ownpv_ndof, ntracks) weighted by `wt`
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')


##################
Exemplo n.º 12
0
from ROOT import (ROOT, RDataFrame, TCanvas)

# Enable ROOT's implicit multi-threading before any dataframe is created
ROOT.EnableImplicitMT()

# Input files and the tree to read from each of them

minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

# One dataframe per sample, both reading the same tree name
minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)
# Keep events with at least one entry of eventmuons_PIDmu above 0
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"
minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)
signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

# 2x2 canvas. Note that the histograms below are booked on the unfiltered
# dataframes (minbias_df / signal_df), not on the *_pidmu filtered ones.
c1 = TCanvas()
c1.Divide(2, 2)
c1.cd(1)
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()
c1.cd(2)
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)
Exemplo n.º 13
0
parser.add_argument('--do',
                    nargs='+',
                    help="list of collections to make plots of")
args = parser.parse_args()

models = {
    'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000),
    'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000),
    'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.),
    'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.)
}
outdir = 'plots'
if args.output:
    outdir = args.output

rdf = RDataFrame("CollectionTree", args.infile)
rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]')

canv = TCanvas('c', '', 800, 600)

for coll in args.do:
    coll = coll.split(':')
    if len(coll) != 1 and len(coll) != 4:
        print('collection should either be "name" or "name:nbins:xmin:xmax"')
        continue

    varsuff = ''
    if 'fatjet' in coll[0]:
        cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
    elif 'jet' in coll[0]:
        cname = 'AntiKt4TruthDressedWZJetsAux'
        description='find retention rates for various trigger paths.')

    parser.add_argument('ntp', help='specify ntuple path.')

    parser.add_argument('tree', help='specify tree name.')

    parser.add_argument('-t',
                        '--trigger-paths',
                        nargs='+',
                        help='specify trigger paths.')

    return parser.parse_args()


########
# Main #
########

if __name__ == '__main__':
    # Script entry point: book one named filter per trigger path on the same
    # frame, then print the frame's cut report.
    args = parse_input()

    frame = RDataFrame(args.tree, args.ntp)
    # Each filter uses the trigger path as both expression and name. The
    # results are kept in a list so the garbage collector does not delete
    # the filter nodes before the report is produced.
    cuts = [frame.Filter(tp, tp) for tp in args.trigger_paths]

    report = frame.Report()
    report.Print()
Exemplo n.º 15
0
from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

# Enable ROOT's implicit multi-threading before the dataframe is created
ROOT.EnableImplicitMT()

# Input file and the tree to read from it
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')
# Unnamed filter on the background category (values 0 or 50)
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")
#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")
# Chain of named filters, each applied on top of the previous one, so that
# every cut gets its own name (the second argument of Filter).
cut1 = df_bkg.Filter("(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter("(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter("(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter("(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter("(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter("(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter("(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)",
                   "jpsi_vtx_cut")
cut8 = cut7.Filter("(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)",
                   "phi_vtx_cut")
cut9 = cut8.Filter("mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter("Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()
print("All stats: ")
# Request the cut report from the initial (unfiltered) dataframe
cutsreport = dataframe.Report()
        frames.append(new_frm)

        apply_skim_cuts(new_frm, skim_cuts, ref)


########
# Main #
########

if __name__ == '__main__':
    # Script entry point: pick the sample and cut set from the first
    # command-line argument (case-insensitive) and run apply_cuts on it.
    ntp_dst = '../../ntuples/ref-rdx-run1/Dst-mix/Dst--21_10_21--mix--all--2011-2012--md-mu--phoebe.root'
    ntp_d0 = '../../ntuples/ref-rdx-run1/D0-mix/D0--21_10_21--mix--all--2011-2012--md-mu--phoebe.root'

    # With no argument the script behaves as if 'dst' had been given.
    # An unrecognised argument does nothing.
    mode = 'dst' if len(sys.argv) == 1 else sys.argv[1].lower()

    if mode == 'dst':
        print('Working on Dst...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_CUTS, DST_SKIM_CUTS, DST_REF_NUMS)
    elif mode == 'd0':
        print('Working on D0...')
        frame_d0 = RDataFrame('ntp1', ntp_d0)
        apply_cuts(frame_d0, D0_CUTS, D0_SKIM_CUTS, D0_REF_NUMS)
    elif mode == 'dstwsmu':
        print('Working on Dst wrong-sign Mu...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_WS_MU_CUTS, DST_SKIM_CUTS,
                   DST_WS_MU_REF_NUMS)
    elif mode == 'dstwspi':
        print('Working on Dst wrong-sign slow Pi...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_WS_PI_CUTS, DST_SKIM_CUTS,
                   DST_WS_PI_REF_NUMS)
Exemplo n.º 17
0
def get_dataframe(dataset):
    """Return an RDataFrame over ``dataset`` that carries its own chains.

    The main chain and the friend chains produced by
    ``rdf_from_dataset_helper`` are stored on the returned frame as the
    attributes ``tchain`` and ``friend_tchains``, so they stay referenced
    for as long as the frame object does.
    """
    main_chain, friend_chains = rdf_from_dataset_helper(dataset)
    frame = RDataFrame(main_chain)
    frame.tchain = main_chain
    frame.friend_tchains = friend_chains
    return frame
Exemplo n.º 18
0
########
# Main #
########

if __name__ == '__main__':
    args = parse_input()
    histos = glob_histos(args.histo_folder)
    config = parse_config(args.config)
    loaded_histos = dict()
    output_opts = RSnapshotOptions()
    output_opts.fMode = 'UPDATE'
    first_write = True

    for idx, tree in enumerate(config['trees']):
        print('Processing tree {}...'.format(tree))
        init_frame = RDataFrame(tree, args.input_ntp)
        frames = [init_frame]
        output_brs = vector('string')(['runNumber', 'eventNumber'])

        for br, directive in config['config'].items():
            if tree in directive['skip_tree']:
                continue

            print('  Processing {}...'.format(br))
            params = ', '.join(resolve_params(directive['vars'], idx))

            histo_name = directive['histo_name']
            histo_dim = len(directive['vars'])
            debug_br = 'debug_{}_bin_idx'.format(br)

            wt_histo = load_histo(