Example #1
    def makeDataFrame(self):
        sample_dict = {}
        samples_all, samples_singlefake, samples_doublefake = createSampleLists(analysis_dir=self.analysis_dir, server = self.server, channel=self.channel)
        working_samples = samples_doublefake
        working_samples = setSumWeights(working_samples)
        print('###########################################################')
        print('# measuring double fake rate...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples: print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))
        chain = TChain('tree') #TChain'ing all data samples together
        for sample in working_samples:
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)
            
        dataframe = RDataFrame(chain)
        weight = 'weight * lhe_weight'
        dataframe = dataframe.Define('w',weight)\
                            .Define('ptCone',self.ptCone())\
                            .Define('abs_hnl_hn_vis_eta','abs(hnl_hn_vis_eta)')\
                            .Define('abs_hnl_hn_eta','abs(hnl_hn_eta)')\
                            .Define('abs_l1_eta','abs(l1_eta)')\
                            .Define('abs_l2_eta','abs(l2_eta)')\
                            .Define('abs_l1_jet_flavour_parton','abs(l1_jet_flavour_parton)')\
                            .Define('abs_l2_jet_flavour_parton','abs(l2_jet_flavour_parton)')

        return dataframe
Example #2
 def get_frame(tree_name, file_name):
     frame = RDataFrame(tree_name, file_name)
     if verbose:
         colNames = frame.GetColumnNames()
         for j in colNames:
             print(j)
     return frame
Example #3
 def get_frame(tree_name, file_name):
     """
     Getter of the frame from the file
     """
     frame = RDataFrame(tree_name, file_name)
     if verbose:
         colNames = frame.GetColumnNames()
         for j in colNames:
             print(j)
     return frame
Example #4
def split(file_name):  # file_name is passed without the '.root' extension
    tfile = rt.TFile(file_name+'.root')
    tree = tfile.Get('tree')

    df = RDF(tree)
    n = tree.GetEntries()
    df1 = df.Range(0, int(n/2))    # first half of the entries
    df2 = df.Range(int(n/2), 0)    # second half: an end value of 0 means 'until the last entry'
    df1.Snapshot('tree', '%s_training_half.root'%file_name)
    df2.Snapshot('tree', '%s_untouched_half.root'%file_name)
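A minimal usage sketch for the helper above, assuming the same imports as the snippet (`import ROOT as rt` and `RDF` aliased to `ROOT.RDataFrame`) and an input file `my_sample.root` holding a TTree named `tree`; the file stem is illustrative:

split('my_sample')  # writes my_sample_training_half.root and my_sample_untouched_half.root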
Example #5
 def __rdf_from_dataset(self, dataset):
     t_names = [ntuple.directory for ntuple in \
         dataset.ntuples]
     if len(set(t_names)) == 1:
         tree_name = t_names.pop()
     else:
         raise NameError(
             'Impossible to create RDataFrame with different tree names')
     chain = TChain()
     ftag_fchain = {}
     for ntuple in dataset.ntuples:
         chain.Add('{}/{}'.format(ntuple.path, ntuple.directory))
         for friend in ntuple.friends:
             if friend.tag not in ftag_fchain.keys():
                 ftag_fchain[friend.tag] = TChain()
             ftag_fchain[friend.tag].Add('{}/{}'.format(
                 friend.path, friend.directory))
     for ch in ftag_fchain.values():
         chain.AddFriend(ch)
         # Keep friend chains alive
         self.friend_tchains.append(ch)
     if self.nthreads != 1:
         EnableImplicitMT(self.nthreads)
     # Keep main chain alive
     self.tchains.append(chain)
     rdf = RDataFrame(chain)
     rcw = RDataFrameCutWeight(rdf)
     return rcw
Example #6
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    # Here we don't drop any branch. We do only keep specified trees.
    opts = RDF.RSnapshotOptions()
    opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        if config[full_path]['keep']:
            rd1 = RDataFrame(tree)
            cut = concat_selections(config[full_path]['selection'])

            if cut:
                rd2 = rd1.Filter(cut)
            else:
                rd2 = rd1

            output_br = make_output_vec(tree_branch_dict[full_path])
            rd2.Snapshot(full_path, output_ntp_name, output_br, opts)
Example #7
def getRDF(pattern, treeName=None, keepObj=None):
    if treeName is None: treeName = 'tuple0/DecayTree'
    patterns = [pattern] if isinstance(pattern, str) else pattern

    from ROOT import TChain, RDataFrame
    import glob

    ch1 = TChain(treeName)
    for pn in patterns:
        for f in glob.glob(pn):
            ch1.Add(f)

    if keepObj is not None:
        keepObj.append(ch1)  # to keep this object in the memory
        return RDataFrame(ch1)

    ## if no container is provided, return both objects
    return RDataFrame(ch1), ch1
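A usage sketch for the two calling conventions above; the glob pattern and the `keepers` list are illustrative:

keepers = []
df_kept = getRDF('ntuples/*.root', keepObj=keepers)   # TChain is stored in 'keepers'; only the RDataFrame is returned
df_plain, chain = getRDF('ntuples/*.root')            # no container given, so the chain is also returned and must be kept alive by the caller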
Example #8
 def __rdf_from_dataset(self, dataset):
     chain, self.friend_tchains = rdf_from_dataset_helper(dataset)
     if self.nthreads != 1:
         EnableImplicitMT(self.nthreads)
     # Keep main chain alive
     self.tchains.append(chain)
     rdf = RDataFrame(chain)
     rcw = RDataFrameCutWeight(rdf)
     return rcw
Example #9
 def get_frame(file_name, df_index=0, tree_name="O2mcparticle_001"):
     """
     Getter of the frame from the file
     """
     if not path.isfile(file_name):
         raise ValueError("Did not find AOD file", file_name)
     sub_names = run_cmd(f"rootls {file_name}").strip().split()
     df_name = []
     for i in sub_names:
         if not i.startswith("DF_") and not i.startswith("TF_"):
             continue
         df_name.append(i)
     df_name = df_name[df_index]
     print(df_name)
     frame = RDataFrame(f"{df_name}/{tree_name}", file_name)
     if verbose:
         colNames = frame.GetColumnNames()
         for j in enumerate(colNames):
             print(j, frame.GetColumnType(j[1]))
     return frame
Example #10
def produceLightTree(sample='DY',ch='mmm'):
    if ch == 'mmm':
        d17B = data_B_mmm+suffix; d17C = data_C_mmm+suffix; d17D = data_D_mmm+suffix; d17E = data_E_mmm+suffix; d17F = data_F_mmm+suffix; 
        SFR_012_L = SFR_MMM_012_L
        l2_tight = l2_m_tight

    if ch == 'eem':
        d17B = data_B_eem+suffix; d17C = data_C_eem+suffix; d17D = data_D_eem+suffix; d17E = data_E_eem+suffix; d17F = data_F_eem+suffix; 

    t = rt.TChain('tree')

    if sample == 'DY':
        t.Add(DY)
        t.Add(DY_ext)

    if sample == 'data':
        t.Add(d17B)
       #t.Add(d17C)
       #t.Add(d17D)
       #t.Add(d17E)
       #t.Add(d17F)

    print('\n\ttotal entries:', t.GetEntries())

    df = RDF(t)
    df1 = df.Define('LOOSE', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 )' )
    df2 = df1.Define('TIGHT', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 && ' + l2_tight + ')' )

    num_L = df2.Filter('LOOSE == 1').Count().GetValue()
    print('\n\tloose entries in MR:', num_L)

    num_T = df2.Filter('TIGHT == 1').Count().GetValue()
    print('\n\ttight entries in MR:', num_T)

    df2 = df2.Define('ptcone', PTCONEL2)

    branchList = rt.vector('string')()
    for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03', 'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']:
        branchList.push_back(br)
    df2.Snapshot('tree', saveDir+'/%s_%s_6_24B_Lcut_29_4.root'%(sample,ch), branchList)
Example #11
    def __init__(self, name, label, selection, datacard_name, colour,
                 position_in_stack, basedir, postfix, isdata, ismc, issignal,
                 weight, xs):
        self.name = name
        print('loading', self.name)
        self.label = label
        self.selection = selection
        self.datacard_name = datacard_name
        self.colour = colour
        self.position_in_stack = position_in_stack
        self.basedir = basedir
        self.postfix = postfix
        self.isdata = isdata
        self.ismc = ismc
        self.issignal = issignal
        self.weight = weight
        self.xs = xs
        self.nevents = 1.
        self.file = '/'.join([basedir, self.name, postfix])

        if not self.isdata:
            nevents_file = '/'.join(
                [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
            with open(nevents_file) as ff:
                lines = ff.readlines()
                for line in lines:
                    if 'Sum Norm Weights' in line:
                        self.nevents = float(re.findall(r'\d+', lines[2])[0])
                        break
        tree_file = '/'.join([self.basedir, self.name, self.postfix])

        rdf = RDF('tree', tree_file)
        rdf = rdf.Filter(self.selection)
        # set_trace()
        df = rdf.AsNumpy()
        self.df = pd.DataFrame(df)
        # scale to 1/pb
        self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)
Example #12
    def makeRootDataFrameFromTree(self, tree_file_name, tree_name='tree', verbose=False, friend_name='ML', friend_file_name=None):
        '''Cache files/trees'''

        ttree = self.readTree(tree_file_name, tree_name, verbose)

        if verbose:
            print('read tree', ttree, 'from file', tree_file_name)

        if friend_file_name:
            ttree.AddFriend(friend_name + '=tree',friend_file_name)
            #VALIDATE#
            validate1 = ttree.GetEntries('l2_pt - %s.l2_pt'%friend_name)
            validate2 = ttree.GetEntries('event - %s.event'%friend_name)

            if validate1 + validate2 != 0: print('\n\tERROR: FRIEND TREE NOT ALIGNED, FAKERATE USELESS', validate1, validate2)

        gROOT.cd()
        # dataframe = RDataFrame(tree_name,tree_file_name)
        dataframe = RDataFrame(ttree)

        return dataframe
Example #13
parser.add_argument('--do',
                    nargs='+',
                    help="list of collections to make plots of")
args = parser.parse_args()

models = {
    'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000),
    'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000),
    'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.),
    'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.)
}
outdir = 'plots'
if args.output:
    outdir = args.output

rdf = RDataFrame("CollectionTree", args.infile)
rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]')

canv = TCanvas('c', '', 800, 600)

for coll in args.do:
    coll = coll.split(':')
    if len(coll) != 1 and len(coll) != 4:
        print('collection should either be "name" or "name:nbins:xmin:xmax"')
        continue

    varsuff = ''
    if 'fatjet' in coll[0]:
        cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
    elif 'jet' in coll[0]:
        cname = 'AntiKt4TruthDressedWZJetsAux'
Example #14
gInterpreter.Declare('''
Int_t getBin(Double_t x, Double_t y, TH2D* histo) {
  return histo->FindFixBin(x, y);
}

auto getWeight(Double_t x, Double_t y, TH2D* histo) {
  auto binIdx = getBin(x, y, histo);
  return histo->GetBinContent(binIdx);
}
''')

gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

dfInit = RDataFrame(mcTreeN, mcNtpN)
df = dfInit.Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)').Define('wt', 'wpid*wtrk*wjk_alt')

# NOTE: This comes from the existing ntuple
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

histoRootMdl = TH2DModel(
    'histoRoot', 'histoRoot',
    20, 1, 200, 20, 0, 450
)
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')


##################
Example #15
    "Triggered_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_vX",
    "Triggered_HLT_IsoMu27_vX",
]
branch_vec = ROOT.vector("string")()
[branch_vec.push_back(branch) for branch in branches]

if not options.is_dataframe:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain("MVATree")
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)
else:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF("tree", input_file)

print("Finished converting the chain to RDataFrame")

if not options.is_dataframe and options.save:
    print("saving dataframe to disk as ",
          data_mc_string + "_" + options.name + "_dataframe.root")
    data_frame.Snapshot(
        "tree", data_mc_string + "_" + options.name + "_dataframe.root",
        branch_vec)
    print("saved dataframe to disk ...")
    "DeltaR_AK4Jet_LooseElectron",
    "DeltaR_AK4Jet_LooseMuon"
]
branch_vec = ROOT.vector("string")()
[branch_vec.push_back(branch) for branch in branches]

if not options.is_dataframe:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain("MVATree")
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)
else:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF("tree", input_file)

print("Finished converting the chain to RDataFrame")

if not options.is_dataframe and options.save:
    print("saving dataframe to disk as ",
          data_mc_string + "_" + options.name + "_dataframe.root")
    data_frame.Snapshot(
        "tree", data_mc_string + "_" + options.name + "_dataframe.root",
        branch_vec)
    print("saved dataframe to disk ...")
Example #17
        '{lb} <= {br} && {br} <= {ub}'.format(br=br,
                                              lb=train + validation,
                                              ub=100)
    }


########
# Main #
########

if __name__ == '__main__':
    args = parse_input()

    gInterpreter.Declare('auto rand_gen = TRandom3({});'.format(args.seed))

    init_frame = RDataFrame(args.tree, args.ntp)
    rand_frame = init_frame.Define('rand_split', 'rand_gen.Uniform(0, 100)')
    if args.debug:
        print('loaded {} with {} entries'.format(
            args.ntp,
            rand_frame.Count().GetValue()))

    cuts = get_cuts(args.train_ratio, args.validation_ratio)
    for sample, cut in cuts.items():
        subsample_frame = rand_frame.Filter(cut)
        output_ntp = join(args.output_dir,
                          '{}_{}.root'.format(get_filename(args.ntp), sample))

        subsample_frame.Snapshot(args.tree, output_ntp)

        if args.debug:
Example #18
from ROOT import (ROOT, RDataFrame, TCanvas, TH1D)
import numpy as np

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
#dataframe_with_truep = dataframe.Define('Bs_momentum', 'pow( Bs_TRUEP_X*Bs_TRUEP_X + Bs_TRUEP_Y*Bs_TRUEP_Y + Bs_TRUEP_Z*Bs_TRUEP_Z , 0.5)')
df_bkg = dataframe.Filter("Bs_BKGCAT == 0 || Bs_BKGCAT == 50")
#df_cuts1 = dataframe_with_truep.Filter("(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)", "trigger_cuts")
cut1 = df_bkg.Filter("(Bs_TAU > 0.0015)", "tau_cut")
cut2 = cut1.Filter("(Bs_M > 5150) && (Bs_M < 5550)", "b_mass_cut")
cut3 = cut2.Filter("(Jpsi_M > 3020) && (Jpsi_M < 3170)", "jpsi_mass_cut")
cut4 = cut3.Filter("(Phi_M > 980) && (Phi_M < 1050)", "phi_mass_cut")
cut5 = cut4.Filter("(muplus_PT > 500) && (mumin_PT > 500)", "mu_pt_cut")
cut6 = cut5.Filter("(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20", "b_vtx_cut")
cut7 = cut6.Filter("(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)",
                   "jpsi_vtx_cut")
cut8 = cut7.Filter("(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)",
                   "phi_vtx_cut")
cut9 = cut8.Filter("mumin_PIDmu > 0 && muplus_PIDmu > 0", "mu_id_cut")
cut10 = cut9.Filter("Kmin_PIDK > 0 && Kplus_PIDK > 0", "k_id_cut")

#print("Mumin eff: ")
#mumincut = cut9.Report()
#mumincut.Print()
print("All stats: ")
cutsreport = dataframe.Report()
Example #19
# initialize RDataFrame
data_frame = None

# either create the RDataFrame from a ROOT tree in a file or from a ROOT chain made up of several files
# or create it directly from an existing RDataFrame including a ROOT tree
if not options.is_dataframe:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain(options.treename)
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)
else:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF(options.treename, input_file)

print("Finished creating the RDataFrame")

# possibly save the created RDataFrame to disk
if not options.is_dataframe and options.save:
    print("saving dataframe to disk as ",
          data_mc_string + "_" + names + "_dataframe.root")
    data_frame.Snapshot("tree",
                        data_mc_string + "_" + names + "_dataframe.root",
                        branch_vec)
    print("saved dataframe to disk ...")
Example #20
parser.add_argument("config", help="Path to the YAML configuration file")
args = parser.parse_args()

ROOT.ROOT.EnableImplicitMT()
gROOT.SetBatch()

with open(os.path.expandvars(args.config), 'r') as stream:
    try:
        params = yaml.full_load(stream)
    except yaml.YAMLError as exc:
        print(exc)

# define paths for loading data and storing results
mc_path = os.path.expandvars(params['MC_PATH'])
data_path = os.path.expandvars(params['DATA_PATH'])
dataDF = RDF('DataTable', data_path)
mcDF = RDF('SignalTable', mc_path)
genDF = RDF('GenTable', mc_path)

results_dir = os.environ['HYPERML_RESULTS_{}'.format(params['NBODY'])]
file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root'
results_file = TFile(file_name, 'recreate')

for cclass in params['CENTRALITY_CLASS']:
    cent_dir = results_file.mkdir('{}-{}'.format(cclass[0], cclass[1]))
    dataCentDF = dataDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
    mcCentDF = mcDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
    genCentDF = genDF.Filter('centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1]))
Example #21
        frames.append(new_frm)

        apply_skim_cuts(new_frm, skim_cuts, ref)


########
# Main #
########

if __name__ == '__main__':
    ntp_dst = '../../ntuples/ref-rdx-run1/Dst-mix/Dst--21_10_21--mix--all--2011-2012--md-mu--phoebe.root'
    ntp_d0 = '../../ntuples/ref-rdx-run1/D0-mix/D0--21_10_21--mix--all--2011-2012--md-mu--phoebe.root'

    if len(sys.argv) == 1 or sys.argv[1].lower() == 'dst':
        print('Working on Dst...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_CUTS, DST_SKIM_CUTS, DST_REF_NUMS)
    elif sys.argv[1].lower() == 'd0':
        print('Working on D0...')
        frame_d0 = RDataFrame('ntp1', ntp_d0)
        apply_cuts(frame_d0, D0_CUTS, D0_SKIM_CUTS, D0_REF_NUMS)
    elif sys.argv[1].lower() == 'dstwsmu':
        print('Working on Dst wrong-sign Mu...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_WS_MU_CUTS, DST_SKIM_CUTS,
                   DST_WS_MU_REF_NUMS)
    elif sys.argv[1].lower() == 'dstwspi':
        print('Working on Dst wrong-sign slow Pi...')
        frame_dst = RDataFrame('ntp1', ntp_dst)
        apply_cuts(frame_dst, DST_WS_PI_CUTS, DST_SKIM_CUTS,
                   DST_WS_PI_REF_NUMS)
Example #22
from ROOT import (
    ROOT,
    RDataFrame,
)

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

dataframe = RDataFrame(input_tree_name, input_ntuple)
trigger_cuts = "(Bs_TAU > 0.0015) && (Bs_M > 5150) && (Bs_M < 5550) && (Jpsi_M > 3020) && (Jpsi_M < 3170) && (Phi_M > 980) && (Phi_M < 1050) && (muplus_PT > 500) && (mumin_PT > 500) && ((Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF) < 20) && (Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16) && (Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
muon_cuts = "!eventmuons_BPVIPCHI2[eventmuons_BPVIPCHI2 < 8].empty() && Bs_len > 0"
df_cuts1 = dataframe.Filter(trigger_cuts)
df_cuts2 = df_cuts1.Filter(muon_cuts)

nentries = dataframe.Count().GetValue()
ntriggered = df_cuts1.Count().GetValue()
ncut = df_cuts2.Count().GetValue()
print("entries: " + str(nentries))
print("triggered: " + str(ntriggered))
print("cut: " + str(ncut))

TrueBs = df_cuts2.Filter("Bs_TRUEID == 531").Count().GetValue()
TrueBsbar = df_cuts2.Filter("Bs_TRUEID == -531").Count().GetValue()

#define right tags, wrong tags and untagged; tagging efficiency, mistag probability and tagging performance
R = df_cuts2.Filter(
    "(Bs_OSMuon_TAGDEC == 1 && Bs_TRUEID == 531) || (Bs_OSMuon_TAGDEC == -1 && Bs_TRUEID == -531)"
).Count().GetValue()
Example #23
    def measureSFR(self, drawPlot = False):
        sample_dict = {}
        samples_all, samples_singlefake, samples_doublefake = createSampleLists(analysis_dir=self.analysis_dir, server = self.server, channel=self.channel)
        working_samples = samples_singlefake
        working_samples = setSumWeights(working_samples)
        print('###########################################################')
        print('# measuring single fake rate...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples: print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))
        chain = TChain('tree') #TChain'ing all data samples together
        for sample in working_samples:
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)
            
        dataframe = RDataFrame(chain)
        weight = 'weight * lhe_weight'
        dataframe = dataframe.Define('w',weight)\
                            .Define('ptCone',self.ptCone())\
                            .Define('abs_hnl_hn_vis_eta','abs(hnl_hn_vis_eta)')\
                            .Define('abs_hnl_hn_eta','abs(hnl_hn_eta)')\
                            .Define('abs_l1_eta','abs(l1_eta)')\
                            .Define('abs_l2_eta','abs(l2_eta)')\
                            .Define('abs_l1_jet_flavour_parton','abs(l1_jet_flavour_parton)')\
                            .Define('abs_l2_jet_flavour_parton','abs(l2_jet_flavour_parton)')

        # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
        # bins_eta    = np.array([0., 0.8, 1.2, 2.4]) 
        bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
        bins_eta    = np.array([0., 0.8, 1.2, 2.4]) 

        selection_baseline      = getSelection(self.channel,'MR_SF')  

        selection_LL_uncorrelated = '(' + ' & '\
                                    .join([\
                                    selection_baseline,\
                                    getSelection(self.channel,'L_L_uncorrelated')\
                                    ]) + ')' 
        selection_TT_uncorrelated = '(' + ' & '\
                                    .join([\
                                    selection_baseline,\
                                    getSelection(self.channel,'L_L_uncorrelated'),\
                                    getSelection(self.channel,'T_T')\
                                    ]) + ')' 

        h_LL_uncorrelated = dataframe\
                .Filter(selection_LL_uncorrelated)\
                .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        #name the axis, also initiate the dataframe call
        h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        h_TT_uncorrelated = dataframe\
                .Filter(selection_TT_uncorrelated)\
                .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        #name the axis, also initiate the dataframe call
        h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        # preparing the histo and save it into a .root file
        sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root' 
        sfr_hist = h_TT_uncorrelated.Clone()
        # sfr_hist = h_LL_uncorrelated.Clone()
        # sfrhist = h_baseline.Clone()
        # sfr_hist.Divide(h_LL_uncorrelated.Clone())
        # dfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

        # draw the histo if required 
        if drawPlot == True:
            can = TCanvas('can', '')
            # sfr_hist.Draw('colzTextE')
            # sfr_hist.Draw('colz')
            sfr_hist.Draw()
            pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
            # pf.showlogopreliminary()
            can.Update()
            set_trace()
Example #24
from ROOT import (ROOT, RDataFrame, TCanvas)

ROOT.EnableImplicitMT()

#get file and tree from directory

minbias_ntuple = "/user/egovorko/work/public/minbias_JpsiPhi.root"
signal_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"

minbias_df = RDataFrame(input_tree_name, minbias_ntuple)
signal_df = RDataFrame(input_tree_name, signal_ntuple)
PIDmu_cut = "eventmuons_PIDmu[eventmuons_PIDmu > 0].size() >= 1"
#pt_cut = "eventmuons_PT[eventmuons_PT > 500].size() >= 1"
minbias_pidmu = minbias_df.Filter(PIDmu_cut)
#minbias_pt = minbias_pidmu.Filter(pt_cut)
signal_pidmu = signal_df.Filter(PIDmu_cut)
#signal_pt = signal_pidmu.Filter(pt_cut)

c1 = TCanvas()
c1.Divide(2, 2)
c1.cd(1)
pidmu1 = minbias_df.Histo1D("eventmuons_PIDmu")
pidmu1.SetTitle("Event muon PIDmu distribution (min bias)")
pidmu1.GetXaxis().SetTitle("PIDmu")
pidmu1.Draw()
c1.cd(2)
pidmu2 = signal_df.Histo1D("eventmuons_PIDmu")
pidmu2.SetTitle("Event muon PIDmu distribution (signal)")
pidmu2.GetXaxis().SetTitle("PIDmu")
pidmu2.SetLineColor(2)
Example #25
        description='find retention rates for various trigger paths.')

    parser.add_argument('ntp', help='specify ntuple path.')

    parser.add_argument('tree', help='specify tree name.')

    parser.add_argument('-t',
                        '--trigger-paths',
                        nargs='+',
                        help='specify trigger paths.')

    return parser.parse_args()


########
# Main #
########

if __name__ == '__main__':
    args = parse_input()

    frame = RDataFrame(args.tree, args.ntp)
    cuts = []
    for tp in args.trigger_paths:
        # keep a reference to each Filter node so it is not garbage-collected
        c = frame.Filter(tp, tp)
        cuts.append(c)

    report = frame.Report()
    report.Print()
Example #26
########
# Main #
########

if __name__ == '__main__':
    args = parse_input()
    histos = glob_histos(args.histo_folder)
    config = parse_config(args.config)
    loaded_histos = dict()
    output_opts = RSnapshotOptions()
    output_opts.fMode = 'UPDATE'
    first_write = True

    for idx, tree in enumerate(config['trees']):
        print('Processing tree {}...'.format(tree))
        init_frame = RDataFrame(tree, args.input_ntp)
        frames = [init_frame]
        output_brs = vector('string')(['runNumber', 'eventNumber'])

        for br, directive in config['config'].items():
            if tree in directive['skip_tree']:
                continue

            print('  Processing {}...'.format(br))
            params = ', '.join(resolve_params(directive['vars'], idx))

            histo_name = directive['histo_name']
            histo_dim = len(directive['vars'])
            debug_br = 'debug_{}_bin_idx'.format(br)

            wt_histo = load_histo(
Example #27
def get_dataframe(dataset):
    tchain, friend_tchains = rdf_from_dataset_helper(dataset)
    rdf = RDataFrame(tchain)
    setattr(rdf, 'tchain', tchain)
    setattr(rdf, 'friend_tchains', friend_tchains)
    return rdf
Example #28
from ROOT import (
    ROOT,
    RDataFrame,
)

ROOT.EnableImplicitMT()

#get file and tree from directory
input_ntuple = "/data/bfys/valukash/forAli/DaVinci_jpsiphi_MC_upgrade.root"
input_tree_name = "Bs2jpsiphi/DecayTree"
dataframe = RDataFrame(input_tree_name, input_ntuple)
nentries = dataframe.Count().GetValue()
bs_tau = "(Bs_TAU > 0.002)"
bs_m = "(Bs_M > 5150) && (Bs_M < 5550)"
jpsi_m = "(Jpsi_M > 3020) && (Jpsi_M < 3170)"
phi_m = "(Phi_M > 980) && (Phi_M < 1050)"
bs_vtx = "(Bs_ENDVERTEX_CHI2/Bs_ENDVERTEX_NDOF < 20)"
jpsi_vtx = "(Jpsi_ENDVERTEX_CHI2/Jpsi_ENDVERTEX_NDOF < 16)"
phi_vtx = "(Phi_ENDVERTEX_CHI2/Phi_ENDVERTEX_NDOF < 25)"
mu_pt = "(muplus_PT > 500) && (mumin_PT > 500)"

trigger_cut_list = [
    bs_tau, bs_m, jpsi_m, phi_m, bs_vtx, jpsi_vtx, phi_vtx, mu_pt
]
efficiency_list = []
cum_efficiency_list = []
cum_eff_df = dataframe
for i in range(len(trigger_cut_list)):
    eff_df = dataframe.Filter(trigger_cut_list[i])
    eff = (eff_df.Count().GetValue()) / nentries
    efficiency_list.append(eff)
Example #29
    "AK15Jet_DeepAK15_TvsQCD",
]

branch_vec = ROOT.vector("string")()
[branch_vec.push_back(branch) for branch in branches]

if not options.is_dataframe:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain("MVATree")
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)
else:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF("tree", input_file)

print("Finished converting the chain to RDataFrame")

if not options.is_dataframe and options.save:
    print("saving dataframe to disk as ",
          data_mc_string + "_" + options.name + "_dataframe.root")
    data_frame.Snapshot(
        "tree", data_mc_string + "_" + options.name + "_dataframe.root",
        branch_vec)
    print("saved dataframe to disk ...")
"pt_pfmet_raw_div_pt_genmet" : "pt_pfmet_raw/pt_genmet",
"pt_pfmet_t1_div_pt_genmet" : "pt_pfmet_t1/pt_genmet",
"pt_pfmet_t1smear_div_pt_genmet" : "pt_pfmet_t1smear/pt_genmet"
}

branch_vec = ROOT.vector("string")()
[branch_vec.push_back(branch) for branch in branches]

if not options.is_dataframe:
    print ("No dataframe was given. Handling the arguments as trees and adding them to chain.")
    input_files = args
    input_chain = ROOT.TChain("METAnalyzer/MET_tree")
    for input_file in input_files:
        input_chain.Add(input_file)
    print ("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)
else:
    print ("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF("tree", input_file)

print ("Finished converting the chain to RDataFrame")

if not options.is_dataframe and options.save:
    print ("saving dataframe to disk as ", data_mc_string + "_" + names + "_dataframe.root")
    data_frame.Snapshot("tree", data_mc_string + "_" + names + "_dataframe.root", branch_vec)
    print ("saved dataframe to disk ...")

name = options.name

selection = options.selection