def check(var, nbins, low, up, friend_var="Particle.{}", frame=df):
    """Compare one variable between ``frame`` and ``df_delphes`` bin by bin.

    Books a 1D histogram of column ``f{var}`` on ``frame`` and a second one of
    column ``friend_var.format(var)`` on ``df_delphes``, overlays both with a
    legend on pad 1 of ``canvas(var, diff=True)``, and draws their difference
    (first minus second) on pad 2.

    ``df``, ``df_delphes``, ``origin``, ``friend`` and ``canvas`` are names
    taken from the surrounding scope.

    Args:
        var: Variable name; the first histogram reads column ``"f" + var``.
        nbins: Number of bins of both histograms.
        low: Lower edge of the histogram axis.
        up: Upper edge of the histogram axis.
        friend_var: Template formatted with ``var`` to get the second column.
        frame: Data frame on which the first histogram is booked.

    Returns:
        bool: True when every bin 1..NbinsX of the difference is exactly zero,
        False as soon as one bin differs.
    """
    ref_title = f"{origin} {var}"
    ref_hist = frame.Histo1D(
        RDF.TH1DModel(ref_title, ref_title, nbins, low, up), f"f{var}")

    friend_column = friend_var.format(var)
    friend_title = f"{friend} {friend_column}"
    friend_hist = df_delphes.Histo1D(
        RDF.TH1DModel(friend_title, friend_title, nbins, low, up),
        friend_column)
    friend_hist.SetLineColor(2)
    friend_hist.SetLineStyle(3)

    # Pad 1: overlay of the two distributions.
    can = canvas(var, diff=True)
    can.cd(1)
    ref_hist.SetDirectory(0)
    friend_hist.SetDirectory(0)
    drawn_copies = [ref_hist.DrawCopy(), friend_hist.DrawCopy("same")]
    for drawn in drawn_copies:
        drawn.SetDirectory(0)
    legend = TLegend(.7, .5, .9, .75)
    legend.AddEntry(ref_hist.GetValue())
    legend.AddEntry(friend_hist.GetValue())
    legend.Draw()
    gPad.Update()

    # Pad 2: difference of the two histograms.
    can.cd(2)
    residual = ref_hist.DrawCopy()
    residual.SetDirectory(0)
    residual.SetName("hdiff")
    residual.SetTitle("diff")
    residual.Add(friend_hist.GetValue(), -1)
    residual.GetYaxis().SetRangeUser(-1, 1)
    gPad.Update()

    # Under- and overflow bins (0 and NbinsX + 1) are not inspected.
    return not any(
        residual.GetBinContent(ibin) != 0
        for ibin in range(1, residual.GetNbinsX() + 1))
def plot(frame, x):
    """Book a 1D histogram of column ``x[0]`` on ``frame`` and draw it.

    Args:
        frame: Data frame providing ``Histo1D``.
        x: Indexable whose items 0..3 are the column name and the three
            binning arguments handed to ``RDF.TH1DModel`` after name and
            title.
    """
    column = x[0]
    name = f"{column}"
    # Empty main title; the column name is used as the x-axis title.
    model = RDF.TH1DModel(name, f";{column}", x[1], x[2], x[3])
    hist = frame.Histo1D(model, column)

    pad = canvas(name)
    pad.SetLeftMargin(0.15)
    hist.Draw("COLZ")
    pad.Update()
def split(file_name):
    """Split the 'tree' of ``<file_name>.root`` into two halves on disk.

    The first ``n // 2`` entries are written to
    ``<file_name>_training_half.root`` and the remaining entries to
    ``<file_name>_untouched_half.root``, both under the tree name 'tree'.

    Args:
        file_name: Path of the input file WITHOUT the '.root' extension.

    Raises:
        ValueError: If the tree has fewer than two entries, so that no
            non-empty first half exists.
    """
    tfile = rt.TFile(file_name + '.root')
    tree = tfile.Get('tree')
    n = tree.GetEntries()
    half = n // 2

    # RDataFrame.Range(begin, end) interprets end == 0 as "up to the end of
    # the dataset". With fewer than two entries the first half would become
    # Range(0, 0), i.e. the WHOLE tree, and overlap with the second half.
    if half == 0:
        raise ValueError(
            "tree in %s.root has %d entries; at least 2 are needed to split"
            % (file_name, n))

    df = RDF(tree)
    training = df.Range(0, half)
    untouched = df.Range(half, 0)  # end == 0: run until the last entry
    training.Snapshot('tree', '%s_training_half.root' % file_name)
    untouched.Snapshot('tree', '%s_untouched_half.root' % file_name)
def correlate(frame, x, y):
    """Book a 2D histogram of ``y[0]`` versus ``x[0]`` on ``frame`` and draw it.

    Args:
        frame: Data frame providing ``Histo2D``.
        x: Indexable whose items 0..3 are the x column name and the three
            x-binning arguments handed to ``RDF.TH2DModel``.
        y: Same layout as ``x``, for the y axis.
    """
    x_column, y_column = x[0], y[0]
    name = f"{x_column}_vs_{y_column}"
    # Empty main title; the column names are used as axis titles.
    title = f";{x_column};{y_column}"
    model = RDF.TH2DModel(name, title, x[1], x[2], x[3], y[1], y[2], y[3])
    hist = frame.Histo2D(model, x_column, y_column)

    pad = canvas(name)
    pad.SetLeftMargin(0.15)
    hist.Draw("COLZ")
    pad.Update()
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Snapshot every friend tree marked ``keep`` into one output file.

    Args:
        output_ntp_name: Path of the output file; each tree is written in
            'UPDATE' mode.
        friends: Mapping from a tree's full path to the tree object.
        tree_branch_dict: Mapping from full path to the branches handed to
            ``make_output_vec`` for that tree.
        config: Mapping from full path to a dict with the entries 'keep'
            and 'selection'.
    """
    # No branch is dropped here; only the set of trees is restricted.
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        tree_cfg = config[full_path]
        if not tree_cfg['keep']:
            continue

        frame = RDataFrame(tree)
        cut = concat_selections(tree_cfg['selection'])
        # A falsy cut means "no selection": write the tree unfiltered.
        selected = frame.Filter(cut) if cut else frame
        branches = make_output_vec(tree_branch_dict[full_path])
        selected.Snapshot(full_path, output_ntp_name, branches, snapshot_opts)
def produceLightTree(sample='DY',ch='mmm'):
    """Write a slimmed tree with LOOSE/TIGHT flags and 'ptcone' for one sample.

    Builds a TChain named 'tree' from module-level file paths, defines the
    columns LOOSE and TIGHT from selection strings, prints the chain size and
    the number of LOOSE and TIGHT entries, defines 'ptcone' from PTCONEL2 and
    snapshots a fixed list of branches into ``saveDir``.

    sample: 'DY' adds DY and DY_ext to the chain; 'data' adds only d17B.
            Any other value adds nothing to the chain.
    ch:     'mmm' or 'eem'; selects the data file paths and appears in the
            output file name.

    This block uses Python 2 print statements.
    """
    if ch == 'mmm':
        d17B = data_B_mmm+suffix; d17C = data_C_mmm+suffix; d17D = data_D_mmm+suffix; d17E = data_E_mmm+suffix; d17F = data_F_mmm+suffix;
        SFR_012_L = SFR_MMM_012_L
        l2_tight = l2_m_tight

    # NOTE(review): as laid out here SFR_012_L and l2_tight are only bound for
    # ch == 'mmm'; with ch == 'eem' the Define calls below would hit unbound
    # locals -- confirm against the original indentation.
    if ch == 'eem':
        d17B = data_B_eem+suffix; d17C = data_C_eem+suffix; d17D = data_D_eem+suffix; d17E = data_E_eem+suffix; d17F = data_F_eem+suffix;

    t = rt.TChain('tree')

    if sample == 'DY':
        t.Add(DY)
        t.Add(DY_ext)

    if sample == 'data':
        t.Add(d17B)
        # only the B file is used; the other files are disabled
        #t.Add(d17C)
        #t.Add(d17D)
        #t.Add(d17E)
        #t.Add(d17F)

    print '\n\ttotal entries:', t.GetEntries()

    df = RDF(t)
    # '1 * (...)' turns the boolean selection into a numeric flag column
    df1 = df.Define('LOOSE', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 )' )
    # TIGHT is the LOOSE selection with the l2_tight requirement added
    df2 = df1.Define('TIGHT', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 && ' + l2_tight + ')' )

    # each GetValue() below triggers its own pass over the chain
    num_L = df2.Filter('LOOSE == 1').Count().GetValue()
    print '\n\tloose entries in MR:', num_L

    num_T = df2.Filter('TIGHT == 1').Count().GetValue()
    print '\n\ttight entries in MR:', num_T

    df2 = df2.Define('ptcone', PTCONEL2)

    # branches written to the output file
    branchList = rt.vector('string')()
    for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03', 'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']:
        branchList.push_back(br)

    df2.Snapshot('tree', saveDir+'/%s_%s_6_24B_Lcut_29_4.root'%(sample,ch), branchList)
def __init__(self, name, label, selection, datacard_name, colour, position_in_stack, basedir, postfix, isdata, ismc, issignal, weight, xs):
    """Store the sample description and load its selected events.

    All arguments are stored as attributes of the same name. In addition:

    * ``self.file`` is ``basedir/name/postfix``.
    * ``self.nevents`` starts at 1. and, for non-data samples, is read from
      ``basedir/name/SkimAnalyzerCount/SkimReport.txt``.
    * ``self.df`` is a pandas DataFrame built from the 'tree' of
      ``self.file`` after applying ``selection`` as an RDataFrame filter.
    * ``self.lumi_scaling`` is 1. for data and ``xs / nevents`` otherwise.

    This block uses a Python 2 print statement.
    """
    self.name = name
    print 'loading', self.name
    self.label = label
    self.selection = selection
    self.datacard_name = datacard_name
    self.colour = colour
    self.position_in_stack = position_in_stack
    self.basedir = basedir
    self.postfix = postfix
    self.isdata = isdata
    self.ismc = ismc
    self.issignal = issignal
    self.weight = weight
    self.xs = xs
    # default normalisation; overwritten below for non-data samples when the
    # report contains a 'Sum Norm Weights' line
    self.nevents = 1.
    self.file = '/'.join([basedir, self.name, postfix])

    if not self.isdata:
        nevents_file = '/'.join( [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
        with open(nevents_file) as ff:
            lines = ff.readlines()
            for line in lines:
                if 'Sum Norm Weights' in line:
                    # NOTE(review): the number is taken from lines[2], not from
                    # the matched `line`, and r'\d+' keeps only the first run
                    # of digits (no decimals or exponent) -- confirm that the
                    # report layout makes this intended.
                    self.nevents = float(re.findall(r'\d+', lines[2])[0])
                    break

    # same path as self.file, rebuilt from the stored attributes
    tree_file = '/'.join([self.basedir, self.name, self.postfix])
    rdf = RDF('tree', tree_file)
    rdf = rdf.Filter(self.selection)
    # set_trace()
    # AsNumpy() returns the columns that pd.DataFrame is built from
    df = rdf.AsNumpy()
    self.df = pd.DataFrame(df)
    # scale to 1/pb
    self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)
"AK15Jet_DeepAK15_TvsQCD", ] branch_vec = ROOT.vector("string")() [branch_vec.push_back(branch) for branch in branches] if not options.is_dataframe: print( "No dataframe was given. Handling the arguments as trees and adding them to chain." ) input_files = args input_chain = ROOT.TChain("MVATree") for input_file in input_files: input_chain.Add(input_file) print("Finished loading chain with ", input_chain.GetEntries(), " entries") data_frame = RDF(input_chain, branch_vec) else: print("Dataframe flag was set. Handling argument as dataframe.") input_file = args[0] data_frame = RDF("tree", input_file) print("Finished converting the chain to RDataFrame") if not options.is_dataframe and options.save: print("saving dataframe to disk as ", data_mc_string + "_" + options.name + "_dataframe.root") data_frame.Snapshot( "tree", data_mc_string + "_" + options.name + "_dataframe.root", branch_vec) print("saved dataframe to disk ...")
# Fragment of a loop body: the enclosing loop (target of `continue`) and the
# initial value of `varsuff` are defined before this excerpt.

# pick the container name for this collection; 'fatjet' is tested before 'jet'
# because 'jet' is a substring of 'fatjet'
if 'fatjet' in coll[0]:
    cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
elif 'jet' in coll[0]:
    cname = 'AntiKt4TruthDressedWZJetsAux'
elif 'electron' in coll[0]:
    cname = 'TruthElectronsAuxDyn'
    varsuff = '_dressed'
elif 'muon' in coll[0]:
    cname = 'TruthMuonsAuxDyn'
    varsuff = '_dressed'
else:
    print('collection {} not handled, please add'.format(coll))
    continue

# index 0 selects the first element of the per-event collection
# NOTE(review): for the two jet branches `varsuff` is not set in this
# excerpt; it is presumably initialised earlier in the loop -- confirm.
varsuff += '[0]'

# eta of the first object, filled with the 'wgt' column as weight
model = RDF.TH1DModel(cname + 'eta', models['eta'][0].format(coll[0]), models['eta'][1], models['eta'][2], models['eta'][3])
h = rdf.Define(cname+'_eta_0', cname+'.eta'+varsuff) \
    .Histo1D(model, cname+'_eta_0', 'wgt')
h.Draw()
canv.SaveAs(outdir + '/' + coll[0] + '_eta.pdf')
canv.Clear('D')

# phi of the first object, same procedure
model = RDF.TH1DModel(cname + 'phi', models['phi'][0].format(coll[0]), models['phi'][1], models['phi'][2], models['phi'][3])
h = rdf.Define(cname+'_phi_0', cname+'.phi'+varsuff) \
    .Histo1D(model, cname+'_phi_0', 'wgt')
h.Draw()
canv.SaveAs(outdir + '/' + coll[0] + '_phi.pdf')
canv.Clear('D')

# logarithmic y axis for whatever is drawn on the canvas after this point
canv.SetLogy()
# The RDataFrame is built below in one of two ways:
#   * dataframe mode: from the tree `options.treename` of a single file
#     (the first positional argument), or
#   * chain mode: from a TChain of `options.treename` trees, one per
#     positional argument, restricted to the branches in `branch_vec`.
data_frame = None

if options.is_dataframe:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF(options.treename, input_file)
else:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain(options.treename)
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)

print("Finished creating the RDataFrame")

# Only a frame freshly built from a chain is written out, and only on request.
# The output tree is always named "tree".
if not options.is_dataframe and options.save:
    snapshot_path = data_mc_string + "_" + names + "_dataframe.root"
    print("saving dataframe to disk as ", snapshot_path)
    data_frame.Snapshot("tree", snapshot_path, branch_vec)
    print("saved dataframe to disk ...")
parser.add_argument("config", help="Path to the YAML configuration file")
args = parser.parse_args()

ROOT.ROOT.EnableImplicitMT()
gROOT.SetBatch()

# Load the analysis configuration. A parse failure is fatal: without `params`
# nothing below can run, so stop here with the parser's message instead of
# printing it and failing later with a NameError on `params`.
config_path = os.path.expandvars(args.config)
with open(config_path, 'r') as stream:
    try:
        # NOTE(review): yaml.safe_load would be enough if the configuration
        # only contains plain YAML types -- confirm before switching.
        params = yaml.full_load(stream)
    except yaml.YAMLError as exc:
        raise SystemExit(
            'Could not parse configuration {}: {}'.format(config_path, exc))

# define paths for loading data and storing results
mc_path = os.path.expandvars(params['MC_PATH'])
data_path = os.path.expandvars(params['DATA_PATH'])

dataDF = RDF('DataTable', data_path)
mcDF = RDF('SignalTable', mc_path)
genDF = RDF('GenTable', mc_path)

# the results directory comes from an environment variable whose name depends
# on the NBODY setting; a missing variable raises KeyError
results_dir = os.environ['HYPERML_RESULTS_{}'.format(params['NBODY'])]
file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root'
results_file = TFile(file_name, 'recreate')

for cclass in params['CENTRALITY_CLASS']:
    # one output directory per centrality class, named "<low>-<high>"
    cent_dir = results_file.mkdir('{}-{}'.format(cclass[0], cclass[1]))

    # the same half-open centrality window [low, high) is applied to the
    # data, reconstructed-signal and generated tables
    cent_cut = 'centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1])
    dataCentDF = dataDF.Filter(cent_cut)
    mcCentDF = mcDF.Filter(cent_cut)
    genCentDF = genDF.Filter(cent_cut)
import root_numpy as rnp
import uproot as ur
import pandas as pd
import numpy as np

# Open the WJetsToLNu and DYBB ntuples with ROOT and wrap their 'tree' in
# RDataFrames.
tf_WJ = rt.TFile( '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/WJetsToLNu/HNLTreeProducer/tree.root' )
tf_DY = rt.TFile( '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/DYBB/HNLTreeProducer/tree.root' )
t_WJ = tf_WJ.Get('tree')
t_DY = tf_DY.Get('tree')
rdf_WJ = RDF(t_WJ)
rdf_DY = RDF(t_DY)

# Open the same two files a second time through uproot.
uf_WJ = ur.open( '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/WJetsToLNu/HNLTreeProducer/tree.root' )
uf_DY = ur.open( '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/DYBB/HNLTreeProducer/tree.root' )
ut_WJ = uf_WJ['tree']
ut_DY = uf_DY['tree']

# Read the listed branches into pandas DataFrames.
# NOTE(review): `.pandas.df` looks like the uproot 3 interface -- confirm the
# installed uproot version.
pdf_WJ_out = ut_WJ.pandas.df( ['event', 'lumi', 'run', 'l2_pt', 'l2_dxy', 'l2_eta'])
# (the next statement continues beyond this excerpt)
pdf_DY_out = ut_DY.pandas.df(