def test_file(inname, outname, train_file, variables, testEvery):
    log.info('START processing: %s', os.path.basename(inname))
    bdt = get_training(train_file, variables)
    with io.root_open(outname, 'w') as tout:
        out_tree = Tree('tree')
        out_tree.create_branches({
            'flavour': 'F',
            'vertexCategory': 'I',
            'jetPt': 'F',
            'jetEta': 'F',
            'BDTG': 'F',
        })
        with io.root_open(inname) as tin:
            in_tree = tin.tree
            in_tree.SetBranchStatus('*', 0)
            in_tree.SetBranchStatus('flavour', 1)
            in_tree.SetBranchStatus('vertexCategory', 1)
            in_tree.SetBranchStatus('jetPt', 1)
            in_tree.SetBranchStatus('jetEta', 1)
            for var in variables:
                in_tree.SetBranchStatus(var, 1)
            for idx, entry in enumerate(in_tree):
                # skip every testEvery-th entry
                if testEvery != 1 and (idx % testEvery) == 0:
                    continue
                var_vals = [getattr(entry, i) for i in variables]
                bdt_out = bdt.predict_proba([var_vals])[0][1]
                out_tree.flavour = entry.flavour
                out_tree.vertexCategory = entry.vertexCategory
                out_tree.jetPt = entry.jetPt
                out_tree.jetEta = entry.jetEta
                out_tree.BDTG = bdt_out
                out_tree.fill()
        out_tree.write()
    log.info('DONE processing: %s', os.path.basename(inname))

def calc_single_track_effs(path):
    """Calc the 2D effs for trigger and assocs"""
    with root_open(path + 'output_MC.root', 'read') as f_mc:
        with root_open(path + 'output_reconstructed.root', 'read') as f_re:
            with root_open(path + 'output_effs.root', 'update') as f_effs:
                f_effs.mkdir('processed')
                f_effs.mkdir('processed/efficiencies')
                f_effs.processed.efficiencies.cd()
                # names are identical in mc and recon; this might be a problem!
                print 'calculating effs'
                for tpl_mc in f_mc.walk('processed/total_yield'):
                    names = tpl_mc[2]
                    names.sort()
                    for h_name in names:
                        ty_mc = f_mc.Get('processed/total_yield/' + h_name)
                        ty_re = f_re.Get('processed/total_yield/' + h_name)
                        eff = ty_re / ty_mc
                        name = 'eff_' + ty_re.name[6:]
                        eff.SetNameTitle(name, 'efficiency' + ty_re.name[6:])
                        f_effs.Write(name)
                        err = r.Double()
                        x_bins, y_bins = (eff.GetXaxis().GetNbins(),
                                          eff.GetYaxis().GetNbins())
                        inte = eff.IntegralAndError(1, x_bins, 1, y_bins, err)
                        print (eff.name, inte / (x_bins * y_bins),
                               'Error: ', err / (x_bins * y_bins))

def multiclass_graph(mt_dec, probas, name, axis_range=[0., 250.]):
    # create a graph and save it as a ROOT file
    dec_modes = [0.0, 1.0, 10.0]
    print np.shape(mt_dec)
    print np.shape(probas)
    num_of_classes = len(probas[0])
    all_input = np.transpose(np.vstack((mt_dec[:, 0], mt_dec[:, 1],
                                        probas[:, 0], probas[:, 1],
                                        probas[:, 2], probas[:, 3],
                                        probas[:, 4], probas[:, 5])))
    for j in dec_modes:
        x = np.array(filter(lambda x: x[1] == j, all_input))[:, 0]
        for k in xrange(0, num_of_classes, 1):
            y = np.array(filter(lambda x: x[1] == j, all_input))[:, k + 2]
            graph = Graph(len(x), "g1")
            # create a ROOT file and fill it with the graph P(W+jet) vs mt
            root_open("plots/ROOTfiles/G_" + name + GetClass(k + 1)
                      + '_dec_' + str(j) + ".root", 'recreate')
            fill_graph(graph, np.column_stack((x, y)))
            graph.Write()
            # create a canvas and save the plot as png
            c = Canvas()
            graph.SetTitle(name + GetClass(k + 1) + '_dec_' + str(j))
            graph.SetMarkerSize(0.3)
            graph.GetXaxis().SetRangeUser(axis_range[0], axis_range[1])
            graph.Draw("AP")
            c.SaveAs("plots/G_" + name + GetClass(k + 1) + '_dec_' + str(j) + ".png")

def main(cutname, cutfile, infile, outfile):
    c = root_open(cutfile)
    f = root_open(infile)
    cdict = {p: t for p, d, t in c if p != ''}
    clist = list(set(cdict['cutDir']))
    dic = {p: t for p, d, t in f if p != ''}
    f_copy = root_open(outfile, "recreate")
    for d, tl in dic.iteritems():
        f_copy.mkdir(d)
        f_copy.cd(d)
        for t in tl:
            tree = f[d][t]
            ct = [i for i in clist if i[-3:] in t]
            if ct != []:
                ctree = c['cutDir'][ct[0]]
                cut = "{}==0.0".format(cutname)
                tree.AddFriend(ctree)
            else:
                cut = ''
            tree_copy = tree.CopyTree(cut)
            tree_copy.Fill()
            tree_copy.Write()
            print "{}/{}: in".format(d, t), tree.GetEntries(), " out", tree_copy.GetEntries()
            tree.IsA().Destructor(tree)
            tree_copy.IsA().Destructor(tree_copy)
        f_copy.cd('')
    f.close()
    f_copy.close()
    c.close()

def test_pickler_proxy():
    h = Hist(5, 0, 1, name='hist')
    f = tempfile.NamedTemporaryFile(suffix='.root')
    with root_open(f.name, 'recreate') as outfile:
        dump([h], outfile)

    class IsCalled(object):
        def __init__(self, func):
            self.func = func
            self.called = False

        def __call__(self, path):
            if path != '_pickle;1':
                self.called = True
            return self.func(path)

    with root_open(f.name) as infile:
        infile.Get = IsCalled(infile.Get)
        hlist = load(infile, use_proxy=False)
        assert_true(infile.Get.called)

    with root_open(f.name) as infile:
        infile.Get = IsCalled(infile.Get)
        hlist = load(infile, use_proxy=True)
        assert_false(infile.Get.called)
        assert_equal(hlist[0].name, 'hist')
        assert_true(infile.Get.called)

    f.close()

def get_file(ntuple_path=NTUPLE_PATH, file_name=None, student=DEFAULT_STUDENT,
             hdf=False, suffix="_train", force_reopen=False, **kwargs):
    if file_name is None:
        ext = ".h5" if hdf else ".root"
        filename = student + suffix + ext
        if filename in FILES and not force_reopen:
            return FILES[filename]
        file_path = os.path.join(ntuple_path, filename)
        # file_path = os.path.join(ntuple_path, student + suffix, filename)
        log.info("opening {0} ...".format(file_path))
        if hdf:
            # student_file = tables.open_file(file_path)  # , driver="H5FD_CORE"
            log.error("Not implemented yet")
            raise RuntimeError("Not implemented yet")
        else:
            student_file = root_open(file_path, "READ")
            FILES[filename] = student_file
    else:
        file_path = os.path.join(ntuple_path, file_name)
        log.info("opening {0} ...".format(file_path))
        student_file = root_open(file_path, "READ")
        FILES[file_name] = student_file
    return student_file

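# Usage sketch for get_file above (relies on the module-level NTUPLE_PATH,
# DEFAULT_STUDENT and the FILES cache): repeated calls with the same
# arguments return the already-open file instead of reopening it.
#
#     f1 = get_file(suffix='_train')
#     f2 = get_file(suffix='_train')
#     assert f1 is f2  # served from the FILES cache
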
def calc_effs(path_mc, path_recon, out_dir=None):
    """Save the calculated effs for each z-section and eventclass to file"""
    with root_open(path_mc, 'read') as f_mc:
        with root_open(path_recon, 'read') as f_re:
            if not out_dir:
                out_dir = dirname(abspath(path_mc))
            with root_open(out_dir + '/output_effs.root', 'recreate') as f_effs:
                f_effs.mkdir('processed')
                f_effs.mkdir('processed/eff_from_ty')
                f_effs.mkdir('processed/eff_from_signal')
                print 'calculating effs from total yield'
                f_effs.processed.eff_from_ty.cd()
                for tpl_mc in f_mc.walk('processed/total_yield'):
                    names = tpl_mc[2]
                    names.sort()
                    for h_name in names:
                        ty_mc = f_mc.Get('processed/total_yield/' + h_name)
                        ty_re = f_re.Get('processed/total_yield/' + h_name)
                        eff = ty_re / ty_mc
                        name = 'eff_' + ty_re.name[6:]
                        eff.SetNameTitle(name, 'efficiency' + ty_re.name[6:])
                        f_effs.Write(name)
                        err = ROOT.Double()
                        x_bins, y_bins = (eff.GetXaxis().GetNbins(),
                                          eff.GetYaxis().GetNbins())
                        inte = eff.IntegralAndError(1, x_bins, 1, y_bins, err)
                        print (eff.name, inte / (x_bins * y_bins),
                               'Error: ', err / (x_bins * y_bins))

def make_plot(hist_name='emu/Stage_0/h1_0_emu_Mass', rebin=40,
              y1_mi=1e-4, y1_ma=1, x_mi=0, x_ma=900, y2_mi=0., y2_ma=2.,
              doRatio=True, label='$M_{e\mu}$ (GeV)'):
    print("Now plotting: %s" % hist_name)
    hist_style = sc.style_container(style='CMS', useRoot=False, cms=13, lumi=0,
                                    cmsPositon="upper left",
                                    legendPosition='upper right', kind='Graphs')
    hist_50 = get_hist_from_file('/disk1/erdweg/television/DATA_50/merged/allData',
                                 hist_name, rebinfac=rebin)
    hist_25 = get_hist_from_file('/disk1/erdweg/television/DATA_25/merged/allData',
                                 hist_name, rebinfac=rebin)
    hist_50 = Graph(hist_50.Clone('50ns'))
    hist_50.SetLineColor('red')
    hist_50.SetTitle('50ns')
    hist_50.xaxis.SetTitle(label)
    hist_50.yaxis.SetTitle('Events')
    hist_25 = Graph(hist_25.Clone('25ns'))
    hist_25.SetLineColor('blue')
    hist_25.SetTitle('25ns')
    hist_25.xaxis.SetTitle(label)
    hist_25.yaxis.SetTitle('Events')
    test = plotter(sig=[hist_50, hist_25], style=hist_style)
    if doRatio:
        test.Add_plot('Empty', pos=1, height=15, label='50ns/25ns')
    test.create_plot()
    if doRatio:
        tfile = root_open('/disk1/erdweg/television/DATA_50/merged/allData.root', "READ")
        d_hist1 = tfile.Get(hist_name)
        d_hist1.Rebin(rebin)
        d_hist1.Scale(1. / d_hist1.Integral())
        tfile = root_open('/disk1/erdweg/television/DATA_25/merged/allData.root', "READ")
        d_hist2 = tfile.Get(hist_name)
        d_hist2.Rebin(rebin)
        d_hist2.Scale(1. / d_hist2.Integral())
        ratio = d_hist1.Clone('ratio')
        for ibin, jbin, lbin in zip(ratio, d_hist1, d_hist2):
            if lbin.value != 0:
                ibin.value = jbin.value / lbin.value
                ibin.error = math.sqrt(jbin.error**2 / lbin.value**2
                                       + (lbin.error**2 * jbin.value**2) / lbin.value**4)
            else:
                ibin.value = -100
                ibin.error = 0
        # print('6', type(ratio))
        duke_errorbar(ratio,
                      xerr=hist_style.Get_xerr(),
                      emptybins=False,
                      axes=test.Get_axis2(),
                      markersize=hist_style.Get_marker_size(),
                      marker=hist_style.Get_marker_style(),
                      ecolor=hist_style.Get_marker_color(),
                      markerfacecolor=hist_style.Get_marker_color(),
                      markeredgecolor=hist_style.Get_marker_color(),
                      capthick=hist_style.Get_marker_error_cap_width(),
                      zorder=2.2)
    test.Get_axis1().set_ylim(ymin=y1_mi, ymax=y1_ma)
    test.Get_axis1().set_xlim(xmin=x_mi, xmax=x_ma)
    if doRatio:
        test.Get_axis2().set_ylim(ymin=y2_mi, ymax=y2_ma)
    test.SavePlot('plots/BX_comparison' + hist_name.split('/')[-1] + '.pdf')
    return 42

def data_to_mc_switcher(filename, path, outpath):
    # filename is varname.root
    varname = filename.split(".")[0]
    if "nopdf" in varname:
        return
    if "*" in varname:
        return
    print filename, varname
    hists = {}
    try:
        if not os.path.isdir(outpath):
            os.makedirs(outpath)
    except OSError:
        raise
    if "tmatrix" in filename:
        shutil.copy(path + "/" + filename, outpath)
        return
    with root_open(path + "/" + filename) as f:
        outfile = root_open(outpath + "/" + filename, "RECREATE")
        for path, dirs, objects in f.walk():
            # load all histograms from file first
            first = True
            for hist in objects:
                # format: 3j2t_abs_lj_eta__tchan__tchan__up
                # ignore jt_var__DATA
                if not "DATA" in hist and not "DEBUG" in hist:
                    hists[hist] = f.Get(hist).Clone()
                if first and not "DEBUG" in hist:
                    first = False
                    hn = hist.split("__")
                    jt = hn[0].split("_")[0]
            # make jt_var__DATA as sum of jt_var__[tchan+ttjets+wzjets+qcd]
            if jt + "_" + varname + "__wzjets" in hists:
                hists[jt + "_" + varname + "__DATA"] = hists[jt + "_" + varname + "__tchan"].Clone()
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__ttjets"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__wzjets"].Clone())
                # hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__wjets_light"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__qcd"].Clone())
            else:
                hists[jt + "_" + varname + "__DATA"] = hists[jt + "_" + varname + "__tchan"].Clone()
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__twchan"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__schan"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__ttjets"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__wjets"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__dyjets"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__diboson"].Clone())
                hists[jt + "_" + varname + "__DATA"].Add(hists[jt + "_" + varname + "__qcd"].Clone())
            # signal scaling closure test
            # hists[jt + "_" + varname + "__tchan"].Scale(1.1)
        for name, h in hists.items():
            h.SetNameTitle(name, name)
            if "DATA" in name:
                for bin in range(h.GetNbinsX() + 2):
                    h.SetBinError(bin, math.sqrt(h.GetBinContent(bin)))
            h.Write()
        outfile.Close()

def test_file_open():
    fname = 'test_file_open.root'
    with File.open(fname, 'w'):
        pass
    with root_open(fname, 'r'):
        pass
    with root_open(fname):
        pass
    os.unlink(fname)

def makedf():
    """For a particular Q2W analysis range, starting at the level of h5s,
    put all analysis objects into a DataFrame

    output -- DF5.h5 contains DataFrame d with analysis data represented by columns:
              TOP--VARSET--q2wbinnum--q2wbin--SEQ--POL--h5--h1{ij}--h5{p}--hR2_{p}^{ij}
    """
    outfile = os.path.join(ANADIR, 'DF5.h5')
    store = pd.HDFStore(outfile)
    norm = 50000 * math.pi
    d = pd.DataFrame()
    # 1. First do the "looping part" of the process of creating the DF
    for top in range(0, NTOPS):
        if top != T5:
            continue
        for varset in range(0, NVARSETS):
            if varset != VST1:
                continue
            ftemplate = root_open(SEQ_POLS_H5FILE[0][0] % TOPS_NAME[top])
            keys = ftemplate.GetListOfKeys()
            q2wbinnum = 0
            # counter for number of dls (=Data-Lists, defined later) inserted into DF
            dl_counter = 0
            for q2wdir in keys:
                q2wbinnum += 1
                for seq in range(0, NSEQ):
                    for pol in range(0, NPOLS):
                        dl_counter += 1
                        f = root_open(SEQ_POLS_H5FILE[seq][pol] % TOPS_NAME[top])
                        h5 = f.Get('%s/%s' % (q2wdir.GetName(),
                                              SEQ_POLS_H5[seq][pol] % VARSETS_NUMBER[varset]))
                        f.Close()
                        # Create Data-List (dl) to be added to the DataFrame
                        dl = [TOPS_NUMBER[top], VARSETS_NUMBER[varset], q2wbinnum,
                              q2wdir.GetName(), seq, pol, h5]
                        rindex = ['TOP', 'VARSET', 'q2wbinnum', 'q2wbin', 'SEQ', 'POL', 'h5']
                        print 'len(dl)=', len(dl)
                        print 'len(rindex)=', len(rindex)
                        if d.empty:
                            data = pd.DataFrame({'s1': dl}, index=rindex)  # Data for 1st. column
                            d = d.append(data)
                        else:
                            d['s%d' % dl_counter] = dl
    dt = d.transpose()
    # 2. Now use semi-vectorized operation to fill up the rest of DF5
    h5s = dt['h5']
    h1s = []
    for i in range(len(h5s)):
        h1s.append(h5s[i].Projection(M1))
    # dt['h1_1M1'] = dt['h5']
    dt['h1_1M1'] = h1s
    store['d'] = dt

def hist_checking(control_hist_location, cur_hist_location, path, technique):
    with root_open(control_hist_location) as control_file, \
            root_open(cur_hist_location) as cur_file:
        cur_hist = cur_file.get(path.encode('ascii', 'ignore'))
        control_hist = control_file.get(path.encode('ascii', 'ignore'))
        if technique == 'Kolmogorov-Smirnov':
            p_value = cur_hist.KolmogorovTest(control_hist)
        elif technique == 'chi_square':
            p_value = cur_hist.Chi2Test(control_hist)
        else:
            raise ValueError('unknown technique: {}'.format(technique))
        return 1. - p_value

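# Usage sketch for hist_checking (file names and histogram path are
# hypothetical): compare the same histogram in a reference and a current
# file and report the distance 1 - p_value.
#
#     dist = hist_checking('reference.root', 'current.root',
#                          u'demo/h_pt', 'Kolmogorov-Smirnov')
#     print 'distance (1 - p_value): %.3f' % dist
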
def main():
    start_time = time.time()
    # open file
    filename = "tree.root"
    infile = root_open(filename, "read")
    if not infile.IsOpen():
        print("does not exist, so will abort")
        return False
    # declare histogram
    h = Hist(200, -10, 10)
    # run
    # read_simple(infile, h)
    # run_time = time.time() - start_time
    # print("runtime: %s" % run_time)
    # h.Draw()
    # raw_input("Press Enter to continue...")
    start_time = time.time()
    read_SetBranchAddress(infile, h)
    run_time = time.time() - start_time
    print("runtime: %s" % run_time)
    h.Draw()
    raw_input("Press Enter to continue...")

def download_file(alien_src, local_dest):
    """
    Download file `alien_src` to `local_dest`.

    Parameters
    ----------
    alien_src, local_dest : string
        Full path to files

    Returns
    -------
    int : File size in bytes
    """
    check_alien_token()
    try:
        os.makedirs(os.path.dirname(local_dest))
    except OSError:
        pass
    # fix the dest to include the file name
    if not os.path.basename(local_dest):
        local_dest = os.path.join(local_dest, os.path.basename(alien_src))
    with root_open("alien://" + alien_src) as f:
        if not f.Cp(local_dest):
            try:
                os.remove(local_dest)
            except OSError:
                pass  # file probably didn't exist at all
            raise OSError("An error occurred while downloading {}; "
                          "the broken file was deleted.".format(local_dest))
        return f.GetSize()

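# Usage sketch for download_file (the AliEn path is hypothetical; a valid
# token is required since check_alien_token is called first):
#
#     size = download_file('/alice/cern.ch/user/x/xyz/file.root',
#                          '/tmp/downloads/')
#     print 'downloaded {} bytes'.format(size)
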
def __init__(self, *args, **kwargs):
    self.mc_weight = kwargs.pop('mc_weight', True)
    self.posterior_trigger_correction = kwargs.pop('posterior_trigger_correction', True)
    self.embedding_spin_weight = kwargs.pop('embedding_spin_weight', True)
    self.embedding_reco_unfold = kwargs.pop('embedding_reco_unfold', True)
    self.embedding_trigger_weight = kwargs.pop('embedding_trigger_weight', True)
    self.tau_trigger_eff = kwargs.pop('tau_trigger_eff', True)
    super(Embedded_Ztautau, self).__init__(*args, **kwargs)
    with root_open(os.path.join(DAT_DIR, 'embedding_corrections.root')) as file:
        self.trigger_correct = file['ebmc_weight_{0}'.format(self.year % 1000)]
        self.trigger_correct.SetDirectory(0)
    if self.systematics:
        # normalize ISOL and MFS variations to same as nominal at preselection
        from ..categories import Category_Preselection
        nps = [('MFS_UP',), ('MFS_DOWN',), ('ISOL_UP',), ('ISOL_DOWN',)]
        nominal_events = self.events(Category_Preselection)[1].value
        for np in nps:
            np_events = self.events(Category_Preselection, systematic=np)[1].value
            self.norms[np] = nominal_events / np_events

def loadHistogramsFromFile(filename, histonames, with2d, with3d):
    """
    Loads specified histograms from the ROOT file given by filename and
    returns them. The histograms will no longer be associated with the file.
    """
    log = logging.getLogger('pyroplot')
    import rootpy
    from rootpy.io import root_open
    from rootpy.plotting import Hist
    histos = {}
    f = root_open(filename)
    nignored = 0
    for h in histonames:
        try:
            histo = f.Get(h)
        except rootpy.io.DoesNotExist:
            # this can happen if the reference file contains more histos than the others
            log.warn("%s not found in file %s" % (h, filename))
            continue
        # might want to ignore Hist2D etc.: VERY SLOW and plot processing
        # not optimally suited yet
        if ((histo.__class__.__name__ == "Hist"
             or histo.__class__.__name__ == "Profile")
                or ((with3d or with2d) and histo.__class__.__name__ == "Hist2D")
                or (with3d and (histo.__class__.__name__ == "Profile2D"
                                or histo.__class__.__name__ == "Hist3D"))):
            histo.SetDirectory(0)  # remove association with file
            histos[h] = histo
        else:
            log.debug("IGNORING %s as it is of class '%s'"
                      % (h, histo.__class__.__name__))
            nignored += 1
    f.close()
    log.info("Loaded %d histograms from file %s" % (len(histos), filename))
    if nignored:
        log.info("IGNORED %d matching 2D/3D histograms: to see these use "
                 "the --with-2D or --with-3D switches." % nignored)
    return histos

def calc_bg(fn):
    print 'computing backgrounds'
    with root_open(fn, 'update') as f:
        try:
            f.mkdir('processed')
        except ValueError:
            pass
        folders = ['background', 'weighted_background']
        for folder in folders:
            f.rm('processed/' + folder)
        for folder in folders:
            f.mkdir('processed/' + folder)
        # get signal per z section per eclass
        # NOTE: range(bin_min, bin_max); bin numbering starts at 1!
        for w in ['', 'weighted_']:
            bgs = [f.Get('raw/' + w + 'background' + str(i)) for i in range(0, 4)]
            for eclass, bg in enumerate(bgs):
                logging.info('Calculating ' + w + 'background for class ' + str(eclass))
                for sec_bin in range(1, 11):
                    # get one background per section
                    bg.GetZaxis().SetRange(sec_bin, sec_bin)
                    bg_tmp = bg.Project3D('yx')  # yes, 'yx'...
                    bg_tmp.SetNameTitle(
                        (bg.GetName()[:-1] + '_z_sec_' + str(sec_bin - 1)
                         + '_class_' + str(eclass)),
                        bg.GetTitle() + ' z section' + str(sec_bin - 1))
                    scale_background(bg_tmp)
                    f.cd('processed/' + w + 'background')
                    bg_tmp.Write()

def np2root(data, column_names, outname="output.root", tname="tree", dtype=float):
    """
    Converts a numpy array to a ROOT TTree and file.

    :param data: the 2D array containing M variables for N events
    :param column_names: the M variable names
    :param outname: name of the output ROOT file
    :param tname: name of the output tree
    :param dtype: float or int or list or dictionary; maps columns to data
        types in the ROOT tree.
    :return:
    """
    # adding support for different types
    branches = {}
    if not (isinstance(dtype, dict) or isinstance(dtype, list)):
        assert dtype in [float, int], "dtype not understood"
        mtype = FloatCol
        if dtype == int:
            mtype = IntCol
        branches = {col: mtype() for col in column_names}
    elif isinstance(dtype, dict):
        my_map = {col: FloatCol if val == float else IntCol
                  for col, val in dtype.iteritems()}
        branches = {col: my_map[col]() for col in column_names}
    else:
        my_map = [FloatCol if val == float else IntCol for val in dtype]
        branches = {col: my_map[i]() for i, col in enumerate(column_names)}
    fOut = root_open(outname, "RECREATE")
    tree = Tree(tname)
    tree.create_branches(branches)
    rows, cols = shape(data)
    for i in range(0, rows):
        for j in range(0, cols):
            # assign the value to the branch by column name
            setattr(tree, column_names[j], data[i, j])
        tree.Fill()
    fOut.Write()
    fOut.Close()
    print 'wrote ROOT file {name}'.format(name=outname)

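# Usage sketch for np2root (column names and output file are illustrative):
# write a 2x3 array where 'x' and 'y' become float branches and 'n' an
# integer branch via the dtype mapping.
#
#     import numpy as np
#     data = np.array([[0.5, 1.2, 3.0],
#                      [0.7, 0.9, 1.0]])
#     np2root(data, ['x', 'y', 'n'], outname='demo.root', tname='tree',
#             dtype={'x': float, 'y': float, 'n': int})
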
def roc(infiles, sig, bkg, optimize):
    sigs = {}
    bkgs = {}
    ROOT.TH1.AddDirectory(False)
    for fname in infiles:
        bname = os.path.basename(fname)
        _, category, flavour = tuple(bname.replace('.root', '').split('_'))
        if flavour != sig and flavour != bkg:
            continue
        log.info('reading file %s' % fname)
        with root_open(fname) as tfile:
            tree = tfile.tree
            tree.SetBranchStatus('*', 0)
            tree.SetBranchStatus('BDTG', 1)
            histo = plt.Hist1D(1010, 0, 1.01)
            for entry in tree:
                histo.Fill(entry.BDTG)
        cat = 'all' if not optimize else category
        if flavour == sig:
            if cat not in sigs:
                sigs[cat] = histo.Clone()
            else:
                sigs[cat] += histo
        else:
            if cat not in bkgs:
                bkgs[cat] = histo.Clone()
            else:
                bkgs[cat] += histo
    compressed = {}
    for cat in sigs:
        compressed[cat] = compress(sigs[cat], bkgs[cat])
    return get_roc(compressed)

def __init__(self, *args, **kwargs):
    self.tauspinner = kwargs.pop('tauspinner', True)
    self.posterior_trigger_correction = kwargs.pop('posterior_trigger_correction', True)
    super(Embedded_Ztautau, self).__init__(*args, **kwargs)
    with root_open(os.path.join(DAT_DIR, 'embedding_corrections.root')) as file:
        self.trigger_correct = file['ebmc_weight_{0}'.format(self.year % 1000)]
        self.trigger_correct.SetDirectory(0)

def main(infile, outfile):
    # open my data file
    f = root_open(infile)
    # build a dictionary of the file structure
    dic = {p: t for p, d, t in f if p != ''}
    # open a new file to put the data in after the cut is applied
    f_store = pd.HDFStore(outfile)
    # iterate over the directories
    for d, tl in dic.iteritems():
        # iterate over the list of trees in the directory
        for t in tl:
            # tree is the original data tree
            tree = f[d][t]
            # print tree.branchnames
            df = pd.DataFrame(tree2rec(tree, branches=tree.branchnames))
            print "writing to: {}/{}".format(d, t), tree.GetEntries()
            f_store.append('{}/{}'.format(d, t), df)
            tree.IsA().Destructor(tree)
    f_store.close()
    f.close()

def hdf2root(infile, outfile, verbose=False):
    try:
        from rootpy.io import root_open
        from rootpy import asrootpy
        from root_numpy import array2tree
    except ImportError:
        raise ImportError(
            "Please load ROOT into PYTHONPATH and install rootpy+root_numpy:\n"
            "  `pip install rootpy root_numpy`")

    from tables import open_file

    h5 = open_file(infile, 'r')
    rf = root_open(outfile, 'recreate')

    # 'walk_nodes' does not allow checking whether a node is a group or a
    # leaf, its exception handling is bugged, and introspection/typechecking
    # is bugged as well, hence this nested loop instead of a simple `walk`
    for group in h5.walk_groups():
        for leafname, leaf in group._v_leaves.items():
            arr = leaf[:]
            if arr.dtype.names is None:
                dt = np.dtype((arr.dtype, [(leafname, arr.dtype)]))
                arr = arr.view(dt)
            treename = leaf._v_pathname.replace('/', '_')
            tree = asrootpy(array2tree(arr, name=treename))
            tree.write()
    rf.close()
    h5.close()

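# Usage sketch for hdf2root (file names are illustrative): each array leaf
# in the HDF5 file becomes one TTree in the output, named after its path
# with '/' replaced by '_'.
#
#     hdf2root('measurements.h5', 'measurements.root')
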
def get_fit_t_dependece(chi2, t_binning, wave, component=-1,
                        mmin=.5, mmax=2.5, nPoints=1000):
    """
    Returns the t' dependence histogram specified

    @param chi2: Chi2 to be evaluated
    @type chi2: chi2
    @param t_binning: t' bin borders to be used
    @type t_binning: list
    @param wave: Number of the wave to be used
    @type wave: int
    @param component: Component to be used
    @type component: int
    @param mmin: Lower integral limit
    @type mmin: float
    @param mmax: Upper integral limit
    @type mmax: float
    @param nPoints: Number of points used for integration
    @type nPoints: int
    @return: t' dependence histogram
    @rtype: Hist
    """
    nTbin = len(t_binning) - 1
    if not nTbin == chi2.nTbin():
        raise IndexError  # number of t bins does not match
    values = []
    name = ''
    for tbin in range(nTbin):
        values.append(get_integral_value(chi2, tbin, wave, component,
                                         mmin, mmax, nPoints))
    hist = TH1D(name, name, nTbin, np.asarray(t_binning, dtype=np.float64))
    hist.SetTitle(name)
    hist.SetName(name)
    for i in range(nTbin):
        hist.SetBinContent(i + 1, values[i] / (t_binning[i + 1] - t_binning[i]))
    with root_open("samuel.root", "RECREATE"):
        hist.Write()
    return hist

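# Worked example of the per-bin normalization in get_fit_t_dependece: with
# t_binning = [0.1, 0.2, 0.4] there are two t' bins of widths 0.1 and 0.2;
# integral values [5.0, 8.0] would be stored as 5.0/0.1 = 50.0 and
# 8.0/0.2 = 40.0, i.e. as densities per unit t'.
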
def work(self):
    pickle_name = os.path.splitext(self.file)[0]
    if self.profile is not False and self.profile is not None:
        pickle_name += '_profiled_mu{0}'.format(self.profile)
    if self.observed:
        pickle_name += '_observed'
    pickle_name += '.pickle'
    if os.path.exists(pickle_name) and not self.refit:
        with open(pickle_name, 'r') as pickle_file:
            result = pickle.load(pickle_file)
            if self.workspace_name in result:
                return result[self.workspace_name]
    # get the significance of the workspace
    with root_open(self.file) as file:
        ws = file[self.workspace_name]
        result = significance(ws, observed=self.observed,
                              injection=self.injection,
                              injection_test=self.injection_test,
                              profile=self.profile,
                              **self.fit_params)
    # write the value into a pickle
    with open(pickle_name, 'w') as pickle_file:
        pickle.dump({self.workspace_name: result}, pickle_file)
    return result

def __init__(self, outputFileName, treeName):
    # Open/recreate output file
    self.theTreeFile = root_open(outputFileName, "RECREATE")
    # Create tree with given name and branches structure
    self.theTree = Tree(treeName)

def i3root2hdf5(infile, force=False):
    h5file = infile + '.h5'
    bad_keys = ['AntMCTree', 'MasterTree']
    rf = root_open(infile, 'r')
    keys = [k.name for k in rf.keys()]
    if force:
        mode = 'w'
    else:
        mode = 'a'
    h5 = h5py.File(h5file, mode)
    for key in keys:
        if key in bad_keys:
            continue
        tree = rf[key]
        arr = tree2array(tree)
        try:
            h5.create_dataset(
                key,
                data=arr,
                compression='gzip',
                compression_opts=9,
                shuffle=True,
                fletcher32=True,
            )
        except TypeError:
            continue
    h5.flush()
    h5.close()

def __init__(self, filename, Lumi=10 * 1000, XSName="XS", EventName="NEvent"):
    self.file = root_open(filename, "read")
    self.lumi = Lumi
    self.xs = self.file.Get(XSName).GetBinContent(2) if XSName in self.file else 0
    self.Nevent = self.file.Get(EventName).GetBinContent(2) if EventName in self.file else 0
    self.cutpat = re.compile(r"^(\w*)_(\d+)")
    self.isData = self.__isData__()

def __init__(self, channels, baseCutSet, inFile, outfile='./results/output.root',
             maxEvents=float("inf"), intLumi=10000, rowCleaner='',
             cutModifiers=[], ntupleDir='ntuple'):
    '''
    channels: list of strings or single string in the format (e.g.) eemm for
        a 2e2mu final state. '4l', 'zz' and 'ZZ' turn into ['eeee', 'eemm', 'mmmm']
    baseCutSet: string with the name of the cut template to use
    inFile: string of an input file name, with path
    outfile: string of an output file name, with path
    maxEvents: stop after this many events processed
    intLumi: in output text file, report how many events we would expect for
        this integrated luminosity
    rowCleaner: name of a module to clean out redundant rows. If an empty
        string (or other False boolean), no cleaning is performed.
    '''
    self.cutSet = [baseCutSet] + cutModifiers
    CutClass = getCutClass(baseCutSet, *cutModifiers)
    self.cuts = CutClass()
    self.outFile = outfile
    self.cutOrder = self.cuts.getCutList()
    self.sample = inFile.split('/')[-1].replace('.root', '')
    self.inFile = root_open(inFile)
    assert bool(inFile), 'No file %s' % self.inFile
    self.maxEvents = maxEvents
    # if we don't use all the events, we need to know how many we would have
    # done in the whole thing
    if self.maxEvents < float('inf'):
        self.ntupleSize = {}
    self.channels = parseChannels(channels)
    self.ntuples = {}
    for channel in parseChannels(channels):
        try:
            nt = self.inFile.Get('/'.join([channel, ntupleDir]))
            # if not nt.GetEntries():
            #     raise DoesNotExist('')
            self.ntuples[channel] = nt
            nt.create_buffer()
        except DoesNotExist:
            print "Ntuple for channel %s is empty or not found! Skipping." % channel
            self.channels.remove(channel)
            continue
        if self.maxEvents < float('inf'):
            self.ntupleSize[channel] = self.ntuples[channel].GetEntries()
    self.results = NtupleCopier(self.outFile, **self.ntuples)
    self.prepareCutSummary()
    self.intLumi = intLumi
    self.cleanRows = bool(rowCleaner)
    if self.cleanRows:
        self.CleanerClass = getCleanerClass(rowCleaner)

def test_draw():
    with root_open(FILE_PATHS[0]) as f:
        tree = f.tree
        tree.draw('a_x')
        tree.draw('a_x:a_y')
        tree.draw('a_x:TMath::Exp(a_y)')
        tree.draw('a_x:a_y:a_z')
        tree.draw('a_x:a_y:a_z:b_x')
        tree.draw('a_x:a_y:a_z:b_x:b_y', options='para')
        h1 = Hist(10, -1, 2, name='h1')
        h2 = Hist2D(10, -1, 2, 10, -1, 2)
        h3 = Hist3D(10, -1, 2, 10, -1, 2, 10, -1, 2)
        # dimensionality does not match
        assert_raises(TypeError, tree.draw, 'a_x:a_y', hist=h1)
        # name does not match
        assert_raises(ValueError, tree.draw, 'a_x>>+something', hist=h1)
        # hist is not a TH1
        assert_raises(TypeError, tree.draw, 'a_x:a_y', hist=ROOT.TGraph())
        # name does match and is fine (just redundant)
        tree.draw('a_x>>h1', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        h1.Reset()
        tree.draw('a_x>>+h1', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        h1.Reset()
        # both binning and hist are specified
        assert_raises(ValueError, tree.draw, 'a_x>>+h1(10, 0, 1)', hist=h1)
        tree.draw('a_x', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        tree.draw('a_x:a_y', hist=h2)
        assert_equal(h2.Integral() > 0, True)
        tree.draw('a_x:a_y:a_z', hist=h3)
        assert_equal(h3.Integral() > 0, True)
        h3.Reset()
        tree.draw('a_x>0:a_y/2:a_z*2', hist=h3)
        assert_equal(h3.Integral() > 0, True)
        # create a histogram
        hist = tree.draw('a_x:a_y:a_z', create_hist=True)
        assert_equal(hist.Integral() > 0, True)
        hist = tree.draw('a_x:a_y:a_z>>new_hist_1')
        assert_equal(hist.Integral() > 0, True)
        assert_equal(hist.name, 'new_hist_1')
        # create_hist=True is redundant here
        hist = tree.draw('a_x:a_y:a_z>>new_hist_2', create_hist=True)
        assert_equal(hist.Integral() > 0, True)
        assert_equal(hist.name, 'new_hist_2')

def getNumberOfInitialEvents(inputFileName):
    # Open file and get number of entries in hcount
    theFile = root_open(inputFileName, "READ")
    theCountHisto = theFile.Get("FlatTree/hcount")
    count = theCountHisto.GetEntries()
    theFile.Close()
    return count

def __init__(self, fn, maxEvts, trainFrac, isBkg, iseBkg, iseSig):
    print "Initializing Container!"
    # self.tin = r.TChain("EcalVeto")
    # self.tin.Add(fn)
    self.tfile = root_open(fn, 'r+')
    self.tin = self.tfile.EcalVeto
    # self.tin.Print()
    self.maxEvts = maxEvts
    self.trainFrac = trainFrac
    self.isBkg = isBkg
    self.iseBkg = iseBkg
    self.iseSig = iseSig

def renameElements(filename, suffix, debug):
    print("Renaming elements in filename: \"{}\"".format(filename))
    elementNamesMap = {}
    with root_open(filename, "UPDATE") as f:
        if debug:
            print("f.ls() pre:")
            f.ls()
        # Cannot just iterate over GetListOfKeys because the hash list is
        # updated when an element is added to a file... Instead, we copy the
        # keys so the iterable is not updated (this should really be treated
        # as a ROOT bug...)
        keys = f.GetListOfKeys()
        simpleListOfKeys = []
        for key in keys:
            simpleListOfKeys.append(key.GetName())
        # Loop over the available keys. If it is the correction task, then
        # unpack the component lists
        for key in simpleListOfKeys:
            if key.endswith("_" + suffix):
                print("Skipping the processing of element {} since it has "
                      "already been processed!".format(key))
                # Save the name in the proper format to be used in the YAML map
                elementNamesMap[key.replace("_" + suffix, "")] = [key]
                # Don't apply the suffix twice
                continue
            element = f.Get(key)
            print("Processing element: {}".format(element.GetName()))
            # Remove the existing element from the file
            f.Delete(element.GetName() + ";*")
            # Rewrite the name
            if "AliEmcalCorrectionTask" in element.GetName():
                for component in element:
                    elementNamesMap = rewriteWithDifferentName(component, suffix,
                                                               elementNamesMap)
            else:
                elementNamesMap = rewriteWithDifferentName(element, suffix,
                                                           elementNamesMap)
        if debug:
            print("keys: {}".format(keys.GetEntries()))
            print("f.ls() post:")
            f.ls()
    # Save the map for use with the comparison script.
    # Write the reference map to the same directory as the reference file.
    yamlReferenceMapLocation = os.path.join(os.path.dirname(filename),
                                            "referenceMap.yaml")
    print("Writing yaml reference map to \"{}\"".format(yamlReferenceMapLocation))
    with open(yamlReferenceMapLocation, "w+b") as f:
        yaml.safe_dump(elementNamesMap, f, default_flow_style=False)

def generate_root_file_with_tree(self, file_name, mode="update"):
    f = root_open(file_name, mode)
    # how can we capture the qualifier information?
    # It seems wasteful to have to duplicate it...
    tree_meta = Tree(name="Table 1::metadata", title="Table 1", model=DataRecord)
    tree_meta.create_branches({
        'reaction': 'C',
        'qualifier_1_type': 'C',
        'qualifier_1_value': 'C',
        'qualifier_2_type': 'C',
        'qualifier_2_value': 'C'
    })
    tree_meta.reaction = 'P --> P'
    tree_meta.qualifier_1_type = 'SQRT(S)'
    tree_meta.qualifier_1_value = '8000.0 GeV'
    # presumably the second qualifier pair was meant here
    tree_meta.qualifier_2_type = ''
    tree_meta.qualifier_2_value = '95% CL Limit'
    tree_meta.fill()
    tree = Tree(name="Table 1::data", title="Table 1", model=DataRecord)
    # F - Float, I - Integer
    for i in xrange(1000):
        tree.qual_1_type = "sqrt(s)"
        tree.qual_1_value = "8000.0 GeV"
        tree.qual_2_type = ""
        tree.qual_2_value = "95% CL upper limit [fb]"
        tree.x_val = gauss(1., 4.)
        tree.x_err_y_minus = gauss(0., 1)
        tree.x_err_y_plus = gauss(0., 1)
        tree.expected_val = gauss(1., 4.)
        tree.expected_err_y_minus = gauss(1., 4.)
        tree.expected_err_x_minus = gauss(1., 4.)
        tree.observed_val = gauss(1., 4.)
        tree.observed_err_y_minus = gauss(1., 4.)
        tree.observed_err_x_minus = gauss(1., 4.)
        tree.i = i
        tree.fill()
    tree.write()
    f.close()

def test_attrs():
    with root_open(FILE_PATHS[0]) as f:
        tree = f.tree
        tree.read_branches_on_demand = True
        tree.define_object('a', 'a_')
        tree.define_collection('b', 'b_', 'b_n')
        for event in tree:
            # test a setattr before a getattr with caching
            new_a_y = random()
            event.a_y = new_a_y
            assert_almost_equal(event.a_y, new_a_y)
            assert_equal(event.a_x, event.a.x)
            assert_equal(len(event.b) > 0, True)

def copy_in_trigger_signal(in_files_name, out_name, tree_name, prefix,
                           cdc_events, cth_events, rand_t=None):
    # Convert input lists to sets first
    set_cdc_events = set(cdc_events)
    set_cth_events = set(cth_events)
    # Define the chain of input trees
    in_chain = TreeChain(name=tree_name, files=in_files_name)
    # First create a new file to save the new tree in:
    out_file = root_open(out_name, "r+")
    # Add the time shift if we want it in the tree
    ExtraBranches = Tagged
    if rand_t is not None:
        ExtraBranches += Smeared
    # Get the new tree with its extra branches
    out_tree = Tree(tree_name, model=ExtraBranches.prefix(prefix))
    # This creates all the same branches in the new tree but their addresses
    # point to the same memory used by the original tree.
    out_tree.create_branches(in_chain._buffer)
    out_tree.update_buffer(in_chain._buffer)
    # Now loop over the original tree(s) and fill the new tree
    for entry in in_chain:
        # Add in the new values
        this_event_number = entry[prefix + "EventNumber"].value
        out_tree.__setattr__(prefix + "GoodTrack",
                             this_event_number in set_cdc_events)
        out_tree.__setattr__(prefix + "GoodTrig",
                             this_event_number in set_cth_events)
        if rand_t is not None:
            try:
                out_tree.__setattr__(prefix + "SmearTime",
                                     rand_t[this_event_number])
            except:
                for key, item in entry.iteritems():
                    print key, item
        # Fill, noting that most of the buffer is shared between the chain
        # and the output tree
        out_tree.Fill()
    # Close it up
    out_tree.Write()
    out_file.Close()

def main():
    style = get_style('CMSTDR')
    style.SetTitleSize(0.07, "XYZ")
    style.SetPalette(54)  # kBlueYellow
    style.SetPadLeftMargin(0.10)
    style.SetPadRightMargin(0.12)
    set_style(style)
    input_dir = ('/home/sauvan/Documents/HEP/Projects/CMS/'
                 'L1CalorimeterTrigger_Phase2HGCal/Misc/FastShower/output/')
    files = glob(input_dir + '*.root')
    for file_name in files:
        with root_open(file_name) as file:
            for object in file.objects(cls=TCanvas):
                if 'Event 1' in object.GetName():
                    plot_event(splitext(basename(file_name))[0], object)

def load_pgun():
    global infile_r
    # infile = 'ntuple_SingleMuon_Toy_5GeV_add.3.root'
    infile = '/tmp/jiafu/ntuple_SingleMuon_Toy_2GeV.0.root'
    infile_r = root_open(infile)
    tree = infile_r.ntupler.tree
    # tree = TreeChain('ntupler/tree', [infile])
    print('[INFO] Opening file: %s' % infile)
    # Define collections
    tree.define_collection(name='hits', prefix='vh_', size='vh_size')
    tree.define_collection(name='tracks', prefix='vt_', size='vt_size')
    tree.define_collection(name='particles', prefix='vp_', size='vp_size')
    return tree

def main():
    latex, files = decode_arguments()
    for path in files:
        with root_open(path, 'r') as f:
            print(path)
            print()
            if latex:
                print(f.latex_pull_table.GetTitle())
            else:
                print(f.pull_table.GetTitle())
            print()

def download_from_grid_archive(alien_src, local_dest):
    """
    Download the files from a grid zip archive at `alien_src` to `local_dest`.
    If all files from the archive already exist locally, do not re-download them.

    Parameters
    ----------
    alien_src, local_dest : string
        Full path to files

    Returns
    -------
    int : File size in bytes
    """
    check_alien_token()
    try:
        os.makedirs(os.path.dirname(local_dest))
    except OSError:
        pass
    # fix the dest to include the file name
    if not os.path.basename(local_dest):
        local_dest = os.path.join(local_dest, os.path.basename(alien_src))
    with root_open("alien://" + alien_src) as f:
        if not f.IsArchive():
            raise ValueError(
                "{} does not point to an archive file.".format(alien_src))
        fsize = f.GetSize()
        fnames = [m.GetName() for m in f.GetArchive().GetMembers()]
        local_dir = os.path.dirname(local_dest)
        if all([os.path.isfile(os.path.join(local_dir, fname))
                for fname in fnames]):
            raise OSError("Files exist; not redownloading")
        if not f.Cp(local_dest):
            raise RuntimeError("Could not download {}!".format(alien_src))
    with zipfile.ZipFile(local_dest) as zf:
        try:
            zf.extractall(os.path.dirname(local_dest))
        except IOError:
            print("Error unzipping {}. File was deleted".format(local_dest))
    # Delete the zip archive file
    try:
        os.remove(local_dest)
    except OSError:
        pass  # file probably didn't exist at all?!
    return fsize

def save_to_root(self, fit_type):
    # Open root file, and create tree
    f = root_open(self.root_filename, "update")
    t = f.paramTree
    # Cannot write to tree buffer without this, for some reason.
    for entries in t:
        continue
    # Check if saving true or fitted parameter values
    if fit_type == 0:
        params_f = open(self.true_params_filename, 'r')
    else:
        params_f = open(self.fitted_params_filename, 'r')
    for line in params_f.readlines():
        # Split line into items
        items = line.split()
        # Test if first entry in file is a label (can cast to int). If not, continue.
        try:
            int(items[0])
        except:
            continue
        # Save type of fit, and label to tree
        t.fitType = fit_type
        t.label = int(items[0])
        # Save fitted parameter value to tree. If it doesn't exist, continue.
        try:
            t.paramValue = float(items[1])
        except Exception:
            continue
        # Save parameter error, if it exists. If not, set to zero.
        try:
            t.paramError = float(items[4])
        except Exception:
            t.paramError = 0.0
        # Fill tree
        t.fill()
    # Write to tree, and close file
    t.write("", ROOT.TObject.kWriteDelete)
    f.close()

def _getHist(self):
    with root_open(self.fName) as f:
        try:
            hClus = asrootpy(f.Get('demo/Clusters/nClusters'))
            hClus.SetDirectory(0)
            hClus.SetName('nClusters')
            self.dHist['nClusters'] = hClus
        except:
            self.dHist['nClusters'] = None
        for region in REGIONS:
            hOrig = asrootpy(f.Get('demo/' + self.dirName + '/' + region))
            hOrig.SetDirectory(0)
            hOrig.SetName(region + '_Original')
            hNorm = hOrig.Clone()
            hNorm.SetDirectory(0)
            hNorm.Scale(1 / hNorm.integral())
            hNorm.SetName(region + '_NormTo1')
            hCut = hOrig.Clone()
            hCut.SetDirectory(0)
            hCut = self._doCut(hCut)
            hCut.SetName(region + '_Cut')
            hCutNorm = hOrig.Clone()
            hCutNorm.SetDirectory(0)
            hCutNorm = self._doCut(hCutNorm)
            hCutNorm.Scale(1 / hCutNorm.integral())
            hCutNorm.SetName(region + '_CutNormTo1')
            hCDF = hOrig.Clone()
            hCDF.SetDirectory(0)
            hCDF = self._doCDF(hCDF)
            hCDF.SetName(region + '_CDF')
            hCutCDF = hOrig.Clone()
            hCutCDF.SetDirectory(0)
            hCutCDF = self._doCut(hCutCDF)
            hCutCDF = self._doCDF(hCutCDF)
            hCutCDF.SetName(region + '_CutCDF')
            self.dHist[region + '_Original'] = hOrig
            self.dHist[region + '_NormTo1'] = hNorm
            self.dHist[region + '_Cut'] = hCut
            self.dHist[region + '_CutNormTo1'] = hCutNorm
            self.dHist[region + '_CDF'] = hCDF
            self.dHist[region + '_CutCDF'] = hCutCDF
    return

def fetchFiles(d):
    dtag = d.split('-')[0][1:].replace('/', '_')
    eospath_ = SOURCEEOSPATH + d.rsplit('/', 1)[0]
    timestamps = subprocess.check_output(
        shlex.split('eos {0} ls {1}'.format(XDIRECTOR, eospath_))).split()
    if not timestamps:
        print("--> Zero timestamp directory found under", eospath_)
        print("--> Empty list returned for", dtag)
        return dtag, []
    timestamps = sorted(timestamps,
                        key=lambda x: datetime.strptime(x, "%y%m%d_%H%M%S"))
    eospath = join(eospath_, timestamps[-1])  # most recent submission
    flist = []
    try:
        flist = subprocess.check_output(
            shlex.split('eos {0} find -name "*ffAOD*.root" -f --xurl {1}'.format(
                XDIRECTOR, eospath))).split()
    except:
        print("--> cannot stat eos path: ", eospath)
        print("--> Empty list returned for", dtag)
        return dtag, []
    print(dtag, "Total number of files (uncleaned):", len(flist))
    nonzeroes_ = []
    for f in flist:
        nevents = 0
        try:
            thefile = root_open(f)  # make sure it can be opened properly
        except Exception as e:
            print("--> Cannot open file", f)
            print(str(e))
            continue
        try:
            events = Events(f)
            for evt in events:
                nevents += 1
        except Exception as e:
            print("--> FWLite failed for", f)
            print(str(e))
            continue
        if nevents > 0:
            nonzeroes_.append(f)
    return dtag, nonzeroes_

def wrapper(self, **kwargs):
    with root_open(self.f_name, 'update') as self.f:
        self.sums = self.f.MultEstimators.__getattr__(self.sums_dir_name)
        try:
            self.results_post = self.f.MultEstimators.__getattr__(self.results_dir_name)
        except AttributeError:
            # results dir does not exist (yet)
            pass
        return_value = func(self, **kwargs)
        # Delete all TLists in sums since we own them and they would
        # otherwise be left in memory
        for obj in self.sums:
            if isinstance(obj, collection.List):
                obj.Delete()
        self.sums.Delete()
        return return_value

def append_lumi_scale_branch(src, dst, xsec, target_lumi):
    """Append a branch to the train and test trees of a sample for scaling
    the MC sample luminosity to the target luminosity.

    The scaling value is calculated as the ratio of the target luminosity to
    the sample luminosity, where the sample luminosity is defined as the
    difference between the number of positively and negatively weighted
    events of the full MC sample divided by the cross-section of the MC
    sample.

    Parameters
    ----------
    src : string
        The path to the sample.
    dst : string
        The output path to the new sample with training and testing trees.
    xsec : numeric
        The cross-section of the Monte-Carlo sample in units of picobarns (pb).
    target_lumi : numeric
        The target luminosity in units of inverse picobarns (pb-1).
    """
    with root_open(src) as f:
        n_pos = f.Get('CountPosWeight').GetBinContent(1)
        n_neg = f.Get('CountNegWeight').GetBinContent(1)
        sample_lumi = (n_pos - n_neg) / float(xsec)
        lumi_scale = target_lumi / sample_lumi
    with root_open(dst, 'a') as f:
        for name in ['train', 'test']:
            t = f.Get(name)
            t.create_branches({'lumi_scale': 'F'})
            b = t.GetBranch('lumi_scale')
            for entry in t:
                entry.lumi_scale = lumi_scale
                b.Fill()
            t.Write()

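# Worked example of the luminosity arithmetic in append_lumi_scale_branch
# (numbers are made up): with n_pos = 1.2e6, n_neg = 2e5 and
# xsec = 831.76 pb, sample_lumi = (1.2e6 - 2e5) / 831.76 ≈ 1202.3 pb-1,
# so scaling to target_lumi = 36000 pb-1 gives
# lumi_scale = 36000 / 1202.3 ≈ 29.9.
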
def tmva_process(info):
    """
    Apply a booked TMVA method to a tree: read the feature branches, evaluate
    the MVA for each event and store the prediction in a new branch.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
    """
    import ROOT

    reader = ROOT.TMVA.Reader()
    features_pointers = []
    for feature in info.features:
        features_pointers.append(array.array('f', [0.]))
        reader.AddVariable(feature, features_pointers[-1])
    model_type, sigmoid_function = info.model_type
    reader.BookMVA(info.method_name, info.xml_file)

    file_root = root_open(info.filename, mode='update')
    tree = file_root[info.treename]
    for ind, feature in enumerate(info.features):
        tree.SetBranchAddress(feature, features_pointers[ind])
    tree.create_branches({info.method_name: 'F'})
    branch = tree.get_branch(info.method_name)

    signal_efficiency = None
    if model_type == 'classification' and sigmoid_function is not None \
            and 'sig_eff' in sigmoid_function:
        signal_efficiency = float(sigmoid_function.strip().split('=')[1])
        assert 0.0 <= signal_efficiency <= 1., \
            'signal efficiency must be in [0, 1], not {}'.format(signal_efficiency)

    for event in range(tree.GetEntries()):
        tree.GetEntry(event)
        if model_type == 'classification':
            if signal_efficiency is not None:
                prediction = reader.EvaluateMVA(info.method_name, signal_efficiency)
            else:
                prediction = reader.EvaluateMVA(info.method_name)
        else:
            prediction = reader.EvaluateRegression(info.method_name)[0]
        tree.__setattr__(info.method_name, prediction)
        branch.Fill()
    tree.Write()
    file_root.Close()

def __init__(self, name, filename, sysName=""):
    self.name = name
    self.filename = filename
    self.var = {}
    self.cut = {}
    self.histograms = {}
    self.file = None
    self.tree = None
    self.isSys = False
    if filename is not None:
        self.file = root_open(filename)
        if sysName == "":
            self.tree = self.file.Nominal
        else:
            self.tree = self.file.Get(sysName)
            self.isSys = True

def add_files(groups, configs):
    hall = ph.HChain(args.topLevel)
    for group in configs[groups]:
        hc = ph.HGroup(group['name'])
        logger.log(25, "Group: {0:s}".format(group['name']))
        for f in group['files']:
            logger.log(25, "\tPattern: {0:s}".format(f))
            for fname in glob.glob(f):
                logger.log(25, "\t\tAdding {0:s}".format(fname))
                rootFile = root_open(fname)
                hc.append(rootFile)
        if len(hc) == 0:
            raise ValueError("{0:s} has no files loaded.".format(group['name']))
        logger.log(25, "\tAdding {0:s}".format(hc))
        hall.append(hc)
    return hall

def get_file(ntuple_path=NTUPLE_PATH, student=DEFAULT_STUDENT, hdf=False,
             suffix='', force_reopen=False):
    ext = '.h5' if hdf else '.root'
    filename = student + suffix + ext
    if filename in FILES and not force_reopen:
        return FILES[filename]
    file_path = os.path.join(ntuple_path, filename)
    # file_path = os.path.join(ntuple_path, student + suffix, filename)
    log.info("opening {0} ...".format(file_path))
    if hdf:
        # student_file = tables.open_file(file_path)  # , driver="H5FD_CORE"
        log.error('Not implemented yet')
        raise RuntimeError('Not implemented yet')
    else:
        student_file = root_open(file_path, 'READ')
        FILES[filename] = student_file
    return student_file

def reload_histograms(self, input_filename):
    """
    Read back histograms from the given ROOT file.
    May need to append histograms.

    returns: Should return True if histograms were written without problem.
             If anything else is returned, processing of the trees will stop.
    """
    results = []
    with root_open(input_filename, "r") as input_file:
        for hist in self.all_plots:
            indir = input_file.GetDirectory(hist.directory_name)
            results.append(hist.from_root(indir))
    ok = all(results)
    return ok

def read_2cumulant_Aside(self):
    datafile = root_open(self.filename, 'read')
    directory = datafile.Get(self.directory)
    cumu_dW2A = rnp.hist2array(
        directory.Get('cumulants').Get('standard').Get('dW2A').Get('cumu_dW2A'))
    cumu_dW2TwoA = rnp.hist2array(
        directory.Get('cumulants').Get('standard').Get('dW2TwoA').Get('cumu_dW2TwoA'))
    self.differential[:, 0, ..., dW2A] = cumu_dW2A
    self.differential[:, 1, ..., dW2A] = cumu_dW2A
    self.differential[:, 2, ..., dW2A] = cumu_dW2A
    self.differential[:, 0, ..., dW2TwoA] = cumu_dW2TwoA[0]
    self.differential[:, 1, ..., dW2TwoA] = cumu_dW2TwoA[1]
    self.differential[:, 2, ..., dW2TwoA] = cumu_dW2TwoA[2]

def write_out_file(infile, outfile, tree=None):
    f = root_open(infile)
    T = f[tree]
    cells = filter(lambda x: x.startswith('cell'), T.branchnames)
    assert len(cells) == sum(map(np.prod, LAYER_SPECS)) + OVERFLOW_BINS
    X = pd.DataFrame(tree2array(T, branches=cells)).values
    E = pd.DataFrame(tree2array(T, branches=['TotalEnergy'])).values.ravel()
    with HDF5File(outfile, 'w') as h5:
        for layer, (sh, (l, u)) in enumerate(zip(LAYER_SPECS, LAYER_DIV)):
            h5['layer_{}'.format(layer)] = X[:, l:u].reshape((-1,) + sh)
        h5['overflow'] = X[:, -OVERFLOW_BINS:]
        h5['energy'] = E.reshape(-1, 1)

def augment_file(in_folder, out_folder, tree_name, mcolls):
    # first, copy the original ROOT file to its destination,
    # keeping the directory structure the same
    # data_outdir = data_outpath + data_file
    # if not os.path.exists(data_outdir):
    #     os.makedirs(data_outdir)
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    data_outfile = os.path.join(out_folder, Config.MC_filename)
    data_infile = os.path.join(in_folder, Config.MC_filename)
    copyfile(data_infile, data_outfile)
    # tree_name = "ClassTree"
    # now, can read the file from its new location and change it
    fcoll = FileCollection({data_outfile: cuts.no_cut}, 0.0, 1.0,
                           tree_name=tree_name)
    length = fcoll.get_length()
    indata = utils.read_data(fcoll, start=0, stop=length,
                             branches=Config.branches, tree_name=tree_name)
    # loop over ModelCollections here to get the prediction from each
    out_branches = []
    prepared_dtype = []
    branch_names = []
    for mcoll in mcolls:
        out_branches.append(mcoll.predict(indata))
        branch_names.append(mcoll.name)
        prepared_dtype.append((mcoll.name.encode("ascii"), 'f4'))
    print prepared_dtype
    # make it into the correct type and shape
    new_branches = np.array(np.zeros(length), dtype=prepared_dtype)
    for out_data, branch_name in zip(out_branches, branch_names):
        new_branches[branch_name] = out_data
    # now re-open the output file in append mode
    outfile = root_open(data_outfile, mode="a")
    outtree = outfile.Get(tree_name + "/candTree")
    root_numpy.array2tree(new_branches, tree=outtree)
    outfile.write()
    outfile.close()

def _set_count_histogram_attributes(self):
    """Set the count histograms as attributes accessible by their name."""
    self._count_histograms = []
    # Aggregate the count histograms across all files.
    with contextlib2.ExitStack() as stack:
        files = [stack.enter_context(root_open(filename))
                 for filename in self.filenames]
        for obj in files[0]:
            if isinstance(obj, ROOT.TH1):
                name = obj.GetName()
                hist = sum(f.Get(name) for f in files)
                hist.SetName(name)
                hist.SetDirectory(0)
                setattr(self, name, hist)
                self._count_histograms.append(hist)

def save_all(self, runs, fname='strt_session.root'):
    with root_open(fname, 'recreate') as root_file:
        # d1 = root_file.mkdir('Test1')
        # d1.cd()
        # ntuple = Ntuple(('a', 'b', 'c'), name="test")
        # for i in range(20):
        #     ntuple.Fill(gauss(.5, 1.), gauss(.3, 2.), gauss(13., 42.))
        # ntuple.write()
        for run in runs:
            run_dir = root_file.mkdir(run.name)
            run_dir.cd()
            event_tree = Tree('Events', model=RootSaver.rEvent)
            track_tree = Tree('Tracks', model=RootSaver.rTrack)
            for event in run.events:
                event_tree.id = event.id
                for i in range(len(event.hits)):
                    event_tree.xhits.push_back(event.hits[i].x)
                    event_tree.yhits.push_back(event.hits[i].y)
                event_tree.nHits = len(event.hits)
                event_tree.nTracks = len(event.tracks)
                event_tree.nGoodTracks = len(
                    [t for t in event.tracks if t.is_good])
                event_tree.fill(reset=True)
                for track in event.tracks:
                    track_tree.id = track.id
                    track_tree.event_id = track.event_id
                    for i in range(len(track.hit_indices)):
                        track_tree.hit_indices.push_back(track.hit_indices[i])
                        track_tree.residuals.push_back(track.residuals[i])
                        track_tree.lincoords.push_back(track.lincoor[i])
                    track_tree.color = track.int_color()
                    track_tree.length = track.length()
                    track_tree.rho = track.rho
                    track_tree.theta = track.theta
                    track_tree.x0 = track.get_start_point()[0]
                    track_tree.y0 = track.get_start_point()[1]
                    track_tree.x1 = track.get_end_point()[0]
                    track_tree.y1 = track.get_end_point()[1]
                    track_tree.nHits = len(track.hit_indices)
                    track_tree.R2 = track.R2
                    track_tree.is_good = track.is_good
                    track_tree.fill(reset=True)
            event_tree.write()
            track_tree.write()

def write_files(self, filename, **kwargs):
    """Write output histograms to file

    Args:
        filename: Name of output file
    """
    print("{}: write results to file {} and folder".format(self._name, filename))
    base_folder = "{}/BayesSubUnfolding/".format(self._name)
    print(base_folder)
    open_as = "append" if kwargs.get("append", False) else "recreate"
    with root_open(filename, open_as) as output_file:
        TDir = output_file.mkdir("{}{}".format(base_folder, "JetConeJtWeightBin"),
                                 title="JetConeJtWeightBin", recurse=True)
        TDir.cd()
        # output_file.cd(TDir)
        for i, (jt, pt) in enumerate(zip(self._hJtMeasBin, self._jetPtBins)):
            jt.name = "JetConeJtWeightBinNFin{0[NFin]:02d}JetPt{0[pT]:02d}".format(
                {"NFin": self._NFin, "pT": i})
            jt.title = "Finder:Full_Jets_R04_00 p_{{T,jet}} : {} - {}".format(
                pt[0], pt[1])
            jt.Write()
        if self._hBgJtNormalized is not None:
            TDir = output_file.mkdir("{}{}".format(base_folder, "BgJtWeightBin"),
                                     title="BgJtWeightBin", recurse=True)
            TDir.cd()
            for i, (jt, pt) in enumerate(zip(self._hBgJtNormalized, self._jetPtBins)):
                jt.name = "BgJtWeightBinNFin{0[NFin]:02d}JetPt{0[pT]:02d}".format(
                    {"NFin": self._NFin, "pT": i})
                jt.Write()

def __init__(self, tree_name, output_file):
    '''
    Constructor
    '''
    self._tree_name = tree_name
    self._output_file = output_file
    self._file = root_open(output_file, 'recreate')
    self._tree = None
    self._variables = []
    self._collections = []
    self._counter = Counter()
    self._created_branches = False
    self._branches = {}
    self._model = None

def run_pred(inputPath):
    f = TFile.Open(inputPath, "READ")
    try:
        nom = f.Get("nominal")
    except:
        print("can't open " + inputPath)
        return 0
    dsid = inputPath.split('/')[-1]
    dsid = dsid.replace('.root', '')
    print(dsid)
    try:
        nom.GetEntries()
    except:
        print("failed to open")
        return 0
    try:
        nom.Mll01
    except:
        print('failed for ' + inputPath)
        return 0
    if nom.GetEntries() == 0:
        print("no entries")
        return 0
    if hasattr(nom, "tZ_score_test2"):
        print('already there')
        return 0
    event_dict = create_dict(nom)
    inDF = pd.DataFrame(event_dict)
    xgbMat = xgb.DMatrix(inDF, feature_names=list(inDF))
    tZ_score_test = xgbModel.predict(xgbMat)
    with root_open(inputPath, mode='a') as myfile:
        tZ_score_test = np.asarray(tZ_score_test)
        tZ_score_test.dtype = [('tZ_score_test2', 'float32')]
        tZ_score_test.dtype.names = ['tZ_score_test2']
        root_numpy.array2tree(tZ_score_test, tree=myfile.nominal)
        myfile.write()
        myfile.Close()

def test_cuts(self):
    with root_open(self.file_paths[0]) as f:
        tree = f.tree
        h1 = Hist(10, -1, 2)
        h2 = Hist2D(10, -1, 2, 10, -1, 2)
        h3 = Hist3D(10, -1, 2, 10, -1, 2, 10, -1, 2)
        tree.draw('a_x', hist=h1)
        assert_equals(h1.Integral() > 0, True)
        tree.draw('a_x:a_y', hist=h2)
        assert_equals(h2.Integral() > 0, True)
        tree.draw('a_x:a_y:a_z', hist=h3)
        assert_equals(h3.Integral() > 0, True)
        h3.Reset()
        tree.draw('a_x>0:a_y/2:a_z*2', hist=h3)
        assert_equals(h3.Integral() > 0, True)

def get_mean_rms(category, var):
    gr_mean = Graph(len(SIGNALS_14TEV))
    gr_rms = Graph(len(SIGNALS_14TEV))
    for ip, signal in enumerate(SIGNALS_14TEV):
        with root_open('efficiencies/eff_presel_{0}_v{1}.root'.format(
                signal, VERSION)) as fsig:
            h_s = fsig[category].Get('h_' + category + '_' + var['name'])
            gr_mean.SetPoint(ip, DATASETS[signal]['mu'], h_s.GetMean())
            gr_mean.SetPointError(ip, 0, 0, h_s.GetMeanError(), h_s.GetMeanError())
            gr_rms.SetPoint(ip, DATASETS[signal]['mu'], h_s.GetRMS())
            gr_rms.SetPointError(ip, 0, 0, h_s.GetRMSError(), h_s.GetRMSError())
    gr_mean.xaxis.title = 'Average Interactions Per Bunch Crossing'
    gr_mean.yaxis.title = 'Mean of ' + get_label(var)
    gr_rms.xaxis.title = 'Average Interactions Per Bunch Crossing'
    gr_rms.yaxis.title = 'RMS of ' + get_label(var)
    return gr_mean, gr_rms

def load_all(self, fname='strt_session.root'):
    full_fname = os.path.abspath(fname)
    if not os.path.exists(full_fname):
        print 'File %s does not exist.' % full_fname
        return
    runs = []
    with root_open(full_fname) as root_file:
        directories = next(root_file.walk())[1]
        for dir in directories:
            run_id = generate_run_id(runs)
            run = Run(run_id, name=dir)
            event_tree = root_file.Get(dir).Get('Events')
            track_tree = None
            try:
                track_tree = root_file.Get(dir).Get('Tracks')
            except DoesNotExist:
                pass
            for event in event_tree:
                e = Event(ev_id=event.id, data_file_path='')
                for i in range(event.xhits.size()):
                    h = Hit(event.xhits[i], event.yhits[i])
                    e.hits.append(h)
                run.events.append(e)
            current_event = run.events[0]
            if track_tree is not None:  # the directory may have no 'Tracks' tree
                for track in track_tree:
                    ev_id = track.event_id
                    if ev_id != current_event.id:
                        current_event = filter_by_id(run.events, ev_id)
                    t = Track(ev_id, track.id)
                    for i in track.hit_indices:
                        t.hit_indices.append(i)
                    t.color_from_int(track.color)
                    t.rho = track.rho
                    t.theta = track.theta
                    t.set_line((track.x0, track.x1), (track.y0, track.y1))
                    t.R2 = track.R2
                    t.is_good = track.is_good
                    t.calculate_parameters(current_event)
                    current_event.tracks.append(t)
            runs.append(run)
    print '%d runs were loaded' % len(runs)
    return runs