Example #1
def test_file(inname, outname, train_file, variables, testEvery):
   log.info('START processing: %s', os.path.basename(inname))
   bdt = get_training(train_file, variables)
   with io.root_open(outname, 'w') as tout:
      out_tree = Tree('tree')
      out_tree.create_branches({
            'flavour' : 'F',
            'vertexCategory' : 'I', 
            'jetPt' : 'F',
            'jetEta' : 'F',
            'BDTG' : 'F',
            })
      with io.root_open(inname) as tin:
         in_tree = tin.tree
         in_tree.SetBranchStatus('*', 0)
         in_tree.SetBranchStatus('flavour', 1)
         in_tree.SetBranchStatus('vertexCategory', 1)
         in_tree.SetBranchStatus('jetPt', 1)
         in_tree.SetBranchStatus('jetEta', 1)
         for var in variables:
            in_tree.SetBranchStatus(var, 1)
         for idx, entry in enumerate(in_tree):
            if testEvery != 1 and (idx % testEvery) == 0: continue
            var_vals = [getattr(entry, i) for i in variables]
            bdt_out = bdt.predict_proba([var_vals])[0][1]
            out_tree.flavour = entry.flavour
            out_tree.vertexCategory = entry.vertexCategory
            out_tree.jetPt = entry.jetPt
            out_tree.jetEta = entry.jetEta
            out_tree.BDTG = bdt_out
            out_tree.fill()
      out_tree.write()
   log.info('DONE processing: %s', os.path.basename(inname))
Example #2
def calc_single_track_effs(path):
    """Calc the 2D effs for trigger and assocs"""
    with root_open(path + 'output_MC.root', 'read') as f_mc:
        with root_open(path + 'output_reconstructed.root', 'read') as f_re:
            with root_open(path + 'output_effs.root', 'update') as f_effs:
                f_effs.mkdir('processed')
                f_effs.mkdir('processed/efficiencies')

                f_effs.processed.efficiencies.cd()
                # names are identical in mc and recon
                # this might be a problem!
                print 'calculating effs'
                for tpl_mc in f_mc.walk('processed/total_yield'):
                    names = tpl_mc[2]
                    names.sort()
                    for h_name in tpl_mc[2]:
                        ty_mc = f_mc.Get('processed/total_yield/' + h_name)
                        ty_re = f_re.Get('processed/total_yield/' + h_name)
                        eff = ty_re / ty_mc
                        name = 'eff_' + ty_re.name[6:]
                        eff.SetNameTitle(name,
                                         'efficiency' + ty_re.name[6:])
                        f_effs.Write(name)
                        err = r.Double()
                        x_bins, y_bins = (eff.GetXaxis().GetNbins(),
                                          eff.GetYaxis().GetNbins())
                        inte = eff.IntegralAndError(1, x_bins, 1, y_bins, err)
                        print (eff.name,
                               inte / (x_bins * y_bins), 'Error: ',
                               err / (x_bins * y_bins))
Example #3
def multiclass_graph(mt_dec, probas, name, axis_range=(0., 250.)):
    #create a graph and save it as root file
    
    dec_modes = [0.0,1.0,10.0]

    print np.shape(mt_dec)
    print np.shape(probas)    
    
    num_of_classes = len(probas[0])
        
    all_input = np.transpose(np.vstack((mt_dec[:,0],mt_dec[:,1], probas[:,0], probas[:,1], probas[:,2], probas[:,3], probas[:,4], probas[:,5])))
    
    
    for j in dec_modes :    
        x = np.array(filter(lambda x: x[1] == j, all_input))[:,0]
        
        for k in xrange(0,num_of_classes,1) :
            y = np.array(filter(lambda x: x[1] == j, all_input))[:,k+2]        
            graph = Graph(len(x),"g1")
            #create a root file and fill it with the graph P(W+jet) vs mt
            root_open("plots/ROOTfiles/G_"+name+GetClass(k+1)+'_dec_'+str(j)+".root", 'recreate')
            fill_graph(graph,np.column_stack((x,y)))
            graph.Write()
            #create Canvas and save the plot as png
            c = Canvas()
            graph.SetTitle(name+GetClass(k+1)+'_dec_'+str(j))
            graph.SetMarkerSize(0.3)
            graph.GetXaxis().SetRangeUser(axis_range[0],axis_range[1])
            graph.Draw("AP")
            c.SaveAs("plots/G_"+name+GetClass(k+1)+'_dec_'+str(j)+".png")
Example #4
def main(cutname, cutfile, infile, outfile):

    c = root_open(cutfile)
    f = root_open(infile)

    cdict = {p: t for p, d, t in c if p != ''}
    clist = list(set(cdict['cutDir']))
    dic = {p: t for p, d, t in f if p != ''}

    f_copy = root_open(outfile, "recreate")
    for d, tl in dic.iteritems():
        f_copy.mkdir(d)
        f_copy.cd(d)
        for t in tl:
            tree = f[d][t]
            ct = [i for i in clist if i[-3:] in t]
            if ct != []:
                ctree = c['cutDir'][ct[0]]
                cut = "{}==0.0".format(cutname)
                tree.AddFriend(ctree)
            else:
                cut = ''
            tree_copy = tree.CopyTree(cut)
            tree_copy.Fill()
            tree_copy.Write()
            print "{}/{}: in".format(d, t),tree.GetEntries()," out", tree_copy.GetEntries()
            tree.IsA().Destructor(tree)
            tree_copy.IsA().Destructor(tree_copy)
        f_copy.cd('')

    f.close()
    f_copy.close()
    c.close()
Example #5
def test_pickler_proxy():
    h = Hist(5, 0, 1, name='hist')
    f = tempfile.NamedTemporaryFile(suffix='.root')

    with root_open(f.name, 'recreate') as outfile:
        dump([h], outfile)

    class IsCalled(object):
        def __init__(self, func):
            self.func = func
            self.called = False

        def __call__(self, path):
            if path != '_pickle;1':
                self.called = True
            return self.func(path)

    with root_open(f.name) as infile:
        infile.Get = IsCalled(infile.Get)
        hlist = load(infile, use_proxy=False)
        assert_true(infile.Get.called)

    with root_open(f.name) as infile:
        infile.Get = IsCalled(infile.Get)
        hlist = load(infile, use_proxy=True)
        assert_false(infile.Get.called)
        assert_equal(hlist[0].name, 'hist')
        assert_true(infile.Get.called)

    f.close()
Example #6
def get_file(
    ntuple_path=NTUPLE_PATH,
    file_name=None,
    student=DEFAULT_STUDENT,
    hdf=False,
    suffix="_train",
    force_reopen=False,
    **kwargs
):

    if file_name is None:
        ext = ".h5" if hdf else ".root"
        filename = student + suffix + ext
        if filename in FILES and not force_reopen:
            return FILES[filename]
        file_path = os.path.join(ntuple_path, filename)
        #     file_path = os.path.join(ntuple_path, student + suffix, filename)
        log.info("opening {0} ...".format(file_path))
        if hdf:
            #         student_file = tables.open_file(file_path)#, driver="H5FD_CORE")
            log.error("Not Implemented yet")
            raise RuntimeError("Not Implemented yet")
        else:
            student_file = root_open(file_path, "READ")
            FILES[filename] = student_file

    else:
        file_path = os.path.join(ntuple_path, file_name)
        log.info("opening {0} ...".format(file_path))
        student_file = root_open(file_path, "READ")
        FILES[file_name] = student_file
    return student_file
Example #7
def calc_effs(path_mc, path_recon, out_dir=None):
    """
    Save the calculated effs for each z-section and eventclass
    to file
    """
    with root_open(path_mc, 'read') as f_mc:
        with root_open(path_recon, 'read') as f_re:
            if not out_dir:
                out_dir = dirname(abspath(path_mc))
            with root_open(out_dir + '/output_effs.root', 'recreate') as f_effs:
                f_effs.mkdir('processed')
                f_effs.mkdir('processed/eff_from_ty')
                f_effs.mkdir('processed/eff_from_signal')

                print 'calculating effs from total yield'
                f_effs.processed.eff_from_ty.cd()
                for tpl_mc in f_mc.walk('processed/total_yield'):
                    names = tpl_mc[2]
                    names.sort()
                    for h_name in tpl_mc[2]:
                        ty_mc = f_mc.Get('processed/total_yield/' + h_name)
                        ty_re = f_re.Get('processed/total_yield/' + h_name)
                        eff = ty_re / ty_mc
                        name = 'eff_' + ty_re.name[6:]
                        eff.SetNameTitle(name,
                                         'efficiency' + ty_re.name[6:])
                        f_effs.Write(name)
                        err = ROOT.Double()
                        x_bins, y_bins = (eff.GetXaxis().GetNbins(),
                                          eff.GetYaxis().GetNbins())
                        inte = eff.IntegralAndError(1, x_bins, 1, y_bins, err)
                        print (eff.name,
                               inte / (x_bins * y_bins), 'Error: ',
                               err / (x_bins * y_bins))
Example #8
def make_plot(hist_name = 'emu/Stage_0/h1_0_emu_Mass', rebin = 40, y1_mi= 1e-4, y1_ma = 1, x_mi= 0, x_ma= 900, y2_mi= 0., y2_ma= 2., doRatio = True, label = '$M_{e\mu}$ (GeV)'):
    print("Now plotting: %s"%hist_name)

    hist_style = sc.style_container(style = 'CMS', useRoot = False,cms=13,lumi=0, cmsPositon = "upper left", legendPosition = 'upper right', kind = 'Graphs')

    hist_50 = get_hist_from_file('/disk1/erdweg/television/DATA_50/merged/allData',hist_name,rebinfac = rebin)
    hist_25 = get_hist_from_file('/disk1/erdweg/television/DATA_25/merged/allData',hist_name,rebinfac = rebin)

    hist_50 = Graph(hist_50.Clone('50ns'))
    hist_50.SetLineColor('red')
    hist_50.SetTitle('50ns')
    hist_50.xaxis.SetTitle(label)
    hist_50.yaxis.SetTitle('Events')
    hist_25 = Graph(hist_25.Clone('25ns'))
    hist_25.SetLineColor('blue')
    hist_25.SetTitle('25ns')
    hist_25.xaxis.SetTitle(label)
    hist_25.yaxis.SetTitle('Events')

    test = plotter(sig=[hist_50,hist_25],style=hist_style)
    if doRatio:
        test.Add_plot('Empty',pos=1, height=15, label='50ns/25ns')
    test.create_plot()
    if doRatio:
        tfile = root_open('/disk1/erdweg/television/DATA_50/merged/allData.root', "READ")
        d_hist1 = tfile.Get(hist_name)
        d_hist1.Rebin(rebin)
        d_hist1.Scale(1./d_hist1.Integral())
        tfile = root_open('/disk1/erdweg/television/DATA_25/merged/allData.root', "READ")
        d_hist2 = tfile.Get(hist_name)
        d_hist2.Rebin(rebin)
        d_hist2.Scale(1./d_hist2.Integral())
    
        ratio = d_hist1.Clone('ratio')
        for ibin,jbin,lbin in zip(ratio,d_hist1,d_hist2):
            if lbin.value != 0:
                ibin.value = jbin.value/lbin.value
                ibin.error = math.sqrt(jbin.error**2/lbin.value**2 + (lbin.error**2 * jbin.value**2)/lbin.value**4)
            else:
                ibin.value = -100
                ibin.error = 0
        # print('6',type(ratio))
        duke_errorbar(ratio, xerr = hist_style.Get_xerr(), emptybins = False, axes = test.Get_axis2(),
                      markersize = hist_style.Get_marker_size(),
                      marker = hist_style.Get_marker_style(),
                      ecolor = hist_style.Get_marker_color(),
                      markerfacecolor = hist_style.Get_marker_color(),
                      markeredgecolor = hist_style.Get_marker_color(),
                      capthick = hist_style.Get_marker_error_cap_width(),
                      zorder = 2.2)

    test.Get_axis1().set_ylim(ymin = y1_mi, ymax = y1_ma)
    test.Get_axis1().set_xlim(xmin = x_mi, xmax = x_ma)
    if doRatio:
        test.Get_axis2().set_ylim(ymin = y2_mi, ymax = y2_ma)

    test.SavePlot('plots/BX_comparison'+hist_name.split('/')[-1]+'.pdf')
    return 42
Example #9
def data_to_mc_switcher(filename, path, outpath):
    #filename is varname.root
    varname = filename.split(".")[0]
    if "nopdf" in varname: return
    if "*" in varname: return
    print filename, varname
    hists = {}
    if not os.path.isdir(outpath):
        os.makedirs(outpath)
    if "tmatrix" in filename:
        shutil.copy(path+"/"+filename, outpath)
        return
    with root_open(path+"/"+filename) as f:
        outfile = root_open(outpath + "/" +filename, "RECREATE")
        for dirpath, dirs, objects in f.walk():
            #load all histograms from file
            first = True
            for hist in objects:
                #format 3j2t_abs_lj_eta__tchan__tchan__up
                #ignore jt_var__DATA
                #print hist
                if not "DATA" in hist and not "DEBUG" in hist:
                    hists[hist] = f.Get(hist).Clone()                    
                    #print hist, f.Get(hist).Clone(), hists[hist]
                if first and not "DEBUG" in hist:
                    first = False
                    hn = hist.split("__")
                    jt = hn[0].split("_")[0]
        #make jt_var__DATA as sum of jt_var__[tchan+ttjets+wzjets+qcd]
        if jt+"_"+varname+"__wzjets" in hists:
            hists[jt+"_"+varname+"__DATA"] = hists[jt+"_"+varname+"__tchan"].Clone()
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__ttjets"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__wzjets"].Clone())
            #hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__wjets_light"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__qcd"].Clone())
        else:
            hists[jt+"_"+varname+"__DATA"] = hists[jt+"_"+varname+"__tchan"].Clone()
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__twchan"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__schan"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__ttjets"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__wjets"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__dyjets"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__diboson"].Clone())
            hists[jt+"_"+varname+"__DATA"].Add(hists[jt+"_"+varname+"__qcd"].Clone())
        #signal scaling closure test        
        #hists[jt+"_"+varname+"__tchan"].Scale(1.1)
    
    for name, h in hists.items():
        #print name, h
        h.SetNameTitle(name, name)
        if "DATA" in name:
            for bin in range(h.GetNbinsX()+2):
                h.SetBinError(bin, math.sqrt(h.GetBinContent(bin)))
        h.Write()
    outfile.Close()
Example #10
def test_file_open():
    fname = 'test_file_open.root'
    with File.open(fname, 'w'):
        pass
    with root_open(fname, 'r'):
        pass
    with root_open(fname):
        pass
    os.unlink(fname)
Example #11
def makedf():
	"""For a particular Q2W analysis range, starting at the level of h5s, put all analysis
	objects into a DataFrame

	output -- DF5.h5
	contains DataFrame d with analysis data represented by columns:
	TOP--VARSET--q2wbinnum--q2wbin--SEQ--POL--h5--h1{ij}--h5{p}--hR2_{p}^{ij}

	"""
	outfile = os.path.join(ANADIR,'DF5.h5')
	store = pd.HDFStore(outfile)

	norm = 50000*math.pi

	d = pd.DataFrame()

	#1. First do the "looping part" of process of creating DF
	for top in range(0,NTOPS):
		if top != T5:continue
		for varset in range(0,NVARSETS):
			if varset != VST1:continue
			ftemplate = root_open(SEQ_POLS_H5FILE[0][0]%TOPS_NAME[top])
			keys = ftemplate.GetListOfKeys()
			q2wbinnum=0
			dl_counter=0 #counter for number of dls (=Data-Lists, defined later) inserted into DF
			for q2wdir in keys:
				q2wbinnum+=1
				for seq in range(0,NSEQ):
					for pol in range(0,NPOLS):#poll:
						dl_counter+=1
				
						f=root_open(SEQ_POLS_H5FILE[seq][pol]%TOPS_NAME[top])
						h5=f.Get('%s/%s'%(q2wdir.GetName(),SEQ_POLS_H5[seq][pol]%VARSETS_NUMBER[varset]))
						f.Close()
									
						#Create Data-List (dl) to be added to the DataFrame			
						dl    =[TOPS_NUMBER[top],VARSETS_NUMBER[varset],q2wbinnum,q2wdir.GetName(),seq,pol,h5]
						rindex=['TOP','VARSET','q2wbinnum','q2wbin','SEQ','POL','h5']
						print 'len(dl)=',len(dl)
						print 'len(rindex)=',len(rindex)
						if d.empty:
							data = pd.DataFrame({'s1':dl},index=rindex) # Data for 1st. Column 
							d = d.append(data)
						else:
							d['s%d'%dl_counter]=dl
	
	dt = d.transpose()
	
	#2. Now use semi-vectorized operation to fill up rest of the DF5
	h5s = dt['h5']
	h1s=[]
	for i in range(len(h5s)):
		h1s.append(h5s[i].Projection(M1))
		#dt['h1_1M1']=dt['h5']
	dt['h1_1M1']=h1s

	store['d']=dt
Example #12
def hist_checking(control_hist_location, cur_hist_location, path, technique):
    with root_open(control_hist_location) as control_file, \
            root_open(cur_hist_location) as cur_file:
        cur_hist = cur_file.get(path.encode('ascii','ignore'))
        control_hist = control_file.get(path.encode('ascii','ignore'))
        if technique == 'Kolmogorov-Smirnov':
            p_value = cur_hist.KolmogorovTest(control_hist)
        elif technique == 'chi_square':
            p_value = cur_hist.Chi2Test(control_hist)
    return 1. - p_value
Example #13
def main():

  start_time = time.time()
  # open file
  filename="tree.root"
  infile= root_open(filename, "read")
  if not infile.IsOpen():
    print ("does not exist, so will abort")
    return False
    
  #declare histogram
  h = Hist(200, -10, 10)
  
  #run
  #read_simple(infile,h)
  #run_time = time.time() - start_time
  #print("runtime: %s" %run_time)
  #h.Draw()
  #raw_input("Press Enter to continue...")
  
  start_time = time.time()
  read_SetBranchAddress(infile,h)
  run_time = time.time() - start_time
  print("runtime: %s" %run_time)
  h.Draw()
  raw_input("Press Enter to continue...")
Example #14
def download_file(alien_src, local_dest):
    """
    Download file `alien_src` to `local_dest`.

    Parameters
    ----------
    alien_src, local_dest : string
        Full path to files

    Returns
    -------
    int : File size in bytes
    """
    check_alien_token()
    try:
        os.makedirs(os.path.dirname(local_dest))
    except OSError:
        pass
    # fix the dest to include the file name
    if not os.path.basename(local_dest):
        local_dest = os.path.join(local_dest, os.path.basename(alien_src))
    with root_open("alien://" + alien_src) as f:
        if not f.Cp(local_dest):
            try:
                os.remove(local_dest)
            except OSError:
                pass  # file probably didn't exist at all
            raise OSError("An error occued while downloading {}; "
                          "The broken file was deleted.".format(local_dest))
        return f.GetSize()
Example #15
 def __init__(self, *args, **kwargs):
     self.mc_weight = kwargs.pop(
         'mc_weight', True)
     self.posterior_trigger_correction = kwargs.pop(
         'posterior_trigger_correction', True)
     self.embedding_spin_weight = kwargs.pop(
         'embedding_spin_weight', True)
     self.embedding_reco_unfold = kwargs.pop(
         'embedding_reco_unfold', True)
     self.embedding_trigger_weight = kwargs.pop(
         'embedding_trigger_weight', True)
     self.tau_trigger_eff = kwargs.pop(
         'tau_trigger_eff', True)
     super(Embedded_Ztautau, self).__init__(*args, **kwargs)
     with root_open(os.path.join(DAT_DIR, 'embedding_corrections.root')) as file:
         self.trigger_correct = file['ebmc_weight_{0}'.format(self.year % 1000)]
         self.trigger_correct.SetDirectory(0)
     if self.systematics:
         # normalize ISOL and MFS variations to same as nominal
         # at preselection
         from ..categories import Category_Preselection
         nps = [
             ('MFS_UP',),
             ('MFS_DOWN',),
             ('ISOL_UP',),
             ('ISOL_DOWN',)]
         nominal_events = self.events(Category_Preselection)[1].value
         for np in nps:
             np_events = self.events(Category_Preselection,
                                     systematic=np)[1].value
             self.norms[np] =  nominal_events / np_events
Example #16
def loadHistogramsFromFile(filename, histonames, with2d, with3d):
    """ 
    loads specified histograms from the ROOT file given by filename and returns them.
    The histograms will no longer be associated with the file.
    """
    log = logging.getLogger('pyroplot')
    import rootpy
    from rootpy.io import root_open
    from rootpy.plotting import Hist
    histos = {}
    f = root_open(filename)
    nignored = 0
    for h in histonames:
        try:
            histo = f.Get(h)
        except rootpy.io.DoesNotExist:
            # this can happen if the reference file contains more histos than the others
            log.warn("%s not found in file %s"%(h,filename))
            continue
        # might want to ignore Hist2D etc: VERY SLOW and plot processing not optimally suited yet
        if ((histo.__class__.__name__=="Hist" 
             or histo.__class__.__name__=="Profile")
            or ((with3d or with2d) and histo.__class__.__name__=="Hist2D")
            or (with3d and (histo.__class__.__name__=="Profile2D" 
                or histo.__class__.__name__=="Hist3D"))):
            histo.SetDirectory(0) # remove association with file
            histos[h] = histo
        else:
            log.debug("IGNORING %s as it is of class '%s'"%(h,histo.__class__.__name__))
            nignored += 1
    f.close()
    log.info("Loaded %d histograms from file %s"%(len(histos),filename))
    if nignored:
        log.info("IGNORED %d matching 2D/3D histograms: to see these use the --with-2D or --with-3D switches."%(nignored))
    return histos
Example #17
def calc_bg(fn):
    print 'computing backgrounds'
    with root_open(fn, 'update') as f:
        try:
            f.mkdir('processed')
        except ValueError:
            pass
        folders = ['background', 'weighted_background']
        for folder in folders:
            f.rm('processed/' + folder)
        for folder in folders:
            f.mkdir('processed/' + folder)

        # get signal per z section per eclass
        # NOTE: range(bin_min, bin_max)
        # Bin number starts at 1 !!!!!!
        for w in ['', 'weighted_']:
            bgs = [f.Get('raw/'+w+'background'+str(i)) for i in range(0, 4)]
            for eclass, bg in enumerate(bgs):
                logging.info('Calculating '+w+'background for class ' + str(eclass))
                for sec_bin in range(1, 11):
                    # get one background per section
                    bg.GetZaxis().SetRange(sec_bin, sec_bin)
                    bg_tmp = bg.Project3D('yx')  # yes, 'yx'...
                    bg_tmp.SetNameTitle((bg.GetName()[:-1]
                                         + '_z_sec_' + str(sec_bin - 1)
                                         + '_class_' + str(eclass)),
                                    bg.GetTitle() + ' z section' + str(sec_bin - 1))
                    scale_background(bg_tmp)
                    f.cd('processed/'+w+'background')
                    bg_tmp.Write()
Example #18
def np2root(data, column_names, outname="output.root",tname="tree",dtype=float):
    """
    Converts a numpy array to a ROOT TTree and writes it to a file.
    :param data: the 2D array containing M variables for N events
    :param column_names: M variables
    :param outname: name of the output root file
    :param tname: name of the output tree
    :param dtype: float or int or list or dictionary; will map columns to data types in the ROOT tree.
    :return:
    """
    # adding support for different types.
    branches = {}
    if not (isinstance(dtype,dict) or isinstance(dtype,list)):
        assert dtype in [float, int], "dtype not understood"
        mtype = FloatCol
        if dtype == int: mtype = IntCol
        branches = {col: mtype() for col in column_names}
    elif isinstance(dtype,dict):
        my_map = { col : FloatCol if val == float else IntCol for col,val in dtype.iteritems()}
        branches = {col: my_map[col]() for col in column_names}
    else:
        my_map = [ FloatCol if val == float else IntCol for val in dtype]
        branches = {col: my_map[i]() for i,col in enumerate(column_names)}

    fOut = root_open(outname,"RECREATE")
    tree = Tree(tname)
    tree.create_branches(branches)
    rows, cols = shape(data)
    for i in range(0, rows):
        for j in range(0, cols):
            exec("tree.{col} = {val}".format(col=column_names[j], val=data[i,j])) in locals()
        tree.Fill()
    fOut.Write()
    fOut.Close()
    print 'wrote ROOT file {name}'.format(name=outname)
Example #19
def roc(infiles, sig, bkg, optimize):
   sigs = {}
   bkgs = {}
   ROOT.TH1.AddDirectory(False)
   for fname in infiles:
      bname = os.path.basename(fname)
      _, category, flavour = tuple(os.path.splitext(bname)[0].split('_'))
      if flavour != sig and flavour != bkg: continue
      log.info('reading file %s' % fname)
      with root_open(fname) as tfile:
         tree = tfile.tree
         tree.SetBranchStatus('*', 0)
         tree.SetBranchStatus('BDTG', 1)
         histo = plt.Hist1D(1010,0,1.01)
         for entry in tree:
            histo.Fill(entry.BDTG)
         cat = 'all' if not optimize else category
         if flavour == sig:
            if cat not in sigs:
               sigs[cat] = histo.Clone()
            else:
               sigs[cat] += histo
         else:
            if cat not in bkgs:
               bkgs[cat] = histo.Clone()
            else:
               bkgs[cat] += histo
   
   new_sigs = {}
   compressed = {}
   for cat in sigs:
      compressed[cat] = compress(sigs[cat], bkgs[cat])
   return get_roc(compressed)
Example #20
 def __init__(self, *args, **kwargs):
     self.tauspinner = kwargs.pop('tauspinner', True)
     self.posterior_trigger_correction = kwargs.pop('posterior_trigger_correction', True)
     super(Embedded_Ztautau, self).__init__(*args, **kwargs)
     with root_open(os.path.join(DAT_DIR, 'embedding_corrections.root')) as file:
         self.trigger_correct = file['ebmc_weight_{0}'.format(self.year % 1000)]
         self.trigger_correct.SetDirectory(0)
Example #21
def main(infile, outfile):
    # open my data file
    f = root_open(infile)
    # build a dictionary of the file structure

    dic = {p: t for p, d, t in f if p != ''}
    # open a new file to put the data in after the cut is applied
    f_store = pd.HDFStore(outfile)
    # iterate over the directories
    for d, tl in dic.iteritems():

        # iterate over list of trees in the directory
        for t in tl:
            # tree is original data tree
            tree = f[d][t]
            #print tree.branchnames

            df = pd.DataFrame(
                tree2rec(
                    tree,
                    branches=tree.branchnames))
            print "writing to: {}/{}".format(d, t), tree.GetEntries()
            f_store.append('{}/{}'.format(d, t), df)
            tree.IsA().Destructor(tree)
    f_store.close()
    f.close()
Example #22
def hdf2root(infile, outfile, verbose=False):
    try:
        from rootpy.io import root_open
        from rootpy import asrootpy
        from root_numpy import array2tree
    except ImportError:
        raise ImportError(
            "Please load ROOT into PYTHONPATH and install rootpy+root_numpy:\n"
            "   `pip install rootpy root_numpy`"
        )

    from tables import open_file
    import numpy as np

    h5 = open_file(infile, 'r')
    rf = root_open(outfile, 'recreate')

    # 'walk_nodes' does not allow checking whether a node is a group or leaf
    #   exception handling is bugged
    #   introspection/typecheck is bugged
    # => this moronic nested loop instead of a simple `walk`
    for group in h5.walk_groups():
        for leafname, leaf in group._v_leaves.items():
            arr = leaf[:]
            if arr.dtype.names is None:
                dt = np.dtype((arr.dtype, [(leafname, arr.dtype)]))
                arr = arr.view(dt)
            treename = leaf._v_pathname.replace('/', '_')
            tree = asrootpy(array2tree(arr, name=treename))
            tree.write()
    rf.close()
    h5.close()
Example #23
def get_fit_t_dependece(chi2, t_binning, wave, component = -1, mmin = .5, mmax = 2.5, nPoints = 1000):
	"""
	Returns t' dependence histogram specified
	@param chi2: Chi2 to be evaluated
	@type chi2: chi2
	@param t_binning: t' bin edges to be used
	@type t_binning: list
	@param wave: Number of the wave to be used
	@type wave: int
	@param component: Component to be used
	@type component: int
	@param mmin: Lower integral limit
	@type mmin: float
	@param mmax: Upper integral limit
	@type mmax: float
	@param nPoints: Number of points used for integration
	@type nPoints: int
	@return: t' dependence histogram
	@rtype: Hist
	"""
	nTbin = len(t_binning)-1
	if not nTbin == chi2.nTbin():
		raise IndexError("Number of t' bins does not match")
	values = []
	name = ''
	for tbin in range(nTbin):
		values.append(get_integral_value(chi2,tbin,wave,component,mmin,mmax,nPoints))
	hist = TH1D(name,name,nTbin,np.asarray(t_binning,dtype = np.float64))
	hist.SetTitle(name)
	hist.SetName(name)
	for i in range(nTbin):
		hist.SetBinContent(i+1,values[i]/(t_binning[i+1]-t_binning[i]))
	with root_open("samuel.root","RECREATE"):
		hist.Write()
	return hist
Example #24
 def work(self):
     pickle_name = os.path.splitext(self.file)[0]
     if self.profile is not False and self.profile is not None:
         pickle_name += '_profiled_mu{0}'.format(self.profile)
     if self.observed:
         pickle_name += '_observed'
     pickle_name += '.pickle'
     if os.path.exists(pickle_name) and not self.refit:
         with open(pickle_name, 'r') as pickle_file:
             result = pickle.load(pickle_file)
         if self.workspace_name in result:
             return result[self.workspace_name]
     # get the significance of the workspace
     with root_open(self.file) as file:
         ws = file[self.workspace_name]
         result = significance(ws,
                               observed=self.observed,
                               injection=self.injection,
                               injection_test=self.injection_test,
                               profile=self.profile,
                               **self.fit_params)
     # write the value into a pickle
     with open(pickle_name, 'w') as pickle_file:
         pickle.dump({self.workspace_name: result}, pickle_file)
     return result
Example #25
    def __init__(self,outputFileName,treeName) :

        # Open/recreate output file
        self.theTreeFile = root_open(outputFileName,"RECREATE")

        # Create tree with given name and branches structure
        self.theTree = Tree(treeName)
Example #26
def i3root2hdf5(infile, force=False):
    h5file = infile + '.h5'
    bad_keys = ['AntMCTree', 'MasterTree']
    rf = root_open(infile, 'r')
    keys = [k.name for k in rf.keys()]
    if force:
        mode = 'w'
    else:
        mode = 'a'
    h5 = h5py.File(h5file, mode)
    for key in keys:
        if key in bad_keys:
            continue
        tree = rf[key]
        arr = tree2array(tree)
        try:
            h5.create_dataset(
                key,
                data=arr,
                compression='gzip',
                compression_opts=9,
                shuffle=True,
                fletcher32=True,
            )
        except TypeError:
            continue
        h5.flush()
    h5.close()
Example #27
 def __init__(self, filename, Lumi=10*1000, XSName="XS", EventName="NEvent"):
     self.file = root_open(filename, "read")
     self.lumi = Lumi
     self.xs = self.file.Get(XSName).GetBinContent(2) if XSName in self.file else 0
     self.Nevent = self.file.Get(EventName).GetBinContent(2) if EventName in self.file else 0
     self.cutpat = re.compile("^(\w*)_(\d+)")
     self.isData = self.__isData__()
Example #28
    def __init__(self, channels, baseCutSet, inFile, outfile='./results/output.root',
                 maxEvents=float("inf"), intLumi=10000, rowCleaner='',
                 cutModifiers=[], ntupleDir='ntuple'):
        '''
        channels:    list of strings or single string in the format (e.g.) eemm for
                         a 2e2mu final state. '4l', 'zz' and 'ZZ' turn into ['eeee' 'eemm' 'mmmm']
        baseCutSet:  string with the name of the cut template to use
        inFile:      string of an input file name, with path
        outfile:     string of an output file name, with path
        maxEvents:   stop after this many events processed
        intLumi:     in output text file, report how many events we would expect for this integrated luminosity
        rowCleaner:  name of a module to clean out redundant rows. If an empty
                         string (or other False boolean), no cleaning is performed.
        '''
        self.cutSet = [baseCutSet]+cutModifiers
        CutClass = getCutClass(baseCutSet, *cutModifiers)

        self.cuts = CutClass()

        self.outFile = outfile

        self.cutOrder = self.cuts.getCutList()

        self.sample = inFile.split('/')[-1].replace('.root','')
        self.inFile = root_open(inFile)
        assert bool(self.inFile), 'No file %s' % inFile

        self.maxEvents = maxEvents
        # if we don't use all the events, we need to know how many we would have done in the whole thing
        if self.maxEvents < float('inf'):
            self.ntupleSize = {}

        self.channels = parseChannels(channels)

        self.ntuples = {}
        for channel in parseChannels(channels):
            try:
                nt = self.inFile.Get('/'.join([channel,ntupleDir]))
                # if not nt.GetEntries():
                #     raise DoesNotExist('')
                self.ntuples[channel] = nt
                nt.create_buffer()
            except DoesNotExist:
                print "Ntuple for channel %s is empty or not found! Skipping."%channel
                self.channels.remove(channel)
                continue

            if self.maxEvents < float('inf'):
                self.ntupleSize[channel] = self.ntuples[channel].GetEntries()

        self.results = NtupleCopier(self.outFile, **self.ntuples)

        self.prepareCutSummary()

        self.intLumi = intLumi

        self.cleanRows = bool(rowCleaner)
        if self.cleanRows:
            self.CleanerClass = getCleanerClass(rowCleaner)
Example #29
def test_draw():

    with root_open(FILE_PATHS[0]) as f:
        tree = f.tree

        tree.draw('a_x')
        tree.draw('a_x:a_y')
        tree.draw('a_x:TMath::Exp(a_y)')
        tree.draw('a_x:a_y:a_z')
        tree.draw('a_x:a_y:a_z:b_x')
        tree.draw('a_x:a_y:a_z:b_x:b_y', options='para')

        h1 = Hist(10, -1, 2, name='h1')
        h2 = Hist2D(10, -1, 2, 10, -1, 2)
        h3 = Hist3D(10, -1, 2, 10, -1, 2, 10, -1, 2)

        # dimensionality does not match
        assert_raises(TypeError, tree.draw, 'a_x:a_y', hist=h1)

        # name does not match
        assert_raises(ValueError, tree.draw, 'a_x>>+something', hist=h1)

        # hist is not a TH1
        assert_raises(TypeError, tree.draw, 'a_x:a_y', hist=ROOT.TGraph())

        # name does match and is fine (just redundant)
        tree.draw('a_x>>h1', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        h1.Reset()
        tree.draw('a_x>>+h1', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        h1.Reset()

        # both binning and hist are specified
        assert_raises(ValueError, tree.draw, 'a_x>>+h1(10, 0, 1)', hist=h1)

        tree.draw('a_x', hist=h1)
        assert_equal(h1.Integral() > 0, True)
        tree.draw('a_x:a_y', hist=h2)
        assert_equal(h2.Integral() > 0, True)
        tree.draw('a_x:a_y:a_z', hist=h3)
        assert_equal(h3.Integral() > 0, True)

        h3.Reset()
        tree.draw('a_x>0:a_y/2:a_z*2', hist=h3)
        assert_equal(h3.Integral() > 0, True)

        # create a histogram
        hist = tree.draw('a_x:a_y:a_z', create_hist=True)
        assert_equal(hist.Integral() > 0, True)

        hist = tree.draw('a_x:a_y:a_z>>new_hist_1')
        assert_equal(hist.Integral() > 0, True)
        assert_equal(hist.name, 'new_hist_1')

        # create_hist=True is redundant here
        hist = tree.draw('a_x:a_y:a_z>>new_hist_2', create_hist=True)
        assert_equal(hist.Integral() > 0, True)
        assert_equal(hist.name, 'new_hist_2')
Example #30
def getNumberOfInitialEvents(inputFileName) :

    # Open file and get number of entries in hcount
    theFile = root_open(inputFileName,"READ")
    theCountHisto = theFile.Get("FlatTree/hcount")
    count = theCountHisto.GetEntries()
    theFile.Close()
    return count
Example #31
    def __init__(self, fn, maxEvts, trainFrac, isBkg, iseBkg, iseSig):
        print "Initializing Container!"
        #self.tin = r.TChain("EcalVeto")
        #self.tin.Add(fn)
        self.tfile = root_open(fn, 'r+')
        self.tin = self.tfile.EcalVeto
        #self.tin.Print()

        self.maxEvts = maxEvts
        self.trainFrac = trainFrac
        self.isBkg = isBkg
        self.iseBkg = iseBkg
        self.iseSig = iseSig
Example #32
def renameElements(filename, suffix, debug):
    print("Renaming elemnts in filename: \"{}\"".format(filename))
    elementNamesMap = {}
    with root_open(filename, "UPDATE") as f:
        if debug:
            print("f.ls()  pre:")
            f.ls()
        # Cannot just iterate over GetListOfKeys because the hash list is updated when an element is added to a file...
        # Instead, we copy the keys so the iterable is not updated (this should really be treated as a ROOT bug...)
        keys = f.GetListOfKeys()
        simpleListOfKeys = []
        for key in keys:
            simpleListOfKeys.append(key.GetName())

        # Loop over the available keys. If it is the correction task, then unpack the component lists
        for key in simpleListOfKeys:
            if key.endswith("_" + suffix):
                print(
                    "Skipping the processing of element {} since it has already been processed!"
                    .format(key))
                # Save the name in the proper format to be used in the YAML map
                elementNamesMap[key.replace("_" + suffix, "")] = [key]
                # Don't apply the suffix twice
                continue

            element = f.Get(key)
            print("Processing element: {}".format(element.GetName()))
            # Remove the existing element from the file
            f.Delete(element.GetName() + ";*")
            # Rewrite the name
            if "AliEmcalCorrectionTask" in element.GetName():
                for component in element:
                    elementNamesMap = rewriteWithDifferentName(
                        component, suffix, elementNamesMap)
            else:
                elementNamesMap = rewriteWithDifferentName(
                    element, suffix, elementNamesMap)

        if debug:
            print("keys: {}".format(keys.GetEntries()))
            print("f.ls() post:")
            f.ls()

    # Save the map for use with the comparison script
    # Write the reference map to the same directory as the reference file
    yamlReferenceMapLocation = os.path.join(os.path.dirname(filename),
                                            "referenceMap.yaml")
    print("Writing yaml reference map to \"{}\"".format(
        yamlReferenceMapLocation))
    with open(yamlReferenceMapLocation, "w+b") as f:
        yaml.safe_dump(elementNamesMap, f, default_flow_style=False)
Example #33
    def generate_root_file_with_tree(self, file_name, mode="update"):
        f = root_open(file_name, mode)

        # how can we capture the qualifier information? It seems wasteful to have to duplicate it...
        tree_meta = Tree(name="Table 1::metadata",
                         title="Table 1",
                         model=DataRecord)
        tree_meta.create_branches({
            'reaction': 'C',
            'qualifier_1_type': 'C',
            'qualifier_1_value': 'C',
            'qualifier_2_type': 'C',
            'qualifier_2_value': 'C'
        })

        tree_meta.reaction = 'P --> P'
        tree_meta.qualifier_1_type = 'SQRT(S)'
        tree_meta.qualifier_1_value = '8000.0 GeV'
        tree_meta.qualifier_2_type = ''
        tree_meta.qualifier_2_value = '95% CL Limit'

        tree_meta.fill()

        tree = Tree(name="Table 1::data", title="Table 1", model=DataRecord)
        # F - Float, I - Integer

        for i in xrange(1000):
            tree.qual_1_type = "sqrt(s)"
            tree.qual_1_value = "8000.0 GeV"

            tree.qual_2_type = ""
            tree.qual_2_value = "95% CL upper limit [fb]"

            tree.x_val = gauss(1., 4.)
            tree.x_err_y_minus = gauss(0., 1)
            tree.x_err_y_plus = gauss(0., 1)

            tree.expected_val = gauss(1., 4.)
            tree.expected_err_y_minus = gauss(1., 4.)
            tree.expected_err_x_minus = gauss(1., 4.)

            tree.observed_val = gauss(1., 4.)
            tree.observed_err_y_minus = gauss(1., 4.)
            tree.observed_err_x_minus = gauss(1., 4.)

            tree.i = i
            tree.fill()

        tree.write()

        f.close()
Example #34
def test_attrs():
    with root_open(FILE_PATHS[0]) as f:
        tree = f.tree
        tree.read_branches_on_demand = True
        tree.define_object('a', 'a_')
        tree.define_collection('b', 'b_', 'b_n')
        for event in tree:
            # test a setattr before a getattr with caching
            new_a_y = random()
            event.a_y = new_a_y
            assert_almost_equal(event.a_y, new_a_y)

            assert_equal(event.a_x, event.a.x)
            assert_equal(len(event.b) > 0, True)
Example #35
def copy_in_trigger_signal(in_files_name,
                           out_name,
                           tree_name,
                           prefix,
                           cdc_events,
                           cth_events,
                           rand_t=None):
    # Convert input lists to sets first
    set_cdc_events = set(cdc_events)
    set_cth_events = set(cth_events)

    # Define the chain of input trees
    in_chain = TreeChain(name=tree_name, files=in_files_name)
    # First create a new file to save the new tree in:
    out_file = root_open(out_name, "r+")

    # Add the time shift if we want it in the tree
    ExtraBranches = Tagged
    if rand_t is not None:
        ExtraBranches += Smeared

    # Get the new tree with its extra branches
    out_tree = Tree(tree_name, model=ExtraBranches.prefix(prefix))

    # This creates all the same branches in the new tree but
    # their addresses point to the same memory used by the original tree.
    out_tree.create_branches(in_chain._buffer)
    out_tree.update_buffer(in_chain._buffer)

    # Now loop over the original tree(s) and fill the new tree
    for entry in in_chain:
        # Add in the new values
        this_event_number = entry[prefix + "EventNumber"].value
        out_tree.__setattr__(prefix + "GoodTrack", this_event_number
                             in set_cdc_events)
        out_tree.__setattr__(prefix + "GoodTrig", this_event_number
                             in set_cth_events)
        if rand_t is not None:
            try:
                out_tree.__setattr__(prefix + "SmearTime",
                                     rand_t[this_event_number])
            except:
                for key, item in entry.iteritems():
                    print key, item
        # Fill, noting that most of the buffer is shared between the chain
        # and the output tree
        out_tree.Fill()
    # Close it up
    out_tree.Write()
    out_file.Close()
Example #36
def main():
  style = get_style('CMSTDR')
  style.SetTitleSize(0.07, "XYZ")
  style.SetPalette(54) # kBlueYellow
  style.SetPadLeftMargin(0.10)
  style.SetPadRightMargin(0.12)
  set_style(style)
  input_dir = '/home/sauvan/Documents/HEP/Projects/CMS/L1CalorimeterTrigger_Phase2HGCal/Misc/FastShower/output/'
  files = glob(input_dir+'*.root')
  for file_name in files:
    with root_open(file_name) as file:
      for object in file.objects(cls=TCanvas):
        if 'Event 1' in object.GetName():
          plot_event(splitext(basename(file_name))[0], object)
Example #37
def load_pgun():
    global infile_r
    #infile = 'ntuple_SingleMuon_Toy_5GeV_add.3.root'
    infile = '/tmp/jiafu/ntuple_SingleMuon_Toy_2GeV.0.root'
    infile_r = root_open(infile)
    tree = infile_r.ntupler.tree
    #tree = TreeChain('ntupler/tree', [infile])
    print('[INFO] Opening file: %s' % infile)

    # Define collection
    tree.define_collection(name='hits', prefix='vh_', size='vh_size')
    tree.define_collection(name='tracks', prefix='vt_', size='vt_size')
    tree.define_collection(name='particles', prefix='vp_', size='vp_size')
    return tree
Example #38
def main():
    latex, files = decode_arguments()

    for path in files:
        with root_open(path, 'r') as f:
            print(path)
            print()

            if latex:
                print(f.latex_pull_table.GetTitle())
            else:
                print(f.pull_table.GetTitle())

            print()
Example #39
def download_from_grid_archive(alien_src, local_dest):
    """
    Download the files from a grid-zip-file at `alien_src` to `local_dest`.
    If all files from the archive already exist locally, do not re-download them.

    Parameters
    ----------
    alien_src, local_dest : string
        Full path to files

    Returns
    -------
    int : File size in bytes
    """
    check_alien_token()
    try:
        os.makedirs(os.path.dirname(local_dest))
    except OSError:
        pass

    # fix the dest to include the file name
    if not os.path.basename(local_dest):
        local_dest = os.path.join(local_dest, os.path.basename(alien_src))
    with root_open("alien://" + alien_src) as f:
        if not f.IsArchive():
            raise ValueError(
                "{} does not point to an archive file.".format(alien_src))
        fsize = f.GetSize()
        fnames = [m.GetName() for m in f.GetArchive().GetMembers()]
        local_dir = os.path.dirname(local_dest)
        if all([
                os.path.isfile(os.path.join(local_dir, fname))
                for fname in fnames
        ]):
            raise OSError("Files exist; not redownloading")

        if not f.Cp(local_dest):
            raise RuntimeError("Could not download {}!".format(alien_src))

    with zipfile.ZipFile(local_dest) as zf:
        try:
            zf.extractall(os.path.dirname(local_dest))
        except IOError:
            print("Error unzipping {}. File was deleted".format(local_dest))
    # Delete the zip archive file
    try:
        os.remove(local_dest)
    except OSError:
        pass  # file probably didn't exist at all?!
    return fsize
Example #40
    def save_to_root(self, fit_type):

        # Open root file, and create tree
        f = root_open(self.root_filename, "update")
        t = f.paramTree

        # Cannot write to tree buffer without this, for some reason.
        for entries in t:
            continue

        # Check if saving true or fitted parameter values
        if (fit_type == 0):
            params_f = open(self.true_params_filename, 'r')
        else:
            params_f = open(self.fitted_params_filename, 'r')

        for line in params_f.readlines():

            # Split line into items
            items = line.split()

            # Test if first entry in file is a label (can cast to int). If not, continue.
            try:
                int(items[0])
            except (ValueError, IndexError):
                continue

            # Save type of fit, and label to tree
            t.fitType = fit_type
            t.label = int(items[0])

            # Save fitted parameter value to tree. If it doesn't exist, continue.
            try:
                t.paramValue = float(items[1])
            except Exception:
                continue

            # Save parameter error, if it exists. If not, set to zero.
            try:
                t.paramError = float(items[4])
            except Exception:
                t.paramError = 0.0

            # Fill tree
            t.fill()

        # Write to tree, and close file
        t.write("", ROOT.TObject.kWriteDelete)
        f.close()
Example #41
    def _getHist(self):
        with root_open(self.fName) as f:
            try:
                hClus = asrootpy(f.Get('demo/Clusters/nClusters'))
                hClus.SetDirectory(0)
                hClus.SetName('nClusters')
                self.dHist['nClusters'] = hClus
            except Exception:
                self.dHist['nClusters'] = None

            for region in REGIONS:
                hOrig = asrootpy(f.Get('demo/' + self.dirName + '/' + region))
                hOrig.SetDirectory(0)
                hOrig.SetName(region + '_Original')

                hNorm = hOrig.Clone()
                hNorm.SetDirectory(0)
                hNorm.Scale(1 / hNorm.integral())
                hNorm.SetName(region + '_NormTo1')

                hCut = hOrig.Clone()
                hCut.SetDirectory(0)
                hCut = self._doCut(hCut)
                hCut.SetName(region + '_Cut')

                hCutNorm = hOrig.Clone()
                hCutNorm.SetDirectory(0)
                hCutNorm = self._doCut(hCutNorm)
                hCutNorm.Scale(1 / hCutNorm.integral())
                hCutNorm.SetName(region + '_CutNormTo1')

                hCDF = hOrig.Clone()
                hCDF.SetDirectory(0)
                hCDF = self._doCDF(hCDF)
                hCDF.SetName(region + '_CDF')

                hCutCDF = hOrig.Clone()
                hCutCDF.SetDirectory(0)
                hCutCDF = self._doCut(hCutCDF)
                hCutCDF = self._doCDF(hCutCDF)
                hCutCDF.SetName(region + '_CutCDF')

                self.dHist[region + '_Original'] = hOrig
                self.dHist[region + '_NormTo1'] = hNorm
                self.dHist[region + '_Cut'] = hCut
                self.dHist[region + '_CutNormTo1'] = hCutNorm
                self.dHist[region + '_CDF'] = hCDF
                self.dHist[region + '_CutCDF'] = hCutCDF
        return
Example #42
def fetchFiles(d):
    dtag = d.split('-')[0][1:].replace('/', '_')
    eospath_ = SOURCEEOSPATH + d.rsplit('/', 1)[0]
    timestamps = subprocess.check_output(
        shlex.split('eos {0} ls {1}'.format(XDIRECTOR, eospath_))).split()
    if not timestamps:
        print("--> Zero timestamp directory found under", eospath_)
        print("--> Empty list returned for", dtag)
        return dtag, []
    timestamps = sorted(timestamps,
                        key=lambda x: datetime.strptime(x, "%y%m%d_%H%M%S"))
    eospath = join(eospath_, timestamps[-1])  # most recent submission

    flist = []

    try:
        flist = subprocess.check_output(
            shlex.split(
                'eos {0} find -name "*ffAOD*.root" -f --xurl {1}'.format(
                    XDIRECTOR, eospath))).split()
    except subprocess.CalledProcessError:
        print("--> cannot stat eos path: ", eospath)
        print("--> Empty list returned for", dtag)
        return dtag, []

    print(dtag, "Total number of files (uncleaned):", len(flist))

    nonzeroes_ = []
    for f in flist:
        nevents = 0
        try:
            thefile = root_open(f)  # make sure it can be opened properly
            thefile.Close()
        except Exception as e:
            print("--> Cannot open file", f)
            print(str(e))
            continue

        try:
            events = Events(f)
            for evt in events:
                nevents += 1
        except Exception as e:
            print("--> FWLite failed for", f)
            print(str(e))
            continue

        if nevents > 0: nonzeroes_.append(f)

    return dtag, nonzeroes_
Example #43
 def wrapper(self, **kwargs):
     with root_open(self.f_name, 'update') as self.f:
         self.sums = self.f.MultEstimators.__getattr__(self.sums_dir_name)
         try:
             self.results_post = self.f.MultEstimators.__getattr__(self.results_dir_name)
         except AttributeError:
             # results dir does not exists (yet)
             pass
         return_value = func(self, **kwargs)
         # Delete all TLists in sums since we own them and they would be left in memory otherwise
         for obj in self.sums:
             if isinstance(obj, collection.List):
                 obj.Delete()
         self.sums.Delete()
     return return_value
Example #44
def append_lumi_scale_branch(src, dst, xsec, target_lumi):
    """Append a branch to the train and test trees of a sample for scaling
    the MC sample luminosity to the target luminosity. The scaling value is
    calculated as the ratio of the target luminosity to the sample luminosity,
    where the sample luminosity is defined as the difference between the number
    of positively and negatively weighted events of the full MC sample divided
    by the cross-section of the MC sample.

    Parameters
    ----------
    src : string
        The path to the sample.

    dst : string
        The output path to the new sample with training and testing trees.

    xsec : numeric
        The cross-section of the Monte-Carlo sample in units of picobarns (pb).

    target_lumi : numeric
        The target luminosity in units of inverse picobarns (pb-1).
    """
    with root_open(src) as f:
        n_pos = f.Get('CountPosWeight').GetBinContent(1)
        n_neg = f.Get('CountNegWeight').GetBinContent(1)
    sample_lumi = (n_pos - n_neg) / float(xsec)
    lumi_scale = target_lumi / sample_lumi
    with root_open(dst, 'a') as f:
        for name in ['train', 'test']:
            t = f.Get(name)
            t.create_branches({'lumi_scale': 'F'})
            b = t.GetBranch('lumi_scale')
            for entry in t:
                entry.lumi_scale = lumi_scale
                b.Fill()
            t.Write()
Example #45
def tmva_process(info):
    """
    Create TMVA classification factory, train, test and evaluate all methods

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information

    """
    import ROOT

    reader = ROOT.TMVA.Reader()

    features_pointers = []
    for feature in info.features:
        features_pointers.append(array.array('f', [0.]))
        reader.AddVariable(feature, features_pointers[-1])

    model_type, sigmoid_function = info.model_type
    reader.BookMVA(info.method_name, info.xml_file)

    file_root = root_open(info.filename, mode='update')
    tree = file_root[info.treename]

    for ind, feature in enumerate(info.features):
        tree.SetBranchAddress(feature, features_pointers[ind])

    tree.create_branches({info.method_name: 'F'})
    branch = tree.get_branch(info.method_name)

    signal_efficiency = None
    if model_type == 'classification' and sigmoid_function is not None and 'sig_eff' in sigmoid_function:
        signal_efficiency = float(sigmoid_function.strip().split('=')[1])
        assert 0.0 <= signal_efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
            signal_efficiency)

    for event in range(tree.GetEntries()):
        tree.GetEntry(event)
        if model_type == 'classification':
            if signal_efficiency is not None:
                prediction = reader.EvaluateMVA(info.method_name,
                                                signal_efficiency)
            else:
                prediction = reader.EvaluateMVA(info.method_name)
        else:
            prediction = reader.EvaluateRegression(info.method_name)[0]
        tree.__setattr__(info.method_name, prediction)
        branch.Fill()
    tree.Write()
    file_root.Close()
Example #46
 def __init__(self, name, filename, sysName=""):
     self.name = name
     self.filename = filename
     self.var = {}
     self.cut = {}
     self.histograms = {}
     self.file = None
     self.tree = None
     self.isSys = False
     if filename is not None:
         self.file = root_open(filename)
         if sysName == "":
             self.tree = self.file.Nominal
         else:
             self.tree = self.file.Get(sysName)
             self.isSys = True
Example #47
def add_files(groups, configs):
  hall = ph.HChain(args.topLevel)
  for group in configs[groups]:
    hc = ph.HGroup(group['name'])
    logger.log(25, "Group: {0:s}".format(group['name']))
    for f in group['files']:
      logger.log(25, "\tPattern: {0:s}".format(f))
      for fname in glob.glob(f):
        logger.log(25, "\t\tAdding {0:s}".format(fname))
        rootFile = root_open(fname)
        hc.append(rootFile)
    if len(hc) == 0:
      raise ValueError("{0:s} has no files loaded.".format(group['name']))
    logger.log(25, "\tAdding {0:s}".format(hc))
    hall.append(hc)
  return hall
Example #48
def get_file(ntuple_path=NTUPLE_PATH, student=DEFAULT_STUDENT, hdf=False, suffix='', force_reopen=False):
    ext = '.h5' if hdf else '.root'
    filename = student + suffix + ext
    if filename in FILES and not force_reopen:
        return FILES[filename]
    file_path = os.path.join(ntuple_path, filename)
    log.info("opening {0} ...".format(file_path))
    if hdf:
        log.error('Not Implemented yet')
        raise RuntimeError('Not Implemented yet')
    else:
        student_file = root_open(file_path, 'READ')
    FILES[filename] = student_file
    return student_file
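The FILES dict here is a module-level cache: repeated calls for the same file return the already-open handle instead of reopening it. The same idea in isolation (names are hypothetical):

from rootpy.io import root_open

FILES = {}  # module-level cache of open files

def get_cached(path, force_reopen=False):
    # return the cached handle unless the caller forces a fresh open
    if path in FILES and not force_reopen:
        return FILES[path]
    f = root_open(path, 'READ')
    FILES[path] = f
    return f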
Example #49
0
    def reload_histograms(self, input_filename):
        """
        Read back histograms from the given root file.
        May need to append histograms

        returns:
          Should return True if histograms were written without problem.
          If anything else is returned, processing of the trees will stop
        """
        results = []
        with root_open(input_filename, "r") as input_file:
            for hist in self.all_plots:
                indir = input_file.GetDirectory(hist.directory_name)
                results.append(hist.from_root(indir))
        ok = all(results)
        return ok
Example #50
0
    def read_2cumulant_Aside(self):
        # use a context manager so the file is closed after reading
        with root_open(self.filename, 'read') as datafile:
            directory = datafile.Get(self.directory)

            cumu_dW2A = rnp.hist2array(
                directory.Get('cumulants').Get('standard').Get('dW2A').Get(
                    'cumu_dW2A'))
            cumu_dW2TwoA = rnp.hist2array(
                directory.Get('cumulants').Get('standard').Get('dW2TwoA').Get(
                    'cumu_dW2TwoA'))

        self.differential[:, 0, ..., dW2A] = cumu_dW2A
        self.differential[:, 1, ..., dW2A] = cumu_dW2A
        self.differential[:, 2, ..., dW2A] = cumu_dW2A
        self.differential[:, 0, ..., dW2TwoA] = cumu_dW2TwoA[0]
        self.differential[:, 1, ..., dW2TwoA] = cumu_dW2TwoA[1]
        self.differential[:, 2, ..., dW2TwoA] = cumu_dW2TwoA[2]
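root_numpy.hist2array turns a ROOT histogram into a plain NumPy array of bin contents, which is what allows the cumulants to be sliced directly into self.differential. A standalone sketch of just that conversion:

import root_numpy as rnp
from rootpy.plotting import Hist

h = Hist(10, 0, 1)
h.FillRandom('gaus', 1000)       # fill with toy data
arr = rnp.hist2array(h)          # numpy array of the 10 bin contents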
Example #51
0
def write_out_file(infile, outfile, tree=None):
    with root_open(infile) as f:
        T = f[tree]

        cells = [b for b in T.branchnames if b.startswith('cell')]

        assert len(cells) == sum(map(np.prod, LAYER_SPECS)) + OVERFLOW_BINS

        X = pd.DataFrame(tree2array(T, branches=cells)).values
        E = pd.DataFrame(tree2array(T, branches=['TotalEnergy'])).values.ravel()

    with HDF5File(outfile, 'w') as h5:
        for layer, (sh, (l, u)) in enumerate(zip(LAYER_SPECS, LAYER_DIV)):
            h5['layer_{}'.format(layer)] = X[:, l:u].reshape((-1, ) + sh)

        h5['overflow'] = X[:, -OVERFLOW_BINS:]
        h5['energy'] = E.reshape(-1, 1)
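The interesting step is the reshape: each layer's flat block of cell columns is cut out of X and folded back into that layer's 2D shape, with the trailing columns kept as overflow. A sketch of the same slicing with h5py and made-up dimensions:

import numpy as np
import h5py

X = np.random.rand(100, 14)            # stand-in for the flat cell energies
LAYER_SPECS = [(3, 4)]                 # hypothetical single-layer geometry
LAYER_DIV = [(0, 12)]                  # column range belonging to that layer
OVERFLOW_BINS = 2

with h5py.File('cells.h5', 'w') as h5:
    for layer, (sh, (l, u)) in enumerate(zip(LAYER_SPECS, LAYER_DIV)):
        # fold the flat columns back into the layer's 2D image
        h5['layer_{}'.format(layer)] = X[:, l:u].reshape((-1,) + sh)
    h5['overflow'] = X[:, -OVERFLOW_BINS:]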
Example #52
0
def augment_file(in_folder, out_folder, tree_name, mcolls):
    # first, copy the original ROOT file to its destination, keeping the directory structure the same
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    data_outfile = os.path.join(out_folder, Config.MC_filename)
    data_infile = os.path.join(in_folder, Config.MC_filename)

    copyfile(data_infile, data_outfile)

    #tree_name = "ClassTree"

    # now, can read the file from its new location and change it
    fcoll = FileCollection({data_outfile: cuts.no_cut}, 0.0, 1.0, tree_name=tree_name)
    length = fcoll.get_length()

    indata = utils.read_data(fcoll, start=0, stop=length, branches=Config.branches, tree_name=tree_name)

    # loop over ModelCollections here to get the prediction from each
    out_branches = []
    prepared_dtype = []
    branch_names = []

    for mcoll in mcolls:
        out_branches.append(mcoll.predict(indata))
        branch_names.append(mcoll.name)
        prepared_dtype.append((mcoll.name.encode("ascii"), 'f4'))

    print prepared_dtype

    # make it into the correct type and shape
    new_branches = np.zeros(length, dtype=prepared_dtype)

    for out_data, branch_name in zip(out_branches, branch_names):
        new_branches[branch_name] = out_data

    # now re-open the output file in append mode
    outfile = root_open(data_outfile, mode="a")
    outtree = outfile.Get(tree_name + "/candTree")

    root_numpy.array2tree(new_branches, tree=outtree)
    outfile.write()
    outfile.close()
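root_numpy.array2tree with the tree= keyword extends an existing tree in place: each field of the structured array becomes a new branch, so the array needs exactly one row per tree entry. A compact sketch, with a hypothetical file, tree, and branch name:

import numpy as np
import root_numpy
from rootpy.io import root_open

with root_open('augmented.root', 'a') as f:
    tree = f.Get('candTree')
    n = tree.GetEntries()
    # one row per existing entry; the field name becomes the branch name
    scores = np.zeros(n, dtype=[('bdt_score', 'f4')])
    root_numpy.array2tree(scores, tree=tree)
    f.write()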
Example #53
0
    def _set_count_histogram_attributes(self):
        """Set the count histograms as attributes accessible by their name."""
        self._count_histograms = []
        # Aggregate the count histograms across all files.
        with contextlib2.ExitStack() as stack:
            files = [
                stack.enter_context(root_open(filename))
                for filename in self.filenames
            ]
            for obj in files[0]:
                if isinstance(obj, ROOT.TH1):
                    name = obj.GetName()
                    hist = sum(f.Get(name) for f in files)
                    hist.SetName(name)
                    hist.SetDirectory(0)
                    setattr(self, name, hist)
                    self._count_histograms.append(hist)
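ExitStack is what lets an arbitrary number of files stay open simultaneously so the same histogram can be summed across all of them. The same pattern reduced to its core, with hypothetical file and histogram names:

import contextlib2

from rootpy.io import root_open

filenames = ['a.root', 'b.root']
with contextlib2.ExitStack() as stack:
    files = [stack.enter_context(root_open(fn)) for fn in filenames]
    total = files[0].Get('CountWeight').Clone()
    total.SetDirectory(0)  # detach so the sum survives the files closing
    for f in files[1:]:
        total.Add(f.Get('CountWeight'))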
Example #54
0
    def save_all(self, runs, fname='strt_session.root'):
        with root_open(fname, 'recreate') as root_file:
            for run in runs:
                run_dir = root_file.mkdir(run.name)
                run_dir.cd()
                event_tree = Tree('Events', model=RootSaver.rEvent)
                track_tree = Tree('Tracks', model=RootSaver.rTrack)
                for event in run.events:
                    event_tree.id = event.id
                    for i in range(len(event.hits)):
                        event_tree.xhits.push_back(event.hits[i].x)
                        event_tree.yhits.push_back(event.hits[i].y)
                    event_tree.nHits = len(event.hits)
                    event_tree.nTracks = len(event.tracks)
                    event_tree.nGoodTracks = len(
                        [t for t in event.tracks if t.is_good])
                    event_tree.fill(reset=True)

                    for track in event.tracks:
                        track_tree.id = track.id
                        track_tree.event_id = track.event_id
                        for i in range(len(track.hit_indices)):
                            track_tree.hit_indices.push_back(
                                track.hit_indices[i])
                            track_tree.residuals.push_back(track.residuals[i])
                            track_tree.lincoords.push_back(track.lincoor[i])
                        track_tree.color = track.int_color()
                        track_tree.length = track.length()
                        track_tree.rho = track.rho
                        track_tree.theta = track.theta
                        track_tree.x0 = track.get_start_point()[0]
                        track_tree.y0 = track.get_start_point()[1]
                        track_tree.x1 = track.get_end_point()[0]
                        track_tree.y1 = track.get_end_point()[1]
                        track_tree.nHits = len(track.hit_indices)
                        track_tree.R2 = track.R2
                        track_tree.is_good = track.is_good
                        track_tree.fill(reset=True)
                event_tree.write()
                track_tree.write()
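RootSaver.rEvent and RootSaver.rTrack are rootpy TreeModel classes: the model declares typed columns once and Tree(name, model=...) creates all of the branches from it. A minimal sketch of that mechanism with a hypothetical model:

from rootpy.io import root_open
from rootpy.tree import Tree, TreeModel, IntCol, FloatCol

class rEvent(TreeModel):
    # each class attribute becomes a branch of the declared type
    id = IntCol()
    nHits = IntCol()
    weight = FloatCol()

with root_open('session.root', 'recreate') as f:
    tree = Tree('Events', model=rEvent)
    tree.id = 1
    tree.nHits = 3
    tree.weight = 0.8
    tree.fill(reset=True)  # reset buffers after each fill
    tree.write()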
Example #55
0
    def write_files(self, filename, **kwargs):
        """
        Write output histograms to file

        Args:
        filename: Name of output file
        """
        print("{}: write results to file {} and folder".format(
            self._name, filename))
        base_folder = "{}/BayesSubUnfolding/".format(self._name)
        print(base_folder)

        open_as = "append" if kwargs.get("append", False) else "recreate"

        with root_open(filename, open_as) as output_file:
            TDir = output_file.mkdir("{}{}".format(base_folder,
                                                   "JetConeJtWeightBin"),
                                     title="JetConeJtWeightBin",
                                     recurse=True)
            TDir.cd()
            for i, (jt, pt) in enumerate(zip(self._hJtMeasBin,
                                             self._jetPtBins)):
                jt.name = "JetConeJtWeightBinNFin{0[NFin]:02d}JetPt{0[pT]:02d}".format(
                    {
                        "NFin": self._NFin,
                        "pT": i
                    })
                jt.title = "Finder:Full_Jets_R04_00 p_{{T,jet}} : {} - {}".format(
                    pt[0], pt[1])
                jt.Write()

            if self._hBgJtNormalized is not None:
                TDir = output_file.mkdir("{}{}".format(base_folder,
                                                       "BgJtWeightBin"),
                                         title="BgJtWeightBin",
                                         recurse=True)
                TDir.cd()
                for i, (jt, pt) in enumerate(
                        zip(self._hBgJtNormalized, self._jetPtBins)):
                    jt.name = "BgJtWeightBinNFin{0[NFin]:02d}JetPt{0[pT]:02d}".format(
                        {
                            "NFin": self._NFin,
                            "pT": i
                        })
                    jt.Write()
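mkdir(..., recurse=True) creates all intermediate directories in one call, and after cd() every Write() lands in that directory. A reduced sketch with hypothetical file, directory, and histogram names:

from rootpy.io import root_open
from rootpy.plotting import Hist

with root_open('results.root', 'recreate') as f:
    d = f.mkdir('MyAnalysis/BayesSubUnfolding/JetConeJtWeightBin',
                recurse=True)  # create the whole path at once
    d.cd()
    h = Hist(10, 0, 1, name='demo')
    h.Write()  # written into the directory we cd'd into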
Example #56
0
    def __init__(self, tree_name, output_file):
        '''
        Constructor
        '''
        self._tree_name = tree_name
        self._output_file = output_file
        self._file = root_open(output_file, 'recreate')
        self._tree = None

        self._variables = []
        self._collections = []

        self._counter = Counter()

        self._created_branches = False
        self._branches = {}
        self._model = None
Example #57
0
def run_pred(inputPath):
    f = TFile.Open(inputPath, "READ")
    try:
        nom = f.Get("nominal")
    except Exception:
        print("can't open " + inputPath)
        return 0
    dsid = inputPath.split('/')[-1]
    dsid = dsid.replace('.root', '')
    print(dsid)

    try:
        nom.GetEntries()
    except Exception:
        print("failed to open")
        return 0

    try:
        nom.Mll01
    except Exception:
        print('failed for ' + inputPath)
        return 0

    if nom.GetEntries() == 0:
        print("no entries")
        return 0
    if hasattr(nom, "tZ_score_test2"):
        print('already there')
        return 0

    event_dict = create_dict(nom)

    inDF = pd.DataFrame(event_dict)

    xgbMat = xgb.DMatrix(inDF, feature_names=list(inDF))
    tZ_score_test = xgbModel.predict(xgbMat)

    # release the read-only handle before re-opening the same file for update
    f.Close()

    with root_open(inputPath, mode='a') as myfile:
        tZ_score_test = np.asarray(tZ_score_test)
        tZ_score_test.dtype = [('tZ_score_test2', 'float32')]
        tZ_score_test.dtype.names = ['tZ_score_test2']
        root_numpy.array2tree(tZ_score_test, tree=myfile.nominal)

        myfile.write()
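The scoring step itself is plain XGBoost: wrap the feature DataFrame in a DMatrix with matching feature names and call predict. In isolation, with a hypothetical model file and features:

import pandas as pd
import xgboost as xgb

model = xgb.Booster()
model.load_model('tZ_model.bin')       # hypothetical trained model

inDF = pd.DataFrame({'Mll01': [91.2], 'nJets_OR': [4]})
xgbMat = xgb.DMatrix(inDF, feature_names=list(inDF))
scores = model.predict(xgbMat)         # one score per row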
Example #58
0
    def test_cuts(self):

        with root_open(self.file_paths[0]) as f:
            tree = f.tree
            h1 = Hist(10, -1, 2)
            h2 = Hist2D(10, -1, 2, 10, -1, 2)
            h3 = Hist3D(10, -1, 2, 10, -1, 2, 10, -1, 2)

            tree.draw('a_x', hist=h1)
            assert_equals(h1.Integral() > 0, True)
            tree.draw('a_x:a_y', hist=h2)
            assert_equals(h2.Integral() > 0, True)
            tree.draw('a_x:a_y:a_z', hist=h3)
            assert_equals(h3.Integral() > 0, True)

            h3.Reset()
            tree.draw('a_x>0:a_y/2:a_z*2', hist=h3)
            assert_equals(h3.Integral() > 0, True)
Example #59
0
def get_mean_rms(category, var):
    gr_mean = Graph(len(SIGNALS_14TEV))
    gr_rms = Graph(len(SIGNALS_14TEV))
    for ip, signal in enumerate(SIGNALS_14TEV):
        with root_open('efficiencies/eff_presel_{0}_v{1}.root'.format(
                signal, VERSION)) as fsig:
            h_s = fsig[category].Get('h_' + category + '_' + var['name'])
            gr_mean.SetPoint(ip, DATASETS[signal]['mu'], h_s.GetMean())
            gr_mean.SetPointError(ip, 0, 0, h_s.GetMeanError(),
                                  h_s.GetMeanError())
            gr_rms.SetPoint(ip, DATASETS[signal]['mu'], h_s.GetRMS())
            gr_rms.SetPointError(ip, 0, 0, h_s.GetRMSError(),
                                 h_s.GetRMSError())
    gr_mean.xaxis.title = 'Average Interactions Per Bunch Crossing'
    gr_mean.yaxis.title = 'Mean of ' + get_label(var)
    gr_rms.xaxis.title = 'Average Interactions Per Bunch Crossing'
    gr_rms.yaxis.title = 'RMS of ' + get_label(var)
    return gr_mean, gr_rms
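rootpy's Graph appears to be a TGraphAsymmErrors underneath, which is why SetPointError takes separate low/high errors for both axes. A minimal filling sketch with made-up points:

from rootpy.plotting import Graph

gr = Graph(3)
for ip, (x, y, ey) in enumerate([(10, 1.0, 0.1),
                                 (20, 1.4, 0.2),
                                 (40, 2.1, 0.3)]):
    gr.SetPoint(ip, x, y)
    # no x error, symmetric low/high y errors
    gr.SetPointError(ip, 0, 0, ey, ey)
gr.xaxis.title = 'x'
gr.yaxis.title = 'y'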
Example #60
0
    def load_all(self, fname='strt_session.root'):
        full_fname = os.path.abspath(fname)
        if not os.path.exists(full_fname):
            print 'File %s does not exist.' % full_fname
            return
        runs = []
        with root_open(full_fname) as root_file:
            directories = next(root_file.walk())[1]
            for dir in directories:
                run_id = generate_run_id(runs)
                run = Run(run_id, name=dir)
                event_tree = root_file.Get(dir).Get('Events')
                track_tree = None
                try:
                    track_tree = root_file.Get(dir).Get('Tracks')
                except DoesNotExist:
                    pass
                for event in event_tree:
                    e = Event(ev_id=event.id, data_file_path='')
                    for i in range(event.xhits.size()):
                        h = Hit(event.xhits[i], event.yhits[i])
                        e.hits.append(h)
                    run.events.append(e)

                current_event = run.events[0]
                # the Tracks tree may be missing; skip track loading in that case
                if track_tree is not None:
                    for track in track_tree:
                        ev_id = track.event_id
                        if ev_id != current_event.id:
                            current_event = filter_by_id(run.events, ev_id)
                        t = Track(ev_id, track.id)
                        for i in track.hit_indices:
                            t.hit_indices.append(i)
                        t.color_from_int(track.color)
                        t.rho = track.rho
                        t.theta = track.theta
                        t.set_line((track.x0, track.x1), (track.y0, track.y1))
                        t.R2 = track.R2
                        t.is_good = track.is_good
                        t.calculate_parameters(current_event)
                        current_event.tracks.append(t)

                runs.append(run)
        print '%d runs were loaded' % len(runs)
        return runs