def readDataFile_hits(filename, event_start_no, event_stride):
    '''
    DESCRIPTION:
        This function reads the ROOT file which contains the simulated
        particle data and the corresponding recorded hits in the detector.
        The recorded hits will later be used for energy interpolation to the
        square cells. This code is similar to the starting code in the repo.
    USAGE:
        INPUT:
            filename       : the name of the ROOT file
            event_start_no : the starting event number from which to process
                             the minibatch
            event_stride   : the size of the minibatch to process in one go
                             (increasing it costs more memory than time)
        OUTPUT:
            df             : a pandas dataframe of the recorded hits for the
                             requested batch, to be converted to images
    '''
    print('>>> Reading the root file to get hits dataframe')
    tree = uproot.open(filename)['ana/hgc']

    # Just extracting the required attributes to create the image
    branches = ["rechit_detid", "rechit_energy"]
    # Branches for a logical error check (optional)
    # branches += ["rechit_z", "rechit_cluster2d", "cluster2d_multicluster"]

    cache = {}
    df = tree.pandas.df(branches, cache=cache, executor=executor)

    # Renaming the attributes in short form
    col_names = {name: name.replace('rechit_', '') for name in branches}
    df.rename(col_names, inplace=True, axis=1)

    # Extracting the minibatch of events to process at a time
    if event_stride == 'upto_end':
        df = df.iloc[event_start_no:]
    else:
        df = df.iloc[event_start_no:event_start_no + event_stride]

    # Any event filtering should be done here, once, rather than per event
    print('Shape of dataframe: ', df.shape)
    return df
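# A minimal usage sketch for readDataFile_hits, assuming the uproot3-era API
# used above and the module-level `executor` the function relies on; the
# filename "hgcal_ntuple.root" is hypothetical.
import concurrent.futures
import uproot

executor = concurrent.futures.ThreadPoolExecutor(4)
hits_df = readDataFile_hits('hgcal_ntuple.root', event_start_no=0, event_stride=256)
print(hits_df[['detid', 'energy']].head())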
def thist_to_np_xy_cache(infile, key='reconstructedProfileHisto'):
    assert(infile.endswith(".root"))
    npzfile = infile + "." + str(key) + ".npz"
    if os.path.isfile(npzfile):
        cached = np.load(npzfile)
        return [cached['x'].tolist(), cached['y'].tolist()]
    else:
        f = uproot.open(infile)
        outdata_x = binedges_to_centers(f[key].edges)
        outdata_y = f[key].values
        np.savez(npzfile, x=np.array(outdata_x), y=np.array(outdata_y))
        return [outdata_x, outdata_y]
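# Usage sketch for the cached reader above ("profile.root" is a placeholder):
# the first call writes profile.root.reconstructedProfileHisto.npz next to the
# input file, and subsequent calls load the .npz instead of reopening ROOT.
x, y = thist_to_np_xy_cache("profile.root")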
def load_ttree(filename, treename, branchnames=None):
    """
    Loads a ROOT TTree into a numpy record array.
    If branchnames is None, all branches are read.
    """
    import numpy as np
    tree = uproot.open(filename)[treename]
    if not branchnames:
        branchnames = tree.keys()
    array_dict = tree.arrays(branchnames)
    return np.rec.fromarrays(array_dict.values(),
                             names=[x.decode() for x in array_dict.keys()])
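# Usage sketch (file and tree names hypothetical): with uproot3, tree.arrays()
# returns a dict keyed by bytes, which load_ttree decodes into field names, so
# each branch is then available as an attribute on the record array.
rec = load_ttree("events.root", "DecayTree")
# e.g. rec.<branchname> is a plain numpy array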
def readDataFile_genpart(filename, event_start_no, event_stride):
    '''
    DESCRIPTION:
        This function is similar to readDataFile_hits, but it reads the
        genpart branches of the same events, to generate the target labels
        for the corresponding image files.
    USAGE:
        INPUT:
            filename       : the name of the ROOT file containing the events
            event_start_no : starting event number in this file to extract
                             the events from. This will be controlled manually
                             while generating the data for the training set.
            event_stride   : the number of events to process in one go
                             (consider the memory cost here rather than the
                             time cost)
        OUTPUT:
            df             : a dataframe containing the particles whose
                             properties we will predict from the corresponding
                             hit images of the events
    '''
    # Reading the root file into a dataframe
    print('>>> Reading the rootfile to get genpart dataframe')
    tree = uproot.open(filename)['ana/hgc']
    branches = ["genpart_energy", "genpart_phi", "genpart_eta",
                "genpart_gen", "genpart_pid", "genpart_reachedEE",
                "genpart_posx", "genpart_posy", "genpart_posz"]
    cache = {}
    df = tree.pandas.df(branches, cache=cache, executor=executor)

    # Renaming the attributes in short form
    col_names = {name: name.replace('genpart_', '') for name in branches}
    df.rename(col_names, inplace=True, axis=1)

    # Extracting the dataframe for the required events
    if event_stride == 'upto_end':
        df = df.iloc[event_start_no:]
    else:
        df = df.iloc[event_start_no:event_start_no + event_stride]
    print('>>> Extraction completed with current shape: ', df.shape)
    return df
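# Paired usage sketch (filename hypothetical): reading the hits and the
# genpart labels with the same start/stride keeps images and targets aligned.
hits = readDataFile_hits('hgcal_ntuple.root', event_start_no=0, event_stride=256)
labels = readDataFile_genpart('hgcal_ntuple.root', event_start_no=0, event_stride=256)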
# NOTE: shares its name with the minibatch reader above, but this variant
# selects the hits of a single event on a single layer.
def readDataFile_hits(filename, event_id, layer_num):
    tree = uproot.open(filename)['ana/hgc']
    branches = ["rechit_energy", "rechit_detid",
                "rechit_x", "rechit_y", "rechit_z", "rechit_layer"]
    cache = {}
    df = tree.pandas.df(branches, cache=cache, executor=executor)

    # Projecting the dataframe onto the required attributes
    print('>>> Selecting a single event')
    all_hits = pd.DataFrame({name.replace('rechit_', ''): df.loc[event_id, name]
                             for name in branches if 'rechit_' in name})
    all_hits = all_hits[all_hits['layer'] == layer_num]
    print(all_hits.head())
    print(all_hits.shape)
    return all_hits
def Reader(self):
    '''
    Read the content of a TTree in a ROOT file.
    Note the use of the uproot package.
    self.variables should be a list of the variable names to read.
    '''
    logger.debug("Reading {}".format(self.file_name))
    import uproot
    for key in self.variables:
        self.data.update({str(key): []})
    tree = uproot.open(self.file_name)[self.tree_name]
    for data in tree.iterate(self.variables):
        for key, value in data.items():
            varName = key.decode("utf-8")
            self.data.update(
                {str(varName): self.data[str(varName)] + value.tolist()})
    return True
def open_compwa_plot_data(input_file_path):
    from pycompwa.plotting import PlotData
    pd = PlotData()
    # open file
    file = uproot.open(input_file_path)
    trees = file.keys()
    file = file.get("final_state_id_to_name_mapping")
    for k, v in file.items():
        pd.particle_id_to_name_mapping[v] = k.decode()[:k.decode().find(';')]
    # strip the ";cycle" suffix from the key names before comparing
    if "data" in [x.decode()[:x.decode().find(';')] for x in trees]:
        pd.data = load_ttree(input_file_path, "data")
    if "intensity_weighted_phspdata" in [x.decode()[:x.decode().find(';')]
                                         for x in trees]:
        pd.fit_result_data = load_ttree(
            input_file_path, "intensity_weighted_phspdata")
    return pd
def readDataFile(filename):
    '''
    DESCRIPTION:
        This function reads the ROOT file which contains the simulated
        particle data and the corresponding recorded hits in the detector.
        The recorded hits will later be used for energy interpolation to the
        square cells. This code is similar to the starting code in the repo.
    USAGE:
        INPUT:
            filename : the name of the ROOT file
        OUTPUT:
            df       : the pandas dataframe of the data in the ROOT file
    '''
    tree = uproot.open(filename)['ana/hgc']
    branches = ["rechit_detid", "rechit_z", "rechit_energy",
                'rechit_cluster2d', 'cluster2d_multicluster']
    cache = {}
    df = tree.pandas.df(branches, cache=cache, executor=executor)
    return df
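# Usage sketch (filename hypothetical): since this reader does not rename
# columns, the full-event dataframe can be filtered on the raw branch names,
# e.g. keeping only hits in the positive-z endcap.
df = readDataFile('hgcal_ntuple.root')
ee_hits = df[df['rechit_z'] > 0]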
from matplotlib import colors
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
import scipy.stats as stats
from scipy.stats import norm
import matplotlib.mlab as mlab
import pickle
import math
import random
import collections
import more_itertools as mit
from pandas.core.common import flatten
import uproot
import pandas as pd

filee = "/home/amrutha/muraves/simulation/MuravesSim_geo_updated/build/z_layer_5p4/MuravesSim.root"
file = uproot.open(filee)
lis = file.keys()
tree = file[lis[0]]
lis1 = tree.keys()
eventid = tree.arrays()
data = pd.DataFrame(eventid)

# keep only muon entries (column 1 == 1) with a non-zero column 7
muon_data = data.loc[data.iloc[:, 1] == 1]
muon_data = muon_data.loc[muon_data.iloc[:, 7] != 0]

# unpack the single-element arrays in columns 3 and 4
for i in range(0, len(muon_data.iloc[:, 3])):
    muon_data.iloc[i, 3] = muon_data.iloc[i, 3][0]
    muon_data.iloc[i, 4] = muon_data.iloc[i, 4][0]
    print(i)

# rescalings of columns 4, 10 and 11 (presumably unit conversions)
muon_data.iloc[:, 4] = muon_data.iloc[:, 4] / 1000
muon_data.iloc[:, 10] = muon_data.iloc[:, 10] / 10
muon_data.iloc[:, 11] = muon_data.iloc[:, 11] / 10
def count_entries_in_phasespace(treefilename):
    assert(treefilename.endswith(".root"))
    f = uproot.open(treefilename)
    return f['PhaseSpace'].numentries
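# Usage sketch ("phsp.root" is a placeholder): numentries is the uproot3 name
# for the entry count of a TTree.
print(count_entries_in_phasespace("phsp.root"))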
from matplotlib import pyplot as plt
import uproot
import numpy as np
import pandas as pd

#my_file = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.ZprimeToTT_2016v3.root")
#my_file_mc = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.TT_TuneCUETP8M2T4_2016v3.root")
my_file_bkg1 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-100To200_2018.root")
my_file_bkg2 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-1200To2500_2018.root")
my_file_bkg3 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-200To400_2018.root")
my_file_bkg4 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-2500ToInf_2018.root")
my_file_bkg5 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-400To600_2018.root")
my_file_bkg6 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-600To800_2018.root")
#my_file_bkg7 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-70To100_2018.root")
my_file_bkg8 = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_HT-800To1200_2018.root")
#my_file_bkg = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_2016v3.root")
my_file = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.ZprimeToTT_2018.root")
my_file_mc = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.TTToSemiLeptonic_2018.root")
#my_file_bkg = uproot.open("/nfs/dust/cms/user/xuan/Zprime_102X/BDT_Out/uhh2.AnalysisModuleRunner.MC.WJetsToLNu_2018.root")

tree = my_file['AnalysisTree']
tree_mc = my_file_mc['AnalysisTree']
#tree_bkg = my_file_bkg['AnalysisTree']
#tree_bkg1 = my_file_bkg1['AnalysisTree']
tree_bkg2 = my_file_bkg2['AnalysisTree']
tree_bkg3 = my_file_bkg3['AnalysisTree']
tree_bkg4 = my_file_bkg4['AnalysisTree']
tree_bkg5 = my_file_bkg5['AnalysisTree']
tree_bkg6 = my_file_bkg6['AnalysisTree']
#tree_bkg7 = my_file_bkg7['AnalysisTree']
def _work_function(item, processor_instance, flatten=False, savemetrics=False,
                   mmap=False, nano=False, cachestrategy=None,
                   skipbadfiles=False, retries=0, xrootdtimeout=None):
    if processor_instance == 'heavy':
        item, processor_instance = item
    if not isinstance(processor_instance, ProcessorABC):
        processor_instance = cloudpickle.loads(
            lz4f.decompress(processor_instance))

    if mmap:
        localsource = {}
    else:
        opts = dict(uproot.FileSource.defaults)
        opts.update({'parallel': None})

        def localsource(path):
            return uproot.FileSource(path, **opts)

    import warnings

    out = processor_instance.accumulator.identity()
    retry_count = 0
    while retry_count <= retries:
        try:
            from uproot.source.xrootd import XRootDSource
            xrootdsource = XRootDSource.defaults
            xrootdsource['timeout'] = xrootdtimeout
            file = uproot.open(item.filename,
                               localsource=localsource,
                               xrootdsource=xrootdsource)
            if nano:
                cache = None
                if cachestrategy == 'dask-worker':
                    from distributed import get_worker
                    from .dask import ColumnCache
                    worker = get_worker()
                    try:
                        cache = worker.plugins[ColumnCache.name]
                    except KeyError:
                        # emit warning if not found?
                        pass
                df = NanoEvents.from_file(
                    file=file,
                    treename=item.treename,
                    entrystart=item.entrystart,
                    entrystop=item.entrystop,
                    metadata={'dataset': item.dataset},
                    cache=cache,
                )
            else:
                tree = file[item.treename]
                df = LazyDataFrame(tree, item.entrystart, item.entrystop,
                                   flatten=flatten)
                df['dataset'] = item.dataset
            tic = time.time()
            out = processor_instance.process(df)
            toc = time.time()
            metrics = dict_accumulator()
            if savemetrics:
                if isinstance(file.source, uproot.source.xrootd.XRootDSource):
                    metrics['bytesread'] = value_accumulator(
                        int, file.source.bytesread)
                    metrics['dataservers'] = set_accumulator(
                        {file.source._source.get_property('DataServer')})
                metrics['columns'] = set_accumulator(df.materialized)
                metrics['entries'] = value_accumulator(int, df.size)
                metrics['processtime'] = value_accumulator(float, toc - tic)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
            file.source.close()
            break
        # catch xrootd errors and optionally skip
        # or retry to read the file
        except OSError as e:
            if not skipbadfiles:
                raise e
            else:
                w_str = 'Bad file source %s.' % item.filename
                if retries:
                    w_str += ' Attempt %d of %d.' % (retry_count + 1, retries + 1)
                    if retry_count + 1 < retries:
                        w_str += ' Will retry.'
                    else:
                        w_str += ' Skipping.'
                else:
                    w_str += ' Skipping.'
                warnings.warn(w_str)
            metrics = dict_accumulator()
            if savemetrics:
                metrics['bytesread'] = value_accumulator(int, 0)
                metrics['dataservers'] = set_accumulator({})
                metrics['columns'] = set_accumulator({})
                metrics['entries'] = value_accumulator(int, 0)
                metrics['processtime'] = value_accumulator(float, 0)
                metrics['skippedbadfiles'] = value_accumulator(int, 1)
            wrapped_out = dict_accumulator({'out': out, 'metrics': metrics})
        except Exception as e:
            if retries == retry_count:
                raise e
            w_str = 'Attempt %d of %d. Will retry.' % (retry_count + 1, retries + 1)
            warnings.warn(w_str)
        retry_count += 1

    return wrapped_out
def h1_invmass(counts, bins=34, name=''):
    th1 = ROOT.TH1D(f'{name}', f'{name}_x', int(bins), 2.96, 3.04)
    for index in range(0, len(counts)):
        th1.SetBinContent(index + 1, counts[index])
        # th1.SetBinError(index + 1, np.sqrt(counts[index]))
    th1.SetDirectory(0)
    return th1


## COMPUTE PRESELECTION EFFICIENCY
df_rec = uproot.open("../Tables/SignalTable_pp13TeV_mtexp.root")[
    "SignalTable"].pandas.df().query("pt>0 and rej_accept>0")
df_sim = uproot.open(
    "../Tables/SignalTable_pp13TeV_mtexp.root")["SignalTable"].pandas.df()
presel_eff = len(df_rec) / len(df_sim.query("abs(gY)<0.5 and rej_accept>0"))
print("-------------------------------------")
print("Pre-selection efficiency: ", presel_eff)

## FIT INVARIANT MASS SPECTRA
df = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_data_pp.parquet.gzip")
df_ls = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_ls_pp.parquet.gzip")
df_em = pd.read_parquet(
    "../Utils/ReducedDataFrames/selected_df_em_pp.parquet.gzip")
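# Usage sketch for h1_invmass (the invariant-mass column name 'm' is an
# assumption, not from the original): fill a TH1D from a numpy histogram of
# candidates in the 2.96-3.04 GeV/c^2 window used by the helper.
import numpy as np
counts, _ = np.histogram(df['m'], bins=34, range=(2.96, 3.04))
hist = h1_invmass(counts, bins=34, name='data')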
#!/usr/bin/env python3
"""
Doesn't work, forget it
"""
import numpy as np
import uproot

f = uproot.open("Lumi_MC_100000.root")
events = f["pndsim"]

# each assignment below overwrites the previous one; only fX is actually kept
lmdPoints = events['LMDPoint'][b'LMDPoint.fTrackID']    # track IDs
lmdPoints = events['LMDPoint'][b'LMDPoint.fEventId']    # event IDs
lmdPoints = events['LMDPoint'][b'LMDPoint.fX'].array()  # x positions

print(lmdPoints)
# print(lmdPoints.array())
def file(name, filepath, treepath, location_prefix=None,
         localsource=uproot.MemmapSource.defaults,
         xrootdsource=uproot.XRootDSource.defaults,
         httpsource=uproot.HTTPSource.defaults, **options):
    fullfilepath = filepath if location_prefix is None else location_prefix + filepath
    uprootfile = uproot.open(fullfilepath, localsource=localsource,
                             xrootdsource=xrootdsource, httpsource=httpsource,
                             **options)
    numentries = 0
    colnames = []
    columns = []
    branches = []
    for branchname, uprootbranch in uprootfile[treepath].iteritems(recursive=True):
        if uprootbranch.numbaskets != uprootbranch._numgoodbaskets:
            raise NotImplementedError(
                "branch recovery not handled by uproot-skyhook yet")
        if numpy.uint8(uprootbranch._tree_iofeatures) & numpy.uint8(
                uproot.const.kGenerateOffsetMap) != 0:
            raise NotImplementedError(
                "branch feature kGenerateOffsetMap not handled by uproot-skyhook yet")

        local_offsets = uprootbranch._fBasketEntry[:uprootbranch.numbaskets + 1]
        page_seeks = numpy.empty(uprootbranch.numbaskets, dtype="<u8")
        compression = None
        iscompressed = numpy.empty(uprootbranch.numbaskets, dtype=numpy.bool_)
        compressedbytes = numpy.empty(uprootbranch.numbaskets, dtype="<u4")
        uncompressedbytes = numpy.empty(uprootbranch.numbaskets, dtype="<u4")
        basket_page_offsets = numpy.empty(uprootbranch.numbaskets + 1, dtype="<u4")
        basket_page_offsets[0] = 0
        basket_keylens = numpy.zeros(uprootbranch.numbaskets, dtype="<u4")
        basket_data_borders = numpy.zeros(uprootbranch.numbaskets, dtype="<u4")

        for i in range(uprootbranch.numbaskets):
            source = uprootbranch._source.threadlocal()
            key = uprootbranch._basketkey(source, i, True)
            cursor = uproot.source.cursor.Cursor(key._fSeekKey + key._fKeylen)
            basket_compressedbytes = key._fNbytes - key._fKeylen
            basket_uncompressedbytes = key._fObjlen
            if basket_compressedbytes == basket_uncompressedbytes:
                pagei = basket_page_offsets[i]
                page_seeks[pagei] = cursor.index
                iscompressed[pagei] = False
                compressedbytes[pagei] = basket_compressedbytes
                uncompressedbytes[pagei] = basket_uncompressedbytes
                pagei += 1
                basket_page_offsets[i + 1] = pagei
            else:
                pagei = basket_page_offsets[i]
                start = cursor.index
                total_compressedbytes = 0
                while cursor.index - start < basket_compressedbytes:
                    algo, method, c1, c2, c3, u1, u2, u3 = cursor.fields(
                        source.parent(),
                        uproot.source.compressed.CompressedSource._header)
                    page_compressedbytes = c1 + (c2 << 8) + (c3 << 16)
                    page_uncompressedbytes = u1 + (u2 << 8) + (u3 << 16)
                    total_compressedbytes += 9 + page_compressedbytes
                    if algo == b"ZL":
                        if compression is not None and compression != uproot_skyhook.layout.zlib:
                            raise ValueError(
                                "different compression used by different baskets")
                        compression = uproot_skyhook.layout.zlib
                    elif algo == b"XZ":
                        if compression is not None and compression != uproot_skyhook.layout.lzma:
                            raise ValueError(
                                "different compression used by different baskets")
                        compression = uproot_skyhook.layout.lzma
                    elif algo == b"L4":
                        if compression is not None and compression != uproot_skyhook.layout.lz4:
                            raise ValueError(
                                "different compression used by different baskets")
                        compression = uproot_skyhook.layout.lz4
                        cursor.skip(8)
                        page_compressedbytes -= 8
                    elif algo == b"CS":
                        raise ValueError(
                            "unsupported compression algorithm: 'old' (according to ROOT comments, hasn't been used in 20+ years!)")
                    # extremely rare, though possible, for numpages > numbaskets
                    if pagei >= len(page_seeks):
                        page_seeks = numpy.resize(page_seeks, int(len(page_seeks) * 1.2))
                        iscompressed = numpy.resize(iscompressed, int(len(iscompressed) * 1.2))
                        compressedbytes = numpy.resize(compressedbytes, int(len(compressedbytes) * 1.2))
                        uncompressedbytes = numpy.resize(uncompressedbytes, int(len(uncompressedbytes) * 1.2))
                    page_seeks[pagei] = cursor.index
                    iscompressed[pagei] = True
                    compressedbytes[pagei] = page_compressedbytes
                    uncompressedbytes[pagei] = page_uncompressedbytes
                    pagei += 1
                    cursor.skip(page_compressedbytes)
                if total_compressedbytes != basket_compressedbytes:
                    raise ValueError(
                        "total compressedbytes of all compressed pages ({0}) is not equal to the compressedbytes in the basket key ({1})".format(
                            total_compressedbytes, basket_compressedbytes))
                basket_page_offsets[i + 1] = pagei
                basket_keylens[i] = key._fKeylen
                basket_data_borders[i] = 0 if key._fObjlen == key.border else key.border

        if len(page_seeks) > basket_page_offsets[-1]:
            page_seeks = page_seeks[:basket_page_offsets[-1]].copy()
            iscompressed = iscompressed[:basket_page_offsets[-1]].copy()
            compressedbytes = compressedbytes[:basket_page_offsets[-1]].copy()
            uncompressedbytes = uncompressedbytes[:basket_page_offsets[-1]].copy()
        if (basket_data_borders == 0).all():
            basket_keylens = None
            basket_data_borders = None
        if compression is None:
            compression = uproot_skyhook.layout.none
            iscompressed = None
            compressedbytes = None

        colnames.append(branchname.decode("utf-8"))
        columns.append(uproot_skyhook.layout.Column(
            uprootbranch.interpretation,
            None if uprootbranch.title == b"" or uprootbranch.title is None
            else uprootbranch.title.decode("utf-8")))
        branches.append(uproot_skyhook.layout.Branch(
            local_offsets, page_seeks, compression, iscompressed,
            compressedbytes, uncompressedbytes, basket_page_offsets,
            basket_keylens, basket_data_borders))
        numentries = max(numentries, branches[-1].local_offsets[-1])

    file = uproot_skyhook.layout.File(filepath,
                                      uprootfile._context.tfile["_fUUID"],
                                      branches)
    return uproot_skyhook.layout.Dataset(name, treepath, colnames, columns,
                                         [file], [0, numentries],
                                         location_prefix=location_prefix)
REPLAYPATH = "/home/%s/Work/JLab/hallc_replay_lt" % USER[1]
# Add more path settings as needed in a similar manner
OUTPATH = "%s/UTIL_KAONLT/scripts/demo/OUTPUT" % REPLAYPATH
CUTPATH = "%s/UTIL_KAONLT/DB/CUTS" % REPLAYPATH
sys.path.insert(0, '%s/UTIL_KAONLT/bin/python/' % REPLAYPATH)
import kaonlt as klt  # Import kaonlt module; needs the path-setting line above prior to importing
print("Running as %s on %s, hallc_replay_lt path assumed as %s" % (USER[1], HOST[1], REPLAYPATH))

# Construct the name of the rootfile based upon the info we provided
rootName = "%s/UTIL_KAONLT/ROOTfiles/%s_%s_%s.root" % (REPLAYPATH, ROOTPrefix, runNum, MaxEvent)

# Read stuff from the main event tree; here we just grab some quantities for the HMS/SHMS acceptance
e_tree = up.open(rootName)["T"]

# HMS info
H_gtr_beta = e_tree.array("H.gtr.beta")
H_gtr_xp = e_tree.array("H.gtr.th")  # xpfp -> Theta
H_gtr_yp = e_tree.array("H.gtr.ph")  # ypfp -> Phi
H_gtr_dp = e_tree.array("H.gtr.dp")

# SHMS info
P_gtr_beta = e_tree.array("P.gtr.beta")
P_gtr_xp = e_tree.array("P.gtr.th")  # xpfp -> Theta
P_gtr_yp = e_tree.array("P.gtr.ph")  # ypfp -> Phi
P_gtr_p = e_tree.array("P.gtr.p")
P_gtr_dp = e_tree.array("P.gtr.dp")

r = klt.pyRoot()
# Specify the file which contains the cuts we want to use
fout = '%s/UTIL_KAONLT/DB/CUTS/run_type/demo.cuts' % REPLAYPATH
        'title': r'$\pi$'
    },
    'mu': {
        'title': r'$\mu$'
    },
},
}

plotTitleAddOn = {
    'Dst': r'$D^{*}$ tree',
    'D0': r'$D^{0}$ tree',
}

for ntpName in ntpsIn:
    hep.style.use('LHCb2')
    ntp = uproot.open(ntpName)
    for treeId, scheme in plotScheme.items():
        if treeId in ntpName:
            for part in scheme:
                brP, brEta, brWt = read_branches(
                    ntp, 'tree', [f'{part}_p', f'{part}_eta', f'wtrk_{part}'])
                effRatio = (f', {plotTitleAddOn[treeId]}, '
                            f'tracking eff: {brWt.sum() / brWt.size:.2f}')
                plotPEta(brP, brEta, f'{treeId}_{part}_p_eta.png',
                         binning=plotRange,
                         title=scheme[part]['title'] + effRatio)
            break
def test_vector_of_vector_of_numbers(self):
    branch = uproot.open("tests/samples/vectorVectorDouble.root")["t"]["x"]
    assert branch.array().tolist() == [
        [],
        [[], []],
        [[10.0], [], [10.0, 20.0]],
        [[20.0, -21.0, -22.0]],
        [[200.0], [-201.0], [202.0]],
    ]
def test_array(self):
    tree = uproot.open("tests/samples/small-evnt-tree-fullsplit.root")["tree"]
    assert tree.array("ArrayI16[10]").tolist() == [[i] * 10 for i in range(100)]
def test_strings3(self):
    tree = uproot.open("tests/samples/small-evnt-tree-fullsplit.root")["tree"]
    # event i holds (i % 10) copies of the byte string b"vec-XXX" where XXX
    # is i zero-padded to three digits, e.g. event 7 -> [b'vec-007'] * 7 and
    # every 10th event is empty; the comprehension below reproduces the
    # original literal expectation exactly
    expected = [[b"vec-%03d" % i] * (i % 10) for i in range(100)]
    assert tree.array("StlVecStr").tolist() == expected
import uproot
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.optimize as scp
import numpy as np
import sys

sns.set(font_scale=2)
NBINS = 1200

#f = uproot.open("./RPTest_100.root")
f = uproot.open("./MCRecoEventTest.root")
#f = uproot.open("./FullComb_preTrigFix.root")
ftree = f.get("phaseII")
ftree.items()

digitT = ftree.get("digitT")
TrueVtxTime = ftree.get("trueVtxTime")
TrueVtxX = ftree.get("trueVtxX")
TrueVtxY = ftree.get("trueVtxY")
TrueVtxZ = ftree.get("trueVtxZ")
DeltaT = ftree.get("deltaVtxT")
digitX = ftree.get("digitX")
digitY = ftree.get("digitY")
digitType = ftree.get("digitType")
digitZ = ftree.get("digitZ")
evn = ftree.get("eventNumber")

evnums = evn.array()
diT = digitT.array()
diX = digitX.array()
diY = digitY.array()
diZ = digitZ.array()
def main(args):
    fname = args.fname
    file = uproot.open(fname)
    all_ttrees = dict(file.allitems(
        filterclass=lambda cls: issubclass(cls, uproot.tree.TTreeMethods)))
    tracks = all_ttrees[b'PWGHF_TreeCreator/tree_Particle;1']
    pds_trks = tracks.pandas.df()  # entrystop=10)
    events = all_ttrees[b'PWGHF_TreeCreator/tree_event_char;1']
    pds_evs = events.pandas.df()

    # print the banner first
    fj.ClusterSequence.print_banner()

    # signal jet definition
    maxrap = 0.9
    jet_R0 = args.jetR
    jet_def = fj.JetDefinition(fj.antikt_algorithm, jet_R0)
    jet_selector = fj.SelectorPtMin(0.0) & fj.SelectorPtMax(1000.0) & fj.SelectorAbsEtaMax(1)
    jet_area_def = fj.AreaDefinition(fj.active_area, fj.GhostedAreaSpec(maxrap))
    print(jet_def)

    # background estimation
    grid_spacing = maxrap / 10.
    gmbge = fj.GridMedianBackgroundEstimator(maxrap, grid_spacing)
    print()

    output_columns = ['evid', 'pt', 'eta', 'phi', 'area', 'ptsub']
    e_jets = pd.DataFrame(columns=output_columns)

    for i, e in pds_evs.iterrows():
        iev_id = int(e['ev_id'])
        _ts = pds_trks.loc[pds_trks['ev_id'] == iev_id]

        start = time.time()
        _tpsj = fj_parts_from_tracks_numpy(_ts)
        end = time.time()
        dt_swig = end - start

        start = time.time()
        _tpsj_for = fj_parts_from_tracks(_ts)
        end = time.time()
        dt_for = end - start

        # print('len {} =?= {}'.format(len(_tpsj_for), len(_tpsj)))
        print('[i] timing (ntracks={}): dt_for: {} dt_swig: {} ratio: {}'.format(
            len(_tpsj), dt_for, dt_swig, dt_for / dt_swig))
        # print('maximum particle rapidity:', max([psj.rap() for psj in _tpsj]))

        _cs = fj.ClusterSequenceArea(_tpsj, jet_def, jet_area_def)
        _jets = jet_selector(fj.sorted_by_pt(_cs.inclusive_jets()))
        gmbge.set_particles(_tpsj)
        # print("rho = ", gmbge.rho())
        # print("sigma = ", gmbge.sigma())
        # _jets = jet_selector(jet_def(_tpsj))
        _jets_a = pd.DataFrame([[iev_id, j.perp(), j.eta(), j.phi(), j.area(),
                                 j.perp() - gmbge.rho() * j.area()]
                                for j in _jets],
                               columns=output_columns)
        e_jets = e_jets.append(_jets_a, ignore_index=True)
        # print('event', i, 'number of parts', len(_tpsj), 'number of jets', len(_jets))
        # print(_jets_a.describe())
        if args.fjsubtract:
            fj_example_02_area(_tpsj)

    # print(e_jets.describe())
    joblib.dump(e_jets, args.output)
NUM_EVENTS_PROCESSED = 0
INPUT_FILES = glob.glob('/pnfs/desy.de/cms/tier2/store/user/missirol/jme_trigger/jmeTriggerNtuples/pfMET/v02/191103/Data_Run2018B_EGamma/*/*/*/*/*.root')
output = 'test'

for i_inpf in INPUT_FILES:
    if VERBOSE:
        print('\033[1m' + '\033[92m' + '[input]' + '\033[0m', i_inpf)
    stop_exe = False
    if UPROOT:
        import uproot
        i_ttree = uproot.open(i_inpf)['JMETriggerNTuple/Events']
        i_firstEvent = 0
        i_lastEvent = min(num_maxEvents - NUM_EVENTS_PROCESSED,
                          i_ttree.numentries) if (num_maxEvents >= 0) else i_ttree.numentries
        hltPuppiMET_pt = i_ttree.arrays('*', entrystart=i_firstEvent,
                                        entrystop=i_lastEvent)
        for i_ent in range(i_firstEvent, i_lastEvent):
            a = hltPuppiMET_pt['hltPuppiMET_pt'][i_ent]
            if (num_maxEvents >= 0) and (NUM_EVENTS_PROCESSED >= num_maxEvents):
                stop_exe = True
                break
            # analyze_event(event=i_evt, th1s=th1s, th2s=th2s)
from vectorized import vectorize
import uproot
import numpy as np
from itertools import combinations
import matplotlib.pyplot as plt
import time
import functional

columnar_events = uproot.open(
    "http://scikit-hep.org/uproot/examples/HZZ.root")["events"]
columns = columnar_events.arrays(["*Muon*"])
Muon_E = columns["Muon_E"].content
Muon_Px = columns["Muon_Px"].content
Muon_Py = columns["Muon_Py"].content
Muon_Pz = columns["Muon_Pz"].content
starts = columns["Muon_Px"].starts
stops = columns["Muon_Px"].stops

# ======================================================================
# Examples
# ======================================================================


def totalp(index, Muon_Px, Muon_Py, Muon_Pz, Muon_P):
    px2 = Muon_Px[index]**2
    py2 = Muon_Py[index]**2
    pz2 = Muon_Pz[index]**2
    Muon_P[index] = np.sqrt(px2 + py2 + pz2)
import uproot
import pandas
import numpy as np
import xgboost

file_train = uproot.open("file_train.root")
file_test = uproot.open("file_test.root")
file_train_and_test = uproot.open("file_train_and_test.root")

tree_train = file_train["tree_name"]
tree_test = file_test["tree_name"]
tree_train_and_test = file_train_and_test["tree_name"]


# function here just so I don't repeat this four times in the code
def getit(tf):
    # select which branches not to import
    df = tf.pandas.df(lambda branch: branch.name != b'true_mass' and
                      branch.name[:3] != b'dir')
    # create new branches based on other branches
    df['sum_nhits_1'] = df.loc[:, ['nhits1_p0', 'nhits1_p1', 'nhits1_p2']].sum(axis=1)
    df['sum_nhits_2'] = df.loc[:, ['nhits2_p0', 'nhits2_p1', 'nhits2_p2']].sum(axis=1)
    df['sum_nhits'] = df.loc[:, ['sum_nhits_1', 'sum_nhits_2']].sum(axis=1)
    return df


data_train_sig = getit(tree_train)
data_test_sig = getit(tree_test)
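# A hedged sketch of where the xgboost import above could come in: wrapping the
# dataframes as DMatrix for a booster. The labels are placeholders (1=signal);
# a real training set would also need background rows labelled 0.
dtrain = xgboost.DMatrix(data_train_sig, label=np.ones(len(data_train_sig)))
dtest = xgboost.DMatrix(data_test_sig, label=np.ones(len(data_test_sig)))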
    help='training k-fold. use 0,1,2 for offset in selecting the third of '
         'data to reserve for validation')
args = parser.parse_args()

lr_init = 1e-2
epochs = 20
train_batch_size = 2**10
infer_batch_size = 2**15
num_workers = 4
kfold = int(args.kfold)
train_modulus = 3
train_portion = 2

t = uproot.open(args.train)['Tree']
pt, eta, phi, w = t.arrays(['jetPt', 'jetEta', 'jetPhi', 'weight'],
                           outputtype=tuple)
# mass set to zero in toyTrees
lv = uproot_methods.TLorentzVectorArray.from_ptetaphim(pt, eta, phi, 0)
lv, w = torch.FloatTensor([pt, eta, phi, lv.mass]), torch.FloatTensor(w)
# lv is [feature, event, jet]; we want [event, feature, jet]
lv = lv.transpose(0, 1)

print("Split into training and validation sets")
n = lv.shape[0]
idx = np.arange(n)
is_train = (idx + kfold) % train_modulus < train_portion
is_valid = ~is_train
dataset_train = TensorDataset(lv[is_train], w[is_train])
if __name__ == '__main__':
    #recomb = "06417"
    recomb = "0715"
    energyList = [1, 2, 3, 4, 5, 6, 7]
    variables = ['dEdx', 'aarondEdx', 'energy', 'hitCorrection',
                 'mcDepCorrection', 'missedHitsCorrection', 'noHitsCorrection',
                 'energyCorrected', 'mcInitEnergy', 'recoMinusTrueOverTrue',
                 'mcIDEdiscrep']

    # rfile2 = up.open("PDSPProd2_1GeV.root")
    # df2 = rfile2["pdAnaTree/AnaTree"].pandas.df(variables)

    # fig = plt.figure(figsize=(26,14))
    plotCount = 1
    for energy in energyList:
        print("{} GeV".format(str(energy)))
        rfile = up.open("singlePositron_{}GeV_sceON_keepOFF_recomb{}.root".format(energy, recomb))
        rfile_phot = up.open("singlePhoton_{}GeV_sceON_keepOFF_recomb{}.root".format(energy, recomb))
        df = rfile["pdAnaTree/AnaTree"].pandas.df(variables)
        df_phot = rfile_phot["pdAnaTree/AnaTree"].pandas.df(variables)

        peakVal = peakValue(np.histogram(df.mcDepCorrection / df.mcInitEnergy * 100,
                                         range=(0, 5), bins=50))
        peakVal_phot = peakValue(np.histogram(df_phot.mcDepCorrection / df_phot.mcInitEnergy * 100,
                                              range=(0, 5), bins=50))

        plt.subplot(len(energyList), 6, plotCount)
        plotCount += 1
        plt.hist(df.mcDepCorrection / df.mcInitEnergy * 100, range=(0, 5), bins=50,
                 density=True, histtype='stepfilled', edgecolor='r', fc=(1, 0, 0, 0.1),
                 label=r'$e^-$: Peak={}'.format(round(peakVal, 3)))
        plt.hist(df_phot.mcDepCorrection / df_phot.mcInitEnergy * 100, range=(0, 5), bins=50,
                 density=True, histtype='stepfilled', edgecolor='b', fc=(0, 0, 1, 0.1),
                 label=r'$\gamma$: Peak={}'.format(round(peakVal_phot, 3)))
        plt.axvline(peakVal, c='r', ls='--', lw=0.5)
        plt.axvline(peakVal_phot, c='b', ls='--', lw=0.5)
def Recon(filename, output, mode, offset, types, initial, MC, method, verbose):
    '''
    reconstruction
    filename: input ROOT reference file
    output:   output .h5 file
    '''
    print(filename)
    # Create the output file and the group
    h5file = tables.open_file(output, mode="w", title="OneTonDetector",
                              filters=tables.Filters(complevel=9))
    group = "/"
    # Create tables
    ReconTable = h5file.create_table(group, "Recon", pub.ReconData, "Recon")
    recondata = ReconTable.row

    # Loop over events
    f = uproot.open(filename)
    data = f['SimTriggerInfo']
    if types == 'Sim_root':
        PMTId = data['PEList.PMTId'].array()
        Time = data['PEList.HitPosInWindow'].array()
        Charge = data['PEList.Charge'].array()
        SegmentId = ak.to_numpy(ak.flatten(data['truthList.SegmentId'].array()))
        VertexId = ak.to_numpy(ak.flatten(data['truthList.VertexId'].array()))
        x = ak.to_numpy(ak.flatten(data['truthList.x'].array()))
        y = ak.to_numpy(ak.flatten(data['truthList.y'].array()))
        z = ak.to_numpy(ak.flatten(data['truthList.z'].array()))
        E = ak.to_numpy(ak.flatten(data['truthList.EkMerged'].array()))

        for pmt, time_array, pe_array, sid, vid, xt, yt, zt, Et in zip(
                PMTId, Time, Charge, SegmentId, VertexId, x, y, z, E):
            recondata['x_truth'] = xt
            recondata['y_truth'] = yt
            recondata['z_truth'] = zt
            recondata['E_truth'] = Et
            recondata['EventID'] = sid
            fired_PMT = ak.to_numpy(pmt)
            time_array = ak.to_numpy(time_array)
            # PMT order: 0-29
            # PE /= Gain
            # pe_array, cid = np.histogram(pmt, bins=np.arange(31)-0.5, weights=PE)
            # For hit info
            pe_array, cid = np.histogram(fired_PMT, bins=np.arange(31))
            # For a very rough estimate
            # pe_array = np.round(pe_array)
            if np.sum(pe_array) == 0:
                continue

            # inner recon
            if args.initial == 'WA':
                x0_in = pub.Initial.ChargeWeighted(pe_array, PMT_pos, time_array)
            elif args.initial == 'fit':
                x0_in = pub.Initial.FitGrid(pe_array, mesh, tpl, time_array)
            elif args.initial == 'MC':
                x0_in = pub.Initial.MCGrid(pe_array, mesh, tpl, time_array)
            x0_in = x0_in[1:]
            result_in = minimize(pub.Likelihood_Truth.Likelihood, x0_in,
                                 method='SLSQP',
                                 bounds=((0, 1), (None, None), (None, None), (None, None)),
                                 args=(coeff_time, coeff_pe, PMT_pos, fired_PMT,
                                       time_array, pe_array, cut_time, cut_pe))
            z, x = pub.Likelihood_Truth.Calc_basis(result_in.x, PMT_pos, cut_pe)
            L, E_in = pub.Likelihood_Truth.Likelihood_PE(coeff_pe, z, x, pe_array, cut_pe)

            # xyz coordinates
            in2 = pub.r2c(result_in.x[:3]) * shell
            recondata['x_sph_in'] = in2[0]
            recondata['y_sph_in'] = in2[1]
            recondata['z_sph_in'] = in2[2]
            recondata['success_in'] = result_in.success
            recondata['Likelihood_in'] = result_in.fun

            # outer recon
            if args.initial == 'WA':
                x0_out = result_in.copy()
                x0_out[0] = 0.92
            else:
                x0_out = pub.Initial.FitGrid(pe_array, mesh_out, tpl_out, time_array)
                x0_out = x0_out[1:]
            result_out = minimize(pub.Likelihood_Truth.Likelihood, x0_out,
                                  method='SLSQP',
                                  bounds=((0, 1), (None, None), (None, None), (None, None)),
                                  args=(coeff_time, coeff_pe, PMT_pos, fired_PMT,
                                        time_array, pe_array, cut_time, cut_pe))
            z, x = pub.Likelihood_Truth.Calc_basis(result_out.x, PMT_pos, cut_pe)
            L, E_out = pub.Likelihood_Truth.Likelihood_PE(coeff_pe, z, x, pe_array, cut_pe)
            out2 = pub.r2c(result_out.x[:3]) * shell
            recondata['x_sph_out'] = out2[0]
            recondata['y_sph_out'] = out2[1]
            recondata['z_sph_out'] = out2[2]
            recondata['success_out'] = result_out.success
            recondata['Likelihood_out'] = result_out.fun

            # 0-th order (energy intercept)
            base_in = LG.legval(result_in.x[1], coeff_pe.T)
            base_out = LG.legval(result_out.x[1], coeff_pe.T)
            recondata['E_sph_in'] = np.exp(E_in - base_in[0] + np.log(2))
            recondata['E_sph_out'] = np.exp(E_out - base_out[0] + np.log(2))

            if verbose:
                print('-' * 60)
                print(f'inner: {np.exp(E_in - base_in[0] + np.log(2))}')
                print(f'outer: {np.exp(E_out - base_out[0] + np.log(2))}')
                print('inner')
                print(f'Template likelihood: {-np.max(L)}')
                print('%d vertex: [%+.2f, %+.2f, %+.2f] radius: %+.2f, Likelihood: %+.6f'
                      % (sid, in2[0], in2[1], in2[2], norm(in2), result_in.fun))
                print('outer')
                print('%d vertex: [%+.2f, %+.2f, %+.2f] radius: %+.2f, Likelihood: %+.6f'
                      % (sid, out2[0], out2[1], out2[2], norm(out2), result_out.fun))
            recondata.append()
    elif types == 'h5':
        pass

    # Flush into the output file
    ReconTable.flush()
    h5file.close()
"correctedTerEigenValLength", "pandoraPriProjectionLength", "pandoraSecProjectionLength", "pandoraTerProjectionLength", "correctedPriProjectionLength", "correctedSecProjectionLength", "correctedTerProjectionLength", # "nonCorrectedHit3DX", "nonCorrectedHit3DY", "nonCorrectedHit3DZ", # "correctedHit3DX", "correctedHit3DY", "correctedHit3DZ", # "dEdt", # "hitCharge", "hitTrueEnergy", ] print("Open File 1") rfile1 = up.open( "./data/singleParticles/electrons/singleElectron_1GeV_trimmed_cnn.root" ) print("Open File 2") rfile2 = up.open( "./data/singleParticles/electrons/singleElectron_2GeV_trimmed_cnn.root" ) print("Open File 3") rfile3 = up.open( "./data/singleParticles/electrons/singleElectron_3GeV_trimmed_cnn.root" ) print("Open File 4") rfile4 = up.open( "./data/singleParticles/electrons/singleElectron_4GeV_trimmed_cnn.root" ) print("Open File 5") rfile5 = up.open(
from preprocessing import *

# for PointNet
preprocessing_algo = make_graph_noedge
# for EdgeNet
#preprocessing_algo = make_graph_etaphi
#grouping_algo = 'knn'  # or 'kdtree'
#preprocessing_args = dict(k=4)
#preprocessing_args = dict(r=0.07)  # if algo == 'kdtree'
#layer_norm = 150  # only used for etaphi, no effect for other preprocessors

fname = '../data/ntup/partGun_PDGid15_x1000_Pt3.0To100.0_NTUP_1.root'
test = uproot.open(fname)['ana']['hgc']

# example of generating a binary ground-truth adjacency matrix
# for both endcaps in all events for all clusters:
# truth is now that hits in adjacent layers are connected,
# and so are hits in the same layer within delta-R < 2
arrays = test.arrays([b'simcluster_hits_indices'])
rechit = test.arrays([
    b'rechit_x', b'rechit_y', b'rechit_z', b'rechit_eta', b'rechit_phi',
    b'rechit_layer', b'rechit_time', b'rechit_energy'
])
NEvents = rechit[b'rechit_z'].shape[0]
rechit[b'rechit_x'].content[rechit[b'rechit_z'].content < 0] *= -1
sim_indices = awkward.fromiter(arrays[b'simcluster_hits_indices'])
valid_sim_indices = sim_indices[sim_indices > -1]
import uproot
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import os.path

'''Takes in a ROOT file and extracts the appropriate data, then uses uproot
to view the 2D histogram image.'''

Sample = 'TTbar'
BASE_PATH = "/storage2/ec6821/P2JetsSums/MSciProjects/2020/Histograms_{sample}".format(sample=Sample)

for subdir, dirs, files in os.walk(BASE_PATH):
    i = 0
    for filename in files:
        filepath = subdir + os.sep + filename
        if filepath.endswith(".root"):
            f = uproot.open(filepath)
            a = f["caloGrid"].values
            # plt.imshow(a)
            # plt.show()
            outPathBase = '{sample}_numpy'.format(sample=Sample.upper())
            if not os.path.isdir(outPathBase):
                os.mkdir(outPathBase)
            outFile = outPathBase + '/' + filename.replace('.root', '')
            np.save(outFile, a)
            # break
def preprocess(args):
    """
    Preprocess the flat ROOT files in input.

    The input ROOT files are organized as follows:
    - run
    - event
    - trackster_id
    - [layers of the layerClusters belonging to this trackster]
    - [etas of the layerClusters belonging to this trackster]
    - [phis of the layerClusters belonging to this trackster]
    - [energies of the layerClusters belonging to this trackster]
    - energy of the caloParticle linked to this trackster
    - pdgId of the caloParticle linked to this trackster
    - raw_energy of the trackster

    FLAT PROTOCOL BUFFER PADDED FORMAT
    This is the format that will be used as input to the training.
    It is structured as follows:
    - trackster number
    - energy of the caloParticle
    - pdgId of the caloParticle
    - raw_energy of the trackster
    - sigma1 of the trackster (along the "major" PCA component)
    - sigma2 of the trackster
    - sigma3 of the trackster
    - layer of a layerCluster
    - energy of a layerCluster
    - eta of a layerCluster
    - phi of a layerCluster

    This structure is padded in the sense that, for every trackster, there
    will be 10 layerClusters for each layer, for all 50 layers of HGCAL.
    The target final size of the DataFrame is:

        rows of target DataFrame = number of tracksters in input ROOT files * 50 * 10

    The number of tracksters in the input ROOT files has to be deduced from
    the ROOT file itself, e.g. via df.shape[0] on the loaded dataframe.
    """
    if not isinstance(args, argparse.Namespace):
        args = SimpleNamespace(**args)
    if args.debug:
        pd.set_option('display.max_rows', 200)

    rootfiles_path = os.path.join(args.inputDir, args.suffix + '*.root')
    rootfiles = glob(rootfiles_path)
    if len(rootfiles) == 0:
        print('Input directory: {} Input suffix: {}'.format(args.inputDir, args.suffix))
        raise ValueError(
            '[preprocessing_pb.py]: No input files found in {}.'.format(rootfiles_path))

    max_perlayer = 10
    number_layers = 50
    keepVars = ['ts_energy', 'ts_sigma1', 'ts_sigma2', 'ts_sigma3',
                'cp_missingEnergyFraction']
    keepVars += ['cp_energy', 'cp_pdgid', 'lc_energy', 'lc_eta', 'lc_phi',
                 'lc_layer']

    for rootfile in tqdm(rootfiles):
        start_time = time()
        filename = os.path.basename(rootfile).replace(".root", "")
        if args.debug:
            print('File: ', rootfile, filename)
        try:
            with uproot.open(rootfile) as open_file:
                directory = open_file[args.dir]
                tree = directory[args.tree]
                df = tree.arrays(filter_name=keepVars, library='pd')
                df = df[df['cp_missingEnergyFraction'] < args.maxMissingEnergyFraction]
                # drop() returns a copy, so the result has to be assigned back
                df = df.drop(['cp_missingEnergyFraction'], axis=1)
        except:
            print('File {} had a problem.'.format(rootfile))
            raise
        unique_entries = df.index.get_level_values('entry').unique().to_list()
        events = len(unique_entries)
        if args.debug:
            print(df)

        checkDirAndCreate(args.outputDir)
        checkDirAndCreate(os.path.join(args.outputDir, 'padded'))
        name = os.path.join(args.outputDir, 'padded', filename + '_padded.pb')
        writer = tf.io.TFRecordWriter(name)
        with open(args.targetsFile, 'a') as f:
            f.write(name + '\n')
        for entry in unique_entries:
            example = make_example_pid_and_pad(df.iloc[[entry]], number_layers,
                                               max_perlayer, args.debug)
            writer.write(example.SerializeToString())
            if args.debug:
                print(example)
        end_time = time()
        name = name.replace('.pb', '.log')
        with open(name, 'w') as logfile:
            logfile.write('Events: {} Time: {} Rate: {}'.format(
                events, (end_time - start_time),
                events / float(end_time - start_time)))
        with open(args.targetsFile, 'a') as f:
            f.write(name + '\n')
    print("Done!")
import ROOT as R
import uproot
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score


def scale_list(factor, items):
    for item in items:
        item.Scale(factor)


# Import trees
loc_yeB = "/data/bfys/jrol/RapidSim/{0}2tau2pipipi_filtered.root"
loc_noB = "/data/bfys/jrol/RapidSim/{0}2tau2pipipi_noBTracking.root"

f_Dplus_noB = uproot.open(loc_noB.format("Dplus")); Dplus_tree_noB = f_Dplus_noB["DecayTree"]
f_Dsplus_noB = uproot.open(loc_noB.format("Dsplus")); Dsplus_tree_noB = f_Dsplus_noB["DecayTree"]
f_Bplus_noB = uproot.open(loc_noB.format("Bplus")); Bplus_tree_noB = f_Bplus_noB["DecayTree"]
f_Bcplus_noB = uproot.open(loc_noB.format("Bcplus")); Bcplus_tree_noB = f_Bcplus_noB["DecayTree"]
f_Dplus_yeB = uproot.open(loc_yeB.format("Dplus")); Dplus_tree_yeB = f_Dplus_yeB["DecayTree"]
f_Dsplus_yeB = uproot.open(loc_yeB.format("Dsplus")); Dsplus_tree_yeB = f_Dsplus_yeB["DecayTree"]
f_Bplus_yeB = uproot.open(loc_yeB.format("Bplus")); Bplus_tree_yeB = f_Bplus_yeB["DecayTree"]
f_Bcplus_yeB = uproot.open(loc_yeB.format("Bcplus")); Bcplus_tree_yeB = f_Bcplus_yeB["DecayTree"]

Dp_df_noB = Dplus_tree_noB.pandas.df(); Dp_df_yeB = Dplus_tree_yeB.pandas.df()
Dsp_df_noB = Dsplus_tree_noB.pandas.df(); Dsp_df_yeB = Dsplus_tree_yeB.pandas.df()
Bp_df_noB = Bplus_tree_noB.pandas.df(); Bp_df_yeB = Bplus_tree_yeB.pandas.df()
Bcp_df_noB = Bcplus_tree_noB.pandas.df(); Bcp_df_yeB = Bcplus_tree_yeB.pandas.df()

h2d_sig = R.TH2F("2dh_sig", "Signal events", 30, 0, 7.0, 30, 0, 4)
def __init__(self, name):
    self.mc_file = uproot.open(name)
    self.t_pts = self.mc_file['npts']
    self.n_event = self.t_pts.numentries
    self.n_pts_prev = np.zeros((4, 2), dtype=np.uint32)
def thist_to_np_xy(infile, key='reconstructedProfileHisto'):
    assert(infile.endswith(".root"))
    f = uproot.open(infile)
    outdata_x = binedges_to_centers(f[key].edges)
    outdata_y = f[key].values
    return [outdata_x, outdata_y]
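# binedges_to_centers is assumed by both thist_to_np_xy functions above but is
# not shown in this collection; a minimal numpy sketch of what it presumably
# does (n+1 bin edges -> n bin centers):
import numpy as np

def binedges_to_centers(edges):
    """Midpoints of adjacent bin edges."""
    edges = np.asarray(edges)
    return (0.5 * (edges[:-1] + edges[1:])).tolist()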
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import utils_endcap
import time, pickle
from tqdm import tqdm
from scipy import stats

# seed the numpy RNG for reproducible sklearn results
np.random.seed(1337)

fin = uproot.open(
    "/home/prasant/Files/Lowmass_ntuple/Out_Singlephoton_Lowmass_photonIDMVA_woShowershape_LMTrain_18pT18_RunIIFall17_3_1_0_03122018.root"
)
print(fin.keys())
prompt = fin['promptPhotons']
fake = fin['fakePhotons']
print(fin['promptPhotons'].keys())
print(fin['fakePhotons'].keys())

## for endcap
geometry_selection = lambda tree: np.logical_and(
    abs(tree.array('scEta')) > 1.566, abs(tree.array('scEta')) < 2.5)

input_values, target_values, orig_weights, train_weights, pt, scEta, input_vars = utils_endcap.load_file(
    fin, geometry_selection)
import uproot, uproot_methods
from Builder import Initialize

file = uproot.open("nano_5.root")
tree = file["Events"]

e = Initialize({'pt': tree.array("Electron_pt"),
                'eta': tree.array("Electron_eta"),
                'phi': tree.array("Electron_phi"),
                'mass': tree.array("Electron_mass"),
                'iso': tree.array('Electron_pfRelIso03_all'),
                'dxy': tree.array('Electron_dxy'),
                'dz': tree.array('Electron_dz'),
                'id': tree.array('Electron_mvaSpring16GP_WP90')})

mu = Initialize({'pt': tree.array("Muon_pt"),
                 'eta': tree.array("Muon_eta"),
                 'phi': tree.array("Muon_phi"),
                 'mass': tree.array("Muon_mass"),
                 'iso': tree.array('Muon_pfRelIso04_all'),
                 'dxy': tree.array('Muon_dxy'),
                 'dz': tree.array('Muon_dz')})

tau = Initialize({'pt': tree.array('Tau_pt'),
                  'eta': tree.array('Tau_eta'),
                  'phi': tree.array('Tau_phi'),
                  'mass': tree.array('Tau_mass'),
                  'decayMode': tree.array('Tau_idDecayMode'),
                  'decayModeNew': tree.array('Tau_idDecayModeNewDMs'),
                  'id': tree.array('Tau_idMVAnew')})
import os
import uproot
from multiprocessing import Pool

args = parser.parse_args()
outdir = args.o
if len(outdir) < 1:
    exit()
os.system('mkdir -p ' + outdir)


def convertAndWrite(infile):
    thisofile = outdir + '/' + os.path.basename(infile)[:-5] + '_skim.root'
    os.system('skim ' + infile + ' ' + thisofile)


allfiles = []
with open(args.inputFile) as f:
    for l in f:
        l = l.rstrip('\n').rstrip(' ')
        # keep only lines that point at readable files with a "B4" tree
        if len(l) and os.path.isfile(l):
            try:
                tree = uproot.open(l)["B4"]
                nevents = tree.numentries
                allfiles.append(l)
            except:
                pass

print(allfiles)
p = Pool()
p.map(convertAndWrite, allfiles)
ntree_limit = 800
_model = xgb.Booster({'nthread': 6})
#mpath = '../mva/model_' + args.model + '/xgb_fulldata_None.model'
mpath = '/work/ytakahas/work/analysis/CMSSW_10_2_10/src/rJpsi/mva/model_' + args.model + '/xgb_fulldata_None.model'
os.system('ls -lart ' + mpath)
print('model path = ', mpath)
#_model.load_model('/work/ytakahas/work/analysis/CMSSW_10_2_10/src/BcJpsiTauNu/mva/model_' + args.model + '/xgb_fulldata_None.model')
_model.load_model(mpath)
#_model.load_model('/work/ytakahas/work/mva/BcJpsiTau/model/xgb_fulldata_None.model')

print(args.file)
os.system('ls -lart ' + args.file)
events = uproot.open(args.file)['tree']

#print('setup')
#ofile = TFile('Myroot.root', 'recreate')
#otree = TTree('tree', 'tree')
#mass = np.zeros(1, dtype=float)
#xgbs = np.zeros(1, dtype=float)
#otree.Branch('mass', mass, 'mass/F')
#otree.Branch('xgbs', xgbs, 'xgbs/F')

for i, params in enumerate(
        events.iterate(outputtype=pd.DataFrame, entrysteps=1000000)):
    print()
    print(i, 'making ... ' + fdir + '/Myroot_' + args.prefix + '_' + str(i) +
def GetTree(file_name, add_cuts="", write_tracks=False):
    """Retrieves the events in the TTree with uproot and returns them as a
    pandas DataFrame."""
    if debug:
        t0_jets = dt.datetime.now()
        print('Start GetTree')
    var_list = list(mapping.keys())
    tree = up.open(file_name)[tree_name]
    if write_tracks:
        tracks_ndarray = GetTracks(tree)
        if debug:
            print('Getting tracks ndarray took a total of: {}'.format(
                dt.datetime.now() - t0_jets))
            t0_jets = dt.datetime.now()
    df = tree.pandas.df(var_list)
    if debug:
        print('Getting df with uproot took: {}'.format(dt.datetime.now() - t0_jets))
        t0_jets = dt.datetime.now()
    df['jet_bH_pt'] = df.apply(lambda row: max(row['jet_bH_pt'])[0], axis=1)
    # Set all negative bH pt values to 0
    df['jet_bH_pt'] = df['jet_bH_pt'].mask(df['jet_bH_pt'].lt(0), 0)
    # Add jet pT rank
    df['jetPtRank'] = df.groupby(level=0)['jet_pt'].rank(ascending=False)
    # If jet_jf_dR is larger than 15, it was set to the "default" value of
    # std::hypot(-11,-11), so set it to its actual default of -1
    df['jet_jf_dR'] = df['jet_jf_dR'].mask(df['jet_jf_dR'].gt(15),
                                           default_values2['jf_dR'][0])
    # Apply jet quality cuts
    df.query('jet_pt>20e3 & abs(jet_eta)<2.5 & (abs(jet_eta)>2.4 |\
              jet_pt>60e3 | jet_JVT>0.5) & (jet_aliveAfterOR ==True)',
             inplace=True)
    if add_cuts != "":
        df.query(add_cuts, inplace=True)
    if debug:
        print('Querying jets df took: {}'.format(dt.datetime.now() - t0_jets))
        t0_jets = dt.datetime.now()
    df.rename(index=str, columns=mapping, inplace=True)
    # changing eta to absolute eta
    df['absEta_btagJes'] = df['eta_btagJes'].abs()
    # Replacing default values with this syntax:
    # df.replace({'A': {0: 100, 4: 400}})
    rep_dict = {}
    for key, val in default_values2.items():
        if key in list(var_conv_oldDl1.keys()):
            replacer = {}
            for elem in val:
                replacer[elem] = np.nan
            rep_dict[var_conv_oldDl1[key]] = replacer
    df.replace(rep_dict, inplace=True)
    # Generating default flags
    df['JetFitter_isDefaults'] = FindCheck(df['JetFitter_mass'].values)
    df['SV1_isDefaults'] = FindCheck(df['SV1_masssvx'].values)
    df['IP2D_isDefaults'] = FindCheck(df['IP2D_bu'].values)
    df['IP3D_isDefaults'] = FindCheck(df['IP3D_bu'].values)
    df['JetFitterSecondaryVertex_isDefaults'] = FindCheck(
        df['JetFitterSecondaryVertex_nTracks'].values)
    # rnnip default flag not necessary anymore
    df['rnnip_isDefaults'] = FindCheck(df['rnnip_pu'].values)
    if debug:
        print('Remaining jets columns took: {}'.format(dt.datetime.now() - t0_jets))
        t0_jets = dt.datetime.now()
    if write_tracks:
        return df, tracks_ndarray
    else:
        return df
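# Usage sketch (the file name is hypothetical; `mapping`, `tree_name`, `debug`,
# `default_values2`, `var_conv_oldDl1` and `FindCheck` are module-level globals
# assumed by GetTree):
jets_df = GetTree("ntuple.root", add_cuts="jet_pt < 250e3")
jets_df, tracks = GetTree("ntuple.root", write_tracks=True)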