def get_event_id(self, df):
    self.ev_id = df['ev_id'].values[0]
    self.run_number = df['run_number'].values[0]
    # pindent('processing', 'run_number=', self.run_number, 'ev_id=', self.ev_id, 'internal iev=', self.iev, 'cyclical iev=', self.iev_count)
    if self.iev_count >= self.max_events or self.iev == 0:
        # open a new file
        if self.fout:
            self.fout.Write()
            self.fout.Close()
            self.td = None
            self.fout = None
        outfname = self.file_output.replace('.root', '_{}.root'.format(self.nfile))
        pinfo('opening new file at', self.iev, 'file number', self.nfile, 'file name', outfname)
        self.fout = ROOT.TFile(outfname, 'recreate')
        self.nfile = self.nfile + 1
        self.iev_count = 0
        self.trees = {}
        # these are branches
        # for sdf in df:
        for sdf in self.df:
            foldername = os.path.dirname(sdf.split(';')[0])
            tname = os.path.basename(sdf.split(';')[0])
            self.fout.cd()
            if self.td is None:
                self.td = ROOT.TDirectoryFile(foldername, foldername)
            self.td.cd()
            # tw = ROOT.TNtuple(tname, tname, ':'.join(self.df[sdf].columns))
            self.trees[tname] = treewriter.RTreeWriter(tree_name=tname, fout=self.td)
    # fill rows matching this (run_number, ev_id) for each tree
    # for sdf in self.df:
    #     pinfo(self.df[sdf])
    for sdf in self.df:
        tname = os.path.basename(sdf.split(';')[0])
        _df_sel = self.df[sdf].loc[(self.df[sdf]['run_number'] == self.run_number)
                                   & (self.df[sdf]['ev_id'] == self.ev_id)]
        # if len(_df_sel) < 1:
        #     pwarning('no entries for', sdf, len(_df_sel), self.run_number, self.ev_id)
        # else:
        #     pinfo('rows for', sdf, len(_df_sel))
        for index, row in _df_sel.iterrows():
            # print(row)
            for c in _df_sel.columns:
                # pindent(self.trees[tname])
                # pindent(tname, index, c, '=', row[c])
                val = row[c]
                self.trees[tname].fill_branch(c, val)
            # pindent(self.trees[tname].tree.GetName(), 'fill')
            self.trees[tname].fill_tree()
    self.iev = self.iev + 1
    self.iev_count = self.iev_count + 1
def split_file(self, file_input, file_output, nevents, tolerance=0.1):
    pinfo('splitting file', file_input, 'max nevents:', nevents, 'tolerance:', tolerance)
    tlist = self.get_list_of_trees(file_input)
    pinfo('list of trees:', tlist)
    self.df_grouped = {}
    self.df = {}
    for tname in tlist:
        t = uproot.open(file_input)[tname]
        self.df[tname] = t.pandas.df()
        self.df_grouped[tname] = self.df[tname].groupby(['run_number', 'ev_id'])
        pindent('tree', tname, 'Nrows in pandas:', len(self.df[tname]), 'Nevents:', len(self.df_grouped[tname]))
    # order the trees by number of events (groups) - largest first
    df_names_sorted = sorted(self.df_grouped, key=lambda s: len(self.df_grouped[s]), reverse=True)
    for dfname in df_names_sorted:
        pindent(dfname, len(self.df_grouped[dfname]))
    self.iev = 0
    self.iev_count = 0
    self.max_events = nevents
    self.file_output = file_output
    self.nfile = 0
    self.fout = None
    self.td = None
    self.df_grouped[df_names_sorted[0]].apply(self.get_event_id)
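# Usage sketch (hedged): the class that owns split_file()/get_event_id() is not
# shown in this excerpt, so "RootFileSplitter" below is only an assumed
# placeholder name - the call pattern itself follows the methods above.
# splitter = RootFileSplitter()
# splitter.split_file('AnalysisResults.root', 'AnalysisResults_split.root', nevents=1000)
# This would write AnalysisResults_split_0.root, AnalysisResults_split_1.root, ...
# with at most max_events (run_number, ev_id) groups streamed into each output file.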
def initialize_output(self, output_name=None):
    if output_name:
        if self.output_filename != output_name:
            self.output_filename = output_name
        if self.outf:
            if self.outf.GetName() != self.output_filename:
                pinfo('closing output file', self.outf.GetName())
                self.outf.Write()
                self.outf.Close()
                self.outf = None
            else:
                # an output file with the requested name is already open
                return True
    if self.outf is None:
        self.outf = ROOT.TFile(self.output_filename, 'recreate')
        self.outf.cd()
        self.tdet = ROOT.TTree('tdet', 'tdet')
        self.twdet = RTreeWriter(tree=self.tdet, name='Output Tree detector level pp simulation')
        self.tpp = ROOT.TTree('tpp', 'tpp')
        self.twpp = RTreeWriter(tree=self.tpp, name='Output Tree pp simulation')
        self.th = ROOT.TTree('th', 'th')
        self.twh = RTreeWriter(tree=self.th, name='Output Tree pp simulation embedded into PbPb')
        pinfo('new output file', self.outf.GetName())
def process_file(self, fname):
    _ev_cuts = "is_ev_rej == 0 & abs(z_vtx_reco) < 10."
    self.event_df = self.pd_tree(path=fname, tname=self.event_tree_name, squery=_ev_cuts)
    if self.event_df is None:
        return False
    pinfo('events from', fname, len(self.event_df.index))
    _d0cuts_base = "(pt_cand > 3.0 & pt_prong0 > 0.6 & pt_prong1 > 0.6 & abs(eta_cand) < 0.8) & "
    _d0cuts_extra = "(dca)<0.03 & abs(cos_t_star)<0.8 & (imp_par_prod)<-0.0001 & (cos_p)>0.9 & "
    _d0cuts_kpi = _d0cuts_base + _d0cuts_extra
    _d0cuts_kpi += "((abs(nsigTPC_Pi_0) < 3. & (abs(nsigTOF_Pi_0) < 3. | nsigTOF_Pi_0 < -900) & abs(nsigTPC_K_1) < 3. & (abs(nsigTOF_K_1) < 3. | nsigTOF_K_1 < -900)) | "
    _d0cuts_kpi += "(abs(nsigTPC_Pi_1) < 3. & (abs(nsigTOF_Pi_1) < 3. | nsigTOF_Pi_1 < -900) & abs(nsigTPC_K_0) < 3. & (abs(nsigTOF_K_0) < 3. | nsigTOF_K_0 < -900)))"
    # Nikki with these cuts: (pt_cand)>3, (pt_prong0)>0.6, (pt_prong1)>0.6, (dca)<0.03, abs(cos_t_star)<0.8, (imp_par_prod)<-0.0001, (cos_p)>0.9
    self.d0_df = self.pd_tree(path=fname, tname=self.d0_tree_name, squery=_d0cuts_kpi)
    if self.d0_df is None:
        return False
    pinfo('d0s from', fname, len(self.d0_df.index))
    # pinfo(list(self.event_df))
    if 'ev_id_ext' in list(self.event_df):
        self.d0ev_df = pd.merge(self.d0_df, self.event_df, on=['run_number', 'ev_id', 'ev_id_ext'])
    else:
        self.d0ev_df = pd.merge(self.d0_df, self.event_df, on=['run_number', 'ev_id'])
    self.d0ev_df.query(_ev_cuts, inplace=True)
    self.d0ev_df_grouped = self.d0ev_df.groupby(['run_number', 'ev_id'])
    pinfo('d0s after event cuts from ', fname, len(self.d0ev_df.index))
    pinfo('N d0 groups after event cuts from ', fname, len(self.d0ev_df_grouped))
    self.track_df = self.pd_tree(path=fname, tname=self.track_tree_name)
    # self.track_df = _track_df.groupby(['run_number','ev_id'])
    if self.track_df is None:
        return False
    pinfo('tracks from', fname, len(self.track_df.index))
    # event based processing - not efficient for D0 analysis
    # self.pbar.close()
    # self.event_df.apply(self.process_event, axis=1)
    # with tqdm.tqdm(total=len(self.event_df.index)) as self.pbar:
    # d0 based processing
    # with tqdm.tqdm(total=len(self.d0ev_df.index)) as self.pbar:
    #     self.d0ev_df.apply(self.process_d0s, axis=1)
    with tqdm.tqdm(total=len(self.d0ev_df_grouped)) as self.pbar:
        _tmp = self.d0ev_df_grouped.apply(self.process_d0s)
    self.pbar.close()
    self.event_df = None
    self.d0_df = None
    self.d0ev_df = None
    self.d0ev_df_grouped = None
    self.track_df = None
def test_cxx(args):
    reader = aleph.Reader(args.input)
    nev = aleph_utils.get_n_events(args.input)
    for i in tqdm(range(nev)):
        if reader.read_next_event():
            e = reader.get_event()
        else:
            pinfo('no more events to read')
            break
def write(self, write_graphs=True):
    self.fout.cd()
    for t in self.trees:
        self.trees[t].Write()
        if write_graphs:
            self.write_as_graph(self.trees[t])
    self.fout.Write()
    self.fout.Close()
    pinfo('MemTrace.write - write & close {}'.format(self.output_name))
def reset_output(self):
    cwd = ROOT.gDirectory.CurrentDirectory()
    ROOT.gDirectory.cd('/')
    if self.fout is None:
        self.fout = ROOT.TFile(self.output_name, 'recreate')
    else:
        self.fout.Close()
        self.fout = ROOT.TFile(self.output_name, 'recreate')
    pinfo('MemTrace.reset_output', self.output_name, 'path:', ROOT.gDirectory.GetPath())
    cwd.cd()
def process_d0s_0(self, df):
    self.pbar.update(1)
    _n_d0s = len(df)
    if _n_d0s < 1:
        return
    # pinfo(df)
    if 'ev_id_ext' in list(self.event_df):
        # _ev_query = "run_number == {} & ev_id == {} & ev_id_ext == {}".format(df['run_number'], df['ev_id'], df['ev_id_ext'])
        _ev_query = "run_number == {} & ev_id == {} & ev_id_ext == {}".format(
            df['run_number'].values[0], df['ev_id'].values[0], df['ev_id_ext'].values[0])
    else:
        _ev_query = "run_number == {} & ev_id == {}".format(df['run_number'].values[0], df['ev_id'].values[0])
    _df_tracks = self.track_df.query(_ev_query)
    # reset_index returns a new dataframe - reassign it (the original call discarded the result)
    _df_tracks = _df_tracks.reset_index(drop=True)
    djmm = fjtools.DJetMatchMaker()
    djmm.set_ch_pt_eta_phi(_df_tracks['ParticlePt'].values, _df_tracks['ParticleEta'].values, _df_tracks['ParticlePhi'].values)
    # _parts = fjext.vectorize_pt_eta_phi(_df_tracks['ParticlePt'].values, _df_tracks['ParticleEta'].values, _df_tracks['ParticlePhi'].values)
    self._user_index_offset = 10000
    self.D0_index_offset = 10000
    # _d0s = fjext.vectorize_pt_eta_phi([df['pt_cand']], [df['eta_cand']], [df['phi_cand']], self._user_index_offset)
    # _d0s = fjext.vectorize_pt_eta_phi(df['pt_cand'].values, df['eta_cand'].values, df['phi_cand'].values, self._user_index_offset)
    # _d0s = fjext.vectorize_pt_eta_phi_m(df['pt_cand'].values, df['eta_cand'].values, df['phi_cand'].values, df['inv_mass'].values, self._user_index_offset)
    djmm.set_Ds_pt_eta_phi_m(df['pt_cand'].values, df['eta_cand'].values, df['phi_cand'].values, df['inv_mass'].values, self.D0_index_offset)
    _d0s = djmm.Ds
    _d0s_gh = [p * 1.e-6 for p in _d0s]
    # for di in range(_d0s.size()):
    #     print(djmm.Ds[di].m(), df['inv_mass'].values[di])
    _d0_imass_list = df['inv_mass'].values.tolist()
    # _d0_imass_list = [df['inv_mass']]
    self.tw.fill_branches(dpsj=_d0s, dpsjgh=_d0s_gh, minv=_d0_imass_list)
    self.tw.fill_tree()
    self.daughter_user_index_offset = self.D0_index_offset * 2
    djmm.set_daughters0_pt_eta_phi(df['pt_prong0'].values, df['eta_prong0'].values, df['phi_prong0'].values, self.daughter_user_index_offset)
    djmm.set_daughters1_pt_eta_phi(df['pt_prong1'].values, df['eta_prong1'].values, df['phi_prong1'].values, self.daughter_user_index_offset * 2)
    pinfo('n D0s', len(_d0s))
    # djmm.match(0.1)
    # djmm.match(0.01)
    # djmm.match(0.005)
    _parts_and_ds = djmm.match(0.005)
    # _parts_and_ds = _parts
    _tmp = [_parts_and_ds.push_back(p) for p in djmm.Ds]
    # pinfo('n parts = ', len(_parts_and_ds))
    ja = jet_analysis.JetAnalysis(jet_R=0.2, particle_eta_max=0.9, jet_pt_min=2.0)
    ja.analyze_event(_parts_and_ds)
    if len(ja.jets) < 1:
        return True
    # self.d0_jet_correl_ghosts(ja.jets, _d0s, _d0_imass_list)
    self.d0_jet_correl(ja.jets, djmm.Ds)
    return True
def test_cxx_gzip_python_stream(args):
    pinfo(args)
    tw = treewriter.RTreeWriter(name='aleph', file_name=args.output)
    nev = aleph_utils.get_n_events_gzip(args.input)
    with gzip.open(args.input) as f:
        _data = f.readlines()
    data = aleph.StringVector()
    __ = [data.push_back("{}".format(s.decode("utf-8").strip('\n'))) for s in _data]
    pinfo(type(data))
    pinfo('number of lines read', len(data))
    reader = aleph.ReaderLines(data)
    for i in tqdm(range(nev)):
        if reader.read_next_event():
            e = reader.get_event()
            __ = [stream_particle(e, p, tw) for p in e.get_particles()]
        else:
            pinfo('no more events to read')
            break
    tw.write_and_close()
def instance(cls, **kwargs):
    if cls._instance is None:
        pinfo('Creating new MemTrace instance')
        cls._instance = cls.__new__(cls)
        super(MemTrace, cls._instance).__init__(**kwargs)
        cls._instance._process = psutil.Process(os.getpid())
        cls._instance.event_number = 0
        cls._instance.event_tree_name = 'mt'
        cls._instance.process = psutil.Process(os.getpid())
        cls._instance.trees = {}
        cls._instance.fout = None
        cls._instance.output_name = 'memtrace.root'
        cls._instance.toffset = time.time()
        # set on the instance like the other defaults (original set cls._partial_write)
        cls._instance._partial_write = False
        cls._instance.configure_from_args(**kwargs)
    return cls._instance
def main(args):
    if args.output == 'default.root':
        args.output = args.input + '.root'
    pinfo('args', args)
    tw = treewriter.RTreeWriter(name='taleph', file_name=args.output)
    with gzip.open(args.input) as f:
        data = f.readlines()
    pinfo('number of lines read', len(data))
    for l in tqdm(data):
        lstr = l.decode("utf-8")
        if 'ALEPH_DATA RUN' in lstr:
            run_number, event_number, ecm = get_event_info(lstr)
            continue
        if 'Primary vertex info' in lstr:
            vflag, vx, vy, ex, ey = get_pvertex_info(lstr)
            continue
        if 'END_EVENT' in lstr:
            continue
        if 'px=' in lstr:
            px, py, pz, m, q, pwflag, d0, z0, ntpc, nitc, nvdet = get_part(lstr)
            tw.fill_branches(run=run_number, event=event_number, ecm=ecm,
                             vflag=vflag, vx=vx, vy=vy, ex=ex, ey=ey,
                             px=px, py=py, pz=pz, m=m, q=q, pwflag=pwflag,
                             d0=d0, z0=z0, ntpc=ntpc, nitc=nitc, nvdet=nvdet)
            tw.fill_tree()
    tw.write_and_close()
def main():
    _d0cuts_base = "(pt_cand > 3.0 & pt_prong0 > 0.6 & pt_prong1 > 0.6 & abs(eta_cand) < 0.8) & "
    _d0cuts_extra = "(dca)<0.03 & abs(cos_t_star)<0.8 & (imp_par_prod)<-0.0001 & (cos_p)>0.9 & "
    _d0cuts_kpi = _d0cuts_base + _d0cuts_extra
    _d0cuts_kpi += "((abs(nsigTPC_Pi_0) < 3. & (abs(nsigTOF_Pi_0) < 3. | nsigTOF_Pi_0 < -900) & abs(nsigTPC_K_1) < 3. & (abs(nsigTOF_K_1) < 3. | nsigTOF_K_1 < -900)) | "
    _d0cuts_kpi += "(abs(nsigTPC_Pi_1) < 3. & (abs(nsigTOF_Pi_1) < 3. | nsigTOF_Pi_1 < -900) & abs(nsigTPC_K_0) < 3. & (abs(nsigTOF_K_0) < 3. | nsigTOF_K_0 < -900)))"
    parser = argparse.ArgumentParser(description='D0 analysis on alice data', prog=os.path.basename(__file__))
    parser.add_argument('-f', '--flist', help='single root file or a file with a list of files to process',
                        type=str, default=None, required=True)
    parser.add_argument('-n', '--nfiles', help='max n files to process', type=int, default=0, required=False)
    parser.add_argument('-o', '--output', help="prefix output file names", type=str, default='./count_D0.csv')
    args = parser.parse_args()
    fname = args.flist
    if '.root' in fname:
        # count_D_in_file(fname, _d0cuts_kpi)
        nD0s = count_D_in_file_merge(fname, _d0cuts_kpi)
        pinfo(fname, 'N of events with selected D0cand', nD0s)
    else:
        pinfo('reading file list from', fname)
        with open(fname) as f:
            flist = [line.rstrip('\n') for line in f.readlines()]
        pinfo('number of files', len(flist))
        counts = []
        # for ifn, fn in enumerate(flist):
        if args.nfiles > 0:
            flist = flist[:args.nfiles]
        for fn in tqdm.tqdm(flist):
            # pinfo('file', ifn, 'of', len(flist))
            # count_D_in_file(fn, _d0cuts_kpi)
            nD0s = count_D_in_file_merge(fn, _d0cuts_kpi)
            counts.append([fn, nD0s])
        counts_sorted = sorted(counts, key=lambda c: c[1], reverse=True)
        df = pd.DataFrame(counts_sorted, columns=['fname', 'ND0_cand_events'])
        df.to_csv(args.output, index=False)
        pinfo(args.output, 'written.')
def main():
    parser = argparse.ArgumentParser(description='pythia8 fastjet on the fly', prog=os.path.basename(__file__))
    parser.add_argument('input', type=str, default='')
    parser.add_argument('--nev', type=int, default=-1)
    args = parser.parse_args()
    finame = args.input
    f = open(finame)
    csv_f = csv.reader(f)
    rfout = ROOT.TFile(finame.replace('.csv', '.root'), 'recreate')
    rfout.cd()
    tw = treewriter.RTreeWriter(tree_name='emissions', fout=rfout)
    pinfo('working on a file', finame)
    irow = 0
    scols = []
    nmax = args.nev
    for row in tqdm.tqdm(csv_f):
        if irow < 1:
            scols = copy.deepcopy(row)
            irow = irow + 1
            continue
        else:
            tw.fill_branch('e', make_dict(scols, row))
            tw.fill_tree()
        if nmax > 0 and irow >= nmax:
            break
        irow = irow + 1
    rfout.Write()
    pinfo('file written:', rfout.GetName())
    pinfo('done.')
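# Note: make_dict() is called above but not defined in this excerpt. A minimal
# sketch of what it presumably does - pair the header row (scols) with a data
# row into a {column: value} dict - is given below under an assumed name so it
# does not shadow the real helper; the float() conversion is an assumption.
def make_dict_sketch(cols, row):
    # zip column names with the corresponding row values
    return {c: float(v) for c, v in zip(cols, row)}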
def execute_analyses_on_file_list(self, file_list, nfiles=0):
    self.pbar2 = tqdm.tqdm(mininterval=20, maxinterval=60)
    self.pbar2_mark = None
    for a in self.analyses:
        a.callback = self.update_status
    print()
    if os.path.exists(file_list):
        with open(file_list) as f:
            files = f.readlines()
        if int(nfiles) > 0:
            files = files[:nfiles]
        for f in files:
            fn = f.strip('\n')
            pinfo('+file:', fn)
        for f in tqdm.tqdm(files):
            fn = f.strip('\n')
            if self.load_file(fn):
                self.execute_analyses()
        self.pbar2.close()
    else:
        perror('file list does not exist', file_list)
    pinfo('done.')
def process_files(fname):
    pinfo('reading file list from', fname)
    with open(fname) as f:
        flist = f.readlines()
    pinfo('number of files', len(flist))
    for ifn, fn in enumerate(flist):
        pinfo('file', ifn, 'of', len(flist))
        HFAIO(output_file='./hfaio_rfile_{}'.format(ifn), input_file=fn.strip('\n'))
def snapshot(self, label='mem'):
    cwd = ROOT.gDirectory.CurrentDirectory().GetPath()
    new_key = False
    try:
        self.fout.cd()
    except Exception:
        self.reset_output()
    try:
        self.trees[label]
    except KeyError:
        new_key = True
    if new_key:
        self.fout.cd()
        self.trees[label] = ROOT.TNtuple(label, label, 'rss:vms:t')
        pinfo('MemTrace - new tuple {}'.format(label), file=sys.stderr)
    rss = self.process.memory_info().rss
    vms = self.process.memory_info().vms
    # n = self.trees[label].GetEntries()
    ts = time.time() - self.toffset
    self.trees[label].Fill(rss, vms, ts)
    if self._partial_write:
        self._write_()
    ROOT.gDirectory.cd(cwd)
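# Usage sketch for the MemTrace singleton, built only from the methods shown in
# this file (instance(), snapshot(), write()); labels and call sites below are
# illustrative, not taken from the original code:
#
# mt = MemTrace.instance()            # default output_name is 'memtrace.root'
# mt.snapshot('after_init')           # records rss, vms and elapsed time into the 'after_init' ntuple
# ...                                 # do some work, then snapshot again
# mt.snapshot('after_processing')
# mt.write()                          # writes all ntuples (and graphs) and closes the file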
def test_cxx_gzip(args):
    pinfo('this is cxx with ROOT python iface...')
    ROOT.gSystem.Load("libpyjetty_alephR")
    pinfo(args)
    nev = aleph_utils.get_n_events_gzip(args.input)
    with gzip.open(args.input) as f:
        _data = f.readlines()
    # data = aleph.StringVector()
    # __ = [data.push_back("{}".format(s.decode("utf-8").strip('\n'))) for s in _data]
    # construct an empty vector and push_back (the original pre-sized it with
    # len(_data) empty strings, which would have been prepended to the real lines)
    data = ROOT.std.vector('string')()
    __ = [data.push_back("{}".format(s.decode("utf-8").strip('\n'))) for s in _data]
    pinfo('number of lines read', len(data), type(data))
    ROOT.AlephR.write_root_tree_lines(data, args.output, nev)
    pinfo('output is', args.output)
def write_csv(args):
    import csv
    csv_columns = ['run', 'event', 'ecm', 'vflag', 'vx', 'vy', 'ex', 'ey',
                   'px', 'py', 'pz', 'm', 'e', 'pt', 'phi', 'eta',
                   'q', 'pwflag', 'd0', 'z0', 'ntpc', 'nitc', 'nvdet']
    dict_data = []
    ROOT.gSystem.Load("libpyjetty_alephR.dylib")
    nev = aleph_utils.get_n_events_gzip(args.input)
    with gzip.open(args.input) as f:
        _data = f.readlines()
    data = aleph.StringVector()
    __ = [data.push_back("{}".format(s.decode("utf-8").strip('\n'))) for s in _data]
    pinfo(type(data))
    pinfo('number of lines read', data.size())
    reader = ROOT.Aleph.ReaderLines(data)
    for i in tqdm(range(nev)):
        if reader.read_next_event():
            e = reader.get_event()
            # __ = [make_dict_row(e, p, dict_data) for p in e.get_particles()]
        else:
            pinfo('no more events to read')
            break
    pinfo('writing csv file', args.output)
    csv_file = args.output
    try:
        with open(csv_file, 'w') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            writer.writeheader()
            for row in dict_data:
                writer.writerow(row)
    except IOError:
        print("I/O error")
def count_D_in_file_merge(fname, _d0cuts_kpi):
    d0_tree_name = 'PWGHF_TreeCreator/tree_D0'
    event_tree_name = 'PWGHF_TreeCreator/tree_event_char'
    _ev_cuts = "is_ev_rej == 0 & abs(z_vtx_reco) < 10."
    event_df = pd_tree(path=fname, tname=event_tree_name, squery=_ev_cuts)
    if event_df is None:
        return False
    pinfo('Nev', fname, len(event_df.index))
    d0_df = pd_tree(path=fname, tname=d0_tree_name, squery=_d0cuts_kpi)
    if d0_df is None:
        return False
    pinfo('ND0', fname, len(d0_df.index))
    # pinfo(list(event_df))
    if 'ev_id_ext' in list(event_df):
        d0ev_df = pd.merge(d0_df, event_df, on=['run_number', 'ev_id', 'ev_id_ext'])
    else:
        d0ev_df = pd.merge(d0_df, event_df, on=['run_number', 'ev_id'])
    d0ev_df.query(_ev_cuts, inplace=True)
    d0ev_df_grouped = d0ev_df.groupby(['run_number', 'ev_id'])
    pinfo('ND0+EvCuts ', fname, len(d0ev_df.index))
    pinfo('GR[ND0+EvCuts] ', fname, len(d0ev_df_grouped))
    # the caller expects the number of events with a selected D0 candidate
    return len(d0ev_df_grouped)
def execute_analyses_on_file_list(self, file_list, nfiles=0):
    print()
    if os.path.exists(file_list):
        with open(file_list) as f:
            files = f.readlines()
        if int(nfiles) > 0:
            files = files[:nfiles]
        for f in files:
            fn = f.strip('\n')
            pinfo('+file:', fn)
        for f in tqdm.tqdm(files):
            fn = f.strip('\n')
            pinfo('file:', fn)
            if self.load_file(fn):
                self.execute_analyses()
    else:
        perror('file list does not exist', file_list)
    pinfo('done.')
def analyze_slower(self):
    _d0_df = self.d0_df.query(self.d0_selection.query_string)
    pinfo('N d0s ', len(_d0_df.index))
    if 'ev_id_ext' in list(self.event_df):
        d0ev_df = pd.merge(_d0_df, self.event_df, on=['run_number', 'ev_id', 'ev_id_ext'])
    else:
        d0ev_df = pd.merge(_d0_df, self.event_df, on=['run_number', 'ev_id'])
    d0ev_df.query(self.event_selection.query_string, inplace=True)
    d0ev_df_grouped = d0ev_df.groupby(['run_number', 'ev_id'])
    pinfo('d0s after event cuts ', len(d0ev_df.index))
    pinfo('N d0 groups after event cuts ', len(d0ev_df_grouped))
    with tqdm.tqdm(total=len(d0ev_df_grouped)) as self.pbar:
        _tmp = d0ev_df_grouped.apply(self.exec_analysis_d0_df)
    self.pbar.close()
def main():
    parser = argparse.ArgumentParser(description='test duplicate entries', prog=os.path.basename(__file__))
    parser.add_argument('fname', help='input file', default='', type=str)
    args = parser.parse_args()

    event_tree_name = 'PWGHF_TreeCreator/tree_event_char'
    event_tree = uproot.open(args.fname)[event_tree_name]
    if not event_tree:
        perror('Tree {} not found in file {}'.format(event_tree_name, args.fname))
        return False
    pinfo(args.fname)
    event_df_orig = event_tree.pandas.df()
    len_event_df_orig = len(event_df_orig)
    df_event_accepted = event_df_orig.query('is_ev_rej == 0')
    # reset_index returns a new dataframe - reassign it (the original call discarded the result)
    df_event_accepted = df_event_accepted.reset_index(drop=True)
    len_event_df_accepted = len(df_event_accepted)
    event_df_nodup = df_event_accepted.drop_duplicates()
    len_event_df_nodup = len(event_df_nodup)
    if len_event_df_accepted != len_event_df_nodup:
        perror('original event length:', len_event_df_orig, 'accepted:', len_event_df_accepted, 'nodup:', len_event_df_nodup)
    else:
        pindent('original event length:', len_event_df_orig, 'accepted:', len_event_df_accepted, 'nodup:', len_event_df_nodup)

    track_tree_name = 'PWGHF_TreeCreator/tree_Particle'
    track_tree = uproot.open(args.fname)[track_tree_name]
    if not track_tree:
        perror('Tree {} not found in file {}'.format(track_tree_name, args.fname))
        return False
    track_df_orig = track_tree.pandas.df()
    track_df = pd.merge(track_df_orig, event_df_nodup, on=['run_number', 'ev_id'])
    len_track_df = len(track_df)
    track_df_nodup = track_df.drop_duplicates()
    len_track_df_nodup = len(track_df_nodup)
    if len_track_df_nodup < len_track_df:
        perror('track+event rows:', len_track_df, 'nodup:', len_track_df_nodup)
    else:
        pindent('track+event rows:', len_track_df, 'nodup:', len_track_df_nodup)
    track_df_grouped = track_df.groupby(['run_number', 'ev_id'])
    len_track_df_grouped = len(track_df_grouped)
    if len_track_df_grouped <= len_event_df_nodup:
        pindent('track+event length grouped:', len_track_df_grouped)
    else:
        perror('track+event length grouped:', len_track_df_grouped)
    # track_df_nodup = track_df_grouped.drop_duplicates()
    # print('track+event length no dup:', len(track_df_nodup))

    # from James
    # Check if there are duplicated tracks in an event.
    duplicate_selection = ['run_number', 'ev_id', 'ParticlePt', 'ParticleEta', 'ParticlePhi']
    # if use_ev_id_ext:
    #     duplicate_selection.append('ev_id_ext')
    duplicate_rows_df = track_df.duplicated(duplicate_selection)
    for i, row in duplicate_rows_df.iteritems():
        if row:
            print(i, row)
    # for r in duplicate_rows_df:
    #     print(type(r))
    n_duplicates = sum(duplicate_rows_df)
    pindent('2nd pass: using duplicate selection ', duplicate_selection)
    if n_duplicates > 0:
        perror('2nd pass: there appear to be {} duplicate particles in the dataframe'.format(n_duplicates))
        perror('this is: {:.2} of all tracks'.format(n_duplicates / len_track_df))
        track_df_nodup = track_df.drop_duplicates(duplicate_selection, inplace=False)
        pwarning('new count rows for particles:', len(track_df_nodup), 'old count:', len_track_df)
    else:
        pindent('no duplicate particles found')
def finalize(self):
    self.fout.Write()
    self.fout.Close()
    pinfo(self.fout.GetName(), 'written.')
def analyze_slower(self):
    # generated D0 candidate dataframe
    d0_gen_df_copy = self.d0_gen_df.copy()
    # reconstructed D0 candidate dataframe after applying the D0 selection cuts
    _d0_df = self.d0_df.query(self.d0_selection.query_string, engine="python")
    # merge the D0 dataframes with the event dataframe
    if 'ev_id_ext' in list(self.event_df):
        d0ev_df = pd.merge(_d0_df, self.event_df, on=['run_number', 'ev_id', 'ev_id_ext'])
        d0ev_gen_df = pd.merge(d0_gen_df_copy, self.event_df, on=['run_number', 'ev_id', 'ev_id_ext'])
    else:
        d0ev_df = pd.merge(_d0_df, self.event_df, on=['run_number', 'ev_id'])
        d0ev_gen_df = pd.merge(d0_gen_df_copy, self.event_df, on=['run_number', 'ev_id'])
    # after the merge, apply the event selection cuts (z vertex, event rejected)
    d0ev_df.query(self.event_selection.query_string, inplace=True)
    d0ev_gen_df.query(self.event_selection.query_string, inplace=True)
    pinfo('N generated d0s with d0 selection cuts and event cuts', len(d0ev_gen_df.index))
    # remove event ids present at reconstructed level but not at generated level
    # (coming from fake D0s due to the looser selection cuts)
    d0ev_df.sort_values(by=['run_number', 'ev_id'], inplace=True)
    d0ev_gen_df.sort_values(by=['run_number', 'ev_id'], inplace=True)
    df_d0runs = d0ev_df[['run_number', 'ev_id']].copy()
    df_gend0runs = d0ev_gen_df[['run_number', 'ev_id']].copy()
    # find reconstructed events matching the generated ones
    df_runs = pd.merge(df_d0runs, df_gend0runs, on=['run_number', 'ev_id'])
    df_runs.drop_duplicates(keep='first', inplace=True)
    d0ev_df = pd.merge(d0ev_df, df_runs, on=['run_number', 'ev_id'])
    # apply the fiducial cut on the D candidate
    d0ev_df = self.apply_cut_fiducial_acceptance(d0ev_df)
    # apply special cuts for low pt
    d0ev_df = self.apply_cut_special_np(d0ev_df)
    d0ev_df_grouped = d0ev_df.groupby(['run_number', 'ev_id'])
    d0ev_gen_df_grouped = d0ev_gen_df.groupby(['run_number', 'ev_id'])
    pinfo('d0s after event cuts ', len(d0ev_df.index))
    pinfo('N d0 groups after event cuts ', len(d0ev_df_grouped))
    pinfo('generated d0s after event cuts ', len(d0ev_gen_df.index))
    pinfo('N generated d0 groups after event cuts ', len(d0ev_gen_df_grouped))
    with tqdm.tqdm(total=len(d0ev_df_grouped)) as self.pbar:
        _tmp = d0ev_df_grouped.apply(self.exec_analysis_d0_df)
    self.pbar.close()
    with tqdm.tqdm(total=len(d0ev_gen_df_grouped)) as self.pbar:
        _tmp = d0ev_gen_df_grouped.apply(self.exec_analysis_d0_gen_df)
    self.pbar.close()
def main():
    parser = argparse.ArgumentParser(description='test groomers', prog=os.path.basename(__file__))
    parser.add_argument('-o', '--output-filename', default="output.root", type=str)
    parser.add_argument('datalistpp', help='run through a file list', default='', type=str)
    parser.add_argument('--datalistAA', help='run through a file list - embedding mode', default='', type=str)
    parser.add_argument('--jetR', default=0.4, type=float)
    parser.add_argument('--alpha', default=0, type=float)
    parser.add_argument('--dRmax', default=0.25, type=float)
    parser.add_argument('--overwrite', help="overwrite output", default=False, action='store_true')
    parser.add_argument('--jetptcut', help='remove jets below the cut', default=50., type=float)
    parser.add_argument('--nev', help='number of events to run', default=0, type=int)
    parser.add_argument('--max-eta', help='max eta for particles', default=0.9, type=float)
    parser.add_argument('--npart-cut', help='npart cut on centrality low,high hint:' + npart_cents, default='325,450', type=str)
    args = parser.parse_args()

    try:
        npart_min = int(args.npart_cut.split(',')[0])
        npart_max = int(args.npart_cut.split(',')[1])
    except:
        perror('unable to parse npart centrality selection - two integer numbers with a comma in-between needed - specified:', args.npart_cut)
        return 1

    # initialize the constituent subtractor
    cs = None
    if args.dRmax > 0:
        cs = CEventSubtractor(alpha=args.alpha, max_distance=args.dRmax, max_eta=args.max_eta,
                              bge_rho_grid_size=0.25, max_pt_correct=100)

    pp_data = DataIO(name='Sim Pythia Detector level', file_list=args.datalistpp,
                     random_file_order=False, tree_name='tree_Particle_gen')
    ja_pp = JetAnalysis(jet_R=args.jetR, jet_algorithm=fj.antikt_algorithm,
                        jet_pt_min=50., particle_eta_max=args.max_eta)

    if args.datalistAA:
        aa_data = DataBackgroundIO(name='PbPb', file_list=args.datalistAA, tree_name='tree_Particle_gen')
        ja_emb = JetAnalysis(jet_R=args.jetR, jet_algorithm=fj.antikt_algorithm,
                             jet_pt_min=50., particle_eta_max=args.max_eta)
        ja_aa = JetAnalysis(jet_R=args.jetR, jet_algorithm=fj.antikt_algorithm,
                            jet_pt_min=50., particle_eta_max=args.max_eta)

    dndeta_selector = fj.SelectorAbsEtaMax(1.)
    # tg = thg.ThermalGenerator()
    print(cs)

    # print the banner first
    fj.ClusterSequence.print_banner()
    print()

    gout = GroomerOutput(args.output_filename, enable_aa_trees=bool(args.datalistAA))

    delta_t = 0
    start_t = time.time()
    iev = 1
    while pp_data.load_event(offset=0):
        iev = iev + 1
        if args.nev > 0:
            if iev > args.nev:
                iev = iev - 1
                break
        if iev % 1000 == 0:
            delta_t = time.time() - start_t
            pinfo('processing event', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)

        # find jets on detector level
        if len(pp_data.particles) < 1:
            pwarning(iev, 'pp event skipped N parts', len(pp_data.particles))
            continue
        ja_pp.analyze_event(pp_data.particles)
        if len(ja_pp.jets) < 1:
            continue
        # pinfo('n particles', len(pp_data.particles))
        dndeta0 = dndeta_selector(pp_data.particles)
        [gout.fill_branches(j, syst=0, dndeta=len(dndeta0) / 2.) for j in ja_pp.jets]
        # pinfo('n jets', len(ja_pp.jets))

        if args.datalistAA:
            # draw PbPb events until one passes the npart centrality cut and has particles
            while True:
                aa_loaded = aa_data.load_event(offset=10000)
                if aa_data.event.npart < npart_min or aa_data.event.npart >= npart_max:
                    continue
                else:
                    if len(aa_data.particles) < 1:
                        pwarning(iev, 'AA event skipped N parts', len(aa_data.particles))
                        continue
                    else:
                        break
            if aa_loaded:
                ja_aa.analyze_event(aa_data.particles)
                dndeta1 = dndeta_selector(aa_data.particles)
                if len(ja_aa.jets) > 0:
                    [gout.fill_branches(j, syst=1, dndeta=len(dndeta1) / 2.) for j in ja_aa.jets]
                else:
                    # pwarning('no jets in AA event?', len(ja_aa.jets), 'while dndeta=', len(dndeta1)/2.)
                    pass
                # embed the pp event into the PbPb event
                emb_event = fj.vectorPJ()
                [emb_event.push_back(p) for p in pp_data.particles]
                [emb_event.push_back(p) for p in aa_data.particles]
                rho = 0
                if cs:
                    cs_parts = cs.process_event(emb_event)
                    rho = cs.bge_rho.rho()
                    ja_emb.analyze_event(cs_parts)
                else:
                    ja_emb.analyze_event(emb_event)
                # matches = [[jpp, jemb] for jpp in ja_pp.jets for jemb in ja_emb.jets if fjtools.matched_pt(jemb, jpp) > 0.5]
                # for mj in matches:
                #     gout.fill_branches(mj[0], syst=2, dndeta=len(dndeta1)/2., rho=rho)
                #     gout.fill_branches(mj[1], syst=3)
                [gout.fill_branches_prong_matching(j_pp, j_emb, dndeta=len(dndeta1) / 2., rho=rho)
                 for j_pp in ja_pp.jets for j_emb in ja_emb.jets]

    delta_t = time.time() - start_t
    pinfo('processed events', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)
    gout.write()
def main():
    parser = argparse.ArgumentParser(description='pythia8 fastjet on the fly', prog=os.path.basename(__file__))
    pyconf.add_standard_pythia_args(parser)
    _default_output_filename = os.path.basename(__file__).replace(".py", "") + "_output.root"
    parser.add_argument('--output', default=_default_output_filename, type=str)
    parser.add_argument('--debug', default=0, type=int)
    args = parser.parse_args()

    # print the banner first
    fj.ClusterSequence.print_banner()
    print()
    # set up our jet definition and a jet selector
    jet_R0 = 0.6
    jet_def = fj.JetDefinition(fj.antikt_algorithm, jet_R0)
    jet_selector = fj.SelectorPtMin(2.0) & fj.SelectorAbsEtaMax(2)
    print(jet_def)

    jet_def_lund = fj.JetDefinition(fj.cambridge_algorithm, jet_R0)
    lund_gen = fjcontrib.LundGenerator(jet_def_lund)
    print(jet_def_lund)
    print(lund_gen)

    outf = ROOT.TFile(args.output, 'recreate')
    outf.cd()
    t = ROOT.TTree('t', 't')
    tw = RTreeWriter(tree=t)

    # mycfg = ['PhaseSpace:pThatMin = 100']
    mycfg = []
    pythia = pyconf.create_and_init_pythia_from_args(args, mycfg)
    if args.nev < 100:
        args.nev = 100
    for i in tqdm.tqdm(range(args.nev)):
        if not pythia.next():
            continue
        if args.debug:
            pwarning('-- event', i)
        # parts = pythiafjext.vectorize(pythia, True, -1, 1, False)
        parts = pythiafjext.vectorize_select(pythia, [pythiafjext.kFinal], 0, True)
        if args.debug > 5:
            parts = pythiafjext.vectorize_select(pythia, [pythiafjext.kHadron], 0, True)
        if args.debug > 10:
            parts = pythiafjext.vectorize_select(pythia, [pythiafjext.kAny], 0, True)
        if args.debug > 0:
            for p in parts:
                pypart = pythiafjext.getPythia8Particle(p)
                if pypart.name()[:2] == 'D0':
                    pinfo(pypart.name(), pypart.id(), pypart.status(), 'final =?', pypart.isFinal())
        jets = jet_selector(jet_def(parts))
        for j in jets:
            isD0_lead = False
            lead_part = fj.sorted_by_E(j.constituents())[0]
            pypart = pythiafjext.getPythia8Particle(lead_part)
            if args.debug:
                pinfo('leading id is', pypart.id(), pypart.name(), 'jet', j)
            if abs(pypart.id()) == 421:
                # pinfo('leading D0')
                isD0_lead = True
            l = lund_gen.result(j)
            if len(l) > 0:
                tw.fill_branch('Epair', [s.pair().e() for s in l])
                tw.fill_branch('z', [s.z() for s in l])
                tw.fill_branch('kt', [s.kt() for s in l])
                tw.fill_branch('delta', [s.Delta() for s in l])
                tw.fill_branch('D0lead', isD0_lead)
                tw.fill_branch('lead_id', pypart.id())
                tw.fill_tree()
            else:
                if args.debug:
                    pwarning("len of a lund is less than 1?", len(l), l)

    pythia.stat()
    outf.Write()
    outf.Close()
    print('[i] written', outf.GetName())
def run(self):
    # need to change this for data to drive...
    delta_t = 0
    start_t = time.time()
    iev = 1
    # while self.det_sim.load_event() and self.part_sim.load_event():
    while self.det_sim.load_event():
        iev = iev + 1
        if self.nev > 0:
            if iev > self.nev:
                iev = iev - 1
                break
        if iev % 1000 == 0:
            delta_t = time.time() - start_t
            pinfo('processing event', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)

        # find jets on detector level
        if len(self.det_sim.particles) < 1:
            pwarning(iev, 'event skipped N detector parts', len(self.det_sim.particles))
            continue
        self.ja_det.analyze_event(self.det_sim.particles)
        _jets_det = self.ja_det.jets
        # _x = [pdebug(' -d ', j) for j in _jets_det]
        if len(_jets_det) < 1:
            continue
        _too_high_pt = [p.pt() for j in _jets_det for p in j.constituents() if p.pt() > 100.]
        if len(_too_high_pt) > 0:
            pwarning(iev, 'a likely fake high pT particle(s)', _too_high_pt, '- skipping whole event')
            continue

        _output_fname = os.path.expanduser(os.path.expandvars(self.det_sim.file_io.file_input))
        _output_fname = _output_fname.replace("/", "_")
        self.output.initialize_output(_output_fname)
        self.output.fill_det_level(iev, _jets_det)

        # load the corresponding event on particle level
        self.part_sim.open_afile(afile=self.det_sim.file_io.file_input)
        if not self.part_sim.load_event_with_loc(self.det_sim.event.run_number, self.det_sim.event.ev_id, 0):
            perror('unable to load partL event run#:', self.det_sim.event.run_number, 'ev_id:', self.det_sim.event.ev_id)
            continue
        if self.det_sim.event.run_number != self.part_sim.event.run_number:
            perror('run# mismatch detL:', self.det_sim.event.run_number, 'partL:', self.part_sim.event.run_number)
            continue
        if self.det_sim.event.ev_id != self.part_sim.event.ev_id:
            perror('ev_id# mismatch detL:', self.det_sim.event.ev_id, 'partL:', self.part_sim.event.ev_id)
            continue

        # find jets on particle level
        if len(self.part_sim.particles) < 1:
            pwarning(iev, 'event skipped N particle parts', len(self.part_sim.particles))
            continue
        self.ja_part.analyze_event(self.part_sim.particles)
        _jets_part = self.ja_part.jets
        # _x = [pdebug(' -p ', j) for j in _jets_part]
        if len(_jets_part) < 1:
            continue

        # match in pp simulations
        _det_part_matches = []
        _n_matches = 0
        _part_psjv = self.ja_part.jets_as_psj_vector()
        for j_det in _jets_det:
            _matches_pp = fjtools.matched_Reta(j_det, _part_psjv, 0.6 * self.jetR)
            # _matches_pp = fjtools.matched_Ry(j_det, _part_psjv, 0.6 * self.jetR)
            _n_matches = _n_matches + len(_matches_pp)
            if len(_matches_pp) > 1:
                pwarning('event:', iev, 'jet pt=', j_det.pt(), 'more than one match in pp jets', [i for i in _matches_pp])
            if len(_matches_pp) == 1:
                j_part = _part_psjv[_matches_pp[0]]
                # pinfo('j_det', j_det, 'j_part', j_part)
                _det_part_matches.append([j_det, j_part])
                self.output.fill_pp_pairs(iev, [j_det, j_part])
        if _n_matches < 1:
            pwarning('event:', iev, '- no matched jets in simulation!?', len(_det_part_matches))

        # here embedding into PbPb data
        _offset = 10000
        while _offset < len(self.det_sim.particles):
            _offset = _offset + 1000
            pwarning('increasing bg index offset to', _offset)
        _PbPb_loaded = 0
        while _PbPb_loaded == 0:
            if not self.dataPbPb.load_event(offset=_offset):
                perror('unable to load next PbPb event')
                _PbPb_loaded = -1
            else:
                _hybrid_event = self.dataPbPb.particles
                _nparts_hybrid_no_emb = len(_hybrid_event)
                if _nparts_hybrid_no_emb < 1:
                    pwarning('hybrid event with no particles! trying another one')
                    _PbPb_loaded = 0
                else:
                    _PbPb_loaded = 1
        if _PbPb_loaded < 0:
            perror('unable to load PbPb event - permanent - bailing out here.')
            break

        _tmp = [_hybrid_event.push_back(p) for p in self.det_sim.particles]
        if self.cs:
            cs_parts = self.cs.process_event(_hybrid_event)
            rho = self.cs.bge_rho.rho()
            self.ja_hybrid.analyze_event(cs_parts)
        else:
            self.ja_hybrid.analyze_event(_hybrid_event)

        _hybrid_matches = []
        _hybrid_psjv = self.ja_hybrid.jets_as_psj_vector()
        for m in _det_part_matches:
            j_det = m[0]
            j_part = m[1]
            _matches_hybrid = fjtools.matched_Reta(j_det, _hybrid_psjv, 0.6 * self.jetR)
            if len(_matches_hybrid) > 1:
                pwarning('event:', iev, 'jet pt=', j_det.pt(), 'more than one match in hybrid jets', [i for i in _matches_hybrid])
            if len(_matches_hybrid) == 1:
                # m.append(_hybrid_psjv[_matches_hybrid[0]])
                j_hybr = _hybrid_psjv[_matches_hybrid[0]]
                # pdebug('L302', 'j_det', j_det, 'j_part', j_part, 'j_hybr', j_hybr)
                _hybrid_matches.append([j_det, j_part, j_hybr])
                self.output.fill_emb_3(iev, [j_det, j_part, j_hybr])
        _n_matches_hybrid = len(_hybrid_matches)
        if _n_matches_hybrid < 1:
            pwarning('event:', iev, '- no matched jets in embedding!?', _n_matches_hybrid)

    delta_t = time.time() - start_t
    pinfo('processed events', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)
    self.output.close()
def write(self):
    if self.outf:
        self.outf.Write()
        self.outf.Close()
        pinfo('written', self.outf.GetName())
def close(self):
    if self.outf:
        pinfo('closing output file', self.outf.GetName())
        self.outf.Write()
        self.outf.Close()
        self.outf = None
def main():
    parser = argparse.ArgumentParser(description='test groomers', prog=os.path.basename(__file__))
    parser.add_argument('-o', '--output-filename', default="centrality_output.root", type=str)
    parser.add_argument('datalist', help='run through a file list', default='', type=str)
    parser.add_argument('--overwrite', help="overwrite output", default=False, action='store_true')
    parser.add_argument('--nev', help='number of events to run', default=0, type=int)
    parser.add_argument('--max-eta', help='max eta for particles', default=0.9, type=float)
    parser.add_argument('--thermal', help='enable thermal generator', action='store_true', default=False)
    parser.add_argument('--thermal-default', help='enable thermal generator', action='store_true', default=False)
    parser.add_argument('--particles', help='stream particles', action='store_true', default=False)
    parser.add_argument('--npart-cut', help='npart cut on centrality low,high hint:' + npart_cents, default='325,450', type=str)
    parser.add_argument('--nch-cut', help='nch cut on centrality low,high hint:' + nch_cents, default='18467,50000', type=str)
    args = parser.parse_args()

    try:
        npart_min = int(args.npart_cut.split(',')[0])
        npart_max = int(args.npart_cut.split(',')[1])
    except:
        perror('unable to parse npart centrality selection - two integer numbers with a comma in-between needed - specified:', args.npart_cut)
        return 1
    try:
        nch_min = int(args.nch_cut.split(',')[0])
        nch_max = int(args.nch_cut.split(',')[1])
    except:
        perror('unable to parse nch centrality selection - two integer numbers with a comma in-between needed - specified:', args.nch_cut)
        return 1

    outf = ROOT.TFile(args.output_filename, 'recreate')
    outf.cd()
    t = ROOT.TTree('t', 't')
    tw = RTreeWriter(tree=t)
    hpt_antyr = ROOT.TH1F('hpt_antyr', 'hpt_antyr', 100, 0, 100)
    hpt_antyr_c = ROOT.TH1F('hpt_antyr_c', 'hpt_antyr_c', 100, 0, 100)
    hpt_therm = ROOT.TH1F('hpt_therm', 'hpt_therm', 100, 0, 100)
    hpt_therm_c = ROOT.TH1F('hpt_therm_c', 'hpt_therm_c', 100, 0, 100)

    data = DataIO(name='Sim Pythia Detector level', file_list=args.datalist,
                  random_file_order=False, tree_name='tree_Particle_gen')
    dndeta_selector = fj.SelectorAbsEtaMax(abs(args.max_eta)) & fj.SelectorPtMin(0.15)

    tg_default = None
    if args.thermal_default:
        tg_default = thg.ThermalGenerator()
        print(tg_default)
    tg_central = None
    if args.thermal:
        tg_central = thg.ThermalGenerator(beta=0.5, N_avg=3000, sigma_N=500)
        print(tg_central)

    delta_t = 0
    start_t = time.time()
    iev = 1
    while data.load_event(offset=0):
        iev = iev + 1
        if args.nev > 0:
            if iev > args.nev:
                iev = iev - 1
                break
        if iev % 1000 == 0:
            delta_t = time.time() - start_t
            pinfo('processing event', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)

        # find jets on detector level
        if len(data.particles) < 1:
            pwarning(iev, 'pp event skipped N parts', len(data.particles))
            continue
        # print(data.event)
        dndeta0_parts = dndeta_selector(data.particles)
        dndeta0 = len(dndeta0_parts) / (abs(args.max_eta * 2.))
        [hpt_antyr.Fill(p.perp()) for p in dndeta0_parts]
        if args.particles:
            tw.fill_branches(dndeta=dndeta0, p=data.particles)
        else:
            tw.fill_branches(dndeta=dndeta0)
        tw.fill_branches_attribs(data.event, ['sigma', 'npart', 'nch', 'nchfwd', 'nchselect'], prefix='antyr_')
        if data.event.npart < npart_min or data.event.npart >= npart_max:
            tw.fill_branches(cent10npart=0)
        else:
            tw.fill_branches(cent10npart=1)
            [hpt_antyr_c.Fill(p.perp()) for p in dndeta0_parts]
        if data.event.nch < nch_min or data.event.nch >= nch_max:
            tw.fill_branches(cent10nch=0)
        else:
            tw.fill_branches(cent10nch=1)
        if tg_default:
            thg_particles = tg_default.load_event()
            dndetathg_default = dndeta_selector(thg_particles)
            if args.particles:
                tw.fill_branches(dndeta_thg_0=len(dndetathg_default) / (abs(args.max_eta * 2.)), p_thg_0=thg_particles)
            else:
                tw.fill_branches(dndeta_thg_0=len(dndetathg_default) / (abs(args.max_eta * 2.)))
        if tg_central:
            thg_parts_central = tg_central.load_event()
            dndetathg_central = dndeta_selector(thg_parts_central)
            [hpt_therm_c.Fill(p.perp()) for p in dndetathg_central]
            if args.particles:
                tw.fill_branches(dndeta_thg_c=len(dndetathg_central) / (abs(args.max_eta * 2.)), p_thg_c=thg_parts_central)
            else:
                tw.fill_branches(dndeta_thg_c=len(dndetathg_central) / (abs(args.max_eta * 2.)))
        tw.fill_tree()

    delta_t = time.time() - start_t
    pinfo('processed events', iev, ' - ev/sec =', iev / delta_t, 'elapsed =', delta_t)
    outf.Write()
    outf.Close()