def skim(self, file_index):
    """Write per-bin skims of the reconstructed (and, for MC, generated) sample.

    The dataframe pickled at ``self.l_reco[file_index]`` is passed, for each of
    the ``self.p_nptbins`` bins, through ``seldf_singlevar`` (variable
    ``self.v_var_binning``, edges ``self.lpt_anbinmin[ipt]`` and
    ``self.lpt_anbinmax[ipt]``) and ``selectdfquery`` (``self.s_reco_skim[ipt]``).
    The re-indexed result is pickled to ``self.mptfiles_recosk[ipt][file_index]``.
    When ``self.mcordata == "mc"`` the same is done for the dataframe at
    ``self.l_gen[file_index]`` using ``self.s_gen_skim`` and
    ``self.mptfiles_gensk``.

    Args:
        file_index: Position of the file in the per-file path lists.

    Raises:
        Exception: Whatever opening or unpickling an input raised. The failure
            is printed first and then re-raised, instead of falling through to
            a ``NameError`` on the dataframe that was never assigned.
    """

    def load(path, message):
        # Print which input is unreadable, then propagate the real error.
        try:
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()
        except Exception as e:  # pylint: disable=broad-except
            print(message, path, str(e))
            raise

    def dump(frame, path):
        # Close the output explicitly so buffered data is flushed to disk.
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    dfreco = load(self.l_reco[file_index], 'failed to open file')
    # The generated sample is the same for every bin: read it once, on first use.
    dfgen = None

    for ipt in range(self.p_nptbins):
        dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                   self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
        dfrecosk = dfrecosk.reset_index(drop=True)
        dump(dfrecosk, self.mptfiles_recosk[ipt][file_index])

        if self.mcordata == "mc":
            if dfgen is None:
                dfgen = load(self.l_gen[file_index], 'failed to open MC file')
            dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                      self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
            dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
            dfgensk = dfgensk.reset_index(drop=True)
            dump(dfgensk, self.mptfiles_gensk[ipt][file_index])
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes.

    Reads the event tree (``self.n_treeevt``) and the reconstructed-candidate
    tree (``self.n_treereco``) from ``self.l_root[file_index]``, applies the
    run-list and query selections, and pickles:

    * all selected events to ``self.l_evtorig[file_index]``,
    * events also passing ``self.s_good_evt_unp`` to ``self.l_evt[file_index]``,
    * candidates merged with those events to ``self.l_reco[file_index]``.

    When ``self.mcordata == "mc"`` the generated tree (``self.n_treegen``) is
    merged with the full event dataframe, tagged and pickled to
    ``self.l_gen[file_index]``.

    Args:
        file_index: Position of the file in the per-file path lists.
    """

    def dump(frame, path):
        # Close the output explicitly so buffered data is flushed to disk.
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    def tag(frame, bits):
        # Integer flag column derived from the bitmap variable.
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    # One handle serves all trees; the file does not need to be re-opened.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    dump(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dump(dfevt, self.l_evt[file_index])

    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    # Inner merge: candidates survive only if their event is in dfevt.
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if is_mc:
        dfreco[self.v_ismcsignal] = tag(dfreco, self.b_mcsig)
        dfreco[self.v_ismcprompt] = tag(dfreco, self.b_mcsigprompt)
        dfreco[self.v_ismcfd] = tag(dfreco, self.b_mcsigfd)
        dfreco[self.v_ismcbkg] = tag(dfreco, self.b_mcbkg)
    dump(dfreco, self.l_reco[file_index])

    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated particles are matched to all selected events (dfevtorig),
        # not only to those passing s_good_evt_unp.
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = tag(dfgen, self.b_std)
        dfgen[self.v_ismcsignal] = tag(dfgen, self.b_mcsig)
        dfgen[self.v_ismcprompt] = tag(dfgen, self.b_mcsigprompt)
        dfgen[self.v_ismcfd] = tag(dfgen, self.b_mcsigfd)
        dfgen[self.v_ismcbkg] = tag(dfgen, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dump(dfgen, self.l_gen[file_index])
def skim(self, file_index):
    """Write per-bin skims of the reconstructed (and, for MC, generated) sample.

    The dataframe pickled at ``self.l_reco[file_index]`` is passed, for each of
    the ``self.p_nptbins`` bins, through ``seldf_singlevar`` (variable
    ``self.v_var_binning``, edges ``self.lpt_anbinmin[ipt]`` and
    ``self.lpt_anbinmax[ipt]``) and ``selectdfquery`` (``self.s_reco_skim[ipt]``).
    The re-indexed result is written with ``DataFrame.to_pickle`` to
    ``self.mptfiles_recosk[ipt][file_index]``. When ``self.mcordata == "mc"``
    the same is done for ``self.l_gen[file_index]`` using ``self.s_gen_skim``
    and ``self.mptfiles_gensk``.

    Args:
        file_index: Position of the file in the per-file path lists.
    """
    # Context managers guarantee the input handles are closed after reading.
    with open(self.l_reco[file_index], "rb") as reco_file:
        dfreco = pickle.load(reco_file)
    # The generated sample is the same for every bin: read it once, on first use.
    dfgen = None

    for ipt in range(self.p_nptbins):
        dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                   self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
        dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
        dfrecosk = dfrecosk.reset_index(drop=True)
        dfrecosk.to_pickle(self.mptfiles_recosk[ipt][file_index])

        if self.mcordata == "mc":
            if dfgen is None:
                with open(self.l_gen[file_index], "rb") as gen_file:
                    dfgen = pickle.load(gen_file)
            dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                      self.lpt_anbinmin[ipt], self.lpt_anbinmax[ipt])
            dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
            dfgensk = dfgensk.reset_index(drop=True)
            dfgensk.to_pickle(self.mptfiles_gensk[ipt][file_index])
def prepare_data_mc_mcgen(self):
    """Load the data, MC and generated-MC dataframes and restrict them to one bin.

    If both "applied" files (``self.f_reco_applieddata``,
    ``self.f_reco_appliedmc``) exist and
    ``self.step_done("preparemlsamples_data_mc_mcgen")`` is true, data and MC
    are read from them as they are. Otherwise they are read from
    ``self.f_reco_data`` / ``self.f_reco_mc`` and passed through the event
    (``self.p_evtsel``) and trigger (``self.p_triggersel_data`` /
    ``self.p_triggersel_mc``) selections.

    The generated sample is read from ``self.f_gen_mc`` and selected. The
    data/MC pair is stored in ``self.arraydf`` before ``seldf_singlevar`` is
    applied to the three samples with ``self.v_bin``, ``self.p_binmin`` and
    ``self.p_binmax``.

    Sets ``self.df_data``, ``self.df_mc``, ``self.df_mcgen`` and
    ``self.arraydf``.
    """

    def load(path):
        # Close the handle explicitly rather than leaving it to the garbage
        # collector.
        handle = openfile(path, "rb")
        try:
            return pickle.load(handle)
        finally:
            handle.close()

    self.logger.info("Prepare data reco as well as MC reco and gen")

    if os.path.exists(self.f_reco_applieddata) \
            and os.path.exists(self.f_reco_appliedmc) \
            and self.step_done("preparemlsamples_data_mc_mcgen"):
        self.df_data = load(self.f_reco_applieddata)
        self.df_mc = load(self.f_reco_appliedmc)
    else:
        self.df_data = load(self.f_reco_data)
        self.df_mc = load(self.f_reco_mc)
        self.df_data = selectdfquery(self.df_data, self.p_evtsel)
        self.df_mc = selectdfquery(self.df_mc, self.p_evtsel)
        self.df_data = selectdfquery(self.df_data, self.p_triggersel_data)
        self.df_mc = selectdfquery(self.df_mc, self.p_triggersel_mc)

    # NOTE(review): the generated sample is prepared on both branches because
    # the binning below needs it either way - confirm against the callers.
    self.df_mcgen = load(self.f_gen_mc)
    self.df_mcgen = selectdfquery(self.df_mcgen, self.p_evtsel)
    self.df_mcgen = selectdfquery(self.df_mcgen, self.p_triggersel_mc)
    self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)

    # Stored before binning, so arraydf refers to the un-binned data and MC.
    self.arraydf = [self.df_data, self.df_mc]
    self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin, self.p_binmin,
                                    self.p_binmax)
    self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin,
                                   self.p_binmax)
def preparesample(self):
    """Build the signal/background sample and split it into training and test sets.

    Steps, in order:

    1. Load data, MC and generated-MC dataframes from ``self.f_reco_data``,
       ``self.f_reco_mc`` and ``self.f_gen_mc``, then apply the event and
       trigger selections.
    2. Pick signal and background out of ``[data, MC]`` with
       ``self.p_tagsig`` / ``self.p_tagbkg``, restrict them to the bin
       (``self.v_bin``, ``self.p_binmin``, ``self.p_binmax``) and apply
       ``self.s_selsigml`` / ``self.s_selbkgml``.
    3. Cap ``self.p_nsig`` / ``self.p_nbkg`` at the available counts, shuffle,
       truncate and label the rows (``self.v_sig`` is 1 for signal, 0 for
       background).
    4. Concatenate and split with ``train_test_split`` (``self.test_frac``,
       ``self.rnd_splt``).

    Sets ``self.df_data``, ``self.df_mc``, ``self.df_mcgen``, ``self.df_sig``,
    ``self.df_bkg``, ``self.df_ml``, the ``*train`` / ``*test`` dataframes, and
    updates ``self.p_nsig`` / ``self.p_nbkg``.
    """

    def load(path):
        # Close the handle explicitly rather than leaving it to the garbage
        # collector.
        handle = openfile(path, "rb")
        try:
            return pickle.load(handle)
        finally:
            handle.close()

    self.logger.info("Prepare Sample")
    self.df_data = load(self.f_reco_data)
    self.df_mc = load(self.f_reco_mc)
    self.df_mcgen = load(self.f_gen_mc)

    self.df_data = selectdfquery(self.df_data, self.p_evtsel)
    self.df_mc = selectdfquery(self.df_mc, self.p_evtsel)
    self.df_mcgen = selectdfquery(self.df_mcgen, self.p_evtsel)

    self.df_data = selectdfquery(self.df_data, self.p_triggersel_data)
    self.df_mc = selectdfquery(self.df_mc, self.p_triggersel_mc)
    self.df_mcgen = selectdfquery(self.df_mcgen, self.p_triggersel_mc)

    self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)

    # Captured before binning: signal and background are drawn from the
    # un-binned frames and binned separately below.
    arraydf = [self.df_data, self.df_mc]
    self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin, self.p_binmin,
                                    self.p_binmax)
    self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin,
                                   self.p_binmax)

    self.df_sig, self.df_bkg = arraydf[self.p_tagsig], arraydf[self.p_tagbkg]
    self.df_sig = seldf_singlevar(self.df_sig, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_bkg = seldf_singlevar(self.df_bkg, self.v_bin, self.p_binmin, self.p_binmax)
    self.df_sig = self.df_sig.query(self.s_selsigml)
    self.df_bkg = self.df_bkg.query(self.s_selbkgml)
    # Background rows carry no MC truth: force all MC flags to 0.
    self.df_bkg["ismcsignal"] = 0
    self.df_bkg["ismcprompt"] = 0
    self.df_bkg["ismcfd"] = 0
    self.df_bkg["ismcbkg"] = 0

    if self.p_nsig > len(self.df_sig):
        self.logger.warning("There are not enough signal events")
    if self.p_nbkg > len(self.df_bkg):
        self.logger.warning("There are not enough background events")

    # Never request more rows than are available.
    self.p_nsig = min(len(self.df_sig), self.p_nsig)
    self.p_nbkg = min(len(self.df_bkg), self.p_nbkg)

    self.logger.info("Used number of signal events is %d", self.p_nsig)
    self.logger.info("Used number of background events is %d", self.p_nbkg)

    self.df_ml = pd.DataFrame()
    self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
    self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
    self.df_sig = self.df_sig[:self.p_nsig]
    self.df_bkg = self.df_bkg[:self.p_nbkg]
    self.df_sig[self.v_sig] = 1
    self.df_bkg[self.v_sig] = 0
    self.df_ml = pd.concat([self.df_sig, self.df_bkg])

    self.df_mltrain, self.df_mltest = train_test_split(
        self.df_ml, test_size=self.test_frac, random_state=self.rnd_splt)
    self.df_mltrain = self.df_mltrain.reset_index(drop=True)
    self.df_mltest = self.df_mltest.reset_index(drop=True)

    self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(self.df_mltrain, self.v_sig)
    self.df_sigtest, self.df_bkgtest = split_df_sigbkg(self.df_mltest, self.v_sig)

    self.logger.info("Total number of candidates: train %d and test %d",
                     len(self.df_mltrain), len(self.df_mltest))
    self.logger.info("Number of signal candidates: train %d and test %d",
                     len(self.df_sigtrain), len(self.df_sigtest))
    self.logger.info("Number of bkg candidates: %d and test %d",
                     len(self.df_bkgtrain), len(self.df_bkgtest))

    # Feature matrices and label vectors for training and testing.
    self.df_xtrain = self.df_mltrain[self.v_train]
    self.df_ytrain = self.df_mltrain[self.v_sig]
    self.df_xtest = self.df_mltest[self.v_train]
    self.df_ytest = self.df_mltest[self.v_sig]
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes.

    Reads the event tree (``self.n_treeevt``) and the reconstructed-candidate
    tree (``self.n_treereco``) from ``self.l_root[file_index]``, applies the
    run-list and query selections, and pickles:

    * all selected events to ``self.l_evtorig[file_index]``,
    * events also passing ``self.s_good_evt_unp`` to ``self.l_evt[file_index]``,
    * candidates merged with those events to ``self.l_reco[file_index]``.

    The candidate dataframe gains a column ``n_tracklets_corr_sub``:
    ``n_tracklets_corr`` minus, per candidate, the number of prongs whose
    ``spdhits_prong<i>`` value equals 3.

    When ``self.mcordata == "mc"`` the generated tree (``self.n_treegen``) is
    merged with the full event dataframe, tagged and pickled to
    ``self.l_gen[file_index]``.

    Args:
        file_index: Position of the file in the per-file path lists.
    """

    def dump(frame, path):
        # Close the output explicitly so buffered data is flushed to disk.
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    def tag(frame, bits):
        # Integer flag column derived from the bitmap variable.
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    # One handle serves all trees; the file does not need to be re-opened.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    dump(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dump(dfevt, self.l_evt[file_index])

    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    # Inner merge: candidates survive only if their event is in dfevt.
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Count, per candidate, the prongs with spdhits == 3 using a vectorised
    # comparison instead of a Python-level loop over rows.
    arraysub = np.zeros(len(dfreco), dtype=int)
    n_tracklets_corr = dfreco["n_tracklets_corr"].values
    # Stays None (and the column is filled with None) when nprongs is 0.
    n_tracklets_corr_sub = None
    for iprong in range(self.nprongs):
        spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
        arraysub = np.add((spdhits_thisprong == 3).astype(int), arraysub)
        n_tracklets_corr_sub = np.subtract(n_tracklets_corr, arraysub)
    dfreco["n_tracklets_corr_sub"] = n_tracklets_corr_sub

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if is_mc:
        dfreco[self.v_ismcsignal] = tag(dfreco, self.b_mcsig)
        dfreco[self.v_ismcprompt] = tag(dfreco, self.b_mcsigprompt)
        dfreco[self.v_ismcfd] = tag(dfreco, self.b_mcsigfd)
        dfreco[self.v_ismcbkg] = tag(dfreco, self.b_mcbkg)
    dump(dfreco, self.l_reco[file_index])

    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated particles are matched to all selected events (dfevtorig),
        # not only to those passing s_good_evt_unp.
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = tag(dfgen, self.b_std)
        dfgen[self.v_ismcsignal] = tag(dfgen, self.b_mcsig)
        dfgen[self.v_ismcprompt] = tag(dfgen, self.b_mcsigprompt)
        dfgen[self.v_ismcfd] = tag(dfgen, self.b_mcsigfd)
        dfgen[self.v_ismcbkg] = tag(dfgen, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dump(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes.

    Reads the event tree (``self.n_treeevt``) and the reconstructed-candidate
    tree (``self.n_treereco``) from ``self.l_root[file_index]``, applies the
    query selections, and pickles:

    * all selected events to ``self.l_evtorig[file_index]``,
    * events also passing ``self.s_good_evt_unp`` to ``self.l_evt[file_index]``,
    * candidates merged with those events to ``self.l_reco[file_index]``.

    The candidate dataframe gains ``n_tracklets_corr_sub`` and
    ``n_tracklets_corr_shm_sub``: the corresponding input column minus, per
    candidate, the number of prongs enabled in ``self.prongformultsub`` whose
    ``spdhits_prong<i>`` value equals 3.

    When ``self.mcordata == "mc"`` the generated tree (``self.n_treegen``) is
    merged with the full event dataframe, tagged and pickled to
    ``self.l_gen[file_index]``.

    Args:
        file_index: Position of the file in the per-file path lists.

    Raises:
        SystemExit: With status 1 when the event or candidate tree cannot be
            converted with the requested branches.
    """

    def dump(frame, path):
        # Close the output explicitly so buffered data is flushed to disk.
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    def tag(frame, bits):
        # Integer flag column derived from the bitmap variable.
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    # One handle serves all trees; the file does not need to be re-opened.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    treeevtorig = rootfile[self.n_treeevt]
    try:
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
    except Exception as e:  # pylint: disable=broad-except
        # Report the tree that actually failed, then exit with a non-zero
        # status so the failure is visible to the calling shell or scheduler.
        print('Missing variable in the event root tree', str(e))
        print('I am sorry, I am dying ...\n \n \n')
        sys.exit(1)
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    dump(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dump(dfevt, self.l_evt[file_index])

    treereco = rootfile[self.n_treereco]
    try:
        dfreco = treereco.pandas.df(branches=self.v_all)
    except Exception as e:  # pylint: disable=broad-except
        print('Missing variable in the candidate root tree', str(e))
        print('I am sorry, I am dying ...\n \n \n')
        sys.exit(1)
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    # Inner merge: candidates survive only if their event is in dfevt.
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]

    # Count, per candidate, the enabled prongs with spdhits == 3 using a
    # vectorised comparison instead of a Python-level loop over rows.
    arraysub = np.zeros(len(dfreco), dtype=int)
    n_tracklets_corr = dfreco["n_tracklets_corr"].values
    n_tracklets_corr_shm = dfreco["n_tracklets_corr_shm"].values
    # Both stay None (and the columns are filled with None) when no prong is
    # enabled for the subtraction.
    n_tracklets_corr_sub = None
    n_tracklets_corr_shm_sub = None
    for iprong in range(self.nprongs):
        if self.prongformultsub[iprong] == 0:
            continue
        spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
        arraysub = np.add((spdhits_thisprong == 3).astype(int), arraysub)
        n_tracklets_corr_sub = np.subtract(n_tracklets_corr, arraysub)
        n_tracklets_corr_shm_sub = np.subtract(n_tracklets_corr_shm, arraysub)
    dfreco["n_tracklets_corr_sub"] = n_tracklets_corr_sub
    dfreco["n_tracklets_corr_shm_sub"] = n_tracklets_corr_shm_sub

    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if is_mc:
        dfreco[self.v_ismcsignal] = tag(dfreco, self.b_mcsig)
        dfreco[self.v_ismcprompt] = tag(dfreco, self.b_mcsigprompt)
        dfreco[self.v_ismcfd] = tag(dfreco, self.b_mcsigfd)
        dfreco[self.v_ismcbkg] = tag(dfreco, self.b_mcbkg)
    dump(dfreco, self.l_reco[file_index])

    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        # Generated particles are matched to all selected events (dfevtorig),
        # not only to those passing s_good_evt_unp.
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = tag(dfgen, self.b_std)
        dfgen[self.v_ismcsignal] = tag(dfgen, self.b_mcsig)
        dfgen[self.v_ismcprompt] = tag(dfgen, self.b_mcsigprompt)
        dfgen[self.v_ismcfd] = tag(dfgen, self.b_mcsigfd)
        dfgen[self.v_ismcbkg] = tag(dfgen, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dump(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Convert the trees of one ROOT file into pickled dataframes and a norm histogram.

    Reads the event tree (``self.n_treeevt``) and the reconstructed-candidate
    tree (``self.n_treereco``) from ``self.l_root[file_index]``, applies the
    run-list and query selections, and writes:

    * all selected events to ``self.l_evtorig[file_index]`` (pickle),
    * events also passing ``self.s_good_evt_unp`` to ``self.l_evt[file_index]``
      (pickle),
    * a two-bin histogram ``hEvForNorm`` to ``self.l_evtorigroot[file_index]``
      (ROOT file): bin 1 holds ``getnormforselevt(dfevt)``, bin 2 the number of
      rows with ``is_ev_rej==0``,
    * candidates merged with the good events to ``self.l_reco[file_index]``
      (pickle).

    When ``self.mcordata == "mc"`` the generated tree (``self.n_treegen``) is
    merged with the full event dataframe, tagged and pickled to
    ``self.l_gen[file_index]``.

    Args:
        file_index: Position of the file in the per-file path lists.
    """

    def dump(frame, path):
        # Close the output explicitly so buffered data is flushed to disk.
        handle = openfile(path, "wb")
        try:
            pickle.dump(frame, handle, protocol=4)
        finally:
            handle.close()

    def tag(frame, bits):
        # Integer flag column derived from the bitmap variable.
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    # One handle serves all trees; the file does not need to be re-opened.
    rootfile = uproot.open(self.l_root[file_index])
    is_mc = self.mcordata == "mc"

    dfevtorig = rootfile[self.n_treeevt].pandas.df(branches=self.v_evt)
    dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
    dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
    dfevtorig = dfevtorig.reset_index(drop=True)
    dump(dfevtorig, self.l_evtorig[file_index])

    dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
    dfevt = dfevt.reset_index(drop=True)
    dump(dfevt, self.l_evt[file_index])

    fileevtroot = TFile.Open(self.l_evtorigroot[file_index], "recreate")
    try:
        hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
        hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
        hNorm.GetXaxis().SetBinLabel(2, "selected events")
        nselevt = 0
        norm = 0
        # Skip the query on an empty dataframe. The original authors observed
        # that it could otherwise make this function return silently, so that
        # everything after this point was skipped. The bins then stay at 0.
        if not dfevt.empty:
            nselevt = len(dfevt.query("is_ev_rej==0"))
            norm = getnormforselevt(dfevt)
        hNorm.SetBinContent(1, norm)
        hNorm.SetBinContent(2, nselevt)
        hNorm.Write()
    finally:
        # Close the ROOT file even if filling the histogram fails.
        fileevtroot.Close()

    dfreco = rootfile[self.n_treereco].pandas.df(branches=self.v_all)
    dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
    dfreco = selectdfquery(dfreco, self.s_reco_unp)
    # Inner merge: candidates survive only if their event is in dfevt.
    dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
    isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
    dfreco = dfreco[np.array(isselacc, dtype=bool)]
    if self.b_trackcuts is not None:
        dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
    dfreco[self.v_isstd] = tag(dfreco, self.b_std)
    dfreco = dfreco.reset_index(drop=True)
    if is_mc:
        dfreco[self.v_ismcsignal] = tag(dfreco, self.b_mcsig)
        dfreco[self.v_ismcprompt] = tag(dfreco, self.b_mcsigprompt)
        dfreco[self.v_ismcfd] = tag(dfreco, self.b_mcsigfd)
        dfreco[self.v_ismcbkg] = tag(dfreco, self.b_mcbkg)
    dump(dfreco, self.l_reco[file_index])

    if is_mc:
        dfgen = rootfile[self.n_treegen].pandas.df(branches=self.v_gen)
        dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
        # Generated particles are matched to all selected events (dfevtorig),
        # not only to those passing s_good_evt_unp.
        dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
        dfgen = selectdfquery(dfgen, self.s_gen_unp)
        dfgen[self.v_isstd] = tag(dfgen, self.b_std)
        dfgen[self.v_ismcsignal] = tag(dfgen, self.b_mcsig)
        dfgen[self.v_ismcprompt] = tag(dfgen, self.b_mcsigprompt)
        dfgen[self.v_ismcfd] = tag(dfgen, self.b_mcsigfd)
        dfgen[self.v_ismcbkg] = tag(dfgen, self.b_mcbkg)
        dfgen = dfgen.reset_index(drop=True)
        dump(dfgen, self.l_gen[file_index])
def unpack(self, file_index):
    """Turn the trees of one ROOT file into pickled dataframes.

    Written with ``DataFrame.to_pickle``:

    * ``self.l_evtorig[file_index]``: events after the run-list and
      ``self.s_cen_unp`` selections,
    * ``self.l_evt[file_index]``: the subset also passing
      ``self.s_good_evt_unp``,
    * ``self.l_reco[file_index]``: selected candidates merged with that subset
      and tagged,
    * ``self.l_gen[file_index]``: only when ``self.mcordata == "mc"``,
      generated entries merged with the first event dataframe and tagged.

    Args:
        file_index: Position of the file in the per-file path lists.
    """
    root_path = self.l_root[file_index]
    is_mc = self.mcordata == "mc"

    def flag(frame, bits):
        # Integer flag column derived from the bitmap variable.
        return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

    # --- events -----------------------------------------------------------
    evt_tree = uproot.open(root_path)[self.n_treeevt]
    evt_all = evt_tree.pandas.df(branches=self.v_evt)
    evt_all = selectdfquery(
        selectdfrunlist(evt_all, self.runlist, "run_number"), self.s_cen_unp)
    evt_all = evt_all.reset_index(drop=True)
    evt_all.to_pickle(self.l_evtorig[file_index])

    evt_good = selectdfquery(evt_all, self.s_good_evt_unp).reset_index(drop=True)
    evt_good.to_pickle(self.l_evt[file_index])

    # --- reconstructed candidates -----------------------------------------
    reco_tree = uproot.open(root_path)[self.n_treereco]
    if not reco_tree:
        # Only reported; processing continues with whatever was returned.
        print("Couldn't find tree %s in file %s" % (self.n_treereco, root_path))
    reco = reco_tree.pandas.df(branches=self.v_all)
    reco = selectdfrunlist(reco, self.runlist, "run_number")
    reco = selectdfquery(reco, self.s_reco_unp)
    reco = pd.merge(reco, evt_good, on=self.v_evtmatch)
    # The selectfidacc mask is skipped for cases whose name contains 'Jet'.
    if 'Jet' not in self.case:
        accepted = selectfidacc(reco.pt_cand.values, reco.y_cand.values)
        reco = reco[np.array(accepted, dtype=bool)]
    if self.b_trackcuts is not None:
        reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
    reco[self.v_isstd] = flag(reco, self.b_std)
    reco = reco.reset_index(drop=True)
    if is_mc:
        for column, bits in ((self.v_ismcsignal, self.b_mcsig),
                             (self.v_ismcprompt, self.b_mcsigprompt),
                             (self.v_ismcfd, self.b_mcsigfd),
                             (self.v_ismcbkg, self.b_mcbkg)):
            reco[column] = flag(reco, bits)
    reco.to_pickle(self.l_reco[file_index])

    if not is_mc:
        return

    # --- generated entries (MC only) ---------------------------------------
    gen_tree = uproot.open(root_path)[self.n_treegen]
    gen = gen_tree.pandas.df(branches=self.v_gen)
    gen = selectdfrunlist(gen, self.runlist, "run_number")
    # Matched against every selected event, not just the "good" subset.
    gen = pd.merge(gen, evt_all, on=self.v_evtmatch)
    gen = selectdfquery(gen, self.s_gen_unp)
    for column, bits in ((self.v_isstd, self.b_std),
                         (self.v_ismcsignal, self.b_mcsig),
                         (self.v_ismcprompt, self.b_mcsigprompt),
                         (self.v_ismcfd, self.b_mcsigfd),
                         (self.v_ismcbkg, self.b_mcbkg)):
        gen[column] = flag(gen, bits)
    gen = gen.reset_index(drop=True)
    gen.to_pickle(self.l_gen[file_index])