Exemplo n.º 1
0
 def skim(self, file_index):
     """Split the pickled dataframe(s) of one input file into per-bin skims.

     The reco dataframe stored at ``self.l_reco[file_index]`` is loaded
     once.  For every bin ``ipt`` in ``range(self.p_nptbins)`` it is passed
     through ``seldf_singlevar`` (variable ``self.v_var_binning``, limits
     ``self.lpt_anbinmin[ipt]`` / ``self.lpt_anbinmax[ipt]``) and
     ``selectdfquery`` (``self.s_reco_skim[ipt]``), re-indexed and pickled
     to ``self.mptfiles_recosk[ipt][file_index]``.  When ``self.mcordata``
     is ``"mc"`` the dataframe at ``self.l_gen[file_index]`` gets the same
     treatment with ``self.s_gen_skim`` and ``self.mptfiles_gensk``.

     Args:
         file_index: position of the file in the ``self.l_*`` and
             ``self.mptfiles_*`` path lists.

     A failure to load the reco input is reported on stdout and ends the
     call without writing anything.  A failure to load the gen input is
     reported and only the gen skims are skipped.
     """
     def _load(path):
         # Close the handle explicitly instead of leaving it to the
         # garbage collector.
         handle = openfile(path, "rb")
         try:
             return pickle.load(handle)
         finally:
             handle.close()

     def _dump(frame, path):
         handle = openfile(path, "wb")
         try:
             pickle.dump(frame, handle, protocol=4)
         finally:
             handle.close()

     try:
         dfreco = _load(self.l_reco[file_index])
     except Exception as e:  # pylint: disable=broad-except
         print('failed to open file', self.l_reco[file_index], str(e))
         # Without the input there is nothing to skim; carrying on would
         # only fail later on the unbound dataframe.
         return

     # The gen input does not depend on the bin, so it is read once here
     # rather than once per bin.  This relies on the selection helpers not
     # modifying their input, which the reuse of dfreco already assumes.
     dfgen = None
     if self.mcordata == "mc":
         try:
             dfgen = _load(self.l_gen[file_index])
         except Exception as e:  # pylint: disable=broad-except
             print('failed to open MC file', self.l_gen[file_index],
                   str(e))

     for ipt in range(self.p_nptbins):
         dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                    self.lpt_anbinmin[ipt],
                                    self.lpt_anbinmax[ipt])
         dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
         dfrecosk = dfrecosk.reset_index(drop=True)
         _dump(dfrecosk, self.mptfiles_recosk[ipt][file_index])

         if dfgen is None:
             continue
         dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                   self.lpt_anbinmin[ipt],
                                   self.lpt_anbinmax[ipt])
         dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
         dfgensk = dfgensk.reset_index(drop=True)
         _dump(dfgensk, self.mptfiles_gensk[ipt][file_index])
Exemplo n.º 2
0
    def unpack(self, file_index):
        """Turn the trees of one ROOT file into pickled pandas dataframes.

        Reads ``self.l_root[file_index]`` and writes, at the same index:

        * ``self.l_evtorig``: event tree (branches ``self.v_evt``) after
          the run-list filter and the ``self.s_cen_unp`` query;
        * ``self.l_evt``: the rows of the above that also pass
          ``self.s_good_evt_unp``;
        * ``self.l_reco``: candidate tree (branches ``self.v_all``) after
          run-list filter, ``self.s_reco_unp`` query, merge with the
          selected events, ``selectfidacc`` mask, optional track-cut
          filter and added tag columns;
        * ``self.l_gen`` (only when ``self.mcordata == "mc"``): generated
          tree (branches ``self.v_gen``) merged with the event dataframe
          and tagged in the same way.

        Args:
            file_index: position of the file in the ``self.l_*`` lists.
        """
        def _dump(frame, path):
            # Close the output handle explicitly so that the pickle is
            # complete on disk as soon as this helper returns.
            handle = openfile(path, "wb")
            try:
                pickle.dump(frame, handle, protocol=4)
            finally:
                handle.close()

        def _tag(frame, bits):
            # One integer per row, from tag_bit_df on column self.v_bitvar.
            return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

        # --- events ---------------------------------------------------
        treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
        dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
        dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
        dfevtorig = dfevtorig.reset_index(drop=True)
        _dump(dfevtorig, self.l_evtorig[file_index])

        dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
        dfevt = dfevt.reset_index(drop=True)
        _dump(dfevt, self.l_evt[file_index])

        # --- reconstructed candidates ------------------------------------
        treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
        dfreco = treereco.pandas.df(branches=self.v_all)
        dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
        dfreco = selectdfquery(dfreco, self.s_reco_unp)
        # Candidates are matched to the events that passed s_good_evt_unp.
        dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
        isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
        dfreco = dfreco[np.array(isselacc, dtype=bool)]
        if self.b_trackcuts is not None:
            dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
        dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
        dfreco = dfreco.reset_index(drop=True)
        if self.mcordata == "mc":
            dfreco[self.v_ismcsignal] = _tag(dfreco, self.b_mcsig)
            dfreco[self.v_ismcprompt] = _tag(dfreco, self.b_mcsigprompt)
            dfreco[self.v_ismcfd] = _tag(dfreco, self.b_mcsigfd)
            dfreco[self.v_ismcbkg] = _tag(dfreco, self.b_mcbkg)
        _dump(dfreco, self.l_reco[file_index])

        # --- generated particles (MC only) -------------------------------
        if self.mcordata == "mc":
            treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
            dfgen = treegen.pandas.df(branches=self.v_gen)
            dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
            # Unlike reco, gen is merged with dfevtorig, i.e. before the
            # s_good_evt_unp selection.
            dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
            dfgen = selectdfquery(dfgen, self.s_gen_unp)
            dfgen[self.v_isstd] = _tag(dfgen, self.b_std)
            dfgen[self.v_ismcsignal] = _tag(dfgen, self.b_mcsig)
            dfgen[self.v_ismcprompt] = _tag(dfgen, self.b_mcsigprompt)
            dfgen[self.v_ismcfd] = _tag(dfgen, self.b_mcsigfd)
            dfgen[self.v_ismcbkg] = _tag(dfgen, self.b_mcbkg)
            dfgen = dfgen.reset_index(drop=True)
            _dump(dfgen, self.l_gen[file_index])
Exemplo n.º 3
0
 def skim(self, file_index):
     """Write per-bin skims of the reco (and, for MC, gen) dataframe.

     Loads the pickle at ``self.l_reco[file_index]`` and, for every bin
     ``ipt`` in ``range(self.p_nptbins)``, applies ``seldf_singlevar`` on
     ``self.v_var_binning`` with the limits ``self.lpt_anbinmin[ipt]`` /
     ``self.lpt_anbinmax[ipt]``, then the query ``self.s_reco_skim[ipt]``,
     resets the index and stores the result with ``to_pickle`` at
     ``self.mptfiles_recosk[ipt][file_index]``.  When ``self.mcordata`` is
     ``"mc"`` the pickle at ``self.l_gen[file_index]`` is skimmed the same
     way into ``self.mptfiles_gensk[ipt][file_index]``.

     Args:
         file_index: position of the file in the ``self.l_*`` and
             ``self.mptfiles_*`` path lists.
     """
     with open(self.l_reco[file_index], "rb") as handle:
         dfreco = pickle.load(handle)

     # The gen input is the same for every bin, so it is read once up
     # front rather than once per bin.  This relies on the selection
     # helpers not modifying their input, which the reuse of dfreco
     # already assumes.
     dfgen = None
     if self.mcordata == "mc":
         with open(self.l_gen[file_index], "rb") as handle:
             dfgen = pickle.load(handle)

     for ipt in range(self.p_nptbins):
         dfrecosk = seldf_singlevar(dfreco, self.v_var_binning,
                                    self.lpt_anbinmin[ipt],
                                    self.lpt_anbinmax[ipt])
         dfrecosk = selectdfquery(dfrecosk, self.s_reco_skim[ipt])
         dfrecosk = dfrecosk.reset_index(drop=True)
         dfrecosk.to_pickle(self.mptfiles_recosk[ipt][file_index])

         if dfgen is None:
             continue
         dfgensk = seldf_singlevar(dfgen, self.v_var_binning,
                                   self.lpt_anbinmin[ipt],
                                   self.lpt_anbinmax[ipt])
         dfgensk = selectdfquery(dfgensk, self.s_gen_skim[ipt])
         dfgensk = dfgensk.reset_index(drop=True)
         dfgensk.to_pickle(self.mptfiles_gensk[ipt][file_index])
Exemplo n.º 4
0
    def prepare_data_mc_mcgen(self):
        """Load the data, MC reco and MC gen dataframes and restrict them to the bin.

        If both ``self.f_reco_applieddata`` and ``self.f_reco_appliedmc``
        exist and ``self.step_done("preparemlsamples_data_mc_mcgen")`` is
        true, the data and MC reco dataframes are read from those files
        as they are.  Otherwise they are read from ``self.f_reco_data`` /
        ``self.f_reco_mc`` and filtered with ``self.p_evtsel`` and the
        respective trigger selection.  The MC gen dataframe is always read
        from ``self.f_gen_mc`` and filtered.

        ``self.arraydf`` keeps the data and MC reco dataframes as they are
        *before* the final ``seldf_singlevar`` cut on ``self.v_bin``.

        Sets ``self.df_data``, ``self.df_mc``, ``self.df_mcgen`` and
        ``self.arraydf``; returns nothing.
        """
        def _load(path):
            # Close the handle explicitly instead of leaving it to the
            # garbage collector.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        self.logger.info("Prepare data reco as well as MC reco and gen")
        if os.path.exists(self.f_reco_applieddata) \
                and os.path.exists(self.f_reco_appliedmc) \
                and self.step_done("preparemlsamples_data_mc_mcgen"):
            self.df_data = _load(self.f_reco_applieddata)
            self.df_mc = _load(self.f_reco_appliedmc)
        else:
            self.df_data = _load(self.f_reco_data)
            self.df_mc = _load(self.f_reco_mc)
            self.df_data = selectdfquery(self.df_data, self.p_evtsel)
            self.df_mc = selectdfquery(self.df_mc, self.p_evtsel)

            self.df_data = selectdfquery(self.df_data, self.p_triggersel_data)
            self.df_mc = selectdfquery(self.df_mc, self.p_triggersel_mc)

        self.df_mcgen = _load(self.f_gen_mc)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_evtsel)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_triggersel_mc)
        self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)

        # Captured before the bin cut below, so arraydf refers to the
        # frames that are not yet restricted to [p_binmin, p_binmax].
        self.arraydf = [self.df_data, self.df_mc]
        self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin,
                                     self.p_binmax)
        self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin,
                                        self.p_binmin, self.p_binmax)
        self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin,
                                       self.p_binmax)
Exemplo n.º 5
0
    def preparesample(self):
        """Build the signal/background ML sample and its train/test split.

        Steps, all operating on attributes of ``self``:

        1. Load the data, MC reco and MC gen dataframes from
           ``self.f_reco_data``, ``self.f_reco_mc`` and ``self.f_gen_mc``
           and apply the event and trigger selections (plus
           ``self.p_presel_gen_eff`` for gen).
        2. Pick the signal and background source frames out of
           ``[data, mc]`` with ``self.p_tagsig`` / ``self.p_tagbkg``, cut
           them to the ``self.v_bin`` range and apply ``self.s_selsigml`` /
           ``self.s_selbkgml``.
        3. Cap ``self.p_nsig`` / ``self.p_nbkg`` at the number of available
           rows (a warning is logged when fewer rows than requested
           exist), shuffle, truncate and label with ``self.v_sig``
           (1 for signal, 0 for background).
        4. Concatenate into ``self.df_ml`` and split into train and test
           frames with ``train_test_split``; derive the per-class and the
           x/y views from them.

        Returns nothing; note that ``self.p_nsig`` and ``self.p_nbkg`` are
        overwritten with the numbers actually used.
        """
        def _load(path):
            # Close the handle explicitly instead of leaving it to the
            # garbage collector.
            handle = openfile(path, "rb")
            try:
                return pickle.load(handle)
            finally:
                handle.close()

        self.logger.info("Prepare Sample")
        self.df_data = _load(self.f_reco_data)
        self.df_mc = _load(self.f_reco_mc)
        self.df_mcgen = _load(self.f_gen_mc)
        self.df_data = selectdfquery(self.df_data, self.p_evtsel)
        self.df_mc = selectdfquery(self.df_mc, self.p_evtsel)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_evtsel)

        self.df_data = selectdfquery(self.df_data, self.p_triggersel_data)
        self.df_mc = selectdfquery(self.df_mc, self.p_triggersel_mc)
        self.df_mcgen = selectdfquery(self.df_mcgen, self.p_triggersel_mc)

        self.df_mcgen = self.df_mcgen.query(self.p_presel_gen_eff)
        # Captured before the bin cut below: the signal/background source
        # frames therefore still need their own seldf_singlevar call.
        arraydf = [self.df_data, self.df_mc]
        self.df_mc = seldf_singlevar(self.df_mc, self.v_bin, self.p_binmin,
                                     self.p_binmax)
        self.df_mcgen = seldf_singlevar(self.df_mcgen, self.v_bin,
                                        self.p_binmin, self.p_binmax)
        self.df_data = seldf_singlevar(self.df_data, self.v_bin, self.p_binmin,
                                       self.p_binmax)

        self.df_sig, self.df_bkg = arraydf[self.p_tagsig], arraydf[
            self.p_tagbkg]
        self.df_sig = seldf_singlevar(self.df_sig, self.v_bin, self.p_binmin,
                                      self.p_binmax)
        self.df_bkg = seldf_singlevar(self.df_bkg, self.v_bin, self.p_binmin,
                                      self.p_binmax)
        self.df_sig = self.df_sig.query(self.s_selsigml)
        self.df_bkg = self.df_bkg.query(self.s_selbkgml)
        # All MC truth flags are forced to 0 on the background sample.
        self.df_bkg["ismcsignal"] = 0
        self.df_bkg["ismcprompt"] = 0
        self.df_bkg["ismcfd"] = 0
        self.df_bkg["ismcbkg"] = 0

        if self.p_nsig > len(self.df_sig):
            self.logger.warning("There are not enough signal events")
        if self.p_nbkg > len(self.df_bkg):
            self.logger.warning("There are not enough background events")

        self.p_nsig = min(len(self.df_sig), self.p_nsig)
        self.p_nbkg = min(len(self.df_bkg), self.p_nbkg)

        self.logger.info("Used number of signal events is %d", self.p_nsig)
        self.logger.info("Used number of background events is %d", self.p_nbkg)

        self.df_ml = pd.DataFrame()
        self.df_sig = shuffle(self.df_sig, random_state=self.rnd_shuffle)
        self.df_bkg = shuffle(self.df_bkg, random_state=self.rnd_shuffle)
        self.df_sig = self.df_sig[:self.p_nsig]
        self.df_bkg = self.df_bkg[:self.p_nbkg]
        self.df_sig[self.v_sig] = 1
        self.df_bkg[self.v_sig] = 0
        self.df_ml = pd.concat([self.df_sig, self.df_bkg])
        self.df_mltrain, self.df_mltest = train_test_split(
            self.df_ml, test_size=self.test_frac, random_state=self.rnd_splt)
        self.df_mltrain = self.df_mltrain.reset_index(drop=True)
        self.df_mltest = self.df_mltest.reset_index(drop=True)
        self.df_sigtrain, self.df_bkgtrain = split_df_sigbkg(
            self.df_mltrain, self.v_sig)
        self.df_sigtest, self.df_bkgtest = split_df_sigbkg(
            self.df_mltest, self.v_sig)
        self.logger.info("Total number of candidates: train %d and test %d",
                         len(self.df_mltrain), len(self.df_mltest))
        self.logger.info("Number of signal candidates: train %d and test %d",
                         len(self.df_sigtrain), len(self.df_sigtest))
        # Message aligned with the two above (the "train" label was missing).
        self.logger.info("Number of bkg candidates: train %d and test %d",
                         len(self.df_bkgtrain), len(self.df_bkgtest))

        self.df_xtrain = self.df_mltrain[self.v_train]
        self.df_ytrain = self.df_mltrain[self.v_sig]
        self.df_xtest = self.df_mltest[self.v_train]
        self.df_ytest = self.df_mltest[self.v_sig]
Exemplo n.º 6
0
    def unpack(self, file_index):
        """Turn the trees of one ROOT file into pickled pandas dataframes.

        Reads ``self.l_root[file_index]`` and writes, at the same index,
        the event dataframes (``self.l_evtorig``, ``self.l_evt``), the
        candidate dataframe (``self.l_reco``) and, when
        ``self.mcordata == "mc"``, the generated-particle dataframe
        (``self.l_gen``).

        In addition to the selections and tag columns, the candidate
        dataframe gets a column ``n_tracklets_corr_sub``:
        ``n_tracklets_corr`` minus the number of prongs (out of
        ``self.nprongs``) whose ``spdhits_prong<i>`` value equals 3.

        Args:
            file_index: position of the file in the ``self.l_*`` lists.
        """
        def _dump(frame, path):
            # Close the output handle explicitly so that the pickle is
            # complete on disk as soon as this helper returns.
            handle = openfile(path, "wb")
            try:
                pickle.dump(frame, handle, protocol=4)
            finally:
                handle.close()

        def _tag(frame, bits):
            # One integer per row, from tag_bit_df on column self.v_bitvar.
            return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

        # --- events ---------------------------------------------------
        treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
        dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
        dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
        dfevtorig = dfevtorig.reset_index(drop=True)
        _dump(dfevtorig, self.l_evtorig[file_index])

        dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
        dfevt = dfevt.reset_index(drop=True)
        _dump(dfevt, self.l_evt[file_index])

        # --- reconstructed candidates ------------------------------------
        treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
        dfreco = treereco.pandas.df(branches=self.v_all)
        dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
        dfreco = selectdfquery(dfreco, self.s_reco_unp)
        dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
        isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
        dfreco = dfreco[np.array(isselacc, dtype=bool)]

        # Per candidate, count the prongs with spdhits == 3 and subtract
        # that count from the tracklet multiplicity.  Done with array
        # operations instead of a Python loop over rows.
        # NOTE(review): spdhits == 3 presumably means a hit in both SPD
        # layers - confirm against the tree definition.
        n_tracklets_corr = dfreco["n_tracklets_corr"].values
        arraysub = np.zeros(len(dfreco), dtype=int)
        for iprong in range(self.nprongs):
            spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
            arraysub += (spdhits_thisprong == 3).astype(int)
        dfreco["n_tracklets_corr_sub"] = np.subtract(n_tracklets_corr,
                                                     arraysub)

        if self.b_trackcuts is not None:
            dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
        dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
        dfreco = dfreco.reset_index(drop=True)
        if self.mcordata == "mc":
            dfreco[self.v_ismcsignal] = _tag(dfreco, self.b_mcsig)
            dfreco[self.v_ismcprompt] = _tag(dfreco, self.b_mcsigprompt)
            dfreco[self.v_ismcfd] = _tag(dfreco, self.b_mcsigfd)
            dfreco[self.v_ismcbkg] = _tag(dfreco, self.b_mcbkg)
        _dump(dfreco, self.l_reco[file_index])

        # --- generated particles (MC only) -------------------------------
        if self.mcordata == "mc":
            treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
            dfgen = treegen.pandas.df(branches=self.v_gen)
            dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
            # Unlike reco, gen is merged with dfevtorig, i.e. before the
            # s_good_evt_unp selection.
            dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
            dfgen = selectdfquery(dfgen, self.s_gen_unp)
            dfgen[self.v_isstd] = _tag(dfgen, self.b_std)
            dfgen[self.v_ismcsignal] = _tag(dfgen, self.b_mcsig)
            dfgen[self.v_ismcprompt] = _tag(dfgen, self.b_mcsigprompt)
            dfgen[self.v_ismcfd] = _tag(dfgen, self.b_mcsigfd)
            dfgen[self.v_ismcbkg] = _tag(dfgen, self.b_mcbkg)
            dfgen = dfgen.reset_index(drop=True)
            _dump(dfgen, self.l_gen[file_index])
Exemplo n.º 7
0
    def unpack(self, file_index):
        """Turn the trees of one ROOT file into pickled pandas dataframes.

        Reads ``self.l_root[file_index]`` and writes, at the same index,
        the event dataframes (``self.l_evtorig``, ``self.l_evt``), the
        candidate dataframe (``self.l_reco``) and, when
        ``self.mcordata == "mc"``, the generated-particle dataframe
        (``self.l_gen``).

        The candidate dataframe gets two extra columns,
        ``n_tracklets_corr_sub`` and ``n_tracklets_corr_shm_sub``: the
        respective multiplicity minus the number of prongs with
        ``spdhits_prong<i> == 3``, counting only prongs ``i`` for which
        ``self.prongformultsub[i]`` is non-zero.

        Args:
            file_index: position of the file in the ``self.l_*`` lists.

        Raises:
            SystemExit: with status 1 when the event or candidate tree
                cannot be converted to a dataframe (after printing the
                reason to stdout).
        """
        def _dump(frame, path):
            # Close the output handle explicitly so that the pickle is
            # complete on disk as soon as this helper returns.
            handle = openfile(path, "wb")
            try:
                pickle.dump(frame, handle, protocol=4)
            finally:
                handle.close()

        def _tag(frame, bits):
            # One integer per row, from tag_bit_df on column self.v_bitvar.
            return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

        # --- events ---------------------------------------------------
        treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
        try:
            dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
        except Exception as e:  # pylint: disable=broad-except
            # Only the event-tree message belongs here; the candidate-tree
            # line previously printed as well was a copy-paste slip.
            print('Missing variable in the event root tree', str(e))
            print('I am sorry, I am dying ...\n \n \n')
            # Non-zero status so that the failure is visible to the caller.
            sys.exit(1)

        dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
        dfevtorig = dfevtorig.reset_index(drop=True)
        _dump(dfevtorig, self.l_evtorig[file_index])
        dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
        dfevt = dfevt.reset_index(drop=True)
        _dump(dfevt, self.l_evt[file_index])

        # --- reconstructed candidates ------------------------------------
        treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
        try:
            dfreco = treereco.pandas.df(branches=self.v_all)
        except Exception as e:  # pylint: disable=broad-except
            # Include the underlying error, as the event-tree handler does.
            print('Missing variable in the candidate root tree', str(e))
            print('I am sorry, I am dying ...\n \n \n')
            sys.exit(1)
        dfreco = selectdfquery(dfreco, self.s_reco_unp)
        dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
        isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
        dfreco = dfreco[np.array(isselacc, dtype=bool)]

        # Per candidate, count the enabled prongs with spdhits == 3 and
        # subtract that count from both tracklet multiplicities.  Done
        # with array operations instead of a Python loop over rows.
        # NOTE(review): spdhits == 3 presumably means a hit in both SPD
        # layers - confirm against the tree definition.
        n_tracklets_corr = dfreco["n_tracklets_corr"].values
        n_tracklets_corr_shm = dfreco["n_tracklets_corr_shm"].values
        arraysub = np.zeros(len(dfreco), dtype=int)
        for iprong in range(self.nprongs):
            if self.prongformultsub[iprong] == 0:
                continue
            spdhits_thisprong = dfreco["spdhits_prong%s" % iprong].values
            arraysub += (spdhits_thisprong == 3).astype(int)
        dfreco["n_tracklets_corr_sub"] = np.subtract(n_tracklets_corr,
                                                     arraysub)
        dfreco["n_tracklets_corr_shm_sub"] = np.subtract(n_tracklets_corr_shm,
                                                         arraysub)

        if self.b_trackcuts is not None:
            dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
        dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
        dfreco = dfreco.reset_index(drop=True)
        if self.mcordata == "mc":
            dfreco[self.v_ismcsignal] = _tag(dfreco, self.b_mcsig)
            dfreco[self.v_ismcprompt] = _tag(dfreco, self.b_mcsigprompt)
            dfreco[self.v_ismcfd] = _tag(dfreco, self.b_mcsigfd)
            dfreco[self.v_ismcbkg] = _tag(dfreco, self.b_mcbkg)
        _dump(dfreco, self.l_reco[file_index])

        # --- generated particles (MC only) -------------------------------
        if self.mcordata == "mc":
            treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
            dfgen = treegen.pandas.df(branches=self.v_gen)
            # Unlike reco, gen is merged with dfevtorig, i.e. before the
            # s_good_evt_unp selection.
            dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
            dfgen = selectdfquery(dfgen, self.s_gen_unp)
            dfgen[self.v_isstd] = _tag(dfgen, self.b_std)
            dfgen[self.v_ismcsignal] = _tag(dfgen, self.b_mcsig)
            dfgen[self.v_ismcprompt] = _tag(dfgen, self.b_mcsigprompt)
            dfgen[self.v_ismcfd] = _tag(dfgen, self.b_mcsigfd)
            dfgen[self.v_ismcbkg] = _tag(dfgen, self.b_mcbkg)
            dfgen = dfgen.reset_index(drop=True)
            _dump(dfgen, self.l_gen[file_index])
Exemplo n.º 8
0
    def unpack(self, file_index):
        """Turn the trees of one ROOT file into pickled pandas dataframes.

        Reads ``self.l_root[file_index]`` and writes, at the same index,
        the event dataframes (``self.l_evtorig``, ``self.l_evt``), the
        candidate dataframe (``self.l_reco``) and, when
        ``self.mcordata == "mc"``, the generated-particle dataframe
        (``self.l_gen``).

        It also recreates the ROOT file ``self.l_evtorigroot[file_index]``
        holding a two-bin histogram ``hEvForNorm``: bin 1 is the value of
        ``getnormforselevt(dfevt)``, bin 2 the number of selected events
        with ``is_ev_rej==0``.  Both stay 0 when no event is selected.

        Args:
            file_index: position of the file in the ``self.l_*`` lists.
        """
        def _dump(frame, path):
            # Close the output handle explicitly so that the pickle is
            # complete on disk as soon as this helper returns.
            handle = openfile(path, "wb")
            try:
                pickle.dump(frame, handle, protocol=4)
            finally:
                handle.close()

        def _tag(frame, bits):
            # One integer per row, from tag_bit_df on column self.v_bitvar.
            return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

        # --- events ---------------------------------------------------
        treeevtorig = uproot.open(self.l_root[file_index])[self.n_treeevt]
        dfevtorig = treeevtorig.pandas.df(branches=self.v_evt)
        dfevtorig = selectdfrunlist(dfevtorig, self.runlist, "run_number")
        dfevtorig = selectdfquery(dfevtorig, self.s_cen_unp)
        dfevtorig = dfevtorig.reset_index(drop=True)
        _dump(dfevtorig, self.l_evtorig[file_index])
        dfevt = selectdfquery(dfevtorig, self.s_good_evt_unp)
        dfevt = dfevt.reset_index(drop=True)
        _dump(dfevt, self.l_evt[file_index])

        # --- normalisation histogram -------------------------------------
        fileevtroot = TFile.Open(self.l_evtorigroot[file_index], "recreate")
        hNorm = TH1F("hEvForNorm", ";;Normalisation", 2, 0.5, 2.5)
        # The first label is misspelt, but it is written into the output
        # file and readers may look it up, so it is left untouched.
        hNorm.GetXaxis().SetBinLabel(1, "normsalisation factor")
        hNorm.GetXaxis().SetBinLabel(2, "selected events")
        nselevt = 0
        norm = 0
        # Guard against an empty dataframe.  According to the original
        # author, pandas may otherwise silently return from this function
        # and everything that follows would be skipped.
        if not dfevt.empty:
            nselevt = len(dfevt.query("is_ev_rej==0"))
            norm = getnormforselevt(dfevt)
        hNorm.SetBinContent(1, norm)
        hNorm.SetBinContent(2, nselevt)
        hNorm.Write()
        fileevtroot.Close()

        # --- reconstructed candidates ------------------------------------
        treereco = uproot.open(self.l_root[file_index])[self.n_treereco]
        dfreco = treereco.pandas.df(branches=self.v_all)
        dfreco = selectdfrunlist(dfreco, self.runlist, "run_number")
        dfreco = selectdfquery(dfreco, self.s_reco_unp)
        dfreco = pd.merge(dfreco, dfevt, on=self.v_evtmatch)
        isselacc = selectfidacc(dfreco.pt_cand.values, dfreco.y_cand.values)
        dfreco = dfreco[np.array(isselacc, dtype=bool)]
        if self.b_trackcuts is not None:
            dfreco = filter_bit_df(dfreco, self.v_bitvar, self.b_trackcuts)
        dfreco[self.v_isstd] = _tag(dfreco, self.b_std)
        dfreco = dfreco.reset_index(drop=True)
        if self.mcordata == "mc":
            dfreco[self.v_ismcsignal] = _tag(dfreco, self.b_mcsig)
            dfreco[self.v_ismcprompt] = _tag(dfreco, self.b_mcsigprompt)
            dfreco[self.v_ismcfd] = _tag(dfreco, self.b_mcsigfd)
            dfreco[self.v_ismcbkg] = _tag(dfreco, self.b_mcbkg)
        _dump(dfreco, self.l_reco[file_index])

        # --- generated particles (MC only) -------------------------------
        if self.mcordata == "mc":
            treegen = uproot.open(self.l_root[file_index])[self.n_treegen]
            dfgen = treegen.pandas.df(branches=self.v_gen)
            dfgen = selectdfrunlist(dfgen, self.runlist, "run_number")
            # Unlike reco, gen is merged with dfevtorig, i.e. before the
            # s_good_evt_unp selection.
            dfgen = pd.merge(dfgen, dfevtorig, on=self.v_evtmatch)
            dfgen = selectdfquery(dfgen, self.s_gen_unp)
            dfgen[self.v_isstd] = _tag(dfgen, self.b_std)
            dfgen[self.v_ismcsignal] = _tag(dfgen, self.b_mcsig)
            dfgen[self.v_ismcprompt] = _tag(dfgen, self.b_mcsigprompt)
            dfgen[self.v_ismcfd] = _tag(dfgen, self.b_mcsigfd)
            dfgen[self.v_ismcbkg] = _tag(dfgen, self.b_mcbkg)
            dfgen = dfgen.reset_index(drop=True)
            _dump(dfgen, self.l_gen[file_index])
Exemplo n.º 9
0
    def unpack(self, file_index):
        """Turn the trees of one ROOT file into pickled pandas dataframes.

        Reads ``self.l_root[file_index]`` and writes, at the same index:
        ``self.l_evtorig`` (events after run-list and ``self.s_cen_unp``
        selection), ``self.l_evt`` (those also passing
        ``self.s_good_evt_unp``), ``self.l_reco`` (candidates) and, when
        ``self.mcordata == "mc"``, ``self.l_gen`` (generated particles).

        The acceptance mask, track cuts and tag columns are applied to
        the candidates only when ``'Jet'`` does not occur in ``self.case``.

        Args:
            file_index: position of the file in the ``self.l_*`` lists.
        """
        def _flag(frame, bits):
            # One integer per row, from tag_bit_df on column self.v_bitvar.
            return np.array(tag_bit_df(frame, self.v_bitvar, bits), dtype=int)

        source = self.l_root[file_index]

        # Event tree: run-list filter, s_cen_unp query, fresh index.
        events_all = uproot.open(source)[self.n_treeevt].pandas.df(
            branches=self.v_evt)
        events_all = selectdfrunlist(events_all, self.runlist, "run_number")
        events_all = selectdfquery(events_all, self.s_cen_unp)
        events_all = events_all.reset_index(drop=True)
        events_all.to_pickle(self.l_evtorig[file_index])

        # Subset passing s_good_evt_unp goes to its own file.
        events_good = selectdfquery(events_all, self.s_good_evt_unp)
        events_good = events_good.reset_index(drop=True)
        events_good.to_pickle(self.l_evt[file_index])

        # Candidate tree; a falsy tree is only reported, not treated as fatal.
        tree_reco = uproot.open(source)[self.n_treereco]
        if not tree_reco:
            print('Couldn\'t find tree %s in file %s' % \
                  (self.n_treereco, self.l_root[file_index]))
        reco = tree_reco.pandas.df(branches=self.v_all)

        # Run-list filter, s_reco_unp query, then match to the good events.
        reco = selectdfrunlist(reco, self.runlist, "run_number")
        reco = selectdfquery(reco, self.s_reco_unp)
        reco = pd.merge(reco, events_good, on=self.v_evtmatch)

        if 'Jet' not in self.case:
            in_acceptance = selectfidacc(reco.pt_cand.values,
                                         reco.y_cand.values)
            reco = reco[np.array(in_acceptance, dtype=bool)]
            if self.b_trackcuts is not None:
                reco = filter_bit_df(reco, self.v_bitvar, self.b_trackcuts)
            reco[self.v_isstd] = _flag(reco, self.b_std)
            reco = reco.reset_index(drop=True)
            if self.mcordata == "mc":
                reco[self.v_ismcsignal] = _flag(reco, self.b_mcsig)
                reco[self.v_ismcprompt] = _flag(reco, self.b_mcsigprompt)
                reco[self.v_ismcfd] = _flag(reco, self.b_mcsigfd)
                reco[self.v_ismcbkg] = _flag(reco, self.b_mcbkg)

        reco.to_pickle(self.l_reco[file_index])

        # Everything below concerns the generated tree, which is MC only.
        if self.mcordata != "mc":
            return

        gen = uproot.open(source)[self.n_treegen].pandas.df(
            branches=self.v_gen)
        gen = selectdfrunlist(gen, self.runlist, "run_number")
        # Gen is merged with the events before the s_good_evt_unp selection.
        gen = pd.merge(gen, events_all, on=self.v_evtmatch)
        gen = selectdfquery(gen, self.s_gen_unp)
        gen[self.v_isstd] = _flag(gen, self.b_std)
        gen[self.v_ismcsignal] = _flag(gen, self.b_mcsig)
        gen[self.v_ismcprompt] = _flag(gen, self.b_mcsigprompt)
        gen[self.v_ismcfd] = _flag(gen, self.b_mcsigfd)
        gen[self.v_ismcbkg] = _flag(gen, self.b_mcbkg)
        gen = gen.reset_index(drop=True)
        gen.to_pickle(self.l_gen[file_index])