Пример #1
0
def test_lumimask():
    """Round-trip a LumiMask through cloudpickle and verify identical behavior."""
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )

    # Serialize and restore the mask object.
    lumimask_pickle = cloudpickle.loads(cloudpickle.dumps(lumimask))

    # The restored object must expose the same run keys...
    keys = lumimask._masks.keys()
    assert keys == lumimask_pickle._masks.keys()
    # ...and identical lumi-range arrays for each run.
    for run_key in keys:
        assert np.all(lumimask._masks[run_key] == lumimask_pickle._masks[run_key])

    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)

    for lm in (lumimask, lumimask_pickle):
        mask = lm(runs, lumis)
        print("mask:", mask)
        # Run 303825 / lumi 115 is certified; run 123 is not.
        assert mask[0]
        assert not mask[1]

        # The pure-Python kernel must agree with the compiled call path.
        py_mask = np.zeros(dtype="bool", shape=runs.shape)
        LumiMask._apply_run_lumi_mask_kernel.py_func(lm._masks, runs, lumis, py_mask)
        assert np.all(mask == py_mask)

    # Original and unpickled masks must agree element-wise.
    assert np.all(lumimask(runs, lumis) == lumimask_pickle(runs, lumis))
Пример #2
0
def test_lumimask():
    """Check LumiMask accepts a certified (run, lumi) pair and rejects an uncertified one."""
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    # Run 303825 / lumi 115 is inside the golden JSON; run 123 is not.
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # Truthiness asserts instead of '== True' / '== False' comparisons (PEP 8).
    assert mask[0]
    assert not mask[1]
Пример #3
0
def test_lumimask():
    """Check LumiMask results and agreement of the underlying pure-Python kernel."""
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    # Run 303825 / lumi 115 is inside the golden JSON; run 123 is not.
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # Truthiness asserts instead of '== True' / '== False' comparisons (PEP 8).
    assert mask[0]
    assert not mask[1]

    # The pure-Python kernel must agree with the compiled call path.
    # NOTE(review): another version of this test uses the name
    # `_apply_run_lumi_mask_kernel` (leading underscore) -- attribute name
    # depends on the coffea version in use; confirm against the installed one.
    py_mask = np.zeros(dtype='bool', shape=runs.shape)
    LumiMask.apply_run_lumi_mask_kernel.py_func(lumimask._masks, runs, lumis,
                                                py_mask)

    assert np.all(mask == py_mask)
Пример #4
0
    def get_data(self, f):
        """Collect per-file bookkeeping for one data file.

        Opens the file with uproot, counts events, and builds the LumiList of
        (run, lumi) pairs that pass the golden-JSON mask.

        Parameters
        ----------
        f : str
            Path (or xrootd URL) of a NanoAOD data file.

        Returns
        -------
        dict
            Keys: 'data_entries' (int event count) and 'lumi_list' (LumiList).
        """
        import uproot
        # Removed unused local imports (numpy, LumiData) from the original.
        from coffea.lumi_tools import LumiList
        ret = {}
        root_file = uproot.open(f)  # renamed: 'file' shadowed the builtin
        tree = root_file['Events']
        ret['data_entries'] = tree.numentries

        bl = root_file.get("LuminosityBlocks")
        runs = bl.array("run")
        lumis = bl.array("luminosityBlock")
        lumi_mask = LumiMask(self.parameters['lumimask'])
        lumi_filter = lumi_mask(runs, lumis)
        #print("Lumi filter eff.: ",lumi_filter.mean())
        # Only build a populated LumiList when at least one lumi section passes.
        if len(runs[lumi_filter]) > 0:
            ret['lumi_list'] = LumiList(runs[lumi_filter], lumis[lumi_filter])
        else:
            ret['lumi_list'] = LumiList()
        return ret
Пример #5
0
    def process(self, events):
        """Process one chunk of NanoAOD events for the dielectron analysis.

        Applies a loose jet preselection, HLT dielectron triggers, the
        golden-JSON lumi mask, electron/muon/jet object selections, and fills
        the 'N_jet' multiplicity histogram for the opposite-sign baseline
        selection.

        Parameters
        ----------
        events : NanoEvents array for this chunk.

        Returns
        -------
        The filled accumulator for this chunk.
        """

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        #output['totalEvents']['all'] += len(events)
        #output['skimmedEvents']['all'] += len(ev)

        # NOTE(review): 'triggers' is only bound for years 2016-2018; any
        # other value of self.year raises NameError where 'triggers' is used
        # below -- confirm the supported year range.
        if self.year == 2018:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2017:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ

        # NOTE(review): 'lumimask' is only defined for 2018, but it is called
        # unconditionally further down (mask = lumimask(...)); running any
        # other year raises NameError -- confirm this processor is 2018-only.
        if self.year == 2018:
            lumimask = LumiMask(
                'processors/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
            )

        ## Electrons
        # Tight electrons with pT > 25 GeV inside |eta| < 2.4.
        electron = Collections(ev, "Electron", "tight").get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        # Veto-level ("loose") electrons used for the exclusivity requirement.
        loose_electron = Collections(ev, "Electron", "veto").get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        # Exactly-two-electron events, split by total charge:
        # same-sign (sum != 0) vs opposite-sign (sum == 0).
        SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron)
                                                               == 2)
        OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron)
                                                               == 2)

        # All electron pairs and their invariant mass / combined pT.
        dielectron = choose(electron, 2)
        dielectron_mass = (dielectron['0'] + dielectron['1']).mass
        dielectron_pt = (dielectron['0'] + dielectron['1']).pt

        # Leading (highest-pT) electron, additionally required to have pT > 30.
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[(leading_electron_idx)]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]

        ##Muons

        # Veto-level muons; events with any such muon fail 'num_loose' below.
        loose_muon = Collections(ev, "Muon", "veto").get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt', UL=False)
        jet = jet[ak.argsort(
            jet.pt, ascending=False
        )]  # need to sort wrt smeared and recorrected jet pt
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset)
        mask = lumimask(ev.run, ev.luminosityBlock)
        ss = (SSelectron)
        os = (OSelectron)
        # Z window: best pair within 15 GeV of the Z mass (91.2 GeV).
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        jet1 = (ak.num(jet) >= 1)
        jet2 = (ak.num(jet) >= 2)
        # Exclusivity: exactly two loose electrons and no loose muons.
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('mask', (mask))
        selection.add('ss', ss)
        selection.add('os', os)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', jet1)
        selection.add('two jets', jet2)
        selection.add('num_loose', num_loose)

        # Baseline requirements shared by every region below.
        bl_reqs = ['filter'] + ['mass'] + ['mask'] + ['triggers'] + [
            'leading'
        ] + ['num_loose']
        #bl_reqs = ['filter'] + ['mass'] + ['triggers'] + ['leading'] + ['num_loose']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        # Same-sign / opposite-sign regions, optionally with >=1 or >=2 jets.
        s_reqs = bl_reqs + ['ss']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        o_reqs = bl_reqs + ['os']
        o_reqs_d = {sel: True for sel in o_reqs}
        os_sel = selection.require(**o_reqs_d)

        j1s_reqs = s_reqs + ['one jet']
        j1s_reqs_d = {sel: True for sel in j1s_reqs}
        j1ss_sel = selection.require(**j1s_reqs_d)

        j1o_reqs = o_reqs + ['one jet']
        j1o_reqs_d = {sel: True for sel in j1o_reqs}
        j1os_sel = selection.require(**j1o_reqs_d)

        j2s_reqs = s_reqs + ['two jets']
        j2s_reqs_d = {sel: True for sel in j2s_reqs}
        j2ss_sel = selection.require(**j2s_reqs_d)

        j2o_reqs = o_reqs + ['two jets']
        j2o_reqs_d = {sel: True for sel in j2o_reqs}
        j2os_sel = selection.require(**j2o_reqs_d)

        # Jet multiplicity in the opposite-sign baseline region.
        output["N_jet"].fill(
            dataset=dataset,
            multiplicity=ak.num(jet)[os_sel],
        )

        return output
Пример #6
0
    from coffea.lookup_tools import extractor
    from coffea.btag_tools import BTagScaleFactor

    # load definitions
    from definitions_analysis import parameters, eraDependentParameters, samples_info
    parameters.update(eraDependentParameters[args.year])
    print(parameters)

    outdir = args.outdir
    if not os.path.exists(outdir):
        print(os.getcwd())
        os.makedirs(outdir)

    if "Run" in args.sample:
        is_mc = False
        lumimask = LumiMask(parameters["lumimask"])
    else:
        is_mc = True
        lumimask = None

    #define arrays to load: these are objects that will be kept together
    arrays_objects = [
        "Jet_eta",
        "Jet_phi",
        "Jet_btagDeepB",
        "Jet_btagCSVV2",
        "Jet_btagDeepFlavB",
        "Jet_jetId",
        "Jet_puId",  #"Jet_btagDeepFlavB" add for DeepFlavour
        "Muon_pt",
        "Muon_eta",
Пример #7
0
    def load_sample(self, sample, parallelize=1):
        import glob, tqdm
        import uproot
        import multiprocessing as mp
        print("Loading", sample)

        if sample not in self.paths:
            print(f"Couldn't load {sample}! Skipping.")
            return {
                'sample': sample,
                'metadata': {},
                'files': {},
                'data_entries': 0,
                'is_missing': True
            }

        all_files = []
        metadata = {}
        data_entries = 0
        data_runs = []
        data_lumis = []
        lumi_list = LumiList()

        if self.at_purdue:
            all_files = read_via_xrootd(self.server, self.paths[sample])
        else:
            all_files = [
                server + f
                for f in glob.glob(self.paths[sample] + '/**/**/*.root')
            ]

#        if 'ttjets_sl' in sample:
#            all_files = all_files[0:10]

        if self.debug:
            all_files = [all_files[0]]
#            all_files = [all_files[31]]

        sumGenWgts = 0
        nGenEvts = 0

        if parallelize > 1:
            pool = mp.Pool(parallelize)
            if 'data' in sample:
                a = [
                    pool.apply_async(self.get_data, args=(f, ))
                    for f in all_files
                ]
            else:
                a = [
                    pool.apply_async(self.get_mc, args=(f, ))
                    for f in all_files
                ]
            results = []
            for process in a:
                process.wait()
                results.append(process.get())
                pool.close()
            for ret in results:
                if 'data' in sample:
                    data_entries += ret['data_entries']
                    lumi_list += ret['lumi_list']
                else:
                    sumGenWgts += ret['sumGenWgts']
                    nGenEvts += ret['nGenEvts']
        else:
            for f in all_files:
                if 'data' in sample:
                    tree = uproot.open(f)['Events']
                    data_entries += tree.numentries
                    lumi_mask = LumiMask(self.parameters['lumimask'])
                    lumi_filter = lumi_mask(tree.array('run'),
                                            tree.array('luminosityBlock'))
                    lumi_list += LumiList(
                        tree.array('run')[lumi_filter],
                        tree.array('luminosityBlock')[lumi_filter])
                else:
                    tree = uproot.open(f)['Runs']
                    if 'NanoAODv6' in self.paths[sample]:
                        sumGenWgts += tree.array('genEventSumw_')[0]
                        nGenEvts += tree.array('genEventCount_')[0]
                    else:
                        sumGenWgts += tree.array('genEventSumw')[0]
                        nGenEvts += tree.array('genEventCount')[0]
        metadata['sumGenWgts'] = sumGenWgts
        metadata['nGenEvts'] = nGenEvts

        files = {'files': all_files, 'treename': 'Events'}
        return {'sample': sample, 'metadata': metadata, 'files': files,\
                'data_entries':data_entries, 'lumi_list':lumi_list, 'is_missing':False}
Пример #8
0
    def process(self, df):
        """Process one chunk of NanoAOD events for the H->mumu analysis.

        Pipeline: HLT + lumimask (data) or gen/PU/lumi/L1-prefiring weights
        (MC); Rochester / FSR / GeoFit muon corrections; dimuon event
        selection; JEC and the jet loop over pT variations; region
        assignment (z-peak / h-sidebands / h-peak) and output column
        selection.

        Parameters
        ----------
        df : NanoEvents array for this chunk.

        Returns
        -------
        A pandas DataFrame of selected events (or the accumulator identity
        when self.apply_to_output is set).
        """
        # Initialize timer
        if self.timer:
            self.timer.update()

        # Dataset name (see definitions in config/datasets.py)
        dataset = df.metadata["dataset"]
        is_mc = "data" not in dataset
        numevents = len(df)

        # ------------------------------------------------------------#
        # Apply HLT, lumimask, genweights, PU weights
        # and L1 prefiring weights
        # ------------------------------------------------------------#

        # All variables that we want to save
        # will be collected into the 'output' dataframe
        output = pd.DataFrame({"run": df.run, "event": df.event})
        output.index.name = "entry"
        output["npv"] = df.PV.npvs
        output["met"] = df.MET.pt

        # Separate dataframe to keep track on weights
        # and their systematic variations
        weights = Weights(output)

        if is_mc:
            # For MC: Apply gen.weights, pileup weights, lumi weights,
            # L1 prefiring weights
            # MC events are never masked out by the lumimask.
            mask = np.ones(numevents, dtype=bool)
            genweight = df.genWeight
            weights.add_weight("genwgt", genweight)
            weights.add_weight("lumi", self.lumi_weights[dataset])

            pu_wgts = pu_evaluator(
                self.pu_lookups,
                self.parameters,
                numevents,
                np.array(df.Pileup.nTrueInt),
                self.auto_pu,
            )
            weights.add_weight("pu_wgt", pu_wgts, how="all")

            if self.parameters["do_l1prefiring_wgts"]:
                if "L1PreFiringWeight" in df.fields:
                    l1pfw = l1pf_weights(df)
                    weights.add_weight("l1prefiring_wgt", l1pfw, how="all")
                else:
                    weights.add_weight("l1prefiring_wgt", how="dummy_vars")

        else:
            # For Data: apply Lumi mask
            lumi_info = LumiMask(self.parameters["lumimask"])
            mask = lumi_info(df.run, df.luminosityBlock)

        # Apply HLT to both Data and MC
        hlt_columns = [c for c in self.parameters["hlt"] if c in df.HLT.fields]
        hlt = ak.to_pandas(df.HLT[hlt_columns])
        # If none of the configured HLT paths exist in this file, no event
        # can pass the trigger requirement.
        if len(hlt_columns) == 0:
            hlt = False
        else:
            hlt = hlt[hlt_columns].sum(axis=1)

        if self.timer:
            self.timer.add_checkpoint("HLT, lumimask, PU weights")

        # ------------------------------------------------------------#
        # Update muon kinematics with Rochester correction,
        # FSR recovery and GeoFit correction
        # Raw pT and eta are stored to be used in event selection
        # ------------------------------------------------------------#

        # Save raw variables before computing any corrections
        df["Muon", "pt_raw"] = df.Muon.pt
        df["Muon", "eta_raw"] = df.Muon.eta
        df["Muon", "phi_raw"] = df.Muon.phi
        df["Muon", "pfRelIso04_all_raw"] = df.Muon.pfRelIso04_all

        # Rochester correction
        if self.do_roccor:
            apply_roccor(df, self.roccor_lookup, is_mc)
            df["Muon", "pt"] = df.Muon.pt_roch

            # variations will be in branches pt_roch_up and pt_roch_down
            # muons_pts = {
            #     'nominal': df.Muon.pt,
            #     'roch_up':df.Muon.pt_roch_up,
            #     'roch_down':df.Muon.pt_roch_down
            # }

        # for ...
        if True:  # indent reserved for loop over muon pT variations
            # According to HIG-19-006, these variations have negligible
            # effect on significance, but it's better to have them
            # implemented in the future

            # FSR recovery
            if self.do_fsr:
                has_fsr = fsr_recovery(df)
                df["Muon", "pt"] = df.Muon.pt_fsr
                df["Muon", "eta"] = df.Muon.eta_fsr
                df["Muon", "phi"] = df.Muon.phi_fsr
                df["Muon", "pfRelIso04_all"] = df.Muon.iso_fsr

            # if FSR was applied, 'pt_fsr' will be corrected pt
            # if FSR wasn't applied, just copy 'pt' to 'pt_fsr'
            df["Muon", "pt_fsr"] = df.Muon.pt

            # GeoFit correction
            # NOTE(review): 'has_fsr' is only bound when self.do_fsr is True;
            # running with do_geofit=True and do_fsr=False raises NameError
            # here -- confirm the two flags are always enabled together.
            if self.do_geofit and ("dxybs" in df.Muon.fields):
                apply_geofit(df, self.year, ~has_fsr)
                df["Muon", "pt"] = df.Muon.pt_fsr

            if self.timer:
                self.timer.add_checkpoint("Muon corrections")

            # --- conversion from awkward to pandas --- #
            muon_columns = [
                "pt",
                "pt_fsr",
                "eta",
                "phi",
                "charge",
                "ptErr",
                "mass",
                "pt_raw",
                "eta_raw",
                "pfRelIso04_all",
            ] + [self.parameters["muon_id"]]
            muons = ak.to_pandas(df.Muon[muon_columns])

            # --------------------------------------------------------#
            # Select muons that pass pT, eta, isolation cuts,
            # muon ID and quality flags
            # Select events with 2 OS muons, no electrons,
            # passing quality cuts and at least one good PV
            # --------------------------------------------------------#

            # Apply event quality flags
            flags = ak.to_pandas(df.Flag[self.parameters["event_flags"]])
            flags = flags[self.parameters["event_flags"]].product(axis=1)
            muons["pass_flags"] = True
            if self.parameters["muon_flags"]:
                muons["pass_flags"] = muons[
                    self.parameters["muon_flags"]].product(axis=1)

            # Define baseline muon selection (applied to pandas DF!)
            muons["selection"] = (
                (muons.pt_raw > self.parameters["muon_pt_cut"])
                & (abs(muons.eta_raw) < self.parameters["muon_eta_cut"])
                & (muons.pfRelIso04_all < self.parameters["muon_iso_cut"])
                & muons[self.parameters["muon_id"]]
                & muons.pass_flags)

            # Count muons
            nmuons = (muons[muons.selection].reset_index().groupby("entry")
                      ["subentry"].nunique())

            # Find opposite-sign muons
            mm_charge = muons.loc[muons.selection,
                                  "charge"].groupby("entry").prod()

            # Veto events with good quality electrons
            electrons = df.Electron[
                (df.Electron.pt > self.parameters["electron_pt_cut"])
                & (abs(df.Electron.eta) < self.parameters["electron_eta_cut"])
                & (df.Electron[self.parameters["electron_id"]] == 1)]
            electron_veto = ak.to_numpy(ak.count(electrons.pt, axis=1) == 0)

            # Find events with at least one good primary vertex
            good_pv = ak.to_pandas(df.PV).npvsGood > 0

            # Define baseline event selection
            output["two_muons"] = nmuons == 2
            output["event_selection"] = (mask
                                         & (hlt > 0)
                                         & (flags > 0)
                                         & (nmuons == 2)
                                         & (mm_charge == -1)
                                         & electron_veto
                                         & good_pv)

            # --------------------------------------------------------#
            # Select two leading-pT muons
            # --------------------------------------------------------#

            # Find pT-leading and subleading muons
            # This is slow for large chunk size.
            # Consider reimplementing using sort_values().groupby().nth()
            # or sort_values().drop_duplicates()
            # or using Numba
            # https://stackoverflow.com/questions/50381064/select-the-max-row-per-group-pandas-performance-issue
            muons = muons[muons.selection & (nmuons == 2)]
            mu1 = muons.loc[muons.pt.groupby("entry").idxmax()]
            mu2 = muons.loc[muons.pt.groupby("entry").idxmin()]
            mu1.index = mu1.index.droplevel("subentry")
            mu2.index = mu2.index.droplevel("subentry")

            # --------------------------------------------------------#
            # Select events with muons passing leading pT cut
            # and trigger matching (trig match not done in final vrsn)
            # --------------------------------------------------------#

            # Events where there is at least one muon passing
            # leading muon pT cut
            pass_leading_pt = mu1.pt_raw > self.parameters["muon_leading_pt"]

            # update event selection with leading muon pT cut
            output["pass_leading_pt"] = pass_leading_pt
            output[
                "event_selection"] = output.event_selection & output.pass_leading_pt

            # --------------------------------------------------------#
            # Fill dimuon and muon variables
            # --------------------------------------------------------#

            fill_muons(self, output, mu1, mu2, is_mc)

            if self.timer:
                self.timer.add_checkpoint("Event & muon selection")

        # ------------------------------------------------------------#
        # Prepare jets
        # ------------------------------------------------------------#

        prepare_jets(df, is_mc)

        # ------------------------------------------------------------#
        # Apply JEC, get JEC and JER variations
        # ------------------------------------------------------------#

        jets = df.Jet

        self.do_jec = False

        # We only need to reapply JEC for 2018 data
        # (unless new versions of JEC are released)
        if ("data" in dataset) and ("2018" in self.year):
            self.do_jec = True

        jets = apply_jec(
            df,
            jets,
            dataset,
            is_mc,
            self.year,
            self.do_jec,
            self.do_jecunc,
            self.do_jerunc,
            self.jec_factories,
            self.jec_factories_data,
        )

        # ------------------------------------------------------------#
        # Calculate other event weights
        # ------------------------------------------------------------#

        if is_mc:
            do_nnlops = self.do_nnlops and ("ggh" in dataset)
            if do_nnlops:
                nnlopsw = nnlops_weights(df, numevents, self.parameters,
                                         dataset)
                weights.add_weight("nnlops", nnlopsw)
            else:
                weights.add_weight("nnlops", how="dummy")
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
            # do_zpt = ('dy' in dataset)
            #
            # if do_zpt:
            #     zpt_weight = np.ones(numevents, dtype=float)
            #     zpt_weight[two_muons] =\
            #         self.evaluator[self.zpt_path](
            #             output['dimuon_pt'][two_muons]
            #         ).flatten()
            #     weights.add_weight('zpt_wgt', zpt_weight)
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
            do_musf = True
            if do_musf:
                muID, muIso, muTrig = musf_evaluator(self.musf_lookup,
                                                     self.year, numevents, mu1,
                                                     mu2)
                weights.add_weight("muID", muID, how="all")
                weights.add_weight("muIso", muIso, how="all")
                weights.add_weight("muTrig", muTrig, how="all")
            else:
                weights.add_weight("muID", how="dummy_all")
                weights.add_weight("muIso", how="dummy_all")
                weights.add_weight("muTrig", how="dummy_all")
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
            do_lhe = (("LHEScaleWeight" in df.fields)
                      and ("LHEPdfWeight" in df.fields)
                      and ("nominal" in self.pt_variations))
            if do_lhe:
                lhe_ren, lhe_fac = lhe_weights(df, output, dataset, self.year)
                weights.add_weight("LHERen", lhe_ren, how="only_vars")
                weights.add_weight("LHEFac", lhe_fac, how="only_vars")
            else:
                weights.add_weight("LHERen", how="dummy_vars")
                weights.add_weight("LHEFac", how="dummy_vars")
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
            # STXS theory uncertainties: only for VBF signal samples.
            do_thu = (("vbf" in dataset) and ("dy" not in dataset)
                      and ("nominal" in self.pt_variations)
                      and ("stage1_1_fine_cat_pTjet30GeV" in df.HTXS.fields))
            if do_thu:
                for i, name in enumerate(self.sths_names):
                    wgt_up = stxs_uncert(
                        i,
                        ak.to_numpy(df.HTXS.stage1_1_fine_cat_pTjet30GeV),
                        1.0,
                        self.stxs_acc_lookups,
                        self.powheg_xsec_lookup,
                    )
                    wgt_down = stxs_uncert(
                        i,
                        ak.to_numpy(df.HTXS.stage1_1_fine_cat_pTjet30GeV),
                        -1.0,
                        self.stxs_acc_lookups,
                        self.powheg_xsec_lookup,
                    )
                    thu_wgts = {"up": wgt_up, "down": wgt_down}
                    weights.add_weight("THU_VBF_" + name,
                                       thu_wgts,
                                       how="only_vars")
            else:
                for i, name in enumerate(self.sths_names):
                    weights.add_weight("THU_VBF_" + name, how="dummy_vars")
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #
            do_pdf = (self.do_pdf and ("nominal" in self.pt_variations)
                      and ("dy" in dataset or "ewk" in dataset
                           or "ggh" in dataset or "vbf" in dataset)
                      and ("mg" not in dataset))
            # For 2016, store individual PDF MC replicas; otherwise store a
            # single 2-RMS up/down variation.
            if "2016" in self.year:
                max_replicas = 0
                if "dy" in dataset:
                    max_replicas = 100
                elif "ewk" in dataset:
                    max_replicas = 33
                else:
                    max_replicas = 100
                if do_pdf:
                    pdf_wgts = df.LHEPdfWeight[:, 0:self.
                                               parameters["n_pdf_variations"]]
                for i in range(100):
                    if (i < max_replicas) and do_pdf:
                        output[f"pdf_mcreplica{i}"] = pdf_wgts[:, i]
                    else:
                        output[f"pdf_mcreplica{i}"] = np.nan
            else:
                if do_pdf:
                    pdf_wgts = df.LHEPdfWeight[:, 0:self.
                                               parameters["n_pdf_variations"]][
                                                   0]
                    pdf_wgts = np.array(pdf_wgts)
                    pdf_vars = {
                        "up": (1 + 2 * pdf_wgts.std()),
                        "down": (1 - 2 * pdf_wgts.std()),
                    }
                    weights.add_weight("pdf_2rms", pdf_vars, how="only_vars")
                else:
                    weights.add_weight("pdf_2rms", how="dummy_vars")
            # --- --- --- --- --- --- --- --- --- --- --- --- --- --- #

        if is_mc:
            output = fill_gen_jets(df, output)

        # ------------------------------------------------------------#
        # Loop over JEC variations and fill jet variables
        # ------------------------------------------------------------#

        # Promote columns to a (Variable, Variation) MultiIndex so the jet
        # loop can add per-variation columns alongside the nominal ones.
        output.columns = pd.MultiIndex.from_product(
            [output.columns, [""]], names=["Variable", "Variation"])

        if self.timer:
            self.timer.add_checkpoint("Jet preparation & event weights")

        for v_name in self.pt_variations:
            output_updated = self.jet_loop(
                v_name,
                is_mc,
                df,
                dataset,
                mask,
                muons,
                mu1,
                mu2,
                jets,
                weights,
                numevents,
                output,
            )
            if output_updated is not None:
                output = output_updated

        if self.timer:
            self.timer.add_checkpoint("Jet loop")

        # ------------------------------------------------------------#
        # Fill outputs
        # ------------------------------------------------------------#
        # Assign analysis region by dimuon mass.
        mass = output.dimuon_mass
        output["region"] = None
        output.loc[((mass > 76) & (mass < 106)), "region"] = "z-peak"
        output.loc[((mass > 110) & (mass < 115.03)) | ((mass > 135.03) &
                                                       (mass < 150)),
                   "region", ] = "h-sidebands"
        output.loc[((mass > 115.03) & (mass < 135.03)), "region"] = "h-peak"
        output["dataset"] = dataset
        output["year"] = int(self.year)

        # Keep only nominal weights and up/down systematic variations.
        for wgt in weights.df.columns:
            skip_saving = (("nominal" not in wgt) and ("up" not in wgt)
                           and ("down" not in wgt))
            if skip_saving:
                continue
            output[f"wgt_{wgt}"] = weights.get_weight(wgt)

        columns_to_save = [
            c for c in output.columns
            if (c[0] in self.vars_to_save) or ("wgt_" in c[0]) or (
                "mcreplica" in c[0]) or (c[0] in ["region", "dataset", "year"])
            or ("gjet" in c[0]) or ("gjj" in c[0])
        ]

        # Apply the event selection and flatten the MultiIndex columns.
        output = output.loc[output.event_selection, columns_to_save]
        output = output.reindex(sorted(output.columns), axis=1)

        output.columns = [
            " ".join(col).strip() for col in output.columns.values
        ]

        output = output[output.region.isin(self.regions)]
        """
        input_evts = numevents
        output_evts = output.shape[0]
        out_yield = output.wgt_nominal.sum()
        out_vbf = output[
                (output["jj_mass nominal"]>400) & (output["jj_dEta nominal"]>2.5) & (output["jet1_pt nominal"]>35)
            ].wgt_nominal.sum()
        out_ggh = out_yield - out_vbf

        print(f"\n{dataset}:    {input_evts}  ->  {output_evts};    yield = {out_ggh} (ggH) + {out_vbf} (VBF) = {out_yield}")
        """

        to_return = None
        if self.apply_to_output is None:
            to_return = output
        else:
            self.apply_to_output(output)
            to_return = self.accumulator.identity()

        if self.timer:
            self.timer.add_checkpoint("Saving outputs")
            self.timer.summary()

        return to_return
Пример #9
0
def build_lumimask(filename):
    """Build a coffea ``LumiMask`` from a golden-JSON file bundled in ``boostedhiggs.data``.

    ``filename`` is the bare name of a certification JSON shipped as package
    data; the returned object maps (run, lumi) pairs to a good-lumi boolean.
    """
    from coffea.lumi_tools import LumiMask

    # importlib.resources.path yields a context-managed filesystem path for the
    # packaged data file; the mask is constructed while that path is valid.
    with importlib.resources.path("boostedhiggs.data", filename) as json_path:
        mask = LumiMask(json_path)
    return mask
    def process(self, df):
        """Fill photon sigma_ieta_ieta histograms for three medium-ID variants.

        For data events, applies the per-year golden-JSON lumi mask, the
        data MET filters, and a single-photon trigger; for MC, applies the
        MC MET filters and the generator weight. Photons above 200 GeV in
        the barrel are histogrammed in ``output['sieie']`` for the nominal
        medium ID, the medium ID without the sieie cut, and the medium ID
        without sieie and with inverted isolation.

        Parameters
        ----------
        df : LazyDataFrame-like
            Per-chunk event data (NanoAOD columns accessed by name).

        Returns
        -------
        accumulator
            Histogram/counter accumulator for this chunk.
        """
        self._configure(df)
        output = self.accumulator.identity()
        dataset = df['dataset']

        # Lumi mask: only data needs the golden-JSON certification mask.
        year = extract_year(dataset)
        if is_data(dataset):
            if year == 2016:
                json = bucoffea_path(
                    'data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
                )
            elif year == 2017:
                json = bucoffea_path(
                    'data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt'
                )
            elif year == 2018:
                json = bucoffea_path(
                    'data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'
                )
            else:
                # Previously an unknown data year fell through and raised an
                # opaque NameError on `json` below; fail loudly instead.
                raise ValueError(
                    f"No golden JSON available for data year: {year}")
            lumi_mask = LumiMask(json)(df['run'], df['luminosityBlock'])
        else:
            # MC: accept every event (boolean array of True).
            lumi_mask = np.ones(df.size) == 1

        # MET filters differ between data and MC configurations.
        if is_data(dataset):
            filt_met = mask_and(df, cfg.FILTERS.DATA)
        else:
            filt_met = mask_and(df, cfg.FILTERS.MC)

        # Single-photon trigger: lower threshold existed only in 2016.
        if year == 2016:
            trigger = 'HLT_Photon175'
        else:
            trigger = 'HLT_Photon200'

        photons = setup_photons(df)

        # Jets: tight ID, pt > 100, |eta| < 2.4, not overlapping a photon.
        ak4 = setup_jets(df)
        ak4 = ak4[
                  object_overlap(ak4, photons) \
                  & ak4.tightId \
                  & (ak4.pt > 100) \
                  & (ak4.abseta < 2.4)
                  ]

        # Event selection: filters + lumi + >=1 jet + trigger + low MET
        # (the MET cut suppresses real-MET backgrounds in this photon CR).
        event_mask = filt_met \
                     & lumi_mask \
                     & (ak4.counts > 0) \
                     & df[trigger] \
                     & (df['MET_pt'] < 60)

        # Generator weight (unity for data).
        weights = processor.Weights(size=df.size, storeIndividual=True)

        if is_data(dataset):
            weights.add('gen', np.ones(df.size))
        else:
            weights.add('gen', df['Generator_weight'])

        # Photon kinematic preselection shared by all three ID variants.
        photon_kinematics = (photons.pt > 200) & (photons.barrel)

        # Medium ID (nominal)
        vals = photons[photon_kinematics & photons.mediumId].sieie[event_mask]
        pt = photons[photon_kinematics & photons.mediumId].pt[event_mask]
        output['sieie'].fill(dataset=dataset,
                             cat='medium',
                             sieie=vals.flatten(),
                             pt=pt.flatten(),
                             weights=weight_shape(
                                 vals,
                                 weights.weight()[event_mask]))

        # Medium ID with the sieie cut removed
        vals = photons[photon_kinematics
                       & medium_id_no_sieie(photons)].sieie[event_mask]
        pt = photons[photon_kinematics
                     & medium_id_no_sieie(photons)].pt[event_mask]
        output['sieie'].fill(dataset=dataset,
                             cat='medium_nosieie',
                             sieie=vals.flatten(),
                             pt=pt.flatten(),
                             weights=weight_shape(
                                 vals,
                                 weights.weight()[event_mask]))

        # Medium ID, no sieie cut, inverted isolation (background template)
        vals = photons[photon_kinematics
                       & medium_id_no_sieie_inv_iso(photons)].sieie[event_mask]
        pt = photons[photon_kinematics
                     & medium_id_no_sieie_inv_iso(photons)].pt[event_mask]
        output['sieie'].fill(dataset=dataset,
                             cat='medium_nosieie_invertiso',
                             sieie=vals.flatten(),
                             pt=pt.flatten(),
                             weights=weight_shape(
                                 vals,
                                 weights.weight()[event_mask]))

        # Keep track of the MC weight sums for later normalization.
        if not is_data(dataset):
            output['sumw'][dataset] += df['genEventSumw']
            output['sumw2'][dataset] += df['genEventSumw2']
        return output
Пример #11
0
    def process(self, events):
        """Fill opposite-sign (charge-flip-weighted) and same-sign dielectron
        histograms for the charge-flip estimate.

        Selects events with exactly two tight electrons (and no loose muons),
        on the Z mass peak, passing a dielectron trigger and the per-year
        golden-JSON lumi mask; fills lepton kinematics, dilepton mass/pt,
        MET, jet multiplicity, and pileup histograms in inclusive, >=1-jet,
        and >=2-jet categories, separately for OS (weighted by the
        charge-flip probability) and SS events.
        """

        output = self.accumulator.identity()

        # we can use a very loose preselection to filter the events. nothing is done with this presel, though
        presel = ak.num(events.Jet) > 0

        # Per-year golden-JSON lumi mask.
        # NOTE(review): there is no fallback branch — if self.year is not one
        # of 2016/2017/2018, `lumimask` is unbound and its use below raises
        # NameError. Presumably self.year is validated upstream; confirm.
        if self.year == 2016:
            lumimask = LumiMask(
                '../data/lumi/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt'
            )
        if self.year == 2017:
            lumimask = LumiMask(
                '../data/lumi/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt'
            )
        if self.year == 2018:
            lumimask = LumiMask(
                '../data/lumi/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt'
            )

        ev = events[presel]
        dataset = ev.metadata['dataset']

        # load the config - probably not needed anymore
        cfg = loadConfig()

        output['totalEvents']['all'] += len(events)
        output['skimmedEvents']['all'] += len(ev)

        # Dielectron trigger; 2016 uses the _DZ variant of the same path.
        if self.year == 2018:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2017:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL
        elif self.year == 2016:
            triggers = ev.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ

        ## Electrons
        # Tight FCNC ID electrons with pt > 25 and |eta| < 2.4.
        electron = Collections(ev, "Electron", "tightFCNC", 0, self.year).get()
        electron = electron[(electron.pt > 25) & (np.abs(electron.eta) < 2.4)]

        # Loose electrons, used only for the exactly-two-loose veto below.
        loose_electron = Collections(ev, "Electron", "looseFCNC", 0,
                                     self.year).get()
        loose_electron = loose_electron[(loose_electron.pt > 25)
                                        & (np.abs(loose_electron.eta) < 2.4)]

        # Same-sign / opposite-sign classification for exactly-2e events:
        # the per-event charge sum is nonzero (SS) or zero (OS).
        SSelectron = (ak.sum(electron.charge, axis=1) != 0) & (ak.num(electron)
                                                               == 2)
        OSelectron = (ak.sum(electron.charge, axis=1) == 0) & (ak.num(electron)
                                                               == 2)

        # All electron pairs per event (at most one for 2-electron events).
        dielectron = choose(electron, 2)
        dielectron_mass = (dielectron['0'] + dielectron['1']).mass
        dielectron_pt = (dielectron['0'] + dielectron['1']).pt

        # Leading (highest-pt) electron, additionally required above 30 GeV.
        leading_electron_idx = ak.singletons(ak.argmax(electron.pt, axis=1))
        leading_electron = electron[(leading_electron_idx)]
        leading_electron = leading_electron[(leading_electron.pt > 30)]

        # Trailing (lowest-pt) electron.
        trailing_electron_idx = ak.singletons(ak.argmin(electron.pt, axis=1))
        trailing_electron = electron[trailing_electron_idx]

        ##Muons

        # Loose muons, used for the zero-loose-muon veto and jet cleaning.
        loose_muon = Collections(ev, "Muon", "looseFCNC", 0, self.year).get()
        loose_muon = loose_muon[(loose_muon.pt > 20)
                                & (np.abs(loose_muon.eta) < 2.4)]

        #jets
        jet = getJets(ev, minPt=40, maxEta=2.4, pt_var='pt')
        jet = jet[~match(jet, loose_muon,
                         deltaRCut=0.4)]  # remove jets that overlap with muons
        jet = jet[~match(
            jet, electron,
            deltaRCut=0.4)]  # remove jets that overlap with electrons

        ## MET -> can switch to puppi MET
        met_pt = ev.MET.pt
        met_phi = ev.MET.phi

        #weights
        # `weight` has no entries added, so weight.weight() is presumably all
        # ones (unweighted SS yields); `weight2` carries the charge-flip
        # probability applied to OS events. TODO confirm against coffea Weights.
        weight = Weights(len(ev))
        weight2 = Weights(len(ev))
        weight2.add("charge flip",
                    self.charge_flip_ratio.flip_weight(electron))

        #selections
        filters = getFilters(ev, year=self.year, dataset=dataset, UL=False)
        mask = lumimask(ev.run, ev.luminosityBlock)
        ss = (SSelectron)
        os = (OSelectron)
        # Within 15 GeV of the Z mass (91.2).
        mass = (ak.min(np.abs(dielectron_mass - 91.2), axis=1) < 15)
        lead_electron = (ak.min(leading_electron.pt, axis=1) > 30)
        jet1 = (ak.num(jet) >= 1)
        jet2 = (ak.num(jet) >= 2)
        # Exactly two loose electrons and no loose muons.
        num_loose = ((ak.num(loose_electron) == 2) & (ak.num(loose_muon) == 0))

        selection = PackedSelection()
        selection.add('filter', (filters))
        selection.add('mask', (mask))
        selection.add('ss', ss)
        selection.add('os', os)
        selection.add('mass', mass)
        selection.add('leading', lead_electron)
        selection.add('triggers', triggers)
        selection.add('one jet', jet1)
        selection.add('two jets', jet2)
        selection.add('num_loose', num_loose)

        # Baseline: MET filters + trigger + lumi mask.
        bl_reqs = ['filter'] + ['triggers'] + ['mask']

        bl_reqs_d = {sel: True for sel in bl_reqs}
        baseline = selection.require(**bl_reqs_d)

        # Same-sign and opposite-sign Z-peak selections ...
        s_reqs = bl_reqs + ['ss'] + ['mass'] + ['num_loose'] + ['leading']
        s_reqs_d = {sel: True for sel in s_reqs}
        ss_sel = selection.require(**s_reqs_d)

        o_reqs = bl_reqs + ['os'] + ['mass'] + ['num_loose'] + ['leading']
        o_reqs_d = {sel: True for sel in o_reqs}
        os_sel = selection.require(**o_reqs_d)

        # ... and their >=1-jet and >=2-jet refinements.
        j1s_reqs = s_reqs + ['one jet']
        j1s_reqs_d = {sel: True for sel in j1s_reqs}
        j1ss_sel = selection.require(**j1s_reqs_d)

        j1o_reqs = o_reqs + ['one jet']
        j1o_reqs_d = {sel: True for sel in j1o_reqs}
        j1os_sel = selection.require(**j1o_reqs_d)

        j2s_reqs = s_reqs + ['two jets']
        j2s_reqs_d = {sel: True for sel in j2s_reqs}
        j2ss_sel = selection.require(**j2s_reqs_d)

        j2o_reqs = o_reqs + ['two jets']
        j2o_reqs_d = {sel: True for sel in j2o_reqs}
        j2os_sel = selection.require(**j2o_reqs_d)

        #outputs

        # Electron kinematics: leading/trailing pairs for OS (charge-flip
        # weighted, histograms 1-6) and SS (unweighted, histograms 7-12),
        # inclusive / >=1 jet / >=2 jets.
        output["electron_data1"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[os_sel].phi)),
            weight=weight2.weight()[os_sel])

        output["electron_data2"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[os_sel].phi)),
            weight=weight2.weight()[os_sel])

        output["electron_data3"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j1os_sel].phi)),
            weight=weight2.weight()[j1os_sel])

        output["electron_data4"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j1os_sel].phi)),
            weight=weight2.weight()[j1os_sel])

        output["electron_data5"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j2os_sel].phi)),
            weight=weight2.weight()[j2os_sel])

        output["electron_data6"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j2os_sel].phi)),
            weight=weight2.weight()[j2os_sel])

        output["electron_data7"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[ss_sel].phi)),
            weight=weight.weight()[ss_sel])

        output["electron_data8"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[ss_sel].phi)),
            weight=weight.weight()[ss_sel])

        output["electron_data9"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j1ss_sel].phi)),
            weight=weight.weight()[j1ss_sel])

        output["electron_data10"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j1ss_sel].phi)),
            weight=weight.weight()[j1ss_sel])

        output["electron_data11"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(leading_electron[j2ss_sel].phi)),
            weight=weight.weight()[j2ss_sel])

        output["electron_data12"].fill(
            dataset=dataset,
            pt=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].pt)),
            eta=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].eta)),
            phi=ak.to_numpy(ak.flatten(trailing_electron[j2ss_sel].phi)),
            weight=weight.weight()[j2ss_sel])

        # Dilepton mass/pt: 1-3 OS (weighted), 4-6 SS, same jet categories.
        output["dilep_mass1"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[os_sel])),
            weight=weight2.weight()[os_sel])

        output["dilep_mass2"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j1os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j1os_sel])),
            weight=weight2.weight()[j1os_sel])

        output["dilep_mass3"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j2os_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j2os_sel])),
            weight=weight2.weight()[j2os_sel])

        output["dilep_mass4"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[ss_sel])),
            weight=weight.weight()[ss_sel])

        output["dilep_mass5"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j1ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j1ss_sel])),
            weight=weight.weight()[j1ss_sel])

        output["dilep_mass6"].fill(
            dataset=dataset,
            mass=ak.to_numpy(ak.flatten(dielectron_mass[j2ss_sel])),
            pt=ak.to_numpy(ak.flatten(dielectron_pt[j2ss_sel])),
            weight=weight.weight()[j2ss_sel])

        # MET: 1-3 OS (weighted), 4-6 SS, same jet categories.
        output["MET"].fill(dataset=dataset,
                           pt=met_pt[os_sel],
                           weight=weight2.weight()[os_sel])

        output["MET2"].fill(dataset=dataset,
                            pt=met_pt[j1os_sel],
                            weight=weight2.weight()[j1os_sel])

        output["MET3"].fill(dataset=dataset,
                            pt=met_pt[j2os_sel],
                            weight=weight2.weight()[j2os_sel])

        output["MET4"].fill(dataset=dataset,
                            pt=met_pt[ss_sel],
                            weight=weight.weight()[ss_sel])

        output["MET5"].fill(dataset=dataset,
                            pt=met_pt[j1ss_sel],
                            weight=weight.weight()[j1ss_sel])

        output["MET6"].fill(dataset=dataset,
                            pt=met_pt[j2ss_sel],
                            weight=weight.weight()[j2ss_sel])

        # Jet multiplicity: 1-3 OS (weighted), 4-6 SS.
        output["N_jet"].fill(dataset=dataset,
                             multiplicity=ak.num(jet)[os_sel],
                             weight=weight2.weight()[os_sel])

        output["N_jet2"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j1os_sel],
                              weight=weight2.weight()[j1os_sel])

        output["N_jet3"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j2os_sel],
                              weight=weight2.weight()[j2os_sel])

        output["N_jet4"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[ss_sel],
                              weight=weight.weight()[ss_sel])

        output["N_jet5"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j1ss_sel],
                              weight=weight.weight()[j1ss_sel])

        output["N_jet6"].fill(dataset=dataset,
                              multiplicity=ak.num(jet)[j2ss_sel],
                              weight=weight.weight()[j2ss_sel])

        # Good-primary-vertex multiplicity (pileup): 1-3 OS, 4-6 SS.
        output["PV_npvsGood"].fill(dataset=dataset,
                                   multiplicity=ev.PV[os_sel].npvsGood,
                                   weight=weight2.weight()[os_sel])

        output["PV_npvsGood2"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j1os_sel].npvsGood,
                                    weight=weight2.weight()[j1os_sel])

        output["PV_npvsGood3"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j2os_sel].npvsGood,
                                    weight=weight2.weight()[j2os_sel])

        output["PV_npvsGood4"].fill(dataset=dataset,
                                    multiplicity=ev.PV[ss_sel].npvsGood,
                                    weight=weight.weight()[ss_sel])

        output["PV_npvsGood5"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j1ss_sel].npvsGood,
                                    weight=weight.weight()[j1ss_sel])

        output["PV_npvsGood6"].fill(dataset=dataset,
                                    multiplicity=ev.PV[j2ss_sel].npvsGood,
                                    weight=weight.weight()[j2ss_sel])

        return output