Exemplo n.º 1
0
def test():
    """Check that records missing the "z" field read back with z=None.

    Five lists of records are pushed through an ``ak.ArrayBuilder``.  Only
    two of the records set the optional string field "z"; the assertion
    checks that ``ak.to_list`` reports ``None`` for "z" on every other record.
    """
    # One inner list per builder list; each tuple is (x, y, z), with z=None
    # meaning "the z field is never written for this record".
    rows_per_list = [
        [(1, 1.1, None), (2, 2.2, None), (3, 3.3, None)],
        [],
        [(4, 4.4, None), (5, 5.5, "five")],
        [(6, 6.6, "six")],
        [(7, 7.7, None), (8, 8.8, None)],
    ]

    builder = ak.ArrayBuilder()
    for rows in rows_per_list:
        with builder.list():
            for x_val, y_val, z_val in rows:
                with builder.record():
                    builder.field("x").integer(x_val)
                    builder.field("y").real(y_val)
                    if z_val is not None:
                        builder.field("z").string(z_val)

    expected = [[{
        "x": x_val,
        "y": y_val,
        "z": z_val
    } for x_val, y_val, z_val in rows] for rows in rows_per_list]

    assert ak.to_list(builder) == expected
    def process(self, events):
        """Run the Z(ee) + photon selection on one chunk and fill histograms.

        The chunk is passed through a sequence of cuts (golden-JSON lumi mask
        for data, double-electron trigger, electron selection, photon
        selection with a modified cut-based ID, photon gen-matching, OSSF
        electron pair, event-level cuts).  For simulation, pileup, ID, reco
        and (2018 only) trigger scale factors are applied as event weights.

        Parameters
        ----------
        events : NanoEvents-style awkward array holding the fields read below
            (Electron, Photon, Jet, Muon, MET, PV, HLT, GenPart, ...).

        Returns
        -------
        The accumulator obtained from ``self.accumulator.identity()``, filled
        with the cut flow and kinematic histograms.  It is returned unfilled
        as soon as any stage leaves no events.

        Raises
        ------
        ValueError
            For data when no golden JSON is configured for ``self._year``.
        AssertionError
            When an event does not contain exactly one selected gen photon.
        """

        # Initialize accumulator
        out = self.accumulator.identity()
        dataset = sample_name
        #events.metadata['dataset']

        # Data or MC
        isData = 'genWeight' not in events.fields

        # Stop processing if no events remain
        if len(events) == 0:
            return out

        # Golden Json file
        injson = None
        if isData:
            golden_json = {
                "2018":
                "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD",
                "2017":
                "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
            }
            # Fail early with a clear message instead of a NameError later on.
            if self._year not in golden_json:
                raise ValueError(
                    "No golden JSON configured for year {0}".format(
                        self._year))
            injson = golden_json[self._year]

        # <----- Get Scale factors ------>#

        if not isData:

            # Egamma reco ID
            get_ele_reco_above20_sf = self._corrections[
                'get_ele_reco_above20_sf'][self._year]
            get_ele_medium_id_sf = self._corrections['get_ele_medium_id_sf'][
                self._year]
            get_pho_medium_id_sf = self._corrections['get_pho_medium_id_sf'][
                self._year]

            # DoubleEG trigger # 2016, 2017 are not applied yet
            if self._year == "2018":
                get_ele_trig_leg1_SF = self._corrections[
                    'get_ele_trig_leg1_SF'][self._year]
                get_ele_trig_leg1_data_Eff = self._corrections[
                    'get_ele_trig_leg1_data_Eff'][self._year]
                get_ele_trig_leg1_mc_Eff = self._corrections[
                    'get_ele_trig_leg1_mc_Eff'][self._year]
                get_ele_trig_leg2_SF = self._corrections[
                    'get_ele_trig_leg2_SF'][self._year]
                get_ele_trig_leg2_data_Eff = self._corrections[
                    'get_ele_trig_leg2_data_Eff'][self._year]
                get_ele_trig_leg2_mc_Eff = self._corrections[
                    'get_ele_trig_leg2_mc_Eff'][self._year]

            # PU weight with custom made npy and multi-indexing
            pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
            pu = self._puweight_arr[pu_weight_idx]

        # Cut flow
        cut0 = np.zeros(len(events))

        # <----- Helper functions ------>#

        #  Sort by PT  helper function
        def sort_by_pt(ele, pho, jet):
            ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
            pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
            jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]

            return ele, pho, jet

        # Lorentz vectors
        from coffea.nanoevents.methods import vector
        ak.behavior.update(vector.behavior)

        def TLorentz_vector(vec):
            vec = ak.zip({
                "x": vec.x,
                "y": vec.y,
                "z": vec.z,
                "t": vec.t
            },
                         with_name="LorentzVector")
            return vec

        def TLorentz_vector_cylinder(vec):

            vec = ak.zip(
                {
                    "pt": vec.pt,
                    "eta": vec.eta,
                    "phi": vec.phi,
                    "mass": vec.mass,
                },
                with_name="PtEtaPhiMLorentzVector",
            )

            return vec

        # Cut-based ID modification: collapse the 2-bit-per-cut bitmap into
        # one pass/fail bit per cut at working point `idBit`.
        @numba.njit
        def PhotonVID(vid, idBit):
            rBit = 0
            for x in range(0, 7):
                rBit |= (1 << x) if ((vid >> (x * 2)) & 0b11 >= idBit) else 0
            return rBit

        # Per-photon mask: medium ID with the Sieie cut ignored
        @numba.njit
        def make_fake_obj_mask(Pho, builder):

            for eventIdx, pho in enumerate(Pho):  # --Event Loop
                # An empty event simply yields an empty list; every
                # begin_list() is always matched by an end_list().
                builder.begin_list()

                for phoIdx, _ in enumerate(pho):  # --Photon Loop

                    vid = Pho[eventIdx][phoIdx].vidNestedWPBitmap
                    vid_cuts2 = PhotonVID(vid, 2)  # Medium photon

                    # Field name
                    # |0|0|0|0|0|0|0|
                    # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT|

                    # 1. Turn off cut (ex turn off Sieie
                    # |1|1|1|0|1|1|1| = |1|1|1|0|1|1|1|

                    # 2. Inverse cut (ex inverse Sieie)
                    # |1|1|1|1|1|1|1| = |1|1|1|0|1|1|1|

                    #if (vid_cuts2 & 0b1111111 == 0b1111111): # Cut applied
                    #if (vid_cuts2 & 0b1111111 == 0b1110111): # Inverse Sieie
                    if (vid_cuts2 & 0b1110111 == 0b1110111):  # Without Sieie

                        builder.boolean(True)

                    else:

                        builder.boolean(False)

                builder.end_list()

            return builder

        # <----- Selection ------>#

        Initial_events = events
        # Good Run ( Golden Json files )
        from coffea import lumi_tools

        if isData:
            lumi_mask_builder = lumi_tools.LumiMask(injson)
            lumimask = ak.Array(
                lumi_mask_builder(events.run, events.luminosityBlock))
            events = events[lumimask]
            #print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

        # Stop processing if no events remain
        if len(events) == 0:
            return out

        ##----------- Cut flow1: Passing Triggers

        # double lepton trigger
        # (builtin bool: the np.bool alias was removed from NumPy)
        is_double_ele_trigger = True
        if not is_double_ele_trigger:
            double_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._doubleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                    path]

        # single lepton trigger
        is_single_ele_trigger = True
        if not is_single_ele_trigger:
            single_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._singleelectron_triggers[self._year]:
                if path not in events.HLT.fields: continue
                single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                    path]

        events.Electron, events.Photon, events.Jet = sort_by_pt(
            events.Electron, events.Photon, events.Jet)

        # Good Primary vertex: keep the unweighted value ("nw") separately
        # from the pileup-scaled one used for simulation.
        nPV_nw = events.PV.npvsGood
        nPV = nPV_nw
        if not isData: nPV = nPV_nw * pu

        # Apply cut1
        events = events[double_ele_triggers_arr]
        if not isData: pu = pu[double_ele_triggers_arr]

        cut1 = np.ones(len(events))

        # Set Particles
        Electron = events.Electron
        Muon = events.Muon
        Photon = events.Photon
        MET = events.MET
        Jet = events.Jet

        # Stop processing if no events remain
        if len(Electron) == 0:
            return out

        # --Gen Photon for dR
        # NOTE(review): GenPart is read unconditionally, so this path
        # presumably only works on simulation -- confirm before running data.
        genparts = events.GenPart
        pdgID_mask = (genparts.pdgId == 22)
        # mask2: isPrompt | fromHardProcess | isLastCopy
        mask2 = (1 << 0) | (1 << 8) | (1 << 13)
        # https://github.com/PKUHEPEWK/WGamma/blob/master/2018/wgRealPhotonTemplateModule.py

        status_mask = ((genparts.statusFlags & mask2) == mask2)
        gen_photons = genparts[pdgID_mask & status_mask]

        # Raise error if len(gen_photon) != 1
        assert ak.all(ak.num(gen_photons) == 1), \
            "expected exactly one selected gen photon per event"

        #  --Muon ( only used to calculate dR )
        MuSelmask = (Muon.pt >= 10) & (abs(
            Muon.eta) <= 2.5) & (Muon.tightId) & (Muon.pfRelIso04_all < 0.15)
        Muon = Muon[MuSelmask]

        ##----------- Cut flow2: Electron Selection

        # Supercluster |eta| decides barrel vs endcap impact-parameter cuts.
        ele_sc_abseta = np.abs(Electron.eta + Electron.deltaEtaSC)
        ele_common_mask = (Electron.pt >= 20) & (Electron.cutBased > 2)
        ele_barrel_mask = ((ele_sc_abseta < 1.479) &
                           (abs(Electron.dxy) < 0.05) &
                           (abs(Electron.dz) < 0.1))
        ele_endcap_mask = ((ele_sc_abseta > 1.479) & (ele_sc_abseta <= 2.5) &
                           (abs(Electron.dxy) < 0.1) &
                           (abs(Electron.dz) < 0.2))
        EleSelmask = ele_common_mask & (ele_barrel_mask | ele_endcap_mask)

        Electron = Electron[EleSelmask]

        # apply cut 2
        Tri_electron_mask = ak.num(Electron) >= 2
        Electron = Electron[Tri_electron_mask]
        Photon = Photon[Tri_electron_mask]
        Jet = Jet[Tri_electron_mask]
        MET = MET[Tri_electron_mask]
        Muon = Muon[Tri_electron_mask]
        if not isData: pu = pu[Tri_electron_mask]
        events = events[Tri_electron_mask]
        gen_photons = gen_photons[Tri_electron_mask]

        # Stop processing if no events remain
        if len(Electron) == 0:
            return out

        cut2 = np.ones(len(Photon)) * 2

        ##----------- Cut flow3: Photon Selection

        # Basic photon selection
        isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                                  (abs(Photon.eta) < 2.5))
        Pixel_seed_mask = ~Photon.pixelSeed
        PT_mask = Photon.pt >= 20

        # dR cut with selected Muon and Electrons
        dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                                 axis=-1)  # default metric table: delta_r
        dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

        PhoSelmask = PT_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
        Photon = Photon[PhoSelmask]

        # Apply cut 3 (at least one photon after the basic selection)
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        Photon_template_mask = make_fake_obj_mask(
            Photon, ak.ArrayBuilder()).snapshot()
        Photon = Photon[Photon_template_mask]

        # Apply cut 3 again (at least one photon after the modified ID)
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        if not isData: pu = pu[A_photon_mask]
        events = events[A_photon_mask]
        gen_photons = gen_photons[A_photon_mask]

        # Stop processing if no events remain
        if len(Electron) == 0:
            return out

        cut3 = np.ones(len(Photon)) * 3

        ## --  Additional photon selection: Photon gen-matching

        # Choose Photons that dR(genPhoton,Photon) <= 0.1
        gen_match_photon_mask = ak.all(Photon.metric_table(gen_photons) <= 0.1,
                                       axis=-1)

        # Apply cut
        Photon = Photon[gen_match_photon_mask]
        gen_match_photon_evt_mask = ak.num(Photon) >= 1

        Electron = Electron[gen_match_photon_evt_mask]
        Photon = Photon[gen_match_photon_evt_mask]
        Jet = Jet[gen_match_photon_evt_mask]
        MET = MET[gen_match_photon_evt_mask]
        gen_photons = gen_photons[gen_match_photon_evt_mask]
        if not isData: pu = pu[gen_match_photon_evt_mask]
        events = events[gen_match_photon_evt_mask]

        ##-----------  Cut flow4:  Select 2 OSSF electrons from Z
        @numba.njit
        def find_2lep(events_leptons, builder):
            for leptons in events_leptons:

                builder.begin_list()
                nlep = len(leptons)
                for i0 in range(nlep):
                    for i1 in range(i0 + 1, nlep):
                        if leptons[i0].charge + leptons[i1].charge != 0:
                            continue

                        if nlep == 2:
                            builder.begin_tuple(2)
                            builder.index(0).integer(i0)
                            builder.index(1).integer(i1)
                            builder.end_tuple()

                        else:
                            for i2 in range(nlep):
                                if len({i0, i1, i2}) < 3: continue
                                builder.begin_tuple(3)
                                builder.index(0).integer(i0)
                                builder.index(1).integer(i1)
                                builder.index(2).integer(i2)
                                builder.end_tuple()
                builder.end_list()
            return builder

        ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot()

        # OSSF cut
        ossf_mask = ak.num(ossf_idx) >= 1
        ossf_idx = ossf_idx[ossf_mask]
        Electron = Electron[ossf_mask]
        Photon = Photon[ossf_mask]
        Jet = Jet[ossf_mask]
        MET = MET[ossf_mask]
        events = events[ossf_mask]
        if not isData: pu = pu[ossf_mask]

        Double_electron = [Electron[ossf_idx[idx]] for idx in "01"]

        Diele = ak.zip({
            "lep1":
            Double_electron[0],
            "lep2":
            Double_electron[1],
            "p4":
            TLorentz_vector(Double_electron[0] + Double_electron[1])
        })

        # Keep, per event, the pair whose mass is closest to the Z mass
        bestZ_idx = ak.singletons(
            ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1))
        Diele = Diele[bestZ_idx]

        # Stop processing if no events remain
        if len(Electron) == 0:
            return out
        cut4 = np.ones(len(Electron)) * 4

        leading_ele = Diele.lep1
        subleading_ele = Diele.lep2

        def make_leading_pair(target, base):
            return target[ak.argmax(base.pt, axis=1, keepdims=True)]

        leading_pho = make_leading_pair(Photon, Photon)

        # -- Scale Factor for each electron

        # Trigger weight helper function (only called for 2018 simulation,
        # where the per-leg lookups above are defined)
        def Trigger_Weight(eta1, pt1, eta2, pt2):
            mc_leg1_e1 = get_ele_trig_leg1_mc_Eff(eta1, pt1)
            mc_leg1_e2 = get_ele_trig_leg1_mc_Eff(eta2, pt2)
            mc_leg2_e1 = get_ele_trig_leg2_mc_Eff(eta1, pt1)
            mc_leg2_e2 = get_ele_trig_leg2_mc_Eff(eta2, pt2)

            per_ev_MC = (mc_leg1_e1 * mc_leg2_e2 + mc_leg1_e2 * mc_leg2_e1 -
                         mc_leg1_e1 * mc_leg1_e2)

            # Data-side term for each leg: data efficiency times its SF
            data_leg1_e1 = get_ele_trig_leg1_data_Eff(
                eta1, pt1) * get_ele_trig_leg1_SF(eta1, pt1)
            data_leg1_e2 = get_ele_trig_leg1_data_Eff(
                eta2, pt2) * get_ele_trig_leg1_SF(eta2, pt2)
            data_leg2_e1 = get_ele_trig_leg2_data_Eff(
                eta1, pt1) * get_ele_trig_leg2_SF(eta1, pt1)
            data_leg2_e2 = get_ele_trig_leg2_data_Eff(
                eta2, pt2) * get_ele_trig_leg2_SF(eta2, pt2)

            per_ev_data = (data_leg1_e1 * data_leg2_e2 +
                           data_leg1_e2 * data_leg2_e1 -
                           data_leg1_e1 * data_leg1_e2)

            return per_ev_data / per_ev_MC

        if not isData:

            ## -------------< Egamma ID and Reco Scale factor > -----------------##
            eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
            eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
            pt1 = ak.flatten(leading_ele.pt)
            pt2 = ak.flatten(subleading_ele.pt)

            # Stored under its own name so the lookup function is not shadowed
            pho_medium_id_sf = get_pho_medium_id_sf(
                ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt))

            ele_reco_sf = get_ele_reco_above20_sf(
                eta1, pt1) * get_ele_reco_above20_sf(eta2, pt2)

            ele_medium_id_sf = get_ele_medium_id_sf(
                eta1, pt1) * get_ele_medium_id_sf(eta2, pt2)

            ## -------------< Double Electron Trigger Scale factor > -----------------##
            # -- 2017,2016 are not applied yet
            if self._year == '2018':
                ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

        ##----------- Cut flow5: Event selection

        # Mee cut
        Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4

        # Electron PT cuts
        Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20))

        # MET cuts
        MET_mask = MET.pt > 20

        # --------Mask -------#
        Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask
        Diele_sel = Diele[Event_sel_mask]
        leading_pho_sel = leading_pho[Event_sel_mask]
        Jet_sel = Jet[Event_sel_mask]
        MET_sel = MET[Event_sel_mask]

        # Photon  EE and EB
        isEE_mask = leading_pho.isScEtaEE
        isEB_mask = leading_pho.isScEtaEB
        Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
        Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

        # Stop processing if no events remain
        if len(leading_pho_sel) == 0:
            return out

        # Count the events that survive the event selection
        cut5 = np.ones(len(Diele_sel)) * 5

        # -------------------- Flatten variables ---------------------------#

        # -- Ele1 --#
        Ele1_PT = ak.flatten(Diele_sel.lep1.pt)
        Ele1_Eta = ak.flatten(Diele_sel.lep1.eta)
        Ele1_Phi = ak.flatten(Diele_sel.lep1.phi)

        # -- Ele2 --#
        Ele2_PT = ak.flatten(Diele_sel.lep2.pt)
        Ele2_Eta = ak.flatten(Diele_sel.lep2.eta)
        Ele2_Phi = ak.flatten(Diele_sel.lep2.phi)

        # -- Pho -- #
        Pho_PT = ak.flatten(leading_pho_sel.pt)
        Pho_Eta = ak.flatten(leading_pho_sel.eta)
        Pho_Phi = ak.flatten(leading_pho_sel.phi)

        # -- Pho EB --# (currently not filled into any histogram)
        Pho_EB_PT = ak.flatten(Pho_EB.pt)
        Pho_EB_Eta = ak.flatten(Pho_EB.eta)
        Pho_EB_Phi = ak.flatten(Pho_EB.phi)
        Pho_EB_Isochg = ak.flatten(Pho_EB.pfRelIso03_chg)
        Pho_EB_Sieie = ak.flatten(Pho_EB.sieie)

        # -- Pho EE --# (currently not filled into any histogram)
        Pho_EE_PT = ak.flatten(Pho_EE.pt)
        Pho_EE_Eta = ak.flatten(Pho_EE.eta)
        Pho_EE_Phi = ak.flatten(Pho_EE.phi)
        Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
        Pho_EE_Sieie = ak.flatten(Pho_EE.sieie)

        # --Kinematics --#
        Diele_mass = ak.flatten(Diele_sel.p4.mass)

        leading_ele, subleading_ele = ak.flatten(
            TLorentz_vector_cylinder(Diele_sel.lep1)), ak.flatten(
                TLorentz_vector_cylinder(Diele_sel.lep2))
        dR_e1pho = ak.flatten(
            leading_ele.delta_r(leading_pho_sel))  # dR pho,ele1
        dR_e2pho = ak.flatten(
            subleading_ele.delta_r(leading_pho_sel))  # dR pho,ele2
        # NOTE(review): Jet_sel[:, 0] presumably fails when a selected event
        # has no jet, and none of the dR values are filled below -- confirm
        # whether this is still needed.
        dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel))

        MET_PT = ak.to_numpy(MET_sel.pt)

        # -------------------- Sieie bins---------------------------#
        def make_bins(pt, eta, sieie):
            """Return {bin name: sieie values} for the 4x4 (pt, |eta|) grid.

            All edges are strict, so values exactly on an edge fall in no bin.
            """
            pt_masks = [
                ('PT_1', (pt > 20) & (pt < 30)),
                ('PT_2', (pt > 30) & (pt < 40)),
                ('PT_3', (pt > 40) & (pt < 50)),
                ('PT_4', (pt > 50)),
            ]
            eta_masks = [
                ('eta_1', (eta < 1)),
                ('eta_2', (eta > 1) & (eta < 1.5)),
                ('eta_3', (eta > 1.5) & (eta < 2)),
                ('eta_4', (eta > 2) & (eta < 2.5)),
            ]

            binned = {}
            for pt_name, pt_mask in pt_masks:
                for eta_name, eta_mask in eta_masks:
                    binned[pt_name + '_' + eta_name] = ak.to_numpy(
                        sieie[pt_mask & eta_mask])
            return binned

        bin_name_list = [
            'PT_1_eta_1', 'PT_1_eta_2', 'PT_1_eta_3', 'PT_1_eta_4',
            'PT_2_eta_1', 'PT_2_eta_2', 'PT_2_eta_3', 'PT_2_eta_4',
            'PT_3_eta_1', 'PT_3_eta_2', 'PT_3_eta_3', 'PT_3_eta_4',
            'PT_4_eta_1', 'PT_4_eta_2', 'PT_4_eta_3', 'PT_4_eta_4'
        ]

        # Flatten once; every bin is then just a mask on the same arrays.
        binned_sieie_hist = make_bins(ak.flatten(leading_pho_sel.pt),
                                      ak.flatten(abs(leading_pho_sel.eta)),
                                      ak.flatten(leading_pho_sel.sieie))

        print("Show me the last bin: ", binned_sieie_hist['PT_4_eta_4'])

        # --- Apply weight and hist
        weights = processor.Weights(len(cut4))

        # --- skim cut-weight: drop None and exactly-zero entries, so events
        # failing the cuts (weight * False == 0) are removed.
        def skim_weight(arr):
            mask1 = ~ak.is_none(arr)
            subarr = arr[mask1]
            mask2 = subarr != 0
            return ak.to_numpy(subarr[mask2])

        cuts = Event_sel_mask

        print(
            "cut0: {0}, cut1: {1}, cut2: {2}, cut3: {3}, cut4: {4} ,cut5 {5} ".
            format(len(Initial_events), len(cut1), len(cut2), len(cut3),
                   len(cut4), len(cut5)))

        # Weight and SF here
        if not isData:
            weights.add('pileup', pu)
            weights.add('ele_id', ele_medium_id_sf)
            weights.add('pho_id', pho_medium_id_sf)
            weights.add('ele_reco', ele_reco_sf)

            # 2016,2017 are not applied yet
            if self._year == "2018":
                weights.add('ele_trigger', ele_trig_weight)

        # ---------------------------- Fill hist --------------------------------------#

        # Initial events
        out["sumw"][dataset] += len(Initial_events)

        # Cut flow loop
        for cut in [cut0, cut1, cut2, cut3, cut4, cut5]:
            out["cutflow"].fill(dataset=dataset, cutflow=cut)

        # Primary vertex
        out['nPV'].fill(
            dataset=dataset,
            nPV=nPV,
        )
        out['nPV_nw'].fill(dataset=dataset, nPV_nw=nPV_nw)

        # Fill hist: the same per-event weight applies to every kinematic
        # histogram, so compute it once.
        event_weight = skim_weight(weights.weight() * cuts)

        # Each histogram's value axis has the same name as the histogram.
        kinematic_hists = [
            ("met", MET_PT),
            ("mass", Diele_mass),
            ("ele1pt", Ele1_PT),
            ("ele1eta", Ele1_Eta),
            ("ele1phi", Ele1_Phi),
            ("ele2pt", Ele2_PT),
            ("ele2eta", Ele2_Eta),
            ("ele2phi", Ele2_Phi),
            ("phopt", Pho_PT),
            ("phoeta", Pho_Eta),
            ("phophi", Pho_Phi),
        ]
        for hist_name, values in kinematic_hists:
            out[hist_name].fill(dataset=dataset,
                                weight=event_weight,
                                **{hist_name: values})

        # -- Binned sieie hist -- # (unweighted; empty bins are skipped)
        for name in bin_name_list:
            if len(binned_sieie_hist[name]) > 0:
                out[name].fill(dataset=dataset,
                               **{name: binned_sieie_hist[name]})

        return out
Exemplo n.º 3
0
    def process(self, events):
        """Select e-e-gamma events with a fake-photon template ID and fill histograms.

        The selection runs in sequence: golden-JSON lumi mask, double-electron
        trigger, electron selection (at least two), photon selection,
        fake-photon template ID, opposite-charge electron pair closest to the
        Z mass, and a final event selection (Mee, electron pt, MET).  Each
        stage contributes one entry per surviving event to the ``cutflow``
        histogram, stored at the cut index (0..5).

        Args:
            events: event array for one chunk.  It must expose ``run``,
                ``luminosityBlock``, ``HLT``, ``Electron``, ``Muon``,
                ``Photon``, ``Jet`` and ``MET``
                (presumably a coffea NanoEvents array -- confirm with caller).

        Returns:
            The accumulator created from ``self.accumulator.identity()``.
            It is returned unfilled when no event survives one of the
            intermediate "no event remains" checks.

        Raises:
            ValueError: if ``self._year`` has no golden JSON file configured.
        """

        # Initialize accumulator
        out = self.accumulator.identity()
        # NOTE: ``sample_name`` is not defined in this method; it has to be
        # available from the enclosing module.
        dataset = sample_name
        # events.metadata['dataset']

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        # Cut flow: one entry per event, valued with the index of the cut
        cut0 = np.zeros(len(events))

        # <----- Helper functions ------>#

        # Sort by PT helper function
        def sort_by_pt(ele, pho, jet):
            """Return the three collections ordered by descending pt per event."""
            ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
            pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
            jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]

            return ele, pho, jet

        # Lorentz vectors
        from coffea.nanoevents.methods import vector

        ak.behavior.update(vector.behavior)

        def TLorentz_vector(vec):
            """Re-zip the x/y/z/t components of ``vec`` as a "LorentzVector"."""
            vec = ak.zip(
                {
                    "x": vec.x,
                    "y": vec.y,
                    "z": vec.z,
                    "t": vec.t
                },
                with_name="LorentzVector",
            )
            return vec

        def TLorentz_vector_cylinder(vec):
            """Re-zip pt/eta/phi/mass of ``vec`` as a "PtEtaPhiMLorentzVector"."""
            vec = ak.zip(
                {
                    "pt": vec.pt,
                    "eta": vec.eta,
                    "phi": vec.phi,
                    "mass": vec.mass,
                },
                with_name="PtEtaPhiMLorentzVector",
            )

            return vec

        # Cut-based ID modification
        @numba.njit
        def PhotonVID(vid, idBit):
            """Collapse the 2-bit-per-cut bitmap ``vid`` into one bit per cut.

            Bit ``x`` of the result is set when the 2-bit field ``x`` of
            ``vid`` is at least ``idBit``.  Seven fields are read.
            """
            rBit = 0
            for x in range(0, 7):
                rBit |= (1 << x) if (((vid >> (x * 2)) & 0b11) >= idBit) else 0
            return rBit

        # Inverse Sieie and upper limit
        @numba.njit
        def make_fake_obj_mask(Pho, builder):
            """Append one boolean per photon (nested per event) to ``builder``.

            A photon is flagged True when it passes every medium-ID cut except
            Sieie and IsoChg, and its charged isolation
            (``pfRelIso03_chg * pt``) lies in [4, 10].
            """
            for pho in Pho:  # --Event Loop
                # Every begin_list() is paired with the end_list() below.  An
                # event without photons simply yields an empty list; skipping
                # the end_list() would leave the list open and nest the
                # following events inside it.
                builder.begin_list()

                for phoIdx in range(len(pho)):  # --Photon Loop

                    vid = pho[phoIdx].vidNestedWPBitmap
                    vid_cuts2 = PhotonVID(vid, 2)  # Medium photon
                    # (idBit 1 -> Loose photon, idBit 3 -> Tight photon)

                    # Field name
                    # |0|0|0|0|0|0|0|
                    # |IsoPho|IsoNeu|IsoChg|Sieie|hoe|scEta|PT|

                    # 1. Turn off cut (ex turn off Sieie
                    # |1|1|1|0|1|1|1| = |1|1|1|0|1|1|1|

                    # 2. Inverse cut (ex inverse Sieie)
                    # |1|1|1|1|1|1|1| = |1|1|1|0|1|1|1|

                    # Alternatives:
                    #   vid_cuts2 & 0b1111111 == 0b1111111  -> Cut applied
                    #   vid_cuts2 & 0b1111111 == 0b1110111  -> Inverse Sieie
                    is_template = False
                    # Without Sieie and IsoChg
                    if (vid_cuts2 & 0b1100111) == 0b1100111:
                        isochg = pho[phoIdx].pfRelIso03_chg * pho[phoIdx].pt
                        # Full range.  Sub-ranges used for 2018Egamma RunABC
                        # (x = 6.84): Sample1 [4, 6.84], Sample2 [6.84, 10].
                        if (isochg >= 4) & (isochg <= 10):
                            is_template = True

                    builder.boolean(is_template)

                builder.end_list()

            return builder

        # Golden Json file
        golden_json_by_year = {
            "2018":
            "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABCD",
            "2017":
            "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt",
        }
        if self._year not in golden_json_by_year:
            # Fail with an explicit message instead of an unbound ``injson``.
            raise ValueError(
                "No golden JSON file configured for year {0!r}".format(
                    self._year))
        injson = golden_json_by_year[self._year]

        # --- Selection
        # Good Run ( Golden Json files )
        from coffea import lumi_tools

        lumi_mask_builder = lumi_tools.LumiMask(injson)
        lumimask = ak.Array(
            lumi_mask_builder(events.run, events.luminosityBlock))
        events = events[lumimask]

        # Stop processing if there is no event remain
        if len(events) == 0:
            return out

        ##----------- Cut flow1: Passing Triggers

        # ``bool`` is used as dtype: the ``np.bool`` alias no longer exists in
        # current NumPy releases.

        # double lepton trigger
        is_double_ele_trigger = True
        if not is_double_ele_trigger:
            double_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            double_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._doubleelectron_triggers[self._year]:
                if path not in events.HLT.fields:
                    continue
                double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[
                    path]

        # single lepton trigger (computed but only used by the commented-out
        # alternative selection below)
        is_single_ele_trigger = True
        if not is_single_ele_trigger:
            single_ele_triggers_arr = np.ones(len(events), dtype=bool)
        else:
            single_ele_triggers_arr = np.zeros(len(events), dtype=bool)
            for path in self._singleelectron_triggers[self._year]:
                if path not in events.HLT.fields:
                    continue
                single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[
                    path]

        events.Electron, events.Photon, events.Jet = sort_by_pt(
            events.Electron, events.Photon, events.Jet)

        # Apply cut1
        # Events after the lumi mask but before the trigger; used for "sumw".
        Initial_events = events
        # events = events[single_ele_triggers_arr | double_ele_triggers_arr]
        events = events[double_ele_triggers_arr]

        cut1 = np.ones(len(events))

        # Set Particles
        Electron = events.Electron
        Muon = events.Muon
        Photon = events.Photon
        MET = events.MET
        Jet = events.Jet

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        #  --Muon ( only used to calculate dR )
        MuSelmask = ((Muon.pt >= 10)
                     & (abs(Muon.eta) <= 2.5)
                     & (Muon.tightId)
                     & (Muon.pfRelIso04_all < 0.15))
        Muon = Muon[MuSelmask]

        ##----------- Cut flow2: Electron Selection

        # |eta + deltaEtaSC| decides which impact-parameter cuts apply.
        ele_sc_abs_eta = np.abs(Electron.eta + Electron.deltaEtaSC)
        ele_common_mask = (Electron.pt >= 20) & (Electron.cutBased > 2)
        ele_low_eta_mask = ((ele_sc_abs_eta < 1.479)
                            & (abs(Electron.dxy) < 0.05)
                            & (abs(Electron.dz) < 0.1))
        ele_high_eta_mask = ((ele_sc_abs_eta > 1.479)
                             & (ele_sc_abs_eta <= 2.5)
                             & (abs(Electron.dxy) < 0.1)
                             & (abs(Electron.dz) < 0.2))
        EleSelmask = ele_common_mask & (ele_low_eta_mask | ele_high_eta_mask)

        Electron = Electron[EleSelmask]

        # apply cut 2 (at least two selected electrons)
        Tri_electron_mask = ak.num(Electron) >= 2
        Electron = Electron[Tri_electron_mask]
        Photon = Photon[Tri_electron_mask]
        Jet = Jet[Tri_electron_mask]
        MET = MET[Tri_electron_mask]
        Muon = Muon[Tri_electron_mask]
        events = events[Tri_electron_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut2 = np.ones(len(Photon)) * 2

        ##----------- Cut flow3: Photon Selection

        # Basic photon selection
        isgap_mask = (abs(Photon.eta) < 1.442) | ((abs(Photon.eta) > 1.566) &
                                                  (abs(Photon.eta) < 2.5))
        Pixel_seed_mask = ~Photon.pixelSeed
        PT_mask = Photon.pt >= 20

        # dR cut with selected Muon and Electrons
        dr_pho_ele_mask = ak.all(Photon.metric_table(Electron) >= 0.5,
                                 axis=-1)  # default metric table: delta_r
        dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

        PhoSelmask = (PT_mask
                      & isgap_mask
                      & Pixel_seed_mask
                      & dr_pho_ele_mask
                      & dr_pho_mu_mask)
        Photon = Photon[PhoSelmask]

        # Apply cut 3
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        events = events[A_photon_mask]

        # ID for fake photon
        Photon_template_mask = make_fake_obj_mask(
            Photon, ak.ArrayBuilder()).snapshot()

        Photon = Photon[Photon_template_mask]
        # Apply cut -Fake Photon -
        A_photon_mask = ak.num(Photon) > 0
        Electron = Electron[A_photon_mask]
        Photon = Photon[A_photon_mask]
        Jet = Jet[A_photon_mask]
        Muon = Muon[A_photon_mask]
        MET = MET[A_photon_mask]
        events = events[A_photon_mask]

        # Stop processing if there is no event remain
        if len(Electron) == 0:
            return out

        cut3 = np.ones(len(Photon)) * 3

        ##-----------  Cut flow4:  Select 2 OSSF electrons from Z
        @numba.njit
        def find_2lep(events_leptons, builder):
            """Append, per event, index tuples of opposite-charge lepton pairs.

            Events with exactly two leptons get (i0, i1) tuples; larger events
            get one (i0, i1, i2) tuple per remaining lepton i2.
            """
            for leptons in events_leptons:

                builder.begin_list()
                nlep = len(leptons)
                for i0 in range(nlep):
                    for i1 in range(i0 + 1, nlep):
                        if leptons[i0].charge + leptons[i1].charge != 0:
                            continue

                        if nlep == 2:
                            builder.begin_tuple(2)
                            builder.index(0).integer(i0)
                            builder.index(1).integer(i1)
                            builder.end_tuple()

                        else:
                            for i2 in range(nlep):
                                if len({i0, i1, i2}) < 3:
                                    continue
                                builder.begin_tuple(3)
                                builder.index(0).integer(i0)
                                builder.index(1).integer(i1)
                                builder.index(2).integer(i2)
                                builder.end_tuple()
                builder.end_list()
            return builder

        ossf_idx = find_2lep(Electron, ak.ArrayBuilder()).snapshot()

        # OSSF cut
        # Muon and events are not read past this point, so only the
        # collections that are still needed get filtered.
        ossf_mask = ak.num(ossf_idx) >= 1
        ossf_idx = ossf_idx[ossf_mask]
        Electron = Electron[ossf_mask]
        Photon = Photon[ossf_mask]
        Jet = Jet[ossf_mask]
        MET = MET[ossf_mask]

        # Tuple slots "0" and "1" hold the indices of the opposite-charge pair.
        Double_electron = [Electron[ossf_idx[idx]] for idx in "01"]

        Diele = ak.zip({
            "lep1":
            Double_electron[0],
            "lep2":
            Double_electron[1],
            "p4":
            TLorentz_vector(Double_electron[0] + Double_electron[1]),
        })

        # Keep, per event, the pair whose mass is closest to 91.1876
        bestZ_idx = ak.singletons(
            ak.argmin(abs(Diele.p4.mass - 91.1876), axis=1))
        Diele = Diele[bestZ_idx]

        cut4 = np.ones(len(Electron)) * 4

        ##-----------  Cut flow 5: Event Selection

        def make_leading_pair(target, base):
            """Pick from ``target`` the entry at the position of max ``base.pt``."""
            return target[ak.argmax(base.pt, axis=1, keepdims=True)]

        leading_pho = make_leading_pair(Photon, Photon)

        # Mee cut
        Mee_cut_mask = ak.firsts(Diele.p4.mass) > 4

        # Electron PT cuts
        Elept_mask = ak.firsts((Diele.lep1.pt >= 25) & (Diele.lep2.pt >= 20))

        # MET cuts
        MET_mask = MET.pt > 20

        # --------Mask -------#
        Event_sel_mask = Mee_cut_mask & Elept_mask & MET_mask
        Diele_sel = Diele[Event_sel_mask]
        leading_pho_sel = leading_pho[Event_sel_mask]
        Jet_sel = Jet[Event_sel_mask]
        MET_sel = MET[Event_sel_mask]

        # Count the events that pass the event selection.  Using the
        # unselected ``Diele`` here would just repeat the cut-4 count.
        cut5 = np.ones(len(Diele_sel)) * 5

        # Photon  EE and EB
        isEE_mask = leading_pho.isScEtaEE
        isEB_mask = leading_pho.isScEtaEB
        Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
        Pho_EB = leading_pho[isEB_mask & Event_sel_mask]

        # -------------------- Flatten variables ---------------------------#

        # -- Ele1 --#
        Ele1_PT = ak.flatten(Diele_sel.lep1.pt)
        Ele1_Eta = ak.flatten(Diele_sel.lep1.eta)
        Ele1_Phi = ak.flatten(Diele_sel.lep1.phi)

        # -- Ele2 --#
        Ele2_PT = ak.flatten(Diele_sel.lep2.pt)
        Ele2_Eta = ak.flatten(Diele_sel.lep2.eta)
        Ele2_Phi = ak.flatten(Diele_sel.lep2.phi)

        # -- Pho -- #
        Pho_PT = ak.flatten(leading_pho_sel.pt)
        Pho_Eta = ak.flatten(leading_pho_sel.eta)
        Pho_Phi = ak.flatten(leading_pho_sel.phi)
        Pho_IsoChg = ak.flatten(leading_pho_sel.pt *
                                leading_pho_sel.pfRelIso03_chg)

        # -- Pho EB --#
        Pho_EB_PT = ak.flatten(Pho_EB.pt)
        Pho_EB_Eta = ak.flatten(Pho_EB.eta)
        Pho_EB_Phi = ak.flatten(Pho_EB.phi)
        # The EB quantities must be read from the EB photons (not Pho_EE).
        Pho_EB_Isochg = ak.flatten(Pho_EB.pfRelIso03_chg)
        Pho_EB_Sieie = ak.flatten(Pho_EB.sieie)

        # -- Pho EE --#
        Pho_EE_PT = ak.flatten(Pho_EE.pt)
        Pho_EE_Eta = ak.flatten(Pho_EE.eta)
        Pho_EE_Phi = ak.flatten(Pho_EE.phi)
        Pho_EE_Isochg = ak.flatten(Pho_EE.pfRelIso03_chg)
        Pho_EE_Sieie = ak.flatten(Pho_EE.sieie)

        # --Kinematics --#
        Diele_mass = ak.flatten(Diele_sel.p4.mass)
        eeg_vec = Diele_sel.p4 + leading_pho_sel
        eeg_mass = ak.flatten(eeg_vec.mass)

        leading_ele = ak.flatten(TLorentz_vector_cylinder(Diele_sel.lep1))
        subleading_ele = ak.flatten(TLorentz_vector_cylinder(Diele_sel.lep2))
        dR_e1pho = ak.flatten(
            leading_ele.delta_r(leading_pho_sel))  # dR pho,ele1
        dR_e2pho = ak.flatten(
            subleading_ele.delta_r(leading_pho_sel))  # dR pho,ele2
        # NOTE(review): ``Jet_sel[:, 0]`` presumably fails for a selected
        # event without any jet; no jet-multiplicity cut is applied above --
        # confirm this cannot happen for the inputs in use.
        dR_jpho = ak.flatten(Jet_sel[:, 0].delta_r(leading_pho_sel))

        MET_PT = ak.to_numpy(MET_sel.pt)

        # -------------------- Sieie bins---------------------------#
        # Bins are half-open [low, high) so that a photon sitting exactly on
        # an edge falls into exactly one bin.  ``None`` means "no bound".
        pt_bin_edges = {
            "PT_1": (20, 30),
            "PT_2": (30, 40),
            "PT_3": (40, 50),
            "PT_4": (50, None),
        }
        eta_bin_edges = {
            "eta_1": (None, 1),
            "eta_2": (1, 1.5),
            "eta_3": (1.5, 2),
            "eta_4": (2, 2.5),
        }

        def make_bins(pt, eta, sieie, bin_range_str):
            """Return ``sieie`` (as NumPy) for photons in the named pt/eta bin.

            ``bin_range_str`` has the form "PT_<i>_eta_<j>"; an unknown name
            raises KeyError.  Only the requested bin mask is computed.
            """
            pt_low, pt_high = pt_bin_edges[bin_range_str[:4]]
            eta_low, eta_high = eta_bin_edges[bin_range_str[5:]]

            binmask = pt >= pt_low
            if pt_high is not None:
                binmask = binmask & (pt < pt_high)
            if eta_low is not None:
                binmask = binmask & (eta >= eta_low)
            binmask = binmask & (eta < eta_high)

            return ak.to_numpy(sieie[binmask])

        # "PT_1_eta_1", "PT_1_eta_2", ..., "PT_4_eta_4"
        bin_name_list = [
            "PT_{0}_eta_{1}".format(pt_bin, eta_bin)
            for pt_bin in range(1, 5)
            for eta_bin in range(1, 5)
        ]

        # The flattened inputs do not depend on the bin: compute them once.
        Pho_AbsEta = ak.flatten(abs(leading_pho_sel.eta))
        Pho_Sieie = ak.flatten(leading_pho_sel.sieie)

        binned_sieie_hist = {}
        for name in bin_name_list:
            binned_sieie_hist[name] = make_bins(
                Pho_PT,
                Pho_AbsEta,
                Pho_Sieie,
                name,
            )

        # -------------------- Fill hist ---------------------------#

        # Initial events
        out["sumw"][dataset] += len(Initial_events)

        # Cut flow loop
        for cut in [cut0, cut1, cut2, cut3, cut4, cut5]:
            out["cutflow"].fill(dataset=dataset, cutflow=cut)

        # --Ele1 -- #
        out["ele1pt"].fill(dataset=dataset, ele1pt=Ele1_PT)
        out["ele1eta"].fill(dataset=dataset, ele1eta=Ele1_Eta)
        out["ele1phi"].fill(dataset=dataset, ele1phi=Ele1_Phi)

        # --Ele2 -- #
        out["ele2pt"].fill(dataset=dataset, ele2pt=Ele2_PT)
        out["ele2eta"].fill(dataset=dataset, ele2eta=Ele2_Eta)
        out["ele2phi"].fill(dataset=dataset, ele2phi=Ele2_Phi)

        # --Photon-- #

        out["phopt"].fill(dataset=dataset, phopt=Pho_PT)
        out["phoeta"].fill(dataset=dataset, phoeta=Pho_Eta)
        out["phophi"].fill(dataset=dataset, phophi=Pho_Phi)
        out["phoIsoChg"].fill(dataset=dataset, phoIsoChg=Pho_IsoChg)

        # --Photon EB --#
        out["pho_EB_pt"].fill(
            dataset=dataset,
            pho_EB_pt=Pho_EB_PT,
        )
        out["pho_EB_eta"].fill(
            dataset=dataset,
            pho_EB_eta=Pho_EB_Eta,
        )
        out["pho_EB_phi"].fill(
            dataset=dataset,
            pho_EB_phi=Pho_EB_Phi,
        )
        out["pho_EB_sieie"].fill(
            dataset=dataset,
            pho_EB_sieie=Pho_EB_Sieie,
        )
        out["pho_EB_Iso_chg"].fill(dataset=dataset,
                                   pho_EB_Iso_chg=Pho_EB_Isochg)

        # --Photon EE --#
        out["pho_EE_pt"].fill(
            dataset=dataset,
            pho_EE_pt=Pho_EE_PT,
        )
        out["pho_EE_eta"].fill(
            dataset=dataset,
            pho_EE_eta=Pho_EE_Eta,
        )
        out["pho_EE_phi"].fill(
            dataset=dataset,
            pho_EE_phi=Pho_EE_Phi,
        )
        out["pho_EE_sieie"].fill(
            dataset=dataset,
            pho_EE_sieie=Pho_EE_Sieie,
        )
        out["pho_EE_Iso_chg"].fill(dataset=dataset,
                                   pho_EE_Iso_chg=Pho_EE_Isochg)

        # -- Kinematic variables -- #
        out["mass"].fill(dataset=dataset, Mee=Diele_mass)
        out["mass_eea"].fill(dataset=dataset, mass_eea=eeg_mass)
        out["met"].fill(dataset=dataset, met=MET_PT)
        out["dR_ae1"].fill(dataset=dataset, dR_ae1=dR_e1pho)
        out["dR_ae2"].fill(dataset=dataset, dR_ae2=dR_e2pho)
        out["dR_aj"].fill(dataset=dataset, dR_aj=dR_jpho)

        # -- Binned sieie hist -- #
        # Each histogram shares its name with its fill axis; empty bins are
        # skipped.
        for name in bin_name_list:
            sieie_values = binned_sieie_hist[name]
            if len(sieie_values) > 0:
                out[name].fill(dataset=dataset, **{name: sieie_values})

        return out
Exemplo n.º 4
0
	def process(self, events):

		# Initialize accumulator
		out = self.accumulator.identity()
		dataset = sample_name
		# events.metadata['dataset']

		# Data or MC
		isData = "genWeight" not in events.fields
		isFake = self._isFake


		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		# Golden Json file
		if (self._year == "2018") and isData:
			injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt.RunABD"

		if (self._year == "2017") and isData:
			injson = "/x5/cms/jwkim/gitdir/JWCorp/JW_analysis/Coffea_WZG/Corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt"

		# <----- Get Scale factors ------>#

		if not isData:

			# Egamma reco ID
			get_ele_reco_above20_sf = self._corrections["get_ele_reco_above20_sf"][
				self._year
			]
			get_ele_medium_id_sf = self._corrections["get_ele_medium_id_sf"][self._year]
			get_pho_medium_id_sf = self._corrections["get_pho_medium_id_sf"][self._year]

			# DoubleEG trigger # 2016, 2017 are not applied yet
			if self._year == "2018":
				get_ele_trig_leg1_SF = self._corrections["get_ele_trig_leg1_SF"][
					self._year
				]
				get_ele_trig_leg1_data_Eff = self._corrections[
					"get_ele_trig_leg1_data_Eff"
				][self._year]
				get_ele_trig_leg1_mc_Eff = self._corrections[
					"get_ele_trig_leg1_mc_Eff"
				][self._year]
				get_ele_trig_leg2_SF = self._corrections["get_ele_trig_leg2_SF"][
					self._year
				]
				get_ele_trig_leg2_data_Eff = self._corrections[
					"get_ele_trig_leg2_data_Eff"
				][self._year]
				get_ele_trig_leg2_mc_Eff = self._corrections[
					"get_ele_trig_leg2_mc_Eff"
				][self._year]

			# PU weight with custom made npy and multi-indexing
			pu_weight_idx = ak.values_astype(events.Pileup.nTrueInt, "int64")
			pu = self._puweight_arr[pu_weight_idx]

			print("## pu_idx: ",len(pu_weight_idx),pu_weight_idx)
			print("## pu_arr: ",len(self._puweight_arr),self._puweight_arr)
			print("## pu:",len(pu),pu)

		selection = processor.PackedSelection()

		# Cut flow
		cut0 = np.zeros(len(events))
		out["cutflow"].fill(dataset=dataset, cutflow=cut0)
		# <----- Helper functions ------>#

		#  Sort by PT  helper function
		def sort_by_pt(ele, pho, jet):
			ele = ele[ak.argsort(ele.pt, ascending=False, axis=1)]
			pho = pho[ak.argsort(pho.pt, ascending=False, axis=1)]
			jet = jet[ak.argsort(jet.pt, ascending=False, axis=1)]

			return ele, pho, jet

		# Lorentz vectors
		from coffea.nanoevents.methods import vector

		ak.behavior.update(vector.behavior)

		def TLorentz_vector(vec):
			vec = ak.zip(
				{"x": vec.x, "y": vec.y, "z": vec.z, "t": vec.t},
				with_name="LorentzVector",
			)
			return vec

		def TLorentz_vector_cylinder(vec):

			vec = ak.zip(
				{
					"pt": vec.pt,
					"eta": vec.eta,
					"phi": vec.phi,
					"mass": vec.mass,
				},
				with_name="PtEtaPhiMLorentzVector",
			)

			return vec

		# <----- Selection ------>#

		Initial_events = events
		# Good Run ( Golden Json files )
		from coffea import lumi_tools

		if isData:
			lumi_mask_builder = lumi_tools.LumiMask(injson)
			lumimask = ak.Array(
				lumi_mask_builder.__call__(events.run, events.luminosityBlock)
			)
			events = events[lumimask]
			# print("{0}%  of files pass good-run conditions".format(len(events)/ len(Initial_events)))

		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		##----------- Cut flow1: Passing Triggers

		# double lepton trigger
		is_double_ele_trigger = True
		if not is_double_ele_trigger:
			double_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
		else:
			double_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
			for path in self._doubleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				double_ele_triggers_arr = double_ele_triggers_arr | events.HLT[path]

		# single lepton trigger
		is_single_ele_trigger = True
		if not is_single_ele_trigger:
			single_ele_triggers_arr = np.ones(len(events), dtype=np.bool)
		else:
			single_ele_triggers_arr = np.zeros(len(events), dtype=np.bool)
			for path in self._singleelectron_triggers[self._year]:
				if path not in events.HLT.fields:
					continue
				single_ele_triggers_arr = single_ele_triggers_arr | events.HLT[path]

		events.Electron, events.Photon, events.Jet = sort_by_pt(
			events.Electron, events.Photon, events.Jet
		)

		# Good Primary vertex
		nPV = events.PV.npvsGood
		nPV_nw = events.PV.npvsGood
		if not isData:
			nPV = nPV * pu

			print(pu)


		# Apply cut1
		events = events[double_ele_triggers_arr]
		if not isData:
			pu = pu[double_ele_triggers_arr]

		# Stop processing if there is no event remain
		if len(events) == 0:
			return out

		cut1 = np.ones(len(events))
		out["cutflow"].fill(dataset=dataset, cutflow=cut1)

		# Set Particles
		Electron = events.Electron
		Muon = events.Muon
		Photon = events.Photon
		MET = events.MET
		Jet = events.Jet


		#  --Muon ( only used to calculate dR )
		MuSelmask = (
			(Muon.pt >= 10)
			& (abs(Muon.eta) <= 2.5)
			& (Muon.tightId)
			& (Muon.pfRelIso04_all < 0.15)
		)
		Muon = Muon[MuSelmask]

		
		#  --Loose Muon ( For Loose Muon veto )
		LoooseMuSelmask = (
			(Muon.pt > 20)
			& (abs(Muon.eta) < 2.4)
			& (Muon.isPFcand)
			& (Muon.isGlobal | Muon.isTracker)
			& (Muon.pfRelIso03_all < 0.25)
		)
		# Reference: VBS Zgamma+2jets
		
		VetoMuon = Muon[LoooseMuSelmask]
		

		##----------- Cut flow2: Electron Selection

		EleSelmask = (
			(Electron.pt >= 10)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) < 1.479)
			& (Electron.cutBased > 2)
			& (abs(Electron.dxy) < 0.05)
			& (abs(Electron.dz) < 0.1)
		) | (
			(Electron.pt >= 10)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) > 1.479)
			& (np.abs(Electron.eta + Electron.deltaEtaSC) <= 2.5)
			& (Electron.cutBased > 2)
			& (abs(Electron.dxy) < 0.1)
			& (abs(Electron.dz) < 0.2)
		)

		Electron = Electron[EleSelmask]

		
		# Event with 3 Electrons
		# apply cut 2
		Tri_electron_mask = ak.num(Electron) == 3 
		Electron = Electron[Tri_electron_mask]
		Photon = Photon[Tri_electron_mask]
		Jet = Jet[Tri_electron_mask]
		MET = MET[Tri_electron_mask]
		Muon = Muon[Tri_electron_mask]
		VetoMuon = VetoMuon[Tri_electron_mask]
		if not isData:
			pu = pu[Tri_electron_mask]
		events = events[Tri_electron_mask]
		
		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut2 = np.ones(len(Photon)) * 2
		out["cutflow"].fill(dataset=dataset, cutflow=cut2)

		##----------- Cut flow3: 4th lepton veto (Loose Muon)
		# Veto 4th Loose muon
		# apply cut 3
		fourth_lepton_veto = ak.num(VetoMuon) < 1
		Electron = Electron[fourth_lepton_veto]
		Photon = Photon[fourth_lepton_veto]
		Jet = Jet[fourth_lepton_veto]
		MET = MET[fourth_lepton_veto]
		Muon = Muon[fourth_lepton_veto]
		if not isData:
			pu = pu[fourth_lepton_veto]
		events = events[fourth_lepton_veto]
		
		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut3 = np.ones(len(Photon)) * 3
		out["cutflow"].fill(dataset=dataset, cutflow=cut3)


		##----------- Cut flow4: Photon Selection

		# Basic photon selection
		isgap_mask = (abs(Photon.eta) < 1.442) | (
			(abs(Photon.eta) > 1.566) & (abs(Photon.eta) < 2.5)
		)
		Pixel_seed_mask = ~Photon.pixelSeed

		if (dataset == "ZZ") and (self._year == "2017"):
			PT_ID_mask = (Photon.pt >= 20) & (
				Photon.cutBasedBitmap >= 3
			)  # 2^0(Loose) + 2^1(Medium) + 2^2(Tights)
		else:
			PT_ID_mask = (Photon.pt >= 20) & (Photon.cutBased > 1)

		# dR cut with selected Muon and Electrons
		dr_pho_ele_mask = ak.all(
			Photon.metric_table(Electron) >= 0.5, axis=-1
		)  # default metric table: delta_r
		dr_pho_mu_mask = ak.all(Photon.metric_table(Muon) >= 0.5, axis=-1)

		# genPartFlav cut
		"""
		if dataset == "WZG":
			isPrompt = (Photon.genPartFlav == 1) | (Photon.genPartFlav == 11)
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & isPrompt & dr_pho_ele_mask & dr_pho_mu_mask

		elif dataset == "WZ":
			isPrompt = (Photon.genPartFlav == 1) 
			PhoSelmask = PT_ID_mask & isgap_mask &  Pixel_seed_mask & ~isPrompt & dr_pho_ele_mask & dr_pho_mu_mask
				
		else:
			PhoSelmask = PT_ID_mask  & isgap_mask &  Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
		"""

		PhoSelmask = (
			PT_ID_mask & isgap_mask & Pixel_seed_mask & dr_pho_ele_mask & dr_pho_mu_mask
		)
		Photon = Photon[PhoSelmask]

		# Apply cut 4
		A_photon_mask = ak.num(Photon) > 0
		Electron = Electron[A_photon_mask]
		Photon = Photon[A_photon_mask]
		Jet = Jet[A_photon_mask]
		Muon = Muon[A_photon_mask]
		MET = MET[A_photon_mask]
		if not isData:
			pu = pu[A_photon_mask]
		events = events[A_photon_mask]

		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut4 = np.ones(len(Photon)) * 4
		out["cutflow"].fill(dataset=dataset, cutflow=cut4)

		##----------- Cut flow5: OSSF
		# OSSF index maker
		@numba.njit
		def find_3lep(events_leptons, builder):
			for leptons in events_leptons:

				builder.begin_list()
				nlep = len(leptons)
				for i0 in range(nlep):
					for i1 in range(i0 + 1, nlep):
						if leptons[i0].charge + leptons[i1].charge != 0:
							continue

						for i2 in range(nlep):
							if len({i0, i1, i2}) < 3:
								continue
							builder.begin_tuple(3)
							builder.index(0).integer(i0)
							builder.index(1).integer(i1)
							builder.index(2).integer(i2)
							builder.end_tuple()
				builder.end_list()
			return builder

		eee_triplet_idx = find_3lep(Electron, ak.ArrayBuilder()).snapshot()

		ossf_mask = ak.num(eee_triplet_idx) == 2

		# Apply cut 5
		eee_triplet_idx = eee_triplet_idx[ossf_mask]
		Electron = Electron[ossf_mask]
		Photon = Photon[ossf_mask]
		Jet = Jet[ossf_mask]
		MET = MET[ossf_mask]
		if not isData:
			pu = pu[ossf_mask]
		events = events[ossf_mask]

		# Stop processing if there is no event remain
		if len(Electron) == 0:
			return out

		cut5 = np.ones(ak.sum(ak.num(Electron) > 0)) * 5
		out["cutflow"].fill(dataset=dataset, cutflow=cut5)

		# Define Electron Triplet

		Triple_electron = [Electron[eee_triplet_idx[idx]] for idx in "012"]
		Triple_eee = ak.zip(
			{
				"lep1": Triple_electron[0],
				"lep2": Triple_electron[1],
				"lep3": Triple_electron[2],
				"p4": TLorentz_vector(Triple_electron[0] + Triple_electron[1]),
			}
		)

		# Ele pair selector --> Close to Z mass
		bestZ_idx = ak.singletons(ak.argmin(abs(Triple_eee.p4.mass - 91.1876), axis=1))
		Triple_eee = Triple_eee[bestZ_idx]

		leading_ele = Triple_eee.lep1
		subleading_ele = Triple_eee.lep2
		third_ele = Triple_eee.lep3

		def make_leading_pair(target, base):
			return target[ak.argmax(base.pt, axis=1, keepdims=True)]

		leading_pho = make_leading_pair(Photon, Photon)

		# -- Scale Factor for each electron

		# Trigger weight helper function
		def Trigger_Weight(eta1, pt1, eta2, pt2):
			"""Return the per-event ratio of data to MC double-electron trigger efficiency.

			Each efficiency combines the two trigger legs as
			leg1(e1)*leg2(e2) + leg1(e2)*leg2(e1) - leg1(e1)*leg1(e2),
			where (eta1, pt1) describe the first electron and (eta2, pt2) the second.
			On the data side every efficiency is multiplied by the matching
			scale-factor lookup evaluated at the same (eta, pt).
			"""
			# --- MC efficiency: the three terms of the combination above
			mc_first_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta1, pt1
			) * get_ele_trig_leg2_mc_Eff(eta2, pt2)
			mc_second_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta2, pt2
			) * get_ele_trig_leg2_mc_Eff(eta1, pt1)
			mc_both_on_leg1 = get_ele_trig_leg1_mc_Eff(
				eta1, pt1
			) * get_ele_trig_leg1_mc_Eff(eta2, pt2)
			per_ev_MC = mc_first_on_leg1 + mc_second_on_leg1 - mc_both_on_leg1

			# --- Data efficiency: same three terms, each factor scaled by its SF
			data_first_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta1, pt1)
				* get_ele_trig_leg1_SF(eta1, pt1)
				* get_ele_trig_leg2_data_Eff(eta2, pt2)
				* get_ele_trig_leg2_SF(eta2, pt2)
			)
			data_second_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta2, pt2)
				* get_ele_trig_leg1_SF(eta2, pt2)
				* get_ele_trig_leg2_data_Eff(eta1, pt1)
				* get_ele_trig_leg2_SF(eta1, pt1)
			)
			data_both_on_leg1 = (
				get_ele_trig_leg1_data_Eff(eta1, pt1)
				* get_ele_trig_leg1_SF(eta1, pt1)
				* get_ele_trig_leg1_data_Eff(eta2, pt2)
				* get_ele_trig_leg1_SF(eta2, pt2)
			)
			per_ev_data = data_first_on_leg1 + data_second_on_leg1 - data_both_on_leg1

			return per_ev_data / per_ev_MC

		if not isData:

			## -------------< Egamma ID and Reco Scale factor > -----------------##
			get_pho_medium_id_sf = get_pho_medium_id_sf(
				ak.flatten(leading_pho.eta), ak.flatten(leading_pho.pt)
			)

			ele_reco_sf = (
				get_ele_reco_above20_sf(
					ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
					ak.flatten(leading_ele.pt),
				)
				* get_ele_reco_above20_sf(
					ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
					ak.flatten(subleading_ele.pt),
				)
				* get_ele_reco_above20_sf(
					ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
					ak.flatten(third_ele.pt),
				)
			)

			ele_medium_id_sf = (
				get_ele_medium_id_sf(
					ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta),
					ak.flatten(leading_ele.pt),
				)
				* get_ele_medium_id_sf(
					ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta),
					ak.flatten(subleading_ele.pt),
				)
				* get_ele_medium_id_sf(
					ak.flatten(third_ele.deltaEtaSC + third_ele.eta),
					ak.flatten(third_ele.pt),
				)
			)

			## -------------< Double Electron Trigger Scale factor > -----------------##
			eta1 = ak.flatten(leading_ele.deltaEtaSC + leading_ele.eta)
			eta2 = ak.flatten(subleading_ele.deltaEtaSC + subleading_ele.eta)
			pt1 = ak.flatten(leading_ele.pt)
			pt2 = ak.flatten(subleading_ele.pt)

			# -- 2017,2016 are not applied yet
			if self._year == "2018":
				ele_trig_weight = Trigger_Weight(eta1, pt1, eta2, pt2)

		##----------- Cut flow6: Event selection

		# Mee cut
		diele = Triple_eee.p4
		Mee_cut_mask = ak.firsts(diele.mass) > 4

		# Z mass window
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) < 15 # SR, CR_ZZA, CR_Z+jets, CR_Conversion
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 5 #  CR_t-enriched
		# zmass_window_mask = ak.firsts(abs(diele.mass - 91.1876)) > 15 #  CR_Conversion

		# M(eee) cut SR, CR_ZZA, CR_Z+jets, CR_t enriched
		# eee = Triple_eee.lep1 + Triple_eee.lep2 + Triple_eee.lep3
		# Meee_cut_mask = ak.firsts(eee.mass > 100)
		# Meee_cut_mask = ak.firsts(eee.mass <= 100)

		# b-Jet veto cut  #SR, CR_ZZA, CR_Z+jets, CR_Conversion
		# bjet_mask = (Jet.btagCSVV2 > 0.4184)	&  (Jet.pt > 30)
		# bjet_veto_mask = ak.num(Jet[bjet_mask]) == 0
		# bjet_veto_mask = ak.num(Jet[bjet_mask]) > 0 # CR_t-enriched

		# Electron PT cuts
		Elept_mask = ak.firsts(
			(leading_ele.pt >= 25) & (subleading_ele.pt >= 10) & (third_ele.pt >= 25)
		)

		# MET cuts
		MET_mask = MET > 20  # Baseline
		# MET_mask = MET.pt > 30 #  SR, CR-ZZE, CR-t-entirched
		# MET_mask = MET.pt <= 30 #  CR-Z+jets. CR-Conversion

		# Mask
		# Event_sel_mask = Elept_mask & MET_mask & bjet_veto_mask & Mee_cut_mask & zmass_window_mask  & Meee_cut_mask # SR,CR
		Event_sel_mask = Elept_mask & MET_mask & Mee_cut_mask  # SR,CR

		# Apply cut6
		Triple_eee_sel = Triple_eee[Event_sel_mask]
		leading_pho_sel = leading_pho[Event_sel_mask]
		MET_sel = MET[Event_sel_mask]
		events = events[Event_sel_mask]

		# Photon  EE and EB
		isEE_mask = leading_pho.isScEtaEE
		isEB_mask = leading_pho.isScEtaEB
		Pho_EE = leading_pho[isEE_mask & Event_sel_mask]
		Pho_EB = leading_pho[isEB_mask & Event_sel_mask]


		# Stop processing if there is no event remain
		if len(leading_pho_sel) == 0:
			return out

		cut6 = np.ones(ak.sum(ak.num(leading_pho_sel) > 0)) * 6
		out["cutflow"].fill(dataset=dataset, cutflow=cut6)

		## -------------------- Prepare making hist --------------#

		# Photon
		phoPT = ak.flatten(leading_pho_sel.pt)
		phoEta = ak.flatten(leading_pho_sel.eta)
		phoPhi = ak.flatten(leading_pho_sel.phi)

		# Photon EE
		if len(Pho_EE.pt) != 0:
			Pho_EE_PT = ak.flatten(Pho_EE.pt)
			Pho_EE_Eta = ak.flatten(Pho_EE.eta)
			Pho_EE_Phi = ak.flatten(Pho_EE.phi)
			Pho_EE_sieie = ak.flatten(Pho_EE.sieie)
			Pho_EE_hoe = ak.flatten(Pho_EE.hoe)
			Pho_EE_Iso_charge = ak.flatten(Pho_EE.pfRelIso03_chg)

		# Photon EB
		if len(Pho_EB.pt) != 0:
			Pho_EB_PT = ak.flatten(Pho_EB.pt)
			Pho_EB_Eta = ak.flatten(Pho_EB.eta)
			Pho_EB_Phi = ak.flatten(Pho_EB.phi)
			Pho_EB_sieie = ak.flatten(Pho_EB.sieie)
			Pho_EB_hoe = ak.flatten(Pho_EB.hoe)
			Pho_EB_Iso_charge = ak.flatten(Pho_EB.pfRelIso03_chg)

		# Electrons
		ele1PT = ak.flatten(Triple_eee_sel.lep1.pt)
		ele1Eta = ak.flatten(Triple_eee_sel.lep1.eta)
		ele1Phi = ak.flatten(Triple_eee_sel.lep1.phi)

		ele2PT = ak.flatten(Triple_eee_sel.lep2.pt)
		ele2Eta = ak.flatten(Triple_eee_sel.lep2.eta)
		ele2Phi = ak.flatten(Triple_eee_sel.lep2.phi)

		ele3PT = ak.flatten(Triple_eee_sel.lep3.pt)
		ele3Eta = ak.flatten(Triple_eee_sel.lep3.eta)
		ele3Phi = ak.flatten(Triple_eee_sel.lep3.phi)

		charge = ak.flatten(Triple_eee.lep1.charge + Triple_eee.lep2.charge)

		# MET
		met = ak.to_numpy(MET_sel)

		# M(eea) M(ee)
		diele = Triple_eee_sel.p4
		eeg_vec = diele + leading_pho_sel
		Meea = ak.flatten(eeg_vec.mass)
		Mee = ak.flatten(Triple_eee_sel.p4.mass)


		# --- Apply weight and hist
		
		if isFake:
			weights = processor.Weights(len(cut6))
		else:
			weights = processor.Weights(len(cut5))
			


		# -------------------- Sieie bins---------------------------#
		def make_bins(pt, eta, bin_range_str):
			"""Return the boolean mask of entries falling in one named (pt, eta) bin.

			Bin names have the form "PT_<i>_eta_<j>" with i, j in 1..4.  All
			edges are exclusive, so a value sitting exactly on an edge belongs to
			no bin.  The last pt bin has no upper edge and the first eta bin has
			no lower edge.  An unknown name raises KeyError.
			"""
			# (tag, lower edge, upper edge); None means "no cut on that side"
			pt_ranges = [
				("PT_1", 20, 30),
				("PT_2", 30, 40),
				("PT_3", 40, 50),
				("PT_4", 50, None),
			]
			eta_ranges = [
				("eta_1", None, 1),
				("eta_2", 1, 1.5),
				("eta_3", 1.5, 2),
				("eta_4", 2, 2.5),
			]

			masks = {}
			for pt_tag, pt_low, pt_high in pt_ranges:
				pt_pass = pt > pt_low
				if pt_high is not None:
					pt_pass = pt_pass & (pt < pt_high)

				for eta_tag, eta_low, eta_high in eta_ranges:
					eta_pass = eta < eta_high
					if eta_low is not None:
						eta_pass = (eta > eta_low) & eta_pass

					masks[pt_tag + "_" + eta_tag] = pt_pass & eta_pass

			return masks[bin_range_str]

		bin_name_list = [
			"PT_1_eta_1",
			"PT_1_eta_2",
			"PT_1_eta_3",
			"PT_1_eta_4",
			"PT_2_eta_1",
			"PT_2_eta_2",
			"PT_2_eta_3",
			"PT_2_eta_4",
			"PT_3_eta_1",
			"PT_3_eta_2",
			"PT_3_eta_3",
			"PT_3_eta_4",
			"PT_4_eta_1",
			"PT_4_eta_2",
			"PT_4_eta_3",
			"PT_4_eta_4",
		]



		## -- Fake-fraction Lookup table --##
		if isFake:
			# Make Bin-range mask
			binned_pteta_mask = {}
			for name in bin_name_list:
				binned_pteta_mask[name] = make_bins(
					ak.flatten(leading_pho_sel.pt),
					ak.flatten(abs(leading_pho_sel.eta)),
					name,
				)
			# Read Fake fraction --> Mapping bin name to int()
			in_dict = np.load('Fitting_v2/results_210517.npy',allow_pickle="True")[()]
			idx=0
			fake_dict ={}
			for i,j in in_dict.items():
				fake_dict[idx] = j
				idx+=1


			# Reconstruct Fake_weight
			fw= 0
			for i,j in binned_pteta_mask.items():
				fw = fw + j*fake_dict[bin_name_list.index(i)]


			# Process 0 weight to 1
			@numba.njit
			def zero_one(x):
				if x == 0:
					x = 1
				return x
			vec_zero_one = np.vectorize(zero_one)
			fw = vec_zero_one(fw)




		# --- skim cut-weight
		if not isFake:
			def skim_weight(arr):
				mask1 = ~ak.is_none(arr)
				subarr = arr[mask1]
				mask2 = subarr != 0
				return ak.to_numpy(subarr[mask2])
		else:
			def skim_weight(arr):
				return arr


		if not isFake:
			cuts = Event_sel_mask
			cuts_pho_EE = ak.flatten(isEE_mask)
			cuts_pho_EB = ak.flatten(isEB_mask)

		if isFake:
			cuts = np.ones(len(Event_sel_mask))
			cuts_pho_EE = ak.flatten(isEE_mask & Event_sel_mask)
			cuts_pho_EB = ak.flatten(isEB_mask & Event_sel_mask)


		if isFake:
			weights.add("fake_fraction", fw)
			
		# Weight and SF here
		if not (isData | isFake):
			weights.add("pileup", pu)
			weights.add("ele_id", ele_medium_id_sf)
			weights.add("pho_id", get_pho_medium_id_sf)
			weights.add("ele_reco", ele_reco_sf)

			# 2016,2017 are not applied yet
			if self._year == "2018":
				weights.add("ele_trigger", ele_trig_weight)

		# ---------------------------- Fill hist --------------------------------------#

		# Initial events
		out["sumw"][dataset] += len(Initial_events)


		print("cut1: {0},cut2: {1},cut3: {2},cut4: {3},cut5: {4},cut6: {5},cut7: {6}".format(len(cut0), len(cut1), len(cut2), len(cut3), len(cut4), len(cut5),len(cut6)))


		## Cut flow loop
		#for cut in [cut0, cut1, cut2, cut3, cut4, cut5,cut6]:
		#	out["cutflow"].fill(dataset=dataset, cutflow=cut)

		# Primary vertex
		out["nPV"].fill(
			dataset=dataset,
			nPV=nPV,
		)
		out["nPV_nw"].fill(dataset=dataset, nPV_nw=nPV_nw)

		# Fill hist

		# -- met -- #
		out["met"].fill(
			dataset=dataset, met=met, weight=skim_weight(weights.weight() * cuts)
		)

		# --mass -- #
		out["mass"].fill(
			dataset=dataset, mass=Mee, weight=skim_weight(weights.weight() * cuts)
		)
		out["mass_eea"].fill(
			dataset=dataset, mass_eea=Meea, weight=skim_weight(weights.weight() * cuts)
		)

		# -- Electron -- #
		out["ele1pt"].fill(
			dataset=dataset, ele1pt=ele1PT, weight=skim_weight(weights.weight() * cuts)
		)
		out["ele1eta"].fill(
			dataset=dataset,
			ele1eta=ele1Eta,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele1phi"].fill(
			dataset=dataset,
			ele1phi=ele1Phi,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele2pt"].fill(
			dataset=dataset, ele2pt=ele2PT, weight=skim_weight(weights.weight() * cuts)
		)
		out["ele2eta"].fill(
			dataset=dataset,
			ele2eta=ele2Eta,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele2phi"].fill(
			dataset=dataset,
			ele2phi=ele2Phi,
			weight=skim_weight(weights.weight() * cuts),
		)
		out["ele3pt"].fill(
			dataset=dataset, ele3pt=ele3PT, weight=skim_weight(weights.weight() * cuts)
		)

		# -- Photon -- #

		out["phopt"].fill(
			dataset=dataset, phopt=phoPT, weight=skim_weight(weights.weight() * cuts)
		)
		out["phoeta"].fill(
			dataset=dataset, phoeta=phoEta, weight=skim_weight(weights.weight() * cuts)
		)
		out["phophi"].fill(
			dataset=dataset, phophi=phoPhi, weight=skim_weight(weights.weight() * cuts)
		)

		if len(Pho_EE.pt) != 0:

			out["pho_EE_pt"].fill(
				dataset=dataset,
				pho_EE_pt=Pho_EE_PT,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_eta"].fill(
				dataset=dataset,
				pho_EE_eta=Pho_EE_Eta,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_phi"].fill(
				dataset=dataset,
				pho_EE_phi=Pho_EE_Phi,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_hoe"].fill(
				dataset=dataset,
				pho_EE_hoe=Pho_EE_hoe,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_sieie"].fill(
				dataset=dataset,
				pho_EE_sieie=Pho_EE_sieie,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)
			out["pho_EE_Iso_chg"].fill(
				dataset=dataset,
				pho_EE_Iso_chg=Pho_EE_Iso_charge,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EE),
			)

		if len(Pho_EB.pt) != 0:
			out["pho_EB_pt"].fill(
				dataset=dataset,
				pho_EB_pt=Pho_EB_PT,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_eta"].fill(
				dataset=dataset,
				pho_EB_eta=Pho_EB_Eta,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_phi"].fill(
				dataset=dataset,
				pho_EB_phi=Pho_EB_Phi,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_hoe"].fill(
				dataset=dataset,
				pho_EB_hoe=Pho_EB_hoe,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_sieie"].fill(
				dataset=dataset,
				pho_EB_sieie=Pho_EB_sieie,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)
			out["pho_EB_Iso_chg"].fill(
				dataset=dataset,
				pho_EB_Iso_chg=Pho_EB_Iso_charge,
				weight=skim_weight(weights.weight() * cuts * cuts_pho_EB),
			)

		return out
Exemplo n.º 5
0
    def build_p4(self, filename=None):
        """Build per-event signal and background jet four-momentum lists.

        An event is kept when it has at least six jets with ``jet_idx > -1``
        (signal-matched), no repeated value among those ``jet_idx`` entries,
        and at least one jet with ``jet_idx == -1`` (background).  For every
        kept event two lists of ``Momentum4D`` records (pt, eta, phi, m) are
        appended, both ordered by descending pt:

        * signal: the signal-matched jets;
        * background: a copy of the signal jets in which randomly chosen
          slots among the first six are overwritten with background jets.

        Results are stored on the instance: ``sgnl_p4``/``bkgd_p4`` (builder
        snapshots), ``sgnl_evt_p4``/``bkgd_evt_p4`` (``get_evt_p4`` applied to
        them), ``signal_idx``, ``signal_btag``, ``bkgd_btag``, ``bkgd``
        (number of background jets available per kept event), ``n_bkgd``
        (number of swaps drawn per kept event), and ``mask_7``/``mask_8``
        (event numbers with exactly one / exactly two background jets).

        Args:
            filename: Unused; kept so existing callers keep working.
        """

        def swap_in(values, sgnl_mask, bkgd_mask, bkgd_picks, sgnl_picks):
            # Return (signal values, copy with the picked signal slots
            # overwritten by the picked background values).
            sgnl_vals = values[sgnl_mask]
            swapped = sgnl_vals.copy()
            bkgd_vals = values[bkgd_mask]
            for b, s in zip(bkgd_picks, sgnl_picks):
                swapped[s] = bkgd_vals[b]
            return sgnl_vals, swapped

        def append_p4(builder, pts, etas, phis, ms):
            # Append one list of Momentum4D records to ``builder``.
            with builder.list():
                for pt, eta, phi, m in zip(pts, etas, phis, ms):
                    with builder.record("Momentum4D"):  # not MomentumObject4D
                        builder.field("pt")
                        builder.real(pt)
                        builder.field("eta")
                        builder.real(eta)
                        builder.field("phi")
                        builder.real(phi)
                        builder.field("m")
                        builder.real(m)

        signal_builder = ak.ArrayBuilder()
        bkgd_builder = ak.ArrayBuilder()

        jet_idx = self.jet_idx
        jet_pt = self.jet_pt
        jet_eta = self.jet_eta
        jet_phi = self.jet_phi
        jet_m = self.jet_m
        jet_btag = self.jet_btag

        signal_idx = []
        signal_btag = []

        background_btag = []
        available_bkgd = []

        n_background = []

        mask_7 = []
        mask_8 = []

        info("Looping through events. This may take a few minutes.")
        for evt in tqdm(range(self.nevents)):

            # Positions of signal-matched and unmatched jets in this event.
            # dtype=int matters: an empty list would otherwise become a
            # float64 array, which cannot be used as an index array.
            signal_mask = np.array(
                [i for i, obj in enumerate(jet_idx[evt]) if obj > -1],
                dtype=int)
            background_mask = [
                i for i, obj in enumerate(jet_idx[evt]) if obj == -1
            ]

            # Count the number of background jets
            n_bkgd = len(background_mask)

            # Skip events with fewer than 6 signal-matched jets (for now)
            if len(signal_mask) < 6:
                continue

            # Skip any events with duplicate matches (for now)
            matched = jet_idx[evt][signal_mask]
            if len(np.unique(matched)) < len(matched):
                continue

            if n_bkgd < 1:
                continue
            if n_bkgd == 1:
                mask_7.append(evt)
            elif n_bkgd == 2:
                mask_8.append(evt)
            available_bkgd.append(n_bkgd)

            sgnl_ind = np.arange(0, 6)

            if n_bkgd > 1:
                # NOTE(review): the candidate range starts at 1, so background
                # jet 0 is never drawn when more than one is available. This
                # looks like an off-by-one, but fixing it changes the sampled
                # data set -- confirm the intent before touching it.
                bkgd_ind = np.arange(1, n_bkgd)
                # Both draws are with replacement, so the same slot or the
                # same background jet can be picked more than once.
                random_bkgd = random.choices(bkgd_ind, k=n_bkgd)
                random_sgnl = random.choices(sgnl_ind, k=n_bkgd)
            else:
                random_bkgd = [0]  # only one background jet to choose from
                random_sgnl = random.choices(sgnl_ind, k=1)

            n_background.append(len(random_bkgd))

            picks = (signal_mask, background_mask, random_bkgd, random_sgnl)
            sixb_pt, non_sixb_pt = swap_in(jet_pt[evt], *picks)
            sixb_eta, non_sixb_eta = swap_in(jet_eta[evt], *picks)
            sixb_phi, non_sixb_phi = swap_in(jet_phi[evt], *picks)
            sixb_m, non_sixb_m = swap_in(jet_m[evt], *picks)
            sixb_btag, non_sixb_btag = swap_in(jet_btag[evt], *picks)

            # Descending-pt orderings, computed once from the unsorted pt.
            sgnl_order = np.argsort(sixb_pt)[::-1]
            bkgd_order = np.argsort(non_sixb_pt)[::-1]

            signal_idx.append(signal_mask[sgnl_order])
            signal_btag.append(sixb_btag[sgnl_order])
            background_btag.append(non_sixb_btag[bkgd_order])

            append_p4(signal_builder, sixb_pt[sgnl_order],
                      sixb_eta[sgnl_order], sixb_phi[sgnl_order],
                      sixb_m[sgnl_order])
            append_p4(bkgd_builder, non_sixb_pt[bkgd_order],
                      non_sixb_eta[bkgd_order], non_sixb_phi[bkgd_order],
                      non_sixb_m[bkgd_order])

        self.sgnl_p4 = signal_builder.snapshot()
        self.bkgd_p4 = bkgd_builder.snapshot()

        self.sgnl_evt_p4 = get_evt_p4(self.sgnl_p4)
        self.bkgd_evt_p4 = get_evt_p4(self.bkgd_p4)

        self.signal_idx = np.array((signal_idx))
        self.signal_btag = np.array((signal_btag))
        self.bkgd_btag = np.array((background_btag))
        self.bkgd = np.array((available_bkgd))

        self.n_bkgd = np.array((n_background))
        self.mask_7 = np.array((mask_7))
        self.mask_8 = np.array((mask_8))
Exemplo n.º 6
0
    def nCk(self, n=7, k=6):
        """Build every k-jet combination of each selected event.

        An event is used when it has at least ``k`` jets with
        ``jet_idx > -1`` (signal-matched), no repeated value among those
        ``jet_idx`` entries, and at least ``n - k`` jets with
        ``jet_idx == -1``.  For each used event, every combination of ``k``
        of its jets is appended to the builder as a list of ``Momentum4D``
        records (pt, eta, phi, m) ordered by descending pt.

        Args:
            n: Together with ``k``, sets the minimum number of background
                jets (``n - k``) an event must have.
            k: Number of jets per combination.

        Returns:
            tuple: ``(combo_builder, evt_tag, combo_features)`` where
            ``combo_builder`` is the ``ak.ArrayBuilder`` holding all
            combinations, ``evt_tag`` is a boolean array with one entry per
            combination over all events (True when the combination consists
            of exactly the signal-matched jets), and ``combo_features`` is
            the result of ``self.construct_combo_features``.
        """
        combo_builder = ak.ArrayBuilder()

        jet_idx = self.jet_idx
        jet_pt = self.jet_pt
        jet_eta = self.jet_eta
        jet_phi = self.jet_phi
        jet_m = self.jet_m
        jet_btag = self.jet_btag

        combo_btag = []
        # One tag per combination, accumulated over ALL events so it stays
        # aligned with combo_btag and the builder contents.
        evt_tag = []

        for evt in tqdm(range(self.nevents)):

            # Positions of signal-matched jets; dtype=int keeps an empty
            # selection usable as an index array.
            signal_mask = np.array(
                [i for i, obj in enumerate(jet_idx[evt]) if obj > -1],
                dtype=int)

            # Count the number of background jets
            n_bkgd = sum(1 for obj in jet_idx[evt] if obj == -1)

            # Skip events with fewer than k signal-matched jets (for now)
            if len(signal_mask) < k:
                continue

            # Skip any events with duplicate matches (for now)
            matched = jet_idx[evt][signal_mask]
            if len(np.unique(matched)) < len(matched):
                continue

            if n_bkgd < n - k:
                continue

            evt_pt = np.asarray(jet_pt[evt])
            evt_eta = np.asarray(jet_eta[evt])
            evt_phi = np.asarray(jet_phi[evt])
            evt_m = np.asarray(jet_m[evt])
            evt_btag = np.asarray(jet_btag[evt])

            signal_positions = set(signal_mask.tolist())

            # Enumerate combinations by jet position so that the signal tag
            # compares positions with positions (signal_mask holds positions,
            # whereas the jet_idx values are match labels).
            for positions in itertools.combinations(range(len(jet_idx[evt])),
                                                    k):

                evt_tag.append(set(positions) == signal_positions)

                chosen = np.array(positions, dtype=int)
                combo_pt = evt_pt[chosen]

                # Descending-pt ordering, applied to every quantity.
                order = np.argsort(combo_pt)[::-1]

                combo_btag.append(evt_btag[chosen][order])

                with combo_builder.list():

                    for jpt, jeta, jphi, jm in zip(combo_pt[order],
                                                   evt_eta[chosen][order],
                                                   evt_phi[chosen][order],
                                                   evt_m[chosen][order]):

                        with combo_builder.record(
                                "Momentum4D"):  # not MomentumObject4D

                            combo_builder.field("pt")
                            combo_builder.real(jpt)
                            combo_builder.field("eta")
                            combo_builder.real(jeta)
                            combo_builder.field("phi")
                            combo_builder.real(jphi)
                            combo_builder.field("m")
                            combo_builder.real(jm)

        combos = combo_builder.snapshot()
        combo_evt_p4 = get_evt_p4(combos)
        evt_tag = np.array((evt_tag))
        combo_btag = np.array((combo_btag))

        combo_features = self.construct_combo_features(combos, combo_btag,
                                                       combo_evt_p4)

        # The builder itself (not its snapshot) is returned, as before.
        return combo_builder, evt_tag, combo_features
def test_ArrayBuilder_append():
    """Exercise ``ak.ArrayBuilder.append`` and ``extend`` with existing arrays.

    Covers appending single elements of jagged arrays, string arrays and
    record arrays (by index, including negative indices), mixing them with
    nulls, strings and plain Python values, and checks the layout classes of
    the resulting snapshots.
    """
    array = ak.Array(
        [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], check_valid=True
    )

    # append(array, i) adds element i of `array`; a negative i counts from the end.
    builder = ak.ArrayBuilder()
    builder.append(array, 3)
    builder.append(array, 2)
    builder.append(array, 2)
    builder.append(array, 0)
    builder.append(array, 1)
    builder.append(array, -1)
    assert ak.to_list(builder.snapshot()) == [
        [5.5],
        [3.3, 4.4],
        [3.3, 4.4],
        [0.0, 1.1, 2.2],
        [],
        [6.6, 7.7, 8.8, 9.9],
    ]

    # extend(array) adds every element of `array`, in order, after what is already there.
    builder.extend(array)
    assert ak.to_list(builder.snapshot()) == [
        [5.5],
        [3.3, 4.4],
        [3.3, 4.4],
        [0.0, 1.1, 2.2],
        [],
        [6.6, 7.7, 8.8, 9.9],
        [0.0, 1.1, 2.2],
        [],
        [3.3, 4.4],
        [5.5],
        [6.6, 7.7, 8.8, 9.9],
    ]

    # Nulls before the appended elements.
    builder = ak.ArrayBuilder()
    builder.null()
    builder.null()
    builder.null()
    builder.append(array, 3)
    builder.append(array, 2)
    builder.append(array, 2)
    builder.append(array, -1)

    assert ak.to_list(builder.snapshot()) == [
        None,
        None,
        None,
        [5.5],
        [3.3, 4.4],
        [3.3, 4.4],
        [6.6, 7.7, 8.8, 9.9],
    ]

    # ... and a null after them; the builder keeps accepting data after a snapshot.
    builder.null()
    assert ak.to_list(builder.snapshot()) == [
        None,
        None,
        None,
        [5.5],
        [3.3, 4.4],
        [3.3, 4.4],
        [6.6, 7.7, 8.8, 9.9],
        None,
    ]

    # Elements taken from two different source arrays in one builder.
    one = ak.Array(
        [[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]], check_valid=True
    )
    two = ak.Array([[3.3, 2.2, 1.1, 0.0], [5.5, 4.4], [], [6.6]], check_valid=True)

    builder = ak.ArrayBuilder()
    builder.append(one, 2)
    builder.append(two, 1)
    builder.append(one, 0)
    builder.append(two, -1)
    builder.append(one, -1)

    assert ak.to_list(builder.snapshot()) == [
        [3.3, 4.4],
        [5.5, 4.4],
        [0.0, 1.1, 2.2],
        [6.6],
        [6.6, 7.7, 8.8, 9.9],
    ]

    # Two source arrays interleaved with nulls.
    builder = ak.ArrayBuilder()
    builder.null()
    builder.append(one, 2)
    builder.null()
    builder.append(two, 1)
    builder.null()
    assert ak.to_list(builder.snapshot()) == [None, [3.3, 4.4], None, [5.5, 4.4], None]

    # Strings interleaved with appended lists, string first.
    builder = ak.ArrayBuilder()
    builder.string("hello")
    builder.append(one, 2)
    builder.string("there")
    builder.append(one, 0)
    assert ak.to_list(builder.snapshot()) == [
        "hello",
        [3.3, 4.4],
        "there",
        [0.0, 1.1, 2.2],
    ]

    # Same, with nulls mixed in.
    builder = ak.ArrayBuilder()
    builder.null()
    builder.string("hello")
    builder.null()
    builder.append(one, 2)
    builder.null()
    builder.string("there")
    builder.append(one, 0)
    assert ak.to_list(builder.snapshot()) == [
        None,
        "hello",
        None,
        [3.3, 4.4],
        None,
        "there",
        [0.0, 1.1, 2.2],
    ]

    # Strings interleaved with appended lists, appended list first.
    builder = ak.ArrayBuilder()
    builder.append(one, 2)
    builder.string("there")
    builder.append(one, 0)
    assert ak.to_list(builder.snapshot()) == [[3.3, 4.4], "there", [0.0, 1.1, 2.2]]

    # Same, with nulls mixed in.
    builder = ak.ArrayBuilder()
    builder.null()
    builder.append(one, 2)
    builder.null()
    builder.string("there")
    builder.null()
    builder.append(one, 0)
    assert ak.to_list(builder.snapshot()) == [
        None,
        [3.3, 4.4],
        None,
        "there",
        None,
        [0.0, 1.1, 2.2],
    ]

    # Appending elements of a string array inside explicitly opened lists.
    array = ak.Array(
        [
            "zero",
            "one",
            "two",
            "three",
            "four",
            "five",
            "six",
            "seven",
            "eight",
            "nine",
        ],
        check_valid=True,
    )
    builder = ak.ArrayBuilder()
    builder.begin_list()
    builder.append(array, 1)
    builder.append(array, 2)
    builder.append(array, 3)
    builder.end_list()
    builder.begin_list()
    builder.end_list()
    builder.begin_list()
    builder.append(array, 4)
    builder.append(array, 5)
    builder.end_list()
    assert ak.to_list(builder.snapshot()) == [
        ["one", "two", "three"],
        [],
        ["four", "five"],
    ]

    # A string appended at the top level, next to the lists of strings.
    builder.append(array, -1)
    assert ak.to_list(builder.snapshot()) == [
        ["one", "two", "three"],
        [],
        ["four", "five"],
        "nine",
    ]

    # Records: append(record) takes a single record obtained by indexing the array.
    array = ak.Array(
        [
            {"x": 0.0, "y": []},
            {"x": 1.1, "y": [1]},
            {"x": 2.2, "y": [2, 2]},
            {"x": 3.3, "y": [3, 3, 3]},
        ],
        check_valid=True,
    )
    builder = ak.ArrayBuilder()
    builder.append(array[2])
    builder.append(array[2])
    builder.append(array[1])
    builder.append(array[-1])
    tmp = builder.snapshot()
    assert ak.to_list(tmp) == [
        {"x": 2.2, "y": [2, 2]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 1.1, "y": [1]},
        {"x": 3.3, "y": [3, 3, 3]},
    ]
    # The snapshot is expected to be an index into a record array.
    assert isinstance(tmp.layout, ak.layout.IndexedArray64)
    assert isinstance(tmp.layout.content, ak.layout.RecordArray)

    # Extending with the whole record array keeps the same layout classes.
    builder.extend(array)
    tmp = builder.snapshot()
    assert ak.to_list(tmp) == [
        {"x": 2.2, "y": [2, 2]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 1.1, "y": [1]},
        {"x": 3.3, "y": [3, 3, 3]},
        {"x": 0.0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
    ]
    assert isinstance(tmp.layout, ak.layout.IndexedArray64)
    assert isinstance(tmp.layout.content, ak.layout.RecordArray)

    # Appending a plain Python integer: the top-level layout is expected to become a union.
    builder.append(999)
    tmp = builder.snapshot()
    assert ak.to_list(tmp) == [
        {"x": 2.2, "y": [2, 2]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 1.1, "y": [1]},
        {"x": 3.3, "y": [3, 3, 3]},
        {"x": 0.0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
        999,
    ]
    assert isinstance(tmp.layout, ak.layout.UnionArray8_64)

    # Appending a plain Python list on top of that.
    builder.append([1, 2, 3, 4, 5])
    tmp = builder.snapshot()
    assert ak.to_list(tmp) == [
        {"x": 2.2, "y": [2, 2]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 1.1, "y": [1]},
        {"x": 3.3, "y": [3, 3, 3]},
        {"x": 0.0, "y": []},
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [2, 2]},
        {"x": 3.3, "y": [3, 3, 3]},
        999,
        [1, 2, 3, 4, 5],
    ]
    assert isinstance(tmp.layout, ak.layout.UnionArray8_64)

    # Re-appending from successive snapshots: at every generation the snapshot's
    # layout content is expected to be a ListOffsetArray64.
    array1 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]], check_valid=True)

    builder = ak.ArrayBuilder()
    builder.append(array1, 2)
    builder.append(array1, 1)
    builder.append(array1, 0)
    array2 = builder.snapshot()
    assert isinstance(array2.layout.content, ak.layout.ListOffsetArray64)

    builder = ak.ArrayBuilder()
    builder.append(array2, 2)
    builder.append(array2, 1)
    builder.append(array2, 0)
    array3 = builder.snapshot()
    assert isinstance(array3.layout.content, ak.layout.ListOffsetArray64)

    builder = ak.ArrayBuilder()
    builder.append(array3, 2)
    builder.append(array3, 1)
    builder.append(array3, 0)
    array4 = builder.snapshot()
    assert isinstance(array4.layout.content, ak.layout.ListOffsetArray64)