def theory_weights_monojet(weights, df, evaluator, gen_v_pt):
    """Attach the combined theory (QCD x EWK NLO) weight for V+jets samples.

    LO W/Z/photon samples get a QCD k-factor (from a 2016 histogram or an
    analytic fit, depending on the year) multiplied by an EWK correction;
    NLO W/Z samples only get the EWK correction. Everything else gets unity.

    :param weights: Weights object to write the 'theory' weight into
    :param df: Data frame with the per-dataset sample-type flags
    :param evaluator: Lookup object for the correction histograms
    :param gen_v_pt: Generator-level boson pt per event
    :returns: The updated weights object
    """
    if df['is_lo_w']:
        if extract_year(df['dataset']) == 2016:
            qcd_corr = evaluator["qcd_nlo_w_2016"](gen_v_pt)
        else:
            qcd_corr = fitfun(gen_v_pt, 1.053, 3.163e-3, 0.746)
        theory_weights = qcd_corr * evaluator["ewk_nlo_w"](gen_v_pt)
    elif df['is_lo_z']:
        if extract_year(df['dataset']) == 2016:
            qcd_corr = evaluator["qcd_nlo_z_2016"](gen_v_pt)
        else:
            qcd_corr = fitfun(gen_v_pt, 1.434, 2.210e-3, 0.443)
        theory_weights = qcd_corr * evaluator["ewk_nlo_z"](gen_v_pt)
    elif df['is_nlo_w']:
        theory_weights = evaluator["ewk_nlo_w"](gen_v_pt)
    elif df['is_nlo_z']:
        theory_weights = evaluator["ewk_nlo_z"](gen_v_pt)
    elif df['is_lo_g']:
        theory_weights = fitfun(gen_v_pt, 1.159, 1.944e-3, 1.0) \
            * evaluator["ewk_nlo_g"](gen_v_pt)
    else:
        theory_weights = np.ones(df.size)

    # Unphysical boson pt (non-positive, inf or NaN) gets a unit weight
    bad_pt = (gen_v_pt <= 0) | np.isinf(gen_v_pt) | np.isnan(gen_v_pt)
    theory_weights[bad_pt] = 1

    weights.add('theory', theory_weights)
    return weights
def scale_xs_lumi(histogram, scale_lumi=True):
    """Scale the MC datasets in a histogram so they can be compared to data.

    Each MC dataset is scaled by 1e3 * xs * lumi(year) (or just 1e3 * xs if
    *scale_lumi* is False). Data datasets are left untouched. Division by the
    sum of generator weights is presumably applied elsewhere — this function
    only applies the cross-section/luminosity factor.

    :param histogram: Histogram to normalize
    :type histogram: coffea Hist
    :param scale_lumi: Also multiply by the data-taking luminosity
    """
    # Collect the MC datasets present on the dataset axis
    dataset_names = [str(ds) for ds in histogram.axis('dataset').identifiers()]
    mc_names = [ds for ds in dataset_names if not is_data(ds)]

    # Look up the cross section for each MC dataset
    known_xs = load_xs()
    xs_map = {}
    for name in mc_names:
        # Drop the "new pmx" production tag from the name before the lookup
        key = re.sub('_new_*pmx', '', name)
        if key in known_xs:
            xs_map[name] = known_xs[key]
        else:
            print(
                f"WARNING: Cross section not found for dataset {name}. Using 0.")
            xs_map[name] = 0

    # xs is in pb; the 1e3 factor converts to fb to match lumi in 1/fb
    scale_factors = {
        name: 1e3 * xs_map[name] * (lumi(extract_year(name)) if scale_lumi else 1)
        for name in mc_names
    }
    histogram.scale(scale_factors, axis='dataset')
def photon_trigger_sf(weights, photons, df):
    """MC-to-data photon trigger scale factor.

    The scale factor is obtained by separately fitting the trigger turn-on
    with a sigmoid function in data and MC. The scale factor is then the
    ratio of the two sigmoid functions as a function of the photon pt.

    :param weights: Weights object to write information into
    :type weights: WeightsContainer
    :param photons: Photon candidates
    :type photons: JaggedCandidateArray
    :param df: Data frame
    :type df: LazyDataFrame
    """
    year = extract_year(df['dataset'])
    # Leading-photon pt per event drives the trigger turn-on
    x = photons.pt.max()
    if year == 2016:
        sf = np.ones(df.size)
    elif year == 2017:
        sf = sigmoid(x, 0.335, 217.91, 0.065, 0.996) / sigmoid(
            x, 0.244, 212.34, 0.050, 1.000)
    elif year == 2018:
        sf = sigmoid(x, 1.022, 218.39, 0.086, 0.999) / sigmoid(
            x, 0.301, 212.83, 0.062, 1.000)

    # BUG FIX: this previously read 'sf[...] == 1', a no-op comparison.
    # Actually assign a unit weight where the sigmoid ratio is undefined
    # (NaN/inf, e.g. for events without a photon).
    sf[np.isnan(sf) | np.isinf(sf)] = 1

    weights.add("trigger_photon", sf)
def candidate_weights(weights, df, evaluator, muons, electrons, photons):
    """Attach lepton and photon ID/iso/reco scale-factor weights.

    :param weights: Weights object to write the scale factors into
    :param df: Data frame carrying the per-object tight/loose flags
    :param evaluator: Scale-factor lookup evaluator
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :returns: The updated weights object
    """
    tight_mu = muons[df['is_tight_muon']]
    loose_mu = muons[~df['is_tight_muon']]
    # Muon SFs are parametrized in (pt, abseta) — note the argument order
    weights.add("muon_id_tight",
                evaluator['muon_id_tight'](tight_mu.pt, tight_mu.abseta).prod())
    weights.add("muon_iso_tight",
                evaluator['muon_iso_tight'](tight_mu.pt, tight_mu.abseta).prod())
    weights.add("muon_id_loose",
                evaluator['muon_id_loose'](loose_mu.pt, loose_mu.abseta).prod())
    weights.add("muon_iso_loose",
                evaluator['muon_iso_loose'](loose_mu.pt, loose_mu.abseta).prod())

    # Electron SFs are parametrized in (eta, pt) — opposite order w.r.t. muons
    tight_el = electrons[df['is_tight_electron']]
    loose_el = electrons[~df['is_tight_electron']]
    weights.add("ele_reco",
                evaluator['ele_reco'](electrons.eta, electrons.pt).prod())
    weights.add("ele_id_tight",
                evaluator['ele_id_tight'](tight_el.eta, tight_el.pt).prod())
    weights.add("ele_id_loose",
                evaluator['ele_id_loose'](loose_el.eta, loose_el.pt).prod())

    # Photon ID
    tight_pho = photons[df['is_tight_photon']]
    weights.add("photon_id_tight",
                evaluator['photon_id_tight'](tight_pho.eta, tight_pho.pt).prod())

    # Conversion-safe electron veto: the SF inputs differ per year
    year = extract_year(df['dataset'])
    if year in [2016, 2017]:
        # Map each photon onto the CSEV bin index:
        # barrel/endcap crossed with the r9 category
        bin_index = 0.5 * photons.barrel + 3.5 * ~photons.barrel \
            + 1 * (photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        weights.add("photon_csev",
                    evaluator['photon_csev'](bin_index).prod())
    elif year == 2018:
        csev_weight = evaluator['photon_csev'](photons.pt, photons.eta).prod()
        # Presumably zero marks an empty SF bin; fall back to unity
        csev_weight[csev_weight == 0] = 1
        weights.add("photon_csev", csev_weight)
    return weights
def setup_photons(df):
    """Build the photon collection and apply the baseline selection
    (pt > 200, barrel, electron veto).

    :param df: Data frame with the NanoAOD photon branches
    :returns: Selected photon JaggedCandidateArray
    """
    # 2016 samples carry the cut-based ID in 'Photon_cutBased';
    # later years use 'Photon_cutBasedBitmap'
    id_branch = 'Photon_cutBased' if extract_year(df['dataset']) == 2016 \
        else 'Photon_cutBasedBitmap'

    abs_eta = np.abs(df['Photon_eta'])
    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=abs_eta,
        phi=df['Photon_phi'],
        mass=0 * df['Photon_pt'],
        mediumId=(df[id_branch] >= 2) & df['Photon_electronVeto'],
        r9=df['Photon_r9'],
        barrel=abs_eta < 1.479,
        vid=df['Photon_vidNestedWPBitmap'],
        eleveto=df['Photon_electronVeto'],
        sieie=df['Photon_sieie'],
    )

    keep = (photons.pt > 200) & photons.barrel & photons.eleveto
    return photons[keep]
def _configure(self, df):
    """Point dynaconf at the monojet configuration for this chunk's era.

    :param df: Data frame; the dataset name determines the year/era
    """
    self._year = extract_year(df['dataset'])

    # Reload config based on year
    cfg.DYNACONF_WORKS = "merge_configs"
    cfg.MERGE_ENABLED_FOR_DYNACONF = True
    cfg.SETTINGS_FILE_FOR_DYNACONF = bucoffea_path("config/monojet.yaml")
    cfg.ENV_FOR_DYNACONF = f"era{self._year}"
    cfg.reload()
def _configure(self, df=None):
    """Point dynaconf at the vbfhinv configuration.

    :param df: Optional data frame; when given, the configuration
               environment is chosen from the dataset's data-taking year,
               otherwise the default environment is used.
    """
    cfg.DYNACONF_WORKS = "merge_configs"
    cfg.MERGE_ENABLED_FOR_DYNACONF = True
    cfg.SETTINGS_FILE_FOR_DYNACONF = bucoffea_path("config/vbfhinv.yaml")

    # Reload config based on year. Compare against None explicitly:
    # truthiness of a (possibly empty) data frame is not a reliable
    # "was a frame passed" check.
    if df is not None:
        self._year = extract_year(df['dataset'])
        cfg.ENV_FOR_DYNACONF = f"era{self._year}"
    else:
        cfg.ENV_FOR_DYNACONF = "default"
    cfg.reload()
def candidate_weights(weights, df, evaluator, muons, electrons, photons, cfg):
    """Attach lepton/photon ID, isolation and reconstruction scale factors.

    :param weights: Weights object to write the scale factors into
    :param df: Data frame carrying the per-object tight/loose flags
    :param evaluator: Scale-factor lookup evaluator
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :param cfg: Analysis configuration (SF-related switches)
    :returns: The updated weights object
    """
    year = extract_year(df['dataset'])
    # Muon ID and Isolation for tight and loose WP
    # Function of pT, eta (Order!)
    weight_muons_id_tight = evaluator['muon_id_tight'](
        muons[df['is_tight_muon']].pt,
        muons[df['is_tight_muon']].abseta).prod()
    weight_muons_iso_tight = evaluator['muon_iso_tight'](
        muons[df['is_tight_muon']].pt,
        muons[df['is_tight_muon']].abseta).prod()

    if cfg.SF.DIMUO_ID_SF.USE_AVERAGE:
        # For events with exactly one tight dimuon pair, apply the
        # average 0.5*(T0*L1 + T1*L0) of tight/loose SF combinations
        # instead of the plain tight product.
        tight_dimuons = muons[df["is_tight_muon"]].distincts()
        t0 = (evaluator['muon_id_tight'](tight_dimuons.i0.pt, tight_dimuons.i0.abseta) \
              * evaluator['muon_iso_tight'](tight_dimuons.i0.pt, tight_dimuons.i0.abseta)).prod()
        t1 = (evaluator['muon_id_tight'](tight_dimuons.i1.pt, tight_dimuons.i1.abseta) \
              * evaluator['muon_iso_tight'](tight_dimuons.i1.pt, tight_dimuons.i1.abseta)).prod()
        l0 = (evaluator['muon_id_loose'](tight_dimuons.i0.pt, tight_dimuons.i0.abseta) \
              * evaluator['muon_iso_loose'](tight_dimuons.i0.pt, tight_dimuons.i0.abseta)).prod()
        l1 = (evaluator['muon_id_loose'](tight_dimuons.i1.pt, tight_dimuons.i1.abseta) \
              * evaluator['muon_iso_loose'](tight_dimuons.i1.pt, tight_dimuons.i1.abseta)).prod()
        weights_2m_tight = 0.5 * (l0 * t1 + l1 * t0)
        # Use the averaged weight only when there is exactly one tight pair
        weights.add(
            "muon_id_iso_tight",
            weight_muons_id_tight * weight_muons_iso_tight *
            (tight_dimuons.counts != 1) + weights_2m_tight *
            (tight_dimuons.counts == 1))
    else:
        weights.add("muon_id_iso_tight",
                    weight_muons_id_tight * weight_muons_iso_tight)

    weights.add(
        "muon_id_loose",
        evaluator['muon_id_loose'](muons[~df['is_tight_muon']].pt,
                                   muons[~df['is_tight_muon']].abseta).prod())
    weights.add(
        "muon_iso_loose",
        evaluator['muon_iso_loose'](muons[~df['is_tight_muon']].pt,
                                    muons[~df['is_tight_muon']].abseta).prod())

    # Electron ID and reco
    # Function of eta, pT (Other way round relative to muons!)
    # For 2017, the reco SF is split below/above 20 GeV
    if year == 2017:
        high_et = electrons.pt > 20
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc[high_et],
                                            electrons.pt[high_et]).prod()
        ele_reco_sf *= evaluator['ele_reco_pt_lt_20'](
            electrons.etasc[~high_et], electrons.pt[~high_et]).prod()
    else:
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc,
                                            electrons.pt).prod()
    weights.add("ele_reco", ele_reco_sf)

    # ID/iso SF is not split
    # in case of 2 tight electrons, we want to apply 0.5*(T1L2+T2L1) instead of T1T2
    weights_electrons_tight = evaluator['ele_id_tight'](
        electrons[df['is_tight_electron']].etasc,
        electrons[df['is_tight_electron']].pt).prod()
    if cfg.SF.DIELE_ID_SF.USE_AVERAGE:
        tight_dielectrons = electrons[df["is_tight_electron"]].distincts()
        l0 = evaluator['ele_id_loose'](tight_dielectrons.i0.etasc,
                                       tight_dielectrons.i0.pt).prod()
        t0 = evaluator['ele_id_tight'](tight_dielectrons.i0.etasc,
                                       tight_dielectrons.i0.pt).prod()
        l1 = evaluator['ele_id_loose'](tight_dielectrons.i1.etasc,
                                       tight_dielectrons.i1.pt).prod()
        t1 = evaluator['ele_id_tight'](tight_dielectrons.i1.etasc,
                                       tight_dielectrons.i1.pt).prod()
        weights_2e_tight = 0.5 * (l0 * t1 + l1 * t0)
        # Averaged weight only when there is exactly one tight pair
        weights.add(
            "ele_id_tight",
            weights_electrons_tight * (tight_dielectrons.counts != 1) +
            weights_2e_tight * (tight_dielectrons.counts == 1))
    else:
        weights.add("ele_id_tight", weights_electrons_tight)
    weights.add(
        "ele_id_loose",
        evaluator['ele_id_loose'](
            electrons[~df['is_tight_electron']].etasc,
            electrons[~df['is_tight_electron']].pt).prod())

    # Photon ID and electron veto
    if cfg.SF.PHOTON.USETNP:
        # Tag-and-probe derived photon ID SF, binned in |eta| only
        weights.add(
            "photon_id_tight",
            evaluator['photon_id_tight_tnp'](np.abs(
                photons[df['is_tight_photon']].eta)).prod())
    else:
        weights.add(
            "photon_id_tight",
            evaluator['photon_id_tight'](
                photons[df['is_tight_photon']].eta,
                photons[df['is_tight_photon']].pt).prod())

    # Conversion-safe electron veto SF; the lookup inputs differ per year
    if year == 2016:
        csev_weight = evaluator["photon_csev"](photons.abseta,
                                               photons.pt).prod()
    elif year == 2017:
        # Map each photon onto the 2017 CSEV bin index:
        # barrel/endcap crossed with the r9 category
        csev_sf_index = 0.5 * photons.barrel + 3.5 * \
            ~photons.barrel + 1 * (
                photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        csev_weight = evaluator['photon_csev'](csev_sf_index).prod()
    elif year == 2018:
        csev_weight = evaluator['photon_csev'](photons.pt,
                                               photons.abseta).prod()
    # NOTE(review): csev_weight is only defined for years 2016-2018; any
    # other year would raise a NameError on the next line.
    # Presumably a SF of exactly zero marks an empty bin — fall back to 1.
    csev_weight[csev_weight == 0] = 1
    weights.add("photon_csev", csev_weight)
    return weights
def setup_candidates(df, cfg):
    """Build all physics-object collections for one chunk of events.

    NanoAOD branches are wrapped into JaggedCandidateArrays, and the
    baseline ("loose") object selections plus the cross-object overlap
    cleaning configured in *cfg* are applied.

    :param df: Data frame holding the NanoAOD branches for this chunk
    :param cfg: Analysis configuration (cut values, branch names, flags)
    :returns: tuple (met_pt, met_phi, ak4, bjets, ak8, muons, electrons,
              taus, photons)
    """
    # Pick the jet/MET branch suffix: 2016+2017 data are read uncorrected,
    # 2018 data and MC use the '_nom' (nominal JES) branches, and MET in MC
    # may optionally use the JER-smeared variant.
    if df['is_data'] and extract_year(df['dataset']) != 2018:
        # 2016, 2017 data
        jes_suffix = ''
        jes_suffix_met = ''
    elif df['is_data']:
        # 2018 data
        jes_suffix = '_nom'
        jes_suffix_met = '_nom'
    else:
        # MC, all years
        jes_suffix = '_nom'
        if cfg.MET.JER:
            jes_suffix_met = '_jer'
        else:
            jes_suffix_met = '_nom'

    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        abseta=np.abs(df['Muon_eta']),
        phi=df['Muon_phi'],
        mass=0 * df['Muon_pt'],
        charge=df['Muon_charge'],
        looseId=df['Muon_looseId'],
        iso=df["Muon_pfRelIso04_all"],
        tightId=df['Muon_tightId'],
        dxy=df['Muon_dxy'],
        dz=df['Muon_dz'])

    # All muons must be at least loose
    muons = muons[muons.looseId \
        & (muons.iso < cfg.MUON.CUTS.LOOSE.ISO) \
        & (muons.pt > cfg.MUON.CUTS.LOOSE.PT) \
        & (muons.abseta<cfg.MUON.CUTS.LOOSE.ETA) \
        ]

    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        abseta=np.abs(df['Electron_eta']),
        # Supercluster eta = electron eta plus the delta w.r.t. the SC
        etasc=df['Electron_eta'] + df['Electron_deltaEtaSC'],
        absetasc=np.abs(df['Electron_eta'] + df['Electron_deltaEtaSC']),
        phi=df['Electron_phi'],
        mass=0 * df['Electron_pt'],
        charge=df['Electron_charge'],
        looseId=(df[cfg.ELECTRON.BRANCH.ID] >= 1),
        tightId=(df[cfg.ELECTRON.BRANCH.ID] == 4),
        dxy=np.abs(df['Electron_dxy']),
        dz=np.abs(df['Electron_dz']),
        barrel=np.abs(df['Electron_eta'] + df['Electron_deltaEtaSC']) <= 1.4442)

    # All electrons must be at least loose.
    # Impact-parameter cuts differ between barrel and endcap.
    pass_dxy = (electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.BARREL)) \
        | (~electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.ENDCAP))
    pass_dz = (electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.BARREL)) \
        | (~electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.ENDCAP))

    electrons = electrons[electrons.looseId \
        & (electrons.pt>cfg.ELECTRON.CUTS.LOOSE.PT) \
        & (electrons.absetasc<cfg.ELECTRON.CUTS.LOOSE.ETA) \
        & pass_dxy \
        & pass_dz
        ]

    if cfg.OVERLAP.ELECTRON.MUON.CLEAN:
        electrons = electrons[object_overlap(electrons, muons, dr=cfg.OVERLAP.ELECTRON.MUON.DR)]

    taus = JaggedCandidateArray.candidatesfromcounts(
        df['nTau'],
        pt=df['Tau_pt'],
        eta=df['Tau_eta'],
        abseta=np.abs(df['Tau_eta']),
        phi=df['Tau_phi'],
        mass=0 * df['Tau_pt'],
        decaymode=df[cfg.TAU.BRANCH.ID],
        iso=df[cfg.TAU.BRANCH.ISO])

    # For MC, add the matched gen-particle info for checking
    if not df['is_data']:
        kwargs = {'genpartflav': df['Tau_genPartFlav']}
        taus.add_attributes(**kwargs)

    # (taus.iso & 2) == 2 requires bit 1 of the isolation bitmap to be set
    taus = taus[ (taus.decaymode) \
        & (taus.pt > cfg.TAU.CUTS.PT)\
        & (taus.abseta < cfg.TAU.CUTS.ETA) \
        & ((taus.iso&2)==2)]

    if cfg.OVERLAP.TAU.MUON.CLEAN:
        taus = taus[object_overlap(taus, muons, dr=cfg.OVERLAP.TAU.MUON.DR)]
    if cfg.OVERLAP.TAU.ELECTRON.CLEAN:
        taus = taus[object_overlap(taus, electrons, dr=cfg.OVERLAP.TAU.ELECTRON.DR)]

    # choose the right branch name for photon ID bitmap depending on the
    # actual name in the file (different between nano v5 and v7)
    if cfg.PHOTON.BRANCH.ID in df.keys():
        PHOTON_BRANCH_ID = cfg.PHOTON.BRANCH.ID
    else:
        PHOTON_BRANCH_ID = cfg.PHOTON.BRANCH.IDV7

    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=np.abs(df['Photon_eta']),
        phi=df['Photon_phi'],
        mass=0 * df['Photon_pt'],
        looseId=(df[PHOTON_BRANCH_ID] >= 1) & df['Photon_electronVeto'],
        mediumId=(df[PHOTON_BRANCH_ID] >= 2) & df['Photon_electronVeto'],
        r9=df['Photon_r9'],
        barrel=df['Photon_isScEtaEB'],
    )
    photons = photons[photons.looseId \
        & (photons.pt > cfg.PHOTON.CUTS.LOOSE.pt) \
        & (photons.abseta < cfg.PHOTON.CUTS.LOOSE.eta)
        ]

    if cfg.OVERLAP.PHOTON.MUON.CLEAN:
        photons = photons[object_overlap(photons, muons, dr=cfg.OVERLAP.PHOTON.MUON.DR)]
    if cfg.OVERLAP.PHOTON.ELECTRON.CLEAN:
        photons = photons[object_overlap(photons, electrons, dr=cfg.OVERLAP.PHOTON.ELECTRON.DR)]

    # AK4 jets. If JER smearing is disabled for MC, divide the nominal pt
    # by the JER correction factor to undo the smearing.
    ak4 = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df[f'Jet_pt{jes_suffix}'] if (df['is_data'] or cfg.AK4.JER) else df[f'Jet_pt{jes_suffix}'] / df['Jet_corr_JER'],
        eta=df['Jet_eta'],
        abseta=np.abs(df['Jet_eta']),
        phi=df['Jet_phi'],
        mass=np.zeros_like(df['Jet_pt']),
        looseId=( df['Jet_jetId'] & 2) == 2, # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        tightId=( df['Jet_jetId'] & 2) == 2, # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        # Medium pileup jet ID, or pt > 50 where the pileup ID does not apply
        puid=((df['Jet_puId'] & 2 > 0) | ((df[f'Jet_pt{jes_suffix}'] if (df['is_data'] or cfg.AK4.JER) else df[f'Jet_pt{jes_suffix}'] / df['Jet_corr_JER']) > 50)), # medium pileup jet ID
        csvv2=df["Jet_btagCSVV2"],
        deepcsv=df['Jet_btagDeepB'],
        nef=df['Jet_neEmEF'],
        nhf=df['Jet_neHEF'],
        chf=df['Jet_chHEF'],
        ptraw=df['Jet_pt'] * (1 - df['Jet_rawFactor']),
        nconst=df['Jet_nConstituents'],
        hadflav=0 * df['Jet_pt'] if df['is_data'] else df['Jet_hadronFlavour'])

    # Before cleaning, apply HEM veto: flag events with any jet in the
    # eta/phi window below.
    # NOTE(review): window presumably corresponds to the 2018 HEM15/16
    # failure region — confirm.
    hem_ak4 = ak4[(ak4.pt > 30) &
                  (-3.0 < ak4.eta) &
                  (ak4.eta < -1.3) &
                  (-1.57 < ak4.phi) &
                  (ak4.phi < -0.87)]
    df['hemveto'] = hem_ak4.counts == 0

    # B jets have their own overlap cleaning,
    # so deal with them before applying filtering to jets
    btag_discriminator = getattr(ak4, cfg.BTAG.algo)
    btag_cut = cfg.BTAG.CUTS[cfg.BTAG.algo][cfg.BTAG.wp]
    bjets = ak4[
        (ak4.pt > cfg.BTAG.PT) \
        & (ak4.abseta < cfg.BTAG.ETA) \
        & (btag_discriminator > btag_cut)
    ]

    if cfg.OVERLAP.BTAG.MUON.CLEAN:
        bjets = bjets[object_overlap(bjets, muons, dr=cfg.OVERLAP.BTAG.MUON.DR)]
    if cfg.OVERLAP.BTAG.ELECTRON.CLEAN:
        bjets = bjets[object_overlap(bjets, electrons, dr=cfg.OVERLAP.BTAG.ELECTRON.DR)]
    if cfg.OVERLAP.BTAG.PHOTON.CLEAN:
        bjets = bjets[object_overlap(bjets, photons, dr=cfg.OVERLAP.BTAG.PHOTON.DR)]

    ak4 = ak4[ak4.looseId]

    if cfg.OVERLAP.AK4.MUON.CLEAN:
        ak4 = ak4[object_overlap(ak4, muons, dr=cfg.OVERLAP.AK4.MUON.DR)]
    if cfg.OVERLAP.AK4.ELECTRON.CLEAN:
        ak4 = ak4[object_overlap(ak4, electrons, dr=cfg.OVERLAP.AK4.ELECTRON.DR)]
    if cfg.OVERLAP.AK4.PHOTON.CLEAN:
        ak4 = ak4[object_overlap(ak4, photons, dr=cfg.OVERLAP.AK4.PHOTON.DR)]

    # Soft-drop mass: in MC, undo the JMR/JMS corrections, and also the
    # JER smearing when it is disabled in the configuration.
    if df['is_data']:
        msd = df[f'FatJet_msoftdrop{jes_suffix}']
    else:
        msd = df[f'FatJet_msoftdrop{jes_suffix}'] / (
            df['FatJet_msoftdrop_corr_JMR'] * df['FatJet_msoftdrop_corr_JMS'])
        if not cfg.AK8.JER:
            msd = msd / df['FatJet_corr_JER']

    ak8 = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df[f'FatJet_pt{jes_suffix}'] if (df['is_data'] or cfg.AK8.JER) else df[f'FatJet_pt{jes_suffix}'] / df['FatJet_corr_JER'],
        eta=df['FatJet_eta'],
        abseta=np.abs(df['FatJet_eta']),
        phi=df['FatJet_phi'],
        mass=msd,
        tightId=(df['FatJet_jetId'] & 2) == 2, # Tight
        csvv2=df["FatJet_btagCSVV2"],
        deepcsv=df['FatJet_btagDeepB'],
        tau1=df['FatJet_tau1'],
        tau2=df['FatJet_tau2'],
        tau21=df['FatJet_tau2'] / df['FatJet_tau1'],
        wvsqcd=df['FatJet_deepTag_WvsQCD'],
        wvsqcdmd=df['FatJet_deepTagMD_WvsQCD'],
        zvsqcd=df['FatJet_deepTag_ZvsQCD'],
        zvsqcdmd=df['FatJet_deepTagMD_ZvsQCD'],
        tvsqcd=df['FatJet_deepTag_TvsQCD'],
        tvsqcdmd=df['FatJet_deepTagMD_TvsQCD'],
        # Combinations of the W-vs-QCD and top-vs-QCD deepTag outputs
        wvstqcd=df['FatJet_deepTag_WvsQCD'] * (1 - df['FatJet_deepTag_TvsQCD']) /
                (1 - df['FatJet_deepTag_WvsQCD'] * df['FatJet_deepTag_TvsQCD']),
        wvstqcdmd=df['FatJet_deepTagMD_WvsQCD'] * (1 - df['FatJet_deepTagMD_TvsQCD']) /
                  (1 - df['FatJet_deepTagMD_WvsQCD'] * df['FatJet_deepTagMD_TvsQCD']),
    )
    # AK8 jets: tight ID plus overlap cleaning against leptons and photons
    ak8 = ak8[ak8.tightId & object_overlap(ak8, muons)
              & object_overlap(ak8, electrons)
              & object_overlap(ak8, photons)]

    # 2017 uses the dedicated METFixEE2017 branches
    if extract_year(df['dataset']) == 2017:
        met_branch = 'METFixEE2017'
    else:
        met_branch = 'MET'

    met_pt = df[f'{met_branch}_pt{jes_suffix_met}']
    met_phi = df[f'{met_branch}_phi{jes_suffix_met}']

    return met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons
def data_driven_qcd_dataset(dataset):
    """Dataset name to use for data-driven QCD estimate.

    :param dataset: Name of the dataset whose year determines the QCD set
    :returns: 'QCD_data_<year>'
    """
    return f"QCD_data_{extract_year(dataset)}"
def candidate_weights(weights, df, evaluator, muons, electrons, photons):
    """Attach lepton and photon ID/iso/reco scale-factor weights.

    :param weights: Weights object to write the scale factors into
    :param df: Data frame carrying the per-object tight/loose flags
    :param evaluator: Scale-factor lookup evaluator
    :param muons: Muon candidates
    :param electrons: Electron candidates
    :param photons: Photon candidates
    :returns: The updated weights object
    """
    year = extract_year(df['dataset'])
    # Muon ID and Isolation for tight and loose WP
    # Function of pT, eta (Order!)
    weights.add(
        "muon_id_tight",
        evaluator['muon_id_tight'](muons[df['is_tight_muon']].pt,
                                   muons[df['is_tight_muon']].abseta).prod())
    weights.add(
        "muon_iso_tight",
        evaluator['muon_iso_tight'](muons[df['is_tight_muon']].pt,
                                    muons[df['is_tight_muon']].abseta).prod())
    weights.add(
        "muon_id_loose",
        evaluator['muon_id_loose'](muons[~df['is_tight_muon']].pt,
                                   muons[~df['is_tight_muon']].abseta).prod())
    weights.add(
        "muon_iso_loose",
        evaluator['muon_iso_loose'](muons[~df['is_tight_muon']].pt,
                                    muons[~df['is_tight_muon']].abseta).prod())

    # Electron ID and reco
    # Function of eta, pT (Other way round relative to muons!)
    # For 2017, the reco SF is split below/above 20 GeV
    if year == 2017:
        high_et = electrons.pt > 20
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc[high_et],
                                            electrons.pt[high_et]).prod()
        ele_reco_sf *= evaluator['ele_reco_pt_lt_20'](
            electrons.etasc[~high_et], electrons.pt[~high_et]).prod()
    else:
        ele_reco_sf = evaluator['ele_reco'](electrons.etasc,
                                            electrons.pt).prod()
    weights.add("ele_reco", ele_reco_sf)
    # ID/iso SF is not split
    weights.add(
        "ele_id_tight",
        evaluator['ele_id_tight'](
            electrons[df['is_tight_electron']].etasc,
            electrons[df['is_tight_electron']].pt).prod())
    weights.add(
        "ele_id_loose",
        evaluator['ele_id_loose'](
            electrons[~df['is_tight_electron']].etasc,
            electrons[~df['is_tight_electron']].pt).prod())

    # Photon ID and electron veto
    weights.add(
        "photon_id_tight",
        evaluator['photon_id_tight'](photons[df['is_tight_photon']].eta,
                                     photons[df['is_tight_photon']].pt).prod())

    # Conversion-safe electron veto SF; the lookup inputs differ per year
    if year == 2016:
        csev_weight = evaluator["photon_csev"](photons.abseta,
                                               photons.pt).prod()
    elif year == 2017:
        # Map each photon onto the 2017 CSEV bin index:
        # barrel/endcap crossed with the r9 category
        csev_sf_index = 0.5 * photons.barrel + 3.5 * ~photons.barrel + 1 * (
            photons.r9 > 0.94) + 2 * (photons.r9 <= 0.94)
        csev_weight = evaluator['photon_csev'](csev_sf_index).prod()
    elif year == 2018:
        csev_weight = evaluator['photon_csev'](photons.pt,
                                               photons.abseta).prod()
    # Presumably a SF of exactly zero marks an empty bin — fall back to 1.
    # NOTE(review): csev_weight is only defined for years 2016-2018; any
    # other year would raise a NameError here.
    csev_weight[csev_weight == 0] = 1
    weights.add("photon_csev", csev_weight)
    return weights
def process(self, df): if not df.size: return self.accumulator.identity() self._configure(df) dataset = df['dataset'] df['is_lo_w'] = is_lo_w(dataset) df['is_lo_z'] = is_lo_z(dataset) df['is_lo_g'] = is_lo_g(dataset) df['is_nlo_z'] = is_nlo_z(dataset) df['is_nlo_w'] = is_nlo_w(dataset) df['has_v_jet'] = has_v_jet(dataset) df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df['is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] df['is_data'] = is_data(dataset) gen_v_pt = None if not df['is_data']: gen = setup_gen_candidates(df) if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df['is_nlo_w']: dressed = setup_dressed_gen_candidates(df) fill_gen_v_info(df, gen, dressed) gen_v_pt = df['gen_v_pt_combined'] elif df['is_lo_g']: gen_v_pt = gen[(gen.pdg==22) & (gen.status==1)].pt.max() # Candidates # Already pre-filtered! # All leptons are at least loose # Check out setup_candidates for filtering details met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons = setup_candidates(df, cfg) # Muons df['is_tight_muon'] = muons.tightId \ & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \ & (muons.pt>cfg.MUON.CUTS.TIGHT.PT) \ & (muons.abseta<cfg.MUON.CUTS.TIGHT.ETA) dimuons = muons.distincts() dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge'] df['MT_mu'] = ((muons.counts==1) * mt(muons.pt, muons.phi, met_pt, met_phi)).max() # Electrons df['is_tight_electron'] = electrons.tightId \ & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \ & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA) dielectrons = electrons.distincts() dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge'] df['MT_el'] = ((electrons.counts==1) * mt(electrons.pt, electrons.phi, met_pt, met_phi)).max() # ak4 leadak4_index=ak4.pt.argmax() elejet_pairs = ak4[:,:1].cross(electrons) df['dREleJet'] = np.hypot(elejet_pairs.i0.eta-elejet_pairs.i1.eta , dphi(elejet_pairs.i0.phi,elejet_pairs.i1.phi)).min() muonjet_pairs = ak4[:,:1].cross(muons) df['dRMuonJet'] = 
np.hypot(muonjet_pairs.i0.eta-muonjet_pairs.i1.eta , dphi(muonjet_pairs.i0.phi,muonjet_pairs.i1.phi)).min() # Photons # Angular distance leading photon - leading jet phojet_pairs = ak4[:,:1].cross(photons[:,:1]) df['dRPhotonJet'] = np.hypot(phojet_pairs.i0.eta-phojet_pairs.i1.eta , dphi(phojet_pairs.i0.phi,phojet_pairs.i1.phi)).min() # Recoil df['recoil_pt'], df['recoil_phi'] = recoil(met_pt,met_phi, electrons, muons, photons) df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"] df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4, df['recoil_phi'], njet=4, ptmin=30, etamax=2.4) df["minDPhiJetMet"] = min_dphi_jet_met(ak4, met_phi, njet=4, ptmin=30, etamax=2.4) selection = processor.PackedSelection() # Triggers pass_all = np.ones(df.size)==1 selection.add('inclusive', pass_all) selection = trigger_selection(selection, df, cfg) selection.add('mu_pt_trig_safe', muons.pt.max() > 30) # Common selection selection.add('veto_ele', electrons.counts==0) selection.add('veto_muo', muons.counts==0) selection.add('veto_photon', photons.counts==0) selection.add('veto_tau', taus.counts==0) selection.add('veto_b', bjets.counts==0) selection.add('mindphijr',df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('mindphijm',df['minDPhiJetMet'] > cfg.SELECTION.SIGNAL.MINDPHIJR) selection.add('dpfcalo',np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO) selection.add('recoil', df['recoil_pt']>cfg.SELECTION.SIGNAL.RECOIL) if(cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018 and not cfg.RUN.SYNC): selection.add('hemveto', df['hemveto']) else: selection.add('hemveto', np.ones(df.size)==1) # AK4 Jet leadak4_pt_eta = (ak4.pt.max() > cfg.SELECTION.SIGNAL.leadak4.PT) \ & (ak4.abseta[leadak4_index] < cfg.SELECTION.SIGNAL.leadak4.ETA).any() selection.add('leadak4_pt_eta', leadak4_pt_eta) selection.add('leadak4_id',(ak4.tightId[leadak4_index] \ & (ak4.chf[leadak4_index] >cfg.SELECTION.SIGNAL.leadak4.CHF) \ & 
(ak4.nhf[leadak4_index]<cfg.SELECTION.SIGNAL.leadak4.NHF)).any()) # AK8 Jet leadak8_index=ak8.pt.argmax() leadak8_pt_eta = (ak8.pt.max() > cfg.SELECTION.SIGNAL.leadak8.PT) \ & (ak8.abseta[leadak8_index] < cfg.SELECTION.SIGNAL.leadak8.ETA).any() selection.add('leadak8_pt_eta', leadak8_pt_eta) selection.add('leadak8_id',(ak8.tightId[leadak8_index]).any()) # Mono-V selection selection.add('leadak8_tau21', ((ak8.tau2[leadak8_index] / ak8.tau1[leadak8_index]) < cfg.SELECTION.SIGNAL.LEADAK8.TAU21).any()) selection.add('leadak8_mass', ((ak8.mass[leadak8_index] > cfg.SELECTION.SIGNAL.LEADAK8.MASS.MIN) \ & (ak8.mass[leadak8_index] < cfg.SELECTION.SIGNAL.LEADAK8.MASS.MAX)).any()) selection.add('leadak8_wvsqcd_loosemd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.LOOSEMD) & (ak8.wvsqcdmd[leadak8_index] < cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_tightmd', ((ak8.wvsqcdmd[leadak8_index] > cfg.WTAG.TIGHTMD)).any()) selection.add('leadak8_wvsqcd_loose', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.LOOSE) & (ak8.wvsqcd[leadak8_index] < cfg.WTAG.TIGHT)).any()) selection.add('leadak8_wvsqcd_tight', ((ak8.wvsqcd[leadak8_index] > cfg.WTAG.TIGHT)).any()) selection.add('veto_vtag', ~selection.all("leadak8_pt_eta", "leadak8_id", "leadak8_tau21", "leadak8_mass")) selection.add('only_one_ak8', ak8.counts==1) # Dimuon CR leadmuon_index=muons.pt.argmax() selection.add('at_least_one_tight_mu', df['is_tight_muon'].any()) selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \ & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any()) selection.add('dimuon_charge', (dimuon_charge==0).any()) selection.add('two_muons', muons.counts==2) # Single muon CR selection.add('one_muon', muons.counts==1) selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT) # Diele CR leadelectron_index=electrons.pt.argmax() selection.add('one_electron', electrons.counts==1) selection.add('two_electrons', electrons.counts==2) 
selection.add('at_least_one_tight_el', df['is_tight_electron'].any()) selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \ & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any()) selection.add('dielectron_charge', (dielectron_charge==0).any()) selection.add('two_electrons', electrons.counts==2) # Single Ele CR selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET) selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT) # Photon CR leadphoton_index=photons.pt.argmax() df['is_tight_photon'] = photons.mediumId \ & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA) selection.add('one_photon', photons.counts==1) selection.add('at_least_one_tight_photon', df['is_tight_photon'].any()) selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT) selection.add('photon_pt_trig', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG) # Fill histograms output = self.accumulator.identity() # Gen if gen_v_pt is not None: output['genvpt_check'].fill(vpt=gen_v_pt,type="Nano", dataset=dataset, weight=df['Generator_weight']) if 'LHE_HT' in df: output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT']) # Weights evaluator = evaluator_from_config(cfg) weights = processor.Weights(size=df.size, storeIndividual=True) if not df['is_data']: weights.add('gen', df['Generator_weight']) try: weights.add('prefire', df['PrefireWeight']) except KeyError: weights.add('prefire', np.ones(df.size)) weights = candidate_weights(weights, df, evaluator, muons, electrons, photons) weights = pileup_weights(weights, df, evaluator, cfg) if not (gen_v_pt is None): weights = theory_weights_monojet(weights, df, evaluator, gen_v_pt) # Save per-event values for synchronization if cfg.RUN.KINEMATICS.SAVE: for event in cfg.RUN.KINEMATICS.EVENTS: mask = df['event'] == event if not mask.any(): continue output['kinematics']['event'] += [event] output['kinematics']['met'] += [met_pt[mask].flatten()] output['kinematics']['met_phi'] += 
[met_phi[mask].flatten()] output['kinematics']['recoil'] += [df['recoil_pt'][mask].flatten()] output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask].flatten()] output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt.flatten()] output['kinematics']['ak4eta0'] += [ak4[leadak4_index][mask].eta.flatten()] output['kinematics']['leadbtag'] += [ak4.pt.max()<0][mask] output['kinematics']['nLooseMu'] += [muons.counts[mask]] output['kinematics']['nTightMu'] += [muons[df['is_tight_muon']].counts[mask].flatten()] output['kinematics']['mupt0'] += [muons[leadmuon_index][mask].pt.flatten()] output['kinematics']['mueta0'] += [muons[leadmuon_index][mask].eta.flatten()] output['kinematics']['muphi0'] += [muons[leadmuon_index][mask].phi.flatten()] output['kinematics']['nLooseEl'] += [electrons.counts[mask]] output['kinematics']['nTightEl'] += [electrons[df['is_tight_electron']].counts[mask].flatten()] output['kinematics']['elpt0'] += [electrons[leadelectron_index][mask].pt.flatten()] output['kinematics']['eleta0'] += [electrons[leadelectron_index][mask].eta.flatten()] output['kinematics']['nLooseGam'] += [photons.counts[mask]] output['kinematics']['nTightGam'] += [photons[df['is_tight_photon']].counts[mask].flatten()] output['kinematics']['gpt0'] += [photons[leadphoton_index][mask].pt.flatten()] output['kinematics']['geta0'] += [photons[leadphoton_index][mask].eta.flatten()] # Sum of all weights to use for normalization # TODO: Deal with systematic variations output['nevents'][dataset] += df.size if not df['is_data']: output['sumw'][dataset] += df['genEventSumw'] output['sumw2'][dataset] += df['genEventSumw2'] output['sumw_pileup'][dataset] += weights.partial_weight(include=['pileup']).sum() regions = monojet_regions(cfg) for region, cuts in regions.items(): region_weights = copy.deepcopy(weights) if not df['is_data']: if re.match(r'cr_(\d+)e.*', region): region_weights.add('trigger', np.ones(df.size)) elif re.match(r'cr_(\d+)m.*', region) or re.match('sr_.*', region): 
region_weights.add('trigger', evaluator["trigger_met"](df['recoil_pt'])) elif re.match(r'cr_g.*', region): region_weights.add('trigger', np.ones(df.size)) if not df['is_data']: genVs = gen[((gen.pdg==23) | (gen.pdg==24) | (gen.pdg==-24)) & (gen.pt>10)] leadak8 = ak8[ak8.pt.argmax()] leadak8_matched_mask = leadak8.match(genVs, deltaRCut=0.8) matched_leadak8 = leadak8[leadak8_matched_mask] unmatched_leadak8 = leadak8[~leadak8_matched_mask] for wp in ['loose','loosemd','tight','tightmd']: if re.match(r'.*_{wp}_v.*', region): if (wp == 'tight') or ('nomistag' in region): # no mistag SF available for tight cut matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() else: matched_weights = evaluator[f'wtag_{wp}'](matched_leadak8.pt).prod() \ * evaluator[f'wtag_mistag_{wp}'](unmatched_leadak8.pt).prod() region_weights.add('wtag_{wp}', matched_weights) # Blinding if(self._blind and df['is_data'] and region.startswith('sr')): continue # Cutflow plot for signal and control regions if any(x in region for x in ["sr", "cr", "tr"]): output['cutflow_' + region]['all']+=df.size for icut, cutname in enumerate(cuts): output['cutflow_' + region][cutname] += selection.all(*cuts[:icut+1]).sum() mask = selection.all(*cuts) if cfg.RUN.SAVE.TREE: def fill_tree(variable, values): treeacc = processor.column_accumulator(values) name = f'tree_{region}_{variable}' if dataset in output[name].keys(): output[name][dataset] += treeacc else: output[name][dataset] = treeacc if region in ['cr_2m_j','cr_1m_j','cr_2e_j','cr_1e_j','cr_g_j']: fill_tree('recoil',df['recoil_pt'][mask].flatten()) fill_tree('weight',region_weights.weight()[mask].flatten()) if gen_v_pt is not None: fill_tree('gen_v_pt',gen_v_pt[mask].flatten()) else: fill_tree('gen_v_pt', -1 * np.ones(sum(mask))) # Save the event numbers of events passing this selection if cfg.RUN.SAVE.PASSING: output['selected_events'][region] += list(df['event'][mask]) # Multiplicities def fill_mult(name, candidates): output[name].fill( 
dataset=dataset, region=region, multiplicity=candidates[mask].counts, weight=region_weights.weight()[mask] ) fill_mult('ak8_mult', ak8) fill_mult('ak4_mult', ak4) fill_mult('bjet_mult',bjets) fill_mult('loose_ele_mult',electrons) fill_mult('tight_ele_mult',electrons[df['is_tight_electron']]) fill_mult('loose_muo_mult',muons) fill_mult('tight_muo_mult',muons[df['is_tight_muon']]) fill_mult('tau_mult',taus) fill_mult('photon_mult',photons) def ezfill(name, **kwargs): """Helper function to make filling easier.""" output[name].fill( dataset=dataset, region=region, **kwargs ) # Monitor weights for wname, wvalue in region_weights._weights.items(): ezfill("weights", weight_type=wname, weight_value=wvalue[mask]) # All ak4 # This is a workaround to create a weight array of the right dimension w_alljets = weight_shape(ak4[mask].eta, region_weights.weight()[mask]) ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets) ezfill('ak4_eta_phi', phi=ak4[mask].phi.flatten(),eta=ak4[mask].eta.flatten(), weight=w_alljets) ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets) # Leading ak4 w_leadak4 = weight_shape(ak4[leadak4_index].eta[mask], region_weights.weight()[mask]) ezfill('ak4_eta0', jeteta=ak4[leadak4_index].eta[mask].flatten(), weight=w_leadak4) ezfill('ak4_phi0', jetphi=ak4[leadak4_index].phi[mask].flatten(), weight=w_leadak4) ezfill('ak4_pt0', jetpt=ak4[leadak4_index].pt[mask].flatten(), weight=w_leadak4) ezfill('ak4_ptraw0', jetpt=ak4[leadak4_index].ptraw[mask].flatten(), weight=w_leadak4) ezfill('ak4_chf0', frac=ak4[leadak4_index].chf[mask].flatten(), weight=w_leadak4) ezfill('ak4_nhf0', frac=ak4[leadak4_index].nhf[mask].flatten(), weight=w_leadak4) ezfill('drelejet', dr=df['dREleJet'][mask], weight=region_weights.weight()[mask]) ezfill('drmuonjet', dr=df['dRMuonJet'][mask], weight=region_weights.weight()[mask]) ezfill('drphotonjet', dr=df['dRPhotonJet'][mask], 
weight=region_weights.weight()[mask]) # AK8 jets if region=='inclusive' or region.endswith('v'): # All w_allak8 = weight_shape(ak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta', jeteta=ak8[mask].eta.flatten(), weight=w_allak8) ezfill('ak8_phi', jetphi=ak8[mask].phi.flatten(), weight=w_allak8) ezfill('ak8_pt', jetpt=ak8[mask].pt.flatten(), weight=w_allak8) ezfill('ak8_mass', mass=ak8[mask].mass.flatten(), weight=w_allak8) # Leading w_leadak8 = weight_shape(ak8[leadak8_index].eta[mask], region_weights.weight()[mask]) ezfill('ak8_eta0', jeteta=ak8[leadak8_index].eta[mask].flatten(), weight=w_leadak8) ezfill('ak8_phi0', jetphi=ak8[leadak8_index].phi[mask].flatten(), weight=w_leadak8) ezfill('ak8_pt0', jetpt=ak8[leadak8_index].pt[mask].flatten(), weight=w_leadak8 ) ezfill('ak8_mass0', mass=ak8[leadak8_index].mass[mask].flatten(), weight=w_leadak8) ezfill('ak8_tau210', tau21=ak8[leadak8_index].tau21[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcd0', tagger=ak8[leadak8_index].wvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_wvsqcdmd0', tagger=ak8[leadak8_index].wvsqcdmd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcd0', tagger=ak8[leadak8_index].zvsqcd[mask].flatten(), weight=w_leadak8) ezfill('ak8_zvsqcdmd0', tagger=ak8[leadak8_index].zvsqcdmd[mask].flatten(), weight=w_leadak8) # histogram with only gen-matched lead ak8 pt if not df['is_data']: w_matchedleadak8 = weight_shape(matched_leadak8.eta[mask], region_weights.weight()[mask]) ezfill('ak8_Vmatched_pt0', jetpt=matched_leadak8.pt[mask].flatten(), weight=w_matchedleadak8 ) # Dimuon specifically for deepak8 mistag rate measurement if 'inclusive_v' in region: ezfill('ak8_passloose_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_pt0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_pt0', 
wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_pt0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, jetpt=ak8[leadak8_index].pt[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloose_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.LOOSE, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtight_mass0', wppass=ak8[leadak8_index].wvsqcd[mask].max()>cfg.WTAG.TIGHT, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passloosemd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.LOOSEMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) ezfill('ak8_passtightmd_mass0', wppass=ak8[leadak8_index].wvsqcdmd[mask].max()>cfg.WTAG.TIGHTMD, mass=ak8[leadak8_index].mass[mask].max(), weight=w_leadak8 ) # MET ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask], weight=region_weights.weight()[mask] ) ezfill('met', met=met_pt[mask], weight=region_weights.weight()[mask] ) ezfill('met_phi', phi=met_phi[mask], weight=region_weights.weight()[mask] ) ezfill('recoil', recoil=df["recoil_pt"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_phi', phi=df["recoil_phi"][mask], weight=region_weights.weight()[mask] ) ezfill('recoil_nopog', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(include=['pileup','theory','gen','prefire'])[mask]) ezfill('recoil_nopref', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['prefire'])[mask]) ezfill('recoil_nopu', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('recoil_notrg', recoil=df["recoil_pt"][mask], weight=region_weights.partial_weight(exclude=['trigger'])[mask]) ezfill('ak4_pt0_over_recoil', ratio=ak4.pt.max()[mask]/df["recoil_pt"][mask], weight=region_weights.weight()[mask]) ezfill('dphijm', dphi=df["minDPhiJetMet"][mask], 
weight=region_weights.weight()[mask] ) ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask], weight=region_weights.weight()[mask] ) if 'noveto' in region: continue # Muons if '_1m_' in region or '_2m_' in region: w_allmu = weight_shape(muons.pt[mask], region_weights.weight()[mask]) ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu ) ezfill('muon_mt', mt=df['MT_mu'][mask], weight=region_weights.weight()[mask]) ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_eta_phi', phi=muons.phi[mask].flatten(),eta=muons.eta[mask].flatten(), weight=w_allmu) ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu) ezfill('muon_dxy', dxy=muons.dxy[mask].flatten(), weight=w_allmu) ezfill('muon_dz', dz=muons.dz[mask].flatten(), weight=w_allmu) # Leading muon w_leadmu = weight_shape(muons[leadmuon_index].pt[mask], region_weights.weight()[mask]) ezfill('muon_pt0', pt=muons[leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta0', eta=muons[leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi0', phi=muons[leadmuon_index].phi[mask].flatten(), weight=w_leadmu) ezfill('muon_dxy0', dxy=muons[leadmuon_index].dxy[mask].flatten(), weight=w_leadmu) ezfill('muon_dz0', dz=muons[leadmuon_index].dz[mask].flatten(), weight=w_leadmu) # Dimuon if '_2m_' in region: w_dimu = weight_shape(dimuons.pt[mask], region_weights.weight()[mask]) ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(), weight=w_dimu) ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(), weight=w_dimu) ezfill('dimuon_mass', dilepton_mass=dimuons.mass[mask].flatten(), weight=w_dimu ) ezfill('dimuon_dr', dr=dimuons.i0.p4.delta_r(dimuons.i1.p4)[mask].flatten(), weight=w_dimu ) ezfill('muon_pt1', pt=muons[~leadmuon_index].pt[mask].flatten(), weight=w_leadmu ) ezfill('muon_eta1', eta=muons[~leadmuon_index].eta[mask].flatten(), weight=w_leadmu) ezfill('muon_phi1', phi=muons[~leadmuon_index].phi[mask].flatten(), weight=w_leadmu) # Electrons if '_1e_' in region or 
'_2e_' in region: w_allel = weight_shape(electrons.pt[mask], region_weights.weight()[mask]) ezfill('electron_pt', pt=electrons.pt[mask].flatten(), weight=w_allel) ezfill('electron_mt', mt=df['MT_el'][mask], weight=region_weights.weight()[mask]) ezfill('electron_eta', eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_phi', phi=electrons.phi[mask].flatten(), weight=w_allel) ezfill('electron_eta_phi', phi=electrons.phi[mask].flatten(),eta=electrons.eta[mask].flatten(), weight=w_allel) ezfill('electron_dz', dz=electrons.dz[mask].flatten(), weight=w_allel) ezfill('electron_dxy', dxy=electrons.dxy[mask].flatten(), weight=w_allel) w_leadel = weight_shape(electrons[leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_pt0', pt=electrons[leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta0', eta=electrons[leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi0', phi=electrons[leadelectron_index].phi[mask].flatten(), weight=w_leadel) w_trailel = weight_shape(electrons[~leadelectron_index].pt[mask], region_weights.weight()[mask]) ezfill('electron_tightid1', id=electrons[~leadelectron_index].tightId[mask].flatten(), weight=w_trailel) # Dielectron if '_2e_' in region: w_diel = weight_shape(dielectrons.pt[mask], region_weights.weight()[mask]) ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(), weight=w_diel) ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(), weight=w_diel) ezfill('dielectron_mass', dilepton_mass=dielectrons.mass[mask].flatten(), weight=w_diel) ezfill('dielectron_dr', dr=dielectrons.i0.p4.delta_r(dielectrons.i1.p4)[mask].flatten(), weight=w_diel ) ezfill('electron_pt1', pt=electrons[~leadelectron_index].pt[mask].flatten(), weight=w_leadel) ezfill('electron_eta1', eta=electrons[~leadelectron_index].eta[mask].flatten(), weight=w_leadel) ezfill('electron_phi1', phi=electrons[~leadelectron_index].phi[mask].flatten(), weight=w_leadel) # Photon if '_g_' in 
region: w_leading_photon = weight_shape(photons[leadphoton_index].pt[mask],region_weights.weight()[mask]); ezfill('photon_pt0', pt=photons[leadphoton_index].pt[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta0', eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) ezfill('photon_phi0', phi=photons[leadphoton_index].phi[mask].flatten(), weight=w_leading_photon) ezfill('photon_eta_phi', phi=photons[leadphoton_index].phi[mask].flatten(),eta=photons[leadphoton_index].eta[mask].flatten(), weight=w_leading_photon) # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], region_weights.weight()[mask]) # PV ezfill('npv', nvtx=df['PV_npvs'][mask], weight=region_weights.weight()[mask]) ezfill('npvgood', nvtx=df['PV_npvsGood'][mask], weight=region_weights.weight()[mask]) ezfill('npv_nopu', nvtx=df['PV_npvs'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.weight()[mask]) ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.weight()[mask]) ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) ezfill('rho_central_nopu', rho=df['fixedGridRhoFastjetCentral'][mask], weight=region_weights.partial_weight(exclude=['pileup'])[mask]) return output
def process(self, df):
    """Process one chunk of events for the VBF H(inv) analysis.

    Builds physics-object collections, evaluates the signal/control-region
    event selections, computes per-event MC weights, and fills the output
    histogram accumulator for every region returned by ``vbfhinv_regions``.

    :param df: Data frame with one entry per event
    :type df: LazyDataFrame
    :returns: Filled accumulator (histograms, cutflows, sumw bookkeeping)
    """
    # Empty chunk: nothing to do, return an empty accumulator
    if not df.size:
        return self.accumulator.identity()
    self._configure(df)
    dataset = df['dataset']

    # Classify the dataset once; these flags steer gen-level info and
    # theory-weight application below
    df['is_lo_w'] = is_lo_w(dataset)
    df['is_lo_z'] = is_lo_z(dataset)
    df['is_lo_w_ewk'] = is_lo_w_ewk(dataset)
    df['is_lo_z_ewk'] = is_lo_z_ewk(dataset)
    df['is_lo_g'] = is_lo_g(dataset)
    df['is_nlo_z'] = is_nlo_z(dataset)
    df['is_nlo_w'] = is_nlo_w(dataset)
    df['has_lhe_v_pt'] = df['is_lo_w'] | df['is_lo_z'] | df[
        'is_nlo_z'] | df['is_nlo_w'] | df['is_lo_g'] | df[
            'is_lo_w_ewk'] | df['is_lo_z_ewk']
    df['is_data'] = is_data(dataset)

    # Generator-level boson pt: dressed-lepton definition for W/Z samples,
    # leading status-1 photon for photon samples
    gen_v_pt = None
    if df['is_lo_w'] or df['is_lo_z'] or df['is_nlo_z'] or df[
            'is_nlo_w'] or df['is_lo_z_ewk'] or df['is_lo_w_ewk']:
        gen = setup_gen_candidates(df)
        dressed = setup_dressed_gen_candidates(df)
        fill_gen_v_info(df, gen, dressed)
        gen_v_pt = df['gen_v_pt_dress']
    elif df['is_lo_g']:
        gen = setup_gen_candidates(df)
        gen_v_pt = gen[(gen.pdg == 22) & (gen.status == 1)].pt.max()

    # Generator-level leading dijet mass
    if df['has_lhe_v_pt']:
        genjets = setup_lhe_cleaned_genjets(df)
        digenjet = genjets[:, :2].distincts()
        df['mjj_gen'] = digenjet.mass.max()

    # Candidates
    # Already pre-filtered!
    # All leptons are at least loose
    # Check out setup_candidates for filtering details
    met_pt, met_phi, ak4, bjets, _, muons, electrons, taus, photons = setup_candidates(
        df, cfg)

    # Filtering ak4 jets according to pileup ID
    ak4 = ak4[ak4.puid]
    bjets = bjets[bjets.puid]

    # Muons: tight = tight ID + isolation + pt/eta cuts from config
    df['is_tight_muon'] = muons.tightId \
        & (muons.iso < cfg.MUON.CUTS.TIGHT.ISO) \
        & (muons.pt > cfg.MUON.CUTS.TIGHT.PT) \
        & (muons.abseta < cfg.MUON.CUTS.TIGHT.ETA)
    dimuons = muons.distincts()
    dimuon_charge = dimuons.i0['charge'] + dimuons.i1['charge']
    # Transverse mass is only meaningful for exactly one muon; the
    # (counts == 1) factor zeroes it out otherwise
    df['MT_mu'] = ((muons.counts == 1) *
                   mt(muons.pt, muons.phi, met_pt, met_phi)).max()

    # Electrons: tight = tight ID + pt/eta cuts from config
    df['is_tight_electron'] = electrons.tightId \
        & (electrons.pt > cfg.ELECTRON.CUTS.TIGHT.PT) \
        & (electrons.abseta < cfg.ELECTRON.CUTS.TIGHT.ETA)
    dielectrons = electrons.distincts()
    dielectron_charge = dielectrons.i0['charge'] + dielectrons.i1['charge']
    df['MT_el'] = ((electrons.counts == 1) *
                   mt(electrons.pt, electrons.phi, met_pt, met_phi)).max()

    # ak4: delta-R between the leading jet and each lepton flavor
    leadak4_index = ak4.pt.argmax()
    elejet_pairs = ak4[:, :1].cross(electrons)
    df['dREleJet'] = np.hypot(
        elejet_pairs.i0.eta - elejet_pairs.i1.eta,
        dphi(elejet_pairs.i0.phi, elejet_pairs.i1.phi)).min()
    muonjet_pairs = ak4[:, :1].cross(muons)
    df['dRMuonJet'] = np.hypot(
        muonjet_pairs.i0.eta - muonjet_pairs.i1.eta,
        dphi(muonjet_pairs.i0.phi, muonjet_pairs.i1.phi)).min()

    # Recoil: MET with leptons/photons added back (see recoil())
    df['recoil_pt'], df['recoil_phi'] = recoil(met_pt, met_phi, electrons,
                                               muons, photons)
    df["dPFCalo"] = (met_pt - df["CaloMET_pt"]) / df["recoil_pt"]
    df["minDPhiJetRecoil"] = min_dphi_jet_met(ak4,
                                              df['recoil_phi'],
                                              njet=4,
                                              ptmin=30,
                                              etamax=4.7)
    df["minDPhiJetMet"] = min_dphi_jet_met(ak4,
                                           met_phi,
                                           njet=4,
                                           ptmin=30,
                                           etamax=4.7)
    selection = processor.PackedSelection()

    # Triggers
    pass_all = np.ones(df.size) == 1
    selection.add('inclusive', pass_all)
    selection = trigger_selection(selection, df, cfg)
    selection.add('mu_pt_trig_safe', muons.pt.max() > 30)

    # Common selection
    selection.add('veto_ele', electrons.counts == 0)
    selection.add('veto_muo', muons.counts == 0)
    selection.add('veto_photon', photons.counts == 0)
    selection.add('veto_tau', taus.counts == 0)
    selection.add('veto_b', bjets.counts == 0)
    selection.add('mindphijr',
                  df['minDPhiJetRecoil'] > cfg.SELECTION.SIGNAL.MINDPHIJR)
    selection.add('dpfcalo',
                  np.abs(df['dPFCalo']) < cfg.SELECTION.SIGNAL.DPFCALO)
    selection.add('recoil', df['recoil_pt'] > cfg.SELECTION.SIGNAL.RECOIL)

    # HEM mitigation only applies to 2018 data-taking (and is disabled
    # when running in sync mode)
    if (cfg.MITIGATION.HEM and extract_year(df['dataset']) == 2018
            and not cfg.RUN.SYNC):
        selection.add('hemveto', df['hemveto'])
    else:
        selection.add('hemveto', np.ones(df.size) == 1)

    # AK4 dijet
    diak4 = ak4[:, :2].distincts()
    leadak4_pt_eta = (diak4.i0.pt > cfg.SELECTION.SIGNAL.LEADAK4.PT) & (
        np.abs(diak4.i0.eta) < cfg.SELECTION.SIGNAL.LEADAK4.ETA)
    trailak4_pt_eta = (diak4.i1.pt > cfg.SELECTION.SIGNAL.TRAILAK4.PT) & (
        np.abs(diak4.i1.eta) < cfg.SELECTION.SIGNAL.TRAILAK4.ETA)
    # VBF topology: the two leading jets in opposite hemispheres
    hemisphere = (diak4.i0.eta * diak4.i1.eta < 0).any()
    # CHF/NHF requirements only apply inside tracker coverage (|eta|<=2.5)
    has_track0 = np.abs(diak4.i0.eta) <= 2.5
    has_track1 = np.abs(diak4.i1.eta) <= 2.5
    leadak4_id = diak4.i0.tightId & (has_track0 * (
        (diak4.i0.chf > cfg.SELECTION.SIGNAL.LEADAK4.CHF) &
        (diak4.i0.nhf < cfg.SELECTION.SIGNAL.LEADAK4.NHF)) + ~has_track0)
    trailak4_id = has_track1 * (
        (diak4.i1.chf > cfg.SELECTION.SIGNAL.TRAILAK4.CHF) &
        (diak4.i1.nhf < cfg.SELECTION.SIGNAL.TRAILAK4.NHF)) + ~has_track1
    df['mjj'] = diak4.mass.max()
    df['dphijj'] = dphi(diak4.i0.phi.min(), diak4.i1.phi.max())
    df['detajj'] = np.abs(diak4.i0.eta - diak4.i1.eta).max()
    selection.add('two_jets', diak4.counts > 0)
    selection.add('leadak4_pt_eta', leadak4_pt_eta.any())
    selection.add('trailak4_pt_eta', trailak4_pt_eta.any())
    selection.add('hemisphere', hemisphere)
    selection.add('leadak4_id', leadak4_id.any())
    selection.add('trailak4_id', trailak4_id.any())
    selection.add('mjj',
                  df['mjj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.MASS)
    selection.add(
        'dphijj', df['dphijj'] < cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DPHI)
    selection.add(
        'detajj', df['detajj'] > cfg.SELECTION.SIGNAL.DIJET.SHAPE_BASED.DETA)

    # Divide into three categories for trigger study
    if cfg.RUN.TRIGGER_STUDY:
        two_central_jets = (np.abs(diak4.i0.eta) <= 2.4) & (np.abs(
            diak4.i1.eta) <= 2.4)
        two_forward_jets = (np.abs(diak4.i0.eta) > 2.4) & (np.abs(
            diak4.i1.eta) > 2.4)
        one_jet_forward_one_jet_central = (~two_central_jets) & (
            ~two_forward_jets)
        selection.add('two_central_jets', two_central_jets.any())
        selection.add('two_forward_jets', two_forward_jets.any())
        selection.add('one_jet_forward_one_jet_central',
                      one_jet_forward_one_jet_central.any())

    # Dimuon CR
    leadmuon_index = muons.pt.argmax()
    selection.add('at_least_one_tight_mu', df['is_tight_muon'].any())
    selection.add('dimuon_mass', ((dimuons.mass > cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MIN) \
                                  & (dimuons.mass < cfg.SELECTION.CONTROL.DOUBLEMU.MASS.MAX)).any())
    selection.add('dimuon_charge', (dimuon_charge == 0).any())
    selection.add('two_muons', muons.counts == 2)

    # Single muon CR
    selection.add('one_muon', muons.counts == 1)
    selection.add('mt_mu', df['MT_mu'] < cfg.SELECTION.CONTROL.SINGLEMU.MT)

    # Diele CR
    leadelectron_index = electrons.pt.argmax()
    selection.add('one_electron', electrons.counts == 1)
    selection.add('two_electrons', electrons.counts == 2)
    selection.add('at_least_one_tight_el', df['is_tight_electron'].any())
    selection.add('dielectron_mass', ((dielectrons.mass > cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MIN) \
                                      & (dielectrons.mass < cfg.SELECTION.CONTROL.DOUBLEEL.MASS.MAX)).any())
    selection.add('dielectron_charge', (dielectron_charge == 0).any())
    # NOTE(review): 'two_electrons' is registered a second time here with an
    # identical condition — redundant, the first add above already covers it
    selection.add('two_electrons', electrons.counts == 2)

    # Single Ele CR
    selection.add('met_el', met_pt > cfg.SELECTION.CONTROL.SINGLEEL.MET)
    selection.add('mt_el', df['MT_el'] < cfg.SELECTION.CONTROL.SINGLEEL.MT)

    # Photon CR
    leadphoton_index = photons.pt.argmax()
    df['is_tight_photon'] = photons.mediumId \
        & (photons.abseta < cfg.PHOTON.CUTS.TIGHT.ETA)
    selection.add('one_photon', photons.counts == 1)
    selection.add('at_least_one_tight_photon', df['is_tight_photon'].any())
    selection.add('photon_pt', photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PT)
    selection.add('photon_pt_trig',
                  photons.pt.max() > cfg.PHOTON.CUTS.TIGHT.PTTRIG)

    # Fill histograms
    output = self.accumulator.identity()

    # Gen
    if df['has_lhe_v_pt']:
        output['genvpt_check'].fill(vpt=gen_v_pt, type="Nano", dataset=dataset)
    if 'LHE_Njets' in df:
        output['lhe_njets'].fill(dataset=dataset,
                                 multiplicity=df['LHE_Njets'])
    if 'LHE_HT' in df:
        output['lhe_ht'].fill(dataset=dataset, ht=df['LHE_HT'])
    if 'LHE_HTIncoming' in df:
        output['lhe_htinc'].fill(dataset=dataset, ht=df['LHE_HTIncoming'])

    # Weights (MC only): generator, prefire, lepton/photon SFs, pileup,
    # and theory k-factors when a gen boson pt is available
    evaluator = evaluator_from_config(cfg)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if not df['is_data']:
        weights.add('gen', df['Generator_weight'])
        # PrefireWeight does not exist for all samples; fall back to unity
        try:
            weights.add('prefire', df['PrefireWeight'])
        except KeyError:
            weights.add('prefire', np.ones(df.size))
        weights = candidate_weights(weights, df, evaluator, muons, electrons,
                                    photons)
        weights = pileup_weights(weights, df, evaluator, cfg)
        if not (gen_v_pt is None):
            weights = theory_weights_vbf(weights, df, evaluator, gen_v_pt,
                                         df['mjj_gen'])

    # Save per-event values for synchronization
    if cfg.RUN.KINEMATICS.SAVE:
        for event in cfg.RUN.KINEMATICS.EVENTS:
            mask = df['event'] == event
            if not mask.any():
                continue
            output['kinematics']['event'] += [event]
            output['kinematics']['met'] += [met_pt[mask]]
            output['kinematics']['met_phi'] += [met_phi[mask]]
            output['kinematics']['recoil'] += [df['recoil_pt'][mask]]
            output['kinematics']['recoil_phi'] += [df['recoil_phi'][mask]]
            output['kinematics']['ak4pt0'] += [ak4[leadak4_index][mask].pt]
            output['kinematics']['ak4eta0'] += [
                ak4[leadak4_index][mask].eta
            ]
            # NOTE(review): this indexes the Python *list* literal with
            # `mask`, which is not element-wise masking — looks like it was
            # meant to be [ak4.pt.max()[mask] < 0] or similar; confirm intent
            output['kinematics']['leadbtag'] += [ak4.pt.max() < 0][mask]
            output['kinematics']['nLooseMu'] += [muons.counts[mask]]
            output['kinematics']['nTightMu'] += [
                muons[df['is_tight_muon']].counts[mask]
            ]
            output['kinematics']['mupt0'] += [
                muons[leadmuon_index][mask].pt
            ]
            output['kinematics']['mueta0'] += [
                muons[leadmuon_index][mask].eta
            ]
            output['kinematics']['nLooseEl'] += [electrons.counts[mask]]
            output['kinematics']['nTightEl'] += [
                electrons[df['is_tight_electron']].counts[mask]
            ]
            output['kinematics']['elpt0'] += [
                electrons[leadelectron_index][mask].pt
            ]
            output['kinematics']['eleta0'] += [
                electrons[leadelectron_index][mask].eta
            ]
            output['kinematics']['nLooseGam'] += [photons.counts[mask]]
            output['kinematics']['nTightGam'] += [
                photons[df['is_tight_photon']].counts[mask]
            ]
            output['kinematics']['gpt0'] += [
                photons[leadphoton_index][mask].pt
            ]
            output['kinematics']['geta0'] += [
                photons[leadphoton_index][mask].eta
            ]

    # Sum of all weights to use for normalization
    # TODO: Deal with systematic variations
    output['nevents'][dataset] += df.size
    if not df['is_data']:
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
        output['sumw_pileup'][dataset] += weights._weights['pileup'].sum()

    regions = vbfhinv_regions(cfg)
    for region, cuts in regions.items():
        # Blinding
        if (self._blind and df['is_data'] and region.startswith('sr')):
            continue

        # Cutflow plot for signal and control regions
        if any(x in region for x in ["sr", "cr", "tr"]):
            output['cutflow_' + region]['all'] += df.size
            for icut, cutname in enumerate(cuts):
                output['cutflow_' + region][cutname] += selection.all(
                    *cuts[:icut + 1]).sum()

        mask = selection.all(*cuts)

        # Save the event numbers of events passing this selection
        if cfg.RUN.SAVE.PASSING:
            output['selected_events'][region] += list(df['event'][mask])

        # Multiplicities
        def fill_mult(name, candidates):
            # Fill a multiplicity histogram for the given candidate collection
            output[name].fill(dataset=dataset,
                              region=region,
                              multiplicity=candidates[mask].counts,
                              weight=weights.weight()[mask])

        fill_mult('ak4_mult', ak4)
        fill_mult('bjet_mult', bjets)
        fill_mult('loose_ele_mult', electrons)
        fill_mult('tight_ele_mult', electrons[df['is_tight_electron']])
        fill_mult('loose_muo_mult', muons)
        fill_mult('tight_muo_mult', muons[df['is_tight_muon']])
        fill_mult('tau_mult', taus)
        fill_mult('photon_mult', photons)

        def ezfill(name, **kwargs):
            """Helper function to make filling easier."""
            output[name].fill(dataset=dataset, region=region, **kwargs)

        # Monitor weights
        for wname, wvalue in weights._weights.items():
            ezfill("weights", weight_type=wname, weight_value=wvalue[mask])
            ezfill("weights_wide", weight_type=wname,
                   weight_value=wvalue[mask])

        # All ak4
        # This is a workaround to create a weight array of the right dimension
        w_alljets = weight_shape(ak4[mask].eta, weights.weight()[mask])
        w_alljets_nopref = weight_shape(
            ak4[mask].eta,
            weights.partial_weight(exclude=['prefire'])[mask])
        ezfill('ak4_eta', jeteta=ak4[mask].eta.flatten(), weight=w_alljets)
        ezfill('ak4_phi', jetphi=ak4[mask].phi.flatten(), weight=w_alljets)
        ezfill('ak4_pt', jetpt=ak4[mask].pt.flatten(), weight=w_alljets)
        ezfill('ak4_eta_nopref',
               jeteta=ak4[mask].eta.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_phi_nopref',
               jetphi=ak4[mask].phi.flatten(),
               weight=w_alljets_nopref)
        ezfill('ak4_pt_nopref',
               jetpt=ak4[mask].pt.flatten(),
               weight=w_alljets_nopref)

        # Leading ak4
        w_diak4 = weight_shape(diak4.pt[mask], weights.weight()[mask])
        ezfill('ak4_eta0', jeteta=diak4.i0.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi0', jetphi=diak4.i0.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt0', jetpt=diak4.i0.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw0', jetpt=diak4.i0.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf0', frac=diak4.i0.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf0', frac=diak4.i0.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst0', nconst=diak4.i0.nconst[mask].flatten(),
               weight=w_diak4)

        # Trailing ak4
        ezfill('ak4_eta1', jeteta=diak4.i1.eta[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_phi1', jetphi=diak4.i1.phi[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_pt1', jetpt=diak4.i1.pt[mask].flatten(), weight=w_diak4)
        ezfill('ak4_ptraw1', jetpt=diak4.i1.ptraw[mask].flatten(),
               weight=w_diak4)
        ezfill('ak4_chf1', frac=diak4.i1.chf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nhf1', frac=diak4.i1.nhf[mask].flatten(), weight=w_diak4)
        ezfill('ak4_nconst1', nconst=diak4.i1.nconst[mask].flatten(),
               weight=w_diak4)

        # B tag discriminator
        btag = getattr(ak4, cfg.BTAG.ALGO)
        w_btag = weight_shape(btag[mask], weights.weight()[mask])
        ezfill('ak4_btag', btag=btag[mask].flatten(), weight=w_btag)

        # MET
        ezfill('dpfcalo', dpfcalo=df["dPFCalo"][mask],
               weight=weights.weight()[mask])
        ezfill('met', met=met_pt[mask], weight=weights.weight()[mask])
        ezfill('met_phi', phi=met_phi[mask], weight=weights.weight()[mask])
        ezfill('recoil', recoil=df["recoil_pt"][mask],
               weight=weights.weight()[mask])
        ezfill('recoil_phi', phi=df["recoil_phi"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijm', dphi=df["minDPhiJetMet"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijr', dphi=df["minDPhiJetRecoil"][mask],
               weight=weights.weight()[mask])
        ezfill('dphijj', dphi=df["dphijj"][mask],
               weight=weights.weight()[mask])
        ezfill('detajj', deta=df["detajj"][mask],
               weight=weights.weight()[mask])
        ezfill('mjj', mjj=df["mjj"][mask], weight=weights.weight()[mask])

        # Two dimensional
        ezfill('recoil_mjj',
               recoil=df["recoil_pt"][mask],
               mjj=df["mjj"][mask],
               weight=weights.weight()[mask])

        # Muons
        if '_1m_' in region or '_2m_' in region:
            w_allmu = weight_shape(muons.pt[mask], weights.weight()[mask])
            ezfill('muon_pt', pt=muons.pt[mask].flatten(), weight=w_allmu)
            ezfill('muon_mt', mt=df['MT_mu'][mask],
                   weight=weights.weight()[mask])
            ezfill('muon_eta', eta=muons.eta[mask].flatten(), weight=w_allmu)
            ezfill('muon_phi', phi=muons.phi[mask].flatten(), weight=w_allmu)

            # Dimuon
            if '_2m_' in region:
                w_dimu = weight_shape(dimuons.pt[mask],
                                      weights.weight()[mask])
                ezfill('muon_pt0', pt=dimuons.i0.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_pt1', pt=dimuons.i1.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta0', eta=dimuons.i0.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_eta1', eta=dimuons.i1.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi0', phi=dimuons.i0.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('muon_phi1', phi=dimuons.i1.phi[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_pt', pt=dimuons.pt[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_eta', eta=dimuons.eta[mask].flatten(),
                       weight=w_dimu)
                ezfill('dimuon_mass',
                       dilepton_mass=dimuons.mass[mask].flatten(),
                       weight=w_dimu)

        # Electrons
        if '_1e_' in region or '_2e_' in region:
            w_allel = weight_shape(electrons.pt[mask], weights.weight()[mask])
            ezfill('electron_pt', pt=electrons.pt[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_mt', mt=df['MT_el'][mask],
                   weight=weights.weight()[mask])
            ezfill('electron_eta', eta=electrons.eta[mask].flatten(),
                   weight=w_allel)
            ezfill('electron_phi', phi=electrons.phi[mask].flatten(),
                   weight=w_allel)

            # Dielectron
            if '_2e_' in region:
                w_diel = weight_shape(dielectrons.pt[mask],
                                      weights.weight()[mask])
                ezfill('electron_pt0', pt=dielectrons.i0.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_pt1', pt=dielectrons.i1.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta0', eta=dielectrons.i0.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_eta1', eta=dielectrons.i1.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi0', phi=dielectrons.i0.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('electron_phi1', phi=dielectrons.i1.phi[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_pt', pt=dielectrons.pt[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_eta', eta=dielectrons.eta[mask].flatten(),
                       weight=w_diel)
                ezfill('dielectron_mass',
                       dilepton_mass=dielectrons.mass[mask].flatten(),
                       weight=w_diel)

        # Photon
        if '_g_' in region:
            w_leading_photon = weight_shape(
                photons[leadphoton_index].pt[mask], weights.weight()[mask])
            ezfill('photon_pt0',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_eta0',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_phi0',
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            ezfill('photon_pt0_recoil',
                   pt=photons[leadphoton_index].pt[mask].flatten(),
                   recoil=df['recoil_pt'][mask & (leadphoton_index.counts
                                                  > 0)],
                   weight=w_leading_photon)
            ezfill('photon_eta_phi',
                   eta=photons[leadphoton_index].eta[mask].flatten(),
                   phi=photons[leadphoton_index].phi[mask].flatten(),
                   weight=w_leading_photon)
            # w_drphoton_jet = weight_shape(df['dRPhotonJet'][mask], weights.weight()[mask])

        # PV
        ezfill('npv', nvtx=df['PV_npvs'][mask],
               weight=weights.weight()[mask])
        ezfill('npvgood', nvtx=df['PV_npvsGood'][mask],
               weight=weights.weight()[mask])
        ezfill('npv_nopu', nvtx=df['PV_npvs'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('npvgood_nopu', nvtx=df['PV_npvsGood'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_all', rho=df['fixedGridRhoFastjetAll'][mask],
               weight=weights.weight()[mask])
        ezfill('rho_central', rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=weights.weight()[mask])
        ezfill('rho_all_nopu', rho=df['fixedGridRhoFastjetAll'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
        ezfill('rho_central_nopu',
               rho=df['fixedGridRhoFastjetCentral'][mask],
               weight=weights.partial_weight(exclude=['pileup'])[mask])
    return output
def process(self, df):
    """Fill sigma-ieta-ieta templates for the photon purity measurement.

    Selects high-pt barrel photons in events passing the single-photon
    trigger, MET filters, lumi mask, a jet requirement and a low-MET cut,
    and fills the 'sieie' histogram for three photon ID categories:
    full medium ID, medium ID without the sieie cut, and medium ID
    without sieie with inverted isolation.

    :param df: Data frame with one entry per event
    :type df: LazyDataFrame
    :returns: Filled accumulator
    """
    self._configure(df)
    output = self.accumulator.identity()
    dataset = df['dataset']

    # Lumi mask: golden-JSON certification applies to data only
    year = extract_year(dataset)
    if is_data(dataset):
        if year == 2016:
            json = bucoffea_path(
                'data/json/Cert_271036-284044_13TeV_ReReco_07Aug2017_Collisions16_JSON.txt'
            )
        elif year == 2017:
            json = bucoffea_path(
                'data/json/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt'
            )
        elif year == 2018:
            json = bucoffea_path(
                'data/json/Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'
            )
        lumi_mask = LumiMask(json)(df['run'], df['luminosityBlock'])
    else:
        lumi_mask = np.ones(df.size) == 1

    # MET filters
    if is_data(dataset):
        filt_met = mask_and(df, cfg.FILTERS.DATA)
    else:
        filt_met = mask_and(df, cfg.FILTERS.MC)

    # Single-photon trigger threshold differs between 2016 and 2017/18
    if year == 2016:
        trigger = 'HLT_Photon175'
    else:
        trigger = 'HLT_Photon200'

    photons = setup_photons(df)

    # Jets: photon-overlap-cleaned, tight ID, high pt, central
    ak4 = setup_jets(df)
    ak4 = ak4[
        object_overlap(ak4, photons) \
        & ak4.tightId \
        & (ak4.pt > 100) \
        & (ak4.abseta < 2.4)
    ]

    # Event selection: filters + lumi + >=1 jet + trigger + low MET
    event_mask = filt_met \
        & lumi_mask \
        & (ak4.counts > 0) \
        & df[trigger] \
        & (df['MET_pt'] < 60)

    # Generator weight (unity for data)
    weights = processor.Weights(size=df.size, storeIndividual=True)
    if is_data(dataset):
        weights.add('gen', np.ones(df.size))
    else:
        weights.add('gen', df['Generator_weight'])

    # Photon preselection: high-pt barrel photons
    photon_kinematics = (photons.pt > 200) & (photons.barrel)

    # Medium
    vals = photons[photon_kinematics & photons.mediumId].sieie[event_mask]
    pt = photons[photon_kinematics & photons.mediumId].pt[event_mask]
    # NOTE: coffea Hist.fill takes the per-entry weights via the 'weight'
    # keyword; the previous 'weights=' spelling was not a valid axis and
    # silently mis-routed the weights.
    output['sieie'].fill(dataset=dataset,
                         cat='medium',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weight=weight_shape(
                             vals, weights.weight()[event_mask]))

    # No Sieie
    vals = photons[photon_kinematics
                   & medium_id_no_sieie(photons)].sieie[event_mask]
    pt = photons[photon_kinematics
                 & medium_id_no_sieie(photons)].pt[event_mask]
    output['sieie'].fill(dataset=dataset,
                         cat='medium_nosieie',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weight=weight_shape(
                             vals, weights.weight()[event_mask]))

    # No Sieie, inverted isolation
    vals = photons[photon_kinematics
                   & medium_id_no_sieie_inv_iso(photons)].sieie[event_mask]
    pt = photons[photon_kinematics
                 & medium_id_no_sieie_inv_iso(photons)].pt[event_mask]
    output['sieie'].fill(dataset=dataset,
                         cat='medium_nosieie_invertiso',
                         sieie=vals.flatten(),
                         pt=pt.flatten(),
                         weight=weight_shape(
                             vals, weights.weight()[event_mask]))

    # Keep track of weight sum
    if not is_data(dataset):
        output['sumw'][dataset] += df['genEventSumw']
        output['sumw2'][dataset] += df['genEventSumw2']
    return output
def met_trigger_sf(weights, diak4, df, apply_categorized=True):
    '''
    Data/MC SF for the MET trigger, determined as the ratio of
    two sigmoid functions which are fit to data and MC efficiencies.

    If apply_categorized is set to True, two categories of SF will
    be applied, depending on the leading two jets. Otherwise,
    one single SF will be applied.

    :param weights: Weights object the SF is written into
    :param diak4: Leading dijet candidates (used for jet categorization)
    :param df: Data frame
    :param apply_categorized: Use jet-topology-dependent SFs if True
    '''
    year = extract_year(df['dataset'])
    x = df['recoil_pt']

    # Sigmoid parameters (slope, midpoint, plateau) fitted in data ...
    data_params = {
        'two_central_jets': {
            2017: (0.044, 164.881, 0.990),
            2018: (0.045, 176.266, 0.993)
        },
        'mixed': {
            2017: (0.039, 173.351, 0.986),
            2018: (0.041, 182.607, 0.990)
        },
        'inclusive': {
            2017: (0.043, 167.896, 0.99),
            2018: (0.044, 178.364, 0.992)
        }
    }
    # ... and in MC
    mc_params = {
        'two_central_jets': {
            2017: (0.046, 144.881, 0.994),
            2018: (0.052, 152.838, 0.993)
        },
        'mixed': {
            2017: (0.039, 154.035, 0.992),
            2018: (0.048, 159.329, 0.992)
        },
        'inclusive': {
            2017: (0.044, 147.932, 0.994),
            2018: (0.051, 155.016, 0.993)
        }
    }

    # No MET trigger SF is applied for 2016
    if year == 2016:
        sf = np.ones(df.size)
    else:
        if apply_categorized:
            # Two categories: events with two central jets use the
            # 'two_central_jets' fit, everything else uses 'mixed'
            two_central_jets = (diak4.i0.abseta < 2.5) & (diak4.i1.abseta <
                                                          2.5)
            sf = np.where(
                two_central_jets,
                sigmoid3(x, *data_params['two_central_jets'][year]) /
                sigmoid3(x, *mc_params['two_central_jets'][year]),
                sigmoid3(x, *data_params['mixed'][year]) /
                sigmoid3(x, *mc_params['mixed'][year]))
        else:
            sf = sigmoid3(x, *data_params['inclusive'][year]) / sigmoid3(
                x, *mc_params['inclusive'][year])

    # Guard against pathological ratios: reset NaN / inf entries to unity.
    # (The original code used '==' here, a no-op comparison instead of an
    # assignment, so invalid SFs leaked through.)
    sf[np.isnan(sf) | np.isinf(sf)] = 1

    weights.add("trigger_met", sf)
def get_veto_weights(df, evaluator, electrons, muons, taus, do_variations=False):
    """
    Calculate veto weights for SR W

    The weights are effectively:

        w = product(1 - SF)

    where the product runs over veto-able e, mu, tau.

    :param df: Data frame (provides 'dataset' and event count df.size)
    :param evaluator: Lookup of scale-factor histograms by name
    :param electrons: Electron candidates (uses .pt, .etasc)
    :param muons: Muon candidates (uses .pt, .abseta)
    :param taus: Tau candidates (uses .pt)
    :param do_variations: If True, also compute up/down SF variations
    :return: processor.Weights container with one entry per variation
    """
    veto_weights = processor.Weights(size=df.size, storeIndividual=True)

    # Always compute the nominal; optionally add systematic variations,
    # one up/down pair per lepton SF source.
    variations = ["nominal"]
    if do_variations:
        variations.extend([
            'ele_reco_up','ele_reco_dn',
            'ele_id_up','ele_id_dn',
            'muon_id_up','muon_id_dn',
            'muon_iso_up','muon_iso_dn',
            'tau_id_up','tau_id_dn'
        ])

    for variation in variations:
        def varied_weight(sfname, *args):
            '''Helper function to easily get the correct weights for a given variation'''
            # For the nominal variation, just pass through
            if 'nominal' in variation:
                return evaluator[sfname](*args)
            # If this variation is unrelated to the SF at hand,
            # pass through as well
            if not (re.sub('_(up|dn)', '', variation) in sfname):
                return evaluator[sfname](*args)
            # Direction of variation: shift the SF by +/- its stored error
            sgn = 1 if variation.endswith("up") else -1
            return evaluator[sfname](*args) + sgn * evaluator[f"{sfname}_error"](*args)

        ### Electrons
        # 2017 uses separate reco SFs below/above pt = 20 GeV
        if extract_year(df['dataset']) == 2017:
            high_et = electrons.pt>20

            # Low pt SFs
            low_pt_args = (electrons.etasc[~high_et], electrons.pt[~high_et])
            ele_reco_sf_low = varied_weight('ele_reco_pt_lt_20', *low_pt_args)
            ele_id_sf_low = varied_weight("ele_id_loose", *low_pt_args)

            # High pt SFs
            high_pt_args = (electrons.etasc[high_et], electrons.pt[high_et])
            ele_reco_sf_high = varied_weight("ele_reco", *high_pt_args)
            ele_id_sf_high = varied_weight("ele_id_loose", *high_pt_args)

            # Combine: product of per-electron veto probabilities (1 - SF)
            veto_weight_ele = (1 - ele_reco_sf_low*ele_id_sf_low).prod() * (1-ele_reco_sf_high*ele_id_sf_high).prod()
        else:
            # No pt split for 2018
            args = (electrons.etasc, electrons.pt)
            ele_reco_sf = varied_weight("ele_reco", *args)
            ele_id_sf = varied_weight("ele_id_loose", *args)

            # Combine
            veto_weight_ele = (1 - ele_id_sf*ele_reco_sf).prod()

        ### Muons
        args = (muons.pt, muons.abseta)
        veto_weight_muo = (1 - varied_weight("muon_id_loose", *args)*varied_weight("muon_iso_loose", *args)).prod()

        ### Taus
        # Taus have their variations saved as separate histograms,
        # so our cool trick from above is replaced by the pedestrian way:
        # pick the pre-varied histogram by name.
        if "tau_id" in variation:
            direction = variation.split("_")[-1]
            tau_sf_name = f"tau_id_{direction}"
        else:
            tau_sf_name = "tau_id"
        veto_weight_tau = (1 - evaluator[tau_sf_name](taus.pt)).prod()

        ### Combine the three lepton flavors into the per-event weight
        total = veto_weight_ele * veto_weight_muo * veto_weight_tau

        # Cap weights just in case a pathological SF slips through
        total[np.abs(total)>5] = 1

        veto_weights.add(variation, total)

    return veto_weights
def setup_candidates(df, cfg):
    """Build the physics-object collections for one chunk of events.

    Reads NanoAOD branches out of ``df``, builds JaggedCandidateArrays for
    muons, electrons, taus, photons, AK4 jets (+ b-tagged subset), AK8 jets,
    applies the loose selections and overlap cleaning configured in ``cfg``,
    and picks the appropriate JES/JER-corrected jet/MET branches.

    :param df: LazyDataFrame with NanoAOD branches
    :param cfg: Configuration object with per-object selection cuts
    :return: (met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons)
    """
    # Choose which JES/JER variant of the jet and MET branches to read:
    # early data is uncorrected, 2018 data and all MC use corrected branches.
    if df['is_data'] and extract_year(df['dataset']) != 2018:
        # 2016, 2017 data
        jes_suffix = ''
        jes_suffix_met = ''
    elif df['is_data']:
        # 2018 data
        jes_suffix = '_nom'
        jes_suffix_met = '_nom'
    else:
        # MC, all years
        jes_suffix = '_nom'
        jes_suffix_met = '_jer'

    muons = JaggedCandidateArray.candidatesfromcounts(
        df['nMuon'],
        pt=df['Muon_pt'],
        eta=df['Muon_eta'],
        abseta=np.abs(df['Muon_eta']),
        phi=df['Muon_phi'],
        mass=0 * df['Muon_pt'],  # treated as massless
        charge=df['Muon_charge'],
        looseId=df['Muon_looseId'],
        iso=df["Muon_pfRelIso04_all"],
        tightId=df['Muon_tightId'],
        dxy=df['Muon_dxy'],
        dz=df['Muon_dz']
    )

    # All muons must be at least loose
    muons = muons[muons.looseId \
                    & (muons.iso < cfg.MUON.CUTS.LOOSE.ISO) \
                    & (muons.pt > cfg.MUON.CUTS.LOOSE.PT) \
                    & (muons.abseta<cfg.MUON.CUTS.LOOSE.ETA) \
                    ]

    electrons = JaggedCandidateArray.candidatesfromcounts(
        df['nElectron'],
        pt=df['Electron_pt'],
        eta=df['Electron_eta'],
        abseta=np.abs(df['Electron_eta']),
        phi=df['Electron_phi'],
        mass=0 * df['Electron_pt'],  # treated as massless
        charge=df['Electron_charge'],
        # ID branch is a cut-based level: >=1 loose, ==4 tight
        looseId=(df[cfg.ELECTRON.BRANCH.ID]>=1),
        tightId=(df[cfg.ELECTRON.BRANCH.ID]==4),
        dxy=np.abs(df['Electron_dxy']),
        dz=np.abs(df['Electron_dz']),
        barrel=np.abs(df['Electron_eta']) <= 1.479  # barrel/endcap boundary
    )

    # All electrons must be at least loose.
    # Impact-parameter cuts differ between barrel and endcap.
    pass_dxy = (electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.BARREL)) \
    | (~electrons.barrel & (np.abs(electrons.dxy) < cfg.ELECTRON.CUTS.LOOSE.DXY.ENDCAP))

    pass_dz = (electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.BARREL)) \
    | (~electrons.barrel & (np.abs(electrons.dz) < cfg.ELECTRON.CUTS.LOOSE.DZ.ENDCAP))

    electrons = electrons[electrons.looseId \
                                    & (electrons.pt>cfg.ELECTRON.CUTS.LOOSE.PT) \
                                    & (electrons.abseta<cfg.ELECTRON.CUTS.LOOSE.ETA) \
                                    & pass_dxy \
                                    & pass_dz
                                    ]

    if cfg.OVERLAP.ELECTRON.MUON.CLEAN:
        electrons = electrons[object_overlap(electrons, muons, dr=cfg.OVERLAP.ELECTRON.MUON.DR)]

    taus = JaggedCandidateArray.candidatesfromcounts(
        df['nTau'],
        pt=df['Tau_pt'],
        eta=df['Tau_eta'],
        abseta=np.abs(df['Tau_eta']),
        phi=df['Tau_phi'],
        mass=0 * df['Tau_pt'],  # treated as massless
        decaymode=df['Tau_idDecayMode'],
        iso=df['Tau_idMVAoldDM2017v2'],
    )

    # Tau selection: valid decay mode, kinematics, and MVA iso bit 1 set
    # (presumably a specific isolation working point -- TODO confirm)
    taus = taus[ (taus.decaymode) \
                & (taus.pt > cfg.TAU.CUTS.PT)\
                & (taus.abseta < cfg.TAU.CUTS.ETA) \
                & ((taus.iso&2)==2)]

    if cfg.OVERLAP.TAU.MUON.CLEAN:
        taus = taus[object_overlap(taus, muons, dr=cfg.OVERLAP.TAU.MUON.DR)]
    if cfg.OVERLAP.TAU.ELECTRON.CLEAN:
        taus = taus[object_overlap(taus, electrons, dr=cfg.OVERLAP.TAU.ELECTRON.DR)]

    photons = JaggedCandidateArray.candidatesfromcounts(
        df['nPhoton'],
        pt=df['Photon_pt'],
        eta=df['Photon_eta'],
        abseta=np.abs(df['Photon_eta']),
        phi=df['Photon_phi'],
        mass=0*df['Photon_pt'],  # treated as massless
        # Cut-based ID level (>=1 loose, >=2 medium) combined with
        # the conversion-safe electron veto
        looseId=(df[cfg.PHOTON.BRANCH.ID]>=1) & df['Photon_electronVeto'],
        mediumId=(df[cfg.PHOTON.BRANCH.ID]>=2) & df['Photon_electronVeto'],
        r9=df['Photon_r9'],
        barrel=np.abs(df['Photon_eta']) < 1.479,
    )

    # All photons must be at least loose
    photons = photons[photons.looseId \
              & (photons.pt > cfg.PHOTON.CUTS.LOOSE.pt) \
              & (photons.abseta < cfg.PHOTON.CUTS.LOOSE.eta)
              ]

    if cfg.OVERLAP.PHOTON.MUON.CLEAN:
        photons = photons[object_overlap(photons, muons, dr=cfg.OVERLAP.PHOTON.MUON.DR)]
    if cfg.OVERLAP.PHOTON.ELECTRON.CLEAN:
        photons = photons[object_overlap(photons, electrons, dr=cfg.OVERLAP.PHOTON.ELECTRON.DR)]

    ak4 = JaggedCandidateArray.candidatesfromcounts(
        df['nJet'],
        pt=df[f'Jet_pt{jes_suffix}'],
        eta=df['Jet_eta'],
        abseta=np.abs(df['Jet_eta']),
        phi=df['Jet_phi'],
        mass=np.zeros_like(df['Jet_pt']),
        looseId=(df['Jet_jetId']&2) == 2, # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        tightId=(df['Jet_jetId']&2) == 2, # bitmask: 1 = loose, 2 = tight, 3 = tight + lep veto
        # Pileup jet ID only applies to low-pt jets; high-pt jets pass by construction
        puid=((df['Jet_puId']&2>0) | (df[f'Jet_pt{jes_suffix}']>50)), # medium pileup jet ID
        csvv2=df["Jet_btagCSVV2"],
        deepcsv=df['Jet_btagDeepB'],
        # nef=df['Jet_neEmEF'],
        nhf=df['Jet_neHEF'],
        chf=df['Jet_chHEF'],
        # Undo the JEC to recover the raw jet pt
        ptraw=df['Jet_pt']*(1-df['Jet_rawFactor']),
        nconst=df['Jet_nConstituents']
        # clean=df['Jet_cleanmask']
        # cef=df['Jet_chEmEF'],
    )

    # Before cleaning, apply HEM veto: flag events with any jet in the
    # HEM-affected eta/phi region (must run on the uncleaned collection)
    hem_ak4 = ak4[ (ak4.pt>30) &
        (-3.0 < ak4.eta) &
        (ak4.eta < -1.3) &
        (-1.57 < ak4.phi) &
        (ak4.phi < -0.87)
        ]
    df['hemveto'] = hem_ak4.counts == 0

    # B jets have their own overlap cleaning,
    # so deal with them before applying filtering to jets
    btag_discriminator = getattr(ak4, cfg.BTAG.algo)
    btag_cut = cfg.BTAG.CUTS[cfg.BTAG.algo][cfg.BTAG.wp]
    bjets = ak4[
        (ak4.looseId) \
        & (ak4.pt > cfg.BTAG.PT) \
        & (ak4.abseta < cfg.BTAG.ETA) \
        & (btag_discriminator > btag_cut)
    ]

    if cfg.OVERLAP.BTAG.MUON.CLEAN:
        bjets = bjets[object_overlap(bjets, muons, dr=cfg.OVERLAP.BTAG.MUON.DR)]
    if cfg.OVERLAP.BTAG.ELECTRON.CLEAN:
        bjets = bjets[object_overlap(bjets, electrons, dr=cfg.OVERLAP.BTAG.ELECTRON.DR)]
    if cfg.OVERLAP.BTAG.PHOTON.CLEAN:
        bjets = bjets[object_overlap(bjets, photons, dr=cfg.OVERLAP.BTAG.PHOTON.DR)]

    # General AK4 collection: ID requirement plus lepton/photon cleaning
    ak4 = ak4[ak4.looseId]

    if cfg.OVERLAP.AK4.MUON.CLEAN:
        ak4 = ak4[object_overlap(ak4, muons, dr=cfg.OVERLAP.AK4.MUON.DR)]
    if cfg.OVERLAP.AK4.ELECTRON.CLEAN:
        ak4 = ak4[object_overlap(ak4, electrons, dr=cfg.OVERLAP.AK4.ELECTRON.DR)]
    if cfg.OVERLAP.AK4.PHOTON.CLEAN:
        ak4 = ak4[object_overlap(ak4, photons, dr=cfg.OVERLAP.AK4.PHOTON.DR)]

    ak8 = JaggedCandidateArray.candidatesfromcounts(
        df['nFatJet'],
        pt=df[f'FatJet_pt{jes_suffix}'],
        eta=df['FatJet_eta'],
        abseta=np.abs(df['FatJet_eta']),
        phi=df['FatJet_phi'],
        mass=df[f'FatJet_msoftdrop{jes_suffix}'],  # soft-drop mass used as candidate mass
        tightId=(df['FatJet_jetId']&2) == 2, # Tight
        csvv2=df["FatJet_btagCSVV2"],
        deepcsv=df['FatJet_btagDeepB'],
        tau1=df['FatJet_tau1'],
        tau2=df['FatJet_tau2'],
        tau21=df['FatJet_tau2']/df['FatJet_tau1'],  # N-subjettiness ratio
        wvsqcd=df['FatJet_deepTag_WvsQCD'],
        wvsqcdmd=df['FatJet_deepTagMD_WvsQCD'],
        zvsqcd=df['FatJet_deepTag_ZvsQCD'],
        zvsqcdmd=df['FatJet_deepTagMD_ZvsQCD']
    )

    # AK8: tight ID plus overlap cleaning against leptons and photons
    # (default dr used here -- no configured cone size)
    ak8 = ak8[ak8.tightId & object_overlap(ak8, muons) & object_overlap(ak8, electrons) & object_overlap(ak8, photons)]

    # 2017 uses the EE-noise-fixed MET recipe
    if extract_year(df['dataset']) == 2017:
        met_branch = 'METFixEE2017'
    else:
        met_branch = 'MET'

    met_pt = df[f'{met_branch}_pt{jes_suffix_met}']
    met_phi = df[f'{met_branch}_phi{jes_suffix_met}']

    return met_pt, met_phi, ak4, bjets, ak8, muons, electrons, taus, photons