def qcdewk_weight(ev, source, nsig, parent, input_paths, variations, input_df):
    """Return the event weight built from a binned correction table.

    When ``parent`` is not in ``input_paths`` the nominal weight is one and
    every variation is zero. Otherwise the rows of ``input_df`` are selected
    with ``get_bin_indices`` from ``ev.GenPartBoson(ev, 'pt')`` and the
    ``bin_min`` / ``bin_max`` index levels; the nominal weight is the ``""``
    column and each variation is stored as ``variation / nominal - 1``.

    Args:
        ev: Event object providing ``size`` and ``GenPartBoson``.
        source: Name of the systematic source; ``source + "Up"`` and
            ``source + "Down"`` are looked up among the variations.
        nsig: Passed through to ``weight_numba``.
        parent: Key tested for membership in ``input_paths``.
        input_paths: Container of parents for which a table is available.
        variations: Sequence of column names; the first entry is skipped.
        input_df: Correction table with ``bin_min`` / ``bin_max`` index levels.

    Returns:
        The result of ``weight_numba(central, nsig, up, down)``.
    """
    if parent in input_paths:
        bin_levels = input_df.index
        rows = get_bin_indices(
            [ev.GenPartBoson(ev, 'pt')],
            [bin_levels.get_level_values("bin_min").values],
            [bin_levels.get_level_values("bin_max").values],
            1,
        )[:, 0]
        binned = input_df.iloc[rows]
        weights = {"": binned[""].values.astype(np.float32)}
        weights.update({
            label: ((binned[label] / binned[""]).values - 1.).astype(np.float32)
            for label in variations[1:]
        })
    else:
        # No table for this parent: unit nominal weight, no variation.
        weights = {"": np.ones(ev.size, dtype=np.float32)}
        weights.update({
            label: np.zeros(ev.size, dtype=np.float32)
            for label in variations[1:]
        })

    central = weights[""]
    up_key = source + "Up"
    down_key = source + "Down"
    if up_key in weights and down_key in weights:
        up, down = weights[up_key], weights[down_key]
    else:
        # Either shift missing means the source does not affect this weight.
        up = np.zeros_like(central, dtype=np.float32)
        down = np.zeros_like(central, dtype=np.float32)
    return weight_numba(central, nsig, up, down)
def jet_pt_res(ev, jers):
    """Evaluate ``jer_formula`` per jet and return it as a jagged array.

    Rows of ``jers`` are selected with ``get_bin_indices`` from the absolute
    jet eta and the per-event ``fixedGridRhoFastjetAll`` expanded to jets via
    ``event_to_object_var``. The jet ``Jet_ptJESOnly`` is clamped to the
    selected row's ``[pt_low, pt_high]`` before the formula is evaluated.

    Args:
        ev: Event object providing ``Jet_eta``, ``Jet_ptJESOnly`` and
            ``fixedGridRhoFastjetAll``.
        jers: Table with ``eta_low/high``, ``rho_low/high``, ``pt_low/high``
            and ``param0`` .. ``param3`` columns.

    Returns:
        ``awk.JaggedArray`` with the starts/stops of ``ev.Jet_ptJESOnly``.
    """
    jet_pt = ev.Jet_ptJESOnly
    abs_eta = np.abs(ev.Jet_eta.content)
    rho_per_jet = event_to_object_var(
        ev.fixedGridRhoFastjetAll, jet_pt.starts, jet_pt.stops,
    )
    rows = get_bin_indices(
        [abs_eta, rho_per_jet],
        [jers["eta_low"].values, jers["rho_low"].values],
        [jers["eta_high"].values, jers["rho_high"].values],
        1,
    )[:, 0]
    selected = jers.iloc[rows]

    coefficients = selected[
        ["param0", "param1", "param2", "param3"]
    ].values.astype(np.float32)
    p0, p1, p2, p3 = coefficients.T

    pt_limits = selected[["pt_low", "pt_high"]].values
    # Keep pt inside the range for which the selected row is defined.
    raised = np.maximum(jet_pt.content, pt_limits[:, 0])
    clamped_pt = np.minimum(raised, pt_limits[:, 1]).astype(np.float32)

    resolution = jer_formula(clamped_pt, p0, p1, p2, p3)
    return awk.JaggedArray(jet_pt.starts, jet_pt.stops, resolution)
def get_efficiencies(event, source, nsig, obj, selection, effmap):
    """Look up the efficiency-map content and error for selected objects.

    Args:
        event: Event object providing ``<obj>_eta`` and a callable
            ``<obj>_ptShift(event, source, nsig)``.
        source: Passed through to ``<obj>_ptShift``.
        nsig: Passed through to ``<obj>_ptShift``.
        obj: Collection name used to build the attribute names.
        selection: Index/mask applied to both jagged arrays.
        effmap: Table with ``xlow/xupp`` (matched against eta),
            ``ylow/yupp`` (matched against the shifted pt), ``content`` and
            ``error`` columns.

    Returns:
        Tuple ``(content, error)`` of arrays, one entry per selected object.
    """
    eta_array = getattr(event, "{}_eta".format(obj))
    eta_values = eta_array[selection].content

    pt_function = getattr(event, "{}_ptShift".format(obj))
    pt_array = pt_function(event, source, nsig)
    pt_values = pt_array[selection].content.astype(np.float32)

    rows = get_bin_indices(
        [eta_values, pt_values],
        [effmap["xlow"].values, effmap["ylow"].values],
        [effmap["xupp"].values, effmap["yupp"].values],
        1,
    )[:, 0]
    matched = effmap.iloc[rows]
    return matched["content"].values, matched["error"].values
def evaluate_pu(ev, source, nsig, var, corrs):
    """Return the per-event weight read from a ``nTrueInt``-binned table.

    Bins run from each ``nTrueInt`` value to that value plus one; the first
    lower edge and the last upper edge are opened to infinity so every event
    lands in a bin. The up/down variations are relative to the nominal
    correction and are only non-zero when ``source == "pileup"``.

    Args:
        ev: Event object; ``getattr(ev, var)`` is the binning variable.
        source: Name of the systematic source being evaluated.
        nsig: Passed through to ``weight_numba``.
        var: Name of the event attribute to bin in.
        corrs: Table with ``nTrueInt``, ``corr``, ``corr_up`` and
            ``corr_down`` columns.

    Returns:
        ``weight_numba(nominal, nsig, up, down)`` cast to ``float32``.
    """
    lower_edges = corrs["nTrueInt"].values.astype(np.float32)
    upper_edges = lower_edges + 1
    # Open the outermost bins so under/overflow events are still matched.
    lower_edges[0] = -np.inf
    upper_edges[-1] = np.inf

    rows = get_bin_indices(
        [getattr(ev, var)], [lower_edges], [upper_edges], 1,
    )[:, 0]
    matched = corrs.iloc[rows]

    nominal = matched["corr"].values
    is_pileup = (source == "pileup")
    up = (matched["corr_up"].values / nominal - 1.) * is_pileup
    down = (matched["corr_down"].values / nominal - 1.) * is_pileup

    weight = weight_numba(nominal, nsig, up, down)
    return weight.astype(np.float32)
def jes_corr(ev, source, nsig, jesuncs):
    """Return the per-jet ``Jet_JEC<source><Up|Down>`` array for a source.

    If ``source`` is not one of ``ev.JesSources`` or ``nsig`` is zero, an
    array of ones with the structure of ``ev.Jet_pt`` is returned. Otherwise
    the requested branch is returned, after first computing and attaching
    both the ``Up`` and ``Down`` branches to ``ev`` if the requested one is
    not present yet.

    Args:
        ev: Event object providing jet branches, ``JesSources`` and
            ``hasbranch``.
        source: Name of the systematic source; a leading ``"jes"`` is
            stripped when matching the ``source`` column of ``jesuncs``.
        nsig: Sign selects ``Up`` (positive) or ``Down`` (otherwise).
        jesuncs: Table with ``source``, ``eta_low``, ``eta_high``, ``pt``,
            ``corr_up`` and ``corr_down`` columns.

    Returns:
        ``awk.JaggedArray`` with one value per jet.
    """
    if source not in ev.JesSources or nsig == 0.:
        starts = ev.Jet_pt.starts
        stops = ev.Jet_pt.stops
        unity = np.ones_like(ev.Jet_pt.content, dtype=np.float32)
        return awk.JaggedArray(starts, stops, unity)

    direction = "Up" if nsig > 0. else "Down"
    flavour = source + direction
    branch = "Jet_JEC{}".format(flavour)

    if not ev.hasbranch(branch):
        table_source = source[3:] if source.startswith("jes") else source
        df = jesuncs[jesuncs["source"] == table_source]
        rows = get_bin_indices(
            [ev.Jet_eta.content],
            [df["eta_low"].values],
            [df["eta_high"].values],
            1,
        )[:, 0]
        matched = df.iloc[rows]

        # Each cell holds a sequence; stack them into one array per column.
        pt_grid = np.array(list(matched["pt"].values))
        up_grid = np.array(list(matched["corr_up"].values))
        down_grid = np.array(list(matched["corr_down"].values))

        jet_pt = ev.Jet_ptJESOnly.content
        corr_up = interpolate(jet_pt, pt_grid, up_grid).astype(np.float32)
        corr_down = interpolate(jet_pt, pt_grid, down_grid).astype(np.float32)

        starts = ev.Jet_eta.starts
        stops = ev.Jet_eta.stops
        setattr(
            ev, "Jet_JEC{}Up".format(source),
            awk.JaggedArray(starts, stops, corr_up),
        )
        # The down variation is stored with its sign flipped.
        setattr(
            ev, "Jet_JEC{}Down".format(source),
            awk.JaggedArray(starts, stops, -1. * corr_down),
        )

    return getattr(ev, branch)
def evaluate_btagsf(ev, source, nsig, df, attrs, h2f):
    """Return the per-jet b-tag scale-factor weight as a jagged array.

    Each jet is matched to rows of ``df`` with ``get_bin_indices`` using the
    mapped hadron flavour (bin from ``jetFlavor`` to ``jetFlavor + 1``) plus
    every ``(jet_attr, df_attr)`` pair in ``attrs`` (bin from
    ``<df_attr>Min`` to ``<df_attr>Max``). Three matches per jet are
    requested and read as columns 0, 1, 2 = central, down, up. The up/down
    variations are relative to the central value and are only non-zero when
    ``source == "btagSF"``.

    Args:
        ev: Event object providing the ``Jet`` collection.
        source: Name of the systematic source being evaluated.
        nsig: Passed to callable jet attributes and to ``weight_numba``.
        df: Scale-factor table with ``jetFlavor`` and the ``Min``/``Max``
            columns named by ``attrs``; rows are passed to ``btag_formula``.
        attrs: Iterable of ``(jet_attr, df_attr)`` pairs. A callable jet
            attribute is invoked as ``attr(ev, source, nsig)``.
        h2f: Mapping applied to the jet hadron flavour via ``dict_apply``.

    Returns:
        ``awk.JaggedArray`` with the starts/stops of the shifted jet pt.
    """
    jet_flavour = dict_apply(h2f, ev.Jet.hadronFlavour.content)

    # NOTE: an unused (n_jets x n_rows) boolean mask used to be allocated
    # here with ``np.bool8``; it was dead code, wasted memory and fails on
    # NumPy >= 2.0 where that alias no longer exists.

    # First binning axis: the mapped jet flavour.
    flavour_edges = df["jetFlavor"].values
    event_attrs = [jet_flavour.astype(np.float32)]
    mins = [flavour_edges.astype(np.float32)]
    maxs = [(flavour_edges + 1).astype(np.float32)]

    # Remaining binning axes as configured by the caller.
    for jet_attr, df_attr in attrs:
        obj_attr = getattr(ev.Jet, jet_attr)
        if callable(obj_attr):
            obj_attr = obj_attr(ev, source, nsig)
        event_attrs.append(obj_attr.content.astype(np.float32))
        mins.append(df[df_attr + "Min"].values.astype(np.float32))
        maxs.append(df[df_attr + "Max"].values.astype(np.float32))

    indices = get_bin_indices(event_attrs, mins, maxs, 3)
    idx_central = indices[:, 0]
    idx_down = indices[:, 1]
    idx_up = indices[:, 2]

    jpt = ev.Jet.ptShift(ev, source, nsig)
    sf = btag_formula(jpt.content, df.iloc[idx_central])
    sf_up = btag_formula(jpt.content, df.iloc[idx_up])
    sf_down = btag_formula(jpt.content, df.iloc[idx_down])

    # Relative shifts, switched off for every other systematic source.
    is_btag_source = (source == "btagSF")
    sf_up = is_btag_source * (sf_up / sf - 1.)
    sf_down = is_btag_source * (sf_down / sf - 1.)

    return awk.JaggedArray(
        jpt.starts, jpt.stops,
        weight_numba(sf, nsig, sf_up, sf_down),
    )
def _weighted_mean(
    objattr, w, k, statup, statdown, systup, systdown, addsyst, nweight,
):
    """Combine ``nweight`` consecutive table rows per object.

    For object ``idx`` the rows ``nweight * idx`` .. ``nweight * (idx + 1)``
    are combined into a weighted mean of ``k`` and into uncertainties of the
    form ``sqrt(sum((w * d)**2) / sum(w)**2 + addsyst**2)``; the two "down"
    results are returned with a negative sign. Written with plain loops so
    it can be compiled by numba.
    """
    wsum = np.zeros_like(objattr, dtype=np.float32)
    wksum = np.zeros_like(objattr, dtype=np.float32)
    wdkstatupsum = np.zeros_like(objattr, dtype=np.float32)
    wdkstatdownsum = np.zeros_like(objattr, dtype=np.float32)
    wdksystupsum = np.zeros_like(objattr, dtype=np.float32)
    wdksystdownsum = np.zeros_like(objattr, dtype=np.float32)

    for idx in range(objattr.shape[0]):
        for subidx in range(nweight * idx, nweight * (idx + 1)):
            wsum[idx] += w[subidx]
            wksum[idx] += w[subidx] * k[subidx]
            wdkstatupsum[idx] += (w[subidx] * statup[subidx])**2
            wdkstatdownsum[idx] += (w[subidx] * statdown[subidx])**2
            wdksystupsum[idx] += (w[subidx] * systup[subidx])**2
            wdksystdownsum[idx] += (w[subidx] * systdown[subidx])**2

    mean = wksum / wsum
    stat_up = np.sqrt((wdkstatupsum / wsum**2) + addsyst**2)
    stat_down = -1. * np.sqrt((wdkstatdownsum / wsum**2) + addsyst**2)
    syst_up = np.sqrt((wdksystupsum / wsum**2) + addsyst**2)
    syst_down = -1. * np.sqrt((wdksystdownsum / wsum**2) + addsyst**2)

    return (
        mean.astype(np.float32),
        stat_up.astype(np.float32),
        stat_down.astype(np.float32),
        syst_up.astype(np.float32),
        syst_down.astype(np.float32),
    )


def _weighted_mean_kernel():
    """Return the jitted ``_weighted_mean``, compiling it on first use only."""
    kernel = getattr(_weighted_mean_kernel, "_compiled", None)
    if kernel is None:
        kernel = nb.njit(_weighted_mean)
        _weighted_mean_kernel._compiled = kernel
    return kernel


def evaluate_object_weights(
    ev, source, nsig, df, bins_vars, add_syst, name,
):
    """Return per-object scale-factor weights as a jagged array.

    The binning variables are evaluated, NaNs in their content are replaced
    by zero in place, and each object is matched to as many rows of ``df``
    as there are unique values in the ``weight`` column. The matched rows
    are merged per object by ``_weighted_mean``. Systematic uncertainties
    are used when ``"syst"`` occurs in ``source`` (case-insensitive),
    statistical ones otherwise. ``nsig`` is only applied when ``name``
    equals ``source`` with ``"stat"``/``"syst"`` removed (case-insensitive);
    otherwise the nominal weight is returned.

    Args:
        ev: Event object handed to the binning and ``add_syst`` callables.
        source: Name of the systematic source being evaluated.
        nsig: Size of the shift, passed to ``weight_numba``.
        df: Table with ``bin<i>_low``, ``bin<i>_upp``, ``weight``, ``corr``,
            ``stat_up``, ``stat_down``, ``syst_up`` and ``syst_down``.
        bins_vars: Callables ``v(ev, source, nsig)`` returning jagged arrays,
            one per binning dimension.
        add_syst: Callable ``add_syst(ev, source, nsig)`` returning a jagged
            array added in quadrature to every uncertainty.
        name: Name of the object weight, compared against ``source``.

    Returns:
        ``awk.JaggedArray`` with the starts/stops of the first binning
        variable.
    """
    # The jitted kernel used to be defined inside this function, which made
    # numba build a fresh dispatcher and recompile on every call.
    weighted_mean_numba = _weighted_mean_kernel()

    event_vars = [v(ev, source, nsig) for v in bins_vars]
    for v in event_vars:
        v.content[np.isnan(v.content)] = 0.

    n_weights = df["weight"].unique().shape[0]
    dimensions = range(len(event_vars))
    indices = get_bin_indices(
        [v.content.astype(np.float32) for v in event_vars],
        [
            df["bin{}_low".format(idx)].values.astype(np.float32)
            for idx in dimensions
        ],
        [
            df["bin{}_upp".format(idx)].values.astype(np.float32)
            for idx in dimensions
        ],
        n_weights,
    ).ravel()
    dfw = df.iloc[indices]

    sf, sf_statup, sf_statdown, sf_systup, sf_systdown = weighted_mean_numba(
        event_vars[0].content,
        dfw["weight"].values,
        dfw["corr"].values,
        dfw["stat_up"].values,
        dfw["stat_down"].values,
        dfw["syst_up"].values,
        dfw["syst_down"].values,
        add_syst(ev, source, nsig).content,
        n_weights,
    )

    source_lower = source.lower()
    use_syst = "syst" in source_lower
    sfup = sf_systup if use_syst else sf_statup
    sfdown = sf_systdown if use_syst else sf_statdown

    # Only shift the weight when the source refers to this object weight.
    source_matches = (
        name.lower() == source_lower.replace("stat", "").replace("syst", "")
    )
    return awk.JaggedArray(
        event_vars[0].starts,
        event_vars[0].stops,
        weight_numba(sf, nsig if source_matches else 0., sfup, sfdown),
    )
def jer_corr(ev, source, nsig, jersfs, maxdr_jets_with_genjets,
             ndpt_jets_with_genjets):
    """Return the per-jet ``Jet_JECjerSF[Up|Down]`` array.

    The nominal branch is requested unless ``source == "jerSF"`` and
    ``nsig`` is non-zero, in which case the sign of ``nsig`` selects ``Up``
    or ``Down``. If the requested branch is missing, all three branches are
    computed and attached to ``ev``:

    * jets with a gen-jet match (index >= 0 from
      ``match_jets_from_genjets``) get ``1 + (sf - 1) * g`` with ``g`` drawn
      from a normal distribution of width ``|1 - pt_gen / pt|``;
    * the other jets get ``1 + g * sqrt(max(sf**2 - 1, 0))`` with ``g``
      drawn from a normal distribution of width ``ev.Jet_ptResolution(ev)``.

    Negative factors are set to zero, and the up/down factors are stored
    relative to the nominal one (zero where the nominal factor is zero).

    Args:
        ev: Event object providing jet and gen-jet branches and
            ``hasbranch``.
        source: Name of the systematic source being evaluated.
        nsig: Size/sign of the shift.
        jersfs: Table with ``eta_low``, ``eta_high``, ``corr``, ``corr_up``
            and ``corr_down`` columns.
        maxdr_jets_with_genjets: Passed to ``match_jets_from_genjets``.
        ndpt_jets_with_genjets: Passed to ``match_jets_from_genjets``.

    Returns:
        The requested branch read back from ``ev``.
    """
    flavour = "jerSF"
    if source == "jerSF" and nsig != 0.:
        flavour += "Up" if nsig > 0. else "Down"
    branch = "Jet_JEC{}".format(flavour)

    if ev.hasbranch(branch):
        return getattr(ev, branch)

    eta_bins = get_bin_indices(
        [ev.Jet_eta.content],
        [jersfs["eta_low"].values],
        [jersfs["eta_high"].values],
        1,
    )[:, 0]
    sf_table = jersfs.iloc[eta_bins][["corr", "corr_up", "corr_down"]].values

    # One factor array per sf_table column: nominal, up, down.
    factors = [
        np.ones_like(ev.Jet_ptJESOnly.content, dtype=np.float32)
        for _ in range(3)
    ]
    nominal, shifted_up, shifted_down = factors

    # Jets with a matched gen jet.
    gen_index = match_jets_from_genjets(
        ev, maxdr_jets_with_genjets, ndpt_jets_with_genjets,
    )
    has_match = (gen_index >= 0)
    flat_gen_index = (ev.GenJet_pt.starts + gen_index[has_match]).content
    matched_gen_pt = ev.GenJet_pt.content[flat_gen_index]
    matched = has_match.content
    unmatched = ~matched

    width = np.abs(1. - matched_gen_pt / ev.Jet_ptJESOnly.content[matched])
    smear = np.random.normal(0., width)
    for column, factor in enumerate(factors):
        factor[matched] = 1. + (sf_table[matched, column] - 1.) * smear

    # Jets without a matched gen jet (drawn after the matched ones so the
    # random-number sequence is unchanged).
    smear = np.random.normal(0., ev.Jet_ptResolution(ev).content[unmatched])
    excess = sf_table[unmatched]**2 - 1.
    excess[excess < 0.] = 0.
    for column, factor in enumerate(factors):
        factor[unmatched] = 1. + smear * np.sqrt(excess[:, column])

    # Negative factors are not allowed.
    for factor in factors:
        factor[factor < 0.] = 0.

    # Express the shifted factors relative to the nominal one.
    positive = nominal > 0.
    vanishing = nominal == 0.
    for shifted in (shifted_up, shifted_down):
        relative = shifted / nominal - 1.
        shifted[positive] = relative[positive]
        shifted[vanishing] = 0.

    # Write to event.
    starts, stops = ev.Jet_ptJESOnly.starts, ev.Jet_ptJESOnly.stops
    ev.Jet_JECjerSF = awk.JaggedArray(starts, stops, nominal)
    ev.Jet_JECjerSFUp = awk.JaggedArray(starts, stops, shifted_up)
    ev.Jet_JECjerSFDown = awk.JaggedArray(starts, stops, shifted_down)

    return getattr(ev, branch)