def fill_gen_jets(df, output):
    """Fill generator-level jet and dijet columns of ``output``.

    Gen jets that lie within dR <= 0.3 of any generator particle with
    |pdgId| of 11, 13 or 15 are discarded. Of the remaining jets, the ones
    at sub-entry 0 and 1 of each event are stored as ``gjet1_*`` and
    ``gjet2_*``; their four-vector sum (as returned by ``p4_sum``) is stored
    as ``gjj_*`` together with the dEta/dPhi/dR separation of the two jets.

    Args:
        df: Event container exposing ``GenJet`` and ``GenPart`` collections
            (presumably awkward arrays -- confirm against the caller).
        output: Table that receives the new columns; it is modified in place.

    Returns:
        The same ``output`` object.
    """
    kinematics = ["pt", "eta", "phi", "mass"]

    gjets = df.GenJet
    # Compute |pdgId| once instead of once per lepton flavour.
    abs_pdg = abs(df.GenPart.pdgId)
    gleptons = df.GenPart[(abs_pdg == 13) | (abs_pdg == 11) | (abs_pdg == 15)]

    # nested=True keeps one inner list of leptons per jet, so the reduction
    # over the last axis below yields one flag per jet.
    gl_pair = ak.cartesian(
        {"jet": gjets, "lepton": gleptons}, axis=1, nested=True
    )
    _, _, dr_gl = delta_r(
        gl_pair["jet"].eta,
        gl_pair["lepton"].eta,
        gl_pair["jet"].phi,
        gl_pair["lepton"].phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)

    # Convert the isolated jets to pandas a single time and slice both jets
    # out of the same frame.
    iso_jets = ak.to_pandas(gjets[isolated])
    gjet1 = iso_jets.loc[pd.IndexSlice[:, 0], kinematics]
    gjet2 = iso_jets.loc[pd.IndexSlice[:, 1], kinematics]
    # Drop the jet position so both frames are indexed by event only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    gjsum = p4_sum(gjet1, gjet2)
    for var in kinematics:
        output[f"gjet1_{var}"] = gjet1[var]
        output[f"gjet2_{var}"] = gjet2[var]
        output[f"gjj_{var}"] = gjsum[var]

    output["gjj_dEta"], output["gjj_dPhi"], output["gjj_dR"] = delta_r(
        output.gjet1_eta, output.gjet2_eta, output.gjet1_phi, output.gjet2_phi
    )
    return output
def fill_muons(processor, output, mu1, mu2, is_mc):
    """Fill single-muon and dimuon columns of ``output``.

    Every column listed below is first created with a 0.0 placeholder and
    then overwritten with the per-muon quantities, the dimuon four-vector
    sum from ``p4_sum``, the angular separation from ``delta_r``, the
    event-by-event mass resolution from ``mass_resolution`` and the two
    values returned by ``cs_variables``.

    Args:
        processor: Object providing ``evaluator`` and ``year``; both are
            forwarded to ``mass_resolution``.
        output: Table that receives the columns; it is modified in place.
        mu1: First muon; must provide ``pt``, ``ptErr``, ``eta``, ``phi``
            and ``pfRelIso04_all``.
        mu2: Second muon, same requirements as ``mu1``.
        is_mc: Flag forwarded unchanged to ``mass_resolution``.
    """
    mu1_variable_names = [
        "mu1_pt", "mu1_pt_over_mass", "mu1_eta", "mu1_phi", "mu1_iso",
    ]
    mu2_variable_names = [
        "mu2_pt", "mu2_pt_over_mass", "mu2_eta", "mu2_phi", "mu2_iso",
    ]
    dimuon_variable_names = [
        "dimuon_mass",
        "dimuon_ebe_mass_res",
        "dimuon_ebe_mass_res_rel",
        "dimuon_pt",
        "dimuon_pt_log",
        "dimuon_eta",
        "dimuon_phi",
        "dimuon_dEta",
        "dimuon_dPhi",
        "dimuon_dR",
        "dimuon_rap",
        "dimuon_cos_theta_cs",
        "dimuon_phi_cs",
    ]
    v_names = mu1_variable_names + mu2_variable_names + dimuon_variable_names

    # Initialize columns for muon variables
    for n in v_names:
        output[n] = 0.0

    # Fill single muon variables
    for v in ["pt", "ptErr", "eta", "phi"]:
        output[f"mu1_{v}"] = mu1[v]
        output[f"mu2_{v}"] = mu2[v]
    output["mu1_iso"] = mu1.pfRelIso04_all
    output["mu2_iso"] = mu2.pfRelIso04_all

    # Fill dimuon variables
    mm = p4_sum(mu1, mu2)
    for v in ["pt", "eta", "phi", "mass", "rap"]:
        name = f"dimuon_{v}"
        output[name] = mm[v]
        output[name] = output[name].fillna(-999.0)

    # These ratios need the real dimuon mass, so they must be computed only
    # after dimuon_mass has replaced its 0.0 placeholder; dividing earlier
    # yields inf/NaN for every row.
    output["mu1_pt_over_mass"] = output.mu1_pt / output.dimuon_mass
    output["mu2_pt_over_mass"] = output.mu2_pt / output.dimuon_mass

    output["dimuon_pt_log"] = np.log(output.dimuon_pt)

    mm_deta, mm_dphi, mm_dr = delta_r(mu1.eta, mu2.eta, mu1.phi, mu2.phi)
    output["dimuon_dEta"] = mm_deta
    output["dimuon_dPhi"] = mm_dphi
    output["dimuon_dR"] = mm_dr

    # mass_resolution receives the whole table, so it is called only once
    # the single-muon and dimuon columns above are in place.
    output["dimuon_ebe_mass_res"] = mass_resolution(
        is_mc, processor.evaluator, output, processor.year
    )
    output["dimuon_ebe_mass_res_rel"] = (
        output.dimuon_ebe_mass_res / output.dimuon_mass
    )

    output["dimuon_cos_theta_cs"], output["dimuon_phi_cs"] = cs_variables(
        mu1, mu2
    )
def gen_jet_pair_mass(df):
    """Return the mass of the pair of isolated generator-level jets.

    Gen jets within dR <= 0.3 of any generator particle with |pdgId| of 11,
    13 or 15 are discarded. The jets at sub-entry 0 and 1 of each event among
    the survivors are combined with ``p4_sum`` and the ``mass`` of that sum
    is returned.

    Args:
        df: Event container exposing ``GenJet`` and ``GenPart`` collections
            (presumably awkward arrays -- confirm against the caller).

    Returns:
        The ``mass`` attribute of the ``p4_sum`` result, or ``None`` when no
        gen jet passes the isolation requirement.
    """
    kinematics = ["pt", "eta", "phi", "mass"]

    gjets = df.GenJet
    # Compute |pdgId| once instead of once per lepton flavour.
    abs_pdg = abs(df.GenPart.pdgId)
    gleptons = df.GenPart[(abs_pdg == 13) | (abs_pdg == 11) | (abs_pdg == 15)]

    # nested=True keeps one inner list of leptons per jet, so the reduction
    # over the last axis below yields one flag per jet.
    gl_pair = ak.cartesian(
        {"jet": gjets, "lepton": gleptons}, axis=1, nested=True
    )
    _, _, dr_gl = delta_r(
        gl_pair["jet"].eta,
        gl_pair["lepton"].eta,
        gl_pair["jet"].phi,
        gl_pair["lepton"].phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)
    iso_jets = gjets[isolated]

    if not ak.count(iso_jets, axis=None) > 0:
        return None

    # Project onto the four kinematic fields before converting, and convert
    # once: both jets are sliced out of the same frame.
    jets_frame = ak.to_pandas(iso_jets[kinematics])
    gjet1 = jets_frame.loc[pd.IndexSlice[:, 0], kinematics]
    gjet2 = jets_frame.loc[pd.IndexSlice[:, 1], kinematics]
    # Drop the jet position so both frames are indexed by event only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    return p4_sum(gjet1, gjet2).mass
def fill_muons(output, mu1, mu2):
    """Fill single-muon and dimuon columns of ``output``.

    Every column listed below is first created with a 0.0 placeholder and
    then overwritten with the per-muon quantities, the dimuon four-vector
    sum from ``p4_sum``, the angular separation from ``delta_r`` and the two
    values returned by ``cs_variables``. ``mu1_iso`` and ``mu2_iso`` are not
    filled here and keep the 0.0 placeholder.

    Note that a constant ``mass`` entry is written into the caller's ``mu1``
    and ``mu2`` objects before the four-vector sum is taken.

    Args:
        output: Table that receives the columns; it is modified in place.
        mu1: First muon; must provide ``pt``, ``eta``, ``phi`` and ``charge``
            and accept item assignment of ``"mass"``.
        mu2: Second muon, same requirements as ``mu1``.
    """
    mu1_variable_names = [
        "mu1_pt",
        "mu1_pt_over_mass",
        "mu1_eta",
        "mu1_phi",
        "mu1_iso",
        "mu1_charge",
    ]
    mu2_variable_names = [
        "mu2_pt",
        "mu2_pt_over_mass",
        "mu2_eta",
        "mu2_phi",
        "mu2_iso",
        "mu2_charge",
    ]
    dimuon_variable_names = [
        "dimuon_mass",
        "dimuon_pt",
        "dimuon_pt_log",
        "dimuon_eta",
        "dimuon_phi",
        "dimuon_dEta",
        "dimuon_dPhi",
        "dimuon_dR",
        "dimuon_rap",
        "dimuon_cos_theta_cs",
        "dimuon_phi_cs",
    ]
    v_names = mu1_variable_names + mu2_variable_names + dimuon_variable_names

    # Initialize columns for muon variables
    for n in v_names:
        output[n] = 0.0

    # Fill single muon variables
    for v in ["pt", "eta", "phi", "charge"]:
        output[f"mu1_{v}"] = mu1[v]
        output[f"mu2_{v}"] = mu2[v]

    # Constant mass assigned to both muons (presumably the muon mass in
    # GeV -- confirm); this mutates the objects passed in by the caller.
    mu1["mass"] = mu2["mass"] = 0.10566

    # Fill dimuon variables
    mm = p4_sum(mu1, mu2)
    for v in ["pt", "eta", "phi", "mass", "rap"]:
        name = f"dimuon_{v}"
        output[name] = mm[v]
        output[name] = output[name].fillna(-999.0)

    # The pt/mass ratios are declared above, so fill them instead of leaving
    # the 0.0 placeholder; this has to happen after dimuon_mass is known.
    output["mu1_pt_over_mass"] = output.mu1_pt / output.dimuon_mass
    output["mu2_pt_over_mass"] = output.mu2_pt / output.dimuon_mass

    output["dimuon_pt_log"] = np.log(output.dimuon_pt)

    mm_deta, mm_dphi, mm_dr = delta_r(mu1.eta, mu2.eta, mu1.phi, mu2.phi)
    output["dimuon_dEta"] = mm_deta
    output["dimuon_dPhi"] = mm_dphi
    output["dimuon_dR"] = mm_dr

    output["dimuon_cos_theta_cs"], output["dimuon_phi_cs"] = cs_variables(
        mu1, mu2
    )
def fill_softjets(df, output, variables, cutoff):
    """Store soft-activity jet multiplicity and HT in ``variables``.

    Soft-activity jets closer than dR < 0.4 to either muon (``output``) or to
    either of the two jets (``variables``) are flagged for removal. Among the
    soft jets with ``pt > cutoff``, the unflagged ones are counted per event
    and the pt of the flagged ones is summed into a "footprint" that is
    subtracted from the stored HT (floored at zero). The corrected values
    replace the stored ones only for events where ``output.two_muons`` is set
    or ``variables.njets > 0``.

    Args:
        df: Event container exposing ``SoftActivityJet`` and the
            ``SoftActivityJetNjets{cutoff}`` / ``SoftActivityJetHT{cutoff}``
            fields.
        output: Table providing ``two_muons`` and the ``mu1``/``mu2``
            ``eta``/``phi`` columns.
        variables: Table providing ``njets`` and the ``jet1``/``jet2``
            ``eta``/``phi`` columns; receives ``nsoftjets{cutoff}`` and
            ``htsoft{cutoff}``.
        cutoff: pt threshold; also the suffix of the field/column names.
    """
    njets_field = f"SoftActivityJetNjets{cutoff}"
    ht_field = f"SoftActivityJetHT{cutoff}"

    soft = ak.to_pandas(df.SoftActivityJet)
    soft["mass"] = 0.0

    summary = ak.to_pandas(df[[njets_field, ht_field]])
    summary["to_correct"] = output.two_muons | (variables.njets > 0)

    # Objects whose neighbourhood must be cleared of soft jets, keyed by the
    # name of the overlap-flag column they produce.
    anchors = {
        "dR_m1": (output.mu1_eta, output.mu1_phi),
        "dR_m2": (output.mu2_eta, output.mu2_phi),
        "dR_j1": (variables.jet1_eta, variables.jet1_phi),
        "dR_j2": (variables.jet2_eta, variables.jet2_phi),
    }
    for flag_name, (anchor_eta, anchor_phi) in anchors.items():
        _, _, separation = delta_r(soft.eta, anchor_eta, soft.phi, anchor_phi)
        soft[flag_name] = separation < 0.4

    dr_cols = list(anchors)
    soft[dr_cols] = soft[dr_cols].fillna(False)
    # A soft jet is removed if it overlaps with at least one anchor.
    soft["to_remove"] = soft[dr_cols].sum(axis=1).astype(bool)

    above_cutoff = soft.pt > cutoff
    kept = soft[(~soft.to_remove) & above_cutoff]
    removed = soft[(soft.to_remove) & above_cutoff]

    # Events without any surviving / removed soft jet are absent from the
    # grouped results, hence the fill after alignment on the event index.
    summary["njets_corrected"] = (
        kept.reset_index().groupby("entry")["subentry"].nunique()
    )
    summary["njets_corrected"] = (
        summary["njets_corrected"].fillna(0).astype(int)
    )
    summary["footprint"] = removed.pt.groupby(level=[0]).sum()
    summary["footprint"] = summary["footprint"].fillna(0.0)

    summary["ht_corrected"] = summary[ht_field] - summary.footprint
    summary.loc[summary.ht_corrected < 0, "ht_corrected"] = 0.0

    needs_fix = summary.to_correct
    summary.loc[needs_fix, njets_field] = summary.loc[
        needs_fix, "njets_corrected"
    ]
    summary.loc[needs_fix, ht_field] = summary.loc[needs_fix, "ht_corrected"]

    variables[f"nsoftjets{cutoff}"] = summary[njets_field]
    variables[f"htsoft{cutoff}"] = summary[ht_field]
def fill_gen_jets(df, output):
    """Fill generator-level jet and dijet columns of ``output``.

    Works on collections whose kinematic fields are named ``PT``, ``Eta``,
    ``Phi`` and ``Mass``. Gen jets within dR <= 0.3 of any object in
    ``df.MuonMedium`` are discarded. Of the remaining jets, the ones at
    sub-entry 0 and 1 of each event are stored as ``gjet1_*`` and
    ``gjet2_*``; their four-vector sum (as returned by ``p4_sum``) is stored
    as ``gjj_*`` together with the dEta/dPhi/dR separation of the two jets.

    Args:
        df: Event container exposing ``GenJet`` and ``MuonMedium``
            collections (presumably awkward arrays -- confirm).
        output: Table that receives the new columns; it is modified in place.

    Returns:
        The same ``output`` object.
    """
    features = ["PT", "Eta", "Phi", "Mass"]
    # Lower-case aliases used for the output columns and added to the jet
    # frames before they are handed to p4_sum.
    feat_map = {"pt": "PT", "eta": "Eta", "phi": "Phi", "mass": "Mass"}

    gjets = df.GenJet[features]
    # NOTE(review): the MuonMedium collection stands in for generator-level
    # leptons here -- confirm this is intended for this input format.
    gleptons = df.MuonMedium

    # nested=True keeps one inner list of leptons per jet, so the reduction
    # over the last axis below yields one flag per jet.
    gl_pair = ak.cartesian(
        {"jet": gjets, "lepton": gleptons}, axis=1, nested=True
    )
    _, _, dr_gl = delta_r(
        gl_pair["jet"].Eta,
        gl_pair["lepton"].Eta,
        gl_pair["jet"].Phi,
        gl_pair["lepton"].Phi,
    )
    isolated = ak.all((dr_gl > 0.3), axis=-1)

    # Convert the isolated jets to pandas a single time and slice both jets
    # out of the same frame.
    iso_jets = ak.to_pandas(gjets[isolated])
    gjet1 = iso_jets.loc[pd.IndexSlice[:, 0], features]
    gjet2 = iso_jets.loc[pd.IndexSlice[:, 1], features]
    # Drop the jet position so both frames are indexed by event only.
    gjet1.index = gjet1.index.droplevel("subentry")
    gjet2.index = gjet2.index.droplevel("subentry")

    for var, source in feat_map.items():
        gjet1[var] = gjet1[source]
        gjet2[var] = gjet2[source]

    gjsum = p4_sum(gjet1, gjet2)
    for var in feat_map:
        output[f"gjet1_{var}"] = gjet1[var]
        output[f"gjet2_{var}"] = gjet2[var]
        output[f"gjj_{var}"] = gjsum[var]

    output["gjj_dEta"], output["gjj_dPhi"], output["gjj_dR"] = delta_r(
        output.gjet1_eta, output.gjet2_eta, output.gjet1_phi, output.gjet2_phi
    )
    return output
def fill_jets(output, jet1, jet2):
    """Fill single-jet, dijet and dimuon+dijet columns of ``output``.

    All columns listed in ``variable_names`` are created with a -999.0
    placeholder, then overwritten with quantities derived from the two jets
    and from the ``dimuon_*`` columns that must already be present in
    ``output``. Any value that is still missing at the end is set to -999.0.

    Args:
        output: Table holding ``dimuon_pt``, ``dimuon_eta``, ``dimuon_phi``,
            ``dimuon_mass`` and ``dimuon_rap``; receives the jet columns and
            is modified in place.
        jet1: First jet; must provide ``pt``, ``eta``, ``phi`` and whatever
            ``rapidity`` and ``p4_sum`` read.
        jet2: Second jet, same requirements as ``jet1``.
    """
    variable_names = [
        "jet1_pt",
        "jet1_eta",
        "jet1_rap",
        "jet1_phi",
        "jet2_pt",
        "jet2_eta",
        "jet2_rap",
        "jet2_phi",
        "jj_mass",
        "jj_mass_log",
        "jj_pt",
        "jj_eta",
        "jj_phi",
        "jj_dEta",
        "jj_dPhi",
        "mmj1_dEta",
        "mmj1_dPhi",
        "mmj1_dR",
        "mmj2_dEta",
        "mmj2_dPhi",
        "mmj2_dR",
        "mmj_min_dEta",
        "mmj_min_dPhi",
        "mmjj_pt",
        "mmjj_eta",
        "mmjj_phi",
        "mmjj_mass",
        "rpt",
        "zeppenfeld",
        "ll_zstar_log",
    ]
    kinematics = ["pt", "eta", "phi", "mass"]

    for v in variable_names:
        output[v] = -999.0

    # Columns are written with item assignment throughout: attribute
    # assignment only reaches a column when that column already exists.

    # Fill single jet variables
    for v in ["pt", "eta", "phi"]:
        output[f"jet1_{v}"] = jet1[v]
        output[f"jet2_{v}"] = jet2[v]
    output["jet1_rap"] = rapidity(jet1)
    output["jet2_rap"] = rapidity(jet2)

    # Fill dijet variables
    jj = p4_sum(jet1, jet2)
    for v in kinematics:
        output[f"jj_{v}"] = jj[v]
    output["jj_mass_log"] = np.log(output.jj_mass)
    output["jj_dEta"], output["jj_dPhi"], _ = delta_r(
        output.jet1_eta, output.jet2_eta, output.jet1_phi, output.jet2_phi
    )

    # Fill dimuon-dijet system variables
    mm_columns = ["dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass"]
    jj_columns = ["jj_pt", "jj_eta", "jj_phi", "jj_mass"]
    dimuons = output.loc[:, mm_columns]
    dijets = output.loc[:, jj_columns]
    # Derive the short names from the labels that were actually returned
    # rather than relabelling by position, so each quantity keeps its own
    # name whatever order the selection comes back in.
    dimuons.columns = [
        label[len("dimuon_"):]
        for label in dimuons.columns.get_level_values(0)
    ]
    dijets.columns = [
        label[len("jj_"):] for label in dijets.columns.get_level_values(0)
    ]

    mmjj = p4_sum(dimuons, dijets)
    for v in kinematics:
        output[f"mmjj_{v}"] = mmjj[v]

    output["zeppenfeld"] = output.dimuon_eta - 0.5 * (
        output.jet1_eta + output.jet2_eta
    )
    output["rpt"] = output.mmjj_pt / (
        output.dimuon_pt + output.jet1_pt + output.jet2_pt
    )

    ll_ystar = output.dimuon_rap - (output.jet1_rap + output.jet2_rap) / 2
    ll_zstar = abs(ll_ystar / (output.jet1_rap - output.jet2_rap))
    output["ll_zstar_log"] = np.log(ll_zstar)

    output["mmj1_dEta"], output["mmj1_dPhi"], output["mmj1_dR"] = delta_r(
        output.dimuon_eta, output.jet1_eta, output.dimuon_phi, output.jet1_phi
    )
    output["mmj2_dEta"], output["mmj2_dPhi"], output["mmj2_dR"] = delta_r(
        output.dimuon_eta, output.jet2_eta, output.dimuon_phi, output.jet2_phi
    )

    # np.where(condition, a, b): take the jet-1 value where it is the
    # smaller one and the jet-2 value otherwise, i.e. the element-wise min.
    output["mmj_min_dEta"] = np.where(
        output.mmj1_dEta < output.mmj2_dEta,
        output.mmj1_dEta,
        output.mmj2_dEta,
    )
    output["mmj_min_dPhi"] = np.where(
        output.mmj1_dPhi < output.mmj2_dPhi,
        output.mmj1_dPhi,
        output.mmj2_dPhi,
    )

    output[variable_names] = output[variable_names].fillna(-999.0)
def fill_jets(output, variables, jet1, jet2):
    """Fill single-jet, dijet and dimuon+dijet columns of ``variables``.

    All columns listed in ``variable_names`` are created in ``variables``
    with a -999.0 placeholder, then overwritten with quantities derived from
    the two jets and from the ``dimuon_*`` columns of ``output``. The
    ``nsoftjets*``, ``htsoft*`` and ``selection`` columns are only
    initialised here and keep the placeholder.

    Args:
        output: Table holding ``dimuon_pt``, ``dimuon_eta``, ``dimuon_phi``,
            ``dimuon_mass`` and ``dimuon_rap``; only read.
        variables: Table that receives the jet columns; modified in place.
        jet1: First jet; must provide ``pt``, ``eta``, ``phi``, ``qgl``,
            ``jetId``, ``puId`` and whatever ``rapidity`` and ``p4_sum``
            read.
        jet2: Second jet, same requirements as ``jet1``.
    """
    variable_names = [
        "jet1_pt",
        "jet1_eta",
        "jet1_rap",
        "jet1_phi",
        "jet1_qgl",
        "jet1_jetId",
        "jet1_puId",
        "jet2_pt",
        "jet2_eta",
        "jet2_rap",
        "jet2_phi",
        "jet2_qgl",
        "jet2_jetId",
        "jet2_puId",
        "jj_mass",
        "jj_mass_log",
        "jj_pt",
        "jj_eta",
        "jj_phi",
        "jj_dEta",
        "jj_dPhi",
        "mmj1_dEta",
        "mmj1_dPhi",
        "mmj1_dR",
        "mmj2_dEta",
        "mmj2_dPhi",
        "mmj2_dR",
        "mmj_min_dEta",
        "mmj_min_dPhi",
        "mmjj_pt",
        "mmjj_eta",
        "mmjj_phi",
        "mmjj_mass",
        "rpt",
        "zeppenfeld",
        "ll_zstar_log",
        "nsoftjets2",
        "nsoftjets5",
        "htsoft2",
        "htsoft5",
        "selection",
    ]
    kinematics = ["pt", "eta", "phi", "mass"]

    for v in variable_names:
        variables[v] = -999.0

    # Columns are written with item assignment throughout: attribute
    # assignment only reaches a column when that column already exists.

    # Fill single jet variables
    for v in ["pt", "eta", "phi", "qgl", "jetId", "puId"]:
        variables[f"jet1_{v}"] = jet1[v]
        variables[f"jet2_{v}"] = jet2[v]
    variables["jet1_rap"] = rapidity(jet1)
    variables["jet2_rap"] = rapidity(jet2)

    # Fill dijet variables
    jj = p4_sum(jet1, jet2)
    for v in kinematics:
        variables[f"jj_{v}"] = jj[v]
    variables["jj_mass_log"] = np.log(variables.jj_mass)
    variables["jj_dEta"], variables["jj_dPhi"], _ = delta_r(
        variables.jet1_eta,
        variables.jet2_eta,
        variables.jet1_phi,
        variables.jet2_phi,
    )

    # Fill dimuon-dijet system variables
    mm_columns = ["dimuon_pt", "dimuon_eta", "dimuon_phi", "dimuon_mass"]
    jj_columns = ["jj_pt", "jj_eta", "jj_phi", "jj_mass"]
    dimuons = output.loc[:, mm_columns]
    dijets = variables.loc[:, jj_columns]
    # Derive the short names from the labels that were actually returned
    # rather than relabelling by position, so each quantity keeps its own
    # name whatever order the selection comes back in.
    dimuons.columns = [
        label[len("dimuon_"):]
        for label in dimuons.columns.get_level_values(0)
    ]
    dijets.columns = [
        label[len("jj_"):] for label in dijets.columns.get_level_values(0)
    ]

    mmjj = p4_sum(dimuons, dijets)
    for v in kinematics:
        variables[f"mmjj_{v}"] = mmjj[v]

    variables["zeppenfeld"] = output.dimuon_eta - 0.5 * (
        variables.jet1_eta + variables.jet2_eta
    )
    variables["rpt"] = variables.mmjj_pt / (
        output.dimuon_pt + variables.jet1_pt + variables.jet2_pt
    )

    ll_ystar = output.dimuon_rap - (variables.jet1_rap + variables.jet2_rap) / 2
    ll_zstar = abs(ll_ystar / (variables.jet1_rap - variables.jet2_rap))
    variables["ll_zstar_log"] = np.log(ll_zstar)

    (
        variables["mmj1_dEta"],
        variables["mmj1_dPhi"],
        variables["mmj1_dR"],
    ) = delta_r(
        output.dimuon_eta,
        variables.jet1_eta,
        output.dimuon_phi,
        variables.jet1_phi,
    )
    (
        variables["mmj2_dEta"],
        variables["mmj2_dPhi"],
        variables["mmj2_dR"],
    ) = delta_r(
        output.dimuon_eta,
        variables.jet2_eta,
        output.dimuon_phi,
        variables.jet2_phi,
    )

    # np.where(condition, a, b): take the jet-1 value where it is the
    # smaller one and the jet-2 value otherwise, i.e. the element-wise min.
    variables["mmj_min_dEta"] = np.where(
        variables.mmj1_dEta < variables.mmj2_dEta,
        variables.mmj1_dEta,
        variables.mmj2_dEta,
    )
    variables["mmj_min_dPhi"] = np.where(
        variables.mmj1_dPhi < variables.mmj2_dPhi,
        variables.mmj1_dPhi,
        variables.mmj2_dPhi,
    )