def merge_closeby_particles(g, pid=22, deltar_cut=0.001):
    """Merge same-type particles ('tp' or 'sc' nodes) of PDG id *pid* that
    lie within *deltar_cut* of each other in (eta, phi).

    For each close pair the first node absorbs the summed four-momentum and
    the outgoing edge weights of the second, which is then removed from *g*.
    The graph is modified in place.
    """
    # Candidate nodes of the requested particle type.
    candidates = [
        node for node in g.nodes
        if g.nodes[node]["typ"] == pid and node[0] in ("tp", "sc")
    ]
    etas = [g.nodes[node]["eta"] for node in candidates]
    phis = [g.nodes[node]["phi"] for node in candidates]

    idx_a, idx_b = deltar_pairs(etas, phis, deltar_cut)
    pairs = [(candidates[i], candidates[j]) for i, j in zip(idx_a, idx_b)]

    for keep, drop in pairs:
        # A node may already have been merged away by an earlier pair.
        if keep not in g.nodes or drop not in g.nodes:
            continue

        total = vector.obj(pt=0, eta=0, phi=0, E=0)
        for node in (keep, drop):
            total += vector.obj(
                pt=g.nodes[node]["pt"],
                eta=g.nodes[node]["eta"],
                phi=g.nodes[node]["phi"],
                E=g.nodes[node]["e"],
            )
        g.nodes[keep]["pt"] = total.pt
        g.nodes[keep]["eta"] = total.eta
        g.nodes[keep]["phi"] = total.phi
        g.nodes[keep]["e"] = total.energy

        # add edge weights from the deleted photon to the remaining photon
        for suc in g.successors(drop):
            if (keep, suc) in g.edges:
                g.edges[(keep, suc)]["weight"] += g.edges[(drop, suc)]["weight"]
        g.remove_nodes_from([drop])
def test_mul():
    """Scalar multiplication works from either side; vector*vector raises."""
    expected_obj = vector.obj(x=10, y=50)
    assert v1 * 10 == expected_obj
    assert 10 * v1 == expected_obj

    expected_arr = vector.array({"x": [10, 20, 30, 40], "y": [50, 60, 70, 80]})
    assert numpy.allclose(a1 * 10, expected_arr)
    assert numpy.allclose(10 * a1, expected_arr)

    # Multiplying two vectors is undefined.
    with pytest.raises(TypeError):
        v1 * v2
    with pytest.raises(TypeError):
        a1 * a2
def items2vectors(events, names):
    """Build four-vectors for the collections requested in *names*.

    Recognizes "selectedPhoton" (mass column) and "FatJet" (softdrop mass);
    returns the vectors in that fixed order, skipping absent collections.
    """
    out = []
    if "selectedPhoton" in names:
        photon_vec = vector.obj(
            pt=events['selectedPhoton_pt'],
            eta=events['selectedPhoton_eta'],
            phi=events['selectedPhoton_phi'],
            M=events['selectedPhoton_mass'],
        )
        out.append(photon_vec)
    if "FatJet" in names:
        fatjet_vec = vector.obj(
            pt=events['FatJet_pt'],
            eta=events['FatJet_eta'],
            phi=events['FatJet_phi'],
            M=events['FatJet_msoftdrop'],
        )
        out.append(fatjet_vec)
    return out
def pvec(vec4):
    """Return the spatial (3D) part of the four-vector *vec4*."""
    return vector.obj(x=vec4.x, y=vec4.y, z=vec4.z)
def getcosthetastar_cs(events, diphoton, fatjet):
    """Return cos(theta*) of the diphoton in the Collins-Soper frame of the
    diphoton + fatjet (HH candidate) system.

    Reference implementation:
    https://github.com/cms-analysis/flashgg/blob/1453740b1e4adc7184d5d8aa8a981bdb6b2e5f8e/DataFormats/src/DoubleHTag.cc#L41
    """
    beam_energy = 6500  # half of sqrt(s) = 13 TeV, in GeV
    nevts = len(events)
    costhetastar_cs = np.ones(nevts) * -999  # is it needed? (unused below)
    # Massless beam four-vectors along +z and -z.
    p1 = vector.obj(
        x=np.zeros(nevts),
        y=np.zeros(nevts),
        z=np.ones(nevts) * beam_energy,
        t=np.ones(nevts) * beam_energy,
    )
    p2 = vector.obj(
        x=np.zeros(nevts),
        y=np.zeros(nevts),
        z=np.ones(nevts) * beam_energy * (-1),
        t=np.ones(nevts) * beam_energy,
    )
    # Boost everything into the rest frame of the HH candidate.
    hh = diphoton + fatjet
    boostvec = hh.to_beta3() * -1
    p1_boost = p1.boost(boostvec)
    p2_boost = p2.boost(boostvec)
    # Collins-Soper axis: bisector of the two boosted beam directions.
    CSaxis = (pvec(p1_boost).unit() - pvec(p2_boost).unit()).unit()
    diphoton_boost = diphoton.boost(boostvec)
    diphoton_vec_unit = pvec(diphoton_boost).unit()
    return CSaxis.dot(diphoton_vec_unit)
def test_array_casting():
    """numpy.asanyarray converts each object flavour to its 0-d array twin."""
    # (constructor kwargs, expected object class, expected array class)
    cases = [
        (dict(x=1, y=1), vector.VectorObject2D, vector.VectorNumpy2D),
        (dict(px=1, py=1), vector.MomentumObject2D, vector.MomentumNumpy2D),
        (dict(x=1, y=1, z=1), vector.VectorObject3D, vector.VectorNumpy3D),
        (dict(px=1, py=1, pz=1), vector.MomentumObject3D, vector.MomentumNumpy3D),
        (dict(x=1, y=1, z=1, t=1), vector.VectorObject4D, vector.VectorNumpy4D),
        (dict(px=1, py=1, pz=1, E=1), vector.MomentumObject4D, vector.MomentumNumpy4D),
    ]
    for kwargs, obj_cls, arr_cls in cases:
        obj = vector.obj(**kwargs)
        assert isinstance(obj, obj_cls)
        as_array = numpy.asanyarray(obj)
        assert isinstance(as_array, arr_cls)
        assert as_array.shape == ()
def test_truediv():
    """Vectors divide by scalars; every other combination raises."""
    assert v1 / 10 == vector.obj(x=0.1, y=0.5)
    # Scalar divided by a vector is not defined.
    with pytest.raises(TypeError):
        10 / v1

    expected = vector.array({"x": [0.1, 0.2, 0.3, 0.4], "y": [0.5, 0.6, 0.7, 0.8]})
    assert numpy.allclose(a1 / 10, expected)
    with pytest.raises(TypeError):
        10 / a1

    # Vector / vector is not defined either.
    with pytest.raises(TypeError):
        v1 / v2
    with pytest.raises(TypeError):
        a1 / a2
def test_sub():
    """Subtraction of objects and arrays, including object/array broadcasting."""
    assert v1 - v2 == vector.obj(x=-9, y=-15)
    assert numpy.allclose(
        a1 - a2,
        vector.array({"x": [-9, -98, -997, -9996], "y": [-15, -194, -1993, -19992]}),
    )
    assert numpy.allclose(
        v1 - a2,
        vector.array({"x": [-9, -99, -999, -9999], "y": [-15, -195, -1995, -19995]}),
    )
    assert numpy.allclose(
        a2 - v1,
        vector.array({"x": [9, 99, 999, 9999], "y": [15, 195, 1995, 19995]}),
    )
    # Mixing a vector with a bare scalar raises in both directions.
    for bad in (lambda: v1 - 5, lambda: 5 - v1):
        with pytest.raises(TypeError):
            bad()
def test_add():
    """Addition of objects and arrays, including object/array broadcasting."""
    assert v1 + v2 == vector.obj(x=11, y=25)
    assert numpy.allclose(
        a1 + a2,
        vector.array({"x": [11, 102, 1003, 10004], "y": [25, 206, 2007, 20008]}),
    )
    # Object + array broadcasts the single vector over the array (commutes).
    broadcast = vector.array({"x": [11, 101, 1001, 10001], "y": [25, 205, 2005, 20005]})
    assert numpy.allclose(v1 + a2, broadcast)
    assert numpy.allclose(a2 + v1, broadcast)
    # Mixing a vector with a bare scalar raises in both directions.
    for bad in (lambda: v1 + 5, lambda: 5 + v1):
        with pytest.raises(TypeError):
            bad()
def Loop(file_list):
    """Loop over the input files, apply the electron selection, and
    accumulate the di-electron invariant mass.

    Parameters
    ----------
    file_list : list
        File/tree specifiers forwarded to ``uproot.iterate``.

    Returns
    -------
    dict
        Ntuple dict with key ``'diele_mass'`` holding a numpy array of the
        accumulated di-electron masses.
    """
    # define array
    histo = {}

    # --Start File Loop
    # NOTE(review): `branches` is expected to be defined at module level -- TODO confirm.
    # BUG FIX: the original iterated over the undefined name `flist` instead of
    # the `file_list` parameter, raising NameError at runtime.
    for arrays, doc in uproot.iterate(file_list, branches, report=True):  # for Uproot4
        print("from: {0}, to: {1} -- Entries: {2}".format(
            doc.start, doc.stop, len(arrays)))

        Electron = ak.zip({
            "PT": arrays[b"Electron.PT"],
            "Eta": arrays[b"Electron.Eta"],
            "Phi": arrays[b"Electron.Phi"],
            "Charge": arrays[b"Electron.Charge"],
        })
        Muon = ak.zip({
            "PT": arrays[b"MuonLoose.PT"],
            "Eta": arrays[b"MuonLoose.Eta"],
            "Phi": arrays[b"MuonLoose.Phi"],
            "Charge": arrays[b"MuonLoose.Charge"],
        })
        Photon = ak.zip({
            "PT": arrays[b"PhotonLoose.PT"],
            "Eta": arrays[b"PhotonLoose.Eta"],
            "Phi": arrays[b"PhotonLoose.Phi"],
        })
        MET = ak.zip({
            "PT": arrays[b"PuppiMissingET.MET"],
            "Phi": arrays[b"PuppiMissingET.Phi"],
        })

        ## --- Electron Selection
        cut = (Electron.PT > 20) & (abs(Electron.Eta) < 2.5)
        Electron = Electron[cut]  # Apply Electron Selection

        # Keep only events with at least two selected electrons.
        cut = ak.num(Electron) >= 2
        Electron = Electron[cut]
        Photon = Photon[cut]
        Muon = Muon[cut]
        MET = MET[cut]

        ## --- Event Selection
        # Basics of OSSF: leading electron pair must be opposite-sign.
        os_cut = (Electron[:, 0].Charge + Electron[:, 1].Charge == 0)
        Electron = Electron[os_cut]
        Photon = Photon[os_cut]
        Muon = Muon[os_cut]
        MET = MET[os_cut]

        # Massless-electron four-vectors for the leading pair.
        Ele1vec = vector.obj(pt=Electron[:, 0].PT,
                             eta=Electron[:, 0].Eta,
                             phi=Electron[:, 0].Phi,
                             mass=0)
        Ele2vec = vector.obj(pt=Electron[:, 1].PT,
                             eta=Electron[:, 1].Eta,
                             phi=Electron[:, 1].Phi,
                             mass=0)
        diele = Ele1vec + Ele2vec

        ## --- Flatten and Convert to numpy array
        diele_mass = ak.to_numpy(diele.mass)

        ## --- Fill Ntuple (append chunk results to the running array)
        if len(histo) == 0:
            histo['diele_mass'] = diele_mass
        else:
            histo['diele_mass'] = np.concatenate(
                [histo['diele_mass'], diele_mass])

    print("size of output array: ", len(histo['diele_mass']))
    return histo
def get_good_kaons(self, photons='one'):
    """Build the table of good two-track kaon (K_S) candidates.

    Parameters
    ----------
    photons : None, 'one' or 'all'
        How to treat photons from the calorimeter: None -- do not add them
        to the data, 'one' -- keep only the pair best matching the pi0,
        'all' -- add all pairs.

    Returns
    -------
    pandas.DataFrame
        One row per event (or per photon pair when photons == 'all') with
        per-charge track columns, kaon columns, x1/x2, and the recoil mass.
    """
    dat_tracks = self.get_dat_tracks()
    dat_kaons = self.get_dat_kaons()
    dat_glob = self.get_dat_glob()
    dat_goods = dat_tracks.join(dat_kaons, how='inner')
    # Keep only events with exactly two tracks.
    goods = dat_goods.groupby('entry').agg(
        num=('tz', 'count')).query('num==2').index
    dat_goods = dat_goods.reset_index().set_index(
        'entry').loc[goods].reset_index().set_index(['entry', 'subentry'])
    # Encode track charge as a label ('p' positive / 'n' negative) to use
    # as the pivot column.
    dat_goods['tcharge'] = np.where(dat_goods['tcharge'] > 0, 'p', 'n')
    # One row per event, separate columns for the two track charges.
    dat_goods = pd.pivot_table(dat_goods.reset_index(),
                               values=[
                                   'trho', 'tz', 'tdedx', 'tptot', 'tth', 'tphi',
                                   'ksminv', 'ksalign', 'ksptot', 'ksdpsi',
                                   'ksz0', 'kslen', 'ksth', 'ksphi'
                               ],
                               index=['entry'],
                               columns=['tcharge'])
    # Flatten the (value, charge) MultiIndex into 'value_charge' names.
    dat_goods.columns = [
        '_'.join(map(lambda x: str(x), col)) for col in dat_goods.columns
    ]
    # Kaon-level quantities are duplicated per charge; drop the '_n' copies.
    dat_goods.drop([
        'ksalign_n', 'ksminv_n', 'ksptot_n', 'ksdpsi_n', 'ksz0_n',
        'kslen_n', 'ksth_n', 'ksphi_n'
    ],
                   axis=1,
                   inplace=True)
    #kick badruns
    dat_glob = dat_glob.query('badrun==False')
    dat_goods = dat_goods.join(dat_glob, how='inner')
    #add x1, x2
    dat_goods = dat_goods.rename(
        {
            'ksalign_p': 'ksalign',
            'ksminv_p': 'ksminv',
            'ksptot_p': 'ksptot',
            'ksdpsi_p': 'ksdpsi',
            'ksz0_p': 'ksz0',
            'kslen_p': 'kslen',
            'ksth_p': 'ksth',
            'ksphi_p': 'ksphi'
        },
        axis=1)
    dat_goods['x1'], dat_goods['x2'] = get_x(dat_goods)
    # calc recoil mass: subtract the kaon four-vector from the nominal
    # initial state (at rest, energy = 2 * mean measured beam energy).
    vec = vector.array({
        'pt': dat_goods['ksptot'] * np.sin(dat_goods['ksth']),
        'theta': dat_goods['ksth'],
        'phi': dat_goods['ksphi'],
        'mass': dat_goods['ksminv'],
    })
    vec0 = vector.obj(px=0, py=0, pz=0, E=dat_goods['emeas'].mean() * 2)
    dat_goods['recoil'] = (vec0 - vec).mass
    del vec
    #add photons
    if photons is not None:
        dat_photons = self.get_dat_photons()
        dat_goods = pd.merge(dat_goods.reset_index(),
                             dat_photons.reset_index(),
                             on='entry',
                             how='left')
        dat_goods['subentry'] = dat_goods['subentry'].fillna(0).astype(int)
        dat_goods = dat_goods.set_index(['entry', 'subentry'])
        if photons == 'one':
            # Keep, per event, the photon pair whose invariant mass M is
            # closest to the pi0 mass (134.97 MeV) -- presumably MeV units;
            # TODO confirm against the photon table.
            dat_goods = dat_goods.sort_values(
                'M', ascending=True,
                key=lambda x: np.abs(x - 134.97)).groupby('entry').agg(
                    'first')
    return dat_goods
def test_add():
    """Element-wise `.add` between an awkward vector array and awkward,
    NumPy-backed, and single-object operands (in both orders)."""
    jagged = vector.Array([
        [{"x": 1, "y": 1.1}, {"x": 2, "y": 2.2}],
        [],
        [{"x": 3, "y": 3.3}],
    ])
    per_list = vector.Array([
        {"x": 10, "y": 20},
        {"x": 100, "y": 200},
        {"x": 1000, "y": 2000},
    ])
    # One vector per sublist is broadcast across that sublist.
    expected_broadcast = [
        [{"x": 11, "y": 21.1}, {"x": 12, "y": 22.2}],
        [],
        [{"x": 1003, "y": 2003.3}],
    ]

    assert isinstance(jagged.add(per_list), vector._backends.awkward_.VectorArray2D)
    assert isinstance(per_list.add(jagged), vector._backends.awkward_.VectorArray2D)
    assert jagged.add(per_list).tolist() == expected_broadcast
    assert per_list.add(jagged).tolist() == expected_broadcast

    # Same expectation with a NumPy-backed vector array on one side.
    per_list = vector.array({"x": [10, 100, 1000], "y": [20, 200, 2000]})
    assert isinstance(jagged.add(per_list), vector._backends.awkward_.VectorArray2D)
    assert isinstance(per_list.add(jagged), vector._backends.awkward_.VectorArray2D)
    assert jagged.add(per_list).tolist() == expected_broadcast
    assert per_list.add(jagged).tolist() == expected_broadcast

    # A single vector object is broadcast against every element.
    per_list = vector.obj(x=10, y=20)
    expected_scalar = [
        [{"x": 11, "y": 21.1}, {"x": 12, "y": 22.2}],
        [],
        [{"x": 13, "y": 23.3}],
    ]
    assert isinstance(jagged.add(per_list), vector._backends.awkward_.VectorArray2D)
    assert isinstance(per_list.add(jagged), vector._backends.awkward_.VectorArray2D)
    assert jagged.add(per_list).tolist() == expected_scalar
    assert per_list.add(jagged).tolist() == expected_scalar
def prepare_normalized_table(g, genparticle_energy_threshold=0.2):
    """Build per-element training arrays from the particle-flow graph *g*.

    Genparticles are matched greedily (highest energy first) to their
    best-linked detector element, then PFCandidates are matched to a primary
    element. For every element the function fills one row of element
    features (Xelem), summed gen-level targets (ygen), and candidate
    targets (ycand).

    Parameters
    ----------
    g : graph with 'elem', genparticle, and 'pfcand' nodes; edge 'weight'
        is the energy-deposit association strength.
    genparticle_energy_threshold : float
        Minimum genparticle energy to contribute to the gen target.

    Returns
    -------
    (Xelem, ycand, ygen) : numpy recarrays, one entry per element, with
        fields from the module-level `elem_branches` / `target_branches`.
    """
    # rg = g.reverse()

    # Partition the graph nodes by kind.
    all_genparticles = []
    all_elements = []
    all_pfcandidates = []
    for node in g.nodes:
        if node[0] == "elem":
            all_elements += [node]
            for parent in g.predecessors(node):
                all_genparticles += [parent]
        elif node[0] == "pfcand":
            all_pfcandidates += [node]
    all_genparticles = list(set(all_genparticles))
    all_elements = sorted(all_elements)

    # assign genparticles in reverse energy order uniquely to best element
    elem_to_gp = {}  # map of element -> genparticles
    unmatched_gp = []
    for gp in sorted(all_genparticles,
                     key=lambda x: g.nodes[x]["e"],
                     reverse=True):
        elems = [e for e in g.successors(gp)]

        # sort elements by energy deposit from genparticle
        elems_sorted = sorted([(g.edges[gp, e]["weight"], e) for e in elems],
                              key=lambda x: x[0],
                              reverse=True)

        chosen_elem = None
        for weight, elem in elems_sorted:
            # Claim the highest-weight element not yet taken.
            if not (elem in elem_to_gp):
                chosen_elem = elem
                elem_to_gp[elem] = []
                break

        if chosen_elem is None:
            unmatched_gp += [gp]
        else:
            # `elem` still holds the chosen element after the break.
            elem_to_gp[elem] += [gp]

    # assign unmatched genparticles to best element, allowing for overlaps
    for gp in sorted(unmatched_gp, key=lambda x: g.nodes[x]["e"],
                     reverse=True):
        elems = [e for e in g.successors(gp)]
        elems_sorted = sorted([(g.edges[gp, e]["weight"], e) for e in elems],
                              key=lambda x: x[0],
                              reverse=True)
        _, elem = elems_sorted[0]
        elem_to_gp[elem] += [gp]

    unmatched_cand = []
    elem_to_cand = {}

    # Find primary element for each PFCandidate
    for cand in sorted(all_pfcandidates,
                       key=lambda x: g.nodes[x]["e"],
                       reverse=True):
        tp = g.nodes[cand]["typ"]
        neighbors = list(g.predecessors(cand))

        chosen_elem = None

        # Pions, muons and electrons will be assigned to the best associated track
        if tp in [211, 13, 11]:
            for elem in neighbors:
                tp_neighbor = g.nodes[elem]["typ"]

                # track or gsf
                if tp_neighbor == 1 or tp_neighbor == 6:
                    if not (elem in elem_to_cand):
                        chosen_elem = elem
                        elem_to_cand[elem] = cand
                        break

        # other particles will be assigned to the highest-energy cluster
        # (ECAL, HCAL, HFEM, HFHAD, SC)
        else:
            # neighbors = [n for n in neighbors if g.nodes[n]["typ"] in [4,5,8,9,10]]
            # sorted_neighbors = sorted(neighbors, key=lambda x: g.nodes[x]["e"], reverse=True)
            sorted_neighbors = sorted(
                neighbors,
                key=lambda x: g.edges[(x, cand)]["weight"],
                reverse=True)
            for elem in sorted_neighbors:
                if not (elem in elem_to_cand):
                    chosen_elem = elem
                    elem_to_cand[elem] = cand
                    break

        if chosen_elem is None:
            # print("unmatched candidate {}, {}".format(cand, g.nodes[cand]))
            unmatched_cand += [cand]

    # Output recarrays: one row per element, zero-filled by default.
    # `elem_branches` / `target_branches` are module-level field lists.
    Xelem = np.recarray((len(all_elements), ),
                        dtype=[(name, np.float32) for name in elem_branches])
    Xelem.fill(0.0)
    ygen = np.recarray((len(all_elements), ),
                       dtype=[(name, np.float32) for name in target_branches])
    ygen.fill(0.0)
    ycand = np.recarray((len(all_elements), ),
                        dtype=[(name, np.float32) for name in target_branches])
    ycand.fill(0.0)

    for ielem, elem in enumerate(all_elements):
        elem_type = g.nodes[elem]["typ"]
        # Genparticles matched to this element, strongest link first,
        # above the energy threshold.
        genparticles = sorted(elem_to_gp.get(elem, []),
                              key=lambda x: g.edges[(x, elem)]["weight"],
                              reverse=True)
        genparticles = [
            gp for gp in genparticles
            if g.nodes[gp]["e"] > genparticle_energy_threshold
        ]
        candidate = elem_to_cand.get(elem, None)

        for j in range(len(elem_branches)):
            Xelem[elem_branches[j]][ielem] = g.nodes[elem][elem_branches[j]]

        if not (candidate is None):
            for j in range(len(target_branches)):
                ycand[target_branches[j]][ielem] = g.nodes[candidate][
                    target_branches[j]]

        lv = vector.obj(x=0, y=0, z=0, t=0)
        if len(genparticles) > 0:
            # PID/charge taken from the strongest-linked genparticle;
            # the four-momenta of all matched genparticles are summed.
            pid = g.nodes[genparticles[0]]["typ"]
            charge = g.nodes[genparticles[0]]["charge"]

            for gp in genparticles:
                lv += vector.obj(pt=g.nodes[gp]["pt"],
                                 eta=g.nodes[gp]["eta"],
                                 phi=g.nodes[gp]["phi"],
                                 e=g.nodes[gp]["e"])

            # remap PID in case of HCAL cluster to neutral
            if elem_type == 5 and (pid == 22 or pid == 11):
                pid = 130

            # remap forward region to HFHAD or HFEM
            if elem_type in [8, 9]:
                if pid == 130:
                    pid = 1
                elif pid == 22:
                    pid = 2

            # Remap HF candidates to neutral hadron or photon in case not matched to HF
            if elem_type in [2, 3, 4, 5]:
                if pid == 1:
                    pid = 130
                elif pid == 2:
                    pid = 22

            gp = {
                "pt": lv.rho,
                "eta": lv.eta,
                "sin_phi": np.sin(lv.phi),
                "cos_phi": np.cos(lv.phi),
                "e": lv.t,
                "typ": pid,
                "px": lv.x,
                "py": lv.y,
                "pz": lv.z,
                # Only charged-particle PIDs keep a charge.
                "charge": charge if pid in [211, 11, 13] else 0,
            }
            # print(" mlpf: type={} E={:.2f} eta={:.2f} phi={:.2f} q={}".format(pid, lv.t, lv.eta, lv.phi, gp["charge"]))
            for j in range(len(target_branches)):
                ygen[target_branches[j]][ielem] = gp[target_branches[j]]

    return Xelem, ycand, ygen
def main(args):
    """Compare NanoAOD and MicroAOD diphoton variables event by event.

    Reads both samples, joins them on (event, lumi), applies eta and
    deltaR matching cuts, produces comparison plots per variable, and
    dumps the joined/derived dataframes to parquet files.

    Parameters
    ----------
    args : argparse-like namespace with nano_input_dir, micro_input_dir,
        sd ("EB" or "EE") and output_dir attributes.
    """
    # Read nano, micro, EB or EE cuts
    nanoaod_arr = ak.from_parquet(args.nano_input_dir)
    print("Read nanoaod: {}".format(nanoaod_arr.type))
    microaod_arr = uproot.concatenate(
        ["{}/*.root:diphotonDumper/trees/ggH_125_13TeV_All_$SYST".format(args.micro_input_dir)]
    )
    print("Read microaod: {}".format(microaod_arr.type))

    # Stupid typo in flashgg: alias the double-underscore branch name.
    if "lead_ch_iso_worst__uncorr" in microaod_arr.fields:
        microaod_arr["lead_ch_iso_worst_uncorr"] = microaod_arr["lead_ch_iso_worst__uncorr"]

    # Restrict both samples to barrel (EB) or endcap (EE) photons.
    if args.sd == "EB":
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.lead_eta) < 1.5]
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.sublead_eta) < 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.lead_eta) < 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.sublead_eta) < 1.5]
    if args.sd == "EE":
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.lead_eta) > 1.5]
        nanoaod_arr = nanoaod_arr[np.abs(nanoaod_arr.sublead_eta) > 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.lead_eta) > 1.5]
        microaod_arr = microaod_arr[np.abs(microaod_arr.sublead_eta) > 1.5]

    # Read catalogue of variables to be plotted
    with open("plots_specs.json", "r") as f:
        columns = json.load(f)

    # Create dict where keys are names of variables in nano and values are names of variables in micro
    nano_micro_names = {var["nano_col"]: var["micro_col"] for var in columns}
    nano_micro_names["event"] = "event"
    nano_micro_names["lumi"] = "lumi"

    # Event by event join on (event, lumi).
    nano_dict = {k: nanoaod_arr[k] for k in nano_micro_names.keys()}
    nano_dict["lead_fixedGridRhoAll"] = nanoaod_arr["lead_fixedGridRhoAll"]  # needed for XGBoost vs TMVA
    test_nano = ak.Array(nano_dict)
    test_micro = microaod_arr[nano_micro_names.values()]
    pd_nano = ak.to_pandas(test_nano)
    pd_micro = ak.to_pandas(test_micro)
    pd_nano = pd_nano.set_index(["event", "lumi"])
    pd_micro = pd_micro.set_index(["event", "lumi"])
    pd_joined = pd_nano.join(pd_micro, lsuffix="_nano", rsuffix="_micro")
    print("Joined dataframe:\n{}".format(pd_joined))

    #Remove NaN values
    # NOTE(review): `break` stops at the first "event"/"lumi" key. This is
    # only equivalent to `continue` because those keys were inserted last --
    # confirm this is intentional.
    for nano_name, micro_name in nano_micro_names.items():
        if nano_name in ["event", "lumi"]:
            break
        if nano_name == micro_name:
            nano_name += "_nano"
            micro_name += "_micro"
        pd_joined = pd_joined[pd_joined[nano_name].notna()]
        pd_joined = pd_joined[pd_joined[micro_name].notna()]

    # Cut over delta R
    # Here https://github.com/CoffeaTeam/coffea/blob/3db3fab23064c70d0ca63b185d51c7fa3b7849dc/coffea/nanoevents/methods/vector.py#L74
    # useful info
    deltaR_threshold = 0.1
    four_lead_nano = vector.obj(
        pt=pd_joined["lead_pt"],
        phi=pd_joined["lead_phi_nano"],
        eta=pd_joined["lead_eta_nano"],
        E=pd_joined["lead_energyRaw"]
    )
    four_sublead_nano = vector.obj(
        pt=pd_joined["sublead_pt"],
        phi=pd_joined["sublead_phi_nano"],
        eta=pd_joined["sublead_eta_nano"],
        E=pd_joined["sublead_energyRaw"]
    )
    pd_joined["deltaR_nano"] = four_lead_nano.deltaR(four_sublead_nano)
    four_lead_micro = vector.obj(
        pt=pd_joined["leadPt"],
        phi=pd_joined["lead_phi_micro"],
        eta=pd_joined["lead_eta_micro"],
        E=pd_joined["lead_SCRawE"]
    )
    four_sublead_micro = vector.obj(
        pt=pd_joined["subleadPt"],
        phi=pd_joined["sublead_phi_micro"],
        eta=pd_joined["sublead_eta_micro"],
        E=pd_joined["sublead_SCRawE"]
    )
    # Require the nano and micro photons to be geometrically matched.
    pd_joined["lead_deltaR"] = four_lead_nano.deltaR(four_lead_micro)
    pd_joined["sublead_deltaR"] = four_sublead_nano.deltaR(four_sublead_micro)
    pd_joined = pd_joined[pd_joined["lead_deltaR"] < deltaR_threshold]
    pd_joined = pd_joined[pd_joined["sublead_deltaR"] < deltaR_threshold]
    print("Final joined dataframe:\n{}".format(pd_joined))

    # Plot: for each variable, overlay (top), ratio (middle), and
    # per-event relative difference (bottom).
    print("Start plotting")
    for column in columns:
        fig, (up, middle, down) = plt.subplots(
            nrows=3, ncols=1,
            gridspec_kw={"height_ratios": (2, 1, 1)}
        )
        nano_name = column["nano_col"]
        micro_name = column["micro_col"]
        if nano_name == micro_name:
            nano_name += "_nano"
            micro_name += "_micro"
        # NOTE(review): shadows the builtin `range` inside this loop body.
        range = column["range"]

        # Up
        n, n_, n__ = up.hist(pd_joined[nano_name], bins=column["bins"], range=range, histtype="step", label="NanoAOD", linewidth=2)
        m, m_, m__ = up.hist(pd_joined[micro_name], bins=column["bins"], range=range, histtype="step", label="MicroAOD", linewidth=2)
        up.legend(fontsize=18, loc="upper right")
        up.set_xlim(range)
        up.set_xlabel(column["var"])
        up.set_ylabel("Events")
        if "log" in column:
            up.set_yscale("log")

        # Middle: bin-by-bin nano/micro ratio.
        ylim = [0, 2]
        middle.set_ylim(ylim)
        #middle.axhline(1, xmin=range[0], xmax=range[1], color="black", alpha=0.6)
        centers = (n_[:-1] + n_[1:]) / 2
        middle.plot(centers, n / m, "k.")
        middle.set_xlim(range)
        middle.set_xlabel(column["var"])
        middle.set_ylabel("$n/\mu$")
        middle.grid(which="both")

        # Down: per-event relative difference in percent.
        perc_range = (-300, 300)
        perc_bins = 500
        down.hist(100 * (pd_joined[nano_name] - pd_joined[micro_name]) / pd_joined[micro_name], bins=perc_bins, range=perc_range, histtype="step", density=True, color="black", linewidth=2)
        #down.set_yscale("log")
        down.set_xlabel("$(n_{ev} - \mu_{ev})/\mu_{ev}$ [%]")
        down.set_ylabel("Events / {}%".format((perc_range[1] - perc_range[0]) / perc_bins))

        print(column["nano_col"])
        print("nano: {}".format(np.sum(n)))
        print("micro: {}".format(np.sum(m)))
        print("diff = {}".format(abs(np.sum(n) - np.sum(m))))
        print("rel diff = {}%\n".format(100 * abs(np.sum(n) - np.sum(m)) / max(np.sum(n), np.sum(m))))

        fig.tight_layout()
        fig.savefig("{}/{}_{}.png".format(args.output_dir, column["nano_col"], args.sd), bbox_inches='tight')
        fig.savefig("{}/{}_{}.pdf".format(args.output_dir, column["nano_col"], args.sd), bbox_inches='tight')
        plt.close(fig)

    # Dump pandas dataframe to parquet file
    pd_joined.to_parquet("nano_micro_{}.parquet".format(args.sd), engine="fastparquet")
    print("Dumped dataframe to parquet file")

    # Redundant: dump separate dataframes for nano and micro with PhotonID inputs
    nano_vars = {
        "r9": "lead_r9_nano",
        "s4": "lead_s4_nano",
        "sieie": "lead_sieie_nano",
        "etaWidth": "lead_etaWidth",
        "phiWidth": "lead_phiWidth",
        "sieip": "lead_sieip_nano",
        "pfPhoIso03": "lead_pfPhoIso03",
        "pfChargedIsoPFPV": "lead_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx": "lead_pfChargedIsoWorstVtx",
        "mva_ID": "lead_mvaID_recomputed"
    }
    micro_vars = {
        "r9": "lead_r9_micro",
        "s4": "lead_s4_micro",
        "sieie": "lead_sieie_micro",
        "etaWidth": "lead_eta_width",
        "phiWidth": "lead_phi_width",
        "sieip": "lead_sieip_micro",
        "pfPhoIso03": "lead_pho_iso",
        "pfChargedIsoPFPV": "lead_ch_iso",
        "pfChargedIsoWorstVtx": "lead_ch_iso_worst",
        "mva_ID": "lead_mva"
    }
    nano_isos = {
        "pfPhoIso03": "lead_pfPhoIso03",
        "pfChargedIsoPFPV": "lead_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx": "lead_pfChargedIsoWorstVtx",
        "pfPhoIso03_uncorr": "lead_uncorr_pfPhoIso03",
        "pfChargedIsoPFPV_uncorr": "lead_uncorr_pfChargedIsoPFPV",
        "pfChargedIsoWorstVtx_uncorr": "lead_uncorr_pfChargedIsoWorstVtx",
    }
    micro_isos = {
        "pfPhoIso03": "lead_pho_iso",
        "pfChargedIsoPFPV": "lead_ch_iso",
        "pfChargedIsoWorstVtx": "lead_ch_iso_worst",
        "pfPhoIso03_uncorr": "lead_pho_iso_uncorr",
        "pfChargedIsoPFPV_uncorr": "lead_ch_iso_uncorr",
        "pfChargedIsoWorstVtx_uncorr": "lead_ch_iso_worst_uncorr",
    }

    nano_df = pd_joined[list(nano_vars.values())]
    nano_df.rename(columns=dict((v, k) for k, v in nano_vars.items()), inplace=True)
    nano_df.to_parquet("nano_{}.parquet".format(args.sd), engine="fastparquet")
    print("Dumped nano dataframe to parquet file")

    micro_df = pd_joined[list(micro_vars.values())]
    micro_df.rename(columns=dict((v, k) for k, v in micro_vars.items()), inplace=True)
    micro_df.to_parquet("micro_{}.parquet".format(args.sd), engine="fastparquet")
    print("Dumped micro dataframe to parquet file")

    nano_df = pd_joined[list(nano_isos.values())]
    nano_df.rename(columns=dict((v, k) for k, v in nano_isos.items()), inplace=True)
    nano_df.to_parquet("nano_{}_isos.parquet".format(args.sd), engine="fastparquet")
    print("Dumped nano dataframe for isos to parquet file")

    micro_df = pd_joined[list(micro_isos.values())]
    micro_df.rename(columns=dict((v, k) for k, v in micro_isos.items()), inplace=True)
    micro_df.to_parquet("micro_{}_isos.parquet".format(args.sd), engine="fastparquet")
    print("Dumped micro dataframe for isos to parquet file")
MET = MET[Electron_event_mask] # Photon = Photo[Electron_event_mask] # Jet = Jet[Electron_event_mask] # CutFlow cut1 = np.ones(len(Electron)) print("Cut0: {0}, Cut1: {1}".format(len(cut0), len(cut1))) import vector leading_Electron = Electron[:, 0] # Highest PT Ele2vec = vector.obj(pt=leading_Electron.PT, phi=leading_Electron.Phi) MET2vec = vector.obj(pt=MET.MET, phi=MET.Phi) MT = np.sqrt( 2 * leading_Electron.PT * MET.MET * (1 - np.cos(abs(MET2vec.deltaphi(Ele2vec)))) ) def draw(arr, title, start, end, bin): # Array, Name, x_min, x_max, bin-number plt.figure(figsize=(8, 5)) # Figure size bins = np.linspace(start, end, bin) # divide start-end range with 'bin' number binwidth = (end - start) / bin # width of one bin # Draw histogram plt.hist(arr, bins=bins, alpha=0.7, label=title) # label is needed to draw legend plt.xticks(fontsize=16) # xtick size
# Copyright (c) 2019-2021, Jonas Eschle, Jim Pivarski, Eduardo Rodrigues, and Henry Schreiner. # # Distributed under the 3-clause BSD license, see accompanying file LICENSE # or https://github.com/scikit-hep/vector for details. import numpy import pytest import vector v1 = vector.obj(x=1, y=5) a1 = vector.array({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]}) v2 = vector.obj(x=10, y=20) a2 = vector.array({"x": [10, 100, 1000, 10000], "y": [20, 200, 2000, 20000]}) def test_eq(): assert v1 == v1 assert not v1 == v2 assert (a1 == a1).all() assert not (a1 == a2).any() assert (v1 == a1).any() assert not (v1 == a1).all() assert (a1 == v1).any() assert not (a1 == v1).all() def test_ne(): assert not v1 != v1 assert v1 != v2
def test_pos():
    """Unary plus leaves both objects and arrays unchanged."""
    assert +v1 == vector.obj(x=1, y=5)
    unchanged = vector.array({"x": [1, 2, 3, 4], "y": [5, 6, 7, 8]})
    assert numpy.allclose(+a1, unchanged)
def test_neg():
    """Unary minus negates every component of objects and arrays."""
    assert -v1 == vector.obj(x=-1, y=-5)
    negated = vector.array({"x": [-1, -2, -3, -4], "y": [-5, -6, -7, -8]})
    assert numpy.allclose(-a1, negated)