def test_flatten_RecordArray():
    """Flattening a RecordArray at axis=2 flattens each field independently."""
    array = ak.Array(
        [
            {"x": [], "y": [[3, 3, 3]]},
            {"x": [[1]], "y": [[2, 2]]},
            {"x": [[2], [2]], "y": [[1]]},
            {"x": [[3], [3], [3]], "y": [[]]},
        ]
    )
    full_expected = [
        {"x": [], "y": [3, 3, 3]},
        {"x": [1], "y": [2, 2]},
        {"x": [2, 2], "y": [1]},
        {"x": [3, 3, 3], "y": []},
    ]
    # whole array
    assert ak.to_list(ak.flatten(array, axis=2)) == full_expected
    # outer slice: dropping the first record drops its flattened counterpart
    assert ak.to_list(ak.flatten(array[1:], axis=2)) == full_expected[1:]
    # inner slice: drop the first sublist of every field before flattening
    assert ak.to_list(ak.flatten(array[:, 1:], axis=2)) == [
        {"x": [], "y": []},
        {"x": [], "y": []},
        {"x": [2], "y": []},
        {"x": [3, 3], "y": []},
    ]
def find_permutations(jets, leptons, MET, btagWP):
    '''
    Inputs: Jets, leptons, MET, and if jets pass btag WP
    Returns: List of (jet assignment ordering, associated neutrino solutions)
    '''
    # Flatten the jagged per-event jet collection into a flat (n_jets, 5)
    # float64 table for the external permutation code.
    jets_inputs = np.stack((ak.to_numpy(ak.flatten(jets.px)),
                            ak.to_numpy(ak.flatten(jets.py)),
                            ak.to_numpy(ak.flatten(jets.pz)),
                            ak.to_numpy(ak.flatten(jets.energy)),
                            ak.to_numpy(ak.flatten(jets[btagWP]))),
                           axis=1).astype('float64')  # one row has (px, py, pyz, E)
    lepton_inputs = np.stack((ak.to_numpy(ak.flatten(leptons.px)),
                              ak.to_numpy(ak.flatten(leptons.py)),
                              ak.to_numpy(ak.flatten(leptons.pz)),
                              ak.to_numpy(ak.flatten(leptons.energy))),
                             axis=1).astype('float64')  # one row has (px, py, pyz, E)
    # MET is already one entry per event, so no flattening is needed.
    met_inputs = np.stack((ak.to_numpy(MET.px),
                           ak.to_numpy(MET.py)),
                          axis=1).astype('float64')  # one row has (px, py)
    # njets_array lets the permutation code split the flat jet table back
    # into per-event groups.
    p_ordering, p_nu = get_test_permutations(njets_array=ak.num(jets),
                                             jets=jets_inputs,
                                             leptons=lepton_inputs,
                                             met=met_inputs)
    #set_trace()
    # Repackage the results as a jagged record array: the indices of the four
    # assigned jets plus the neutrino solution for each permutation.
    test_perms = ak.Array({
        'blepIdx' : ak.from_iter(p_ordering)[:, :, 0],
        'bhadIdx' : ak.from_iter(p_ordering)[:, :, 1],
        'wjaIdx' : ak.from_iter(p_ordering)[:, :, 2],
        'wjbIdx' : ak.from_iter(p_ordering)[:, :, 3],
        'Nu' : ak.Array({
            'px' : ak.from_iter(p_nu)[:, :, 0],
            'py' : ak.from_iter(p_nu)[:, :, 1],
            'pz' : ak.from_iter(p_nu)[:, :, 2],
            'chi2' : ak.from_iter(p_nu)[:, :, 3],
        })
    })
    return test_perms
def _kExtra(self, kpt, eta, nl, u, s=0, m=0):
    """Return the extra (Rochester) smearing factor for muon pt.

    kpt/eta/nl are the muon kinematics and tracker-layer counts, u is a
    per-muon uniform random number in [0, 1]; s and m select the
    systematic-set / member indices of the stored parameterizations.
    """
    # if it is a jagged array, save the offsets then flatten everything
    # needed for the ternary conditions later
    abseta = abs(eta)
    kData = self._kRes[s][m][1](abseta)  # type 1 is data
    kMC = self._kRes[s][m][0](abseta)  # type 0 is MC
    # only smear where the data resolution exceeds the MC resolution
    mask = kData > kMC
    x = awkward.zeros_like(kpt)
    sigma = self._sigma(kpt, eta, nl, s, m)
    # Rochester cbA = beta, cbN = m, as well as cbM (always 0?) = loc and cbS = scale to transform y = (x-loc)/scale in the pdf method
    cbA = self._cbA[s][m](abseta, nl)
    cbN = self._cbN[s][m](abseta, nl)
    cbS = self._cbS[s][m](abseta, nl)
    # flatten for the scipy-style ppf call, then restore the jagged structure
    counts = awkward.num(u)
    u_flat = awkward.flatten(u)
    loc = awkward.zeros_like(u_flat)
    cbA_flat = awkward.flatten(cbA)
    cbN_flat = awkward.flatten(cbN)
    cbS_flat = awkward.flatten(cbS)
    # inverse-CDF sampling of a double-sided crystal ball distribution
    invcdf = awkward.unflatten(
        doublecrystalball.ppf(u_flat, cbA_flat, cbA_flat, cbN_flat, cbN_flat,
                              loc, cbS_flat),
        counts,
    )
    x = awkward.where(
        mask,
        (numpy.sqrt(kData * kData - kMC * kMC) * sigma * invcdf),
        x,
    )
    # guard against x <= -1, which would flip the sign of the correction
    result = awkward.where(x > -1, 1.0 / (1.0 + x), awkward.ones_like(kpt))
    if isinstance(kpt, numpy.ndarray):
        result = numpy.array(result)
    return result
def plot_distributions(obj):
    """Plot every non-blacklisted ``<obj>_*`` branch of the global ``tree``.

    For each unique layer type (plus one inclusive pass tagged with the
    empty string) the flattened branch values and their sim-track-matched
    subset are handed to ``make_plots``.  Branches consisting entirely of
    the -999 sentinel are skipped.
    """
    global tree
    blacklist = ["hitIdx", "simTrkIdx", "layer", "pt", "eta", "phi",
                 "sim_pt", "sim_eta", "sim_phi", "type", "ring",
                 "moduleType_binary", "layer_binary", "isFake", "isDuplicate"]
    print("object = ", obj)
    # Build the set of excluded fully-qualified names once instead of
    # re-creating the map() generator for every branch name, and use
    # startswith() instead of the slice-compare idiom.
    excluded = {"{}_{}".format(obj, x) for x in blacklist}
    quantities = [name for name in tree.keys()
                  if name.startswith(obj) and name not in excluded]
    matchedMask = tree["{}_isFake".format(obj)].array() == 0
    layers = np.array(list(map(process_layers,
                               ak.flatten(tree["{}_layer_binary".format(obj)].array()))))
    #moduleTypes = np.array(list(map(process_moduleTypes,ak.flatten(tree["{}_moduleType_binary".format(obj)].array()))))
    layerTypes = np.array(list(map(process_layerType, layers)))
    # layerTypes = np.array(list(map(process_numbers, layers)))
    # print(layerTypes)
    unique_layerTypes = np.unique(layerTypes, axis=0)
    # the empty string tag means "no layer-type selection" (inclusive plots)
    unique_layerTypes = np.append(unique_layerTypes, "")
    print(unique_layerTypes)
    # Generic
    for layerType in unique_layerTypes:
        print("layerType = {}".format(layerType))
        for quantity in quantities:
            print("quantity = {}".format(quantity))
            if layerType == "":
                qArray = ak.flatten(tree[quantity].array())
                qArraySimTrackMatched = qArray[ak.flatten(matchedMask)]
            else:
                qArray = ak.flatten(tree[quantity].array())[layerTypes == layerType]
                qArraySimTrackMatched = qArray[ak.flatten(matchedMask)[layerTypes == layerType]]
            # branches filled entirely with the -999 sentinel carry no info
            if all(qArray == -999):
                continue
            make_plots(qArray, qArraySimTrackMatched, quantity, layerType)
def fill_genp_hists(self, accumulator, dname, genp_type, flag, obj, evt_weights):
    """Fill the kinematic histograms (pt, eta, phi, mass, energy) for one
    gen-particle collection and return the accumulator."""
    shared = dict(dataset=dname, objtype=genp_type, flag=flag,
                  weight=evt_weights)
    # each histogram is keyed and filled by the same-named attribute of obj,
    # fully flattened so one entry is filled per particle
    for axis in ("pt", "eta", "phi", "mass", "energy"):
        values = ak.flatten(getattr(obj, axis), axis=None)
        accumulator[axis].fill(**shared, **{axis: values})
    return accumulator
def get_root_rest_energies(roots, energies, pxs, pys, pzs):
    """Boost particle energies into the rest frame of their root particles.

    Parameters
    ----------
    roots : array of bool
        mask identifying the root particles
    energies, pxs, pys, pzs : array like of floats
        energies and momentum components of the particles

    Returns
    -------
    energies : array like of floats
        the energies of the particles in the rest frame of the root
    """
    # roots will be used as indices, so energies must be a numpy array
    energies = ak.to_numpy(energies)
    # invariant masses are frame independent; compute them before boosting
    masses2 = energies ** 2 - pxs ** 2 - pys ** 2 - pzs ** 2
    # subtract the (flattened, per-event) root momentum from every particle
    root_px = ak.flatten(pxs[roots])
    root_py = ak.flatten(pys[roots])
    root_pz = ak.flatten(pzs[roots])
    pxs = pxs - root_px
    pys = pys - root_py
    pzs = pzs - root_pz
    # rebuild the energies from the invariant mass and the boosted momenta
    energies = np.sqrt(masses2 + pxs ** 2 + pys ** 2 + pzs ** 2)
    return energies
def process(self, events):
    """Process one NanoEvents chunk: fill muon-pT and dimuon-mass histograms.

    Also validates chunk metadata against ``self.expected_usermeta`` when the
    ``checkusermeta`` flag is present, and records which dask worker saw a
    cached "canary" key (used by tests to probe cache affinity).
    """
    output = self.accumulator.identity()
    dataset = events.metadata["dataset"]
    print(events.metadata)
    if "checkusermeta" in events.metadata:
        metaname, metavalue = self.expected_usermeta[dataset]
        assert metavalue == events.metadata[metaname]
    mapping = events.behavior["__events_factory__"]._mapping
    muon_pt = events.Muon.pt
    if isinstance(mapping, nanoevents.mapping.CachedMapping):
        keys_in_cache = list(mapping.cache.cache.keys())
        has_canaries = [canary in keys_in_cache for canary in self._canaries]
        # BUG FIX: the original tested the list itself, which is truthy
        # whenever self._canaries is non-empty regardless of the lookup
        # results; any() checks whether a canary key was actually cached.
        if any(has_canaries):
            try:
                from distributed import get_worker
                worker = get_worker()
                output["worker"].add(worker.name)
            except ValueError:
                # not running inside a distributed worker
                pass
    dimuon = ak.combinations(events.Muon, 2)
    dimuon = dimuon["0"] + dimuon["1"]
    output["pt"].fill(dataset=dataset, pt=ak.flatten(muon_pt))
    output["mass"].fill(dataset=dataset, mass=ak.flatten(dimuon.mass))
    output["cutflow"]["%s_pt" % dataset] += sum(ak.num(events.Muon))
    output["cutflow"]["%s_mass" % dataset] += sum(ak.num(dimuon))
    return output
def process(self, df):
    """Build PtEtaPhiM muons from flat NanoAOD branches and fill the
    muon-pT and dimuon-mass histograms."""
    ak.behavior.update(vector.behavior)
    output = self.accumulator.identity()
    dataset = df.metadata["dataset"]
    print(df.metadata)
    if "checkusermeta" in df.metadata:
        metaname, metavalue = self.expected_usermeta[dataset]
        assert metavalue == df.metadata[metaname]
    # assemble a Lorentz-vector record array from the flat branches
    kinematics = {
        "pt": df.Muon_pt,
        "eta": df.Muon_eta,
        "phi": df.Muon_phi,
        "mass": df.Muon_mass,
    }
    muon = ak.zip(kinematics, with_name="PtEtaPhiMLorentzVector")
    # all unique muon pairs; summing the two records gives the pair 4-vector
    pairs = ak.combinations(muon, 2)
    dimuon = pairs["0"] + pairs["1"]
    output["pt"].fill(dataset=dataset, pt=ak.flatten(muon.pt))
    output["mass"].fill(dataset=dataset, mass=ak.flatten(dimuon.mass))
    output["cutflow"]["%s_pt" % dataset] += np.sum(ak.num(muon))
    output["cutflow"]["%s_mass" % dataset] += np.sum(ak.num(dimuon))
    return output
def calcGeometricOffset(rCone, E, f_id, mu, mucut):
    # Reshape the flattened per-eta energies into (n_events, nEta) and keep
    # only events above the pile-up (mu) cut.  nEta, etabins and etaC are
    # module globals -- TODO confirm they match the binning of E / f_id.
    E = ak.to_numpy(ak.flatten(E)).reshape(len(E), nEta)[mu > mucut]
    f_id = ak.to_numpy(ak.flatten(f_id)).reshape(len(f_id), nEta)[mu > mucut]
    # NOTE(review): a length mismatch is only printed, not raised -- verify
    # this best-effort behaviour is intended.
    if (len(f_id) != len(E)):
        print("Error")
    # annular area of each eta ring
    area = 2 * np.pi * (etabins[1:] - etabins[:-1])
    # Energy density inside a cone of radius rCone; cosh(etaC) converts to
    # transverse coordinates.  /255. presumably undoes a fixed-point packing
    # of f_id -- TODO confirm against the producer of f_id.
    return E * f_id * np.pi * rCone * rCone / 255. / np.cosh(etaC) / area
def _evaluate(self, *args):
    """jec/jer = f(args)

    Positional args are routed to binning variables (which select the
    parameter set) and evaluation variables (fed into the formula),
    according to the argument orderings stored on the instance.
    """
    bin_vals = {
        argname: args[self._dim_args[argname]]
        for argname in self._dim_order
    }
    eval_vals = {
        argname: args[self._eval_args[argname]]
        for argname in self._eval_vars
    }
    # lookup the bins that we care about
    dim1_name = self._dim_order[0]
    # clip into [0, nbins-1] so out-of-range values use the edge bins
    dim1_indices = numpy.clip(
        numpy.searchsorted(
            self._bins[dim1_name], bin_vals[dim1_name], side="right") - 1,
        0,
        self._bins[dim1_name].size - 2,
    )
    bin_indices = [dim1_indices]
    # further dimensions are binned within the first dimension's bin
    for binname in self._dim_order[1:]:
        bin_indices.append(
            masked_bin_eval(bin_indices[0], self._bins[binname],
                            bin_vals[binname]))
    bin_tuple = tuple(bin_indices)
    # get clamp values and clip the inputs
    eval_values = []
    for eval_name in self._eval_vars:
        clamp_mins = None
        # a single stored value clamps globally; otherwise pick per-bin
        if len(awkward.flatten(self._eval_clamp_mins[eval_name])) == 1:
            clamp_mins = awkward.flatten(
                self._eval_clamp_mins[eval_name])[0]
        else:
            clamp_mins = numpy.array(
                self._eval_clamp_mins[eval_name][bin_tuple]).squeeze()
        clamp_maxs = None
        if len(awkward.flatten(self._eval_clamp_maxs[eval_name])) == 1:
            clamp_maxs = awkward.flatten(
                self._eval_clamp_maxs[eval_name])[0]
        else:
            clamp_maxs = numpy.array(
                self._eval_clamp_maxs[eval_name][bin_tuple]).squeeze()
        eval_values.append(
            numpy.clip(eval_vals[eval_name], clamp_mins, clamp_maxs))
    # get parameter values
    parm_values = []
    if len(self._parms) > 0:
        parm_values = [
            numpy.array(parm[bin_tuple]).squeeze() for parm in self._parms
        ]
    return self._formula(*tuple(parm_values + eval_values))
def test():
    """Regression: flatten(axis=2) of a nested empty array with a huge
    (unused) outer offset must yield an empty array with the right type."""
    layout = ak.layout.NumpyArray(np.empty(0))
    layout = ak.layout.IndexedOptionArray64(ak.layout.Index64([]), layout)
    layout = ak.layout.ListOffsetArray64(ak.layout.Index64([0]), layout)
    layout = ak.layout.ListOffsetArray64(ak.layout.Index64([175990832]), layout)
    flattened = ak.flatten(layout, axis=2)
    assert flattened.tolist() == []
    assert str(flattened.type) == "0 * var * ?float64"
def filter_with_mask(eventWise, mask_name, append=True):
    """Create filtered copies of jet columns, keeping only particles that
    pass the named mask, then recluster so derived columns stay consistent.

    Returns the (hyperparameters, content) dicts of new columns; when
    ``append`` is True they are also written back onto the EventWise store.
    """
    new_name, jet_name = get_filtered_name(mask_name)
    eventWise.selected_event = None
    # copy the hyper parameters
    hyperparameters = {}
    # not all jets have all input parameters,
    # so filter for what really exists
    for suffix in FormJets.get_jet_input_params():
        name = jet_name + '_' + suffix
        if name in eventWise.hyperparameter_columns:
            hyperparameters[new_name + '_' + suffix] = getattr(eventWise, name)
    # not to copy the parameters
    ps_mask = getattr(eventWise, mask_name)
    jet_ints = ['_' + name for name in FormJets.Clustering.int_columns]
    label_column = FormJets.Clustering.int_columns.index("Label")
    jet_floats = ['_' + name for name in FormJets.Clustering.float_columns]
    # NOTE(review): `*` binds tighter than `==`, so this is
    # (ps_mask * Child1) == -1; for a 0/1-valued mask that selects masked
    # leaves (Child1 == -1) -- confirm ps_mask is boolean/0-1 valued.
    child_ps_mask = ps_mask * getattr(eventWise, jet_name + "_Child1") == -1
    # don't cut corners, make it using proper jets....
    content = {
        new_name + suffix: [[] for _ in child_ps_mask]
        for suffix in jet_ints + jet_floats
    }
    for event_n, event_mask in enumerate(child_ps_mask):
        eventWise.selected_event = event_n
        # these are only the floats that have passed the mask
        event_floats = [
            ak.to_numpy(
                ak.flatten(getattr(eventWise, jet_name + suffix)[event_mask]))
            for suffix in jet_floats
        ]
        event_floats = np.vstack(event_floats).T
        # the labels need to be preserved; all other ints start at -1
        event_ints = -np.ones(
            (len(event_floats), len(FormJets.Clustering.int_columns)))
        event_labels = getattr(eventWise, jet_name + "_Label")[event_mask]
        event_ints[:, label_column] = ak.flatten(event_labels)
        # now make a partitional jet of these values
        jets = FormJets.ManualPartitional((event_ints, event_floats))
        # and make it cluster like the real jets
        for jet_n, labels in enumerate(event_labels):
            jets.create_jet(ak.to_list(labels))
        # split the partitional object, and read out the created values
        for jet in jets.split():
            mask = jet.Label != -1
            for suffix in jet_ints + jet_floats:
                new_content = getattr(jet, suffix[1:])[mask]
                content[new_name + suffix][event_n].append(new_content)
    # deselect the last event before touching the store again
    eventWise.selected_event = None
    if append:
        eventWise.append_hyperparameters(**hyperparameters)
        eventWise.append(**content)
    return hyperparameters, content
def convert_junc_txt_component(juncFilePath, uncFile):
    """Parse one JES-uncertainty text file into a 'jec_uncertainty_lookup'
    payload.

    Builds the standard JME lookup first, then repacks the parameter columns
    (stored as interleaved knot/down/up triplets) into dense numpy arrays.
    """
    (
        name,
        layout,
        pars,
        nBinnedVars,
        nBinColumns,
        nEvalVars,
        formula,
        nParms,
        columns,
        dtypes,
    ) = _parse_jme_formatted_file(juncFilePath,
                                  interpolatedFunc=True,
                                  parmsFromColumns=True,
                                  jme_f=uncFile)
    temp = _build_standard_jme_lookup(
        name,
        layout,
        pars,
        nBinnedVars,
        nBinColumns,
        nEvalVars,
        formula,
        nParms,
        columns,
        dtypes,
        interpolatedFunc=True,
    )
    wrapped_up = {}
    for key, val in temp.items():
        newkey = (key[0], "jec_uncertainty_lookup")
        vallist = list(val)
        vals, names = vallist[-1]
        # columns are interleaved as (knot, down, up) triplets
        knots = vals[0:len(vals):3]
        downs = vals[1:len(vals):3]
        ups = vals[2:len(vals):3]
        downs = numpy.array(
            [numpy.array(awkward.flatten(down)) for down in downs])
        ups = numpy.array([numpy.array(awkward.flatten(up)) for up in ups])
        # every knot column must contain exactly one unique bin low edge
        for knotv in knots:
            knot = numpy.unique(numpy.array(awkward.flatten(knotv)))
            if knot.size != 1:
                raise Exception("Multiple bin low edges found")
        knots = numpy.array(
            [numpy.unique(numpy.array(awkward.flatten(k)))[0] for k in knots])
        # replace the formula slot with the dense knot/up/down tables
        vallist[2] = ({
            "knots": knots,
            "ups": ups.T,
            "downs": downs.T
        }, vallist[2][-1])
        vallist = vallist[:-1]
        wrapped_up[newkey] = tuple(vallist)
    return wrapped_up
def test_jet_resolution():
    """JetResolution must agree between flat and jagged inputs, and a
    truncated sample must reproduce reference resolution values."""
    from coffea.jetmet_tools import JetResolution
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_Rho = np.full_like(test_eta, 10.0)
    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    test_Rho_jag = ak.unflatten(test_Rho, counts)
    jer_names = ["Spring16_25nsV10_MC_PtResolution_AK4PFPuppi"]
    reso = JetResolution(**{name: evaluator[name] for name in jer_names})
    print(reso)
    # flat and jagged evaluation must agree element-wise
    resos = reso.getResolution(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt)
    resos_jag = reso.getResolution(JetEta=test_eta_jag,
                                   Rho=test_Rho_jag,
                                   JetPt=test_pt_jag)
    assert ak.all(np.abs(resos - ak.flatten(resos_jag)) < 1e-6)
    # truncate to three events and push the last rho value (100.0) out of
    # the calibration range to exercise input clamping
    test_pt_jag = test_pt_jag[0:3]
    test_eta_jag = test_eta_jag[0:3]
    test_Rho_jag = test_Rho_jag[0:3]
    test_Rho_jag = ak.concatenate(
        [test_Rho_jag[:-1], [ak.concatenate([test_Rho_jag[-1, :-1], 100.0])]])
    counts = counts[0:3]
    print("Raw jet values:")
    print("pT:", test_pt_jag)
    print("eta:", test_eta_jag)
    print("rho:", test_Rho_jag, "\n")
    # reference resolutions per jet (the trailing 1.0 comes from the clamped
    # out-of-range rho)
    resos_jag_ref = ak.unflatten(
        np.array([
            0.21974642,
            0.32421591,
            0.33702479,
            0.27420327,
            0.13940689,
            0.48134521,
            0.26564994,
            1.0,
        ]),
        counts,
    )
    resos_jag = reso.getResolution(JetEta=test_eta_jag,
                                   Rho=test_Rho_jag,
                                   JetPt=test_pt_jag)
    print("Reference Resolution (jagged):", resos_jag_ref)
    print("Resolution (jagged):", resos_jag)
    # NB: 5e-4 tolerance was agreed upon by lgray and aperloff, if the differences get bigger over time
    # we need to agree upon how these numbers are evaluated (double/float conversion is kinda random)
    assert ak.all(
        np.abs(ak.flatten(resos_jag_ref) - ak.flatten(resos_jag)) < 5e-4)
def test_jet_correction_uncertainty():
    """Total JES uncertainty: flat and jagged evaluation must agree, and a
    truncated sample must reproduce the reference up/down factors."""
    from coffea.jetmet_tools import JetCorrectionUncertainty
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    junc_names = ["Summer16_23Sep2016V3_MC_Uncertainty_AK4PFPuppi"]
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name] for name in junc_names})
    print(junc)
    juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)
    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))
    # flat and jagged evaluation must agree exactly, level by level
    for i, (level, corrs) in enumerate(juncs):
        assert corrs.shape[0] == test_eta.shape[0]
        assert ak.all(corrs == ak.flatten(juncs_jag[i][1]))
    # truncate to three events for the reference comparison
    test_pt_jag = test_pt_jag[0:3]
    test_eta_jag = test_eta_jag[0:3]
    counts = counts[0:3]
    print("Raw jet values:")
    print("pT:", test_pt_jag.tolist())
    print("eta:", test_eta_jag.tolist(), "\n")
    # reference (up, down) scale factors per jet
    juncs_jag_ref = ak.unflatten(
        np.array([
            [1.053504214, 0.946495786],
            [1.033343349, 0.966656651],
            [1.065159157, 0.934840843],
            [1.033140127, 0.966859873],
            [1.016858652, 0.983141348],
            [1.130199999, 0.869800001],
            [1.039968468, 0.960031532],
            [1.033100002, 0.966899998],
        ]),
        counts,
    )
    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))
    for i, (level, corrs) in enumerate(juncs_jag):
        print("Index:", i)
        print("Correction level:", level)
        print("Reference Uncertainties (jagged):", juncs_jag_ref)
        print("Uncertainties (jagged):", corrs)
        assert ak.all(
            np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs)) < 1e-6)
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    """Return a sha256 hex digest summarising the contents of a ROOT file.

    With ``ordering_invariant=False`` the hash depends on the row order
    within each branch; with ``True`` each row is hashed individually (md5)
    and the sorted per-row digests are combined, making the result
    independent of the order of entries in each tree.
    """
    rf = uproot.open(path)
    gh = hashlib.sha256()
    for tree_name in sorted(rf.keys()):
        gh.update(tree_name.encode("utf8"))
        try:
            tree = rf[tree_name]
            # non-TTree objects contribute only their name
            if not isinstance(tree, uproot.TTree):
                continue
        except NotImplementedError:
            # objects uproot cannot model are skipped entirely
            continue
        keys = list(sorted(tree.keys()))
        branches = tree.arrays(library="ak")
        if not ordering_invariant:
            # branch-by-branch: hash the flattened contents in storage order
            h = hashlib.sha256()
            for name in keys:
                h.update(name.encode("utf8"))
                arr = branches[name]
                arr = ak.flatten(arr, axis=None)
                arr = np.array(arr)
                h.update(arr.tobytes())
            gh.update(h.digest())
        else:
            # NOTE(review): repeated np.append is O(n^2) and relies on numpy
            # promoting the empty float array to a bytes dtype on the first
            # append -- verify; a Python list + sort would be safer.
            items = np.array([])
            for row in zip(*[branches[b] for b in keys]):
                # one md5 per row, over all branch values of that entry
                h = hashlib.md5()
                for obj in row:
                    if isinstance(obj, ak.highlevel.Array):
                        if obj.ndim == 1:
                            h.update(ak.to_numpy(obj).tobytes())
                        else:
                            arr = ak.to_numpy(ak.flatten(obj, axis=None))
                            h.update(arr.tobytes())
                    else:
                        h.update(np.array([obj]).tobytes())
                items = np.append(items, h.digest())
            # sorting makes the combined digest order-invariant
            items.sort()
            h = hashlib.sha256()
            h.update("".join(keys).encode("utf8"))
            h.update(items.tobytes())
            gh.update(h.digest())
    return gh.hexdigest()
def process(self, events):
    """Fill and return a 2D (pT, eta) histogram over all jets in `events`."""
    jet_pt = ak.flatten(events.Jet.pt)
    jet_eta = ak.flatten(events.Jet.eta)
    histogram = (
        hist.Hist.new
        .Reg(100, 0, 200, name="ptj", label="Jet $p_{T}$ [GeV]")
        .Reg(100, -5, 5, name="etaj", label=r"Jet $\eta$")
        .Double()
    )
    return histogram.fill(jet_pt, jet_eta)
def test():
    """axis=0 (and the equivalent negative axis) flattening only removes
    missing values at the outermost level, so these arrays are unchanged."""
    nested = ak.Array([[1, 2, 3], [], [4, 5]])
    flat = ak.Array([1, 2, 3, 4, 5])
    nested_expected = [[1, 2, 3], [], [4, 5]]
    flat_expected = [1, 2, 3, 4, 5]
    # positive axis addressing
    assert ak.flatten(nested, axis=0).tolist() == nested_expected
    assert ak.flatten(flat, axis=0).tolist() == flat_expected
    # negative axis addressing of the same (outermost) level
    assert ak.flatten(nested, axis=-2).tolist() == nested_expected
    assert ak.flatten(flat, axis=-1).tolist() == flat_expected
def get_event(ew, jet_name):
    """Get the kinematics of a single event.

    Returns [mass, energy, px, py, pz] for the four highest-pT root jets.
    """
    # root jets are the ones with no parent
    roots = getattr(ew, jet_name + "_Parent") == -1
    pts = ak.flatten(getattr(ew, jet_name + "_PT")[roots])
    # indices of the four hardest roots, in ascending pT order
    top_4 = np.argsort(pts)[-4:]
    kinematics = [
        ak.flatten(getattr(ew, jet_name + "_" + variable)[roots])[top_4]
        for variable in ("Energy", "Px", "Py", "Pz")
    ]
    energy, px, py, pz = kinematics
    # invariant mass from the 4-vector components
    mass = np.sqrt(energy**2 - px**2 - py**2 - pz**2)
    return [mass] + kinematics
def test_ByteMaskedArray_flatten():
    """Flattening a ByteMaskedArray: the masked (None) entries disappear at
    axis=1 but are preserved as None at axis=2."""
    content = ak.from_iter(
        [
            [[0.0, 1.1, 2.2], [], [3.3, 4.4]],
            [],
            [[5.5]],
            [[6.6, 7.7, 8.8, 9.9]],
            [[], [10.0, 11.1, 12.2]],
        ],
        highlevel=False,
    )
    # valid_when=False: a mask byte of 1 marks the entry as missing
    mask = ak.layout.Index8(np.array([0, 0, 1, 1, 0], dtype=np.int8))
    array = ak.Array(ak.layout.ByteMaskedArray(mask, content, valid_when=False))
    assert ak.to_list(array) == [
        [[0.0, 1.1, 2.2], [], [3.3, 4.4]],
        [],
        None,
        None,
        [[], [10.0, 11.1, 12.2]],
    ]
    # flattening the outer lists drops the None records entirely
    outer_flattened = [
        [0.0, 1.1, 2.2],
        [],
        [3.3, 4.4],
        [],
        [10.0, 11.1, 12.2],
    ]
    # flattening the inner lists keeps the None records in place
    inner_flattened = [
        [0.0, 1.1, 2.2, 3.3, 4.4],
        [],
        None,
        None,
        [10.0, 11.1, 12.2],
    ]
    # negative axes address the same levels as their positive counterparts
    for axis in (1, -2):
        assert ak.to_list(ak.flatten(array, axis=axis)) == outer_flattened
    for axis in (2, -1):
        assert ak.to_list(ak.flatten(array, axis=axis)) == inner_flattened
def get_all_vars(varsIn, varSet, normMean, normStd):
    """Build a normalized pandas DataFrame with one column per variable.

    Uses the module-level ``variables`` table: flag [4] == 2 means the value
    is per-event and must be repeated once per AK8 jet; flag [5] in (1, 2)
    means the array is jagged and must be flattened to one entry per jet.
    Removed the unused ``dSets`` local and merged the two identical
    flatten branches.
    """
    dataSet = pd.DataFrame()
    for var in varSet:
        inputArr = varsIn[var][0]
        # broadcast per-event values to one entry per AK8 jet
        if variables[var][4] == 2:
            inputArr = np.repeat(ak.to_numpy(inputArr),
                                 ak.to_numpy(varsIn["njetsAK8"][0]))
        # flags 1 and 2 both mark jagged arrays that need flattening
        if variables[var][5] in (1, 2):
            inputArr = ak.flatten(inputArr)
        dataSet[var] = inputArr
    return normalize(dataSet, normMean, normStd)
def test_upper_layers():
    """FormShower.upper_layers should keep the roots, the first n_layers of
    descendants and the leaves, plus any explicitly captured PIDs."""
    # will need an eventwise with Parents, Children, MCPID
    # layer   -1  0  1  1  -1  2  2  3  3  3  -1
    # idx      0  1  2  3   4  5  6  7  8  9  10
    children = [[], [2, 3], [5], [6, 5], [], [], [7, 8, 9], [], [], [], []]
    parents = [[], [], [1], [1], [], [2, 3], [3], [6], [6], [6], []]
    mcpid = [4, 5, 5, 3, 2, 1, -5, -1, 7, 11, 12]
    expected = [2, 6]
    labeler = PDGNames.IDConverter()
    with TempTestDir("tst") as dir_name:
        eventWise = Components.EventWise(os.path.join(dir_name, "tmp.parquet"))
        eventWise.append(Children=[ak.from_iter(children)],
                         Parents=[ak.from_iter(parents)],
                         MCPID=[ak.from_iter(mcpid)])
        eventWise.selected_event = 0
        # with n_layers=2 expect the roots, the first layers and the leaves
        expected_particle_idx = [0, 1, 2, 3, 4, 10]
        expected_children = ak.from_iter(
            [c for i in expected_particle_idx for c in children[i]])
        expected_parents = ak.from_iter(
            [p for i in expected_particle_idx for p in parents[i]])
        expected_labels = [labeler[mcpid[i]] for i in expected_particle_idx]
        shower = FormShower.upper_layers(eventWise, n_layers=2)
        # sort by particle index before comparing; output order is not fixed
        order = np.argsort(shower.particle_idxs)
        tst.assert_allclose(shower.particle_idxs[order], expected_particle_idx)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.children[order])),
                            expected_children)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.parents[order])),
                            expected_parents)
        for a, b in zip(shower.labels[order], expected_labels):
            assert a == b
        # try with capture pids
        expected_particle_idx = [0, 1, 2, 3, 4, 5, 6, 10]
        expected_children = ak.from_iter(
            [c for i in expected_particle_idx for c in children[i]])
        expected_parents = ak.from_iter(
            [p for i in expected_particle_idx for p in parents[i]])
        expected_labels = [labeler[mcpid[i]] for i in expected_particle_idx]
        shower = FormShower.upper_layers(eventWise, n_layers=2,
                                         capture_pids=[1])
        order = np.argsort(shower.particle_idxs)
        tst.assert_allclose(shower.particle_idxs[order], expected_particle_idx)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.children[order])),
                            expected_children)
        tst.assert_allclose(ak.flatten(ak.from_iter(shower.parents[order])),
                            expected_parents)
        for a, b in zip(shower.labels[order], expected_labels):
            assert a == b
def apply_roccor(df, rochester, is_mc):
    """Attach Rochester-corrected muon pt columns (pt_roch, pt_roch_up/down).

    For MC, muons with a matched gen particle get the deterministic
    kSpreadMC correction; unmatched muons get the stochastic kSmearMC
    correction (driven by one uniform random number per muon).  Data uses
    kScaleDT.  Mutates df in place.
    """
    if is_mc:
        # matched_gen.pt is None-padded for unmatched muons; fill with NaN
        # so ~isnan marks the gen-matched ones
        hasgen = ~np.isnan(ak.fill_none(df.Muon.matched_gen.pt, np.nan))
        mc_rand = np.random.rand(*ak.to_numpy(ak.flatten(df.Muon.pt)).shape)
        mc_rand = ak.unflatten(mc_rand, ak.num(df.Muon.pt, axis=1))
        # start from unit corrections/errors, filled in per subset below
        corrections = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        errors = np.array(ak.flatten(ak.ones_like(df.Muon.pt)))
        mc_kspread = rochester.kSpreadMC(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        mc_ksmear = rochester.kSmearMC(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )
        errspread = rochester.kSpreadMCerror(
            df.Muon.charge[hasgen],
            df.Muon.pt[hasgen],
            df.Muon.eta[hasgen],
            df.Muon.phi[hasgen],
            df.Muon.matched_gen.pt[hasgen],
        )
        errsmear = rochester.kSmearMCerror(
            df.Muon.charge[~hasgen],
            df.Muon.pt[~hasgen],
            df.Muon.eta[~hasgen],
            df.Muon.phi[~hasgen],
            df.Muon.nTrackerLayers[~hasgen],
            mc_rand[~hasgen],
        )
        # scatter the two disjoint subsets back into the flat arrays
        hasgen_flat = np.array(ak.flatten(hasgen))
        corrections[hasgen_flat] = np.array(ak.flatten(mc_kspread))
        corrections[~hasgen_flat] = np.array(ak.flatten(mc_ksmear))
        errors[hasgen_flat] = np.array(ak.flatten(errspread))
        errors[~hasgen_flat] = np.array(ak.flatten(errsmear))
        # restore the per-event jagged structure
        corrections = ak.unflatten(corrections, ak.num(df.Muon.pt, axis=1))
        errors = ak.unflatten(errors, ak.num(df.Muon.pt, axis=1))
    else:
        corrections = rochester.kScaleDT(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)
        errors = rochester.kScaleDTerror(df.Muon.charge, df.Muon.pt,
                                         df.Muon.eta, df.Muon.phi)
    df["Muon", "pt_roch"] = df.Muon.pt * corrections
    df["Muon", "pt_roch_up"] = df.Muon.pt_roch + df.Muon.pt * errors
    df["Muon", "pt_roch_down"] = df.Muon.pt_roch - df.Muon.pt * errors
def calculate_selection(self, syst_tag, events):
    """Label electrons and return (dummy_cut, flattened electrons).

    Labels: 1 = prompt (genPartFlav 1), 0 = from tau / heavy flavour
    (genPartFlav 3/4/5), -1 = everything else (and all data).  Event-level
    fields are broadcast onto each electron before flattening.
    """
    electrons = events.ele
    # default label: -1 for every electron
    electrons["label"] = -1 * awkward.ones_like(electrons.pt)
    if not self.is_data:
        # genPartFlav == 1: prompt electron
        electrons["label"] = awkward.where(
            electrons.genPartFlav == 1, awkward.ones_like(electrons.label),
            electrons.label)
        # genPartFlav 3/4/5: from tau decay or b/c hadron
        electrons["label"] = awkward.where(
            (electrons.genPartFlav == 3) | (electrons.genPartFlav == 4) |
            (electrons.genPartFlav == 5), awkward.zeros_like(electrons.label),
            electrons.label)
    # copy all other event-level fields onto the electrons
    fields = [x for x in events.fields if x not in ["ele", "Electron"]]
    for x in fields:
        if x == "ZCand":
            # keep only the first Z candidate per event
            electrons[x] = awkward.firsts(events[x])
        else:
            electrons[x] = events[x]
    electrons = awkward.flatten(electrons)
    # every electron passes; real selection happens elsewhere
    dummy_cut = electrons.pt >= 0
    return dummy_cut, electrons
def _ak_to_numpy(ak_array, fields):
    """
    Convert the given awkward array to a numpy table.

    Parameters
    ----------
    ak_array : awkward.Array
        The awkward array, 2D or 3D.
    fields : List
        The column names of the last axis of the array.

    Returns
    -------
    np_branch : tuple
        Numpy-fied awkward array. See output of _branch_to_numpy.
    """
    # number of jagged dimensions beyond the outermost (event) axis
    n_dims = ak_array.ndim - 1
    if n_dims == 1:
        # already one row per event
        n_items = np.ones(len(ak_array), dtype="int64")
    elif n_dims == 2:
        # remember per-event multiplicities, then drop the event axis
        n_items = ak.num(ak_array).to_numpy()
        ak_array = ak.flatten(ak_array)
    else:
        raise ValueError("Can not process array")
    # pad ragged rows up to len(fields) columns; missing entries become NaN
    filled = np.ma.filled(
        ak.pad_none(ak_array, target=len(fields), axis=-1).to_numpy(),
        fill_value=np.nan,
    )
    return {fields[i]: filled[:, i] for i in range(len(fields))}, n_items
def test_jet_correction_regrouped_uncertainty_sources():
    """Flat and jagged evaluation of the regrouped JES uncertainty sources
    must agree for every source."""
    from coffea.jetmet_tools import JetCorrectionUncertainty
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    junc_names = []
    levels = []
    for name in dir(evaluator):
        if 'Regrouped_Fall17_17Nov2017_V32_MC_UncertaintySources_AK4PFchs' in name:
            junc_names.append(name)
            # some source names contain an underscore themselves, which gives
            # one extra token when splitting on '_'
            if len(name.split('_')) == 9:
                levels.append("_".join(name.split('_')[-2:]))
            else:
                levels.append(name.split('_')[-1])
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name] for name in junc_names})
    print(junc)
    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))
    # flat evaluation must match the jagged one source by source
    for i, tpl in enumerate(
            list(junc.getUncertainty(JetEta=test_eta, JetPt=test_pt))):
        assert (tpl[0] in levels)
        assert (tpl[1].shape[0] == test_eta.shape[0])
        assert (ak.all(tpl[1] == ak.flatten(juncs_jag[i][1])))
def test_jet_correction_uncertainty_sources():
    """Flat and jagged evaluation of the JES uncertainty sources must agree
    for every source.

    Removed the dead ``tic``/``toc`` ``time.time()`` locals around the final
    assertion: the values were assigned but never used.
    """
    from coffea.jetmet_tools import JetCorrectionUncertainty
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    junc_names = []
    levels = []
    for name in dir(evaluator):
        if 'Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi' in name:
            junc_names.append(name)
            levels.append(name.split('_')[-1])
        # test for underscore in dataera
        if 'Fall17_17Nov2017_V6_MC_UncertaintySources_AK4PFchs_AbsoluteFlavMap' in name:
            junc_names.append(name)
            levels.append(name.split('_')[-1])
    junc = JetCorrectionUncertainty(
        **{name: evaluator[name] for name in junc_names})
    print(junc)
    juncs = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)
    juncs_jag = list(
        junc.getUncertainty(JetEta=test_eta_jag, JetPt=test_pt_jag))
    for i, (level, corrs) in enumerate(juncs):
        assert (level in levels)
        assert (corrs.shape[0] == test_eta.shape[0])
        assert (ak.all(corrs == ak.flatten(juncs_jag[i][1])))
def flatten_idxs(idx_in, jaggedarray):
    """Convert tuples of jagged indices into flat indices into a jagged
    array's contents.

    Parameters
    ----------
    idx_in : tuple of index arrays
        Empty, (outer,) or (outer, inner) index arrays.
    jaggedarray : jagged awkward array
        Its ``starts`` offsets translate outer indices to flat positions.

    Returns
    -------
    numpy.ndarray
        Flat integer indices into ``awkward.flatten(jaggedarray)``.

    Raises
    ------
    Exception
        If more than two index dimensions are given, or any computed index
        falls outside the flattened contents.
    """
    if len(idx_in) == 0:
        # BUG FIX: numpy.int was a deprecated alias removed in NumPy 1.24;
        # use the explicit fixed-width integer type instead.
        return numpy.array([], dtype=numpy.int64)
    idx_out = jaggedarray.starts[idx_in[0]]
    if len(idx_in) == 1:
        pass
    elif len(idx_in) == 2:
        # inner index is an offset within the selected sublist
        idx_out += idx_in[1]
    else:
        raise Exception(
            "jme_standard_function only works for two binning dimensions!")
    # validate that every flat index lands inside the contents
    flattened = awkward.flatten(jaggedarray)
    good_idx = idx_out < len(flattened)
    if (~good_idx).any():
        input_idxs = tuple([idx_out[~good_idx]] +
                           [idx_in[i][~good_idx] for i in range(len(idx_in))])
        raise Exception("Calculated invalid index {} for"
                        " array with length {}".format(
                            numpy.vstack(input_idxs), len(flattened)))
    return idx_out
def test():
    """ak.unflatten splits by scalar or per-list counts (None makes a
    missing list), and round-trips with ak.flatten / ak.num."""
    array = ak.Array(list(range(10)))
    ragged = [[0, 1, 2], [], [3, 4], [5], [6, 7, 8, 9]]
    # scalar count: equal-length sublists
    assert ak.unflatten(array, 5).tolist() == [[0, 1, 2, 3, 4],
                                               [5, 6, 7, 8, 9]]
    # explicit per-list counts
    assert ak.unflatten(array, [3, 0, 2, 1, 4]).tolist() == ragged
    # a None count produces a missing list instead of an empty one
    assert ak.unflatten(array, [3, None, 2, 1, 4]).tolist() == [
        [0, 1, 2],
        None,
        [3, 4],
        [5],
        [6, 7, 8, 9],
    ]
    # round trip: flatten + num reconstructs the original structure
    original = ak.Array(ragged)
    counts = ak.num(original)
    flat = ak.flatten(original)
    assert counts.tolist() == [3, 0, 2, 1, 4]
    assert flat.tolist() == list(range(10))
    assert ak.unflatten(flat, counts).tolist() == ragged
def test_root_scalefactors():
    """Extract a 2D scale-factor histogram from a ROOT file and check flat
    vs jagged evaluation plus the expected output values."""
    extractor = lookup_tools.extractor()
    extractor.add_weight_sets([
        "testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"
    ])
    extractor.finalize(reduce_list=["testSF2d"])
    evaluator = extractor.make_evaluator()
    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    # test flat eval
    test_out = evaluator["testSF2d"](test_eta, test_pt)
    # print it
    print(evaluator["testSF2d"])
    # test structured eval
    test_eta_jagged = ak.unflatten(test_eta, counts)
    test_pt_jagged = ak.unflatten(test_pt, counts)
    test_out_jagged = evaluator["testSF2d"](test_eta_jagged, test_pt_jagged)
    # jagged evaluation must preserve structure and agree with flat values
    assert ak.all(ak.num(test_out_jagged) == counts)
    assert ak.all(ak.flatten(test_out_jagged) == test_out)
    print(test_out)
    # compare against the stored reference values within 1e-8
    diff = np.abs(test_out - _testSF2d_expected_output)
    print("Max diff: %.16f" % diff.max())
    print("Median diff: %.16f" % np.median(diff))
    print("Diff over threshold rate: %.1f %%" %
          (100 * (diff >= 1.0e-8).sum() / diff.size))
    assert (diff < 1.0e-8).all()