def _string_equal(one, two):
    """Compare two arrays of strings entry by entry.

    Lengths are compared first. Character-level comparison is then carried
    out only for the entries whose lengths agree and are non-zero; their
    result overwrites the length-based verdict.

    Args:
        one: First array of strings.
        two: Second array of strings.

    Returns:
        The per-entry boolean verdicts, wrapped with the behavior obtained
        from ``one`` and ``two``.
    """
    nplike = ak.nplike.of(one, two)
    behavior = ak._util.behaviorof(one, two)

    left = ak.without_parameters(one).layout
    right = ak.without_parameters(two).layout

    # Entries whose lengths differ can never be equal.
    left_lengths = nplike.asarray(left.count(axis=-1))
    right_lengths = nplike.asarray(right.count(axis=-1))
    verdict = left_lengths == right_lengths

    # Candidates for a character comparison: same length AND non-zero length
    # (a zero count is falsy in the logical_and).
    candidates = nplike.logical_and(verdict, left_lengths)

    if len(left_lengths[candidates]) > 0:
        left_candidates = left[candidates]
        right_candidates = right[candidates]
        chars_match = ak.all(
            ak.Array(left_candidates) == ak.Array(right_candidates), axis=-1
        ).layout
        # Replace the length-only verdict with the character-level one.
        verdict[candidates] = chars_match

    return ak._util.wrap(ak.layout.NumpyArray(verdict), behavior)
def jer_smear(
    variation,
    forceStochastic,
    pt_gen,
    jetPt,
    etaJet,
    jet_energy_resolution,
    jet_resolution_rand_gauss,
    jet_energy_resolution_scale_factor,
):
    """Compute the multiplicative jet-energy-resolution smearing factor.

    Two candidate factors are computed: one that scales the relative
    difference between ``jetPt`` and ``pt_gen`` and one that is driven by the
    supplied random numbers. The first is chosen where ``pt_gen > 0`` and the
    relative difference is below three times the resolution; the second is
    used everywhere else. Finally, wherever the smeared pt would fall below
    ``_MIN_JET_ENERGY / cosh(etaJet)``, the factor is replaced so the smeared
    pt equals that floor.

    Args:
        variation: Column index into ``jet_energy_resolution_scale_factor``.
        forceStochastic: When truthy, ``pt_gen`` is ignored and replaced by
            zeros shaped like ``jetPt``.
        pt_gen: Generator-level pt values.
        jetPt: Jet pt values; must be an ``awkward.highlevel.Array``.
        etaJet: Jet eta values.
        jet_energy_resolution: Per-jet resolution values.
        jet_resolution_rand_gauss: Per-jet random numbers.
        jet_energy_resolution_scale_factor: Two-dimensional scale factors,
            indexed as ``[:, variation]``.

    Returns:
        The smearing factors, placed into the layout structure of ``jetPt``
        and wrapped with its behavior.

    Raises:
        Exception: If ``jetPt`` is not an ``awkward.highlevel.Array``.
    """
    # Evaluate the flag once, before the type check, as the first statement.
    stochastic_only = bool(forceStochastic)

    if not isinstance(jetPt, awkward.highlevel.Array):
        raise Exception("'jetPt' must be an awkward array of some kind!")

    if stochastic_only:
        # An all-zero gen pt makes the `pt_gen > 0` selection below fail
        # everywhere, so only the random-number-driven factor is used.
        pt_gen = awkward.without_parameters(awkward.zeros_like(jetPt))

    random_shift = jet_energy_resolution * jet_resolution_rand_gauss
    scale_factor = jet_energy_resolution_scale_factor[:, variation]
    rel_pt_diff = (jetPt - pt_gen) / jetPt
    use_scaling = (pt_gen > 0) & (
        numpy.abs(rel_pt_diff) < 3 * jet_energy_resolution
    )

    scaling_smear = 1 + (scale_factor - 1) * rel_pt_diff
    random_smear = (
        1 + numpy.sqrt(numpy.maximum(scale_factor**2 - 1, 0)) * random_shift
    )

    pt_floor = _MIN_JET_ENERGY / numpy.cosh(etaJet)
    floor_factor = pt_floor / jetPt

    factor = awkward.where(use_scaling, scaling_smear, random_smear)
    below_floor = (factor * jetPt) < pt_floor
    factor = awkward.where(below_floor, floor_factor, factor)

    def swap_in_factor(layout, depth):
        # Substitute the computed factors at NumpyArray nodes, or at anything
        # that is neither Content nor a PartitionedArray.
        if isinstance(layout, awkward.layout.NumpyArray):
            return lambda: awkward.layout.NumpyArray(factor)
        if not isinstance(
            layout,
            (awkward.layout.Content, awkward.partition.PartitionedArray),
        ):
            return lambda: awkward.layout.NumpyArray(factor)
        return None

    rebuilt = awkward._util.recursively_apply(
        awkward.operations.convert.to_layout(jetPt), swap_in_factor
    )
    return awkward._util.wrap(rebuilt, awkward._util.behaviorof(jetPt))
def test_corrected_jets_factory():
    """Exercise CorrectedJetsFactory and CorrectedMETFactory end to end.

    Reads events from ``tests/samples/nano_dy.root``, builds corrected jets
    and corrected MET, prints timings and every systematic variation, and
    asserts that a by-hand correction + smearing of a handful of jets agrees
    with the factory output to within 1e-6.

    The function uses ``evaluator``, ``ak``, ``np``, ``time``, ``cachetools``
    and ``pyinstrument`` without defining them, so they must be available at
    module level.
    """
    import os
    from coffea.jetmet_tools import CorrectedJetsFactory, CorrectedMETFactory, JECStack

    # NOTE(review): this placeholder is overwritten a few lines below.
    events = None
    from coffea.nanoevents import NanoEventsFactory

    factory = NanoEventsFactory.from_root(
        os.path.abspath("tests/samples/nano_dy.root"))
    events = factory.events()

    # The list order matters: it is sliced below as [0:4] for the corrector,
    # [4:5] for the resolution and [5:6] for the resolution scale factor.
    jec_stack_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
        "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi",
        "Spring16_25nsV10_MC_SF_AK4PFPuppi",
    ]
    # Append every evaluator entry whose key contains the uncertainty-sources tag.
    for key in evaluator.keys():
        if "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi" in key:
            jec_stack_names.append(key)

    jec_inputs = {name: evaluator[name] for name in jec_stack_names}
    jec_stack = JECStack(jec_inputs)

    # Map the factory's variable names onto the jet field names.
    name_map = jec_stack.blank_name_map
    name_map["JetPt"] = "pt"
    name_map["JetMass"] = "mass"
    name_map["JetEta"] = "eta"
    name_map["JetA"] = "area"

    # Derive the extra input fields from what the jets already carry.
    jets = events.Jet

    jets["pt_raw"] = (1 - jets["rawFactor"]) * jets["pt"]
    jets["mass_raw"] = (1 - jets["rawFactor"]) * jets["mass"]
    # Jets without a matched gen jet get pt_gen = 0.
    jets["pt_gen"] = ak.values_astype(ak.fill_none(jets.matched_gen.pt, 0), np.float32)
    # Broadcast the per-event rho against the jets so each jet has a value.
    jets["rho"] = ak.broadcast_arrays(events.fixedGridRhoFastjetAll, jets.pt)[0]
    name_map["ptGenJet"] = "pt_gen"
    name_map["ptRaw"] = "pt_raw"
    name_map["massRaw"] = "mass_raw"
    name_map["Rho"] = "rho"

    # Cache with an infinite maximum size, shared by all lazy evaluations below.
    jec_cache = cachetools.Cache(np.inf)

    print(name_map)

    tic = time.time()
    jet_factory = CorrectedJetsFactory(name_map, jec_stack)
    toc = time.time()

    print("setup corrected jets time =", toc - tic)

    # Time and profile the build call itself.
    tic = time.time()
    prof = pyinstrument.Profiler()
    prof.start()
    corrected_jets = jet_factory.build(jets, lazy_cache=jec_cache)
    prof.stop()
    toc = time.time()

    print("corrected_jets build time =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))

    # Time the printing of the nominal values and of every variation.
    tic = time.time()
    print("Generated jet pt:", corrected_jets.pt_gen)
    print("Original jet pt:", corrected_jets.pt_orig)
    print("Raw jet pt:", jets.pt_raw)
    print("Corrected jet pt:", corrected_jets.pt)
    print("Original jet mass:", corrected_jets.mass_orig)
    print("Raw jet mass:", jets["mass_raw"])
    print("Corrected jet mass:", corrected_jets.mass)
    print("jet eta:", jets.eta)
    for unc in jet_factory.uncertainties():
        print(unc)
        print(corrected_jets[unc].up.pt)
        print(corrected_jets[unc].down.pt)
    toc = time.time()

    print("build all jet variations =", toc - tic)

    # Test that the corrections were applied correctly
    from coffea.jetmet_tools import (
        FactorizedJetCorrector,
        JetResolution,
        JetResolutionScaleFactor,
    )

    scalar_form = ak.without_parameters(jets["pt_raw"]).layout.form
    # Rebuild the individual correction steps directly, outside the factory.
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_stack_names[0:4]})
    corrs = corrector.getCorrection(JetEta=jets["eta"], Rho=jets["rho"], JetPt=jets["pt_raw"], JetA=jets["area"])
    reso = JetResolution(
        **{name: evaluator[name] for name in jec_stack_names[4:5]})
    jets["jet_energy_resolution"] = reso.getResolution(
        JetEta=jets["eta"],
        Rho=jets["rho"],
        JetPt=jets["pt_raw"],
        form=scalar_form,
        lazy_cache=jec_cache,
    )
    resosf = JetResolutionScaleFactor(
        **{name: evaluator[name] for name in jec_stack_names[5:6]})
    jets["jet_energy_resolution_scale_factor"] = resosf.getScaleFactor(
        JetEta=jets["eta"], lazy_cache=jec_cache)

    # Filter out the non-deterministic (no gen pt) jets
    # NOTE(review): presumably the [0, :-2] / [-1, :-1] slices further down are
    # what does this filtering for the sample file - confirm.
    def smear_factor(jetPt, pt_gen, jersf):
        # 1 + (sf - 1) * (pt - pt_gen) / pt, using column 0 of the scale factor.
        return (ak.full_like(jetPt, 1.0) +
                (jersf[:, 0] - ak.full_like(jetPt, 1.0)) * (jetPt - pt_gen) / jetPt)

    # Each reference array is the first event without its last two jets,
    # concatenated with the last event without its last jet.
    test_gen_pt = ak.concatenate(
        [corrected_jets.pt_gen[0, :-2], corrected_jets.pt_gen[-1, :-1]])
    test_raw_pt = ak.concatenate([jets.pt_raw[0, :-2], jets.pt_raw[-1, :-1]])
    # NOTE(review): built from the same expression as test_corrected_pt below,
    # yet printed under the label "Jet pt (nano):" - confirm which is intended.
    test_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    test_eta = ak.concatenate([jets.eta[0, :-2], jets.eta[-1, :-1]])
    test_jer = ak.concatenate([
        jets.jet_energy_resolution[0, :-2], jets.jet_energy_resolution[-1, :-1]
    ])
    test_jer_sf = ak.concatenate([
        jets.jet_energy_resolution_scale_factor[0, :-2],
        jets.jet_energy_resolution_scale_factor[-1, :-1],
    ])
    test_jec = ak.concatenate([corrs[0, :-2], corrs[-1, :-1]])
    test_corrected_pt = ak.concatenate(
        [corrected_jets.pt[0, :-2], corrected_jets.pt[-1, :-1]])
    # By-hand reference: raw pt times the correction, then times the smear factor.
    test_corr_pt = test_raw_pt * test_jec
    test_pt_smear_corr = test_corr_pt * smear_factor(test_corr_pt, test_gen_pt, test_jer_sf)

    # Print the results of the "by-hand" calculations and confirm that the values match the expected values
    print("\nConfirm the CorrectedJetsFactory values:")
    print("Jet pt (gen)", test_gen_pt.tolist())
    print("Jet pt (raw)", test_raw_pt.tolist())
    print("Jet pt (nano):", test_pt.tolist())
    print("Jet eta:", test_eta.tolist())
    print("Jet energy resolution:", test_jer.tolist())
    print("Jet energy resolution sf:", test_jer_sf.tolist())
    print("Jet energy correction:", test_jec.tolist())
    print("Corrected jet pt (ref)", test_corr_pt.tolist())
    print("Corrected & smeared jet pt (ref):", test_pt_smear_corr.tolist())
    print("Corrected & smeared jet pt:", test_corrected_pt.tolist(), "\n")
    assert ak.all(np.abs(test_pt_smear_corr - test_corrected_pt) < 1e-6)

    # Extend the same name map with the entries used by the MET factory.
    name_map["METpt"] = "pt"
    name_map["METphi"] = "phi"
    name_map["JetPhi"] = "phi"
    name_map["UnClusteredEnergyDeltaX"] = "MetUnclustEnUpDeltaX"
    name_map["UnClusteredEnergyDeltaY"] = "MetUnclustEnUpDeltaY"

    tic = time.time()
    met_factory = CorrectedMETFactory(name_map)
    toc = time.time()

    print("setup corrected MET time =", toc - tic)

    met = events.MET
    tic = time.time()
    # prof = pyinstrument.Profiler()
    # prof.start()
    corrected_met = met_factory.build(met, corrected_jets, lazy_cache=jec_cache)
    # prof.stop()
    toc = time.time()

    # print(prof.output_text(unicode=True, color=True, show_all=True))

    print("corrected_met build time =", toc - tic)

    # Time and profile the printing of every MET variation.
    tic = time.time()
    print(corrected_met.pt_orig)
    print(corrected_met.pt)
    prof = pyinstrument.Profiler()
    prof.start()
    for unc in jet_factory.uncertainties() + met_factory.uncertainties():
        print(unc)
        print(corrected_met[unc].up.pt)
        print(corrected_met[unc].down.pt)
    prof.stop()
    toc = time.time()

    print("build all met variations =", toc - tic)

    print(prof.output_text(unicode=True, color=True, show_all=True))
def build(self, jets, lazy_cache):
    """Attach lazily evaluated jet corrections and systematics to ``jets``.

    The jets are flattened, new fields are added as virtual arrays backed by
    ``lazy_cache``, and the result is re-wrapped to the structure of the
    input. Depending on what ``self.jec_stack`` provides, the output gains:

    * ``<pt>_orig`` / ``<mass>_orig``: the input pt and mass,
    * ``jet_energy_correction`` and ``<pt>_jec`` / ``<mass>_jec``,
    * when both ``jer`` and ``jersf`` are present: ``jet_energy_resolution``,
      ``jet_energy_resolution_scale_factor``, ``jet_resolution_rand_gauss``,
      ``jet_energy_resolution_correction``, ``<pt>_jer`` / ``<mass>_jer`` and
      a ``JER`` record with ``up`` / ``down`` variations,
    * when ``junc`` is present: ``jet_energy_uncertainty_<name>`` and a
      ``JES_<name>`` record with ``up`` / ``down`` variations per source.

    ``<pt>`` and ``<mass>`` stand for ``self.name_map["JetPt"]`` and
    ``self.name_map["JetMass"]``. The output layout parameters additionally
    carry ``"corrected": True``.

    Args:
        jets: An ``awkward.highlevel.Array`` of jet records.
        lazy_cache: Mapping used as the cache of every virtual array; must
            not be ``None``.

    Returns:
        The corrected jets, re-wrapped like ``jets``.

    Raises:
        Exception: If ``lazy_cache`` is ``None``, if ``jets`` is not an
            ``awkward.highlevel.Array``, or if ``jets`` has no fields.
    """
    if lazy_cache is None:
        raise Exception(
            "CorrectedJetsFactory requires a awkward-array cache to function correctly."
        )
    lazy_cache = awkward._util.MappingProxy.maybe_wrap(lazy_cache)
    if not isinstance(jets, awkward.highlevel.Array):
        raise Exception(
            "'jets' must be an awkward > 1.0.0 array of some kind!")
    fields = awkward.fields(jets)
    if len(fields) == 0:
        # Fixed: this message was a plain string, so the placeholder was
        # printed literally instead of being interpolated.
        # NOTE(review): assumes `self.real_sig` is set on the instance; it is
        # not defined in this method.
        raise Exception(
            f"Empty record, please pass a jet object with at least {self.real_sig} defined!"
        )
    out = awkward.flatten(jets)
    # Restores the structure of `jets` around the flat result at the end.
    wrap = partial(awkward_rewrap, like_what=jets, gfunc=rewrap_recordarray)
    # Form shared by every scalar-per-jet virtual array created below.
    scalar_form = awkward.without_parameters(
        out[self.name_map["ptRaw"]]).layout.form

    # `in_dict` keeps the untouched input fields (reused for JES variations);
    # `out_dict` accumulates the corrected output fields.
    in_dict = {field: out[field] for field in fields}
    out_dict = dict(in_dict)

    # take care of nominal JEC (no JER if available)
    out_dict[self.name_map["JetPt"] + "_orig"] = out_dict[self.name_map["JetPt"]]
    out_dict[self.name_map["JetMass"] + "_orig"] = out_dict[self.name_map["JetMass"]]
    if self.treat_pt_as_raw:
        out_dict[self.name_map["ptRaw"]] = out_dict[self.name_map["JetPt"]]
        out_dict[self.name_map["massRaw"]] = out_dict[self.name_map["JetMass"]]

    # The JEC is evaluated on the raw pt / mass, so redirect those names.
    jec_name_map = dict(self.name_map)
    jec_name_map["JetPt"] = jec_name_map["ptRaw"]
    jec_name_map["JetMass"] = jec_name_map["massRaw"]
    if self.jec_stack.jec is not None:
        jec_args = {
            k: out_dict[jec_name_map[k]]
            for k in self.jec_stack.jec.signature
        }
        out_dict["jet_energy_correction"] = self.jec_stack.jec.getCorrection(
            **jec_args, form=scalar_form, lazy_cache=lazy_cache)
    else:
        # Without a JEC the correction factor is one for every jet.
        out_dict["jet_energy_correction"] = awkward.without_parameters(
            awkward.ones_like(out_dict[self.name_map["JetPt"]]))

    # finally the lazy binding to the JEC
    init_pt = partial(
        awkward.virtual,
        operator.mul,
        args=(out_dict["jet_energy_correction"],
              out_dict[self.name_map["ptRaw"]]),
        cache=lazy_cache,
    )
    init_mass = partial(
        awkward.virtual,
        operator.mul,
        args=(
            out_dict["jet_energy_correction"],
            out_dict[self.name_map["massRaw"]],
        ),
        cache=lazy_cache,
    )

    out_dict[self.name_map["JetPt"]] = init_pt(length=len(out), form=scalar_form)
    out_dict[self.name_map["JetMass"]] = init_mass(length=len(out), form=scalar_form)

    out_dict[self.name_map["JetPt"] + "_jec"] = out_dict[self.name_map["JetPt"]]
    out_dict[self.name_map["JetMass"] + "_jec"] = out_dict[self.name_map["JetMass"]]

    # in jer we need to have a stash for the intermediate JEC products
    has_jer = False
    if self.jec_stack.jer is not None and self.jec_stack.jersf is not None:
        has_jer = True
        # The JER inputs are the JEC-corrected pt / mass.
        jer_name_map = dict(self.name_map)
        jer_name_map["JetPt"] = jer_name_map["JetPt"] + "_jec"
        jer_name_map["JetMass"] = jer_name_map["JetMass"] + "_jec"

        jerargs = {
            k: out_dict[jer_name_map[k]]
            for k in self.jec_stack.jer.signature
        }
        out_dict["jet_energy_resolution"] = self.jec_stack.jer.getResolution(
            **jerargs, form=scalar_form, lazy_cache=lazy_cache)

        jersfargs = {
            k: out_dict[jer_name_map[k]]
            for k in self.jec_stack.jersf.signature
        }
        out_dict["jet_energy_resolution_scale_factor"] = (
            self.jec_stack.jersf.getScaleFactor(
                **jersfargs, form=_JERSF_FORM, lazy_cache=lazy_cache))

        # Seed the generator from the first and last original pt values,
        # reinterpreted as 32-bit integers.
        # NOTE(review): the [0, -1] index presumably fails when there are no
        # jets at all - confirm whether empty inputs must be supported.
        seeds = numpy.array(out_dict[self.name_map["JetPt"] + "_orig"])[[0, -1]].view("i4")
        out_dict["jet_resolution_rand_gauss"] = awkward.virtual(
            rand_gauss,
            args=(
                out_dict[self.name_map["JetPt"] + "_orig"],
                numpy.random.Generator(numpy.random.PCG64(seeds)),
            ),
            cache=lazy_cache,
            length=len(out),
            form=scalar_form,
        )

        # Nominal smearing: scale-factor column 0.
        init_jerc = partial(
            awkward.virtual,
            jer_smear,
            args=(
                0,
                self.forceStochastic,
                out_dict[jer_name_map["ptGenJet"]],
                out_dict[jer_name_map["JetPt"]],
                out_dict[jer_name_map["JetEta"]],
                out_dict["jet_energy_resolution"],
                out_dict["jet_resolution_rand_gauss"],
                out_dict["jet_energy_resolution_scale_factor"],
            ),
            cache=lazy_cache,
        )
        out_dict["jet_energy_resolution_correction"] = init_jerc(
            length=len(out), form=scalar_form)

        init_pt_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                out_dict["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetPt"]],
            ),
            cache=lazy_cache,
        )
        init_mass_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                out_dict["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetMass"]],
            ),
            cache=lazy_cache,
        )
        out_dict[self.name_map["JetPt"]] = init_pt_jer(length=len(out), form=scalar_form)
        out_dict[self.name_map["JetMass"]] = init_mass_jer(
            length=len(out), form=scalar_form)

        out_dict[self.name_map["JetPt"] + "_jer"] = out_dict[self.name_map["JetPt"]]
        out_dict[self.name_map["JetMass"] + "_jer"] = out_dict[self.name_map["JetMass"]]

        # JER systematics
        # "up" variation: scale-factor column 1.
        jerc_up = partial(
            awkward.virtual,
            jer_smear,
            args=(
                1,
                self.forceStochastic,
                out_dict[jer_name_map["ptGenJet"]],
                out_dict[jer_name_map["JetPt"]],
                out_dict[jer_name_map["JetEta"]],
                out_dict["jet_energy_resolution"],
                out_dict["jet_resolution_rand_gauss"],
                out_dict["jet_energy_resolution_scale_factor"],
            ),
            cache=lazy_cache,
        )
        # Each variation starts from a fresh flattened copy of the input jets.
        up = awkward.flatten(jets)
        up["jet_energy_resolution_correction"] = jerc_up(length=len(out), form=scalar_form)

        init_pt_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                up["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetPt"]],
            ),
            cache=lazy_cache,
        )
        init_mass_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                up["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetMass"]],
            ),
            cache=lazy_cache,
        )
        up[self.name_map["JetPt"]] = init_pt_jer(length=len(out), form=scalar_form)
        up[self.name_map["JetMass"]] = init_mass_jer(length=len(out), form=scalar_form)

        # "down" variation: scale-factor column 2.
        jerc_down = partial(
            awkward.virtual,
            jer_smear,
            args=(
                2,
                self.forceStochastic,
                out_dict[jer_name_map["ptGenJet"]],
                out_dict[jer_name_map["JetPt"]],
                out_dict[jer_name_map["JetEta"]],
                out_dict["jet_energy_resolution"],
                out_dict["jet_resolution_rand_gauss"],
                out_dict["jet_energy_resolution_scale_factor"],
            ),
            cache=lazy_cache,
        )
        down = awkward.flatten(jets)
        down["jet_energy_resolution_correction"] = jerc_down(
            length=len(out), form=scalar_form)

        init_pt_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                down["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetPt"]],
            ),
            cache=lazy_cache,
        )
        init_mass_jer = partial(
            awkward.virtual,
            operator.mul,
            args=(
                down["jet_energy_resolution_correction"],
                out_dict[jer_name_map["JetMass"]],
            ),
            cache=lazy_cache,
        )
        down[self.name_map["JetPt"]] = init_pt_jer(length=len(out), form=scalar_form)
        down[self.name_map["JetMass"]] = init_mass_jer(length=len(out), form=scalar_form)
        out_dict["JER"] = awkward.zip({
            "up": up,
            "down": down
        },
                                      depth_limit=1,
                                      with_name="JetSystematic")

    if self.jec_stack.junc is not None:
        # Uncertainties are applied on top of the most-corrected pt / mass
        # available: the JER-smeared values if present, else the JEC ones.
        juncnames = {}
        juncnames.update(self.name_map)
        if has_jer:
            juncnames["JetPt"] = juncnames["JetPt"] + "_jer"
            juncnames["JetMass"] = juncnames["JetMass"] + "_jer"
        else:
            juncnames["JetPt"] = juncnames["JetPt"] + "_jec"
            juncnames["JetMass"] = juncnames["JetMass"] + "_jec"
        juncargs = {
            k: out_dict[juncnames[k]]
            for k in self.jec_stack.junc.signature
        }
        juncs = self.jec_stack.junc.getUncertainty(**juncargs)

        def junc_smeared_val(uncvals, up_down, variable):
            # Scale `variable` by the selected column of the uncertainty.
            return awkward.materialized(uncvals[:, up_down] * variable)

        def build_variation(unc, jetpt, jetpt_orig, jetmass, jetmass_orig,
                            updown):
            # Start from the original input fields and replace pt and mass
            # with lazily scaled versions.
            var_dict = dict(in_dict)
            var_dict[jetpt] = awkward.virtual(
                junc_smeared_val,
                args=(
                    unc,
                    updown,
                    jetpt_orig,
                ),
                length=len(out),
                form=scalar_form,
                cache=lazy_cache,
            )
            var_dict[jetmass] = awkward.virtual(
                junc_smeared_val,
                args=(
                    unc,
                    updown,
                    jetmass_orig,
                ),
                length=len(out),
                form=scalar_form,
                cache=lazy_cache,
            )
            return awkward.zip(
                var_dict,
                depth_limit=1,
                parameters=out.layout.parameters,
                behavior=out.behavior,
            )

        def build_variant(unc, jetpt, jetpt_orig, jetmass, jetmass_orig):
            # Column 0 of the uncertainty is "up", column 1 is "down".
            up = build_variation(unc, jetpt, jetpt_orig, jetmass,
                                 jetmass_orig, 0)
            down = build_variation(unc, jetpt, jetpt_orig, jetmass,
                                   jetmass_orig, 1)
            return awkward.zip({
                "up": up,
                "down": down
            },
                               depth_limit=1,
                               with_name="JetSystematic")

        for name, func in juncs:
            out_dict[f"jet_energy_uncertainty_{name}"] = func
            out_dict[f"JES_{name}"] = build_variant(
                func,
                self.name_map["JetPt"],
                out_dict[juncnames["JetPt"]],
                self.name_map["JetMass"],
                out_dict[juncnames["JetMass"]],
            )

    # Flag the output as corrected in its layout parameters.
    out_parms = out.layout.parameters
    out_parms["corrected"] = True
    out = awkward.zip(out_dict,
                      depth_limit=1,
                      parameters=out_parms,
                      behavior=out.behavior)

    return wrap(out)
def mass(self):
    """Return zeros shaped like ``self.pt``, with array parameters removed."""
    zero_mass = awkward.zeros_like(self.pt)
    return awkward.without_parameters(zero_mass)