def test_spark_hist_adders():
    """Smoke-test the raw Spark histogram aggregation/reduction helpers.

    Skipped unless pyspark >= 2.4.1 can be imported.  The helpers are only
    exercised on lz4-compressed, pickled identity accumulators; their
    correctness is checked in test_spark_executor.
    """
    pytest.importorskip("pyspark", minversion="2.4.1")

    import pandas as pd
    import pickle as pkl
    import lz4.frame as lz4f

    from coffea.util import numpy as np
    from coffea.processor.spark.spark_executor import agg_histos_raw, reduce_histos_raw
    from coffea.processor.test_items import NanoTestProcessor

    def _pack(accumulator):
        # lz4-compress the pickled accumulator
        return lz4f.compress(pkl.dumps(accumulator))

    processor = NanoTestProcessor()
    first = processor.accumulator.identity()
    second = processor.accumulator.identity()

    single_blob = np.array([_pack(first)], dtype="O")
    double_blob = np.array([_pack(first), _pack(second)], dtype="O")

    one_entry_series = pd.Series(single_blob)
    two_entry_series = pd.Series(double_blob)
    frame = pd.DataFrame({"histos": double_blob})

    # correctness of these functions is checked in test_spark_executor
    agg_histos_raw(one_entry_series, 1)
    agg_histos_raw(two_entry_series, 1)
    reduce_histos_raw(frame, 1)
def test_lumimask():
    """Check LumiMask output and that it is unchanged by a cloudpickle round trip.

    Both the original and the unpickled mask must accept (run 303825,
    lumi 115), reject (run 123, lumi 123), and agree with the pure-Python
    ``py_func`` of the mask kernel.
    """
    original = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )

    # pickle & unpickle
    restored = cloudpickle.loads(cloudpickle.dumps(original))

    # check same mask keys
    mask_keys = original._masks.keys()
    assert mask_keys == restored._masks.keys()
    # check same mask values
    for key in mask_keys:
        assert np.all(original._masks[key] == restored._masks[key])

    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)

    for candidate in (original, restored):
        mask = candidate(runs, lumis)
        print("mask:", mask)
        assert mask[0]
        assert not mask[1]

        # test underlying py_func
        py_mask = np.zeros(shape=runs.shape, dtype="bool")
        kernel = LumiMask._apply_run_lumi_mask_kernel.py_func
        kernel(candidate._masks, runs, lumis, py_mask)

        assert np.all(mask == py_mask)

    mask_original = original(runs, lumis)
    mask_restored = restored(runs, lumis)
    assert np.all(mask_original == mask_restored)
def test_lumimask():
    """Check that LumiMask accepts a certified (run, lumi) pair and rejects an uncertified one.

    The pair (run 303825, lumi 115) is expected to pass the mask and
    (run 123, lumi 123) is expected to fail it.
    """
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # Assert truthiness directly rather than comparing against True/False.
    assert mask[0]
    assert not mask[1]
def test_clopper_pearson_interval():
    """Compare clopper_pearson_interval against ROOT TEfficiency reference values.

    The relative deviation from the reference is checked in absolute value,
    so results that are too small fail just like results that are too large.
    """
    from coffea.hist.plot import clopper_pearson_interval

    # Reference values for CL=0.6800 calculated with ROOT's TEfficiency
    num = np.array([1., 5., 10., 10.])
    denom = np.array([10., 10., 10., 437.])
    ref_hi = np.array([0.293313782248242, 0.6944224231766912, 1.0, 0.032438865381336446])
    ref_lo = np.array([0.01728422272382846, 0.3055775768233088, 0.8325532074018731, 0.015839046981153772])

    interval = clopper_pearson_interval(num, denom, coverage=0.68)

    threshold = 1e-6
    # Row 1 holds the upper bounds, row 0 the lower bounds.  Without the
    # abs() a bound far below its reference would pass unnoticed.
    assert all(np.abs(interval[1, :] / ref_hi - 1) < threshold)
    assert all(np.abs(interval[0, :] / ref_lo - 1) < threshold)
def test_lumimask():
    """Check LumiMask output and its agreement with the pure-Python kernel.

    The pair (run 303825, lumi 115) is expected to pass the mask and
    (run 123, lumi 123) to fail it.  The same inputs are then run through
    the kernel's ``py_func`` and the two results must match.
    """
    lumimask = LumiMask(
        "tests/samples/Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON.txt"
    )
    runs = np.array([303825, 123], dtype=np.uint32)
    lumis = np.array([115, 123], dtype=np.uint32)
    mask = lumimask(runs, lumis)
    print("mask:", mask)
    # Assert truthiness directly rather than comparing against True/False.
    assert mask[0]
    assert not mask[1]

    # test underlying py_func
    py_mask = np.zeros(dtype='bool', shape=runs.shape)
    LumiMask.apply_run_lumi_mask_kernel.py_func(lumimask._masks, runs, lumis, py_mask)

    assert np.all(mask == py_mask)
def test_hist_serdes():
    """Pickle round-trip a filled 2D histogram and compare its shape and axes."""
    import pickle

    x_axis = hist.Bin("x", "x", 20, 0, 200)
    y_axis = hist.Bin("y", "why", 20, -3, 3)
    original = hist.Hist("regular joe", x_axis, y_axis)

    x_values = np.array([1., 2., 3., 4., 5.])
    y_values = np.array([-2., 1., 0., 1., 2.])
    original.fill(x=x_values, y=y_values)

    # Must work on the histogram before serialization ...
    original.sum("x").identifiers("y")

    restored = pickle.loads(pickle.dumps(original))

    # ... and on the deserialized copy.
    restored.sum("x").identifiers("y")

    assert original._dense_shape == restored._dense_shape
    assert original._axes == restored._axes
def test_jet_correction_uncertainty():
    """Check JetCorrectionUncertainty on flat and jagged inputs.

    Flat and jagged evaluations must agree, and the first three events must
    reproduce the hard-coded reference uncertainties to within 1e-6.
    """
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    eta_jag = ak.unflatten(test_eta, counts)
    pt_jag = ak.unflatten(test_pt, counts)

    junc_name = "Summer16_23Sep2016V3_MC_Uncertainty_AK4PFPuppi"
    junc = JetCorrectionUncertainty(**{junc_name: evaluator[junc_name]})

    print(junc)

    flat_results = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)
    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))

    n_jets = test_eta.shape[0]
    for index, (level, corrs) in enumerate(flat_results):
        assert corrs.shape[0] == n_jets
        jagged_corrs = jagged_results[index][1]
        assert ak.all(corrs == ak.flatten(jagged_corrs))

    # Restrict to the first three events for the reference comparison.
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag.tolist())
    print("eta:", eta_jag.tolist(), "\n")

    reference_values = np.array([
        [1.053504214, 0.946495786],
        [1.033343349, 0.966656651],
        [1.065159157, 0.934840843],
        [1.033140127, 0.966859873],
        [1.016858652, 0.983141348],
        [1.130199999, 0.869800001],
        [1.039968468, 0.960031532],
        [1.033100002, 0.966899998],
    ])
    juncs_jag_ref = ak.unflatten(reference_values, counts)

    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))

    for index, (level, corrs) in enumerate(jagged_results):
        print("Index:", index)
        print("Correction level:", level)
        print("Reference Uncertainties (jagged):", juncs_jag_ref)
        print("Uncertainties (jagged):", corrs)
        deviation = np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs))
        assert ak.all(deviation < 1e-6)
def test_jet_resolution():
    """Check JetResolution on flat and jagged inputs and against reference values.

    The flat and jagged evaluations must agree to 1e-6.  For the first three
    events (with the last jet's rho replaced by 100.0) the result must match
    the hard-coded reference to 5e-4.
    """
    from coffea.jetmet_tools import JetResolution

    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_Rho = np.full_like(test_eta, 10.0)

    pt_jag = ak.unflatten(test_pt, counts)
    eta_jag = ak.unflatten(test_eta, counts)
    rho_jag = ak.unflatten(test_Rho, counts)

    jer_name = "Spring16_25nsV10_MC_PtResolution_AK4PFPuppi"
    reso = JetResolution(**{jer_name: evaluator[jer_name]})

    print(reso)

    resos = reso.getResolution(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt)
    resos_jag = reso.getResolution(JetEta=eta_jag, Rho=rho_jag, JetPt=pt_jag)
    assert ak.all(np.abs(resos - ak.flatten(resos_jag)) < 1e-6)

    # Keep only the first three events ...
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    rho_jag = rho_jag[:3]
    # ... and swap the last jet's rho for 100.0.
    last_event_rho = ak.concatenate([rho_jag[-1, :-1], 100.0])
    rho_jag = ak.concatenate([rho_jag[:-1], [last_event_rho]])
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag)
    print("eta:", eta_jag)
    print("rho:", rho_jag, "\n")

    reference_values = np.array([
        0.21974642,
        0.32421591,
        0.33702479,
        0.27420327,
        0.13940689,
        0.48134521,
        0.26564994,
        1.0,
    ])
    resos_jag_ref = ak.unflatten(reference_values, counts)
    resos_jag = reso.getResolution(JetEta=eta_jag, Rho=rho_jag, JetPt=pt_jag)
    print("Reference Resolution (jagged):", resos_jag_ref)
    print("Resolution (jagged):", resos_jag)
    # NB: 5e-4 tolerance was agreed upon by lgray and aperloff, if the differences get bigger over time
    #     we need to agree upon how these numbers are evaluated (double/float conversion is kinda random)
    deviation = np.abs(ak.flatten(resos_jag_ref) - ak.flatten(resos_jag))
    assert ak.all(deviation < 5e-4)
def test_root_scalefactors():
    """Evaluate a 2D ROOT-histogram scale factor on flat and jagged inputs.

    The jagged evaluation must keep the input structure and flatten to the
    flat result, and the flat result must match the precomputed lookup
    values to better than 1e-8.
    """
    sf_extractor = lookup_tools.extractor()
    sf_extractor.add_weight_sets(
        ["testSF2d scalefactors_Tight_Electron tests/samples/testSF2d.histo.root"]
    )
    sf_extractor.finalize(reduce_list=["testSF2d"])

    sf_evaluator = sf_extractor.make_evaluator()

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # test flat eval
    test_out = sf_evaluator["testSF2d"](test_eta, test_pt)

    # print it
    print(sf_evaluator["testSF2d"])

    # test structured eval
    eta_jagged = ak.unflatten(test_eta, counts)
    pt_jagged = ak.unflatten(test_pt, counts)
    out_jagged = sf_evaluator["testSF2d"](eta_jagged, pt_jagged)

    assert ak.all(ak.num(out_jagged) == counts)
    assert ak.all(ak.flatten(out_jagged) == test_out)

    # From make_expected_lookup.py
    expected_output = np.array([
        0.90780139, 0.82748538, 0.86332178, 0.86332178, 0.97981155,
        0.79701495, 0.88245934, 0.82857144, 0.91884059, 0.97466666,
        0.94072163, 1.00775194, 0.82748538, 1.00775194, 0.97203946,
        0.98199672, 0.80655736, 0.90893763, 0.88245934, 0.79701495,
        0.82748538, 0.82857144, 0.91884059, 0.90893763, 0.97520661,
        0.97520661, 0.82748538, 0.91884059, 0.97203946, 0.88245934,
        0.79701495, 0.9458763, 1.00775194, 0.80655736, 1.00775194,
        1.00775194, 0.98976982, 0.98976982, 0.86332178, 0.94072163,
        0.80655736, 0.98976982, 0.96638656, 0.9458763, 0.90893763,
        0.9529984, 0.9458763, 0.9529984, 0.80655736, 0.80655736,
        0.80655736, 0.98976982, 0.97466666, 0.98199672, 0.86332178,
        1.03286386, 0.94072163, 1.03398061, 0.82857144, 0.80655736,
        1.00775194, 0.80655736,
    ])

    print(test_out)

    diff = np.abs(test_out - expected_output)
    over_threshold = diff >= 1.e-8
    print("Max diff: %.16f" % diff.max())
    print("Median diff: %.16f" % np.median(diff))
    print("Diff over threshold rate: %.1f %%" % (100 * over_threshold.sum() / diff.size))
    assert (diff < 1.e-8).all()
def test_hist_compat():
    """Load a histogram saved in the old format and check one axis's bin edges."""
    from coffea.util import load

    legacy = load("tests/samples/old_hist_format.coffea")
    interval_bins = legacy._axes[2]._interval_bins

    # Expected edges: -inf, then 0 to 1200 in steps of 20, then +inf and nan.
    expected_bins = np.concatenate(
        ([-np.inf], 20.0 * np.arange(61), [np.inf, np.nan])
    )

    # The trailing nan never compares equal, so it is checked separately.
    assert np.all(interval_bins[:-1] == expected_bins[:-1])
    assert np.isnan(interval_bins[-1])
def test_jet_resolution_sf():
    """Check the eta-only JetResolutionScaleFactor on flat and jagged inputs.

    Covers the zero-jet case (result shape (0, 3)), flat/jagged agreement,
    and the hard-coded reference triplets for the first three events.
    """
    from coffea.jetmet_tools import JetResolutionScaleFactor

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    pt_jag = ak.unflatten(test_pt, counts)
    eta_jag = ak.unflatten(test_eta, counts)

    jersf_name = "Spring16_25nsV10_MC_SF_AK4PFPuppi"
    resosf = JetResolutionScaleFactor(**{jersf_name: evaluator[jersf_name]})

    print(resosf)

    # 0-jet compatibility
    empty_result = resosf.getScaleFactor(JetEta=test_eta[:0])
    assert empty_result.shape == (0, 3)

    flat_sfs = resosf.getScaleFactor(JetEta=test_eta)
    jagged_sfs = resosf.getScaleFactor(JetEta=eta_jag)
    assert ak.all(flat_sfs == ak.flatten(jagged_sfs))

    # Restrict to the first three events for the reference comparison.
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag)
    print("eta:", eta_jag, "\n")

    reference_values = np.array([
        [1.857, 1.928, 1.786],
        [1.084, 1.095, 1.073],
        [1.364, 1.403, 1.325],
        [1.177, 1.218, 1.136],
        [1.138, 1.151, 1.125],
        [1.364, 1.403, 1.325],
        [1.177, 1.218, 1.136],
        [1.082, 1.117, 1.047],
    ])
    resosfs_jag_ref = ak.unflatten(reference_values, counts)
    jagged_sfs = resosf.getScaleFactor(JetEta=eta_jag)
    print("Reference Resolution SF (jagged):", resosfs_jag_ref)
    print("Resolution SF (jagged):", jagged_sfs)
    deviation = np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(jagged_sfs))
    assert ak.all(deviation < 1e-6)
def test_jet_resolution_sf_2d():
    """Check the (pt, eta) JetResolutionScaleFactor on flat and jagged inputs.

    Covers the zero-jet case (result shape (0, 3)), flat/jagged agreement,
    and the hard-coded reference triplets for the first three events.
    """
    from coffea.jetmet_tools import JetResolutionScaleFactor

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    pt_jag = ak.unflatten(test_pt, counts)
    eta_jag = ak.unflatten(test_eta, counts)

    jersf_name = "Autumn18_V7_MC_SF_AK4PFchs"
    resosf = JetResolutionScaleFactor(**{jersf_name: evaluator[jersf_name]})

    print(resosf)

    # 0-jet compatibility
    empty_result = resosf.getScaleFactor(JetPt=test_pt[:0], JetEta=test_eta[:0])
    assert empty_result.shape == (0, 3)

    flat_sfs = resosf.getScaleFactor(JetPt=test_pt, JetEta=test_eta)
    jagged_sfs = resosf.getScaleFactor(JetPt=pt_jag, JetEta=eta_jag)
    assert ak.all(flat_sfs == ak.flatten(jagged_sfs))

    # Restrict to the first three events for the reference comparison.
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag)
    print("eta:", eta_jag, "\n")

    reference_values = np.array([
        [1.11904, 1.31904, 1.0],
        [1.1432, 1.2093, 1.0771],
        [1.16633, 1.36633, 1.0],
        [1.17642, 1.37642, 1.0],
        [1.1808, 1.1977, 1.1640],
        [1.15965, 1.35965, 1.0],
        [1.17661, 1.37661, 1.0],
        [1.1175, 1.1571, 1.0778],
    ])
    resosfs_jag_ref = ak.unflatten(reference_values, counts)
    jagged_sfs = resosf.getScaleFactor(JetPt=pt_jag, JetEta=eta_jag)
    print("Reference Resolution SF (jagged):", resosfs_jag_ref)
    print("Resolution SF (jagged):", jagged_sfs)
    deviation = np.abs(ak.flatten(resosfs_jag_ref) - ak.flatten(jagged_sfs))
    assert ak.all(deviation < 1e-6)
def test_normal_interval():
    """Compare normal_interval against weighted-efficiency references from ROOT.

    For each bin the computed lower/upper error is compared with the
    reference: a NaN reference requires a NaN result, a zero reference
    requires an exact zero, and anything else must agree to a relative
    1e-6.
    """
    from coffea.hist.plot import normal_interval

    # Reference weighted efficiency and error from ROOTs TEfficiency

    denom = np.array([
        89.01457591590004,
        2177.066076428943,
        6122.5256890981855,
        0.0,
        100.27757990710668,
    ])
    num = np.array([
        75.14287743709515,
        2177.066076428943,
        5193.454723043864,
        0.0,
        84.97723540536361,
    ])
    denom_sumw2 = np.array([
        94.37919737476827, 10000.0, 6463.46795877633, 0.0, 105.90898005417333
    ])
    num_sumw2 = np.array(
        [67.2202147680005, 10000.0, 4647.983931785646, 0.0, 76.01275761253757])
    ref_hi = np.array([
        0.0514643476600107, 0.0, 0.0061403263960343, np.nan, 0.0480731185500146
    ])
    ref_lo = np.array([
        0.0514643476600107, 0.0, 0.0061403263960343, np.nan, 0.0480731185500146
    ])

    interval = normal_interval(num, denom, num_sumw2, denom_sumw2)
    threshold = 1e-6

    lo, hi = interval

    assert len(ref_hi) == len(hi)
    assert len(ref_lo) == len(lo)

    for reference, computed in ((ref_hi, hi), (ref_lo, lo)):
        for ref_value, value in zip(reference, computed):
            if np.isnan(ref_value):
                # Check the computed value: asserting on the reference
                # itself would always pass.
                assert np.isnan(value)
            elif ref_value == 0.0:
                # A relative comparison is undefined for a zero reference.
                assert value == 0.0
            else:
                assert np.abs(value / ref_value - 1) < threshold
def test_rochester():
    """Compare the Rochester lookup against precomputed official values.

    Data scale factors and errors are checked on the dimuon sample.  For the
    DY MC sample, muons with a matched generator muon use the kSpreadMC
    variants and the remaining muons use the kSmearMC variants with preloaded
    random numbers; the merged results are compared with the official ones.
    """
    rochester = lookup_tools.rochester_lookup.rochester_lookup(
        lookup_tools.txt_converters.convert_rochester_file(
            'tests/samples/RoccoR2018.txt.gz', loaduncs=True
        )
    )

    # to test 1-to-1 agreement with official Rochester requires loading C++ files
    # instead, preload the correct scales in the sample directory
    # the script tests/samples/rochester/build_rochester.py produces these
    official_data_k = np.load('tests/samples/nano_dimuon_rochester.npy')
    official_data_err = np.load('tests/samples/nano_dimuon_rochester_err.npy')
    official_mc_k = np.load('tests/samples/nano_dy_rochester.npy')
    official_mc_err = np.load('tests/samples/nano_dy_rochester_err.npy')
    mc_rand = np.load('tests/samples/nano_dy_rochester_rand.npy')

    # test against nanoaod
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dimuon.root'))

    data_k = rochester.kScaleDT(
        events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
    )
    assert all(np.isclose(data_k.flatten(), official_data_k))
    data_err = rochester.kScaleDTerror(
        events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
    )
    data_err = np.array(data_err.flatten(), dtype=float)
    assert all(np.isclose(data_err, official_data_err, atol=1e-8))

    # test against mc
    events = NanoEvents.from_file(os.path.abspath('tests/samples/nano_dy.root'))

    hasgen = ~np.isnan(events.Muon.matched_gen.pt.fillna(np.nan))
    mc_rand = JaggedArray.fromoffsets(hasgen.offsets, mc_rand)

    def _merge(with_gen, without_gen):
        # Scatter the two per-category results back into one flat per-muon array.
        merged = np.ones_like(events.Muon.pt.flatten())
        merged[hasgen.flatten()] = with_gen.flatten()
        merged[~hasgen.flatten()] = without_gen.flatten()
        return merged

    mc_kspread = rochester.kSpreadMC(
        events.Muon.charge[hasgen],
        events.Muon.pt[hasgen],
        events.Muon.eta[hasgen],
        events.Muon.phi[hasgen],
        events.Muon.matched_gen.pt[hasgen],
    )
    mc_ksmear = rochester.kSmearMC(
        events.Muon.charge[~hasgen],
        events.Muon.pt[~hasgen],
        events.Muon.eta[~hasgen],
        events.Muon.phi[~hasgen],
        events.Muon.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    mc_k = _merge(mc_kspread, mc_ksmear)
    assert all(np.isclose(mc_k, official_mc_k))

    mc_errspread = rochester.kSpreadMCerror(
        events.Muon.charge[hasgen],
        events.Muon.pt[hasgen],
        events.Muon.eta[hasgen],
        events.Muon.phi[hasgen],
        events.Muon.matched_gen.pt[hasgen],
    )
    mc_errsmear = rochester.kSmearMCerror(
        events.Muon.charge[~hasgen],
        events.Muon.pt[~hasgen],
        events.Muon.eta[~hasgen],
        events.Muon.phi[~hasgen],
        events.Muon.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    mc_err = _merge(mc_errspread, mc_errsmear)
    assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
def test_rochester():
    """Compare the Rochester lookup against precomputed official values.

    Data scale factors and errors are checked on the dimuon sample.  For the
    DY MC sample, muons with a matched generator muon use the kSpreadMC
    variants and the remaining muons use the kSmearMC variants with preloaded
    random numbers; the merged results are compared with the official ones.
    """
    rochester = lookup_tools.rochester_lookup.rochester_lookup(
        lookup_tools.txt_converters.convert_rochester_file(
            "tests/samples/RoccoR2018.txt.gz", loaduncs=True
        )
    )

    # to test 1-to-1 agreement with official Rochester requires loading C++ files
    # instead, preload the correct scales in the sample directory
    # the script tests/samples/rochester/build_rochester.py produces these
    official_data_k = np.load("tests/samples/nano_dimuon_rochester.npy")
    official_data_err = np.load("tests/samples/nano_dimuon_rochester_err.npy")
    official_mc_k = np.load("tests/samples/nano_dy_rochester.npy")
    official_mc_err = np.load("tests/samples/nano_dy_rochester_err.npy")
    mc_rand = np.load("tests/samples/nano_dy_rochester_rand.npy")

    # test against nanoaod
    dimuon_path = os.path.abspath("tests/samples/nano_dimuon.root")
    events = NanoEventsFactory.from_root(dimuon_path).events()

    data_k = rochester.kScaleDT(
        events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
    )
    data_k = np.array(ak.flatten(data_k))
    assert all(np.isclose(data_k, official_data_k))
    data_err = rochester.kScaleDTerror(
        events.Muon.charge, events.Muon.pt, events.Muon.eta, events.Muon.phi
    )
    data_err = np.array(ak.flatten(data_err), dtype=float)
    assert all(np.isclose(data_err, official_data_err, atol=1e-8))

    # test against mc
    dy_path = os.path.abspath("tests/samples/nano_dy.root")
    events = NanoEventsFactory.from_root(dy_path).events()

    hasgen = ~np.isnan(ak.fill_none(events.Muon.matched_gen.pt, np.nan))
    mc_rand = ak.unflatten(mc_rand, ak.num(hasgen))

    def _merge(with_gen, without_gen):
        # Scatter the two per-category results back into one flat per-muon array.
        merged = np.array(ak.flatten(ak.ones_like(events.Muon.pt)))
        gen_mask = np.array(ak.flatten(hasgen))
        merged[gen_mask] = np.array(ak.flatten(with_gen))
        merged[~gen_mask] = np.array(ak.flatten(without_gen))
        return merged

    mc_kspread = rochester.kSpreadMC(
        events.Muon.charge[hasgen],
        events.Muon.pt[hasgen],
        events.Muon.eta[hasgen],
        events.Muon.phi[hasgen],
        events.Muon.matched_gen.pt[hasgen],
    )
    mc_ksmear = rochester.kSmearMC(
        events.Muon.charge[~hasgen],
        events.Muon.pt[~hasgen],
        events.Muon.eta[~hasgen],
        events.Muon.phi[~hasgen],
        events.Muon.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    mc_k = _merge(mc_kspread, mc_ksmear)
    assert all(np.isclose(mc_k, official_mc_k))

    mc_errspread = rochester.kSpreadMCerror(
        events.Muon.charge[hasgen],
        events.Muon.pt[hasgen],
        events.Muon.eta[hasgen],
        events.Muon.phi[hasgen],
        events.Muon.matched_gen.pt[hasgen],
    )
    mc_errsmear = rochester.kSmearMCerror(
        events.Muon.charge[~hasgen],
        events.Muon.pt[~hasgen],
        events.Muon.eta[~hasgen],
        events.Muon.phi[~hasgen],
        events.Muon.nTrackerLayers[~hasgen],
        mc_rand[~hasgen],
    )
    mc_err = _merge(mc_errspread, mc_errsmear)
    assert all(np.isclose(mc_err, official_mc_err, atol=1e-8))
def test_hist():
    """Exercise the coffea ``hist.Hist`` API end to end.

    Covers, in order: an empty histogram, a 2D dense histogram (fill, sum,
    integrate, slicing), a purely categorical histogram with weights, the
    equivalent ways of passing axes to the constructor, histogram addition,
    grouping, scaling, axis lookup, and category removal.

    Expected counts in the later sections are derived from the same
    ``np.random`` draws that fill the histograms, so the order of the
    statements below matters.
    """
    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # --- empty histogram: no axes, no values ---
    h_nothing = hist.Hist("empty inside")
    assert h_nothing.sparse_dim() == h_nothing.dense_dim() == 0
    assert h_nothing.values() == {}

    # --- 2D dense histogram ---
    h_regular_bins = hist.Hist("regular joe", hist.Bin("x", "x", 20, 0, 200), hist.Bin("y", "why", 20, -3, 3))
    h_regular_bins.fill(x=test_pt, y=test_eta)
    nentries = np.sum(counts)
    # Unweighted fill: both the sum of weights and the sum of squared
    # weights are expected to equal the number of entries.
    assert h_regular_bins.sum("x", "y", overflow='all').values(sumw2=True)[()] == (nentries, nentries)
    # bin x=2, y=10 (when overflow removed)
    count_some_bin = np.sum((test_pt >= 20.) & (test_pt < 30.) & (test_eta >= 0.) & (test_eta < 0.3))
    assert h_regular_bins.integrate("x", slice(20, 30)).values()[()][10] == count_some_bin
    assert h_regular_bins.integrate("y", slice(0, 0.3)).values()[()][2] == count_some_bin

    # Slicing shifts the bin indices, so the same bin is now found at x=1, y=2.
    h_reduced = h_regular_bins[10:, -.6:]
    # bin x=1, y=2
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin
    # The sliced histogram must still be fillable: one extra entry in that bin.
    h_reduced.fill(x=23, y=0.1)
    assert h_reduced.integrate("x", slice(20, 30)).values()[()][2] == count_some_bin + 1
    assert h_reduced.integrate("y", slice(0, 0.3)).values()[()][1] == count_some_bin + 1

    # --- purely categorical histogram with scalar, array and default weights ---
    animal = hist.Cat("animal", "type of animal")
    vocalization = hist.Cat("vocalization", "onomatopoiea is that how you spell it?")
    h_cat_bins = hist.Hist("I like cats", animal, vocalization)
    h_cat_bins.fill(animal="cat", vocalization="meow", weight=2.)
    h_cat_bins.fill(animal="dog", vocalization="meow", weight=np.array([-1., -1., -5.]))
    h_cat_bins.fill(animal="dog", vocalization="woof", weight=100.)
    h_cat_bins.fill(animal="dog", vocalization="ruff")
    assert h_cat_bins.values()[("cat", "meow")] == 2.
    # sumw = -1 - 1 - 5, sumw2 = 1 + 1 + 25
    assert h_cat_bins.values(sumw2=True)[("dog", "meow")] == (-7., 27.)
    # sumw = 100 + 1, sumw2 = 100**2 + 1
    assert h_cat_bins.integrate("vocalization", ["woof", "ruff"]).values(sumw2=True)[("dog", )] == (101., 10001.)

    # --- four ways of handing the same axes to the constructor ---
    height = hist.Bin("height", "height [m]", 10, 0, 5)
    # axes as positional arguments
    h_mascots_1 = hist.Hist(
        "fermi mascot showdown",
        animal,
        vocalization,
        height,
        # weight is a reserved keyword
        hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
    )

    # axes as a tuple passed via the `axes` keyword
    h_mascots_2 = hist.Hist(
        "fermi mascot showdown",
        axes=(
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
        ))

    # axes as a list via keyword, label via keyword
    h_mascots_3 = hist.Hist(
        axes=[
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
        ],
        label="fermi mascot showdown")

    # axes given both positionally and via the `axes` keyword
    h_mascots_4 = hist.Hist(
        "fermi mascot showdown",
        animal,
        vocalization,
        height,
        # weight is a reserved keyword
        hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
        axes=[
            animal,
            vocalization,
            height,
            # weight is a reserved keyword
            hist.Bin("mass", "weight (g=9.81m/s**2) [kg]", np.power(10., np.arange(5) - 1)),
        ],
    )

    # All four constructions must yield the same dense shape and axes.
    assert h_mascots_1._dense_shape == h_mascots_2._dense_shape
    assert h_mascots_2._dense_shape == h_mascots_3._dense_shape
    assert h_mascots_3._dense_shape == h_mascots_4._dense_shape

    assert h_mascots_1._axes == h_mascots_2._axes
    assert h_mascots_2._axes == h_mascots_3._axes
    assert h_mascots_3._axes == h_mascots_4._axes

    # --- fill with random samples: 40 bison, 1000 geese, 4 cranes ---
    adult_bison_h = np.random.normal(loc=2.5, scale=0.2, size=40)
    adult_bison_w = np.random.normal(loc=700, scale=100, size=40)
    h_mascots_1.fill(animal="bison", vocalization="huff", height=adult_bison_h, mass=adult_bison_w)

    goose_h = np.random.normal(loc=0.4, scale=0.05, size=1000)
    goose_w = np.random.normal(loc=7, scale=1, size=1000)
    h_mascots_1.fill(animal="goose", vocalization="honk", height=goose_h, mass=goose_w)

    crane_h = np.random.normal(loc=1, scale=0.05, size=4)
    crane_w = np.random.normal(loc=10, scale=1, size=4)
    h_mascots_1.fill(animal="crane", vocalization="none", height=crane_h, mass=crane_w)

    # Rebind h_mascots_2 to a cleared copy of h_mascots_1, then fill it separately.
    h_mascots_2 = h_mascots_1.copy()
    h_mascots_2.clear()
    baby_bison_h = np.random.normal(loc=.5, scale=0.1, size=20)
    baby_bison_w = np.random.normal(loc=200, scale=10, size=20)
    baby_bison_cutefactor = 2.5 * np.ones_like(baby_bison_w)
    h_mascots_2.fill(animal="bison", vocalization="baa", height=baby_bison_h, mass=baby_bison_w, weight=baby_bison_cutefactor)
    h_mascots_2.fill(animal="fox", vocalization="none", height=1., mass=30.)

    # --- histogram addition ---
    h_mascots = h_mascots_1 + h_mascots_2
    # 1040 = 40 "huff" + 1000 "honk" entries; presumably "h*" is a wildcard
    # that matches both - confirm against Hist.integrate.
    assert h_mascots.integrate("vocalization", "h*").sum("height", "mass", "animal").values()[()] == 1040.

    # --- group animals into species classes ---
    species_class = hist.Cat("species_class", "where the subphylum is vertibrates")
    classes = {
        'birds': ['goose', 'crane'],
        'mammals': ['bison', 'fox'],
    }
    h_species = h_mascots.group("animal", species_class, classes)

    assert set(h_species.integrate("vocalization").values().keys()) == set([('birds', ), ('mammals', )])
    nbirds_bin = np.sum((goose_h >= 0.5) & (goose_h < 1) & (goose_w > 10) & (goose_w < 100))
    nbirds_bin += np.sum((crane_h >= 0.5) & (crane_h < 1) & (crane_w > 10) & (crane_w < 100))
    assert h_species.integrate("vocalization").values()[('birds', )][1, 2] == nbirds_bin
    tally = h_species.sum("mass", "height", "vocalization").values()
    # birds: 1000 geese + 4 cranes; mammals: 40 + 20 * 2.5 (weighted) + 1 fox
    assert tally[('birds', )] == 1004.
    assert tally[('mammals', )] == 91.

    # --- scaling, per category and then globally (both mutate h_species) ---
    h_species.scale({"honk": 0.1, "huff": 0.9}, axis="vocalization")
    h_species.scale(5.)
    # Axes are passed here as a mix of a name and axis objects.
    tally = h_species.sum("mass", height, vocalization).values(sumw2=True)
    assert tally[('birds', )] == (520., 350.)
    assert tally[('mammals', )] == (435., 25 * (40 * (0.9**2) + 20 * (2.5**2) + 1))

    # --- axis lookup returns the very objects used at construction ---
    assert h_species.axis("vocalization") is vocalization
    assert h_species.axis("height") is height
    assert h_species.integrate("vocalization", "h*").axis("height") is height

    # --- group over two axes at once: animal list plus a height slice ---
    tall_class = hist.Cat("tall_class", "species class (species above 1m)")
    mapping = {
        'birds': (['goose', 'crane'], slice(1., None)),
        'mammals': (['bison', 'fox'], slice(1., None)),
    }
    h_tall = h_mascots.group((animal, height), tall_class, mapping)
    tall_bird_count = np.sum(goose_h >= 1.) + np.sum(crane_h >= 1)
    assert h_tall.sum("mass", "vocalization").values()[('birds', )] == tall_bird_count
    # "+ 1" accounts for the single fox filled at height=1.
    tall_mammal_count = np.sum(adult_bison_h >= 1.) + np.sum(baby_bison_h >= 1) + 1
    assert h_tall.sum("mass", "vocalization").values()[('mammals', )] == tall_mammal_count

    # --- removing categories leaves only the 1000 geese + 4 cranes ---
    h_less = h_mascots.remove(["fox", "bison"], axis="animal")
    assert h_less.sum("vocalization", "height", "mass", "animal").values()[()] == 1004.
def test_factorized_jet_corrector():
    """Check FactorizedJetCorrector on flat and jagged inputs, level by level.

    Steps: evaluate the full four-level corrector on flat and jagged arrays
    (inputs must stay unmodified and both results must agree), then, on the
    first three events, check the L1 and L2 corrections against reference
    values and check that a chained L1+L2+L3 corrector reproduces the
    product of the individually applied corrections.
    """
    from coffea.jetmet_tools import FactorizedJetCorrector

    counts, test_eta, test_pt = dummy_jagged_eta_pt()
    test_Rho = np.full_like(test_eta, 100.0)
    test_A = np.full_like(test_eta, 5.0)

    # Check that the FactorizedJetCorrector is functional
    jec_names = [
        "Summer16_23Sep2016V3_MC_L1FastJet_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2Relative_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L2L3Residual_AK4PFPuppi",
        "Summer16_23Sep2016V3_MC_L3Absolute_AK4PFPuppi",
    ]
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_names})

    print(corrector)

    # Snapshot of the input pT, used to verify the corrector does not modify it.
    pt_copy = np.copy(test_pt)

    # Check that the corrector can be evaluated for flattened arrays
    corrs = corrector.getCorrection(JetEta=test_eta, Rho=test_Rho, JetPt=test_pt, JetA=test_A)

    assert (np.abs(pt_copy - test_pt) < 1e-6).all()

    test_pt_jag = ak.unflatten(test_pt, counts)
    test_eta_jag = ak.unflatten(test_eta, counts)
    test_Rho_jag = ak.unflatten(test_Rho, counts)
    test_A_jag = ak.unflatten(test_A, counts)

    # Check that the corrector can be evaluated for jagged arrays
    corrs_jag = corrector.getCorrection(JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag)

    # The jagged input pT must be unchanged, and flat and jagged results must agree.
    assert ak.all(np.abs(pt_copy - ak.flatten(test_pt_jag)) < 1e-6)
    assert ak.all(np.abs(corrs - ak.flatten(corrs_jag)) < 1e-6)

    # Check that the corrector returns the correct answers for each level of correction
    # Use a subset of the values so that we can check the corrections by hand
    test_pt_jag = test_pt_jag[0:3]
    test_eta_jag = test_eta_jag[0:3]
    test_Rho_jag = test_Rho_jag[0:3]
    test_A_jag = test_A_jag[0:3]
    counts = counts[0:3]
    print("Raw jet values:")
    print("pT:", test_pt_jag)
    print("eta:", test_eta_jag)
    print("rho:", test_Rho_jag)
    print("area:", test_A_jag, "\n")

    # Start by checking the L1 corrections
    # The reference L1 correction is 1.0 for every jet in the subset.
    corrs_L1_jag_ref = ak.full_like(test_pt_jag, 1.0)
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_names[0:1]})
    corrs_L1_jag = corrector.getCorrection(JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag)
    print("Reference L1 corrections:", corrs_L1_jag_ref)
    print("Calculated L1 corrections:", corrs_L1_jag)
    assert ak.all(
        np.abs(ak.flatten(corrs_L1_jag_ref) - ak.flatten(corrs_L1_jag)) < 1e-6)

    # Apply the L1 corrections and save the result
    test_ptL1_jag = test_pt_jag * corrs_L1_jag
    print("L1 corrected pT values:", test_ptL1_jag, "\n")
    assert ak.all(
        np.abs(ak.flatten(test_pt_jag) - ak.flatten(test_ptL1_jag)) < 1e-6)

    # Check the L2 corrections on a subset of jets
    # Look up the parameters for the L2 corrections by hand and calculate the corrections
    # [(1.37906,35.8534,-0.00829227,7.96644e-05,5.18988e-06),
    #  (1.38034,17.9841,-0.00729638,-0.000127141,5.70889e-05),
    #  (1.74466,18.6372,-0.0367036,0.00310864,-0.000277062),
    #  (1.4759,24.8882,-0.0155333,0.0020836,-0.000198039),
    #  (1.14606,36.4215,-0.00174801,-1.76393e-05,1.91863e-06),
    #  (0.999657,4.02981,1.06597,-0.619679,-0.0494)],
    # [(1.54524,23.9023,-0.0162807,0.000665243,-4.66608e-06),
    #  (1.48431,8.68725,0.00642424,0.0252104,-0.0335696)]])
    corrs_L2_jag_ref = ak.unflatten(
        np.array([
            1.37038741364,
            1.37710384514,
            1.65148641108,
            1.46840446827,
            1.1328319784,
            1.0,
            1.50762056349,
            1.48719866989,
        ]),
        counts,
    )
    # The L2-only corrector is evaluated with eta and pT only.
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in jec_names[1:2]})
    corrs_L2_jag = corrector.getCorrection(JetEta=test_eta_jag, JetPt=test_pt_jag)
    print("Reference L2 corrections:", corrs_L2_jag_ref.tolist())
    print("Calculated L2 corrections:", corrs_L2_jag.tolist())
    assert ak.all(
        np.abs(ak.flatten(corrs_L2_jag_ref) - ak.flatten(corrs_L2_jag)) < 1e-6)

    # Apply the L2 corrections and save the result
    test_ptL1L2_jag = test_ptL1_jag * corrs_L2_jag
    print("L1L2 corrected pT values:", test_ptL1L2_jag, "\n")

    # Apply the L3 corrections and save the result
    # The L3 correction is taken to be 1.0 here rather than evaluated.
    corrs_L3_jag = ak.full_like(test_pt_jag, 1.0)
    test_ptL1L2L3_jag = test_ptL1L2_jag * corrs_L3_jag
    print("L1L2L3 corrected pT values:", test_ptL1L2L3_jag, "\n")

    # Check that the corrections can be chained together
    # The reference equals the L2 reference because the L1 and L3 factors are 1.0.
    corrs_L1L2L3_jag_ref = ak.unflatten(
        np.array([
            1.37038741364,
            1.37710384514,
            1.65148641108,
            1.46840446827,
            1.1328319784,
            1.0,
            1.50762056349,
            1.48719866989,
        ]),
        counts,
    )
    # The chained corrector uses L1, L2 and L3 and skips the L2L3Residual entry.
    corrector = FactorizedJetCorrector(
        **{name: evaluator[name] for name in (jec_names[0:2] + jec_names[3:])})
    corrs_L1L2L3_jag = corrector.getCorrection(JetEta=test_eta_jag, Rho=test_Rho_jag, JetPt=test_pt_jag, JetA=test_A_jag)
    print("Reference L1L2L3 corrections:", corrs_L1L2L3_jag_ref)
    print("Calculated L1L2L3 corrections:", corrs_L1L2L3_jag)
    assert ak.all(
        np.abs(
            ak.flatten(corrs_L1L2L3_jag_ref) - ak.flatten(corrs_L1L2L3_jag))
        < 1e-6)

    # Apply the L1L2L3 corrections and save the result
    test_ptL1L2L3chain_jag = test_pt_jag * corrs_L1L2L3_jag
    print("Chained L1L2L3 corrected pT values:", test_ptL1L2L3chain_jag, "\n")
    # Step-by-step and chained corrections must give the same corrected pT.
    assert ak.all(
        np.abs(
            ak.flatten(test_ptL1L2L3_jag) - ak.flatten(test_ptL1L2L3chain_jag))
        < 1e-6)
def test_jet_correction_regrouped_uncertainty_sources():
    """Check JetCorrectionUncertainty built from the regrouped uncertainty sources.

    Every reported level must be one of the level names derived from the
    evaluator keys, flat and jagged evaluations must agree, and the "Total"
    level must reproduce the reference values on the first three events.
    """
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    pt_jag = ak.unflatten(test_pt, counts)
    eta_jag = ak.unflatten(test_eta, counts)

    source_tag = "Regrouped_Fall17_17Nov2017_V32_MC_UncertaintySources_AK4PFchs"
    junc_names = [name for name in dir(evaluator) if source_tag in name]
    levels = []
    for name in junc_names:
        parts = name.split("_")
        # Names with nine "_"-separated parts carry a two-part level name.
        if len(parts) == 9:
            levels.append("_".join(parts[-2:]))
        else:
            levels.append(parts[-1])

    sources = {}
    for name in junc_names:
        sources[name] = evaluator[name]
    junc = JetCorrectionUncertainty(**sources)
    print(junc)

    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))
    flat_results = list(junc.getUncertainty(JetEta=test_eta, JetPt=test_pt))

    for index, (level, corrs) in enumerate(flat_results):
        assert level in levels
        assert corrs.shape[0] == test_eta.shape[0]
        assert ak.all(corrs == ak.flatten(jagged_results[index][1]))

    # Restrict to the first three events for the reference comparison.
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag.tolist())
    print("eta:", eta_jag.tolist(), "\n")

    reference_values = np.array([
        [1.119159088, 0.880840912],
        [1.027003404, 0.972996596],
        [1.135201275, 0.864798725],
        [1.039665259, 0.960334741],
        [1.015064503, 0.984935497],
        [1.149900004, 0.850099996],
        [1.079960600, 0.920039400],
        [1.041200001, 0.958799999],
    ])
    juncs_jag_ref = ak.unflatten(reference_values, counts)

    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))
    for index, (level, corrs) in enumerate(jagged_results):
        # Only the "Total" level has reference values.
        if level == "Total":
            print("Index:", index)
            print("Correction level:", level)
            print("Reference Uncertainties (jagged):", juncs_jag_ref)
            print("Uncertainties (jagged):", corrs, "\n")
            deviation = np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs))
            assert ak.all(deviation < 1e-6)
def test_jet_correction_uncertainty_sources():
    """Check JetCorrectionUncertainty built from individual uncertainty sources.

    Every reported level must be one of the level names derived from the
    evaluator keys, flat and jagged evaluations must agree, and the "Total"
    level must reproduce the reference values on the first three events.
    """
    from coffea.jetmet_tools import JetCorrectionUncertainty

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    pt_jag = ak.unflatten(test_pt, counts)
    eta_jag = ak.unflatten(test_eta, counts)

    source_tags = (
        "Summer16_23Sep2016V3_MC_UncertaintySources_AK4PFPuppi",
        # test for underscore in dataera
        "Fall17_17Nov2017_V6_MC_UncertaintySources_AK4PFchs_AbsoluteFlavMap",
    )
    junc_names = []
    levels = []
    for name in dir(evaluator):
        for tag in source_tags:
            if tag in name:
                junc_names.append(name)
                levels.append(name.split("_")[-1])

    sources = {}
    for name in junc_names:
        sources[name] = evaluator[name]
    junc = JetCorrectionUncertainty(**sources)
    print(junc)

    flat_results = junc.getUncertainty(JetEta=test_eta, JetPt=test_pt)
    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))

    for index, (level, corrs) in enumerate(flat_results):
        assert level in levels
        assert corrs.shape[0] == test_eta.shape[0]
        assert ak.all(corrs == ak.flatten(jagged_results[index][1]))

    # Restrict to the first three events for the reference comparison.
    pt_jag = pt_jag[:3]
    eta_jag = eta_jag[:3]
    counts = counts[:3]
    print("Raw jet values:")
    print("pT:", pt_jag.tolist())
    print("eta:", eta_jag.tolist(), "\n")

    reference_values = np.array([
        [1.053504214, 0.946495786],
        [1.033343349, 0.966656651],
        [1.065159157, 0.934840843],
        [1.033140127, 0.966859873],
        [1.016858652, 0.983141348],
        [1.130199999, 0.869800001],
        [1.039968468, 0.960031532],
        [1.033100002, 0.966899998],
    ])
    juncs_jag_ref = ak.unflatten(reference_values, counts)

    jagged_results = list(junc.getUncertainty(JetEta=eta_jag, JetPt=pt_jag))
    for index, (level, corrs) in enumerate(jagged_results):
        # Only the "Total" level has reference values.
        if level == "Total":
            print("Index:", index)
            print("Correction level:", level)
            print("Reference Uncertainties (jagged):", juncs_jag_ref)
            print("Uncertainties (jagged):", corrs, "\n")
            deviation = np.abs(ak.flatten(juncs_jag_ref) - ak.flatten(corrs))
            assert ak.all(deviation < 1e-6)
def test_jec_txt_scalefactors():
    """Load the sample JEC/JUNC/JERSF text files and evaluate several of them.

    Each selected lookup is called with both the flat and the jagged
    (unflattened) eta/pt arrays. The only value checks are the single-jet
    ``Spring16_25nsV10_MC_SF_AK4PFPuppi`` lookups (1-d and 0-d inputs) against
    a fixed expectation, plus the presence of the ``AbsoluteScale``
    uncertainty-source key; the remaining calls just have to run.
    """
    weight_sets = [
        "testJEC * tests/samples/Fall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi.jec.txt",
        "* * tests/samples/Summer16_07Aug2017_V11_L1fix_MC_L2Relative_AK4PFchs.jec.txt.gz",
        "* * tests/samples/Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi.junc.txt",
        "* * tests/samples/Autumn18_V8_MC_UncertaintySources_AK4PFchs.junc.txt",
        "* * tests/samples/Spring16_25nsV10_MC_SF_AK4PFPuppi.jersf.txt",
        "* * tests/samples/Autumn18_V7b_MC_SF_AK8PFchs.jersf.txt.gz",
        "* * tests/samples/Fall17_17Nov2017_V32_MC_L2Relative_AK4Calo.jec.txt.gz",
        "* * tests/samples/Fall17_17Nov2017_V32_MC_L1JPTOffset_AK4JPT.jec.txt.gz",
        "* * tests/samples/Fall17_17Nov2017B_V32_DATA_L2Relative_AK4Calo.txt.gz",
        "* * tests/samples/Autumn18_V7b_DATA_SF_AK4PF.jersf.txt",
        "* * tests/samples/Autumn18_RunC_V19_DATA_L2Relative_AK8PFchs.jec.txt.gz",
        "* * tests/samples/Autumn18_RunA_V19_DATA_L2Relative_AK4Calo.jec.txt",
    ]
    extractor = lookup_tools.extractor()
    extractor.add_weight_sets(weight_sets)
    extractor.finalize()
    evaluator = extractor.make_evaluator()

    counts, test_eta, test_pt = dummy_jagged_eta_pt()

    # test structured eval
    test_eta_jagged = ak.unflatten(test_eta, counts)
    test_pt_jagged = ak.unflatten(test_pt, counts)

    def evaluate_flat_then_jagged(key):
        """Call ``evaluator[key]`` on the flat inputs, then on the jagged ones."""
        flat_out = evaluator[key](test_eta, test_pt)
        jagged_out = evaluator[key](test_eta_jagged, test_pt_jagged)
        return flat_out, jagged_out

    # Prefixed JEC entry: the results are not inspected, only the lookup is printed.
    prefixed_jec_key = "testJECFall17_17Nov2017_V32_MC_L2Relative_AK4PFPuppi"
    evaluate_flat_then_jagged(prefixed_jec_key)
    print(evaluator[prefixed_jec_key])

    jec_key = "Summer16_07Aug2017_V11_L1fix_MC_L2Relative_AK4PFchs"
    jec_out, jec_out_jagged = evaluate_flat_then_jagged(jec_key)
    print(jec_out)
    print(jec_out_jagged)
    print(evaluator[jec_key])

    jersf_key = "Spring16_25nsV10_MC_SF_AK4PFPuppi"
    jersf_out, jersf_out_jagged = evaluate_flat_then_jagged(jersf_key)
    print(jersf_out)
    print(jersf_out_jagged)

    # single jet jersf lookup test:
    single_jersf_out_1d = evaluator[jersf_key](np.array([1.4]), np.array([44.0]))
    single_jersf_out_0d = evaluator[jersf_key](np.array(1.4), np.array(44.0))
    truth_out = np.array([[1.084, 1.095, 1.073]], dtype=np.float32)
    assert np.all(single_jersf_out_1d == truth_out)
    assert np.all(single_jersf_out_0d == truth_out)
    print(evaluator[jersf_key])

    junc_key = "Fall17_17Nov2017_V32_MC_Uncertainty_AK4PFPuppi"
    junc_out, junc_out_jagged = evaluate_flat_then_jagged(junc_key)
    print(junc_out)
    print(junc_out_jagged)
    print(evaluator[junc_key])

    source_key = "Autumn18_V8_MC_UncertaintySources_AK4PFchs_AbsoluteScale"
    assert source_key in evaluator.keys()
    junc_out, junc_out_jagged = evaluate_flat_then_jagged(source_key)
    print(junc_out)
    print(junc_out_jagged)
    print(evaluator[source_key])
# Hard-coded reference values (62 entries).
# NOTE(review): presumably the expected result of evaluating the "testSF2d"
# lookup in a test elsewhere in this module -- confirm against the consumer.
_testSF2d_expected_output = np.array(
    [
        0.90780139, 0.82748538, 0.86332178, 0.86332178,
        0.97981155, 0.79701495, 0.88245934, 0.82857144,
        0.91884059, 0.97466666, 0.94072163, 1.00775194,
        0.82748538, 1.00775194, 0.97203946, 0.98199672,
        0.80655736, 0.90893763, 0.88245934, 0.79701495,
        0.82748538, 0.82857144, 0.91884059, 0.90893763,
        0.97520661, 0.97520661, 0.82748538, 0.91884059,
        0.97203946, 0.88245934, 0.79701495, 0.9458763,
        1.00775194, 0.80655736, 1.00775194, 1.00775194,
        0.98976982, 0.98976982, 0.86332178, 0.94072163,
        0.80655736, 0.98976982, 0.96638656, 0.9458763,
        0.90893763, 0.9529984, 0.9458763, 0.9529984,
        0.80655736, 0.80655736, 0.80655736, 0.98976982,
        0.97466666, 0.98199672, 0.86332178, 1.03286386,
        0.94072163, 1.03398061, 0.82857144, 0.80655736,
        1.00775194, 0.80655736,
    ]
)