def test_cutflow_2_collect(select_2, tmpdir, infile, full_event_range, multi_chunk_func):
    """Run select_2 over data and MC chunks, collect, and check the merged cutflow table."""
    data_chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "data")
    mc_chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "mc")
    collector, dataset_readers_list = multi_chunk_func(
        select_2, tmpdir, data_chunk, mc_chunk)

    table = collector._prepare_output(dataset_readers_list)
    assert len(table) == 12

    # For real data the event weight is unity, so weighted == unweighted counts.
    frame_data = table.xs("test_data", level="dataset", axis="rows")
    weighted = frame_data.xs("EventWeight", level=1, axis="columns")
    unweighted = frame_data.xs("unweighted", level=1, axis="columns")
    assert all(weighted == unweighted)

    # MC unweighted counts should match the data unweighted counts (same input file).
    frame_mc = table.xs("test_mc", level="dataset", axis="rows")
    mc_unweighted = frame_mc.xs("unweighted", level=1, axis="columns")
    assert all(mc_unweighted == unweighted)

    # Each dataset was processed as two chunks, hence the doubled totals.
    doubled_total = 4580 * 2
    assert table.loc[("test_data", 0, "All"), ("totals_incl", "unweighted")] == doubled_total
    assert table.loc[("test_data", 0, "All"), ("totals_incl", "EventWeight")] == doubled_total
    assert table.loc[("test_mc", 0, "All"), ("totals_incl", "unweighted")] == doubled_total
    assert table.loc[("test_data", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
    assert table.loc[("test_mc", 1, "NMuon > 1"), ("passed_only_cut", "unweighted")] == 289 * 2
def test_cutflow_1_executes_data(cutflow_1, infile, full_event_range, tmpdir):
    """Apply cutflow_1 to a data chunk; check surviving events and the collector path."""
    event_chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "data")

    cutflow_1.event(event_chunk)
    # 289 events survive the NElectron selection.
    assert len(event_chunk.tree) == 289

    expected_path = str(tmpdir / "cuts_cutflow_1-NElectron.csv")
    assert cutflow_1.collector().filename == expected_path
def test_sequential_stages(cutflow_1, select_2, infile, full_event_range, tmpdir):
    """Chain two cutflow stages on the same chunk and verify the doubly-filtered events."""
    second_stage = stage.CutFlow(
        "cutflow_2", str(tmpdir), selection=select_2, weights="EventWeight")
    event_chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "data")

    # Both stages mask the same tree in sequence.
    cutflow_1.event(event_chunk)
    second_stage.event(event_chunk)

    assert len(event_chunk.tree) == 2
    expected_jet_py = [49.641838, 45.008915, -78.01798, 60.730812]
    jet_py = event_chunk.tree.array("Jet_Py")
    assert pytest.approx(jet_py.flatten()) == expected_jet_py
def test_cutflow_1_executes_mc(cutflow_1, infile, full_event_range, tmpdir):
    """Apply cutflow_1 to an MC chunk; check the collected single-cut summary table."""
    event_chunk = FakeBEEvent(
        MaskedUprootTree(infile, event_ranger=full_event_range), "mc")

    cutflow_1.event(event_chunk)
    # Same selection as the data test: 289 events pass.
    assert len(event_chunk.tree) == 289

    collector = cutflow_1.collector()
    assert collector.filename == str(tmpdir / "cuts_cutflow_1-NElectron.csv")

    readers = (("test_mc", (cutflow_1, )), )
    table = collector._prepare_output(readers)
    assert len(table) == 1
    assert all(table[("passed_only_cut", "unweighted")] == [289])
    assert all(table[("passed_incl", "unweighted")] == [289])
    assert all(table[("totals_incl", "unweighted")] == [4580])
def process(self, df):
    """Run the configured stage sequence over one chunk and return the accumulated output."""
    output = self.accumulator.identity()

    # Restrict the chunk's tree to this chunk's entry window.
    entry_start = df._branchargs['entrystart']
    entry_stop = df._branchargs['entrystop']
    masked_tree = MaskedUprootTree(
        df._tree, EventRanger(entry_start, entry_stop, entry_stop - entry_start))

    dsname = df['dataset']
    # Datasets literally named 'data' are real data; everything else is MC.
    eventtype = 'data' if dsname == 'data' else 'mc'
    chunk = SingleChunk(masked_tree, ChunkConfig(ConfigProxy(dsname, eventtype)))

    output['stages'][dsname] = stages_accumulator(self._sequence)
    for stage_work in output['stages'][dsname]._value:
        stage_work.event(chunk)
    return output