async def test_2await_exe_from_coroutine():
    # Two queries are turned into awaitables with a coroutine executor and
    # awaited together; each result must come back as an AST.
    pt_future = (
        EventDataset("file://junk.root")
        .SelectMany("lambda e: e.jets()")
        .Select("lambda j: j.pT()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
        .future_value(dummy_executor_coroutine)
    )
    eta_future = (
        EventDataset("file://junk.root")
        .SelectMany("lambda e: e.jets()")
        .Select("lambda j: j.eta()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
        .future_value(dummy_executor_coroutine)
    )
    first, second = await asyncio.gather(pt_future, eta_future)
    assert isinstance(first, ast.AST)
    assert isinstance(second, ast.AST)
async def test_await_exe_from_normal_function():
    # future_value is given a plain (non-coroutine) executor; awaiting the
    # returned object must still yield an AST.
    pending = (
        EventDataset("file://junk.root")
        .SelectMany("lambda e: e.jets()")
        .Select("lambda j: j.pT()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
        .future_value(dummy_executor)
    )
    outcome = await pending
    assert isinstance(outcome, ast.AST)
def execute_query(exer):
    """Assemble a fixed jet query and evaluate it with the executor `exer`.

    Returns whatever `.value(exer)` returns for the assembled query.
    """
    query = (
        EventDataset("file://junk.root")
        .Where("lambda e: e.jets.Select(lambda j: j.pT()).Where(lambda j: j > 10).Count() > 0")
        .SelectMany("lambda e: e.jets()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
    )
    return query.value(exer)
def test_simple_query():
    # The simplest synchronous path: .value() with the dummy executor must
    # hand back an AST.
    query = (
        EventDataset("file://junk.root")
        .SelectMany("lambda e: e.jets()")
        .Select("lambda j: j.pT()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
    )
    result = query.value(dummy_executor)
    assert isinstance(result, ast.AST)
def test_find_EventDataset_Select_and_Many():
    # find_dataset must locate the dataset node beneath a Select followed by
    # a SelectMany; the identity executor returns the query AST untouched.
    query_ast = (
        EventDataset("file://dude.root")
        .Select("lambda x: x")
        .SelectMany("lambda x: x")
        .value(executor=lambda a: a)
    )
    found = find_dataset(query_ast)
    assert ["file:///dude.root"] == found.url
def test_generate_Max():
    # Smoke test: generating code for a Max() aggregate should complete.
    # The generated lines are only printed, not inspected.
    query = (
        EventDataset("file://root.root")
        .Select("lambda e: e.Jets('AntiKt4EMTopoJets').Select(lambda j: j.pt()).Max()")
        .AsROOTTTree('dude.root', 'analysis', 'JetPt')
    )
    generated = query.value(executor=exe_for_test)
    print_lines(get_lines_of_code(generated))
def test_per_event_item():
    # A single per-event value (the run number) should produce exactly one
    # class variable, and its C++ type should print as "double".
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.EventInfo("EventInfo").runNumber()')
        .AsROOTTTree('root.root', 'analysis', 'RunNumber')
    )
    result = query.value(executor=exe_for_test)
    class_vars = result.QueryVisitor._gc._class_vars
    assert 1 == len(class_vars)
    only_var = class_vars[0]
    assert "double" == str(only_var.cpp_type())
def test_event_dataset_not_ready(downloading_present_ds):
    # NOTE(review): the fixture presumably simulates a dataset that is still
    # downloading - confirm against its definition.
    # In that state the finder must return the original node unchanged and
    # report that the datasets do not resolve locally.
    dataset = EventDataset('localds://bogus2')
    finder = dataset_finder()
    visited = finder.visit(dataset)
    assert visited is dataset
    assert finder.DatasetsLocallyResolves is False
def test_EDS_pickle():
    # An EventDataset must survive a pickle round trip with its single url.
    import pickle
    original = EventDataset("file://root.root")
    restored = pickle.loads(pickle.dumps(original))
    assert len(restored.url) == 1
def test_first_can_be_iterable_after_where():
    # Simplified from a query found while generating a tuple for training.
    # The problem was that First() always returned something that could not
    # be iterated over, which is not what we want here: Count() is applied
    # to the result of First().
    # The test passes as long as generation completes without raising, so
    # the result is intentionally not kept.
    (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AllMyJets").Select(lambda j: e.Tracks("InnerTracks").Where(lambda t: t.pt() > 1000.0)).First().Count()')
        .AsROOTTTree('dude.root', 'analysis', 'JetPt')
        .value(executor=exe_for_test)
    )
def simple_query_ast_ROOT():
    """Build a small jet-pt query ending in a ROOT TTree and return its AST.

    The identity executor hands back whatever `.value` passes to it.
    """
    query = (
        EventDataset(r'localds://bogus_ds')
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets")')
        .Select('lambda j: j.pt()/1000.0')
        .AsROOTTTree('output.root', 'trees', 'JetPt')
    )
    return query.value(executor=lambda a: a)
def test_event_dataset_ready(already_present_ds):
    # NOTE(review): the fixture presumably makes the dataset available
    # locally with seven files - confirm against its definition.
    # The finder must then return a new node, report local resolution, and
    # expose seven urls.
    dataset = EventDataset('localds://bogus2')
    finder = dataset_finder()
    visited = finder.visit(dataset)
    assert visited is not dataset
    assert finder.DatasetsLocallyResolves is True
    assert len(visited.url) == 7
def test_first_object_in_each_event():
    # Part of checking that First puts its outer settings in the right place.
    # It also exercises First on a collection of objects that has not been
    # pulled apart in a Select. Passing means generation did not raise.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets").First().pt()/1000.0')
        .AsPandasDF('FirstJetPt')
    )
    query.value(executor=exe_for_test)
def simple_query_ast_awkward():
    """Build a small jet-pt query ending in an awkward array and return its AST.

    The identity executor hands back whatever `.value` passes to it.
    """
    query = (
        EventDataset(r'localds://bogus_ds')
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets")')
        .Select('lambda j: j.pt()/1000.0')
        .AsAwkwardArray('JetPt')
    )
    return query.value(executor=lambda a: a)
def test_nested_query_rendered_correctly():
    # A Where containing nested lambdas must come back as an AST whose dump
    # contains a "Select(source" node.
    query = (
        EventDataset("file://junk.root")
        .Where("lambda e: e.jets.Select(lambda j: j.pT()).Where(lambda j: j > 10).Count() > 0")
        .SelectMany("lambda e: e.jets()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
    )
    result = query.value(dummy_executor)
    assert isinstance(result, ast.AST)
    dumped = ast.dump(result)
    assert "Select(source" in dumped
def test_executor_returns_a_coroutine():
    """Synchronous .value() with a coroutine executor must still yield an AST."""
    query = (
        EventDataset("file://junk.root")
        .SelectMany("lambda e: e.jets()")
        .Select("lambda j: j.pT()")
        .AsROOTTTree("junk.root", "analysis", "jetPT")
    )
    result = query.value(dummy_executor_coroutine)
    assert isinstance(result, ast.AST)
def test_Aggregate_not_initial_const_SUM():
    # Sum() over an expression (pt/1000): the generated code is expected to
    # contain exactly two lines mentioning "/1000".
    query = (
        EventDataset("file://root.root")
        .Select("lambda e: e.Jets('AntiKt4EMTopoJets').Select(lambda j: j.pt()/1000).Sum()")
        .AsROOTTTree('dude.root', 'analysis', 'jetPT')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    division_lines = find_line_numbers_with("/1000", lines)
    assert 2 == len(division_lines)
def test_Select_of_2D_array_fails():
    # A Select that yields a sequence of tuples per event is a nested (2D)
    # structure. The query is expected to be rejected with an error whose
    # message mentions "Nested data structures".
    try:
        (
            EventDataset("file://root.root")
            .Select('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: (j.pt()/1000.0, j.eta()))')
            .AsPandasDF(['JetInfo'])
            .value(executor=exe_for_test)
        )
    except Exception as e:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit propagate instead of becoming an assertion failure.
        assert "Nested data structures" in str(e)
    else:
        # Previously the test passed silently when nothing was raised.
        assert False, "Expected the nested Select to raise an exception"
def test_First_selects_collection_count():
    # First() picks one track collection out of a per-jet sequence and
    # Count() is applied to it. The generated code is expected to contain
    # exactly two lines mentioning "for".
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: e.Tracks("InDetTrackParticles")).First().Count()')
        .AsPandasDF('TrackCount')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    for_lines = find_line_numbers_with("for", lines)
    assert 2 == len(for_lines)
def test_First_Of_Select_After_Where_is_in_right_place():
    # The First bookkeeping must come after the if statement generated for
    # the Where clause.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: j.pt()/1000.0).Where(lambda jpt: jpt > 10.0).First()')
        .AsPandasDF('FirstJetPt')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    where_line = find_line_with(">10.0", lines)
    # Look for the "false" that First uses to remember it has gone by one,
    # searching from the Where line onward.
    after_where = lines[where_line:]
    false_offset = find_line_with("false", after_where, throw_if_not_found=False)
    assert false_offset > 0
def test_generate_binary_operators():
    # Each arithmetic operator must go through code generation without a
    # crash, and the expression "pt()<op>1" is looked up in the output.
    for o in ('+', '-', '*', '/'):
        selection = 'lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: j.pt(){0}1)'.format(o)
        generated = (
            EventDataset("file://root.root")
            .SelectMany(selection)
            .AsPandasDF(['JetInfo'])
            .value(executor=exe_for_test)
        )
        lines = get_lines_of_code(generated)
        print_lines(lines)
        # Only the lookup matters; the returned line number is not used.
        find_line_with(f"pt(){o}1", lines)
def good_query_ast_pickle_data():
    """Return the pickled bytes of a known-good query AST for use in tests."""
    dataset = EventDataset(
        r'localds://mc16_13TeV.311309.MadGraphPythia8EvtGen_A14NNPDF31LO_HSS_LLP_mH125_mS5_ltlow.deriv.DAOD_EXOT15.e7270_e5984_s3234_r9364_r9315_p3795'
    )
    query = (
        dataset
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets")')
        .Select('lambda j: j.pt()/1000.0')
        .AsROOTTTree('output.root', 'dudetree', 'JetPt')
    )
    # The identity executor hands back whatever `.value` passes to it.
    query_ast = query.value(executor=lambda a: a)
    return pickle.dumps(query_ast)
def test_SelectMany_of_tuple_is_not_array():
    # SelectMany over a sequence of tuples should be written as a straight
    # sequence, not an array: no push_back anywhere, and the Fill() call
    # sits inside exactly one open "for" block.
    query = (
        EventDataset("file://root.root")
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: (j.pt()/1000.0, j.eta()))')
        .AsPandasDF(['JetPts', 'JetEta'])
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    push_back_count = sum("push_back" in text for text in lines)
    assert 0 == push_back_count
    fill_line = find_line_with("Fill()", lines)
    open_blocks = find_open_blocks(lines[:fill_line])
    enclosing_for_count = sum("for" in block for block in open_blocks)
    assert 1 == enclosing_for_count
def test_per_jet_item_with_event_level():
    # A per-jet value paired with a per-event value: in the generated code
    # the _JetPts line, the _RunNumber line and the Fill() call must sit on
    # three consecutive lines, in that order.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: (e.Jets("AntiKt4EMTopoJets").Select(lambda j: j.pt()), e.EventInfo("EventInfo").runNumber())')
        .SelectMany('lambda ji: ji[0].Select(lambda pt: (pt, ji[1]))')
        .AsPandasDF(('JetPts', 'RunNumber'))
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    jet_pt_line = find_line_with("_JetPts", lines)
    run_number_line = find_line_with("_RunNumber", lines)
    fill_line = find_line_with("->Fill()", lines)
    assert jet_pt_line + 1 == run_number_line
    assert run_number_line + 1 == fill_line
def test_Select_is_an_array():
    # A Select of a per-event sequence should be stored as an array:
    # exactly one line mentions push_back, and the Fill() call is not
    # inside any open "for" block.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: j.pt())')
        .AsPandasDF('JetPts')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    push_back_count = sum("push_back" in text for text in lines)
    assert 1 == push_back_count
    fill_line = find_line_with("Fill()", lines)
    open_blocks = find_open_blocks(lines[:fill_line])
    enclosing_for_count = sum("for" in block for block in open_blocks)
    assert 0 == enclosing_for_count
def test_per_jet_item_with_where():
    # Per-jet pt with a Where cut applied before the Select.
    query = (
        EventDataset("file://root.root")
        .SelectMany('lambda e: e.Jets("AntiKt4EMTopoJets")')
        .Where("lambda j: j.pt()>40.0")
        .Select("lambda j: j.pt()")
        .AsPandasDF('JetPts')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    # The tree Fill must be at the same level as the _JetPts assignment,
    # i.e. on the very next line.
    jet_pt_line = find_line_with("_JetPts", lines)
    following_line = lines[jet_pt_line + 1]
    assert "Fill()" in following_line
def test_cant_call_double():
    # Calling a method on the double returned by pt() must raise with a
    # specific message. Any other error is re-raised without its context.
    expected = "Unable to call method 'eta' on type 'double'"
    try:
        (
            EventDataset("file://root.root")
            .Select("lambda e: e.Jets('AntiKt4EMTopoJets').Select(lambda j: j.pt().eta()).Sum()")
            .AsROOTTTree('root.root', 'dude', "n_jets")
            .value(executor=exe_for_test)
        )
    except BaseException as e:
        if expected not in str(e):
            raise e from None
        # The expected failure was seen - the test is done.
        return
    # Reaching this point means nothing was raised.
    assert False
def test_sequence_with_where_first():
    # First() applied to a per-jet sequence of Where-filtered tracks,
    # followed by Count().
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AntiKt4EMTopoJets").Select(lambda j: e.Tracks("InDetTrackParticles").Where(lambda t: t.pt() > 1000.0)).First().Count()')
        .AsPandasDF('dude')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)

    # Exactly one "is_first" test, sitting inside exactly one open for loop.
    first_checks = find_line_numbers_with("if (is_first", lines)
    assert 1 == len(first_checks)
    blocks_before_first = find_open_blocks(lines[:first_checks[0]])
    assert 1 == sum("for" in block for block in blocks_before_first)

    # The "+1" increment must be inside exactly one open block mentioning
    # ">1000" (the track pt cut).
    increment_line = find_line_with("+1", lines)
    blocks_before_increment = find_open_blocks(lines[:increment_line])
    assert 1 == sum(">1000" in block for block in blocks_before_increment)
def test_per_jet_with_delta():
    # Reproduces a bug seen in the wild: no generated "if (i_obj" line may
    # contain an "x()" call.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: (e.Jets("AntiKt4EMTopoJets"),e.TruthParticles("TruthParticles").Where(lambda tp1: tp1.pdgId() == 35))')
        .SelectMany('lambda ev: ev[0].Select(lambda j1: (j1, ev[1].Where(lambda tp2: DeltaR(tp2.eta(), tp2.phi(), j1.eta(), j1.phi()) < 0.4)))')
        .Select('lambda ji: (ji[0].pt(), 0 if ji[1].Count()==0 else abs(ji[1].First().prodVtx().x()-ji[1].First().decayVtx().x()))')
        .Where('lambda jall: jall[0] > 40.0')
        .AsPandasDF(('JetPts', 'y'))
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)
    guard_line_numbers = find_line_numbers_with("if (i_obj", lines)
    guard_lines = [lines[number] for number in guard_line_numbers]
    for text in guard_lines:
        assert "x()" not in text
def test_count_after_single_sequence_of_sequence_unwound():
    # Count() over a sequence-of-sequences flattened with SelectMany.
    query = (
        EventDataset("file://root.root")
        .Select('lambda e: e.Jets("AllMyJets").Select(lambda j: e.Tracks("InnerTracks")).SelectMany(lambda ts: ts).Count()')
        .AsROOTTTree('dude.root', 'analysis', 'JetPt')
    )
    generated = query.value(executor=exe_for_test)
    lines = get_lines_of_code(generated)
    print_lines(lines)

    # Exactly two generated lines may mention "for".
    # NOTE(review): the original comment spoke of "just one for loop" while
    # the assertion expects two matching lines - confirm which is intended.
    for_line_count = sum("for" in text for text in lines)
    assert 2 == for_line_count

    # The +1 must come after the first "for" and before the next closing
    # bracket; both offsets are relative to that "for" line.
    first_for = find_line_with("for", lines)
    from_first_for = lines[first_for:]
    increment_offset = find_line_with("+1", from_first_for)
    closing_offset = find_next_closing_bracket(from_first_for)
    assert closing_offset > increment_offset