def test_make_local_twice_filter(servicex_ds):
    '''Rendering the same filtered sequence twice gives the same selection.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)
    jet_pts = df.jets[df.jets.pt > 30].pt

    # Render, then capture the cleaned selection, two times in a row.
    renders = []
    for _ in range(2):
        make_local(jet_pts)
        renders.append(clean_linq(extract_selection(servicex_ds)))

    first, second = renders
    assert first == second
def test_make_local_twice_check_test(servicex_ds):
    '''
    Sanity check for the "render twice and compare" technique used by the
    other tests: two *different* sequences must give different selections.
    '''
    # Python references and dicts can be surprising, so verify that the
    # second capture really reflects the second render.
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)
    jet_pts = df.jets.pt

    make_local(jet_pts)
    plain_selection = clean_linq(extract_selection(servicex_ds))

    make_local(jet_pts / 1000.0)
    scaled_selection = clean_linq(extract_selection(servicex_ds))

    assert plain_selection != scaled_selection
def test_multi_object_call_with_same_thing_twice(servicex_ds):
    '''
    A collection reached through a call (`df.Electrons(...)`) is used inside
    a lambda that has already unwrapped the sequence, with a negated filter.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    mc_part = df.TruthParticles('TruthParticles')
    eles = df.Electrons('Electrons')

    # Per event: the electrons, and for each electron the MC particles
    # associated with it (here simply all of them).
    eles['near_mcs'] = lambda reco_e: mc_part
    eles['hasMC'] = lambda e: e.near_mcs.Count() > 0

    make_local(eles[~eles.hasMC].pt)
    selection = extract_selection(servicex_ds)

    no_mc_filter = (
        "lambda e2: e2[0].Where(lambda e3: "
        "not e2[1]"
        ".TruthParticles('TruthParticles')"
        ".Count() > 0)"
    )
    query = source.Select("lambda e1: (e1.Electrons('Electrons'), e1)")
    query = query.Select(no_mc_filter)
    query = query.Select("lambda e4: e4.Select(lambda e5: e5.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_multi_object_monads(servicex_ds):
    '''
    Electrons are filtered on whether any truth particle passes a
    user-function cut that depends on the electron itself.
    '''
    from dataframe_expressions import user_func

    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    mc_part = df.TruthParticles('TruthParticles')
    eles = df.Electrons('Electrons')

    @user_func
    def DeltaR(p1_eta: float) -> float:
        # Placeholder body: only the name and signature matter here.
        assert False

    def near(mcs, e):
        'Select the particles in mcs whose DeltaR value is below 0.5'
        return mcs[lambda m: DeltaR(e.eta()) < 0.5]

    # Per event: the electrons, and for each electron the nearby MC
    # particles.
    eles['near_mcs'] = lambda reco_e: near(mc_part, reco_e)
    eles['hasMC'] = lambda e: e.near_mcs.Count() > 0

    make_local(eles[eles.hasMC].pt)
    selection = extract_selection(servicex_ds)

    has_mc_filter = (
        "lambda e2: e2[0].Where(lambda e3: "
        "e2[1]"
        ".TruthParticles('TruthParticles')"
        ".Where(lambda e6: DeltaR(e3.eta()) < 0.5).Count() > 0)"
    )
    query = source.Select("lambda e1: (e1.Electrons('Electrons'), e1)")
    query = query.Select(has_mc_filter)
    query = query.Select("lambda e4: e4.Select(lambda e5: e5.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_map_with_2filters_inside_twice(servicex_ds):
    '''
    A map whose body captures a doubly-filtered collection renders to the
    same selection when it is made local twice.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    eles = df.Electrons('Electrons')
    mc_part = df.TruthParticles('TruthParticles')
    mc_ele = mc_part[mc_part.pdgId == 11]
    good_mc_ele = mc_ele[mc_ele.ptgev > 20]

    ele_mcs = eles.map(lambda reco_e: good_mc_ele)

    # Render, then capture the cleaned selection, two times in a row.
    renders = []
    for _ in range(2):
        make_local(ele_mcs)
        renders.append(clean_linq(extract_selection(servicex_ds)))

    first, second = renders
    assert first == second
def test_first_at_object_level(servicex_ds):
    '''`First()` on a collection followed by a leaf access.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    first_jet_pt = df.jets.First().pt
    make_local(first_jet_pt)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e5: e5.jets()")
    query = query.Select("lambda e7: e7.First()")
    query = query.Select("lambda e8: e8.pt()")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_numpy_abs(servicex_ds):
    '''`numpy.abs` applied to an event-level value renders as `abs`.'''
    import numpy as np

    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    abs_met = np.abs(df.met)
    make_local(abs_met)
    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.met()")
        .Select("lambda e2: abs(e2)")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_count_of_objects(servicex_ds):
    '''`Count()` on a collection renders as a per-event count.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    n_jets = df.jets.Count()
    make_local(n_jets)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e2: e2.Count()")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_abs_of_top_leveldata(servicex_ds):
    '''
    Builtin `abs` on an event-level value: `make_local` returns something
    and the rendered selection applies `abs`.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    abs_met = abs(df.met)
    result = make_local(abs_met)
    assert result is not None

    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.met()")
        .Select("lambda e2: abs(e2)")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_combine_leaf_lambda(servicex_ds):
    '''A `map` whose lambda accesses a leaf renders as a nested Select.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets.map(lambda j: j.pt)
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e3: e3.Select(lambda e2: e2.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_collect_xaod_call_with_number(servicex_ds):
    '''
    Collection fetched through a call that takes a numeric argument
    (noted by the original author as the call form needed in ATLAS).
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.Jets(22.0).pt
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.Jets(22.0)")
        .Select("lambda e3: e3.Select(lambda e2: e2.pt())")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_count_at_eventLevel(servicex_ds):
    '''An event-level filter on a collection count renders as a Where.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    dijet_pts = df[df.jets.Count() == 2].jets.pt
    make_local(dijet_pts)
    selection = extract_selection(servicex_ds)

    query = source.Where("lambda e4: e4.jets().Count() == 2")
    query = query.Select("lambda e5: e5.jets()")
    query = query.Select("lambda e7: e7.Select(lambda e6: e6.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_binop_in_filter(servicex_ds):
    '''A filter whose condition contains an arithmetic binary operation.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets[(df.jets.pt / 1000.0) > 30].pt
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select("lambda e7: e7.Where(lambda e5: e5.pt()/1000.0 > 30)")
        .Select("lambda e8: e8.Select(lambda e6: e6.pt())")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_filter_not(servicex_ds):
    '''A filter negated with `~` renders as `not (...)`.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets[~(df.jets.pt > 30.0)].pt
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e9: e9.Where(lambda e7: not (e7.pt() > 30.0))")
    query = query.Select("lambda e10: e10.Select(lambda e8: e8.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_filter_and_abs(servicex_ds):
    '''Two conditions joined with `&`, one using `abs`, render as `and`.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets[(df.jets.pt > 30.0) & (abs(df.jets.eta) < 2.5)].pt
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    combined_cut = (
        "lambda e10: e10.Where(lambda e8: (e8.pt() > 30.0) and (abs(e8.eta()) < 2.5))"
    )
    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select(combined_cut)
        .Select("lambda e11: e11.Select(lambda e9: e9.pt())")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_jet_pt_filter_pts_gt(servicex_ds):
    '''Filtering the leaf values themselves with a `>` comparison.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    hard_pts = df.jets.pt[df.jets.pt > 30.0]
    make_local(hard_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e5: e5.Select(lambda e2: e2.pt())")
    query = query.Select("lambda e6: e6.Where(lambda e3: e3 > 30.0)")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_filter_jet_by_attributes(servicex_ds):
    '''Two attribute accesses combined with `&` used directly as a filter.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets[df.jets.hasProdVtx & df.jets.hasDecayVtx].pt
    make_local(jet_pts)
    selection = extract_selection(servicex_ds)

    vertex_cut = (
        "lambda e7: e7.Where(lambda e2: e2.hasProdVtx() and e2.hasDecayVtx())"
    )
    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select(vertex_cut)
        .Select("lambda e8: e8.Select(lambda e6: e6.pt())")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_pt_sub(servicex_ds):
    '''Subtracting a constant from a leaf renders as an extra Select.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    shifted_pts = df.jets.pt - 1000.0
    make_local(shifted_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e4: e4.Select(lambda e2: e2.pt())")
    query = query.Select("lambda e5: e5.Select(lambda e3: e3 - 1000.0)")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_abs_of_data_with_calls(servicex_ds):
    '''
    Builtin `abs` on a leaf reached through explicit calls: `make_local`
    returns something and the rendered selection applies `abs` per item.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    abs_pts = abs(df.jets().pt())
    result = make_local(abs_pts)
    assert result is not None

    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select("lambda e4: e4.Select(lambda e2: e2.pt())")
        .Select("lambda e5: e5.Select(lambda e3: abs(e3))")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_jet_pt_filter_pts_ne(servicex_ds):
    '''Filtering the leaf values themselves with a `!=` comparison.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    other_pts = df.jets.pt[df.jets.pt != 30.0]
    make_local(other_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e5: e5.Select(lambda e2: e2.pt())")
    query = query.Select("lambda e6: e6.Where(lambda e4: e4 != 30.0)")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_filter_and_divide_with_call(servicex_ds):
    '''Filter on a leaf reached via explicit calls, then divide the result.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    scaled_pts = df.jets().pt[df.jets().pt > 30.0] / 1000.0
    make_local(scaled_pts)
    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select("lambda e6: e6.Select(lambda e2: e2.pt())")
        .Select("lambda e7: e7.Where(lambda e3: e3 > 30.0)")
        .Select("lambda e8: e8.Select(lambda e5: e5 / 1000.0)")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_simple_capture_and_replace(servicex_ds):
    '''
    A map lambda that returns the captured event-level table: the event
    is carried alongside the collection as a tuple.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    met_per_jet = df.jets.map(lambda j: df).met
    make_local(met_per_jet)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: (e1.jets(), e1)")
    query = query.Select("lambda e5: e5[0].Select(lambda e3: e5[1])")
    query = query.Select("lambda e6: e6.Select(lambda e4: e4.met())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_collect_pts_as_call(servicex_ds):
    '''
    Collection and leaf both accessed with call syntax; also checks the
    length of the data returned by `make_local`.
    '''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    jet_pts = df.jets().pt()
    result = make_local(jet_pts)
    assert result is not None
    assert len(result) == 283458

    selection = extract_selection(servicex_ds)

    query = (
        source
        .Select("lambda e1: e1.jets()")
        .Select("lambda e3: e3.Select(lambda e2: e2.pt())")
        .AsROOTTTree("file.root", "treeme", ['col1'])
    )

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_filter_chain(servicex_ds):
    '''Two filters applied one after the other render as two Where steps.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    seq1 = df.jets[df.jets.pt > 30.0]
    central_pts = seq1[seq1.eta < 2.4].pt
    make_local(central_pts)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e6: e6.Where(lambda e3: e3.pt() > 30.0)")
    query = query.Select("lambda e7: e7.Where(lambda e4: e4.eta() < 2.4)")
    query = query.Select("lambda e5: e5.Select(lambda e2: e2.pt())")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def translate_linq(expr) -> str:
    '''
    Turn a LINQ expression (everything short of the final `value` call)
    into its cleaned `qastle` text form.

    `expr.value` is invoked with an executor that converts the python AST
    it is handed into `qastle` text; the result is then passed through
    `clean_linq`.
    '''
    async def _to_qastle_text(tree: ast.AST):
        import qastle
        return qastle.python_ast_to_text_ast(tree)

    # clean_linq replaces all the eX's in order, so expected queries do not
    # need re-writing whenever the naming algorithm changes.
    return clean_linq(expr.value(_to_qastle_text))
def test_user_function_with_map_2fcall(servicex_ds):
    '''A two-argument user function called inside a map lambda.'''
    @user_func
    def tns(e1: float, e2: float) -> float:
        # Placeholder: only the name and signature are used for rendering.
        assert False, 'this is a fake function and should never be called'

    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    tns_per_jet = df.jets.map(lambda j: tns(j.pt, j.eta))
    make_local(tns_per_jet)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.jets()")
    query = query.Select("lambda e7: e7.Select(lambda e2: tns(e2.pt(), e2.eta()))")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_two_maps(servicex_ds):
    '''A map nested inside a map, the inner one using the outer variable.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    eta_sums = df.jets.map(lambda j: df.Electrons.map(lambda e: e.eta + j.eta))
    make_local(eta_sums)
    selection = extract_selection(servicex_ds)

    nested_select = (
        "lambda e14: e14[0].Select(lambda e3: "
        "e14[1]"
        ".Electrons()"
        ".Select(lambda e13: e13.eta() + e3.eta()))"
    )
    query = source.Select("lambda e1: (e1.jets(), e1)")
    query = query.Select(nested_select)
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_capture_inside_with_call(servicex_ds):
    '''A map lambda that captures the table and calls a collection on it.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    ele_counts = df.jets.map(lambda j: df.Electrons().Count())
    make_local(ele_counts)
    selection = extract_selection(servicex_ds)

    count_per_jet = (
        "lambda e14: e14[0].Select(lambda e3: "
        "e14[1]"
        ".Electrons()"
        ".Count())"
    )
    query = source.Select("lambda e1: (e1.jets(), e1)")
    query = query.Select(count_per_jet)
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_map_with_const(servicex_ds):
    '''A map whose lambda returns a constant for every item.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    mcs = df.mcs
    ones = mcs.map(lambda mc: 1.0)
    make_local(ones)
    selection = extract_selection(servicex_ds)

    query = source.Select("lambda e1: e1.mcs()")
    query = query.Select("lambda e2: e2.Select(lambda e3: 1.0)")
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected
def test_object_compare(servicex_ds):
    '''A method on one collection called with an item of another.'''
    source = ServiceXDatasetSource(servicex_ds)
    df = xaod_table(source)

    delta_rs = df.jets.map(lambda j: df.Electrons.DeltaR(j))
    make_local(delta_rs)
    selection = extract_selection(servicex_ds)

    delta_r_select = (
        'lambda e8: e8[0].Select(lambda e3: '
        'e8[1]'
        '.Electrons()'
        '.Select(lambda e7: e7.DeltaR(e3)))'
    )
    query = source.Select("lambda e1: (e1.jets(), e1)")
    query = query.Select(delta_r_select)
    query = query.AsROOTTTree("file.root", "treeme", ['col1'])

    expected = translate_linq(query)
    assert clean_linq(selection) == expected