def test_slices():
    """Slice with conversions supplying start, stop (input arg) and step."""
    slicer = c.this[c.item(0) : c.input_arg("slice_to") : c.item(1)]
    result = slicer.execute(
        [2, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], slice_to=8
    )
    # start=2, stop=8, step=2 -> elements at indices 2, 4, 6
    assert result == [1, 3, 5]
def test_list_comprehension():
    """list_comp/generator_comp: mapping, sorting and exception pass-through."""
    assert c.list_comp(1).gen_converter()(range(5)) == [1] * 5

    people = [{"name": "John"}, {"name": "Bill"}, {"name": "Nick"}]
    sorted_names = (
        c.list_comp(c.item("name")).sort(key=lambda n: n).gen_converter()
    )
    assert sorted_names(people) == ["Bill", "John", "Nick"]
    assert c.list_comp(c.item("name")).sort().gen_converter()(people) == [
        "Bill",
        "John",
        "Nick",
    ]
    # generator_comp preserves input order
    gen_names = c.generator_comp(c.item("name")).gen_converter()
    assert tuple(gen_names(people)) == ("John", "Bill", "Nick")
    reversed_names = c.list_comp(c.item("name")).sort(
        key=lambda n: n, reverse=True
    ).gen_converter()
    assert reversed_names(people) == ["Nick", "John", "Bill"]
    # nested literal: set of 1-tuples per element
    assert c.list_comp({(c.item("name"),)}).execute(people) == [
        {("John",)},
        {("Bill",)},
        {("Nick",)},
    ]

    class CustomException(Exception):
        pass

    def failing_source():
        yield 1
        raise CustomException

    # exceptions raised by the source generator propagate through the comp
    wrapped = c.generator_comp(c.this()).execute(failing_source())
    with pytest.raises(CustomException):
        list(wrapped)
def test_slices():
    """Slice with conversion-based start/stop/step via a generated converter."""
    converter = c.this()[
        c.item(0) : c.input_arg("slice_to") : c.item(1)
    ].gen_converter(debug=False)
    # start=2, stop=8, step=2 -> elements at indices 2, 4, 6
    assert converter(
        [2, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], slice_to=8
    ) == [1, 3, 5]
def test_aggregate_no_init_loops():
    """Reducers with `where` conditions only consume matching rows."""
    rows = [
        {"a": 1, "b": 0},
        {"a": 2, "b": 1},
        {"a": 3, "b": 2},
        {"a": 4, "b": 3},
    ]
    agg = c.aggregate(
        {
            "first_a": c.ReduceFuncs.First(
                c.item("a"), where=c.item("b") > 0
            ),
            "list_b": c.ReduceFuncs.Array(
                c.item("b"), where=c.item("a") > 0
            ),
        }
    ).gen_converter(debug=False)
    # first row with b > 0 has a == 2; every row has a > 0
    assert agg(rows) == {"first_a": 2, "list_b": [0, 1, 2, 3]}
def test_weighted_average(series):
    """Average reducer with a weight column matches the reference helper."""
    actual = c.aggregate(
        c.ReduceFuncs.Average(c.item(0), c.item(1))
    ).execute(series)
    assert eq(actual, weighted_average(series))
def test_list():
    """c.list and plain list literals build lists from conversions."""
    build_list = c.list(c.item(1), c.item(0), 3).gen_converter()
    assert build_list([2, 1]) == [1, 2, 3]
    # a naive nested list literal is converted element-wise
    build_nested = c([[c.item(1), c.item(0), 3]]).gen_converter()
    assert build_nested([2, 1]) == [[1, 2, 3]]
def test_aggregate_func():
    """Several reducers combined in one aggregate, incl. reducer arithmetic."""
    records = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]
    converter = c.aggregate(
        {
            "a": c.ReduceFuncs.Array(c.item("a")),
            # reducers support arithmetic: sum(a) + row count
            "ab_sum": c.ReduceFuncs.Sum(c.item("a")) + c.ReduceFuncs.Count(),
            "b": c.ReduceFuncs.ArrayDistinct(c.item("b")),
            # MaxRow keeps the whole row; then drill into it
            "b_max_a": c.ReduceFuncs.MaxRow(c.item("a")).item(
                "b", default=None
            ),
        }
    ).gen_converter(debug=False)
    assert converter(records) == {
        "a": [5, 10, 10],
        "ab_sum": 28,
        "b": ["foo", "bar"],
        "b_max_a": "bar",
    }
def test_list_comprehension():
    """list_comp/generator_comp mapping and sorting behavior."""
    assert c.list_comp(1).gen_converter()(range(5)) == [1] * 5

    rows = [{"name": "John"}, {"name": "Bill"}, {"name": "Nick"}]
    by_key = c.list_comp(c.item("name")).sort(key=lambda n: n)
    assert by_key.gen_converter()(rows) == ["Bill", "John", "Nick"]
    assert c.list_comp(c.item("name")).sort().gen_converter()(rows) == [
        "Bill",
        "John",
        "Nick",
    ]
    # generator comp keeps input order
    assert tuple(c.generator_comp(c.item("name")).gen_converter()(rows)) == (
        "John",
        "Bill",
        "Nick",
    )
    descending = c.list_comp(c.item("name")).sort(
        key=lambda n: n, reverse=True
    )
    assert descending.gen_converter()(rows) == ["Nick", "John", "Bill"]
    # nested literal: each element becomes a set holding a 1-tuple
    assert c.list_comp({(c.item("name"),)}).execute(rows) == [
        {("John",)},
        {("Bill",)},
        {("Nick",)},
    ]
def test_group_by_with_double_ended_pipes():
    """Pipes feeding into and out of reducers inside aggregate/group_by."""
    rows = [{"value": 1}, {"value": 2}]
    # fmt: off
    doubled_sum = c.aggregate(
        c.item("value").pipe(c.ReduceFuncs.Sum(c.this())).pipe(
            c.this() * 2)).gen_converter()
    # fmt: on
    # (1 + 2) * 2
    assert doubled_sum(rows) == 6

    rows = [
        {"k": "A", "v": 1},
        {"k": "A", "v": 2},
    ]
    shared_reducer = c.ReduceFuncs.Sum(c.item("v"))
    grouped = (
        c.group_by(c.item("k"))
        .aggregate(
            {
                # same reducer object reused with different inputs
                "v1": c.input_arg("test").pipe(shared_reducer),
                "v2": shared_reducer,
            }
        )
        .gen_converter()
    )
    # v1 sums test["v"] per row (7 * 2); v2 sums row values (1 + 2)
    assert grouped(rows, test={"v": 7}) == [{"v1": 14, "v2": 3}]
def test_pipes():
    """Chained pipes: inline expressions, function calls and method calls.

    Exercises `.pipe(...)` between conversions, piping into plain callables
    (e.g. ``datetime.strptime``), and the error raised when a pipe receives
    a predefined input.
    """
    # [1,2,3] -> squares -> sum (14) -> + naive["abc"] (10) -> duplicated pair
    assert c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this())).pipe(
        c.call_func(sum, c.this())
    ).pipe(
        c.call_func(
            lambda x, a: x + a,
            c.this(),
            c.naive({"abc": 10}).item(c.input_arg("key_name")),
        )
    ).pipe([c.this(), c.this()]).execute(
        [1, 2, 3], key_name="abc", debug=False
    ) == [
        24,
        24,
    ]
    # piping into a plain callable with extra positional args
    assert c.item(0).pipe(
        datetime.strptime,
        "%Y-%m-%d",
    ).pipe(c.call_func(lambda dt: dt.date(), c.this())).execute(
        ["2019-01-01"], debug=False
    ) == date(2019, 1, 1)
    # same, but the date() call expressed as a conversion method call
    assert c.item(0).pipe(
        datetime.strptime,
        "%Y-%m-%d",
    ).pipe(c.this().call_method("date")).execute(
        ["2019-01-01"], debug=False
    ) == date(2019, 1, 1)
    # piping into a conversion that already has a predefined input is invalid
    with pytest.raises(c.ConversionException):
        c.naive(True).pipe(c.item("key1", _predefined_input={"key1": 777}))
def test_group_by_reducer_clones():
    """A reducer piped from different inputs is cloned, not shared."""
    rows = [{"value": 2}, {"value": 3}]
    summed_plus_one = c.aggregate(
        c.item("value").pipe(
            c.ReduceFuncs.Sum(c.this()).pipe(c.this() + 1)
        )
    )
    # (2 + 3) + 1
    assert summed_plus_one.execute(rows) == 6

    dict_sum = c.ReduceFuncs.DictSum(c.item("k"), c.item("v"))
    from_item1 = c.item("item1").pipe(dict_sum)
    from_item2 = c.item("item2").pipe(dict_sum)
    assert c.aggregate(from_item1).execute(
        [{"item1": {"k": 1, "v": 2}}]
    ) == {1: 2}
    assert c.aggregate(from_item2).execute(
        [{"item2": {"k": 2, "v": 3}}]
    ) == {2: 3}
def test_right_join():
    """Right joins: every right row appears; unmatched ones pair with None."""
    # join condition combines equality with extra inequality predicates
    join1 = (
        c.join(
            c.item(0),
            c.item(1),
            c.and_(
                c.LEFT == c.RIGHT,
                c.LEFT + c.RIGHT < 10,
                c.LEFT > 0,
            ),
            how="right",
        )
        .as_type(list)
        .gen_converter(debug=False)
    )
    assert join1([(0, 1, 2, 3, 3), (3, 3, 4, 5)]) == [
        (3, 3),
        (3, 3),
        (3, 3),
        (3, 3),
        (None, 4),
        (None, 5),
    ]
    # case-insensitive name match via method calls inside the condition
    conv = (
        c.join(
            c.item("first"),
            c.item("second"),
            (
                c.LEFT.item("name").call_method("lower")
                == c.RIGHT.item("full_name").call_method("lower")
            ),
            how="right",
        )
        .as_type(list)
        .gen_converter(debug=False)
    )
    # fmt: off
    assert conv(
        {
            "first": [
                {"name": "JOHN"},
                {"name": "bob"},
                {"name": "ron"},
            ],
            "second": [
                {"full_name": "BOB"},
                {"full_name": "BOB"},
                {"full_name": "John"},
                {"full_name": "Nick"},
                {"full_name": "Nick"},
            ],
        }
    ) == [
        ({"name": "bob"}, {"full_name": "BOB"},),
        ({"name": "bob"}, {"full_name": "BOB"},),
        ({"name": "JOHN"}, {"full_name": "John"},),
        (None, {"full_name": "Nick"}),
        (None, {"full_name": "Nick"}),
    ]
def test_tuple():
    """c.tuple with varargs vs. a single tuple-literal argument."""
    make_tuple = c.tuple(c.item(1), c.item(0), 3).gen_converter()
    assert make_tuple([2, 1]) == (1, 2, 3)
    # passing one tuple literal yields a 1-tuple wrapping it
    make_wrapped = c.tuple((c.item(1), c.item(0), 3)).gen_converter()
    assert make_wrapped([2, 1]) == ((1, 2, 3),)
def test_median_with_group_by(series):
    """Median reducer per group matches statistics.median per key."""
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Median(c.item(1)))
        .execute(series)
    )
    expected = [
        statistics.median(x[1] for x in series if x[0] == key)
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
def test_weighted_average_with_group_by(series):
    """Weighted Average per group matches the reference helper per key."""
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Average(c.item(0), c.item(1)))
        .execute(series)
    )
    expected = [
        weighted_average([x for x in series if x[0] == key])
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
def test_base_reducer():
    """Low-level c.reduce with various `initial` forms, plus invalid nesting.

    The five reduce variants below are different spellings of the same
    "sum with initial 0" reducer; all must produce 6 for [1, 2, 3].
    Nesting reducers inside reducers is rejected with ValueError.
    """
    assert c.aggregate((
        # plain callable with a constant initial value
        c.reduce(lambda a, b: a + b, c.this, initial=0),
        # naive-wrapped callable; initial is a factory (int() -> 0)
        c.reduce(c.naive(lambda a, b: a + b), c.this, initial=int),
        # inline expression; initial as an inline expression
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c.inline_expr("int()"),
            default=0,
        ),
        # inline expression; initial as a conversion-wrapped factory
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c(int),
            default=0,
        ),
        # inline expression; initial as a bare factory
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=int,
            default=0,
        ),
    )).filter(c.this > 5).gen_converter(debug=False)([1, 2, 3]) == [
        6,
        6,
        6,
        6,
        6,
    ]
    # a reducer cannot take another reducer as its input
    with pytest.raises(ValueError):
        c.aggregate(c.ReduceFuncs.Sum(c.reduce(
            c.ReduceFuncs.Count))).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(c.ReduceFuncs.Sum(c.ReduceFuncs.Count() +
                                      1)).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate((c.ReduceFuncs.Count() +
                     2).pipe(c.ReduceFuncs.Sum(c.this) + 1)).gen_converter()

    # DictArray groups values into lists keyed by the first item
    conv = c.aggregate(c.ReduceFuncs.DictArray(
        c.item(0), c.item(1))).gen_converter(debug=False)
    data = [
        ("a", 1),
        ("a", 2),
        ("b", 3),
    ]
    result = {"a": [1, 2], "b": [3]}
    assert conv(data) == result
    # bare reducer on empty input yields None
    assert conv([]) is None

    conv2 = c.aggregate({
        "key": c.ReduceFuncs.DictArray(c.item(0), c.item(1))
    }).gen_converter(debug=False)
    # inside a dict, the missing reducer value becomes None under its key
    assert conv2([]) == {"key": None}
    assert conv2(data) == {"key": result}
def test_top_k_with_group_by(series, k):
    """TopK reducer per group matches Counter.most_common per key."""
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.TopK(k, c.item(1)))
        .execute(series)
    )
    expected = []
    for key in ordered_set(x[0] for x in series):
        counts = Counter(x[1] for x in series if x[0] == key)
        expected.append([value for value, _ in counts.most_common(k)])
    assert eq(actual, expected)
def test_set_comprehension():
    """set_comp deduplicates; sorting a set comprehension is rejected."""
    assert c.set_comp(1).gen_converter()(range(5)) == {1}
    rows = [
        {"name": "John"},
        {"name": "Bill"},
        {"name": "Bill"},
    ]
    names = c.set_comp(c.item("name")).gen_converter()(rows)
    assert names == {"John", "Bill"}
    # sets are unordered, so .sort() is not allowed on set comprehensions
    with pytest.raises(c.ConversionException):
        c.set_comp(c.item("name")).sort(key=lambda x: x)
def test_multi_statement_reducers(dict_series):
    """Custom MultiStatementReducer subclasses inside group_by.

    Also verifies the errors raised for incomplete reducer definitions.
    """
    output = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        SumReducer1(c.item("value")),
        SumReducer2(c.item("value")),
        SumReducer3(c.item("value")),
        SumReducer4(c.item("value")),
        # initial=5 shifts this sum by 5 relative to the others
        SumReducer5(c.item("value"), initial=5),
    )).execute(dict_series, debug=False))
    assert output == [("Nick", 3, 3, 3, 3, 8), ("John", 63, 63, 63, 63, 68)]
    with pytest.raises(AttributeError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            default = 0
            unconditional_init = True

        # prepare_first is not specified
        c.aggregate(SumReducer(c.item("value"))).gen_converter()
    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            unconditional_init = True

        # default is not provided
        SumReducer(c.item("value"))
def test_simple_label():
    """Labels: add_label / label_input / label_output across pipes."""
    # label "a" captures item 2; it is referenced later inside the list_comp
    conv1 = (c.tuple(c.item(2).add_label("a"), c.this()).pipe(
        c.item(1).pipe(c.list_comp(
            (c.this(), c.label("a"))))).gen_converter(debug=False))
    assert conv1([1, 2, 3, 4]) == [(1, 3), (2, 3), (3, 3), (4, 3)]

    # multiple labels: "a" via add_label, "aa"/"bb" via label_input,
    # "collection1"/"b" via label_output; all referenced downstream
    conv2 = (c.tuple(c.item(1).add_label("a"), c.this()).pipe(
        c.item(1),
        label_input={
            "aa": c.item(0),
            "bb": c.item(0)
        },
        label_output="collection1",
    ).pipe(
        c.label("collection1").pipe(
            c.aggregate(
                c.ReduceFuncs.Sum(
                    c.this() + c.label("a") + c.label("aa") +
                    c.input_arg("x") + c.label("collection1").item(0), ))),
        label_output="b",
    ).pipe(c.this() + c.label("b")).gen_converter(debug=False))
    assert conv2([1, 2, 3, 4], x=10) == 140

    # a label can serve as a default value for c.item
    conv3 = (c.tuple(c.item("default").add_label("default"), c.this()).pipe(
        c.item(1).pipe(c.item(
            "abc", default=c.label("default")))).gen_converter(debug=False))
    assert conv3({"default": 1}) == 1

    # label_input must be a dict of label names to conversions
    with pytest.raises(c.ConversionException):
        c.this().pipe(c.this(), label_input=1)
def test_multi_statement_reducers(dict_series):
    """Custom MultiStatementReducer subclasses inside group_by.

    Also verifies ValueError is raised for malformed reducer definitions.
    """
    output = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        SumReducer1(c.item("value")),
        SumReducer2(c.item("value")),
        SumReducer3(c.item("value")),
        SumReducer4(c.item("value")),
        # initial=5 shifts this sum by 5 relative to the others
        SumReducer5(c.item("value"), initial=5),
    )).execute(dict_series, debug=False))
    assert output == [("Nick", 3, 3, 3, 3, 8), ("John", 63, 63, 63, 63, 68)]
    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = {0} + ({1} or 4)", )
            default = 0
            unconditional_init = True

        SumReducer(c.item("value"))
    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = {0} + ({1} or 4)", )
            unconditional_init = True

        SumReducer(c.item("value"))
def test_mode_with_groupby():
    """Mode reducer per group matches statistics.mode per key."""
    series = [(0, 1), (0, 1), (0, 2), (1, 1), (1, 2), (1, 2)]
    actual = (
        c.group_by(c.item(0))
        .aggregate(c.ReduceFuncs.Mode(c.item(1)))
        .execute(series)
    )
    expected = [
        statistics.mode([x[1] for x in series if x[0] == key])
        for key in ordered_set(x[0] for x in series)
    ]
    assert eq(actual, expected)
def test_cross_join():
    """A join with condition True produces the cartesian product."""
    cross = (
        c.join(c.item(0), c.item(1), True)
        .as_type(list)
        .gen_converter(debug=False)
    )
    assert cross(([1, 2, 3], [5, 6])) == [
        (1, 5),
        (1, 6),
        (2, 5),
        (2, 6),
        (3, 5),
        (3, 6),
    ]
def test_comprehension_filter_cast_assumptions():
    """filter() on comprehensions: default result types and `cast` overrides.

    Uses a call-counting predicate to verify the filter callable is invoked
    only the expected number of times.
    """
    # generator comps stay lazy after filtering, with or without cast=None
    assert isinstance(
        c.generator_comp(c.this).filter(c.this).execute(range(10)),
        GeneratorType,
    )
    assert isinstance(
        c.generator_comp(c.this).filter(c.this, cast=None).execute(range(10)),
        GeneratorType,
    )
    assert (c.list_comp(c.this).filter(c.this).execute(range(3))) == [
        1,
        2,
    ]

    def f(x):
        # raise if called more often than allowed, to prove the predicate
        # runs once per input element
        f.number_of_calls += 1
        if f.number_of_calls > f.max_number_of_calls:
            raise ValueError
        return bool(x)

    f.max_number_of_calls = 2
    f.number_of_calls = 0

    # NOTE(review): [0, 0, 1] appears to reach f only twice here — presumably
    # set_comp deduplicates before filtering; confirm against library docs
    assert (c.set_comp(c.this).filter(c.call_func(f,
                                                  c.this)).execute([0, 0,
                                                                    1])) == {
        1,
    }
    # cast overrides the natural result type of the comprehension
    assert (c.set_comp(c.this).filter(c.this,
                                      cast=list).execute([0, 0, 1])) == [
        1,
    ]
    assert (c.set_comp(c.this).filter(c.this).execute(range(3))) == {
        1,
        2,
    }
    assert (c.tuple_comp(c.this).filter(c.this).execute(range(3))) == (
        1,
        2,
    )
    assert (c.tuple_comp(c.this).filter(c.this, list).execute(range(3))) == [
        1,
        2,
    ]
    assert (c.dict_comp(c.this,
                        c.this).filter(c.item(0)).execute(range(3))) == {
        1: 1,
        2: 2,
    }
    assert (c.dict_comp(c.this, c.this).filter(c.item(0),
                                               dict).execute(range(3))) == {
        1: 1,
        2: 2,
    }
def test_manually_defined_reducers():
    """c.reduce with a custom callable, parameterized by an input arg."""
    records = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]
    # which field to sum is chosen at call time via the group_key argument
    grouper = (
        c.group_by(c.item("name"))
        .aggregate(
            c.reduce(
                lambda a, b: a + b,
                c.item(c.input_arg("group_key")),
                initial=0,
            )
        )
        .filter(c.this() > 20)
        .gen_converter(signature="data_, group_key='debit'")
    )
    # John: 540, Nick: 25; Bill (18) filtered out
    assert grouper(records) == [540, 25]
    # John: -160 filtered out; Nick: 82, Bill: 120
    assert grouper(records, group_key="balance") == [82, 120]
def test_tuple_comprehension():
    """tuple_comp mapping plus ascending/descending sort."""
    assert c.tuple_comp(1).gen_converter()(range(5)) == (1,) * 5
    rows = [{"name": "John"}, {"name": "Bill"}, {"name": "Nick"}]
    ascending = c.tuple_comp(c.item("name")).sort(key=lambda n: n)
    assert ascending.gen_converter()(rows) == ("Bill", "John", "Nick")
    assert c.tuple_comp(c.item("name")).sort().gen_converter()(rows) == (
        "Bill",
        "John",
        "Nick",
    )
    descending = c.tuple_comp(c.item("name")).sort(
        key=lambda n: n, reverse=True
    )
    assert descending.gen_converter()(rows) == ("Nick", "John", "Bill")
def test_reducer_reuse(dict_series):
    """Two equivalent reducers can be combined with arithmetic in aggregate."""
    add = lambda a, b: a + b
    sum_a = c.reduce(add, c.item("value"), initial=0)
    sum_b = c.reduce(add, c.item("value"), initial=0)
    result = (
        c.group_by(c.item("name"))
        .aggregate(
            (
                c.item("name"),
                sum_a + 10,
                sum_b + 20,
            )
        )
        .execute(dict_series)
    )
    assert result == [
        ("Nick", 13, 23),
        ("John", 73, 83),
    ]
def test_min_max():
    """c.min / c.max over constants and conversions; single arg is an error."""
    assert c.min(0, 1).execute(None) == 0
    assert c.min(2, 1).execute(None) == 1
    assert c.max(0, 1).execute(None) == 1
    assert c.max(2, 1).execute(None) == 2

    assert c.min(c.item(0), c.item(1)).execute((0, 1)) == 0
    piped = c((2, 1)).pipe(c.min(c.item(0), c.item(1)))
    assert piped.execute(None) == 1

    # builtins min/max require at least two positional args here
    with pytest.raises(TypeError):
        c.min(c.this).execute(-1)
    with pytest.raises(TypeError):
        c.max(c.this).execute(-1)
def test_legacy_dict_reduce_approach(dict_series):
    """Legacy c.reduce(ReduceFuncs.X, (args...)) spelling still works."""
    result = c.aggregate(
        c.reduce(
            c.ReduceFuncs.DictSum,
            (c.item("name"), c.item("value")),
        )
    ).execute(dict_series)
    assert result == {"Nick": 3, "John": 63}

    # DictSum takes exactly two positional conversions
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum(c.this(), c.this(), c.this())
    # and they must not be wrapped in a collection
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum({c.this(), c.this()})
def test_outer_join():
    """Full outer joins: unmatched rows from both sides pair with None.

    "full" and "outer" are accepted spellings of the same join mode.
    """
    # custom Eq predicate (project helper) with an extra constant argument
    join1 = (
        c.join(
            c.item(0),
            c.item(1),
            Eq(c.LEFT, c.RIGHT, 2),
            how="full",
        )
        .as_type(list)
        .gen_converter(debug=False)
    )
    assert join1(([0, 1, 2, 5], [2, 3, 4, 5])) == [
        (0, None),
        (1, None),
        (2, 2),
        (5, None),
        (None, 3),
        (None, 4),
        (None, 5),
    ]
    # compound condition: equality plus inequality predicates
    join2 = (
        c.join(
            c.item(0),
            c.item(1),
            c.and_(
                c.LEFT == c.RIGHT,
                c.LEFT + c.RIGHT < 10,
                c.LEFT > 0,
            ),
            how="outer",
        )
        .as_type(list)
        .gen_converter(debug=False)
    )
    assert join2([(10, 7, 8, 0, 1, 2, 3, 3), (3, 3, 4, 5, 8)]) == [
        (10, None),
        (7, None),
        (8, None),
        (0, None),
        (1, None),
        (2, None),
        (3, 3),
        (3, 3),
        (3, 3),
        (3, 3),
        (None, 4),
        (None, 5),
        (None, 8),
    ]