def test_base_reducer():
    """Exercise ``c.reduce`` with several reducer/initial forms.

    Also checks that the nested-reducer constructions below raise
    ``ValueError`` and that ``DictArray`` aggregation returns ``None`` for
    empty input.
    """
    sum_variants = (
        # plain python callable with a literal initial value
        c.reduce(lambda a, b: a + b, c.this, initial=0),
        # callable wrapped in c.naive, ``int`` passed as initial
        c.reduce(c.naive(lambda a, b: a + b), c.this, initial=int),
        # inline expression reducers with different kinds of ``initial``
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c.inline_expr("int()"),
            default=0,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c(int),
            default=0,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=int,
            default=0,
        ),
    )
    summing = (
        c.aggregate(sum_variants)
        .filter(c.this > 5)
        .gen_converter(debug=False)
    )
    assert summing([1, 2, 3]) == [6] * len(sum_variants)

    # Each builder runs inside ``pytest.raises`` so that the error is caught
    # whether it comes from construction or from ``gen_converter``.
    invalid_builders = (
        lambda: c.aggregate(c.ReduceFuncs.Sum(c.reduce(c.ReduceFuncs.Count))),
        lambda: c.aggregate(c.ReduceFuncs.Sum(c.ReduceFuncs.Count() + 1)),
        lambda: c.aggregate(
            (c.ReduceFuncs.Count() + 2).pipe(c.ReduceFuncs.Sum(c.this) + 1)
        ),
    )
    for build in invalid_builders:
        with pytest.raises(ValueError):
            build().gen_converter()

    pairs = [("a", 1), ("a", 2), ("b", 3)]
    grouped = {"a": [1, 2], "b": [3]}

    dict_array_conv = c.aggregate(
        c.ReduceFuncs.DictArray(c.item(0), c.item(1))
    ).gen_converter(debug=False)
    assert dict_array_conv(pairs) == grouped
    assert dict_array_conv([]) is None

    nested_conv = c.aggregate(
        {"key": c.ReduceFuncs.DictArray(c.item(0), c.item(1))}
    ).gen_converter(debug=False)
    assert nested_conv([]) == {"key": None}
    assert nested_conv(pairs) == {"key": grouped}
def test_labels():
    """Check that a label added in one pipe stage is readable in a later one,
    and that labels can be added inside generator/list comprehensions."""
    # y["abc"], remembered under the "abc" label
    labeled_abc = c.input_arg("y").item("abc").add_label("abc")
    # x["cde"] + 10
    cde_plus_ten = c.input_arg("x").pipe(
        c.inline_expr("{cde} + 10").pass_args(cde=c.this().item("cde"))
    )
    # previous stage result + labeled value
    add_labeled = c.inline_expr("{this} + {abc}").pass_args(
        this=c.this(),
        abc=c.label("abc"),
    )

    conv1 = c.if_(
        1,
        labeled_abc.pipe(cde_plus_ten).pipe(add_labeled),
        2,
    ).gen_converter(debug=False)
    assert conv1(data_=1, x={"cde": 2}, y={"abc": 3}) == 15

    # these only need to run without raising
    labeling_gen = c.generator_comp(c.this().add_label("a"))
    list(labeling_gen.execute([1, 2]))
    labeling_list = c.list_comp(c.this().add_label("a"))
    labeling_list.execute([1, 2])
def test_is_independent():
    """Check ``is_independent()`` for conversions expected to report True
    and for those expected to report False."""
    independent = (
        c(0),
        c(int),
        c(int).call(),
        c.label("a"),
        c.inline_expr("{}()").pass_args(int),
        c.escaped_string("int()"),
        c({"a": c.input_arg("key")}),
    )
    dependent = (
        c.iter({"a": 1}),
        c.this,
        c({"a": 1}).item("a"),
        c({"a": 1}).item(c.item("a")),
        c.inline_expr("{}()").pass_args(c.this),
        c.aggregate({"a": 1}),
        c.this.add_label("a"),
        c(int).call(c.item(0)),
    )

    for conversion in independent:
        assert conversion.is_independent()
    for conversion in dependent:
        assert not conversion.is_independent()
def test_pipes():
    """Check chained ``pipe`` calls, piping into plain callables, and that
    piping into a conversion with ``_predefined_input`` raises."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this()))
    total = c.call_func(sum, c.this())
    plus_lookup = c.call_func(
        lambda x, a: x + a,
        c.this(),
        c.naive({"abc": 10}).item(c.input_arg("key_name")),
    )
    pipeline = squares.pipe(total).pipe(plus_lookup).pipe([c.this(), c.this()])
    # 1 + 4 + 9 = 14, plus 10 looked up by key_name
    assert pipeline.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    # two equivalent ways of turning the parsed datetime into a date
    for to_date in (
        c.call_func(lambda dt: dt.date(), c.this()),
        c.this().call_method("date"),
    ):
        parsed = c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(to_date)
        assert parsed.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    with pytest.raises(c.ConversionException):
        c.naive(True).pipe(c.item("key1", _predefined_input={"key1": 777}))
class SumIfGte10(MultiStatementReducer):
    """Multi-statement reducer whose code templates are guarded by
    ``{0} and {0} >= 10``.

    ``prepare_first`` assigns ``{0}`` to the result, ``reduce`` assigns
    ``{prev_result} + {0}``, and ``default`` is the inline expression ``0``.
    """

    prepare_first = (
        "if {0} and {0} >= 10:",
        " %(result)s = {0}",
    )
    reduce = ("if {0} and {0} >= 10:", " %(result)s = {prev_result} + {0}")
    default = c.inline_expr("0")
def test_input_arg():
    """Check ``c.input_arg`` with a type cast and keyword ``pass_args`` inside
    an inline expression."""
    as_int = c.input_arg("x").as_type(int)
    assert as_int.execute(None, x="10") == 10

    describe = c.inline_expr(
        """"{{}}_{{}}".format(type({x}).__name__, {x})"""
    ).pass_args(x=c.item("value"))
    converter = describe.gen_converter()
    assert converter({"value": 123}) == "int_123"
def test_memory_freeing():
    """Check that calling a converter does not change the measured size of
    its ``__dict__``, and that a converter writing its input into
    ``globals()`` fails with ``AssertionError``."""
    add_first_item = c.list_comp(c.this() + c.label("input_data").item(0))
    converter = c.this().pipe(
        add_first_item,
        label_input={"input_data": c.this()},
    ).gen_converter(debug=True)

    sizes = [total_size(converter.__dict__)]
    for start in range(100):
        l_input = list(range(start, start + 3))
        # every element is increased by the first element of the input
        l_out = [value + l_input[0] for value in l_input]
        assert converter(l_input) == l_out
        sizes.append(total_size(converter.__dict__))

    baseline, *later = sizes
    assert all(baseline == size for size in later), sizes

    leaking = c.inline_expr("globals().__setitem__('a', {}) or 1").pass_args(
        c.this()
    )
    conv2 = leaking.gen_converter()
    with pytest.raises(AssertionError):
        # should raise because of a memory leak
        conv2(123)
def test_pipes():
    """Check chained ``pipe`` calls, piping into plain callables, nested
    comprehensions and ``pipe`` followed by filter/sort/as_type."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this))
    total = c.call_func(sum, c.this)
    plus_lookup = c.call_func(
        lambda x, a: x + a,
        c.this,
        c.naive({"abc": 10}).item(c.input_arg("key_name")),
    )
    pipeline = squares.pipe(total).pipe(plus_lookup).pipe([c.this, c.this])
    # 1 + 4 + 9 = 14, plus 10 looked up by key_name
    assert pipeline.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    # two equivalent ways of turning the parsed datetime into a date
    for to_date in (
        c.call_func(lambda dt: dt.date(), c.this),
        c.this.call_method("date"),
    ):
        parsed = c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(to_date)
        assert parsed.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    transaction = {
        "id": c.item(0).as_type(str),
        # falsy amounts become None, the rest are cast to Decimal
        "amount": c.item(1).pipe(c.if_(c.this, c.this.as_type(Decimal), None)),
    }
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(c.list_comp(transaction)),
    ).gen_converter(debug=False)
    rows = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    expected = {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }
    assert conv(rows) == expected

    truthy_sorted = (
        c.this.pipe(lambda it: it).filter(c.this).sort().as_type(list)
    )
    assert truthy_sorted.execute((2, 1, 0)) == [1, 2]
def test_grouping():
    """Check ``c.group_by(...).aggregate(...)`` with custom reducers,
    ``c.ReduceFuncs`` helpers, dict-shaped aggregations, multiple/no group-by
    fields and two invalid aggregate definitions."""
    data = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]
    result = (c.group_by(c.item("name")).aggregate((
        # the group-by field itself, three ways
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        # sum of debits on top of the "arg1" input argument (100 below)
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        # same sum, written as an inline expression with a callable initial
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        # max debit among rows with negative balance; falls back to "arg1"
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        # a reducer used as an argument of another call (result * 1000)
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda a: a,
                default=0,
                where=c.call_func(lambda x: x < 0, c.item("balance")),
            ),
            1000,
        ),
        # max debit among rows with balance > "arg2" (0 below), negated
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        # balance taken from the rows with the largest / smallest debit
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data, arg1=100, arg2=0, debug=False))
    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    with pytest.raises(c.ConversionException):
        # NOTE(review): the aggregate uses c.item("category"), which is
        # neither the group-by field ("name") nor wrapped in a reducer --
        # presumably that is what triggers the exception; confirm.
        result = (c.group_by(c.item("name")).aggregate((
            c.item("category"),
            c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        )).execute(data, debug=False))

    # dict-shaped aggregation; note that the first key is itself a conversion
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.ReduceFuncs.Count(),
        "max": c.ReduceFuncs.Max(c.item("debit")),
        "min": c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct": c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct": c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict": c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    # a plain dict and the equivalent c.dict(...) must give the same output
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {10: 'John', 18: 'Bill', 200: 'John', 300: 'John'},
            'max': 300,
            'min': 10,
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        },
        {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {7: 'Nick', 30: 'John'},
            'max': 30,
            'min': 7,
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on

    # aggregate without grouping, then reuse the single result twice
    result3 = (c.aggregate(c.ReduceFuncs.Sum(c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(), c.this())).execute(data, debug=False))
    assert result3 == 583 * 2

    # two group-by fields passed as separate positional arguments
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(
        *by).aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")), )).execute(
            data, debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on

    # group_by() without fields
    result5 = (c.group_by().aggregate(c.ReduceFuncs.Sum(
        c.item("debit"))).execute(data, debug=False))
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # "by" is passed as ONE group-by field (a tuple), while the aggregate
        # uses the separate items of this tuple
        (c.group_by(by).aggregate(
            by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(
                data, debug=False))
def test_call_like_methods():
    """Check ``is_itself_callable_like()`` for an inline expression (truthy)
    and for an item lookup (``None``)."""
    inline = c.inline_expr("1")
    assert inline.is_itself_callable_like()

    lookup = c.item(1)
    assert lookup.is_itself_callable_like() is None
"name": "Nick", "debit": 3 }, ] reducers_in_out = [ dict( groupby=c.item("name"), reduce=c.reduce(lambda a, b: a + b, c.item("debit"), initial=0), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.reduce(c.inline_expr("{} + {}"), c.item("debit"), initial=0), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.ReduceFuncs.Sum(c.item("debit")), data=reducer_data1, output=[('Bill', 150), ('Nick', 1)], raises=None, ), dict( groupby=c.item("name"), reduce=c.ReduceFuncs.Sum(c.item("debit")), data=reducer_data1 + reducer_data2,
def test_doc__index_deserialization():
    """Documentation example: turn raw employee dicts into a mapping of
    ``id`` to ``Employee`` objects with cleaned-up fields."""

    class Employee:
        # keeps the received keyword arguments so the test can inspect them
        def __init__(self, **kwargs):
            self.kwargs = kwargs

    input_data = {
        "objects": [
            {
                "id": 1,
                "first_name": "john",
                "last_name": "black",
                "dob": None,
                "salary": "1,000.00",
                "department": "D1 ",
                "date": "2000-01-01",
            },
            {
                "id": 2,
                "first_name": "bob",
                "last_name": "wick",
                "dob": "1900-01-01",
                "salary": "1,001.00",
                "department": "D3 ",
                "date": "2000-01-01",
            },
        ]
    }

    # get the value by the "department" key, then call its "strip" method
    department = c.item("department").call_method("strip")
    first_name = c.item("first_name").call_method("capitalize")
    last_name = c.item("last_name").call_method("capitalize")

    # call the "format" method of a string, passing first & last names as
    # parameters
    full_name = c("{} {}").call_method("format", first_name, last_name)
    date_of_birth = c.item("dob")

    # "strptime" with the format fixed; the input is whatever gets piped in
    parse_date = c.call_func(datetime.strptime, c.this(), "%Y-%m-%d").call_method("date")

    conv = (
        c.item("objects").pipe(
            c.generator_comp({
                "id": c.item("id"),
                "first_name": first_name,
                "last_name": last_name,
                "full_name": full_name,
                # parse the date of birth only when it is truthy
                "date_of_birth": c.if_(
                    date_of_birth,
                    date_of_birth.pipe(parse_date),
                    None,
                ),
                # drop thousands separators before building a Decimal
                "salary": c.call_func(
                    Decimal,
                    c.item("salary").call_method("replace", ",", ""),
                ),
                # pass a hardcoded dict and get its value by the stripped
                # "department" key
                "department_id": c.naive({
                    "D1": 10,
                    "D2": 11,
                    "D3": 12,
                }).item(department),
                "date": c.item("date").pipe(parse_date),
            })).pipe(
                c.dict_comp(
                    c.item("id"),  # key
                    # write a python code expression and format it with the
                    # passed parameters
                    c.inline_expr("{employee_cls}(**{kwargs})").pass_args(
                        employee_cls=Employee,
                        kwargs=c.this(),
                    ),  # value
                )).gen_converter(debug=True))

    result = conv(input_data)
    assert result[1].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": None,
        "department_id": 10,
        "first_name": "John",
        "full_name": "John Black",
        "id": 1,
        "last_name": "Black",
        "salary": Decimal("1000.00"),
    }
    assert result[2].kwargs == {
        "date": date(2000, 1, 1),
        "date_of_birth": date(1900, 1, 1),
        "department_id": 12,
        "first_name": "Bob",
        "full_name": "Bob Wick",
        "id": 2,
        "last_name": "Wick",
        "salary": Decimal("1001.00"),
    }
def test_hashes():
    """Check that conversions built from equal arguments hash equally and
    those built from different arguments do not."""
    for make in (c.input_arg, c.inline_expr):
        assert hash(make("abc")) == hash(make("abc"))
        assert hash(make("abd")) != hash(make("abc"))
def test_grouping():
    """Check ``c.group_by(...).aggregate(...)`` with ``c.reduce`` reducers
    (including filtered ones), dict-shaped aggregations and multiple/no
    group-by fields."""
    data = [
        {"name": "John", "category": "Games", "debit": 10, "balance": 90},
        {"name": "John", "category": "Games", "debit": 200, "balance": -110},
        {"name": "John", "category": "Food", "debit": 30, "balance": -140},
        {"name": "John", "category": "Games", "debit": 300, "balance": 0},
        {"name": "Nick", "category": "Food", "debit": 7, "balance": 50},
        {"name": "Nick", "category": "Games", "debit": 18, "balance": 32},
        {"name": "Bill", "category": "Games", "debit": 18, "balance": 120},
    ]
    result = (c.group_by(c.item("name")).aggregate((
        # the group-by field itself, three ways
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        # sum of debits on top of the "arg1" input argument (100 below)
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        # same sum, written as an inline expression with a callable initial
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        # max debit among rows with negative balance; falls back to "arg1"
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))),
        # a filtered reducer used as an argument of another call (* 1000)
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda x: x < 0, c.item("balance"))),
            1000,
        ),
        # max debit among rows with positive balance, negated
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        # balance taken from the rows with the largest / smallest debit
        c.reduce(
            c.ReduceFuncs.MaxRow,
            c.item("debit"),
        ).item("balance"),
        c.reduce(
            c.ReduceFuncs.MinRow,
            c.item("debit"),
        ).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data, arg1=100, debug=False))
    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120)
    ]
    # fmt: on

    # dict-shaped aggregation; note that the first key is itself a conversion
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ): c.item("category").call_method("lower"),
        "count": c.reduce(c.ReduceFuncs.Count),
        "count_distinct": c.reduce(c.ReduceFuncs.CountDistinct, c.item("name")),
        "array_agg_distinct": c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict": c.reduce(c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))),
    }
    # a plain dict and the equivalent c.dict(...) must give the same output
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {10: 'John', 18: 'Bill', 200: 'John', 300: 'John'},
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        },
        {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {7: 'Nick', 30: 'John'},
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on

    # aggregate without grouping, then reuse the single result twice
    result3 = (c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(), c.this())).execute(data, debug=False))
    assert result3 == 583 * 2

    # two group-by fields passed as separate positional arguments
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(*by).aggregate(by + (
        c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(data, debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on

    # group_by() without fields
    result5 = (c.group_by().aggregate(
        c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).execute(data, debug=False))
    assert result5 == 583
class SumReducer5(MultiStatementReducer):
    """Multi-statement reducer whose single ``reduce`` template adds
    ``({0} or 5)`` to the previous result.

    ``default`` is the inline expression ``0`` and ``unconditional_init`` is
    enabled.
    """

    reduce = (
        "%(result)s = {prev_result} + ({0} or 5)",
    )
    default = c.inline_expr("0")
    unconditional_init = True
def test_pipes():
    """Check chained ``pipe`` calls, the ``max_pipe_length`` option and that
    piping into an existing conversion leaves its own input chain intact."""
    squares = c.list_comp(c.inline_expr("{0} ** 2").pass_args(c.this()))
    total = c.call_func(sum, c.this())
    plus_lookup = c.call_func(
        lambda x, a: x + a,
        c.this(),
        c.naive({"abc": 10}).item(c.input_arg("key_name")),
    )
    pipeline = squares.pipe(total).pipe(plus_lookup).pipe([c.this(), c.this()])
    # 1 + 4 + 9 = 14, plus 10 looked up by key_name
    assert pipeline.execute([1, 2, 3], key_name="abc", debug=False) == [24, 24]

    # two equivalent ways of turning the parsed datetime into a date
    for to_date in (
        c.call_func(lambda dt: dt.date(), c.this()),
        c.this().call_method("date"),
    ):
        parsed = c.item(0).pipe(datetime.strptime, "%Y-%m-%d").pipe(to_date)
        assert parsed.execute(["2019-01-01"], debug=False) == date(2019, 1, 1)

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10

        # nesting one pipe more than allowed must fail
        with pytest.raises(c.ConversionException):
            conv = c.this()
            for _ in range(max_pipe_length + 1):
                conv = c.this().pipe(conv)

        # the chain built so far is too long once the limit is lowered
        with c.OptionsCtx() as options2, pytest.raises(c.ConversionException):
            options2.max_pipe_length = 5
            conv.clone()

    transaction = {
        "id": c.item(0).as_type(str),
        # falsy amounts become None, the rest are cast to Decimal
        "amount": c.item(1).pipe(
            c.if_(c.this(), c.this().as_type(Decimal), None)
        ),
    }
    conv = c.dict_comp(
        c.item("name"),
        c.item("transactions").pipe(c.list_comp(transaction)),
    ).gen_converter(debug=True)
    rows = [{"name": "test", "transactions": [(0, 0), (1, 10)]}]
    expected = {
        "test": [
            {"id": "0", "amount": None},
            {"id": "1", "amount": Decimal("10")},
        ]
    }
    assert conv(rows) == expected

    with c.OptionsCtx() as options:
        max_pipe_length = options.max_pipe_length = 10
        conv1 = c.item(0).pipe(c.item(1).pipe(c.item(2)))

        def measure_pipe_length(conv):
            # walk the ``_predefined_input`` chain, at most max_pipe_length
            # steps deep
            length = 0
            for _ in range(max_pipe_length):
                if conv._predefined_input is None:
                    break
                length += 1
                conv = conv._predefined_input
            return length

        pipe_length_before = measure_pipe_length(conv1)
        # repeatedly piping INTO conv1 must not grow conv1's own chain
        for _ in range(max_pipe_length + 20):
            c.generator_comp(c.this().pipe(conv1))
        pipe_length_after = measure_pipe_length(conv1)
        assert pipe_length_after == pipe_length_before