示例#1
0
def test_group_by_reducer_clones():
    """Reusing one reducer object in several pipes must clone it, not share it.

    First verifies a reducer's result can itself be piped further
    (``Sum(...) + 1``); then pipes the same ``DictSum`` instance from two
    different items and checks each aggregation only sees its own input.
    """
    data = [
        {
            "value": 2
        },
        {
            "value": 3
        },
    ]
    # the reducer output is piped into a plain conversion (2 + 3 -> 5, then + 1)
    conv = c.aggregate(
        c.item("value").pipe(c.ReduceFuncs.Sum(c.this()).pipe(c.this() + 1)))
    assert conv.execute(data) == 6

    # one reducer instance, two independent pipe sources
    reducer = c.ReduceFuncs.DictSum(c.item("k"), c.item("v"))
    reducer1 = c.item("item1").pipe(reducer)
    reducer2 = c.item("item2").pipe(reducer)
    assert c.aggregate(reducer1).execute([{
        "item1": {
            "k": 1,
            "v": 2
        }
    }]) == {
        1: 2
    }
    assert c.aggregate(reducer2).execute([{
        "item2": {
            "k": 2,
            "v": 3
        }
    }]) == {
        2: 3
    }
def test_multi_statement_reducers(dict_series):
    """Custom ``MultiStatementReducer`` subclasses work inside ``aggregate``.

    ``SumReducer1..5`` are defined elsewhere in this module; all must sum
    identically, and ``SumReducer5`` additionally honors ``initial=5``.
    Also checks validation: a reducer without ``prepare_first`` raises
    ``AttributeError`` at converter generation, and one without ``default``
    raises ``ValueError`` at construction.
    """
    output = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        SumReducer1(c.item("value")),
        SumReducer2(c.item("value")),
        SumReducer3(c.item("value")),
        SumReducer4(c.item("value")),
        SumReducer5(c.item("value"), initial=5),
    )).execute(dict_series, debug=False))
    assert output == [("Nick", 3, 3, 3, 3, 8), ("John", 63, 63, 63, 63, 68)]

    with pytest.raises(AttributeError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            default = 0
            unconditional_init = True

        # prepare_first is not specified
        c.aggregate(SumReducer(c.item("value"))).gen_converter()

    with pytest.raises(ValueError):

        class SumReducer(MultiStatementReducer):
            reduce = ("%(result)s = %(result)s + ({0} or 4)", )
            unconditional_init = True

        # default is not provided
        SumReducer(c.item("value"))
def test_multiple_aggregations(dict_series):
    """Aggregates may be chained via pipes: Array -> ArrayDistinct -> Max."""
    collect_names = c.aggregate(c.ReduceFuncs.Array(c.item("name")))
    distinct_then_max = c.aggregate(
        c.ReduceFuncs.ArrayDistinct(c.this())
    ).pipe(c.aggregate(c.ReduceFuncs.Max(c.this())))

    result = collect_names.pipe(distinct_then_max).execute(
        dict_series, debug=False
    )
    assert result == "Nick"
示例#4
0
def test_base_reducer():
    """``c.reduce`` with callables / inline expressions, initials and defaults.

    NOTE(review): a later ``test_base_reducer`` definition in this module
    shadows this one, so pytest only collects the later definition.
    """
    # five equivalent sum reducers, all expected to produce 6 for [1, 2, 3]
    assert c.aggregate((
        c.reduce(lambda a, b: a + b, c.this, initial=0),
        c.reduce(c.naive(lambda a, b: a + b), c.this, initial=int),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c.inline_expr("int()"),
            default=0,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=c(int),
            default=0,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.this,
            initial=int,
            default=0,
        ),
    )).filter(c.this > 5).gen_converter(debug=False)([1, 2, 3]) == [
        6,
        6,
        6,
        6,
        6,
    ]

    # nesting reducers inside reducers is rejected
    with pytest.raises(ValueError):
        c.aggregate(c.ReduceFuncs.Sum(c.reduce(
            c.ReduceFuncs.Count))).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate(c.ReduceFuncs.Sum(c.ReduceFuncs.Count() +
                                      1)).gen_converter()
    with pytest.raises(ValueError):
        c.aggregate((c.ReduceFuncs.Count() +
                     2).pipe(c.ReduceFuncs.Sum(c.this) + 1)).gen_converter()

    # DictArray groups values per key; empty input aggregates to None
    conv = c.aggregate(c.ReduceFuncs.DictArray(
        c.item(0), c.item(1))).gen_converter(debug=False)
    data = [
        ("a", 1),
        ("a", 2),
        ("b", 3),
    ]
    result = {"a": [1, 2], "b": [3]}
    assert conv(data) == result
    assert conv([]) is None

    conv2 = c.aggregate({
        "key": c.ReduceFuncs.DictArray(c.item(0), c.item(1))
    }).gen_converter(debug=False)
    assert conv2([]) == {"key": None}
    assert conv2(data) == {"key": result}
示例#5
0
def test_aggregate_no_init_loops():
    """``where`` clauses filter rows per-reducer within a single aggregate."""
    converter = c.aggregate(
        {
            "first_a": c.ReduceFuncs.First(c.item("a"), where=c.item("b") > 0),
            "list_b": c.ReduceFuncs.Array(c.item("b"), where=c.item("a") > 0),
        }
    ).gen_converter(debug=False)

    rows = [
        {"a": 1, "b": 0},
        {"a": 2, "b": 1},
        {"a": 3, "b": 2},
        {"a": 4, "b": 3},
    ]
    # first "a" with positive "b" is 2; every row has positive "a"
    assert converter(rows) == {
        "first_a": 2,
        "list_b": [0, 1, 2, 3],
    }
示例#6
0
def test_aggregate_func():
    """Mixed reducers in one aggregate: Array, Sum+Count, ArrayDistinct, MaxRow."""
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]

    converter = c.aggregate(
        {
            "a": c.ReduceFuncs.Array(c.item("a")),
            # reducers support arithmetic: sum(25) + count(3)
            "ab_sum": c.ReduceFuncs.Sum(c.item("a")) + c.ReduceFuncs.Count(),
            "b": c.ReduceFuncs.ArrayDistinct(c.item("b")),
            # row with max "a", then its "b" field
            "b_max_a": c.ReduceFuncs.MaxRow(c.item("a")).item(
                "b", default=None
            ),
        }
    ).gen_converter(debug=False)

    assert converter(rows) == {
        "a": [5, 10, 10],
        "ab_sum": 28,
        "b": ["foo", "bar"],
        "b_max_a": "bar",
    }
def test_weighted_average(series):
    """Weighted ``Average(value, weight)`` plus grouped averages with defaults.

    NOTE(review): a later function of the same name redefines this test, so
    only the later definition is collected by pytest.
    """
    assert eq(
        c.aggregate(
            c.ReduceFuncs.Average(c.item(0), c.item(1))
        ).execute(series),
        weighted_average(series),
    )

    # unweighted averages grouped by item(0) // 5; the where-clause never
    # matches (values < 10), so the default -1 is used
    grouped = c.group_by(c.item(0) // 5).aggregate([
        c.item(0) // 5,
        c.ReduceFuncs.Average(c.item(1)),
        c.ReduceFuncs.Average(c.item(1), where=c.item(0) > 10, default=-1),
    ])
    assert grouped.execute(zip(range(10), range(10)), debug=False) == [
        [0, 2, -1],
        [1, 7, -1],
    ]

    # same, but with explicit weights cycling through 1, 2
    weighted_grouped = c.group_by(c.item(0) // 5).aggregate([
        c.item(0) // 5,
        c.ReduceFuncs.Average(c.item(1), c.item(2)),
        c.ReduceFuncs.Average(
            c.item(1), c.item(2), where=c.item(0) > 10, default=-1
        ),
    ])
    assert weighted_grouped.execute(
        zip(range(10), range(10), cycle([1, 2])), debug=False
    ) == [
        [0, 2, -1],
        [1, 7, -1],
    ]
示例#8
0
def test_simple_label():
    """Labels store intermediate values for later pipe steps.

    ``add_label``/``label_input``/``label_output`` write labels;
    ``c.label(name)`` reads them back anywhere downstream.
    """
    conv1 = (c.tuple(c.item(2).add_label("a"), c.this()).pipe(
        c.item(1).pipe(c.list_comp(
            (c.this(), c.label("a"))))).gen_converter(debug=False))
    assert conv1([1, 2, 3, 4]) == [(1, 3), (2, 3), (3, 3), (4, 3)]

    # labels may be attached to both the pipe's input and its output
    conv2 = (c.tuple(c.item(1).add_label("a"), c.this()).pipe(
        c.item(1),
        label_input={
            "aa": c.item(0),
            "bb": c.item(0)
        },
        label_output="collection1",
    ).pipe(
        c.label("collection1").pipe(
            c.aggregate(
                c.ReduceFuncs.Sum(
                    c.this() + c.label("a") + c.label("aa") +
                    c.input_arg("x") + c.label("collection1").item(0), ))),
        label_output="b",
    ).pipe(c.this() + c.label("b")).gen_converter(debug=False))
    assert conv2([1, 2, 3, 4], x=10) == 140

    # a label can serve as the default of a c.item lookup
    conv3 = (c.tuple(c.item("default").add_label("default"), c.this()).pipe(
        c.item(1).pipe(c.item(
            "abc", default=c.label("default")))).gen_converter(debug=False))
    assert conv3({"default": 1}) == 1

    # label_input must be a dict of label name -> conversion
    with pytest.raises(c.ConversionException):
        c.this().pipe(c.this(), label_input=1)
示例#9
0
def test_group_by_with_double_ended_pipes():
    """Pipes feeding into AND out of a reducer work inside group_by/aggregate."""
    input_data = [
        {
            "value": 1
        },
        {
            "value": 2
        },
    ]
    # fmt: off
    conv = c.aggregate(
        c.item("value").pipe(c.ReduceFuncs.Sum(c.this())).pipe(
            c.this() * 2)).gen_converter()
    # fmt: on
    result = conv(input_data)
    assert result == 6

    input_data = [
        {
            "k": "A",
            "v": 1
        },
        {
            "k": "A",
            "v": 2
        },
    ]
    # the same reducer object is used twice: once fed from an input arg
    # ("v1": 7 + 7 = 14) and once fed from the rows ("v2": 1 + 2 = 3);
    # each use must keep its own state
    reducer = c.ReduceFuncs.Sum(c.item("v"))
    conv = (c.group_by(c.item("k")).aggregate({
        "v1":
        c.input_arg("test").pipe(reducer),
        "v2":
        reducer,
    }).gen_converter())
    assert conv(input_data, test={"v": 7}) == [{"v1": 14, "v2": 3}]
def test_weighted_average(series):
    """Weighted Average reducer must agree with the ``weighted_average`` helper.

    NOTE(review): this redefines an earlier ``test_weighted_average`` in the
    module; only this definition is collected by pytest.
    """
    actual = c.aggregate(
        c.ReduceFuncs.Average(c.item(0), c.item(1))
    ).execute(series)
    assert eq(actual, weighted_average(series))
def test_nested_group_by():
    """Aggregates nested inside group_by reducers (and inside if_/pipe chains).

    The inner ``c.aggregate(Sum)`` runs per-row over item(1)'s list; the
    outer ``Sum`` then totals those per-row sums per group.
    """
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.ReduceFuncs.Sum(
                c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))
            ),
        )
    ).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    # same totals, but wrapped in layers of pass-through if_/pipe conversions
    # to stress conversion nesting; (agg_conv, agg_conv) makes a 2-tuple of
    # identical sums and .pipe(c.item(1)) picks the second one
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    assert c.group_by(c.item(0)).aggregate(
        (
            c.item(0),
            c.if_(c.item(1), c.item(1), c.item(1),).pipe(
                c.if_(c.this(), c.this(), c.this(),).pipe(
                    c.ReduceFuncs.Sum(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                        .pipe((agg_conv, agg_conv))
                        .pipe(c.item(1))
                    ).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )
                    ),
                )
            ),
        )
    ).execute(data, debug=True) == [
        (0, 21),
        (1, 9),
    ]
示例#12
0
def test_pipe_conversion():
    """``PipeConversion`` feeds one conversion's result into the next."""
    from convtools import conversion as c
    from convtools.base import PipeConversion

    assert PipeConversion(c.naive([1, 2, 3]), c.item(1)).execute(None) == 2
    assert (PipeConversion(c.item("key1"),
                           c.item("key2")).execute({"key1": {
                               "key2": 3
                           }},
                                                   debug=False) == 3)
    assert (c.this.pipe(c.list_comp(c.this + 1)).filter(c.this > 3).execute(
        [1, 2, 3, 4, 5, 6], debug=False)) == [4, 5, 6, 7]

    # smoke test: a reducer default piped into an if_ must generate cleanly
    c.aggregate(
        c.ReduceFuncs.Array(c.item("key"), default=list).pipe(
            c.if_(
                c.call_func(any, c.generator_comp(c.this.is_(None))),
                c.call_func(list),
                c.this,
            ))).gen_converter(debug=False)
示例#13
0
def test_legacy_dict_reduce_approach(dict_series):
    """Legacy ``c.reduce(ReduceFuncs.X, (key, value))`` call style still works."""
    result = c.aggregate(
        c.reduce(c.ReduceFuncs.DictSum, (c.item("name"), c.item("value")))
    ).execute(dict_series)
    assert result == {"Nick": 3, "John": 63}

    # DictSum takes exactly two positional expressions (key, value) ...
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum(c.this(), c.this(), c.this())
    # ... and rejects a set argument
    with pytest.raises(ValueError):
        c.ReduceFuncs.DictSum({c.this(), c.this()})
示例#14
0
def test_is_independent():
    """``is_independent`` is True only when a conversion never reads its input."""
    # independent: built purely from naive values, input args, or labels
    assert c(0).is_independent()
    assert c(int).is_independent()
    assert c(int).call().is_independent()
    assert c.label("a").is_independent()
    assert c.inline_expr("{}()").pass_args(int).is_independent()
    assert c.escaped_string("int()").is_independent()
    assert c({"a": c.input_arg("key")}).is_independent()
    # dependent: anything touching the piped-in data (c.this, c.item, iter,
    # aggregate, add_label, or args derived from the input)
    assert not c.iter({"a": 1}).is_independent()
    assert not c.this.is_independent()
    assert not c({"a": 1}).item("a").is_independent()
    assert not c({"a": 1}).item(c.item("a")).is_independent()
    assert not c.inline_expr("{}()").pass_args(c.this).is_independent()
    assert not c.aggregate({"a": 1}).is_independent()
    assert not c.this.add_label("a").is_independent()
    assert not c(int).call(c.item(0)).is_independent()
示例#15
0
def test_join_with_complex_pipe():
    """Join the aggregated array with an extended copy of itself, keep RIGHT."""

    def with_extras(values):
        # appends two values that duplicate existing "a"s
        return values + [1, 3]

    pipeline = (
        c.aggregate(c.ReduceFuncs.Array(c.item("a")))
        .pipe(
            c.join(
                c.this(),
                c.call_func(with_extras, c.this()),
                c.LEFT == c.RIGHT,
            )
        )
        .iter(c.item(1))
        .as_type(list)
    )

    rows = [{"a": 1}, {"a": 2}, {"a": 3}]
    # 1 and 3 each match twice after extension, 2 matches once
    assert pipeline.execute(rows) == [1, 1, 2, 3, 3]
示例#16
0
def test_reducer_inlining(dict_series):
    """The reducer's ``default`` factory must be called exactly once.

    ``f`` raises on any call beyond ``max_number_of_calls``, proving the
    generated converter does not duplicate the default-factory call when
    the reducer result is inlined into the downstream ``if_``.
    """
    def f():
        f.number_of_calls += 1
        if f.number_of_calls > f.max_number_of_calls:
            raise Exception
        return []

    f.max_number_of_calls = 1
    f.number_of_calls = 0

    # the where-clause never matches (no negative values), so the default
    # factory supplies the result
    converter = c.aggregate(
        c.ReduceFuncs.Array(c.item("name"),
                            default=f,
                            where=c.item("value") < 0).pipe(
                                c.if_(
                                    if_true=c.this(),
                                    if_false=c.this(),
                                ))).gen_converter(debug=False)
    assert converter(dict_series) == []
示例#17
0
def test_aggregate():
    """Legacy ``c.reduce(ReduceFuncs.X, expr)`` style inside a dict aggregate."""
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
    ]

    converter = c.aggregate(
        {
            "a": c.reduce(c.ReduceFuncs.Array, c.item("a")),
            "a_sum": c.reduce(c.ReduceFuncs.Sum, c.item("a")),
            "b": c.reduce(c.ReduceFuncs.ArrayDistinct, c.item("b")),
        }
    ).gen_converter(debug=True)

    expected = {
        "a": [5, 10, 10],
        "a_sum": 25,
        "b": ["foo", "bar"],
    }
    assert converter(rows) == expected
示例#18
0
def test_top_k_invalid_input(k):
    """TopK must raise TypeError for an invalid ``k``.

    ``k`` is presumably supplied by a parametrize/fixture defined elsewhere
    in the module — TODO confirm.
    """
    with pytest.raises(TypeError):
        c.aggregate(c.ReduceFuncs.TopK(k, c.this())).execute([1, 2])
示例#19
0
def test_grouping():
    """Broad group_by/aggregate coverage: custom reduces, where-clauses,
    input args, dict aggregations, tuple group-by keys and validation.

    NOTE(review): a later ``test_grouping`` definition in this module shadows
    this one, so pytest only collects the later definition.
    """
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # one wide per-name aggregation mixing lambda/inline-expr/builtin reduces
    result = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
            unconditional_init=True,
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
            unconditional_init=True,
        ),
        c.reduce(
            max,
            c.item("debit"),
            prepare_first=lambda a: a,
            default=c.input_arg("arg1"),
            where=c.call_func(lambda x: x < 0, c.item("balance")),
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                max,
                c.item("debit"),
                prepare_first=lambda a: a,
                default=0,
                where=c.call_func(lambda x: x < 0, c.item("balance")),
            ),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
                where=c.inline_expr("{0} > {1}").pass_args(
                    c.item("balance"),
                    c.input_arg("arg2"),
                ),
            ),
            -1,
        ),
        c.reduce(c.ReduceFuncs.MaxRow, c.item("debit")).item("balance"),
        c.reduce(c.ReduceFuncs.MinRow, c.item("debit")).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data,
                                                              arg1=100,
                                                              arg2=0,
                                                              debug=False))

    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120),
    ]
    # fmt: on

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        result = (c.group_by(c.item("name")).aggregate((
            c.item("category"),
            c.reduce(c.ReduceFuncs.Sum, c.item("debit")),
        )).execute(data, debug=False))

    # dict-shaped aggregation; note the computed (reducer-derived) dict key
    aggregation = {
        c.call_func(
            tuple,
            c.ReduceFuncs.Array(c.item("name"), default=None),
        ):
        c.item("category").call_method("lower"),
        "count":
        c.ReduceFuncs.Count(),
        "max":
        c.ReduceFuncs.Max(c.item("debit")),
        "min":
        c.ReduceFuncs.Min(c.item("debit")),
        "count_distinct":
        c.ReduceFuncs.CountDistinct(c.item("name")),
        "array_agg_distinct":
        c.ReduceFuncs.ArrayDistinct(c.item("name")),
        "dict":
        c.ReduceFuncs.Dict(c.item("debit"), c.item("name")),
    }
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {
                10: 'John',
                18: 'Bill',
                200: 'John',
                300: 'John'
            },
            'max': 300,
            'min': 10,
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        }, {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {
                7: 'Nick',
                30: 'John'
            },
            'max': 30,
            'min': 7,
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on
    # an aggregate result piped twice into one inline expression
    result3 = (c.aggregate(c.ReduceFuncs.Sum(c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(),
                                             c.this())).execute(data,
                                                                debug=False))
    assert result3 == 583 * 2

    # grouping by a tuple of keys
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(
        *by).aggregate(by + (c.ReduceFuncs.Sum(c.item("debit")), )).execute(
            data, debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on
    # empty group_by() aggregates the whole input
    result5 = (c.group_by().aggregate(c.ReduceFuncs.Sum(
        c.item("debit"))).execute(data, debug=False))
    assert result5 == 583

    with pytest.raises(c.ConversionException):
        # there's a single group by field, while we use separate items
        # of this tuple in aggregate
        (c.group_by(by).aggregate(
            by + (c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(
                data, debug=False))
示例#20
0
def test_average(series):
    """Unweighted Average must match ``statistics.mean`` of column 1."""
    actual = c.aggregate(c.ReduceFuncs.Average(c.item(1))).execute(series)
    expected = statistics.mean(x[1] for x in series)
    assert eq(actual, expected)
def test_doc__index_word_count():
    """Documentation example: count words across files, take top-N.

    Builds a pipeline of small named conversions (extract, split, flatten,
    count, top-N) and checks the generated converter end to end.
    """

    # Let's say we need to count words across all files
    input_data = [
        "war-and-peace-1.txt",
        "war-and-peace-2.txt",
        "war-and-peace-3.txt",
        "war-and-peace-4.txt",
    ]

    # # iterate an input and read file lines
    #
    # def read_file(filename):
    #     with open(filename) as f:
    #         for line in f:
    #             yield line
    # extract_strings = c.generator_comp(c.call_func(read_file, c.this()))

    # to simplify testing
    extract_strings = c.generator_comp(
        c.call_func(lambda filename: [filename], c.this()))

    # 1. make ``re`` pattern available to the code to be generated
    # 2. call ``finditer`` method of the pattern and pass the string
    #    as an argument
    # 3. pass the result to the next conversion
    # 4. iterate results, call ``.group()`` method of each re.Match
    #    and call ``.lower()`` on each result
    split_words = (c.naive(re.compile(r"\w+")).call_method(
        "finditer", c.this()).pipe(
            c.generator_comp(c.this().call_method("group",
                                                  0).call_method("lower"))))

    # ``extract_strings`` is the generator of strings
    # so we iterate it and pass each item to ``split_words`` conversion
    vectorized_split_words = c.generator_comp(c.this().pipe(split_words))

    # flattening the result of ``vectorized_split_words``, which is
    # a generator of generators of strings
    flatten = c.call_func(
        chain.from_iterable,
        c.this(),
    )

    # aggregate the input, the result is a single dict
    # words are keys, values are count of words
    dict_word_to_count = c.aggregate(
        c.ReduceFuncs.DictCount(c.this(), c.this(), default=dict))

    # take top N words by:
    #  - call ``.items()`` method of the dict (the result of the aggregate)
    #  - pass the result to ``sorted``
    #  - take the slice, using input argument named ``top_n``
    #  - cast to a dict
    take_top_n = (c.this().call_method("items").sort(
        key=lambda t: t[1],
        reverse=True).pipe(c.this()[:c.input_arg("top_n")]).as_type(dict))

    # the resulting pipeline is pretty self-descriptive, except the ``c.if_``
    # part, which checks the condition (first argument),
    # and returns the 2nd if True OR the 3rd (input data by default) otherwise
    pipeline = (
        extract_strings.pipe(flatten).pipe(vectorized_split_words).pipe(
            flatten).pipe(dict_word_to_count).pipe(
                c.if_(
                    c.input_arg("top_n").is_not(None),
                    c.this().pipe(take_top_n),
                ))
        # Define the resulting converter function signature.  In fact this
        # isn't necessary if you don't need to specify default values
    ).gen_converter(debug=True, signature="data_, top_n=None")

    assert pipeline(input_data, top_n=3) == {"war": 4, "and": 4, "peace": 4}
示例#22
0
def test_grouping():
    """Legacy-API variant of the grouping test (``c.reduce`` + ``.filter``).

    NOTE(review): redefines an earlier ``test_grouping`` in this module;
    only this definition is collected by pytest.
    """
    data = [
        {
            "name": "John",
            "category": "Games",
            "debit": 10,
            "balance": 90
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 200,
            "balance": -110
        },
        {
            "name": "John",
            "category": "Food",
            "debit": 30,
            "balance": -140
        },
        {
            "name": "John",
            "category": "Games",
            "debit": 300,
            "balance": 0
        },
        {
            "name": "Nick",
            "category": "Food",
            "debit": 7,
            "balance": 50
        },
        {
            "name": "Nick",
            "category": "Games",
            "debit": 18,
            "balance": 32
        },
        {
            "name": "Bill",
            "category": "Games",
            "debit": 18,
            "balance": 120
        },
    ]
    # per-name aggregation; note .filter(...) on reducers instead of where=
    result = (c.group_by(c.item("name")).aggregate((
        c.item("name"),
        c.item("name").call_method("lower"),
        c.call_func(str.lower, c.item("name")),
        c.reduce(
            lambda a, b: a + b,
            c.item("debit"),
            initial=c.input_arg("arg1"),
        ),
        c.reduce(
            c.inline_expr("{0} + {1}"),
            c.item("debit"),
            initial=lambda: 100,
        ),
        c.reduce(max, c.item("debit"), default=c.input_arg("arg1")).filter(
            c.call_func(lambda x: x < 0, c.item("balance"))),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(max, c.item("debit"), default=0).filter(
                c.call_func(lambda x: x < 0, c.item("balance"))),
            1000,
        ),
        c.call_func(
            lambda max_debit, n: max_debit * n,
            c.reduce(
                c.ReduceFuncs.Max,
                c.item("debit"),
                default=1000,
            ).filter(c.inline_expr("{0} > 0").pass_args(c.item("balance"))),
            -1,
        ),
        c.reduce(
            c.ReduceFuncs.MaxRow,
            c.item("debit"),
        ).item("balance"),
        c.reduce(
            c.ReduceFuncs.MinRow,
            c.item("debit"),
        ).item("balance"),
    )).sort(key=lambda t: t[0].lower(), reverse=True).execute(data,
                                                              arg1=100,
                                                              debug=False))
    # fmt: off
    assert result == [
        ('Nick', 'nick', 'nick', 125, 125, 100, 0, -18, 32, 50),
        ('John', 'john', 'john', 640, 640, 200, 200000, -10, 0, 90),
        ('Bill', 'bill', 'bill', 118, 118, 100, 0, -18, 120, 120)
    ]
    # fmt: on

    # dict aggregation with a computed (reducer-derived) dict key
    aggregation = {
        c.call_func(
            tuple,
            c.reduce(c.ReduceFuncs.Array, c.item("name"), default=None),
        ):
        c.item("category").call_method("lower"),
        "count":
        c.reduce(c.ReduceFuncs.Count),
        "count_distinct":
        c.reduce(c.ReduceFuncs.CountDistinct, c.item("name")),
        "array_agg_distinct":
        c.reduce(
            c.ReduceFuncs.ArrayDistinct,
            c.item("name"),
        ),
        "dict":
        c.reduce(c.ReduceFuncs.Dict, (c.item("debit"), c.item("name"))),
    }
    result = (c.group_by(c.item("category")).aggregate(aggregation).execute(
        data, debug=False))
    result2 = (c.group_by(c.item("category")).aggregate(
        c.dict(*aggregation.items())).execute(data, debug=False))
    # fmt: off
    assert result == result2 == [
        {
            'array_agg_distinct': ['John', 'Nick', 'Bill'],
            'count': 5,
            'count_distinct': 3,
            'dict': {
                10: 'John',
                18: 'Bill',
                200: 'John',
                300: 'John'
            },
            ('John', 'John', 'John', 'Nick', 'Bill'): 'games'
        }, {
            'array_agg_distinct': ['John', 'Nick'],
            'count': 2,
            'count_distinct': 2,
            'dict': {
                7: 'Nick',
                30: 'John'
            },
            ('John', 'Nick'): 'food'
        }
    ]
    # fmt: on
    # aggregate result piped twice into one inline expression
    result3 = (c.aggregate(c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).pipe(
        c.inline_expr("{0} + {1}").pass_args(c.this(),
                                             c.this())).execute(data,
                                                                debug=False))
    assert result3 == 583 * 2

    # grouping by a tuple of keys
    by = c.item("name"), c.item("category")
    result4 = (c.group_by(*by).aggregate(by + (
        c.reduce(c.ReduceFuncs.Sum, c.item("debit")), )).execute(data,
                                                                 debug=False))
    # fmt: off
    assert result4 == [('John', 'Games', 510), ('John', 'Food', 30),
                       ('Nick', 'Food', 7), ('Nick', 'Games', 18),
                       ('Bill', 'Games', 18)]
    # fmt: on
    # empty group_by() aggregates the whole input
    result5 = (c.group_by().aggregate(
        c.reduce(c.ReduceFuncs.Sum, c.item("debit"))).execute(data,
                                                              debug=False))
    assert result5 == 583
示例#23
0
def test_base_reducer():
    """Low-level reducer building blocks: ``_ReducerExpression`` and
    ``_ReducerStatements`` (internal convtools API).

    NOTE(review): redefines an earlier ``test_base_reducer`` in this module;
    only this definition is collected by pytest.
    """
    from convtools.aggregations import _ReducerExpression, _ReducerStatements

    # seven reducer variants, all expected to sum [1, 2, 3] to 6
    assert c.aggregate((
        c.reduce(
            _ReducerExpression(lambda a, b: a + b, expr=c.this(), initial=0)),
        c.reduce(
            _ReducerExpression(c.naive(lambda a, b: a + b),
                               expr=c.this(),
                               initial=int)),
        c.reduce(_ReducerExpression("{0} + {1}", expr=c.this(), default=0)),
        c.reduce(
            _ReducerExpression(
                "{0} + {1}",
                expr=c.this(),
                initial_from_first=int,
                default=0,
            )),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                initial_from_first="%(result)s = ({0} or 0)",
                default=0,
            ),
            c.this(),
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s += ({1} or 0)",
                default=c.naive(int),
            ),
            c.this(),
        ),
        c.reduce(
            _ReducerStatements(
                reduce="%(result)s = ({1} or 0)",
                initial=0,
            ),
            c.this(),
        ),
    )).filter(c.this() > 5, cast=tuple).gen_converter(debug=True)([1, 2,
                                                                   3]) == (
                                                                       6,
                                                                       6,
                                                                       6,
                                                                       6,
                                                                       6,
                                                                       6,
                                                                   )

    # nesting a reducer inside another reducer is rejected
    with pytest.raises(AssertionError):
        c.aggregate((c.reduce(
            c.ReduceFuncs.Sum,
            c.reduce(c.ReduceFuncs.Count),
        ), )).gen_converter()

    # DictArray groups values per key; empty input aggregates to None
    conv = c.aggregate(
        c.reduce(c.ReduceFuncs.DictArray,
                 (c.item(0), c.item(1)))).gen_converter(debug=True)
    data = [
        ("a", 1),
        ("a", 2),
        ("b", 3),
    ]
    result = {"a": [1, 2], "b": [3]}
    assert conv(data) == result
    assert conv([]) is None

    conv2 = c.aggregate({
        "key":
        c.reduce(c.ReduceFuncs.DictArray, (c.item(0), c.item(1)))
    }).gen_converter(debug=True)
    assert conv2([]) == {"key": None}
    assert conv2(data) == {"key": result}
示例#24
0
def test_average_of_empty_collection():
    """An Average over an empty input has no value: the aggregate yields None."""
    result = c.aggregate(c.ReduceFuncs.Average(c.item(1))).execute([])
    assert result is None
示例#25
0
def test_doc__index_intro():
    """Documentation intro examples: GROUP BY, AGGREGATE and JOIN.

    Mirrors the snippets from the index page of the docs and pins their
    expected outputs.
    """
    # ======== #
    # GROUP BY #
    # ======== #
    rows = [
        {"a": 5, "b": "foo"},
        {"a": 10, "b": "foo"},
        {"a": 10, "b": "bar"},
        {"a": 10, "b": "bar"},
        {"a": 20, "b": "bar"},
    ]

    group_conv = (
        c.group_by(c.item("b"))
        .aggregate(
            {
                "b": c.item("b"),
                "a_first": c.ReduceFuncs.First(c.item("a")),
                "a_max": c.ReduceFuncs.Max(c.item("a")),
            }
        )
        .gen_converter(debug=True)
    )

    assert group_conv(rows) == [
        {"b": "foo", "a_first": 5, "a_max": 10},
        {"b": "bar", "a_first": 10, "a_max": 20},
    ]

    # ========= #
    # AGGREGATE #
    # ========= #
    agg_conv = c.aggregate(
        {
            # list of "a" values where "b" equals to "bar"
            "a": c.ReduceFuncs.Array(c.item("a"), where=c.item("b") == "bar"),
            # "b" value of a row where "a" has Max value
            "b": c.ReduceFuncs.MaxRow(c.item("a")).item("b", default=None),
        }
    ).gen_converter(debug=True)

    assert agg_conv(rows) == {"a": [10, 10, 20], "b": "bar"}

    # ==== #
    # JOIN #
    # ==== #
    people = [
        {"id": 1, "name": "Nick"},
        {"id": 2, "name": "Joash"},
        {"id": 3, "name": "Bob"},
    ]
    details = [
        {"ID": "3", "age": 17, "country": "GB"},
        {"ID": "2", "age": 21, "country": "US"},
        {"ID": "1", "age": 18, "country": "CA"},
    ]

    # Left join on id == int(ID), restricted to adults on the right side;
    # unmatched left rows keep None for age/country.
    join_conv = (
        c.join(
            c.item(0),
            c.item(1),
            c.and_(
                c.LEFT.item("id") == c.RIGHT.item("ID").as_type(int),
                c.RIGHT.item("age") >= 18,
            ),
            how="left",
        )
        .pipe(
            c.list_comp(
                {
                    "id": c.item(0, "id"),
                    "name": c.item(0, "name"),
                    "age": c.item(1, "age", default=None),
                    "country": c.item(1, "country", default=None),
                }
            )
        )
        .gen_converter(debug=True)
    )

    assert join_conv((people, details)) == [
        {"id": 1, "name": "Nick", "age": 18, "country": "CA"},
        {"id": 2, "name": "Joash", "age": 21, "country": "US"},
        {"id": 3, "name": "Bob", "age": None, "country": None},
    ]
示例#26
0
def test_mode(series):
    """Mode reducer must agree with statistics.mode over the same values."""
    computed = c.aggregate(c.ReduceFuncs.Mode(c.item(0))).execute(series)
    expected = statistics.mode(row[0] for row in series)
    assert eq(computed, expected)
示例#27
0
def test_nested_group_by():
    """Nested aggregations: c.aggregate pipelines used inside group_by reducers.

    Exercises reducer/conversion reuse (the same conversion object piped in
    multiple places), deliberately redundant c.if_ wrappers around pipes, and
    early termination of input consumption by the First reducer.
    """
    # Each row: [group_key, list_of_numbers].
    data = [
        [0, [1, 2, 3]],
        [0, [4, 5, 6]],
        [1, [2, 3, 4]],
    ]
    # Inner c.aggregate sums each row's list; outer Sum adds those per-row
    # sums per group: group 0 -> (1+2+3)+(4+5+6) = 21, group 1 -> 9.
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.ReduceFuncs.Sum(
            c.item(1).pipe(c.aggregate(c.ReduceFuncs.Sum(c.this())))),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]
    # The same aggregate conversion object is piped twice below; this checks
    # that reuse of one conversion instance in several places stays correct.
    agg_conv = c.aggregate(c.ReduceFuncs.Sum(c.this()))
    # All c.if_(x, x, x) wrappers are no-ops (both branches equal the
    # condition); they exist only to push the pipes through extra conversion
    # layers.  The final result must match the simple variant above.
    assert c.group_by(c.item(0)).aggregate((
        c.item(0),
        c.if_(
            c.item(1),
            c.item(1),
            c.item(1),
        ).pipe(
            c.if_(
                c.this(),
                c.this(),
                c.this(),
            ).pipe(
                c.ReduceFuncs.Sum(
                    c.if_(
                        c.this(),
                        c.this(),
                        c.this(),
                    # Piping into a tuple of the same agg_conv yields a pair
                    # of identical sums; c.item(1) takes the second one.
                    ).pipe((agg_conv, agg_conv)).pipe(c.item(1))).pipe(
                        c.if_(
                            c.this(),
                            c.this(),
                            c.this(),
                        )), )),
    )).execute(data, debug=False) == [
        (0, 21),
        (1, 9),
    ]

    summer = c.aggregate(c.ReduceFuncs.Sum(c.this()))

    # Second-stage aggregation applied to the Array collected per group:
    # picks the first element that actually has the key ("value1"/"value2"),
    # then sums "value2" lists via the nested `summer` pipeline.
    merger = c.aggregate({
        "value1":
        c.ReduceFuncs.First(c.item("value1"), where=c("value1").in_(c.this())),
        "value2":
        c.ReduceFuncs.First(c.item("value2"),
                            where=c("value2").in_(c.this())).pipe(
                                c.if_(c.this(),
                                      c.this().pipe(summer))),
    })
    converter = (c.group_by(c.item("id_")).aggregate({
        "id_":
        c.item("id_"),
        "data":
        c.ReduceFuncs.Array(c.this()).pipe(merger),
    }).gen_converter(debug=False))
    # Group 2 has "value1" in one row and "value2" in another; the merger
    # must combine them.  Missing "value2" for group 1 stays None.
    assert converter([
        {
            "id_": 1,
            "value1": 2
        },
        {
            "id_": 2,
            "value1": 3
        },
        {
            "id_": 2,
            "value2": [1, 2, 3]
        },
    ]) == [
        {
            "id_": 1,
            "data": {
                "value1": 2,
                "value2": None
            }
        },
        {
            "id_": 2,
            "data": {
                "value1": 3,
                "value2": 6
            }
        },
    ]

    # Generator that blows up after the first item: First must stop
    # consuming input as soon as it has its value.
    def g():
        yield 1
        raise Exception

    assert (c.aggregate(c.ReduceFuncs.First(c.this())).execute(
        g(), debug=False)) == 1
示例#28
0
def test_median(series):
    """Median reducer must agree with statistics.median over the same values."""
    computed = c.aggregate(c.ReduceFuncs.Median(c.item(1))).execute(series)
    expected = statistics.median(row[1] for row in series)
    assert eq(computed, expected)
示例#29
0
def test_top_k(series, k):
    """TopK reducer compared against a Counter-based reference."""
    computed = c.aggregate(c.ReduceFuncs.TopK(k, c.item(1))).execute(series)
    # NOTE(review): Counter.most_common yields (value, count) pairs, so
    # index 1 selects the *count* — kept as-is; confirm against TopK's
    # contract and the `series` fixture.
    reference = [pair[1] for pair in Counter(row[1] for row in series).most_common(k)]
    assert eq(computed, reference)
示例#30
0
def test_top_k_non_positive_int(k):
    """TopK must reject a non-positive k with ValueError."""
    # `k` is a fixture; presumably parametrized with non-positive ints —
    # TODO confirm against the fixture definition.
    with pytest.raises(ValueError):
        c.aggregate(c.ReduceFuncs.TopK(k, c.this())).execute([1, 2]),