示例#1
0
def test_expr_parse():
    """Check that string expressions parse, round-trip and evaluate.

    Two expressions are applied to the same three-column frame, each
    producing a new column ``d``:

    * ``a + 1`` -- arithmetic on the boolean column ``a``.
    * ``a.if_else(1, c)`` -- picks ``1`` where ``a`` is true, else ``c``.
    """
    # check some differences in back to Python versus sending to Pandas
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame({
        "a": [True, False],
        "b": [1, 2],
        "c": [3, 4]
    })

    # (expression assigned to "d", values expected in column "d")
    cases = [
        ("a + 1", [2, 1]),
        ("a.if_else(1, c)", [1, 4]),
    ]

    for expression, expected_d in cases:
        pipeline = TableDescription("d", ["a", "b", "c"]).extend(
            {"d": expression}
        )
        assert formats_to_self(pipeline)

        actual = pipeline.transform(source)
        wanted = pd.DataFrame({
            "a": [True, False],
            "b": [1, 2],
            "c": [3, 4],
            "d": expected_d,
        })
        assert data_algebra.test_util.equivalent_frames(actual, wanted)
示例#2
0
def test_project_z():
    """Project ``c.max()`` with no grouping and expect a single-row result."""
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame(
        {"c": [1, 1, 1, 1], "g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4]}
    )

    pipeline = describe_table(source, "d").project({"c": "c.max()"})
    assert formats_to_self(pipeline)

    actual = pipeline.transform(source)
    # Only the aggregated column is expected to remain, as one row.
    wanted = pd.DataFrame({"c": [1]})
    assert data_algebra.test_util.equivalent_frames(wanted, actual)
def test_extend_0():
    """Extend with an empty assignment dict and expect a no-op.

    The resulting pipeline must be a ``TableDescription`` and transforming
    the input must give back an equivalent frame.
    """
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame(
        {"c": [1, 1, 1, 1], "g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4]}
    )

    table = describe_table(source, "d")
    pipeline = table.extend({}, partition_by=["c", "g"])

    assert isinstance(pipeline, TableDescription)
    assert formats_to_self(pipeline)

    actual = pipeline.transform(source)
    assert data_algebra.test_util.equivalent_frames(source, actual)
def test_extend_p():
    """Overwrite ``c`` with ``y.max()`` computed per partition of ``g``."""
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame(
        {"c": [1, 1, 1, 1], "g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4]}
    )

    table = describe_table(source, "d")
    pipeline = table.extend({"c": "y.max()"}, partition_by=["g"])
    assert formats_to_self(pipeline)

    actual = pipeline.transform(source)
    # Group "a" holds y values 1 and 3, group "b" holds 2 and 4.
    wanted = pd.DataFrame(
        {"g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4], "c": [3, 4, 3, 4],}
    )
    assert data_algebra.test_util.equivalent_frames(wanted, actual)
示例#5
0
def test_cc_ops_f():
    """Check ``connected_components(f, g)`` used as an extend expression.

    Each row is treated as an ``f``--``g`` pair; the expected column ``c``
    is 1 for every row except the ``4``--``5`` row, which gets 4.
    """
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame({
        "f": [1, 4, 6, 2, 1],
        "g": [2, 5, 7, 3, 7],
    })

    pipeline = describe_table(source).extend(
        {"c": "connected_components(f, g)"}
    )
    assert formats_to_self(pipeline)

    actual = pipeline.transform(source)
    wanted = pd.DataFrame({
        "f": [1, 4, 6, 2, 1],
        "g": [2, 5, 7, 3, 7],
        "c": [1, 4, 1, 1, 1],
    })
    assert data_algebra.test_util.equivalent_frames(actual, wanted)
def test_extend_shrink_1():
    """Check when two chained ``extend`` steps are or are not combined.

    The structural checks look at ``sources[0]`` of the outer node: a
    ``TableDescription`` there means only one extend node sits on the table,
    an ``ExtendNode`` means both extend steps were kept.
    """
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame(
        {"c": [1, 1, 1, 1], "g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4]}
    )

    # Two independent aggregates, written as two chained steps.
    chained = (
        describe_table(source, "d")
        .extend({"c": "y.max()"})
        .extend({"d": "y.min()"})
    )
    assert formats_to_self(chained)

    actual = chained.transform(source)
    wanted = pd.DataFrame(
        {"g": ["a", "b", "a", "b"], "y": [1, 2, 3, 4], "c": [4, 4, 4, 4], "d":[1, 1, 1, 1]}
    )
    assert data_algebra.test_util.equivalent_frames(wanted, actual)

    # The same aggregates in a single step must print identically.
    single_step = describe_table(source, "d").extend(
        {"c": "y.max()", "d": "y.min()"}
    )
    assert str(chained) == str(single_step)

    # The second step reads the column made by the first: both steps stay.
    dependent = (
        describe_table(source, "d")
        .extend({"c": "y"})
        .extend({"d": "c"})
    )
    assert isinstance(dependent.sources[0], ExtendNode)

    # The second step re-assigns the same column: one node over the table.
    overwritten = (
        describe_table(source, "d")
        .extend({"c": "1"})
        .extend({"c": "2"})
    )
    assert isinstance(overwritten.sources[0], TableDescription)

    # An aggregate followed by a plain column copy: both steps stay
    # (presumably because the two expression kinds cannot share a node
    # -- confirm against the ExtendNode implementation).
    mixed = (
        describe_table(source, "d")
        .extend({"c": "y.max()"})
        .extend({"d": "y"})
    )
    assert isinstance(mixed.sources[0], ExtendNode)
示例#7
0
def test_extend_shrink_2():
    """Chain ``c = y.max()`` then ``d = c.min()`` and expect two nodes.

    The second step reads the column created by the first, so the outer
    node's source must still be an ``ExtendNode``.
    """
    pd = data_algebra.default_data_model.pd
    source = pd.DataFrame({
        "c": [1, 1, 1, 1],
        "g": ["a", "b", "a", "b"],
        "y": [1, 2, 3, 4]
    })

    first_step = describe_table(source, "d").extend({"c": "y.max()"})
    pipeline = first_step.extend({"d": "c.min()"})

    assert formats_to_self(pipeline)
    # check doesn't combine nodes in this case
    assert isinstance(pipeline.sources[0], ExtendNode)

    actual = pipeline.transform(source)
    wanted = pd.DataFrame({
        "g": ["a", "b", "a", "b"],
        "y": [1, 2, 3, 4],
        "c": [4, 4, 4, 4],
        "d": [4, 4, 4, 4],
    })
    assert data_algebra.test_util.equivalent_frames(wanted, actual)