示例#1
0
def test_cache_key():
    """Two across() calls differing only in their function stay distinct.

    On data grouped by ``g``, a tibble built from ``across(..., mean).a`` and
    ``across(..., max).a`` must give the same columns as calling ``mean`` and
    ``max`` on ``f.a`` directly.
    """
    grouped = tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)

    via_across = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    direct = grouped >> mutate(x=mean(f.a), y=max(f.a))

    assert_frame_equal(via_across, direct)
示例#2
0
def test_with_group_id():
    """across() results can be combined with cur_group_id() in a function.

    The registered helper picks a value from the ``a`` selection for group
    id 0 and from the ``b`` selection otherwise.
    """
    grouped = tibble(g=[1, 2], a=[1, 2], b=[3, 4]) >> group_by(f.g)

    @register_func(context=None)
    def switcher(data, group_id, across_a, across_b):
        def pick(gid):
            # Group 0 reads from the `a` frame, every other group from `b`.
            if gid == 0:
                return across_a.a.obj[0]
            return across_b.b.obj[1]

        return group_id.apply(pick)

    result = grouped >> mutate(
        x=switcher(cur_group_id(), across(f.a), across(f.b))
    )
    assert result.x.obj.tolist() == [1, 4]
示例#3
0
def test_works_sequentially():
    """across() inside mutate() sees columns created earlier in the call."""
    base = tibble(a=1)

    # `y` is computed after `x` exists, so it counts one more numeric column.
    counted = base >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(where(is_numeric))),
    )
    counted_expected = tibble(a=1, x=1, y=2)
    assert counted.equals(counted_expected)

    # Once `a` is replaced by a string there is no numeric column left.
    replaced = base >> mutate(a="x", y=ncol(across(where(is_numeric))))
    replaced_expected = tibble(a="x", y=0)
    assert replaced.equals(replaced_expected)
示例#4
0
def test_to_functions():
    """Extra keyword arguments of across() are forwarded to the functions."""
    frame = tibble(x=c(1, NA))  # column ends up as float

    # Single function.
    single = frame >> summarise(across(everything(), mean, na_rm=True))
    single_expected = tibble(x=1.0)
    assert_frame_equal(single, single_expected)

    # Several named functions: each one receives `na_rm` as well.
    several = frame >> summarise(
        across(
            everything(),
            {"mean": mean, "median": median},
            na_rm=True,
        )
    )
    several_expected = tibble(x_mean=1.0, x_median=1.0)
    assert_frame_equal(several, several_expected)
示例#5
0
def test_use_env_var():
    """A local variable named like a column does not interfere with across().

    The local ``y`` holds the string ``"x"``; columns are addressed through
    ``f``, so there is no clash with the data frame's own ``y`` column.
    """
    frame = tibble(x=1.0, y=2.4)
    y = "x"

    summarised = frame >> summarise(across(all_of(y), mean))
    summarised_expected = tibble(x=1.0)
    assert summarised.equals(summarised_expected)

    mutated = frame >> mutate(across(all_of(y), mean))
    assert mutated.equals(frame)

    filtered = frame >> filter(if_all(all_of(y), lambda col: col < 2))
    assert filtered.equals(frame)
示例#6
0
def test_auto_splicing():
    """distinct() accepts a tibble or an across() in place of bare columns."""
    species_frame = tibble(Species=iris.Species)

    by_column = iris >> distinct(f.Species)
    by_frame = iris >> distinct(species_frame)
    assert by_column.equals(by_frame)

    by_across = iris >> distinct(across(f.Species))
    assert by_column.equals(by_across)

    # Rounding first and then de-duplicating must match doing both at once.
    rounded_then_distinct = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> distinct(f.Sepal_Length, f.Sepal_Width)
    )
    distinct_on_rounded = iris >> distinct(
        across(starts_with("Sepal"), round)
    )
    assert rounded_then_distinct.equals(distinct_on_rounded)
示例#7
0
def test_summarise_with_multiple_acrosses():
    """Summarise mtcars per ``cyl`` with two across() calls in one verb.

    Scenario taken from:
    https://stackoverflow.com/questions/63200530/python-pandas-equivalent-to-dplyr-1-0-0-summarizeacross
    """
    by_cyl = mtcars >> group_by(f.cyl)
    summary = by_cyl >> summarize(
        across(ends_with("p"), sum),
        across(ends_with("t"), mean),
    )

    expected = tibble(
        cyl=[6, 4, 8],
        disp=[1283.2, 1156.5, 4943.4],
        hp=[856, 909, 2929],
        drat=[3.585714, 4.070909, 3.229286],
        wt=[3.117143, 2.285727, 3.999214],
    )
    assert_tibble_equal(summary, expected)
示例#8
0
def test_errors():
    """filter() raises the expected exception type for malformed input."""
    by_species = iris >> group_by(f.Species)

    # Condition of the wrong type.
    with pytest.raises(ValueError):
        by_species >> filter(range(1, 10))
    with pytest.raises(ValueError):
        iris >> filter(range(1, 10))

    # Condition of the wrong size.
    with pytest.raises(ValueError):
        by_species >> filter([True, False])
    with pytest.raises(ValueError):
        iris >> rowwise(f.Species) >> filter([True, False])
    with pytest.raises(ValueError):
        iris >> filter([True, False])

    # Wrong size when the condition comes as a data-frame column.
    with pytest.raises(ValueError):
        by_species >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        iris >> rowwise() >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        iris >> filter(tibble([True, False]))
    with pytest.raises(ValueError):
        tibble(x=1) >> filter([True, False])

    # Keyword (named) inputs are not accepted.
    with pytest.raises(TypeError):
        mtcars >> filter(x=1)
    with pytest.raises(TypeError):
        mtcars >> filter(f.y > 2, z=3)
    with pytest.raises(TypeError):
        mtcars >> filter(True, x=1)

    # Using across() inside filter() does not warn yet; it only has to run.
    tibble(x=1, y=2) >> filter(across(everything(), lambda x: x > 0))
示例#9
0
def test_names_output():
    """Check the output column names across() yields inside summarise().

    Covers the default names, a custom ``_names`` template, and functions
    supplied as a dict (string and integer keys) or as a list.
    """
    grouped = tibble(x=1, y=2, z=3, s="") >> group_by(f.x)

    # No functions: names pass through unchanged or follow the template.
    res = grouped >> summarise(across())
    assert res.columns.tolist() == ["x", "y", "z", "s"]

    res = grouped >> summarise(across(_names="id_{_col}"))
    assert res.columns.tolist() == ["x", "id_y", "id_z", "id_s"]

    # A single function keeps the column name unless a template is given.
    res = grouped >> summarise(across(where(is_numeric), mean))
    assert res.columns.tolist() == ["x", "y", "z"]

    res = grouped >> summarise(
        across(where(is_numeric), mean, _names="mean_{_col}")
    )
    assert res.columns.tolist() == ["x", "mean_y", "mean_z"]

    # A dict of functions appends each key to the column name.
    res = grouped >> summarise(
        across(where(is_numeric), dict(mean=mean, sum=sum))
    )
    assert res.columns.tolist() == [
        "x", "y_mean", "y_sum", "z_mean", "z_sum"
    ]

    # Integer keys are used verbatim (this differs from R's list).
    res = grouped >> summarise(
        across(where(is_numeric), {"mean": mean, 1: sum})
    )
    assert res.columns.tolist() == ["x", "y_mean", "y_1", "z_mean", "z_1"]

    # Same with the integer key first (this differs from R's list).
    res = grouped >> summarise(
        across(where(is_numeric), {0: mean, "sum": sum})
    )
    assert res.columns.tolist() == ["x", "y_0", "y_sum", "z_0", "z_sum"]

    # A list of functions is numbered from 0 by default ...
    res = grouped >> summarise(across(where(is_numeric), [mean, sum]))
    assert res.columns.tolist() == ["x", "y_0", "y_1", "z_0", "z_1"]

    # ... and from 1 when the template uses `_fn1`.
    res = grouped >> summarise(
        across(where(is_numeric), [mean, sum], _names="{_col}_{_fn1}")
    )
    assert res.columns.tolist() == ["x", "y_1", "y_2", "z_1", "z_2"]

    # The template may also put the function name before the column name.
    res = grouped >> summarise(
        across(
            where(is_numeric),
            dict(mean=mean, sum=sum),
            _names="{_fn}_{_col}",
        )
    )
    assert res.columns.tolist() == [
        "x", "mean_y", "sum_y", "mean_z", "sum_z"
    ]
示例#10
0
def test_nb_fail():
    """Combining where() with a negated column selection keeps all rows."""
    from datar.datasets import iris

    # Round every double column except the two petal measurements.
    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )
    n_rows = rounded >> nrow()
    assert n_rows == 150
示例#11
0
def test_auto_splicing():
    """group_by() accepts a tibble or an across() in place of bare columns."""
    # A one-column tibble groups like the column itself.
    by_column = iris >> group_by(f.Species)
    by_frame = iris >> group_by(tibble(Species=iris.Species))
    assert by_column.equals(by_frame)

    # So does across() over that column.
    by_column = iris >> group_by(f.Species)
    by_across = iris >> group_by(across(f.Species))
    assert by_column.equals(by_across)

    # across() with a function: same as mutating first, then grouping.
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Sepal_Length, f.Sepal_Width)
    )
    spliced = iris >> group_by(across(starts_with("Sepal"), round))
    assert explicit.equals(spliced)

    # across(character()), across(NULL) not supported

    # across() followed by a plain column.
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Sepal_Length, f.Sepal_Width, f.Species)
    )
    spliced = iris >> group_by(
        across(starts_with("Sepal"), round), f.Species
    )
    assert explicit.equals(spliced)

    # A plain column followed by across().
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Species, f.Sepal_Length, f.Sepal_Width)
    )
    spliced = iris >> group_by(
        f.Species, across(starts_with("Sepal"), round)
    )
    assert explicit.equals(spliced)
示例#12
0
def test_result_locations_aligned_with_column_names():
    """Each across() result lands under the name built for its column/function."""
    frame = tibble(x=[1, 2], y=["a", "b"])
    expected = tibble(
        x_cls=numpy.int64,
        x_type=True,
        y_cls=object,
        y_type=False,
    )

    actual = frame >> summarise(
        across(
            everything(),
            dict(cls=lambda col: col.dtype, type=is_numeric),
        )
    )
    assert_frame_equal(actual, expected)
示例#13
0
def test_across():
    """arrange() with across() sorts like arrange() with explicit columns."""
    frame = tibble(x=[1, 3, 2, 1], y=[4, 3, 2, 1])

    # All columns, ascending.
    via_across = frame >> arrange(across())
    explicit = frame >> arrange(f.x, f.y)
    assert via_across.equals(explicit)

    # All columns, with desc applied to each.
    via_across = frame >> arrange(across(None, desc))
    explicit = frame >> arrange(desc(f.x), desc(f.y))
    assert via_across.equals(explicit)

    # Only `x`.
    via_across = frame >> arrange(across(f.x))
    explicit = frame >> arrange(f.x)
    assert via_across.equals(explicit)

    # Only `y`.
    via_across = frame >> arrange(across(f.y))
    explicit = frame >> arrange(f.y)
    assert via_across.equals(explicit)
示例#14
0
def test_summarise_cols_inside_func():
    """summarise() resolves column references passed into a registered function.

    The result must match dividing every column by ``y`` through across().
    """
    frame = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        return tibble(**kwargs)

    via_func = frame >> summarise(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )
    via_across = frame >> summarise(
        across(everything(), lambda col: col / frame.y)
    )
    assert via_func.equals(via_across)
示例#15
0
def test_mutate_cols_inside_func():
    """mutate() resolves column references passed into a registered function.

    The result must match dividing every column by ``y`` through across().
    """
    frame = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        return tibble(**kwargs)

    via_func = frame >> mutate(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )
    # Reading `frame.y` directly would not work on grouped data.
    via_across = frame >> mutate(
        across(everything(), lambda col: col / frame.y)
    )
    assert via_func.equals(via_across)
示例#16
0
def test_used_separately():
    """Two across() calls in separate mutate() arguments stay independent."""
    frame = tibble(a=1, b=2)

    result = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(f.a)),
    )
    expected = tibble(a=1, b=2, x=2, y=1)
    assert result.equals(expected)
示例#17
0
def test_cur_column():
    """cur_column() passed through across() yields each column's own name."""
    frame = tibble(x=1, y=2, z=3)

    # The function ignores the column values and returns the keyword `y`,
    # which across() fills from cur_column().
    named = frame >> mutate(
        across(f[f.x :], (lambda _values, y: y), y=cur_column())
    )
    assert named.values.tolist() == [["x", "y", "z"]]
示例#18
0
def test_not_selecting_grouping_var():
    """across(everything()) on grouped data leaves out the grouping column."""
    frame = tibble(g=1, x=1)

    summary = frame >> group_by(f.g) >> summarise(x=across(everything()))

    # Only `x` is expected under the packed `x` result, not `g`.
    inner_expected = tibble(x=1)
    assert_frame_equal(summary["x"], inner_expected)
示例#19
0
def test_on_one_column():
    """mutate(across()) on a single-column tibble returns an equal frame."""
    single = tibble(x=1)
    result = single >> mutate(across())
    assert result.equals(single)
示例#20
0
def test_keep_used_not_affected_by_across():
    """With ``_keep="unused"``, columns touched via across() are still kept."""
    frame = tibble(x=1, y=2, z=3, a="a", b="b", c="c")

    result = frame >> mutate(
        across(where(is_numeric), identity),
        _keep="unused",
    )

    original_names = frame.columns.tolist()
    assert result.columns.tolist() == original_names
示例#21
0
def test_cols_in_lambda():
    """A lambda given to across() may read another column of the frame."""
    frame = tibble(x=1.0, y=2.0)
    divided = frame >> mutate(across("x", lambda col: col / frame.y))
    assert divided.x.tolist() == [0.5]
示例#22
0
def test_empty_df():
    """mutate(across()) on an empty tibble returns an equal frame."""
    empty = tibble()
    result = empty >> mutate(across())
    assert result.equals(empty)
示例#23
0
def test_kwargs():
    """Extra arguments given to across() are forwarded to the function."""
    frame = tibble(x=c(1, 2))

    def tail_n(d, n):
        return d.tail(n)

    # The trailing `1` reaches `tail_n` as `n`, keeping only the last value.
    last = frame >> summarise(across(f.x, tail_n, 1))
    expected = tibble(x=2)
    assert_frame_equal(last, expected)
示例#24
0
def test_reject_non_vectors():
    """across() raises ValueError when ``_fns`` is an arbitrary object."""
    expected_message = "Argument `_fns` of across must be"
    with pytest.raises(ValueError, match=expected_message):
        tibble(x=1) >> summarise(across(where(is_numeric), object()))
示例#25
0
def test_original_ordering():
    """Overwriting a column before across() keeps the original column order."""
    frame = tibble(a=1, b=2)
    result = frame >> mutate(a=2, x=across())

    expected_names = ["a", "b", "x$a", "x$b"]
    assert result.columns.tolist() == expected_names
示例#26
0
def test_used_twice():
    """Two across() calls may appear inside one mutate() expression."""
    frame = tibble(a=1, b=2)

    # 2 numeric columns + 1 explicitly selected column.
    result = frame >> mutate(
        x=ncol(across(where(is_numeric))) + ncol(across(f.a))
    )
    expected = tibble(a=1, b=2, x=3)
    assert result.equals(expected)
示例#27
0
def test_implicit_mutate_operates_on_ungrouped_data():
    """Regrouping with across() can select the current grouping column.

    The frame starts out grouped by ``y``; grouping again through
    ``across(any_of(...))`` must end up with both ``y`` and ``z`` as groups.
    """
    grouped = tibble(x=c(1, 2), y=c(3, 4), z=c(5, 6)) >> group_by(f.y)
    grouped = grouped >> group_by(across(any_of(c("y", "z"))))

    grouping_columns = group_vars(grouped)
    assert grouping_columns == ["y", "z"]