def test_can_move_blocks_of_vars():
    """Relocating a predicate-selected group of columns moves them together."""
    frame = tibble(x=1, a="a", y=2, b="a")

    # No destination given: the string columns are expected at the front.
    strings_first = frame >> relocate(where(is_string_dtype))
    assert strings_first.columns.tolist() == ["a", "b", "x", "y"]

    # With `_after`, the string columns are expected behind the numeric ones.
    strings_last = frame >> relocate(
        where(is_string_dtype),
        _after=where(is_numeric_dtype),
    )
    assert strings_last.columns.tolist() == ["x", "y", "a", "b"]
def test_cache_key():
    """Two ``across()`` calls with different functions give distinct results.

    The frame is grouped by ``g``; ``x`` and ``y`` built through ``across``
    must match the same columns computed directly with ``mean`` and ``max``.
    """
    grouped = tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)

    via_across = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    direct = grouped >> mutate(x=mean(f.a), y=max(f.a))

    assert_frame_equal(via_across, direct)
def test_works_sequentially():
    """``across()`` inside ``mutate`` sees columns changed earlier in the call."""
    frame = tibble(a=1)

    # `x` is computed first, so the second across is expected to count it too.
    counted = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(where(is_numeric))),
    )
    wanted = tibble(a=1, x=1, y=2)
    assert counted.equals(wanted)

    # `a` is overwritten with a string first, leaving no numeric column.
    counted = frame >> mutate(
        a="x",
        y=ncol(across(where(is_numeric))),
    )
    wanted = tibble(a="x", y=0)
    assert counted.equals(wanted)
def test_error_messages():
    """Invalid ``_fns`` and use outside a verb both raise ``ValueError``."""
    bad_fns_msg = "Argument `_fns` of across must be"
    outside_verb_msg = "must only be used inside verbs"

    # A plain integer is not an acceptable `_fns` argument.
    with pytest.raises(ValueError, match=bad_fns_msg):
        tibble(x=1) >> summarise(res=across(where(is_numeric), 42))

    # Calling the helpers bare, with no enclosing verb, is rejected.
    with pytest.raises(ValueError, match=outside_verb_msg):
        across()

    with pytest.raises(ValueError, match=outside_verb_msg):
        c_across()
def test_nb_fail():
    """Rounding a negated column selection on iris keeps all 150 rows."""
    from datar.datasets import iris

    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )

    row_count = rounded >> nrow()
    assert row_count == 150
def test_tidyselect_funs():
    """Exercise the selection helpers used with ``select``.

    Covers ``where`` (plain predicate, registered verb, always-false lambda),
    ``ends_with``, ``contains``, ``all_of``, ``any_of``, ``num_range`` and
    selection on a frame built with a repeated column name.
    """
    # tidyselect.where
    def isupper(ser):
        return ser.name.isupper()

    frame = tibble(x=1, X=2, y=3, Y=4)

    picked = frame >> select(where(isupper))
    assert picked.columns.tolist() == ["X", "Y"]

    @register_verb
    def islower(_data, series):
        return [series.name.islower(), True]

    picked = frame >> select(where(islower))
    assert picked.columns.tolist() == ["x", "y"]

    # A predicate that never matches leaves one row and no columns.
    picked = frame >> select(where(lambda x: False))
    assert picked.shape == (1, 0)

    picked = frame >> select(ends_with("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    picked = frame >> select(contains("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    # all_of is strict about missing names ...
    with pytest.raises(KeyError):
        frame >> select(all_of(["x", "a"]))

    # ... while any_of keeps only the names that exist.
    picked = frame >> select(any_of(["x", "y"]))
    assert picked.columns.tolist() == ["x", "y"]

    picked = frame >> select(any_of(["x", "a"]))
    assert picked.columns.tolist() == ["x"]

    generated = num_range("a", 3, width=2)
    assert generated == ["a00", "a01", "a02"]

    # Frame built from a nested tibble plus a second `X`, with minimal repair.
    dup_frame = tibble(tibble(X=1), X=2, _name_repair="minimal")
    picked = dup_frame >> select(contains("X"))
    assert picked.columns.tolist() == ["X"]
def test_names_output():
    """Check the column names ``across()`` yields inside ``summarise``.

    Walks through no function, a single function, dicts of functions, a list
    of functions, and custom ``_names`` templates on a frame grouped by ``x``.
    """
    grouped = tibble(x=1, y=2, z=3, s="") >> group_by(f.x)

    # No functions at all: names pass through unchanged.
    plain = grouped >> summarise(across())
    assert plain.columns.tolist() == ["x", "y", "z", "s"]

    prefixed = grouped >> summarise(across(_names="id_{_col}"))
    assert prefixed.columns.tolist() == ["x", "id_y", "id_z", "id_s"]

    # A single function keeps the column names unless a template is given.
    single = grouped >> summarise(across(where(is_numeric), mean))
    assert single.columns.tolist() == ["x", "y", "z"]

    single_named = grouped >> summarise(
        across(where(is_numeric), mean, _names="mean_{_col}")
    )
    assert single_named.columns.tolist() == ["x", "mean_y", "mean_z"]

    # Dict keys become name suffixes.
    by_dict = grouped >> summarise(
        across(where(is_numeric), {"mean": mean, "sum": sum})
    )
    assert by_dict.columns.tolist() == [
        "x",
        "y_mean",
        "y_sum",
        "z_mean",
        "z_sum",
    ]

    # Different from R's list
    mixed_keys = grouped >> summarise(
        across(where(is_numeric), {"mean": mean, 1: sum})
    )
    assert mixed_keys.columns.tolist() == [
        "x",
        "y_mean",
        "y_1",
        "z_mean",
        "z_1",
    ]

    # Different from R's list
    mixed_keys = grouped >> summarise(
        across(where(is_numeric), {0: mean, "sum": sum})
    )
    assert mixed_keys.columns.tolist() == [
        "x",
        "y_0",
        "y_sum",
        "z_0",
        "z_sum",
    ]

    # A list of functions is suffixed by position, starting at 0 ...
    by_list = grouped >> summarise(across(where(is_numeric), [mean, sum]))
    assert by_list.columns.tolist() == ["x", "y_0", "y_1", "z_0", "z_1"]

    # ... while the `{_fn1}` placeholder yields suffixes starting at 1.
    by_list_one_based = grouped >> summarise(
        across(where(is_numeric), [mean, sum], _names="{_col}_{_fn1}")
    )
    assert by_list_one_based.columns.tolist() == [
        "x",
        "y_1",
        "y_2",
        "z_1",
        "z_2",
    ]

    # The template may also put the function name in front of the column.
    fn_first = grouped >> summarise(
        across(
            where(is_numeric),
            {"mean": mean, "sum": sum},
            _names="{_fn}_{_col}",
        )
    )
    assert fn_first.columns.tolist() == [
        "x",
        "mean_y",
        "sum_y",
        "mean_z",
        "sum_z",
    ]
def test_reject_non_vectors():
    """A bare ``object()`` passed as ``_fns`` to ``across`` raises ValueError."""
    expected_msg = "Argument `_fns` of across must be"

    with pytest.raises(ValueError, match=expected_msg):
        tibble(x=1) >> summarise(across(where(is_numeric), object()))
def test_used_separately():
    """Two ``across()`` calls in separate ``mutate`` arguments stay independent."""
    frame = tibble(a=1, b=2)

    counted = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(f.a)),
    )

    # `x` counts both numeric columns; `y` counts only the selected `a`.
    wanted = tibble(a=1, b=2, x=2, y=1)
    assert counted.equals(wanted)
def test_used_twice():
    """Two ``across()`` calls may appear within one ``mutate`` expression."""
    frame = tibble(a=1, b=2)

    summed = frame >> mutate(
        x=ncol(across(where(is_numeric))) + ncol(across(f.a))
    )

    # Two numeric columns plus the single selected column gives 3.
    wanted = tibble(a=1, b=2, x=3)
    assert summed.equals(wanted)
def test_keep_used_not_affected_by_across():
    """``_keep="unused"`` drops nothing when columns are only touched by across."""
    frame = tibble(x=1, y=2, z=3, a="a", b="b", c="c")

    kept = frame >> mutate(
        across(where(is_numeric), identity),
        _keep="unused",
    )

    # The column set and order must match the input frame exactly.
    assert kept.columns.tolist() == frame.columns.tolist()