def test_ignores_group():
    """arrange() ignores the grouping unless ``_by_group=True`` is passed."""
    data = tibble(g=[2, 1] * 2, x=[4, 3, 2, 1])
    grouped = data >> group_by(f.g)

    # Default behaviour: rows are ordered by ``x`` alone.
    sorted_plain = grouped >> arrange(f.x)
    expected_plain = data.iloc[[3, 2, 1, 0], :].reset_index(drop=True)
    assert sorted_plain.equals(expected_plain)

    # With ``_by_group=True`` the expected order is by ``g`` first, then ``x``.
    sorted_by_group = grouped >> arrange(f.x, _by_group=True)
    expected_by_group = data.iloc[[3, 1, 2, 0], :].reset_index(drop=True)
    assert_frame_equal(sorted_by_group, expected_by_group)
def test_handles_scalar_results():
    """A ``min(f.mpg) > 0`` filter keeps all rows, with or without grouping."""
    kept_all = mtcars >> filter(min(f.mpg) > 0)
    assert kept_all.equals(mtcars)

    by_cyl = mtcars >> group_by(f.cyl)
    filtered_sorted = (
        by_cyl
        >> filter(min(f.mpg) > 0)
        >> arrange(f.cyl, f.mpg)
    )

    # See TibbleGrouped's Known issues
    reference_sorted = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(filtered_sorted, reference_sorted)
def test_empty_returns_self():
    """arrange() without sort keys returns a frame equal to its input."""
    data = tibble(x=range(1, 11), y=range(1, 11))

    # Ungrouped input.
    assert arrange(data).equals(data)

    # Grouped input: both the contents and the grouping variables survive.
    grouped = data >> group_by(f.x)
    result = arrange(grouped)
    assert result.equals(grouped)
    assert group_vars(result) == group_vars(grouped)
def test_errors():
    """arrange() raises on duplicate names, unknown columns and bad lengths."""
    # NOTE: the local is intentionally kept as ``x``; it is passed
    # positionally to tibble() below, which may derive column names from it.
    x = Series(1, name="x")

    # Two columns sharing one name.
    duplicated = tibble(x, x, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        duplicated >> arrange(f.x)

    single = tibble(x=x)

    # Sorting by a column that does not exist.
    with pytest.raises(KeyError):
        single >> arrange(f.y)

    # Sort key whose length does not match the frame.
    with pytest.raises(ValueError, match="Length of values"):
        single >> arrange(rep(f.x, 2))
def test_preserve_order_across_groups():
    """Three equivalent group/filter/arrange pipelines give equal frames."""
    data = tibble(g=c(1, 2, 1, 2, 1), time=[5, 4, 3, 2, 1], x=f.time)

    # Group, filter, ungroup, then sort.
    group_then_filter = (
        data
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )
    # Same, but pre-sorted by ``g`` before grouping.
    presorted = (
        data
        >> arrange(f.g)
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )
    # Filter before grouping.
    filter_then_group = (
        data
        >> filter(f.x <= 4)
        >> group_by(f.g)
        >> ungroup()
        >> arrange(f.g, f.time)
    )

    # Compare on a fresh 0..n-1 index so only the row contents matter.
    for frame in (group_then_filter, presorted, filter_then_group):
        frame.reset_index(drop=True, inplace=True)

    assert group_then_filter.equals(presorted)
    assert group_then_filter.equals(filter_then_group)
def test_slice_works_with_grouped_data():
    """slice() on grouped frames: ranges, negation, errors and group keys."""
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # First two rows of every group.
    head_rows = slice(cars_by_cyl, f[:2])
    head_expected = filter(cars_by_cyl, row_number() < 3)
    assert_frame_equal(head_rows, head_expected)

    # Everything except the first two rows of every group.
    tail_rows = slice(cars_by_cyl, ~f[:2])
    tail_expected = filter(cars_by_cyl, row_number() >= 3)
    assert_tibble_equal(tail_rows, tail_expected)

    small_groups = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # NOTE(review): the ``_preserve=True`` variant below was already
    # commented out; the reason is not visible from this file.
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(small_groups, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    # Grouping by a categorical that declares an unused level (3).
    cat_grouped = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    with pytest.raises(TypeError):
        cat_grouped >> slice("a")
    with pytest.raises(ValueError):
        cat_grouped >> slice(~f[:2], 1)

    first_rows = cat_grouped >> slice(0)
    assert first_rows.shape[0] == 2

    # Row selectors supplied as a pandas groupby keyed on the frame's groups.
    group_index = cat_grouped._datar["grouped"].grouper.result_index
    grouped_selector = Series([1, 0, 0]).groupby(group_index)
    picked = cat_grouped >> slice(grouped_selector)
    assert_iterable_equal(picked.x.obj, [2, 3])
def test_rowwise_preserved_by_major_verbs():
    """Row-wise frames stay row-wise through the major verbs.

    ``summarise`` is the exception: its result is expected to be a
    ``TibbleGrouped`` that is still grouped by ``x``.
    """

    def check(result, expected_cls, expected_vars):
        # Shared assertion pair: result class and grouping variables.
        assert isinstance(result, expected_cls)
        assert group_vars(result) == expected_vars

    rf = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rf, f.y)
    check(arranged, TibbleRowwise, ["x"])

    filtered = filter(rf, f.x < 3)
    check(filtered, TibbleRowwise, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    check(mutated, TibbleRowwise, ["x"])

    # Renaming the grouping column renames the grouping variable too.
    renamed = rename(rf, X=f.x)
    check(renamed, TibbleRowwise, ["X"])

    selected = select(rf, "x")
    check(selected, TibbleRowwise, ["x"])

    sliced = slice(rf, c(0, 0))
    check(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    check(summarised, TibbleGrouped, ["x"])
def test_zero_row_dfs():
    """The main verbs cope with a grouped frame that has zero rows."""
    empty = tibble(a=[], b=[], g=[])
    empty_grouped = group_by(empty, f.g, _drop=False)
    assert empty_grouped.shape == (0, 3)
    assert group_vars(empty_grouped) == ["g"]
    assert group_size(empty_grouped) == []

    # summarise: yields columns g and n, and no grouping variables remain.
    summarised = summarise(empty_grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    # mutate: adds one column, grouping is kept.
    mutated = mutate(empty_grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    # filter: shape and grouping are unchanged.
    filtered = filter(empty_grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    # arrange: shape and grouping are unchanged.
    arranged = arrange(empty_grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # select: the grouping column is retained next to the selected one.
    selected = select(empty_grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []
def test_0_groups_arrange():
    """arrange() on a grouped frame with no rows keeps dims and columns."""
    no_rows = tibble(x=1).loc[[], :]
    grouped = no_rows >> group_by(f.x)
    arranged = grouped >> arrange(f.x)

    dims_before = grouped >> dim()
    dims_after = arranged >> dim()
    assert dims_before == dims_after

    assert grouped.columns.tolist() == arranged.columns.tolist()
def test_slice_handles_numeric_input():
    """slice(0) picks the same rows as ``row_number() == 1``."""
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # Grouped: three rows are expected in total.
    first_per_group = cars_by_cyl >> slice(0)
    assert nrow(first_per_group) == 3
    first_via_filter = cars_by_cyl >> filter(row_number() == 1)
    assert_frame_equal(first_per_group, first_via_filter)

    # Ungrouped: a single leading row, compared after as_tibble().
    first_overall = mtcars >> slice(0) >> as_tibble()
    first_overall_via_filter = mtcars >> filter(row_number() == 1)
    assert_frame_equal(first_overall, first_overall_via_filter)
def test_across():
    """arrange(across(...)) matches arranging by the columns explicitly."""
    data = tibble(x=[1, 3, 2, 1], y=[4, 3, 2, 1])

    # across() with no arguments: compared against sorting by x then y.
    via_across = data >> arrange(across())
    explicit = data >> arrange(f.x, f.y)
    assert via_across.equals(explicit)

    # across(None, desc): compared against desc() on x and on y.
    via_across = data >> arrange(across(None, desc))
    explicit = data >> arrange(desc(f.x), desc(f.y))
    assert via_across.equals(explicit)

    # across() restricted to x.
    via_across = data >> arrange(across(f.x))
    explicit = data >> arrange(f.x)
    assert via_across.equals(explicit)

    # across() restricted to y.
    via_across = data >> arrange(across(f.y))
    explicit = data >> arrange(f.y)
    assert via_across.equals(explicit)
def test_update_grouping():
    """arrange() on a grouped frame yields a grouped frame with new rows."""
    data = tibble(g=[2, 2, 1, 1], x=[1, 3, 2, 4])
    grouped = data >> group_by(f.g)
    arranged = grouped >> arrange(f.x)

    assert isinstance(arranged, TibbleGrouped)
    # Row positions per group as expected after sorting by ``x``.
    assert group_rows(arranged) == [[0, 2], [1, 3]]
def test_slice_works_fine_if_n_gt_nrow():
    """slice(7) on grouped data agrees with ``filter(row_number() == 8)``.

    NOTE(review): presumably at least one ``cyl`` group has fewer than
    eight rows, which is what the test name refers to -- confirm.
    """
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    via_slice = cars_by_cyl >> slice(7)
    via_filter = cars_by_cyl >> filter(row_number() == 8)

    assert via_slice.equals(via_filter)
def test_complex_cols():
    """A complex-valued column can be used as the sort key."""
    data = tibble(x=[1, 2, 3], y=[3 + 2j, 2 + 2j, 1 + 2j])

    arranged = data >> arrange(f.y)

    # ``x`` is expected to come back in reversed order.
    assert_iterable_equal(arranged.x, [3, 2, 1])
def test_df_cols():
    """A nested-tibble column can be used as the sort key."""
    data = tibble(x=[1, 2, 3], y=tibble(z=[3, 2, 1]))
    expected = tibble(x=[3, 2, 1], y=tibble(z=[1, 2, 3]))

    arranged = data >> arrange(f.y)

    # Compare on a fresh index so only the row order is checked.
    arranged_reindexed = arranged.reset_index(drop=True)
    assert arranged_reindexed.equals(expected)
def test_na_end():
    """Missing values sort last, both ascending and descending."""
    data = tibble(x=c(4, 3, NA))  # NA makes it float

    ascending = data >> arrange(f.x)
    assert_iterable_equal(ascending.x, [3, 4, None])

    descending = data >> arrange(desc(f.x))
    assert_iterable_equal(descending.x, [4, 3, None])
def test_sort_empty_df():
    """arrange() with no keys on an empty tibble gives an equal tibble."""
    empty = tibble()
    arranged = empty >> arrange()
    assert_tibble_equal(arranged, empty)
def test_filter_restructures_group_data_correctly():
    """A grouped mutate after filter() still works on the filtered frame."""
    by_cyl = mtcars >> arrange(f.gear) >> group_by(f.cyl)

    # Running total of ``drat``, then keep rows where it has reached 5.
    with_cumsum = by_cyl >> mutate(cum=f.drat.cumsum())
    filtered = with_cumsum >> filter(f.cum >= 5)

    # A second mutate exercises the group data left behind by filter().
    ranked = filtered >> mutate(ranking=f.cum.rank())

    assert nrow(ranked) == 29