def test_preserves_grouping():
    """Filtering a grouped tibble keeps ``g`` as the grouping variable.

    Two filters are applied to the same grouped frame; after each one the
    grouping variables and the per-group row indices are checked.
    """
    data = tibble(g=[1, 1, 1, 2, 2], x=[1, 2, 3, 4, 5])
    grouped = data >> group_by(f.g)

    # Keep x in {3, 4}: one surviving row in each of the two groups.
    picked = grouped >> filter(is_element(f.x, [3, 4]))
    assert group_vars(picked) == ["g"]
    assert group_rows(picked) == [[0], [1]]

    # Keep x < 3: a single group holding both remaining rows.
    below_three = grouped >> filter(f.x < 3)
    assert group_vars(below_three) == ["g"]
    assert group_rows(below_three) == [[0, 1]]
def test_grouped_filter_handles_indices():
    """A mutate after a grouped filter leaves the group structure unchanged.

    Row count, group row indices and group keys are compared between the
    filtered frame and the same frame with an extra ``Petal`` column.
    """
    by_species = iris >> group_by(f.Species)
    filtered = by_species >> filter(f.Sepal_Length > 5)
    with_petal = filtered >> mutate(Petal=f.Petal_Width * f.Petal_Length)

    assert nrow(filtered) == nrow(with_petal)

    rows_before = group_rows(filtered)
    rows_after = group_rows(with_petal)
    assert rows_before == rows_after

    # NOTE(review): if `all` here is the Python builtin, iterating the
    # comparison frame yields column labels rather than cell values --
    # confirm which `all` is in scope for this module.
    assert all(group_keys(filtered) == group_keys(with_petal))
def test_group_rows_group_keys_partition_group_data():
    """group_keys() and group_rows() together make up group_data().

    For an ungrouped tibble, group_rows() is a single group with every row.
    For a frame grouped by ``x`` and ``y``, the first two columns of
    group_data() equal group_keys(), and pulling its last column as a list
    equals group_rows().
    """
    plain = tibble(x=[1, 2], y=[1, 2])
    plain_rows = group_rows(plain)
    assert plain_rows == [[0, 1]]

    grouped = group_by(plain, f.x, f.y)
    gdata = group_data(grouped)
    assert group_keys(grouped).equals(gdata.iloc[:, [0, 1]])
    assert pull(gdata, to="list") == group_rows(grouped)
def test_rowwise_group_structure_is_updated_after_a_join():
    """Rowwise group rows are refreshed after a left join.

    Mirrors the R case
    ``test_that("rowwise group structure is updated after a join (#5227)")``.
    """
    left = rowwise(tibble(x=[1, 2]))
    right = tibble(x=c([1, 2], 2))

    joined = left_join(left, right, by="x")

    # Each of the three joined rows is expected to be its own group.
    assert group_rows(joined) == [[0], [1], [2]]
def test_group_modify_makes_a_grouped_df():
    """Check row counts and group rows of group_modify() results.

    Three scenarios: taking the first two rows of each ``cyl`` group of
    ``mtcars``, and tallying ``iris`` filtered to "setosa" both with the
    default grouping and with ``_drop=False``.
    """
    by_cyl = group_by(mtcars, f.cyl)
    top_two = by_cyl >> group_modify(lambda df: head(df, 2))
    assert nrow(top_two) == 6
    assert group_rows(top_two) == [[0, 1], [2, 3], [4, 5]]

    setosa = iris >> group_by(f.Species) >> filter(f.Species == "setosa")
    counted = setosa >> group_modify(lambda df: tally(df))
    assert nrow(counted) == 1
    assert group_rows(counted) == [[0]]

    setosa_keep = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
    )
    counted_keep = setosa_keep >> group_modify(lambda df: tally(df))
    # The original file carries two disabled expectations for this case:
    #   nrow(res) == 3 and group_rows(res) == [[0], [1], [2]]
    # NOTE(review): presumably what retaining empty groups would give --
    # confirm before re-enabling. The active assertions are the ones below.
    assert nrow(counted_keep) == 1
    assert group_rows(counted_keep) == [[0]]
def test_correctly_reconstructs_groups():
    """Group rows are rebuilt after summarise() on a two-key grouping."""
    base = tibble(x=[1, 2, 3, 4], g1=rep([1, 2], 2), g2=[1, 2, 3, 4])
    grouped = base >> group_by(f.g1, f.g2)
    summarised = grouped >> summarise(x=f.x + 1)

    # Different from dplyr, original df does not reorder.
    assert group_rows(summarised) == [[0, 2], [1, 3]]
def test_filter_false_handles_indices(caplog):
    """filter(False) on a grouped frame, with and without ``_preserve``.

    With ``_preserve=True`` the captured log text must contain "support".
    With ``_preserve=False`` the resulting group rows must be empty.
    """
    preserved = mtcars >> group_by(f.cyl) >> filter(False, _preserve=True)
    assert "support" in caplog.text
    # Disabled in the original for the `_preserve=True` result:
    #   group_rows(out) == [[], [], []]

    dropped = mtcars >> group_by(f.cyl) >> filter(False, _preserve=False)
    dropped_rows = group_rows(dropped)
    assert dropped_rows == []
def test_removes_vars_with_None():
    """Assigning ``None`` in mutate() removes the column.

    Covers a plain tibble, a grouped tibble (grouping is kept), a column
    that does not exist, a positional Series cancelled by a ``None``
    keyword of the same name, and removal combined with a new column.
    """
    frame = tibble(x=range(1, 4), y=range(1, 4))
    grouped = group_by(frame, f.x)

    # Plain frame: only `x` is left.
    result = frame >> mutate(y=None)
    assert result.columns.tolist() == ["x"]

    # Grouped frame: `y` is removed and the grouping on `x` is intact.
    result = grouped >> mutate(y=None)
    assert result.columns.tolist() == ["x"]
    assert isinstance(result, TibbleGrouped)
    assert group_vars(result) == ["x"]
    assert group_rows(result) == [[0], [1], [2]]

    # even if it doesn't exist
    result = frame >> mutate(z=None)
    assert result.equals(frame)

    # A Series named "z" passed positionally, then dropped via z=None.
    z_series = Series(1, name="z")
    result = frame >> mutate(z_series, z=None)
    assert result.equals(frame)

    # Add `z` while removing every pre-existing column.
    single = tibble(x=1, y=1)
    result = mutate(single, z=1, x=None, y=None)
    assert result.equals(tibble(z=1))
def test_update_grouping():
    """arrange() on a grouped tibble stays grouped with updated group rows."""
    frame = tibble(g=[2, 2, 1, 1], x=[1, 3, 2, 4])
    grouped = frame >> group_by(f.g)
    ordered = grouped >> arrange(f.x)

    assert isinstance(ordered, TibbleGrouped)
    assert group_rows(ordered) == [[0, 2], [1, 3]]
def test_slice_strips_grouped_indices():
    """slice() then mutate() on grouped ``mtcars`` gives one row per group."""
    by_cyl = mtcars >> group_by(f.cyl)
    sliced = by_cyl >> slice(1)
    result = sliced >> mutate(mpgplus=f.mpg + 1)

    assert nrow(result) == 3
    assert group_rows(result) == [[0], [1], [2]]