def facet_pages(column):
    """Yield one text-label plot of ``mpg`` against ``wt`` per group.

    ``mtcars`` is grouped by ``column``; for every group a ``ggplot`` is
    built from that group's rows, the shared aesthetics/geom layers are
    added, and the group label is used as the plot title.
    """
    shared_layers = [
        aes(x='wt', y='mpg', label='name'),
        geom_text(),
    ]
    grouped = mtcars.groupby(column)
    for label, group_data in grouped:
        page = ggplot(group_data) + shared_layers + ggtitle(label)
        yield page
def test_sorting_within_groups_head():
    """Sorting a grouped frame by ``qsec`` and taking ``tail(1)`` works per group.

    Despite "head" in the name, the chain takes the last row (``tail(1)``)
    of each ``cyl`` group after sorting.
    """
    # NOTE(review): .print() looks like leftover debugging output, but it is
    # part of the chain being exercised, so it is kept -- confirm intent.
    actual = dp(mtcars).groupby(X.cyl).print().sort_values("qsec").tail(1).pd

    # Reference result: sort each cyl group by hand and keep its last row.
    last_rows = [
        group.sort_values("qsec").tail(1)
        for _, group in mtcars.groupby("cyl")
    ]
    expected = pd.concat(last_rows)[actual.columns]

    assert_frame_equal(expected, actual)
def test_filter_by_vector_grouped():
    """``filter_by`` with a rank expression is evaluated within each group.

    Keeps the rows whose ``hp`` rank inside their ``cyl`` group is <= 2 and
    compares against the same selection done with plain pandas.
    """
    actual = dp(mtcars).groupby("cyl").filter_by(X.hp.rank() <= 2).ungroup().pd

    # Collect the names of the rows that should survive, group by group.
    names_to_keep = set()
    for _, group in mtcars.groupby("cyl"):
        in_lowest_two = group["hp"].rank() <= 2
        names_to_keep.update(group["name"][in_lowest_two])

    expected = mtcars[mtcars.name.isin(names_to_keep)]
    # Column order may differ; require the same set, then align the order.
    assert set(expected.columns) == set(actual.columns)
    expected = expected[actual.columns]

    assert_frame_equal(actual, expected)
def test_grouped_mutate_returns_scalar():
    """A scalar passed to a grouped ``mutate`` is broadcast to every row.

    After ``select("count")`` and ``ungroup()`` the result is expected to
    contain the grouping column ``cyl`` plus the new constant ``count``
    column, indexed like ``mtcars``.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .mutate(count=4)
        .select("count")
        .ungroup()
        .pd.sort_index()
    )
    # The original computed a per-group count here and immediately
    # overwrote it; that dead assignment has been dropped.
    should = ordered_DataFrame(
        {"cyl": mtcars.cyl, "count": 4},
        index=mtcars.index,
    )
    assert_frame_equal(should, actual)
def test_grouped_filter_by_returns_series():
    """``filter_by`` accepts a dict mapping each group to a boolean Series.

    The mask keeps rows whose descending ``hp`` rank within their ``cyl``
    group is <= 2; the expectation is rebuilt with plain pandas.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .filter_by(
            {
                grp: sub_df.hp.rank(ascending=False) <= 2
                for (grp, sub_df) in X.itergroups()
            }
        )
        .ungroup()
        .pd.sort_index()
    )

    # Names of the top-two-by-hp rows of every cyl group.
    names_to_keep = set()
    for _, group in mtcars.groupby("cyl"):
        in_top_two = group["hp"].rank(ascending=False) <= 2
        names_to_keep.update(group["name"][in_top_two])

    expected = mtcars[mtcars.name.isin(names_to_keep)]
    # Same columns required; order is aligned to the actual result.
    assert set(expected.columns) == set(actual.columns)
    expected = expected[actual.columns]

    assert_frame_equal(expected, actual)
def test_groupby_two_mutate_grouped():
    """Grouped ``mutate`` with a per-group dict works for two grouping columns.

    ``grp_rank`` is the rank of ``hp`` inside each (``cyl``, ``vs``) group;
    the selected result is expected to carry both grouping columns as well.
    """
    actual = (
        dp(mtcars)
        .groupby(["cyl", "vs"])
        .mutate(
            grp_rank={grp: sub_df.hp.rank() for (grp, sub_df) in X.itergroups()}
        )
        .select("grp_rank")
        .ungroup()
        .pd.sort_index()
    )

    # Per-group ranks computed by hand and stitched back together;
    # assign() aligns them to mtcars by index.
    ranks = pd.concat(
        [group["hp"].rank() for _, group in mtcars.groupby(["cyl", "vs"])]
    )
    expected = mtcars.assign(grp_rank=ranks)[["cyl", "vs", "grp_rank"]]

    assert_frame_equal(expected, actual)
def test_grouped_mutate_callable():
    """Grouped ``mutate`` accepts a callable producing one value per group.

    The expected ``max_hp`` column holds, for every row, the maximum ``hp``
    of that row's ``cyl`` group. Both frames are sorted by ``name`` before
    comparison.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .mutate(max_hp=lambda x: x["hp"].max())
        .select(["cyl", "max_hp", "name"])
        .ungroup()
        .pd
    )

    # Broadcast each group's maximum over that group's index.
    per_group_max = [
        pd.Series(group["hp"].max(), index=group.index)
        for _, group in mtcars.groupby("cyl")
    ]
    max_hp = pd.concat(per_group_max)
    expected = mtcars.assign(max_hp=max_hp)[["cyl", "max_hp", "name"]]
    expected = expected.sort_values("name")

    assert_frame_equal(expected, actual.sort_values("name"))
def test_grouped_mutate_returns_scalar_per_group_str():
    """Grouped ``mutate`` broadcasts one string scalar per group.

    Each ``cyl`` group gets the label ``"X" + <group size>``; the expected
    frame is built from the per-group counts of the ``name`` column.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .mutate(
            count={
                grp: "X" + str(len(sub_df)) for (grp, sub_df) in X.itergroups()
            }
        )
        .select("count")
        .ungroup()
        .pd.sort_index()
    )

    # Series indexed by cyl value holding the count of each group.
    group_sizes = mtcars.groupby("cyl").agg("count")["name"]
    labels = ["X" + str(group_sizes[cyl]) for cyl in mtcars.cyl]
    expected = ordered_DataFrame(
        {"cyl": mtcars.cyl, "count": labels},
        index=mtcars.index,
    )

    assert_frame_equal(expected, actual)
def test_interleaved_context_managers():
    """Two nested ``dppd`` contexts can be driven in alternating order.

    Calls on the ``mtcars`` pipeline and the ``diamonds`` pipeline are
    interleaved; each context's ``X``/``Y`` is then compared with the
    equivalent plain-pandas computation.
    """
    with dppd(mtcars) as (dpX, X):
        with dppd(diamonds) as (dpY, Y):
            dpX.groupby("cyl")
            dpY.filter_by(Y.cut == "Ideal")
            dpX.summarize(("hp", np.mean, "mean_hp"))
            dpY.summarize(("price", np.max, "max_price"))

    # NOTE(review): the expectations and assertions are placed after both
    # context managers have exited -- confirm this matches the original layout.

    # Expected for X: mean hp per cyl, with cyl as a regular column.
    mean_hp = mtcars.groupby("cyl")[["hp"]].agg(np.mean)
    expected_x = mean_hp.rename(columns={"hp": "mean_hp"}).reset_index()

    # Expected for Y: a one-row frame holding the maximum price of the
    # "Ideal" cut, cast to int.
    ideal = diamonds[diamonds.cut == "Ideal"]
    max_price = pd.DataFrame(ideal.max()[["price"]]).transpose()
    expected_y = max_price.rename(columns={"price": "max_price"})
    expected_y["max_price"] = expected_y["max_price"].astype(int)

    assert_frame_equal(X, expected_x)
    assert_frame_equal(Y, expected_y)
def test_iter_tuples_in_group_by():
    """``itertuples`` on a grouped pipeline yields (key, row-tuples) pairs.

    The expected mapping is keyed by one-element tuples ``(key,)`` and holds
    the list of ``DataFrame.itertuples()`` rows for each ``cyl`` group.
    """
    actual = {k: list(v) for (k, v) in dp(mtcars).groupby("cyl").itertuples()}
    expected = {
        (key,): list(group.itertuples())
        for key, group in mtcars.groupby("cyl")
    }
    assert actual == expected
def test_grouped_filter_by_X_apply():
    """``filter_by`` accepts the result of ``X.apply`` on a grouped pipeline.

    Only rows belonging to ``cyl`` groups with more than 10 rows are expected
    to remain; column order is not checked.
    """
    actual = (
        dp(mtcars)
        .groupby("cyl")
        .filter_by(X.apply(len) > 10)
        .ungroup()
        .pd
    )

    # Boolean Series indexed by cyl: True where the group has > 10 rows.
    is_large_group = mtcars.groupby("cyl").apply(len) > 10
    large_cyls = is_large_group.index[is_large_group]
    expected = mtcars[mtcars.cyl.isin(large_cyls)]

    assert_frame_equal(expected, actual, check_column_order=False)
def test_groupby_within_chain_select_on_group():
    """``select`` on a grouped pipeline followed by ``mean`` aggregates per group.

    The expected frame is the mean ``hp`` per ``cyl`` group, indexed by
    ``cyl``.
    """
    actual = dp(mtcars).groupby("cyl").select("hp").mean().pd
    # Select "hp" *before* aggregating: taking the mean of every column would
    # also hit the non-numeric "name" column, which raises on pandas versions
    # where numeric_only defaults to False. The resulting frame is the same.
    should = mtcars.groupby("cyl")[["hp"]].mean()
    assert_frame_equal(should, actual)
def test_basic_summary():
    """``summarize`` with ``(column, func, name)`` yields one row per group.

    ``len`` applied to ``hp`` per ``cyl`` group is compared with pandas'
    per-group ``count`` of ``hp``, renamed to ``count`` and with ``cyl``
    restored as a regular column.
    """
    actual = dp(mtcars).groupby("cyl").summarize((X.hp, len, "count")).pd
    expected = (
        mtcars.groupby("cyl")[["hp"]]
        .agg("count")
        .rename(columns={"hp": "count"})
        .reset_index()
    )
    # NOTE(review): the original author annotated this assertion with
    # "will fail" without giving a reason -- confirm whether that still holds.
    assert_frame_equal(expected, actual)  # will fail
def test_sorting_within_groups_head_ungroup():
    """After ``arrange`` within groups and ``ungroup``, ``tail(1)`` is global.

    The single remaining row is expected to equal the last row (by ``qsec``)
    of the final ``cyl`` group yielded by ``mtcars.groupby``.
    """
    actual = dp(mtcars).groupby(X.cyl).arrange("qsec").ungroup().tail(1).pd

    # The loop rebinds `expected` on every pass, so only the value from the
    # last group yielded by groupby reaches the assertion below.
    for _, group in mtcars.groupby("cyl"):
        ordered = group.sort_values("qsec")
        expected = ordered.tail(1)[actual.columns]

    assert_frame_equal(expected, actual)