def test_nth(self):
    """Exercise ``gr.nth`` inside summarize and mutate, grouped and ungrouped.

    Uses the ``cut`` and ``x`` columns of the first ten rows of
    ``data.df_diamonds`` and compares every pipeline result against a
    hand-written frame via ``DataFrame.equals``.
    """
    subset = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)

    # Ungrouped summarize, position 1.
    result = subset >> gr.tf_summarize(second=gr.nth(X.x, 1))
    expected = pd.DataFrame({"second": [3.89]})
    self.assertTrue(result.equals(expected))

    # Summarize per cut, position 0.
    result = (
        subset
        >> gr.tf_group_by(X.cut)
        >> gr.tf_summarize(first=gr.nth(X.x, 0))
    )
    expected = pd.DataFrame(
        {
            "cut": ["Fair", "Good", "Ideal", "Premium", "Very Good"],
            "first": [3.87, 4.05, 3.95, 3.89, 3.94],
        }
    )
    self.assertTrue(result.equals(expected))

    # Ungrouped summarize, position -1, with an explicit two-key ordering.
    result = subset >> gr.tf_summarize(
        last=gr.nth(X.x, -1, order_by=[gr.desc(X.cut), gr.desc(X.x)])
    )
    expected = pd.DataFrame({"last": [3.87]})
    self.assertTrue(expected.equals(result))

    # Ungrouped mutate with a position (500) beyond the ten available rows:
    # the new column is expected to be NaN throughout.
    result = subset >> gr.tf_mutate(out_of_range=gr.nth(X.x, 500))
    expected = subset.assign(out_of_range=np.nan)
    self.assertTrue(result.equals(expected))

    # Mutate per cut, position -2; the expected column has NaN for two rows.
    result = (
        subset
        >> gr.tf_group_by(X.cut)
        >> gr.tf_mutate(penultimate=gr.nth(X.x, -2))
    )
    expected = subset.assign(
        penultimate=pd.Series(
            [np.nan, 3.89, 4.05, 3.89, 4.05, 4.07, 4.07, 4.07, np.nan, 4.07]
        )
    )
    # The grouped result is re-sorted by index before comparing.
    self.assertTrue(result.sort_index().equals(expected))
def test_last(self):
    """Exercise ``gr.last`` inside summarize and mutate, grouped and ungrouped.

    Uses the ``cut`` and ``x`` columns of the first five rows of
    ``data.df_diamonds``. Every check goes through ``self.assertTrue`` so
    that none of them is dropped when Python runs with ``-O``.
    """
    df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)

    # straight summarize
    t = df >> gr.tf_summarize(l=gr.last(X.x))
    df_truth = pd.DataFrame({"l": [4.34]})
    self.assertTrue(t.equals(df_truth))

    # grouped summarize
    t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(l=gr.last(X.x))
    df_truth = pd.DataFrame(
        {
            "cut": ["Good", "Ideal", "Premium"],
            "l": [4.34, 3.95, 4.20],
        }
    )
    self.assertTrue(t.equals(df_truth))

    # summarize with order_by
    t = df >> gr.tf_summarize(
        f=gr.last(X.x, order_by=[gr.desc(X.cut), gr.desc(X.x)])
    )
    df_truth = pd.DataFrame({"f": [4.05]})
    # Was a bare ``assert``; now matches the other checks in this method.
    self.assertTrue(df_truth.equals(t))

    # straight mutate
    t = df >> gr.tf_mutate(l=gr.last(X.x))
    df_truth = df.copy()
    df_truth["l"] = df_truth.x.iloc[4]
    self.assertTrue(t.equals(df_truth))

    # grouped mutate (reuses df_truth from above, overwriting column "l")
    t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(l=gr.last(X.x))
    df_truth["l"] = pd.Series([3.95, 4.20, 4.34, 4.20, 4.34])
    # The grouped result is re-sorted by index before comparing.
    self.assertTrue(t.sort_index().equals(df_truth))
def test_arrange(self):
    """Compare ``gr.tf_arrange`` against equivalent plain-pandas results.

    Covers grouped arrange by a column given as a string and as ``X.depth``
    (both with ``ascending=False`` followed by ``tf_head(5)``), and an
    ungrouped arrange on two ``gr.desc`` keys. Every check goes through
    ``self.assertTrue`` so that none of them is dropped under ``-O``.
    """
    # Reference frame built with pandas groupby/apply.
    # NOTE(review): arrange_apply_helperfunc is defined outside this block;
    # presumably it sorts each group by depth descending and keeps five
    # rows, mirroring the pipelines below -- confirm.
    df = (
        data.df_diamonds.groupby("cut")
        .apply(arrange_apply_helperfunc)
        .reset_index(drop=True)
    )

    # Column named by string.
    d = (
        data.df_diamonds
        >> gr.tf_group_by("cut")
        >> gr.tf_arrange("depth", ascending=False)
        >> gr.tf_head(5)
        >> gr.tf_ungroup()
    ).reset_index(drop=True)
    self.assertTrue(df.equals(d))

    # Column named symbolically.
    d = (
        data.df_diamonds
        >> gr.tf_group_by("cut")
        >> gr.tf_arrange(X.depth, ascending=False)
        >> gr.tf_head(5)
        >> gr.tf_ungroup()
    ).reset_index(drop=True)
    # Was a bare ``assert``; now matches the other checks in this method.
    self.assertTrue(df.equals(d))

    # Ungrouped arrange on two descending keys vs. DataFrame.sort_values.
    df = data.df_diamonds.sort_values(["cut", "price"], ascending=False)
    d = data.df_diamonds >> gr.tf_arrange(gr.desc(X.cut), gr.desc(X.price))
    self.assertTrue(df.equals(d))
def test_first(self):
    """Exercise ``gr.first`` inside summarize and mutate, grouped and ungrouped.

    Uses the ``cut`` and ``x`` columns of the first five rows of
    ``data.df_diamonds`` and compares every pipeline result against a
    hand-written frame via ``DataFrame.equals``.
    """
    df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)

    # straight summarize
    t = df >> gr.tf_summarize(f=gr.first(X.x))
    df_truth = pd.DataFrame({"f": [3.95]})
    self.assertTrue(t.equals(df_truth))

    # grouped summarize
    t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(f=gr.first(X.x))
    df_truth = pd.DataFrame(
        {
            "cut": ["Good", "Ideal", "Premium"],
            "f": [4.05, 3.95, 3.89],
        }
    )
    self.assertTrue(t.equals(df_truth))

    # summarize with order_by
    t = df >> gr.tf_summarize(f=gr.first(X.x, order_by=gr.desc(X.cut)))
    df_truth = pd.DataFrame({"f": [3.89]})
    # This comparison was previously missing, so the order_by path was
    # computed but never checked.
    # NOTE(review): two of the five rows share the cut whose first x is
    # 3.89 (see the grouped-mutate truth below), so this expectation relies
    # on tied rows keeping their original relative order -- confirm that
    # the ordering used by order_by guarantees this.
    self.assertTrue(t.equals(df_truth))

    # straight mutate
    t = df >> gr.tf_mutate(f=gr.first(X.x))
    df_truth = df.copy()
    df_truth["f"] = df_truth.x.iloc[0]
    self.assertTrue(t.equals(df_truth))

    # grouped mutate (reuses df_truth from above, overwriting column "f")
    t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(f=gr.first(X.x))
    df_truth["f"] = pd.Series([3.95, 3.89, 4.05, 3.89, 4.05])
    # The grouped result is re-sorted by index before comparing.
    self.assertTrue(t.sort_index().equals(df_truth))
def test_desc(self):
    """Check ``gr.desc`` both inside a pipeline and on plain Series.

    The pipeline part uses ``gr.desc`` as ``order_by`` keys for ``gr.nth``
    on the first ten rows of ``data.df_diamonds``. The Series part checks
    that ``gr.desc(s)`` equals ``s.rank(method="min", ascending=False)``
    for numeric, boolean and string data.
    """
    df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)

    # desc used as ordering keys inside a summarize.
    t = df >> gr.tf_summarize(
        last=gr.nth(X.x, -1, order_by=[gr.desc(X.cut), gr.desc(X.x)])
    )
    # The result was previously computed and discarded; compare it against
    # the expected value for this subset and ordering.
    df_truth = pd.DataFrame({"last": [3.87]})
    self.assertTrue(df_truth.equals(t))

    # desc applied directly to Series of different dtypes.
    series_num = pd.Series([4, 1, 3, 2])
    series_bool = pd.Series([True, False, True, False])
    series_str = pd.Series(["d", "a", "c", "b"])

    num_truth = series_num.rank(method="min", ascending=False)
    bool_truth = series_bool.rank(method="min", ascending=False)
    str_truth = series_str.rank(method="min", ascending=False)

    self.assertTrue(gr.desc(series_num).equals(num_truth))
    self.assertTrue(gr.desc(series_bool).equals(bool_truth))
    self.assertTrue(gr.desc(series_str).equals(str_truth))