Пример #1
0
 def test_nth(self):
     """Exercise gr.nth inside summarize and mutate, grouped and ungrouped.

     All cases run on the first ten rows of the diamonds data restricted to
     the ``cut`` and ``x`` columns.
     """
     diamonds = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)

     # Ungrouped summarize, position 1.
     observed = diamonds >> gr.tf_summarize(second=gr.nth(X.x, 1))
     expected = pd.DataFrame({"second": [3.89]})
     self.assertTrue(observed.equals(expected))

     # Grouped summarize, position 0 within each cut.
     observed = (
         diamonds
         >> gr.tf_group_by(X.cut)
         >> gr.tf_summarize(first=gr.nth(X.x, 0))
     )
     expected = pd.DataFrame(
         {
             "cut": ["Fair", "Good", "Ideal", "Premium", "Very Good"],
             "first": [3.87, 4.05, 3.95, 3.89, 3.94],
         }
     )
     self.assertTrue(observed.equals(expected))

     # Ungrouped summarize, position -1 under an explicit descending ordering.
     ordering = [gr.desc(X.cut), gr.desc(X.x)]
     observed = diamonds >> gr.tf_summarize(
         last=gr.nth(X.x, -1, order_by=ordering)
     )
     expected = pd.DataFrame({"last": [3.87]})
     self.assertTrue(expected.equals(observed))

     # Ungrouped mutate with a position past the end: the column is all NaN.
     observed = diamonds >> gr.tf_mutate(out_of_range=gr.nth(X.x, 500))
     expected = diamonds.assign(out_of_range=np.nan)
     self.assertTrue(observed.equals(expected))

     # Grouped mutate, position -2 within each cut; groups that are too
     # short are expected to yield NaN.
     observed = (
         diamonds
         >> gr.tf_group_by(X.cut)
         >> gr.tf_mutate(penultimate=gr.nth(X.x, -2))
     )
     penultimate_by_row = pd.Series(
         [np.nan, 3.89, 4.05, 3.89, 4.05, 4.07, 4.07, 4.07, np.nan, 4.07]
     )
     expected = diamonds.assign(penultimate=penultimate_by_row)
     # The result is compared after restoring the original row order.
     self.assertTrue(observed.sort_index().equals(expected))
Пример #2
0
 def test_last(self):
     """Exercise gr.last inside summarize and mutate, grouped and ungrouped.

     All cases run on the first five rows of the diamonds data restricted to
     the ``cut`` and ``x`` columns.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(l=gr.last(X.x))
     df_truth = pd.DataFrame({"l": [4.34]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(l=gr.last(X.x))
     df_truth = pd.DataFrame({
         "cut": ["Good", "Ideal", "Premium"],
         "l": [4.34, 3.95, 4.20]
     })
     self.assertTrue(t.equals(df_truth))
     # summarize with order_by
     t = df >> gr.tf_summarize(f=gr.last(
         X.x, order_by=[gr.desc(X.cut), gr.desc(X.x)]))
     df_truth = pd.DataFrame({"f": [4.05]})
     # Use the unittest assertion like every other check here: a bare
     # ``assert`` is stripped when Python runs with -O.
     self.assertTrue(df_truth.equals(t))
     # straight mutate
     t = df >> gr.tf_mutate(l=gr.last(X.x))
     df_truth = df.copy()
     df_truth["l"] = df_truth.x.iloc[4]
     self.assertTrue(t.equals(df_truth))
     # grouped mutate
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(l=gr.last(X.x))
     # Overwrite the previous "l" column with the per-group expectation.
     df_truth["l"] = pd.Series([3.95, 4.20, 4.34, 4.20, 4.34])
     # The result is compared after restoring the original row order.
     self.assertTrue(t.sort_index().equals(df_truth))
Пример #3
0
    def test_arrange(self):
        """Check gr.tf_arrange against pandas-built references.

        Covers arranging within groups by a column given as a string and as
        an ``X`` expression, and an ungrouped two-key descending arrange.
        """
        # Reference built with plain pandas groupby/apply.
        # NOTE(review): arrange_apply_helperfunc is defined elsewhere;
        # presumably it sorts each group by depth (descending) and keeps the
        # first five rows -- confirm against its definition.
        df = (
            data.df_diamonds.groupby("cut")
            .apply(arrange_apply_helperfunc)
            .reset_index(drop=True)
        )

        # Column named by string.
        d = (
            data.df_diamonds
            >> gr.tf_group_by("cut")
            >> gr.tf_arrange("depth", ascending=False)
            >> gr.tf_head(5)
            >> gr.tf_ungroup()
        ).reset_index(drop=True)
        self.assertTrue(df.equals(d))

        # Column named by X expression; must match the same reference.
        d = (
            data.df_diamonds
            >> gr.tf_group_by("cut")
            >> gr.tf_arrange(X.depth, ascending=False)
            >> gr.tf_head(5)
            >> gr.tf_ungroup()
        ).reset_index(drop=True)
        # Use the unittest assertion like the other checks: a bare ``assert``
        # is stripped when Python runs with -O.
        self.assertTrue(df.equals(d))

        # Ungrouped arrange on two descending keys versus DataFrame.sort_values.
        df = data.df_diamonds.sort_values(["cut", "price"], ascending=False)
        d = data.df_diamonds >> gr.tf_arrange(gr.desc(X.cut), gr.desc(X.price))
        self.assertTrue(df.equals(d))
Пример #4
0
 def test_first(self):
     """Exercise gr.first inside summarize and mutate, grouped and ungrouped.

     All cases run on the first five rows of the diamonds data restricted to
     the ``cut`` and ``x`` columns.
     """
     df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(5)
     # straight summarize
     t = df >> gr.tf_summarize(f=gr.first(X.x))
     df_truth = pd.DataFrame({"f": [3.95]})
     self.assertTrue(t.equals(df_truth))
     # grouped summarize
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_summarize(f=gr.first(X.x))
     df_truth = pd.DataFrame({
         "cut": ["Good", "Ideal", "Premium"],
         "f": [4.05, 3.95, 3.89]
     })
     self.assertTrue(t.equals(df_truth))
     # summarize with order_by
     t = df >> gr.tf_summarize(f=gr.first(X.x, order_by=gr.desc(X.cut)))
     df_truth = pd.DataFrame({"f": [3.89]})
     # This case previously built its expectation but never checked it, so
     # the order_by path was effectively untested.
     self.assertTrue(t.equals(df_truth))
     # straight mutate
     t = df >> gr.tf_mutate(f=gr.first(X.x))
     df_truth = df.copy()
     df_truth["f"] = df_truth.x.iloc[0]
     self.assertTrue(t.equals(df_truth))
     # grouped mutate
     t = df >> gr.tf_group_by(X.cut) >> gr.tf_mutate(f=gr.first(X.x))
     # Overwrite the previous "f" column with the per-group expectation.
     df_truth["f"] = pd.Series([3.95, 3.89, 4.05, 3.89, 4.05])
     # The result is compared after restoring the original row order.
     self.assertTrue(t.sort_index().equals(df_truth))
Пример #5
0
    def test_desc(self):
        """Check gr.desc as an order_by key and as a direct Series transform."""
        # desc used as an ordering key inside a summarize pipeline. The
        # result was previously computed but never checked; the expected
        # value is the one test_nth asserts for this same pipeline.
        df = data.df_diamonds >> gr.tf_select(X.cut, X.x) >> gr.tf_head(10)
        t = df >> gr.tf_summarize(last=gr.nth(
            X.x, -1, order_by=[gr.desc(X.cut), gr.desc(X.x)]))
        df_truth = pd.DataFrame({"last": [3.87]})
        self.assertTrue(df_truth.equals(t))

        # desc applied directly to numeric, boolean and string Series.
        series_num = pd.Series([4, 1, 3, 2])
        series_bool = pd.Series([True, False, True, False])
        series_str = pd.Series(["d", "a", "c", "b"])

        # Reference: descending rank with ties resolved to the minimum rank.
        num_truth = series_num.rank(method="min", ascending=False)
        bool_truth = series_bool.rank(method="min", ascending=False)
        str_truth = series_str.rank(method="min", ascending=False)

        self.assertTrue(gr.desc(series_num).equals(num_truth))
        self.assertTrue(gr.desc(series_bool).equals(bool_truth))
        self.assertTrue(gr.desc(series_str).equals(str_truth))