Пример #1
0
def test_apply_no_grouper(df):

    df = df[["x", "y"]]
    res = GroupBy(["a"]).apply(df, lambda x: x.sort_values("x"))
    assert_array_equal(res.columns, ["x", "y"])
    assert_array_equal(res["x"], df["x"].sort_values())
    assert_array_equal(res["y"], df.loc[np.argsort(df["x"]), "y"])
Пример #2
0
    def __call__(self, data: DataFrame, groupby: GroupBy,
                 orient: str) -> DataFrame:

        # TODO where to ensure that other semantic variables are sorted properly?
        # TODO why are we not using the passed in groupby here?
        groupers = ["col", "row", orient]
        return GroupBy(groupers).apply(data, self._stack, orient)
Пример #3
0
def test_agg_one_grouper(df):

    res = GroupBy(["a"]).agg(df, {"y": "max"})
    assert_array_equal(res.index, [0, 1])
    assert_array_equal(res.columns, ["a", "y"])
    assert_array_equal(res["a"], ["a", "b"])
    assert_array_equal(res["y"], [.8, .5])
Пример #4
0
    def test_default_groups(self, df, orient):

        other = {"x": "y", "y": "x"}[orient]
        gb = GroupBy(["grp2"])
        res = Norm()(df, gb, orient)
        for _, grp in res.groupby("grp2"):
            assert grp[other].max() == pytest.approx(1)
Пример #5
0
    def test_faceted_drop(self, toy_df_facets):

        groupby = GroupBy(["x", "grp", "col"])
        res = Dodge(empty="drop")(toy_df_facets, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3, 1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1, 0, 1, 2])
        assert_array_almost_equal(res["width"], [.4] * 6)
Пример #6
0
    def test_faceted_default(self, toy_df_facets):

        groupby = GroupBy(["x", "grp", "col"])
        res = Dodge()(toy_df_facets, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3, 1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, .8, .2, .8, 2.2])
        assert_array_almost_equal(res["width"], [.4] * 6)
Пример #7
0
    def test_widths_drop(self, toy_df_widths):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(empty="drop")(toy_df_widths, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.08, .32, 1])
        assert_array_almost_equal(res["width"], [.64, .16, .2])
Пример #8
0
    def test_gap(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(gap=.25)(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1.2])
        assert_array_almost_equal(res["width"], [.3, .3, .3])
Пример #9
0
    def test_drop(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge("drop")(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3])
        assert_array_almost_equal(res["x"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .4])
Пример #10
0
    def test_fill(self, toy_df):

        groupby = GroupBy(["x", "grp"])
        res = Dodge(empty="fill")(toy_df, groupby, "x")

        assert_array_equal(res["y"], [1, 2, 3]),
        assert_array_almost_equal(res["x"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .8])
Пример #11
0
    def triple_args(self):

        groupby = GroupBy(["group", "a", "s"])

        class Scale:
            scale_type = "continuous"

        return groupby, "x", {"x": Scale()}
Пример #12
0
    def test_basic(self, toy_df):

        groupby = GroupBy(["color", "group"])
        res = Stack()(toy_df, groupby, "x")

        assert_array_equal(res["x"], [0, 0, 1])
        assert_array_equal(res["y"], [1, 3, 3])
        assert_array_equal(res["baseline"], [0, 1, 0])
Пример #13
0
def test_apply_one_grouper(df):

    res = GroupBy(["a"]).apply(df, lambda x: x.sort_values("x"))
    assert_array_equal(res.index, [0, 1, 2, 3, 4])
    assert_array_equal(res.columns, ["a", "b", "x", "y"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["b"], ["g", "h", "f", "f", "h"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])
Пример #14
0
    def test_baseline_homogeneity_check(self, toy_df):

        toy_df["baseline"] = [0, 1, 2]
        groupby = GroupBy(["color", "group"])
        move = Stack()
        err = "Stack move cannot be used when baselines"
        with pytest.raises(RuntimeError, match=err):
            move(toy_df, groupby, "x")
Пример #15
0
    def test_faceted(self, toy_df_facets):

        groupby = GroupBy(["color", "group"])
        res = Stack()(toy_df_facets, groupby, "x")

        assert_array_equal(res["x"], [0, 0, 1, 0, 1, 2])
        assert_array_equal(res["y"], [1, 3, 3, 1, 2, 3])
        assert_array_equal(res["baseline"], [0, 1, 0, 0, 0, 0])
Пример #16
0
def test_agg_two_groupers(df):

    res = GroupBy(["a", "x"]).agg(df, {"y": "min"})
    assert_array_equal(res.index, [0, 1, 2, 3, 4, 5])
    assert_array_equal(res.columns, ["a", "x", "y"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b", "b"])
    assert_array_equal(res["x"], [1, 2, 3, 1, 2, 3])
    assert_array_equal(res["y"], [.2, .8, np.nan, np.nan, .4, .5])
Пример #17
0
    def test_orient(self, toy_df):

        df = toy_df.assign(x=toy_df["y"], y=toy_df["x"])

        groupby = GroupBy(["y", "grp"])
        res = Dodge("drop")(df, groupby, "y")

        assert_array_equal(res["x"], [1, 2, 3])
        assert_array_almost_equal(res["y"], [-.2, .2, 1])
        assert_array_almost_equal(res["width"], [.4, .4, .4])
Пример #18
0
    def test_no_grouper(self, df):

        groupby = GroupBy(["group"])
        res = PolyFit(order=1, gridsize=100)(df[["x", "y"]], groupby, "x", {})

        assert_array_equal(res.columns, ["x", "y"])

        grid = np.linspace(df["x"].min(), df["x"].max(), 100)
        assert_array_equal(res["x"], grid)
        assert_array_almost_equal(res["y"].diff().diff().dropna(),
                                  np.zeros(grid.size - 2))
Пример #19
0
def test_agg_two_groupers_ordered(df):

    order = {"b": ["h", "g", "f"], "x": [3, 2, 1]}
    res = GroupBy(order).agg(df, {"a": "min", "y": lambda x: x.iloc[0]})
    assert_array_equal(res.index, [0, 1, 2, 3, 4, 5, 6, 7, 8])
    assert_array_equal(res.columns, ["a", "b", "x", "y"])
    assert_array_equal(res["b"], ["h", "h", "h", "g", "g", "g", "f", "f", "f"])
    assert_array_equal(res["x"], [3, 2, 1, 3, 2, 1, 3, 2, 1])

    T, F = True, False
    assert_array_equal(res["a"].isna(), [F, T, F, T, T, F, T, F, T])
    assert_array_equal(res["a"].dropna(), ["b", "a", "a", "a"])
    assert_array_equal(res["y"].dropna(), [.5, .3, .2, .8])
Пример #20
0
    def __call__(
        self,
        data: DataFrame,
        groupby: GroupBy,
        orient: str,
        scales: dict[str, Scale],
    ) -> DataFrame:

        var = {"x": "y", "y": "x"}.get(orient)
        res = (groupby.agg(data, {
            var: self.func
        }).dropna().reset_index(drop=True))
        return res
Пример #21
0
def test_apply_replace_columns(df):

    def add_sorted_cumsum(df):

        x = df["x"].sort_values()
        z = df.loc[x.index, "y"].cumsum()
        return pd.DataFrame(dict(x=x.values, z=z.values))

    res = GroupBy(["a"]).apply(df, add_sorted_cumsum)
    assert_array_equal(res.index, df.index)
    assert_array_equal(res.columns, ["a", "x", "z"])
    assert_array_equal(res["a"], ["a", "a", "a", "b", "b"])
    assert_array_equal(res["x"], [1, 1, 2, 2, 3])
    assert_array_equal(res["z"], [.2, .5, 1.3, .4, .9])
Пример #22
0
    def test_one_grouper(self, df):

        groupby = GroupBy(["group"])
        gridsize = 50
        res = PolyFit(gridsize=gridsize)(df, groupby, "x", {})

        assert res.columns.to_list() == ["x", "y", "group"]

        ngroups = df["group"].nunique()
        assert_array_equal(res.index, np.arange(ngroups * gridsize))

        for _, part in res.groupby("group"):
            grid = np.linspace(part["x"].min(), part["x"].max(), gridsize)
            assert_array_equal(part["x"], grid)
            assert part["y"].diff().diff().dropna().abs().gt(0).all()
Пример #23
0
    def test_single_semantic(self, df, grp):

        groupby = GroupBy(["x", grp])
        res = Dodge()(df, groupby, "x")

        levels = categorical_order(df[grp])
        w, n = 0.8, len(levels)

        shifts = np.linspace(0, w - w / n, n)
        shifts -= shifts.mean()

        assert_series_equal(res["y"], df["y"])
        assert_series_equal(res["width"], df["width"] / n)

        for val, shift in zip(levels, shifts):
            rows = df[grp] == val
            assert_series_equal(res.loc[rows, "x"], df.loc[rows, "x"] + shift)
Пример #24
0
def test_apply_mutate_columns(df):

    xx = np.arange(0, 5)
    hats = []

    def polyfit(df):
        fit = np.polyfit(df["x"], df["y"], 1)
        hat = np.polyval(fit, xx)
        hats.append(hat)
        return pd.DataFrame(dict(x=xx, y=hat))

    res = GroupBy(["a"]).apply(df, polyfit)
    assert_array_equal(res.index, np.arange(xx.size * 2))
    assert_array_equal(res.columns, ["a", "x", "y"])
    assert_array_equal(res["a"], ["a"] * xx.size + ["b"] * xx.size)
    assert_array_equal(res["x"], xx.tolist() + xx.tolist())
    assert_array_equal(res["y"], np.concatenate(hats))
Пример #25
0
    def test_two_semantics(self, df):

        groupby = GroupBy(["x", "grp2", "grp3"])
        res = Dodge()(df, groupby, "x")

        levels = categorical_order(df["grp2"]), categorical_order(df["grp3"])
        w, n = 0.8, len(levels[0]) * len(levels[1])

        shifts = np.linspace(0, w - w / n, n)
        shifts -= shifts.mean()

        assert_series_equal(res["y"], df["y"])
        assert_series_equal(res["width"], df["width"] / n)

        for (v2, v3), shift in zip(product(*levels), shifts):
            rows = (df["grp2"] == v2) & (df["grp3"] == v3)
            assert_series_equal(res.loc[rows, "x"], df.loc[rows, "x"] + shift)
Пример #26
0
    def __call__(
        self,
        data: DataFrame,
        groupby: GroupBy,
        orient: str,
        scales: dict[str, Scale],
    ) -> DataFrame:

        boot_kws = {"n_boot": self.n_boot, "seed": self.seed}
        engine = EstimateAggregator(self.func, self.errorbar, **boot_kws)

        var = {"x": "y", "y": "x"}.get(orient)
        res = (groupby.apply(
            data, self._process, var,
            engine).dropna(subset=["x", "y"]).reset_index(drop=True))

        res = res.fillna({f"{var}min": res[var], f"{var}max": res[var]})

        return res
Пример #27
0
    def __call__(self, data: DataFrame, groupby: GroupBy,
                 orient: str) -> DataFrame:

        grouping_vars = [v for v in groupby.order if v in data]
        groups = groupby.agg(data, {"width": "max"})
        if self.empty == "fill":
            groups = groups.dropna()

        def groupby_pos(s):
            grouper = [groups[v] for v in [orient, "col", "row"] if v in data]
            return s.groupby(grouper, sort=False, observed=True)

        def scale_widths(w):
            # TODO what value to fill missing widths??? Hard problem...
            # TODO short circuit this if outer widths has no variance?
            empty = 0 if self.empty == "fill" else w.mean()
            filled = w.fillna(empty)
            scale = filled.max()
            norm = filled.sum()
            if self.empty == "keep":
                w = filled
            return w / norm * scale

        def widths_to_offsets(w):
            return w.shift(1).fillna(0).cumsum() + (w - w.sum()) / 2

        new_widths = groupby_pos(groups["width"]).transform(scale_widths)
        offsets = groupby_pos(new_widths).transform(widths_to_offsets)

        if self.gap:
            new_widths *= 1 - self.gap

        groups["_dodged"] = groups[orient] + offsets
        groups["width"] = new_widths

        out = (data.drop("width", axis=1).merge(
            groups, on=grouping_vars,
            how="left").drop(orient,
                             axis=1).rename(columns={"_dodged": orient}))

        return out
Пример #28
0
    def __call__(self, data, groupby, orient, scales):

        # TODO better to do this as an isinstance check?
        # We are only asking about Nominal scales now,
        # but presumably would apply to Ordinal too?
        scale_type = scales[orient].__class__.__name__.lower()
        grouping_vars = [v for v in data if v in groupby.order]
        if not grouping_vars or self.common_bins is True:
            bin_kws = self._define_bin_params(data, orient, scale_type)
            data = groupby.apply(data, self._eval, orient, bin_kws)
        else:
            if self.common_bins is False:
                bin_groupby = GroupBy(grouping_vars)
            else:
                bin_groupby = GroupBy(self.common_bins)
            data = bin_groupby.apply(
                data,
                self._get_bins_and_eval,
                orient,
                groupby,
                scale_type,
            )

        # TODO Make this an option?
        # (This needs to be tested if enabled, and maybe should be in _eval)
        # other = {"x": "y", "y": "x"}[orient]
        # data = data[data[other] > 0]

        if not grouping_vars or self.common_norm is True:
            data = self._normalize(data, orient)
        else:
            if self.common_norm is False:
                norm_grouper = grouping_vars
            else:
                norm_grouper = self.common_norm
            normalize = partial(self._normalize, orient=orient)
            data = GroupBy(norm_grouper).apply(data, normalize)

        return data
Пример #29
0
    def __call__(self, data, groupby, orient, scales):

        scale_type = scales[orient].scale_type
        grouping_vars = [v for v in data if v in groupby.order]
        if not grouping_vars or self.common_bins is True:
            bin_kws = self._define_bin_params(data, orient, scale_type)
            data = groupby.apply(data, self._eval, orient, bin_kws)
        else:
            if self.common_bins is False:
                bin_groupby = GroupBy(grouping_vars)
            else:
                bin_groupby = GroupBy(self.common_bins)
            data = bin_groupby.apply(
                data,
                self._get_bins_and_eval,
                orient,
                groupby,
                scale_type,
            )

        # TODO Make this an option?
        # (This needs to be tested if enabled, and maybe should be in _eval)
        # other = {"x": "y", "y": "x"}[orient]
        # data = data[data[other] > 0]

        if not grouping_vars or self.common_norm is True:
            data = self._normalize(data, orient)
        else:
            if self.common_norm is False:
                norm_grouper = grouping_vars
            else:
                norm_grouper = self.common_norm
            normalize = partial(self._normalize, orient=orient)
            data = GroupBy(norm_grouper).apply(data, normalize)

        return data
Пример #30
0
    def get_groupby(self, df, orient):

        other = {"x": "y", "y": "x"}[orient]
        cols = [c for c in df if c != other]
        return GroupBy(cols)