Example No. 1
    def _index_grouped(self):
        """
        Implement [METHOD_NAME].

        TODO: Add more details for this docstring template.

        Parameters
        ----------
        What arguments does this function have.
        [
        PARAMETER_NAME: PARAMETERS TYPES
            Description.
        ]

        Returns
        -------
        What this returns (if anything)
        """
        if self._index_grouped_cache is None:
            if hasattr(self._by, "columns") and len(self._by.columns) > 1:
                by = list(self._by.columns)
                is_multi_by = True
            else:
                by = self._by
                is_multi_by = self._is_multi_by
            if is_multi_by:
                # Because we are doing a collect (to_pandas) here and then a
                # groupby, we end up using the pandas implementation. Add the
                # warning so the user is aware.
                ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
                ErrorMessage.default_to_pandas("Groupby with multiple columns")
                if isinstance(by, list) and all(
                    hashable(o)
                    and (
                        o in self._df
                        or o in self._df._query_compiler.get_index_names(self._axis)
                    )
                    for o in by
                ):
                    pandas_df = self._df._query_compiler.getitem_column_array(
                        by
                    ).to_pandas()
                else:
                    by = try_cast_to_pandas(by, squeeze=True)
                    pandas_df = self._df._to_pandas()
                self._index_grouped_cache = pandas_df.groupby(by=by).groups
            else:
                if isinstance(self._by, type(self._query_compiler)):
                    by = self._by.to_pandas().squeeze().values
                else:
                    by = self._by
                if self._axis == 0:
                    self._index_grouped_cache = self._index.groupby(by)
                else:
                    self._index_grouped_cache = self._columns.groupby(by)
        return self._index_grouped_cache
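For reference, what gets cached here mirrors plain pandas' `GroupBy.groups`: a mapping of group name to the axis labels belonging to that group. A minimal pandas-only illustration (toy data, not Modin internals):

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})

# `.groups` maps each group key to the index labels in that group, which is
# the same shape of result `_index_grouped` caches above.
print(df.groupby("a").groups)  # {1: [0, 1], 2: [2]} (values are Index objects)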
Example No. 2
    def caller(
        cls,
        query_compiler,
        by,
        axis,
        groupby_args,
        map_args,
        map_func,
        numeric_only=True,
        **kwargs,
    ):
        if not (
            isinstance(by, type(query_compiler)) or hashable(by)
        ) or isinstance(by, pandas.Grouper):
            by = try_cast_to_pandas(by, squeeze=True)
            default_func = (
                (lambda grp: grp.agg(map_func))
                if isinstance(map_func, dict)
                else map_func
            )
            return query_compiler.default_to_pandas(
                lambda df: default_func(
                    df.groupby(by=by, axis=axis, **groupby_args), **map_args
                )
            )
        assert axis == 0, "Can only groupby reduce with axis=0"

        if numeric_only:
            qc = query_compiler.getitem_column_array(
                query_compiler._modin_frame._numeric_columns(True)
            )
        else:
            qc = query_compiler

        map_fn, reduce_fn = cls.build_map_reduce_functions(
            by=by,
            axis=axis,
            groupby_args=groupby_args,
            map_func=map_func,
            map_args=map_args,
            **kwargs,
        )

        broadcastable_by = getattr(by, "_modin_frame", None)
        apply_indices = list(map_func.keys()) if isinstance(map_func, dict) else None
        new_modin_frame = qc._modin_frame.groupby_reduce(
            axis, broadcastable_by, map_fn, reduce_fn, apply_indices=apply_indices
        )

        result = query_compiler.__constructor__(new_modin_frame)
        if result.index.name == "__reduced__":
            result.index.name = None
        return result
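The Map and Reduce functions built by `build_map_reduce_functions` implement a standard two-phase groupby. The sketch below shows the idea in plain pandas under a hypothetical two-partition split (the split is illustrative only; Modin's real partition layout differs):

import pandas as pd

df = pd.DataFrame({"key": ["x", "y", "x", "y"], "val": [1, 2, 3, 4]})

# Hypothetical row partitions standing in for Modin's partition layout.
partitions = [df.iloc[:2], df.iloc[2:]]

# Map phase: partial aggregation within each partition.
partials = [part.groupby("key").sum() for part in partitions]

# Reduce phase: aggregate the concatenated partials; for sum, the
# sum-of-partial-sums equals the global groupby sum.
result = pd.concat(partials).groupby(level="key").sum()
print(result.equals(df.groupby("key").sum()))  # True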
Example No. 3
    def __init__(
        self,
        df,
        by,
        axis,
        level,
        as_index,
        sort,
        group_keys,
        squeeze,
        idx_name,
        drop,
        **kwargs,
    ):
        self._axis = axis
        self._idx_name = idx_name
        self._df = df
        self._query_compiler = self._df._query_compiler
        self._columns = self._query_compiler.columns
        self._by = by
        self._drop = drop

        if (
            level is None
            and is_list_like(by)
            or isinstance(by, type(self._query_compiler))
        ):
            # This tells us whether or not there are multiple columns/rows in the groupby
            self._is_multi_by = (
                isinstance(by, type(self._query_compiler)) and len(by.columns) > 1
            ) or (
                not isinstance(by, type(self._query_compiler))
                and axis == 0
                and all(
                    (hashable(obj) and obj in self._query_compiler.columns)
                    or isinstance(obj, type(self._query_compiler))
                    or is_list_like(obj)
                    for obj in self._by
                )
            )
        else:
            self._is_multi_by = False
        self._level = level
        self._kwargs = {
            "level": level,
            "sort": sort,
            "as_index": as_index,
            "group_keys": group_keys,
        }
        self._squeeze = squeeze
        self._kwargs.update(kwargs)
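Informally, `_is_multi_by` ends up True when the grouping key describes more than one column, e.g. a list of several labels, and False for a single label. The pandas-level counterpart of the two shapes (toy data, not Modin internals):

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": ["x", "y", "x"], "c": [1.0, 2.0, 3.0]})

# Single-by: one column label.
print(df.groupby("a").ngroups)         # 2

# Multi-by: several column labels; this is the case the flag above detects
# and routes through the pandas implementation.
print(df.groupby(["a", "b"]).ngroups)  # 3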
Example No. 4
    def __getitem__(self, key):
        kwargs = {**self._kwargs.copy(), "squeeze": self._squeeze}
        # Most of the time, indexing a DataFrameGroupBy results in another
        # DataFrameGroupBy object. A SeriesGroupBy has to be returned only when
        # `key` is a single column name rather than a list of column names
        # (even a list of one name).
        make_dataframe = True
        if self._drop and self._as_index:
            if not isinstance(key, list):
                key = [key]
                kwargs["squeeze"] = True
                make_dataframe = False
        # When `as_index` is False, pandas will always convert to a `DataFrame`;
        # we convert `key` to a list here so that the result will be a `DataFrame`.
        elif not self._as_index and not isinstance(key, list):
            # Sometimes `__getitem__` doesn't only get the item; it also gets the
            # `by` column. This logic is here to ensure that we also get the `by`
            # data so that it is there for `as_index=False`.
            if (
                isinstance(self._by, type(self._query_compiler))
                and all(c in self._columns for c in self._by.columns)
                and self._drop
            ):
                key = list(self._by.columns) + [key]
            else:
                key = [key]
        if isinstance(key, list) and (make_dataframe or not self._as_index):
            return DataFrameGroupBy(
                self._df[key],
                self._by,
                self._axis,
                idx_name=self._idx_name,
                drop=self._drop,
                **kwargs,
            )
        if (
            self._is_multi_by
            and isinstance(self._by, list)
            and not all(hashable(o) and o in self._df for o in self._by)
        ):
            raise NotImplementedError(
                "Column lookups on GroupBy with arbitrary Series in by"
                " is not yet supported."
            )
        return SeriesGroupBy(
            self._df[key],
            self._by,
            self._axis,
            idx_name=self._idx_name,
            drop=False,
            **kwargs,
        )
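The type-deduction rule in the comments matches plain pandas and is easy to verify directly:

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2], "b": [10, 20, 30]})

# A single label yields a SeriesGroupBy ...
print(type(df.groupby("a")["b"]).__name__)    # SeriesGroupBy

# ... while a list of labels (even a list of one) yields a DataFrameGroupBy.
print(type(df.groupby("a")[["b"]]).__name__)  # DataFrameGroupBy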
Example No. 5
def is_label(obj, label, axis=0):
    """
    Check whether or not 'obj' contains a column or index level with name 'label'.

    Parameters
    ----------
    obj : DataFrame, Series or QueryCompiler
        Object to check.
    label : object
        Label name to check.
    axis : int, default: 0
        Axis to search name along.

    Returns
    -------
    bool
    """
    qc = getattr(obj, "_query_compiler", obj)
    return hashable(label) and (label in qc.get_axis(axis ^ 1)
                                or label in qc.get_index_names(axis))
Example No. 6
def is_label(obj, label, axis=0):
    """
    Check whether or not 'obj' contains a column or index level with name 'label'.

    Parameters
    ----------
    obj : modin.pandas.DataFrame, modin.pandas.Series or modin.core.storage_formats.base.BaseQueryCompiler
        Object to check.
    label : object
        Label name to check.
    axis : {0, 1}, default: 0
        Axis to search for `label` along.

    Returns
    -------
    bool
        True if check is successful, False otherwise.
    """
    qc = getattr(obj, "_query_compiler", obj)
    return hashable(label) and (label in qc.get_axis(axis ^ 1)
                                or label in qc.get_index_names(axis))
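A usage sketch for `is_label`, assuming the definition above is pasted into the same script. The `hashable` helper and the query-compiler stub below are hypothetical stand-ins for Modin's real ones (`modin.pandas.utils.hashable` and the `BaseQueryCompiler` API), kept to the two methods `is_label` actually calls:

import pandas as pd

def hashable(obj):
    """Stand-in for Modin's hashable(): True if `obj` can be hashed."""
    try:
        hash(obj)
    except TypeError:
        return False
    return True

class FakeQueryCompiler:
    """Hypothetical stub exposing only what `is_label` touches."""

    def __init__(self, df):
        self._df = df

    def get_axis(self, axis):
        # axis=0 -> row labels, axis=1 -> column labels
        return self._df.index if axis == 0 else self._df.columns

    def get_index_names(self, axis):
        return list(self.get_axis(axis).names)

df = pd.DataFrame({"col": [1, 2]}, index=pd.Index([0, 1], name="idx"))
qc = FakeQueryCompiler(df)

print(is_label(qc, "col"))    # True: a column (axis=0 searches columns)
print(is_label(qc, "idx"))    # True: an index level name
print(is_label(qc, "other"))  # False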
Example No. 7
    def caller(
        cls,
        query_compiler,
        by,
        map_func,
        reduce_func,
        axis,
        groupby_kwargs,
        agg_args,
        agg_kwargs,
        drop=False,
        method=None,
        default_to_pandas_func=None,
    ):
        """
        Execute GroupBy aggregation with TreeReduce approach.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Frame to group.
        by : BaseQueryCompiler, column or index label, Grouper or list of such
            Object that determines groups.
        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Map phase.
        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Reduce phase.
        axis : {0, 1}
            Axis to group and apply aggregation function along. 0 means index axis,
            while 1 means column axis.
        groupby_kwargs : dict
            Dictionary which carries arguments for pandas.DataFrame.groupby.
        agg_args : list-like
            Positional arguments to pass to the aggregation functions.
        agg_kwargs : dict
            Keyword arguments to pass to the aggregation functions.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.
        method : str, optional
            Name of the GroupBy aggregation function. This is a hint to be able to do special casing.
        default_to_pandas_func : callable(pandas.DataFrameGroupBy) -> pandas.DataFrame, optional
            The pandas aggregation function equivalent to the `map_func + reduce_func`.
            Used in case of defaulting to pandas. If not specified, `map_func` is used.

        Returns
        -------
        The same type as `query_compiler`
            QueryCompiler which carries the result of GroupBy aggregation.
        """
        if (axis != 0 or groupby_kwargs.get("level", None) is None and
            (not (isinstance(by, (type(query_compiler))) or hashable(by))
             or isinstance(by, pandas.Grouper))):
            by = try_cast_to_pandas(by, squeeze=True)
            # Since 'by' may be a 2D query compiler holding columns to group by,
            # to_pandas will also produce a pandas DataFrame containing them.
            # So splitting 2D 'by' into a list of 1D Series using 'GroupBy.validate_by':
            by = GroupBy.validate_by(by)
            if default_to_pandas_func is None:
                default_to_pandas_func = ((lambda grp: grp.agg(map_func))
                                          if isinstance(map_func, dict) else
                                          map_func)
            return query_compiler.default_to_pandas(
                lambda df: default_to_pandas_func(
                    df.groupby(by=by, axis=axis, **groupby_kwargs),
                    *agg_args,
                    **agg_kwargs,
                ))

        # The bug only occurs in the case of a Categorical 'by', so we might want
        # to check whether any of the 'by' dtypes is Categorical before going into
        # this branch. However, triggering 'dtypes' computation if it is not already
        # computed may take time, so we don't do it.
        if not groupby_kwargs.get("sort", True) and isinstance(
                by, type(query_compiler)):
            ErrorMessage.missmatch_with_pandas(
                operation="df.groupby(categorical_by, sort=False)",
                message=
                ("the groupby keys will be sorted anyway, although the 'sort=False' was passed. "
                 "See the following issue for more details: "
                 "https://github.com/modin-project/modin/issues/3571"),
            )
            groupby_kwargs = groupby_kwargs.copy()
            groupby_kwargs["sort"] = True

        map_fn, reduce_fn = cls.build_map_reduce_functions(
            by=by,
            axis=axis,
            groupby_kwargs=groupby_kwargs,
            map_func=map_func,
            reduce_func=reduce_func,
            agg_args=agg_args,
            agg_kwargs=agg_kwargs,
            drop=drop,
            method=method,
        )

        # If `by` is a ModinFrame, then its partitions will be broadcast to every
        # `self` partition in a way determined by the engine (modin_frame.groupby_reduce).
        # Otherwise, `by` was already bound to the Map function in `build_map_reduce_functions`.
        broadcastable_by = getattr(by, "_modin_frame", None)
        apply_indices = list(map_func.keys()) if isinstance(map_func,
                                                            dict) else None
        new_modin_frame = query_compiler._modin_frame.groupby_reduce(
            axis,
            broadcastable_by,
            map_fn,
            reduce_fn,
            apply_indices=apply_indices)

        result = query_compiler.__constructor__(new_modin_frame)
        if result.index.name == "__reduced__":
            result.index.name = None
        return result
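What the mismatch warning is about: in pandas, `sort=False` preserves first-appearance order of the group keys, and it is exactly this ordering that the branch above gives up by forcing `sort=True`. A pandas-only illustration of the difference:

import pandas as pd

df = pd.DataFrame({"k": ["b", "a", "b"], "v": [1, 2, 3]})

# pandas keeps first-appearance order with sort=False ...
print(df.groupby("k", sort=False).sum().index.tolist())  # ['b', 'a']

# ... and sorted order with sort=True, which is what this branch falls back to.
print(df.groupby("k", sort=True).sum().index.tolist())   # ['a', 'b']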
Example No. 8
    def _compute_index_grouped(self, numerical=False):
        """
        Construct an index of group IDs.

        Parameters
        ----------
        numerical : bool, default: False
            Whether the group indices should be positional (True) or label-based (False).

        Returns
        -------
        dict
            A dict of {group name -> group indices} values.

        See Also
        --------
        pandas.core.groupby.GroupBy.groups
        """
        # We end up using pure pandas to compute group indices, so we raise a warning
        ErrorMessage.default_to_pandas("Group indices computation")

        # Splitting level-by and column-by since we serialize them in different ways
        by = None
        level = []
        if self._level is not None:
            level = self._level
            if not isinstance(level, list):
                level = [level]
        elif isinstance(self._by, list):
            by = []
            for o in self._by:
                if hashable(o) and o in self._query_compiler.get_index_names(
                        self._axis):
                    level.append(o)
                else:
                    by.append(o)
        else:
            by = self._by

        is_multi_by = self._is_multi_by or (by is not None and len(level) > 0)
        # `dropna` param is the only one that matters for the group indices result
        dropna = self._kwargs.get("dropna", True)

        if hasattr(self._by, "columns") and is_multi_by:
            by = list(self._by.columns)

        if is_multi_by:
            # Because we are doing a collect (to_pandas) here and then a
            # groupby, we end up using the pandas implementation. Add the
            # warning so the user is aware.
            ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
            if isinstance(by, list) and all(
                    is_label(self._df, o, self._axis) for o in by):
                pandas_df = self._df._query_compiler.getitem_column_array(
                    by).to_pandas()
            else:
                by = try_cast_to_pandas(by, squeeze=True)
                pandas_df = self._df._to_pandas()
            by = wrap_into_list(by, level)
            groupby_obj = pandas_df.groupby(by=by, dropna=dropna)
            return groupby_obj.indices if numerical else groupby_obj.groups
        else:
            if isinstance(self._by, type(self._query_compiler)):
                by = self._by.to_pandas().squeeze().values
            elif self._by is None:
                index = self._query_compiler.get_axis(self._axis)
                levels_to_drop = [
                    i for i, name in enumerate(index.names)
                    if name not in level and i not in level
                ]
                by = index.droplevel(levels_to_drop)
                if isinstance(by, pandas.MultiIndex):
                    by = by.reorder_levels(level)
            else:
                by = self._by
            axis_labels = self._query_compiler.get_axis(self._axis)
            if numerical:
                # Since we want positional indices of the groups, we want to group
                # on a `RangeIndex`, not on the actual index labels
                axis_labels = pandas.RangeIndex(len(axis_labels))
            # `pandas.Index.groupby` doesn't take any parameters except `by`.
            # Have to convert an Index to a Series to be able to process `dropna=False`:
            if dropna:
                return axis_labels.groupby(by)
            else:
                groupby_obj = axis_labels.to_series().groupby(by,
                                                              dropna=dropna)
                return groupby_obj.indices if numerical else groupby_obj.groups
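The `numerical` switch maps onto two pandas GroupBy attributes: `groups` (label-based) versus `indices` (positional). A quick demonstration:

import pandas as pd

df = pd.DataFrame({"a": [1, 1, 2]}, index=["r0", "r1", "r2"])
gb = df.groupby("a")

# Label-based, like numerical=False: group name -> index labels.
print(gb.groups)   # {1: ['r0', 'r1'], 2: ['r2']} (values are Index objects)

# Positional, like numerical=True: group name -> integer positions.
print(gb.indices)  # {1: array([0, 1]), 2: array([2])}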
Example No. 9
    def __getitem__(self, key):
        """
        Implement indexing operation on a DataFrameGroupBy object.

        Parameters
        ----------
        key : list or str
            Names of columns to use as subset of original object.

        Returns
        -------
        DataFrameGroupBy or SeriesGroupBy
            Result of indexing operation.

        Raises
        ------
        NotImplementedError
            Column lookups on a GroupBy with arbitrary Series in `by` are not yet supported.
        """
        # These parameters are common for building the resulting Series or DataFrame groupby object
        kwargs = {
            **self._kwargs.copy(),
            "by": self._by,
            "axis": self._axis,
            "idx_name": self._idx_name,
            "squeeze": self._squeeze,
        }
        # The rules of type deduction for the resulting object are the following:
        #   1. If `key` is a list-like or `as_index is False`, then the resulted object is a DataFrameGroupBy
        #   2. Otherwise, the resulted object is SeriesGroupBy
        #   3. Result type does not depend on the `by` origin
        # Examples:
        #   - drop: any, as_index: any, __getitem__(key: list_like) -> DataFrameGroupBy
        #   - drop: any, as_index: False, __getitem__(key: any) -> DataFrameGroupBy
        #   - drop: any, as_index: True, __getitem__(key: label) -> SeriesGroupBy
        if is_list_like(key):
            make_dataframe = True
        else:
            if self._as_index:
                make_dataframe = False
            else:
                make_dataframe = True
                key = [key]
        if make_dataframe:
            internal_by = frozenset(self._internal_by)
            if len(internal_by.intersection(key)) != 0:
                ErrorMessage.missmatch_with_pandas(
                    operation="GroupBy.__getitem__",
                    message=
                    ("intersection of the selection and 'by' columns is not yet supported, "
                     +
                     "to achieve the desired result rewrite the original code from:\n"
                     + "df.groupby('by_column')['by_column']\n" + "to the:\n" +
                     "df.groupby(df['by_column'].copy())['by_column']"),
                )
            cols_to_grab = internal_by.union(key)
            key = [col for col in self._df.columns if col in cols_to_grab]
            return DataFrameGroupBy(
                self._df[key],
                drop=self._drop,
                **kwargs,
            )
        if (self._is_multi_by and isinstance(self._by, list)
                and not all(hashable(o) and o in self._df for o in self._by)):
            raise NotImplementedError(
                "Column lookups on GroupBy with arbitrary Series in by" +
                " is not yet supported.")
        return SeriesGroupBy(
            self._df[key],
            drop=False,
            **kwargs,
        )
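The rewrite suggested by the mismatch message, spelled out: grouping by a detached copy of the column means the selected key no longer intersects the internal 'by' columns:

import pandas as pd

df = pd.DataFrame({"by_column": [1, 1, 2], "data": [10, 20, 30]})

# Unsupported shape flagged above: the selection intersects the 'by' columns.
# df.groupby("by_column")["by_column"]

# Suggested rewrite: group by a detached copy of the column instead.
grouped = df.groupby(df["by_column"].copy())["by_column"]
print(grouped.count())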
Example No. 10
    def _index_grouped(self):
        """
        Implement [METHOD_NAME].

        TODO: Add more details for this docstring template.

        Parameters
        ----------
        What arguments does this function have.
        [
        PARAMETER_NAME: PARAMETERS TYPES
            Description.
        ]

        Returns
        -------
        What this returns (if anything)
        """
        if self._index_grouped_cache is None:
            # Splitting level-by and column-by since we serialize them in different ways
            by = None
            level = []
            if self._level is not None:
                level = self._level
                if not isinstance(level, list):
                    level = [level]
            elif isinstance(self._by, list):
                by = []
                for o in self._by:
                    if hashable(o) and o in self._query_compiler.get_index_names(
                        self._axis
                    ):
                        level.append(o)
                    else:
                        by.append(o)
            else:
                by = self._by

            is_multi_by = self._is_multi_by or (by is not None and len(level) > 0)

            if hasattr(self._by, "columns") and is_multi_by:
                by = list(self._by.columns)

            if is_multi_by:
                # Because we are doing a collect (to_pandas) here and then a
                # groupby, we end up using the pandas implementation. Add the
                # warning so the user is aware.
                ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
                ErrorMessage.default_to_pandas("Groupby with multiple columns")
                if isinstance(by, list) and all(
                    is_label(self._df, o, self._axis) for o in by
                ):
                    pandas_df = self._df._query_compiler.getitem_column_array(
                        by
                    ).to_pandas()
                else:
                    by = try_cast_to_pandas(by, squeeze=True)
                    pandas_df = self._df._to_pandas()
                by = wrap_into_list(by, level)
                self._index_grouped_cache = pandas_df.groupby(by=by).groups
            else:
                if isinstance(self._by, type(self._query_compiler)):
                    by = self._by.to_pandas().squeeze().values
                elif self._by is None:
                    index = self._query_compiler.get_axis(self._axis)
                    levels_to_drop = [
                        i
                        for i, name in enumerate(index.names)
                        if name not in level and i not in level
                    ]
                    by = index.droplevel(levels_to_drop)
                    if isinstance(by, pandas.MultiIndex):
                        by = by.reorder_levels(level)
                else:
                    by = self._by
                if self._axis == 0:
                    self._index_grouped_cache = self._index.groupby(by)
                else:
                    self._index_grouped_cache = self._columns.groupby(by)
        return self._index_grouped_cache
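The level/column split above leans on a pandas feature: a `by` list may freely mix index level names and column labels. A pandas-only illustration:

import pandas as pd

df = pd.DataFrame(
    {"col": ["x", "y", "x"], "v": [1, 2, 3]},
    index=pd.Index([1, 1, 2], name="lvl"),
)

# "lvl" resolves as an index level and "col" as a column -- the same split
# the loop over `self._by` performs above.
print(df.groupby(["lvl", "col"]).groups)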
Example No. 11
    def caller(
        cls,
        query_compiler,
        by,
        axis,
        groupby_args,
        map_args,
        map_func,
        reduce_func,
        reduce_args,
        numeric_only=True,
        drop=False,
        method=None,
        default_to_pandas_func=None,
    ):
        """
        Execute GroupBy aggregation with MapReduce approach.

        Parameters
        ----------
        query_compiler : BaseQueryCompiler
            Frame to group.
        by : BaseQueryCompiler, column or index label, Grouper or list of such
            Object that determines groups.
        axis : {0, 1}
            Axis to group and apply aggregation function along. 0 means index axis,
            while 1 means column axis.
        groupby_args : dict
            Dictionary which carries arguments for pandas.DataFrame.groupby.
        map_args : dict
            Arguments which will be passed to `map_func`.
        map_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Map phase.
        reduce_func : dict or callable(pandas.DataFrameGroupBy) -> pandas.DataFrame
            Function to apply to the `GroupByObject` at the Reduce phase.
        reduce_args : dict
            Arguments which will be passed to `reduce_func`.
        numeric_only : bool, default: True
            Whether or not to drop non-numeric columns before executing GroupBy.
        drop : bool, default: False
            Indicates whether or not by-data came from the `self` frame.
        method : str, optional
            Name of the GroupBy aggregation function. This is a hint to be able to do special casing.
        default_to_pandas_func : callable(pandas.DataFrameGroupBy) -> pandas.DataFrame, optional
            The pandas aggregation function equivalent to the `map_func + reduce_func`.
            Used in case of defaulting to pandas. If not specified, `map_func` is used.

        Returns
        -------
        The same type as `query_compiler`
            QueryCompiler which carries the result of GroupBy aggregation.
        """
        if groupby_args.get("level", None) is None and (
                not (isinstance(by, (type(query_compiler))) or hashable(by))
                or isinstance(by, pandas.Grouper)):
            by = try_cast_to_pandas(by, squeeze=True)
            if default_to_pandas_func is None:
                default_to_pandas_func = ((lambda grp: grp.agg(map_func))
                                          if isinstance(map_func, dict) else
                                          map_func)
            return query_compiler.default_to_pandas(
                lambda df: default_to_pandas_func(
                    df.groupby(by=by, axis=axis, **groupby_args), **map_args))
        assert axis == 0, "Can only groupby reduce with axis=0"

        if numeric_only:
            qc = query_compiler.getitem_column_array(
                query_compiler._modin_frame.numeric_columns(True))
        else:
            qc = query_compiler

        map_fn, reduce_fn = cls.build_map_reduce_functions(
            by=by,
            axis=axis,
            groupby_args=groupby_args,
            map_func=map_func,
            map_args=map_args,
            reduce_func=reduce_func,
            reduce_args=reduce_args,
            drop=drop,
            method=method,
        )

        # If `by` is a ModinFrame, then its partitions will be broadcast to every
        # `self` partition in a way determined by the engine (modin_frame.groupby_reduce).
        # Otherwise, `by` was already bound to the Map function in `build_map_reduce_functions`.
        broadcastable_by = getattr(by, "_modin_frame", None)
        apply_indices = list(map_func.keys()) if isinstance(map_func,
                                                            dict) else None
        new_modin_frame = qc._modin_frame.groupby_reduce(
            axis,
            broadcastable_by,
            map_fn,
            reduce_fn,
            apply_indices=apply_indices)

        result = query_compiler.__constructor__(new_modin_frame)
        if result.index.name == "__reduced__":
            result.index.name = None
        return result
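The `numeric_only` pre-filtering drops non-numeric columns before grouping. `_modin_frame.numeric_columns` is internal to Modin; in plain pandas the same effect can be sketched with `select_dtypes`:

import pandas as pd

df = pd.DataFrame({"k": [1, 1, 2], "num": [1.0, 2.0, 3.0], "txt": ["a", "b", "c"]})

# Keep the grouping key plus numeric columns, then aggregate.
numeric_cols = df.select_dtypes(include="number").columns
print(df[numeric_cols].groupby("k").sum())  # "txt" is excluded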
Example No. 12
    def _index_grouped(self):
        """
        Construct an index of group IDs.

        Returns
        -------
        dict
            A dict of {group name -> group labels} values.

        See Also
        --------
        pandas.core.groupby.GroupBy.groups
        """
        if self._index_grouped_cache is None:
            # Splitting level-by and column-by since we serialize them in different ways
            by = None
            level = []
            if self._level is not None:
                level = self._level
                if not isinstance(level, list):
                    level = [level]
            elif isinstance(self._by, list):
                by = []
                for o in self._by:
                    if hashable(o) and o in self._query_compiler.get_index_names(
                        self._axis
                    ):
                        level.append(o)
                    else:
                        by.append(o)
            else:
                by = self._by

            is_multi_by = self._is_multi_by or (by is not None and len(level) > 0)

            if hasattr(self._by, "columns") and is_multi_by:
                by = list(self._by.columns)

            if is_multi_by:
                # Because we are doing a collect (to_pandas) here and then a
                # groupby, we end up using the pandas implementation. Add the
                # warning so the user is aware.
                ErrorMessage.catch_bugs_and_request_email(self._axis == 1)
                ErrorMessage.default_to_pandas("Groupby with multiple columns")
                if isinstance(by, list) and all(
                    is_label(self._df, o, self._axis) for o in by
                ):
                    pandas_df = self._df._query_compiler.getitem_column_array(
                        by
                    ).to_pandas()
                else:
                    by = try_cast_to_pandas(by, squeeze=True)
                    pandas_df = self._df._to_pandas()
                by = wrap_into_list(by, level)
                self._index_grouped_cache = pandas_df.groupby(by=by).groups
            else:
                if isinstance(self._by, type(self._query_compiler)):
                    by = self._by.to_pandas().squeeze().values
                elif self._by is None:
                    index = self._query_compiler.get_axis(self._axis)
                    levels_to_drop = [
                        i
                        for i, name in enumerate(index.names)
                        if name not in level and i not in level
                    ]
                    by = index.droplevel(levels_to_drop)
                    if isinstance(by, pandas.MultiIndex):
                        by = by.reorder_levels(level)
                else:
                    by = self._by
                if self._axis == 0:
                    self._index_grouped_cache = self._index.groupby(by)
                else:
                    self._index_grouped_cache = self._columns.groupby(by)
        return self._index_grouped_cache
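The `self._by is None` branch (grouping purely by index levels) can be traced in plain pandas: drop every index level not named in `level`, reorder the survivors to match `level`, then hand the result to `Index.groupby`, just as the code above does:

import pandas as pd

idx = pd.MultiIndex.from_tuples(
    [("x", 1, "a"), ("x", 2, "b"), ("y", 1, "a")],
    names=["outer", "num", "inner"],
)
level = ["inner", "outer"]

# Same computation as the branch above (names-only variant of the check).
levels_to_drop = [i for i, name in enumerate(idx.names) if name not in level]
by = idx.droplevel(levels_to_drop)  # drops the "num" level
by = by.reorder_levels(level)       # level order becomes ("inner", "outer")
print(idx.groupby(by))              # group labels keyed by the kept levels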