Пример #1
0
 def apply(self, func, *args, **kwargs):
     if not isinstance(func, BuiltinFunctionType):
         func = wrap_udf_function(func)
     return self._apply_agg_function(
         # Grouping column in never dropped in groupby.apply, so drop=False
         lambda df: df.apply(func, *args, **kwargs),
         drop=False,
     )
Пример #2
0
    def apply(self, func, *args, **kwargs):
        if not isinstance(func, BuiltinFunctionType):
            func = wrap_udf_function(func)

        return self._check_index(
            self._wrap_aggregation(
                qc_method=type(self._query_compiler).groupby_agg,
                numeric_only=False,
                agg_func=func,
                agg_args=args,
                agg_kwargs=kwargs,
                how="group_wise",
            ))
Пример #3
0
    def _apply_agg_function(self, f, drop=True, *args, **kwargs):
        """
        Perform aggregation and combine stages based on a given function.

        TODO: add types.

        Parameters
        ----------
        f:
            The function to apply to each group.

        Returns
        -------
        A new combined DataFrame with the result of all groups.
        """
        assert callable(f), "'{0}' object is not callable".format(type(f))

        f = wrap_udf_function(f)
        if self._is_multi_by:
            return self._default_to_pandas(f, *args, **kwargs)

        if isinstance(self._by, type(self._query_compiler)):
            by = self._by.to_pandas().squeeze()
        else:
            by = self._by

        # For aggregations, pandas behavior does this for the result.
        # For other operations it does not, so we wait until there is an aggregation to
        # actually perform this operation.
        if self._idx_name is not None and drop and self._drop:
            groupby_qc = self._query_compiler.drop(columns=[self._idx_name])
        else:
            groupby_qc = self._query_compiler
        new_manager = groupby_qc.groupby_agg(
            by=by,
            axis=self._axis,
            agg_func=f,
            groupby_args=self._kwargs,
            agg_args=kwargs,
            drop=self._drop,
        )
        if self._idx_name is not None and self._as_index:
            new_manager.index.name = self._idx_name
        result = type(self._df)(query_compiler=new_manager)
        if result.index.name == "__reduced__":
            result.index.name = None
        if self._kwargs.get("squeeze", False):
            return result.squeeze()
        return result
Пример #4
0
 def apply(self, func, *args, **kwargs):
     if not isinstance(func, BuiltinFunctionType):
         func = wrap_udf_function(func)
     return self._apply_agg_function(lambda df: df.apply(func, *args, **kwargs))