def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Compute transform in the case of a dict-like func.

    Parameters
    ----------
    obj : FrameOrSeries
        Object to transform. Every key of ``func`` is selected from it via
        ``obj._gotitem(key, ndim=1)``.
    func : dict-like
        Mapping of label -> transform function(s). It may be a Series, so
        only ``keys()`` and ``items()`` are used on it.
    *args, **kwargs
        Forwarded unchanged to ``transform`` for every key.

    Returns
    -------
    The per-key results concatenated along ``axis=1``.

    Raises
    ------
    ValueError
        If ``func`` is empty, or if no key produced a result.
    SpecificationError
        If ``obj`` is not 1-dimensional and a key is missing from
        ``obj.columns``, or if any value of ``func`` is dict-like.
    """
    from pandas.core.reshape.concat import concat

    if len(func) == 0:
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # Check for missing columns on a frame
        missing = set(func.keys()) - set(obj.columns)
        if missing:
            try:
                cols = sorted(missing)
            except TypeError:
                # Mixed-type labels (e.g. 1 and "B") are not mutually
                # orderable. Fall back to a deterministic order grouped by
                # type name so the SpecificationError below is still raised
                # instead of leaking the TypeError from sorted().
                cols = sorted(
                    missing,
                    key=lambda label: (type(label).__name__, repr(label)),
                )
            raise SpecificationError(f"Column(s) {cols} do not exist")

    # Can't use func.values(); wouldn't work for a Series
    if any(is_dict_like(v) for _, v in func.items()):
        # GH 15931 - deprecation of renaming keys
        raise SpecificationError("nested renamer is not supported")

    is_aggregator = lambda x: isinstance(x, (list, tuple, dict))

    # if we have a dict of any non-scalars
    # eg. {'A' : ['mean']}, normalize all to
    # be list-likes
    # Cannot use func.values() because arg may be a Series
    if any(is_aggregator(x) for _, x in func.items()):
        new_func: AggFuncTypeDict = {}
        for k, v in func.items():
            if not is_aggregator(v):
                # mypy can't realize v is not a list here
                new_func[k] = [v]  # type:ignore[list-item]
            else:
                new_func[k] = v
        func = new_func

    results: Dict[Label, FrameOrSeriesUnion] = {}
    for name, how in func.items():
        colg = obj._gotitem(name, ndim=1)
        try:
            results[name] = transform(colg, how, 0, *args, **kwargs)
        except Exception as err:
            # Best-effort: only these two failures are propagated; any other
            # error drops this key from the result.
            if str(err) in (
                "Function did not transform",
                "No transform functions were provided",
            ):
                raise

    # combine results
    if len(results) == 0:
        raise ValueError("Transform function failed")
    return concat(results, axis=1)
def normalize_dictlike_arg(
    self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict
) -> AggFuncTypeDict:
    """
    Validate a dict-like argument and normalize its values.

    Checks that every key exists in ``obj.columns`` when ``obj`` is not
    1-dimensional and that no nested renamer was passed. When at least one
    value is a list, tuple or dict, every other value is wrapped in a
    single-element list; otherwise ``func`` is returned unchanged.
    """
    assert how in ("apply", "agg", "transform")

    # Can't use func.values(); wouldn't work for a Series
    # GH 15931 - deprecation of renaming keys
    series_agg = how == "agg" and isinstance(obj, ABCSeries)
    if series_agg and any(is_list_like(value) for _, value in func.items()):
        raise SpecificationError("nested renamer is not supported")
    if any(is_dict_like(value) for _, value in func.items()):
        raise SpecificationError("nested renamer is not supported")

    if obj.ndim != 1:
        # Every key has to name an existing column of the frame
        missing = set(func.keys()) - set(obj.columns)
        if missing:
            cols_sorted = list(safe_sort(list(missing)))
            raise KeyError(f"Column(s) {cols_sorted} do not exist")

    container_types = (list, tuple, dict)

    # Nothing to normalize unless at least one value is a container,
    # eg. {'A' : ['mean']}
    # Cannot use func.values() because arg may be a Series
    if not any(isinstance(value, container_types) for _, value in func.items()):
        return func

    normalized: AggFuncTypeDict = {}
    for key, value in func.items():
        if isinstance(value, container_types):
            normalized[key] = value
        else:
            # mypy can't realize value is not a list here
            normalized[key] = [value]  # type:ignore[list-item]
    return normalized
def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Apply a dict-like ``func`` key by key and concatenate the results.

    Each key of ``func`` is selected from ``obj`` with
    ``obj._gotitem(key, ndim=1)`` and passed to ``transform`` together with
    its value, ``0`` and the extra ``args`` / ``kwargs``. The per-key
    results are joined with ``concat(..., axis=1)``.

    Raises ``ValueError`` when ``func`` is empty or when no key produced a
    result, and ``SpecificationError`` when a key is missing from
    ``obj.columns`` (only checked if ``obj.ndim != 1``) or when a value is
    dict-like.
    """
    from pandas.core.reshape.concat import concat

    if len(func) == 0:
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # On a frame, every key has to name an existing column
        absent = set(func.keys()) - set(obj.columns)
        if absent:
            cols_sorted = list(safe_sort(list(absent)))
            raise SpecificationError(f"Column(s) {cols_sorted} do not exist")

    # func may be a Series, so iterate items() rather than values()
    for _, value in func.items():
        if is_dict_like(value):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

    # Messages of the only failures that are propagated to the caller
    fatal_messages = (
        "Function did not transform",
        "No transform functions were provided",
    )

    transformed: Dict[Hashable, FrameOrSeriesUnion] = {}
    for label, how in func.items():
        column = obj._gotitem(label, ndim=1)
        try:
            transformed[label] = transform(column, how, 0, *args, **kwargs)
        except Exception as err:
            # Any other failure just leaves this key out of the result
            if str(err) in fatal_messages:
                raise err

    # combine results
    if not transformed:
        raise ValueError("Transform function failed")
    return concat(transformed, axis=1)
def validate_dictlike_arg(
    self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict
) -> None:
    """
    Check a dict-like argument and raise when it is invalid.

    A nested renamer is rejected with ``SpecificationError``; when ``obj``
    is not 1-dimensional, keys missing from ``obj.columns`` are rejected
    with ``KeyError``. Returns ``None`` when the argument is acceptable.
    """
    assert how in ("apply", "agg", "transform")

    # Can't use func.values(); wouldn't work for a Series
    # GH 15931 - deprecation of renaming keys
    if how == "agg" and isinstance(obj, ABCSeries):
        # For Series.agg, list-like values count as a nested renamer
        if any(is_list_like(value) for _, value in func.items()):
            raise SpecificationError("nested renamer is not supported")
    if any(is_dict_like(value) for _, value in func.items()):
        raise SpecificationError("nested renamer is not supported")

    if obj.ndim == 1:
        return

    # On a frame, every key has to name an existing column
    missing = set(func.keys()) - set(obj.columns)
    if missing:
        cols_sorted = list(safe_sort(list(missing)))
        raise KeyError(f"Column(s) {cols_sorted} do not exist")
def agg_dict_like(
    obj: AggObjType,
    arg: AggFuncTypeDict,
    _axis: int,
) -> FrameOrSeriesUnion:
    """
    Aggregate ``obj`` according to a dict-like of label -> function(s).

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : dict
        label-aggregation pairs to compute.
    _axis : int, 0 or 1
        Axis to compute aggregation on. Only 0 is accepted.

    Returns
    -------
    Result of aggregation.
    """
    if _axis != 0:  # pragma: no cover
        raise ValueError("Can only pass dict with axis=0")

    selected_obj = obj._selected_obj
    container_types = (list, tuple, dict)

    # Cannot use arg.values() because arg may be a Series
    if any(isinstance(value, container_types) for _, value in arg.items()):
        # At least one non-scalar value, eg. {'A' : ['mean']}:
        # normalize every value to be list-like.
        normalized: AggFuncTypeDict = {}
        for label, funcs in arg.items():
            # the keys must be in the columns
            # for ndim=2, or renamers for ndim=1

            # ok for now, but deprecated
            # {'A': { 'ra': 'mean' }}
            # {'A': { 'ra': ['mean'] }}
            # {'ra': ['mean']}

            # not ok
            # {'ra' : { 'A' : 'mean' }}
            if isinstance(funcs, dict):
                raise SpecificationError("nested renamer is not supported")
            if isinstance(selected_obj, ABCSeries):
                raise SpecificationError("nested renamer is not supported")
            if (
                isinstance(selected_obj, ABCDataFrame)
                and label not in selected_obj.columns
            ):
                raise KeyError(f"Column '{label}' does not exist!")

            if isinstance(funcs, container_types):
                normalized[label] = funcs
            else:
                normalized[label] = [funcs]
        arg = normalized
    else:
        # deprecation of renaming keys
        # GH 15931
        requested = list(arg.keys())
        if isinstance(selected_obj, ABCDataFrame):
            present = selected_obj.columns.intersection(requested)
            if len(present) != len(requested):
                cols = list(safe_sort(list(set(requested) - set(present))))
                raise SpecificationError(f"Column(s) {cols} do not exist")

    from pandas.core.reshape.concat import concat

    results = {}
    if selected_obj.ndim == 1:
        # key only used for output
        colg = obj._gotitem(obj._selection, ndim=1)
        for key, how in arg.items():
            results[key] = colg.agg(how)
    else:
        # key used for column selection and output
        for key, how in arg.items():
            results[key] = obj._gotitem(key, ndim=1).agg(how)

    # set the final keys
    keys = list(arg.keys())

    # Avoid making two isinstance calls in all and any below
    ndframe_flags = [isinstance(res, ABCNDFrame) for res in results.values()]

    if not all(ndframe_flags):
        if any(ndframe_flags):
            # There is a mix of NDFrames and scalars
            raise ValueError(
                "cannot perform both aggregation "
                "and transformation operations "
                "simultaneously"
            )

        from pandas import Series

        # we have a dict of scalars
        # GH 36212 use name only if obj is a series
        name = cast("Series", obj).name if obj.ndim == 1 else None
        return Series(results, name=name)

    # combine results, dropping empty ones unless every result is empty
    non_empty = [key for key in keys if not results[key].empty]
    keys_to_use = non_empty if non_empty else keys
    axis = 0 if isinstance(obj, ABCSeries) else 1
    return concat({key: results[key] for key in keys_to_use}, axis=axis)