def maybe_mangle_lambdas(agg_spec: Any) -> Any:
    """
    Give the lambdas inside an aggregation spec unique names.

    Parameters
    ----------
    agg_spec : Any
        An argument to GroupBy.agg. Anything that is neither dict-like
        nor list-like is passed through as is. A list-like spec is handed
        to ``_managle_lambda_list``. For a dict-like spec, every value that
        is list-like (and not itself dict-like) is handed to
        ``_managle_lambda_list``; all other values are copied unchanged.

    Returns
    -------
    mangled : Any
        Same type as the input.

    Examples
    --------
    >>> maybe_mangle_lambdas('sum')
    'sum'
    >>> maybe_mangle_lambdas([lambda: 1, lambda: 2])  # doctest: +SKIP
    [<function __main__.<lambda_0>,
     <function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
    """
    spec_is_mapping = is_dict_like(agg_spec)
    if not spec_is_mapping and not is_list_like(agg_spec):
        return agg_spec

    # Empty container of the caller's own type (dict or OrderedDict).
    # Built before branching, exactly as the original code did.
    result = type(agg_spec)()

    if not spec_is_mapping:
        return _managle_lambda_list(agg_spec)

    for key, funcs in agg_spec.items():
        needs_mangling = is_list_like(funcs) and not is_dict_like(funcs)
        result[key] = _managle_lambda_list(funcs) if needs_mangling else funcs
    return result
def _get_empty_meta( self, columns, index_col, index_names, dtype: DtypeArg | None = None ): columns = list(columns) # Convert `dtype` to a defaultdict of some kind. # This will enable us to write `dtype[col_name]` # without worrying about KeyError issues later on. if not is_dict_like(dtype): # if dtype == None, default will be object. default_dtype = dtype or object # error: Argument 1 to "defaultdict" has incompatible type "Callable[[], # Union[ExtensionDtype, str, dtype[Any], Type[object], Dict[Hashable, # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float], # Type[int], Type[complex], Type[bool], Type[object]]]]]"; expected # "Optional[Callable[[], Union[ExtensionDtype, str, dtype[Any], # Type[object]]]]" # error: Incompatible return value type (got "Union[ExtensionDtype, str, # dtype[Any], Type[object], Dict[Hashable, Union[ExtensionDtype, Union[str, # dtype[Any]], Type[str], Type[float], Type[int], Type[complex], Type[bool], # Type[object]]]]", expected "Union[ExtensionDtype, str, dtype[Any], # Type[object]]") dtype = defaultdict( lambda: default_dtype # type: ignore[arg-type, return-value] ) else: dtype = cast(dict, dtype) dtype = defaultdict( lambda: object, {columns[k] if is_integer(k) else k: v for k, v in dtype.items()}, ) # Even though we have no data, the "index" of the empty DataFrame # could for example still be an empty MultiIndex. Thus, we need to # check whether we have any index columns specified, via either: # # 1) index_col (column indices) # 2) index_names (column names) # # Both must be non-null to ensure a successful construction. Otherwise, # we have to create a generic empty Index. 
if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: data = [Series([], dtype=dtype[name]) for name in index_names] index = ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) col_dict = {col_name: Series([], dtype=dtype[col_name]) for col_name in columns} return index, columns, col_dict
def _get_empty_meta(self, columns, index_col, index_names, dtype: DtypeArg | None = None): columns = list(columns) # Convert `dtype` to a defaultdict of some kind. # This will enable us to write `dtype[col_name]` # without worrying about KeyError issues later on. dtype_dict: defaultdict[Hashable, Any] if not is_dict_like(dtype): # if dtype == None, default will be object. default_dtype = dtype or object dtype_dict = defaultdict(lambda: default_dtype) else: dtype = cast(dict, dtype) dtype_dict = defaultdict( lambda: object, { columns[k] if is_integer(k) else k: v for k, v in dtype.items() }, ) # Even though we have no data, the "index" of the empty DataFrame # could for example still be an empty MultiIndex. Thus, we need to # check whether we have any index columns specified, via either: # # 1) index_col (column indices) # 2) index_names (column names) # # Both must be non-null to ensure a successful construction. Otherwise, # we have to create a generic empty Index. if (index_col is None or index_col is False) or index_names is None: index = Index([]) else: data = [Series([], dtype=dtype_dict[name]) for name in index_names] index = ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) col_dict = { col_name: Series([], dtype=dtype_dict[col_name]) for col_name in columns } return index, columns, col_dict
def aggregate(
    obj,
    arg: AggFuncType,
    *args,
    **kwargs,
):
    """
    Dispatch an aggregation request according to the kind of ``arg``.

    Parameters
    ----------
    obj : Pandas object to compute aggregation on.
    arg : string, dict, function.
    *args : args to pass on to the function.
    **kwargs : kwargs to pass on to the function. A ``_axis`` entry, if
        present, is removed and used as the axis; otherwise ``obj.axis``
        is used, defaulting to 0.

    Returns
    -------
    tuple of result, how.

    Notes
    -----
    how can be a string describe the required post-processing, or
    None if not required.
    """
    axis = kwargs.pop("_axis", None)
    if axis is None:
        axis = getattr(obj, "axis", 0)

    if isinstance(arg, str):
        return obj._try_aggregate_string_function(arg, *args, **kwargs), None

    if is_dict_like(arg):
        dict_arg = cast(
            Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], arg
        )
        return agg_dict_like(obj, dict_arg, axis), True

    if is_list_like(arg):
        # we require a list, but not an 'str'
        list_arg = cast(List[AggFuncTypeBase], arg)
        return agg_list_like(obj, list_arg, _axis=axis), None

    if callable(arg):
        cython_name = obj._get_cython_func(arg)
        # The named shortcut is only used when no extra arguments were given.
        if cython_name and not args and not kwargs:
            return getattr(obj, cython_name)(), None

    # caller can react
    return None, True
def _validate_parse_dates_presence(self, columns: list[str]) -> None: """ Check if parse_dates are in columns. If user has provided names for parse_dates, check if those columns are available. Parameters ---------- columns : list List of names of the dataframe. Raises ------ ValueError If column to parse_date is not in dataframe. """ cols_needed: Iterable if is_dict_like(self.parse_dates): cols_needed = itertools.chain(*self.parse_dates.values()) elif is_list_like(self.parse_dates): # a column in parse_dates could be represented # ColReference = Union[int, str] # DateGroups = List[ColReference] # ParseDates = Union[DateGroups, List[DateGroups], # Dict[ColReference, DateGroups]] cols_needed = itertools.chain.from_iterable( col if is_list_like(col) and not isinstance(col, tuple) else [col] for col in self.parse_dates ) else: cols_needed = [] # get only columns that are references using names (str), not by index missing_cols = ", ".join( sorted( { col for col in cols_needed if isinstance(col, str) and col not in columns } ) ) if missing_cols: raise ValueError( f"Missing column provided to 'parse_dates': '{missing_cols}'" )
def rename(self, index=None, **kwargs):
    """
    Set the name of this object, or relabel its index.

    When ``index`` is a scalar or a non-dict list-like it is used as the
    new name: with ``inplace=True`` in ``kwargs`` the name is set on
    ``self`` and nothing is returned, otherwise a renamed copy is returned.
    Any other ``index`` is forwarded, together with ``kwargs``, to
    ``DataFrame.rename``; the squeezed result is returned carrying this
    object's current name.
    """
    if is_scalar(index):
        use_as_name = True
    else:
        use_as_name = is_list_like(index) and not is_dict_like(index)

    if not use_as_name:
        from .dataframe import DataFrame

        relabelled = DataFrame(self).rename(index=index, **kwargs).squeeze()
        relabelled.name = self.name
        return relabelled

    if kwargs.get("inplace", False):
        self.name = index
        return None

    duplicate = self.copy()
    duplicate.name = index
    return duplicate
def apply(self) -> FrameOrSeriesUnion:
    """
    Compute the result of applying ``self.f`` to ``self.obj``.

    The first matching case wins: list-/dict-like ``f`` goes to
    ``aggregate``; a completely empty object yields the empty result;
    a string ``f`` is looked up as a method on the object; a NumPy ufunc
    is applied block-wise; then broadcast, one-empty-axis and raw modes
    are tried before falling back to the standard path.
    """
    # dispatch to agg
    if is_list_like(self.f) or is_dict_like(self.f):
        # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets
        # multiple values for keyword argument "axis"
        return self.obj.aggregate(  # type: ignore[misc]
            self.f, *self.args, axis=self.axis, **self.kwds
        )

    # all empty
    if len(self.columns) == 0 and len(self.index) == 0:
        return self.apply_empty_result()

    # string dispatch
    if isinstance(self.f, str):
        # Support for `frame.transform('method')`
        # Some methods (shift, etc.) require the axis argument, others
        # don't, so inspect and insert if necessary.
        method = getattr(self.obj, self.f)
        if "axis" in inspect.getfullargspec(method).args:
            self.kwds["axis"] = self.axis
        return method(*self.args, **self.kwds)

    # ufunc
    if isinstance(self.f, np.ufunc):
        with np.errstate(all="ignore"):
            results = self.obj._mgr.apply("apply", func=self.f)
        # _constructor will retain self.index and self.columns
        return self.obj._constructor(data=results)

    # broadcasting
    if self.result_type == "broadcast":
        return self.apply_broadcast(self.obj)

    # one axis empty
    if not all(self.obj.shape):
        return self.apply_empty_result()

    # raw
    if self.raw:
        return self.apply_raw()

    return self.apply_standard()
def get_result(self):
    """
    Compute the result of applying ``self.f`` to ``self.obj``.

    The first matching case wins: list-/dict-like ``f`` goes to
    ``aggregate``; a completely empty object yields the empty result;
    a string ``f`` is looked up as a method on the object; a NumPy ufunc
    is applied block-wise; then broadcast, one-empty-axis and raw modes
    are tried before falling back to the standard path.
    """
    # dispatch to agg
    if is_list_like(self.f) or is_dict_like(self.f):
        return self.obj.aggregate(self.f, *self.args, axis=self.axis, **self.kwds)

    # all empty
    if len(self.columns) == 0 and len(self.index) == 0:
        return self.apply_empty_result()

    # string dispatch
    if isinstance(self.f, str):
        # Support for `frame.transform('method')`
        # Some methods (shift, etc.) require the axis argument, others
        # don't, so inspect and insert if necessary.
        method = getattr(self.obj, self.f)
        if "axis" in inspect.getfullargspec(method).args:
            self.kwds["axis"] = self.axis
        return method(*self.args, **self.kwds)

    # ufunc
    if isinstance(self.f, np.ufunc):
        with np.errstate(all="ignore"):
            results = self.obj._data.apply("apply", func=self.f)
        return self.obj._constructor(
            data=results, index=self.index, columns=self.columns, copy=False
        )

    # broadcasting
    if self.result_type == "broadcast":
        return self.apply_broadcast()

    # one axis empty
    if not all(self.obj.shape):
        return self.apply_empty_result()

    # raw (only taken when the object is not of mixed type)
    if self.raw and not self.obj._is_mixed_type:
        return self.apply_raw()

    return self.apply_standard()
def normalize_dictlike_arg( self, how: str, obj: FrameOrSeriesUnion, func: AggFuncTypeDict ) -> AggFuncTypeDict: """ Handler for dict-like argument. Ensures that necessary columns exist if obj is a DataFrame, and that a nested renamer is not passed. Also normalizes to all lists when values consists of a mix of list and non-lists. """ assert how in ("apply", "agg", "transform") # Can't use func.values(); wouldn't work for a Series if ( how == "agg" and isinstance(obj, ABCSeries) and any(is_list_like(v) for _, v in func.items()) ) or (any(is_dict_like(v) for _, v in func.items())): # GH 15931 - deprecation of renaming keys raise SpecificationError("nested renamer is not supported") if obj.ndim != 1: # Check for missing columns on a frame cols = set(func.keys()) - set(obj.columns) if len(cols) > 0: cols_sorted = list(safe_sort(list(cols))) raise KeyError(f"Column(s) {cols_sorted} do not exist") is_aggregator = lambda x: isinstance(x, (list, tuple, dict)) # if we have a dict of any non-scalars # eg. {'A' : ['mean']}, normalize all to # be list-likes # Cannot use func.values() because arg may be a Series if any(is_aggregator(x) for _, x in func.items()): new_func: AggFuncTypeDict = {} for k, v in func.items(): if not is_aggregator(v): # mypy can't realize v is not a list here new_func[k] = [v] # type:ignore[list-item] else: new_func[k] = v func = new_func return func
def agg(self) -> Tuple[Optional[FrameOrSeriesUnion], Optional[bool]]:
    """
    Dispatch an aggregation according to the kind of ``self.f``.

    A ``_axis`` entry in ``self.kwds`` is removed and used as the axis;
    without one, ``self.obj.axis`` is used, defaulting to 0.

    Returns
    -------
    tuple of result, how.

    Notes
    -----
    how can be a string describe the required post-processing, or
    None if not required.
    """
    obj = self.obj
    arg = self.f
    args = self.args
    kwargs = self.kwds

    axis = kwargs.pop("_axis", None)
    if axis is None:
        axis = getattr(obj, "axis", 0)

    str_result = self.maybe_apply_str()
    if str_result is not None:
        return str_result, None

    if is_dict_like(arg):
        dict_arg = cast(AggFuncTypeDict, arg)
        return agg_dict_like(obj, dict_arg, axis), True

    if is_list_like(arg):
        # we require a list, but not a 'str'
        list_arg = cast(List[AggFuncTypeBase], arg)
        return agg_list_like(obj, list_arg, _axis=axis), None

    if callable(arg):
        cython_name = obj._get_cython_func(arg)
        # The named shortcut is only used when no extra arguments were given.
        if cython_name and not args and not kwargs:
            return getattr(obj, cython_name)(), None

    # caller can react
    return None, True
def get_result(self):
    """
    Compute the result of applying ``self.f`` to ``self.obj``.

    The first matching case wins: list-/dict-like ``f`` goes to
    ``aggregate``; a completely empty object yields the empty result;
    a string ``f`` is looked up as a method on the object; a NumPy ufunc
    is called on ``self.values``; then broadcast, one-empty-axis and raw
    modes are tried before falling back to the standard path.
    """
    # dispatch to agg
    if is_list_like(self.f) or is_dict_like(self.f):
        return self.obj.aggregate(self.f, *self.args, axis=self.axis, **self.kwds)

    # all empty
    if len(self.columns) == 0 and len(self.index) == 0:
        return self.apply_empty_result()

    # string dispatch
    if isinstance(self.f, compat.string_types):
        # Support for `frame.transform('method')`
        # Some methods (shift, etc.) require the axis argument, others
        # don't, so inspect and insert if necessary.
        method = getattr(self.obj, self.f)
        if 'axis' in compat.signature(method).args:
            self.kwds['axis'] = self.axis
        return method(*self.args, **self.kwds)

    # ufunc
    if isinstance(self.f, np.ufunc):
        with np.errstate(all='ignore'):
            results = self.f(self.values)
        return self.obj._constructor(
            data=results, index=self.index, columns=self.columns, copy=False
        )

    # broadcasting
    if self.result_type == 'broadcast':
        return self.apply_broadcast()

    # one axis empty
    if not all(self.obj.shape):
        return self.apply_empty_result()

    # raw (only taken when the object is not of mixed type)
    if self.raw and not self.obj._is_mixed_type:
        return self.apply_raw()

    return self.apply_standard()
def transform_dict_like(
    obj: FrameOrSeries,
    func: AggFuncTypeDict,
    *args,
    **kwargs,
):
    """
    Compute transform in the case of a dict-like func.

    Each key of ``func`` selects one column of ``obj`` and its value is
    the function (or functions) to transform that column with. The
    per-key results are concatenated along axis 1.

    Raises
    ------
    ValueError
        If ``func`` is empty, or if no key produced a result.
    SpecificationError
        If ``obj`` is not one-dimensional and ``func`` names columns it
        lacks, or if any value of ``func`` is itself dict-like.
    """
    from pandas.core.reshape.concat import concat

    if len(func) == 0:
        raise ValueError("No transform functions were provided")

    if obj.ndim != 1:
        # Check for missing columns on a frame
        missing = set(func.keys()) - set(obj.columns)
        if len(missing) > 0:
            cols_sorted = list(safe_sort(list(missing)))
            raise SpecificationError(f"Column(s) {cols_sorted} do not exist")

    # Can't use func.values(); wouldn't work for a Series
    if any(is_dict_like(v) for _, v in func.items()):
        # GH 15931 - deprecation of renaming keys
        raise SpecificationError("nested renamer is not supported")

    # Failures carrying one of these messages are re-raised; any other
    # failure simply leaves that key out of the results.
    fatal_messages = (
        "Function did not transform",
        "No transform functions were provided",
    )

    results: Dict[Hashable, FrameOrSeriesUnion] = {}
    for name, how in func.items():
        column = obj._gotitem(name, ndim=1)
        try:
            results[name] = transform(column, how, 0, *args, **kwargs)
        except Exception as err:
            if str(err) in fatal_messages:
                raise err

    # combine results
    if len(results) == 0:
        raise ValueError("Transform function failed")
    return concat(results, axis=1)
def apply(self) -> FrameOrSeriesUnion:
    """
    Compute the result of applying ``self.f`` to ``self.obj``.

    The first matching case wins: list-/dict-like ``f`` goes to
    ``aggregate``; a completely empty object yields the empty result;
    a non-None result from ``maybe_apply_str`` is returned as is; a NumPy
    ufunc is applied block-wise; then broadcast, one-empty-axis and raw
    modes are tried before falling back to the standard path.
    """
    # dispatch to agg
    if is_list_like(self.f) or is_dict_like(self.f):
        # pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets
        # multiple values for keyword argument "axis"
        return self.obj.aggregate(  # type: ignore[misc]
            self.f, *self.args, axis=self.axis, **self.kwds
        )

    # all empty
    if len(self.columns) == 0 and len(self.index) == 0:
        return self.apply_empty_result()

    # string dispatch
    str_result = self.maybe_apply_str()
    if str_result is not None:
        return str_result

    # ufunc
    if isinstance(self.f, np.ufunc):
        with np.errstate(all="ignore"):
            results = self.obj._mgr.apply("apply", func=self.f)
        # _constructor will retain self.index and self.columns
        return self.obj._constructor(data=results)

    # broadcasting
    if self.result_type == "broadcast":
        return self.apply_broadcast(self.obj)

    # one axis empty
    if not all(self.obj.shape):
        return self.apply_empty_result()

    # raw
    if self.raw:
        return self.apply_raw()

    return self.apply_standard()
def validate_dictlike_arg(self, how: str, obj: FrameOrSeriesUnion,
                          func: AggFuncTypeDict) -> None:
    """
    Raise if a dict-like ``func`` argument is invalid.

    Raises
    ------
    SpecificationError
        If any value of ``func`` is dict-like, or if ``how`` is "agg",
        ``obj`` is a Series and any value of ``func`` is list-like.
    KeyError
        If ``obj`` is not one-dimensional and ``func`` names columns that
        ``obj`` lacks.
    """
    assert how in ("apply", "agg", "transform")

    # Can't use func.values(); wouldn't work for a Series
    series_agg_with_lists = (
        how == "agg"
        and isinstance(obj, ABCSeries)
        and any(is_list_like(v) for _, v in func.items())
    )
    if series_agg_with_lists or any(is_dict_like(v) for _, v in func.items()):
        # GH 15931 - deprecation of renaming keys
        raise SpecificationError("nested renamer is not supported")

    if obj.ndim == 1:
        return

    # Check for missing columns on a frame
    missing = set(func.keys()) - set(obj.columns)
    if len(missing) > 0:
        cols_sorted = list(safe_sort(list(missing)))
        raise KeyError(f"Column(s) {cols_sorted} do not exist")
def _map_values(self, mapper, na_action=None): """ An internal function that maps values using the input correspondence (which can be a dict, Series, or function). Parameters ---------- mapper : function, dict, or Series The input correspondence object na_action : {None, 'ignore'} If 'ignore', propagate NA values, without passing them to the mapping function Returns ------- Union[Index, MultiIndex], inferred The output of the mapping function applied to the index. If the function returns a tuple with more than one element a MultiIndex will be returned. """ # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types if is_dict_like(mapper): if isinstance(mapper, dict) and hasattr(mapper, "__missing__"): # If a dictionary subclass defines a default value method, # convert mapper to a lookup function (GH #15999). dict_with_default = mapper mapper = lambda x: dict_with_default[x] else: # Dictionary does not have a default. Thus it's safe to # convert to an Series for efficiency. # we specify the keys here to handle the # possibility that they are tuples # The return value of mapping with an empty mapper is # expected to be pd.Series(np.nan, ...). 
As np.nan is # of dtype float64 the return value of this method should # be float64 as well mapper = create_series_with_explicit_dtype( mapper, dtype_if_empty=np.float64) if isinstance(mapper, ABCSeries): # Since values were input this means we came from either # a dict or a series and mapper should be an index if is_categorical_dtype(self.dtype): # use the built in categorical series mapper which saves # time by mapping the categories instead of all values # error: Incompatible types in assignment (expression has type # "Categorical", variable has type "IndexOpsMixin") self = cast("Categorical", self) # type: ignore[assignment] # error: Item "ExtensionArray" of "Union[ExtensionArray, Any]" has no # attribute "map" return self._values.map(mapper) # type: ignore[union-attr] values = self._values indexer = mapper.index.get_indexer(values) new_values = algorithms.take_nd(mapper._values, indexer) return new_values # we must convert to python types if is_extension_array_dtype(self.dtype) and hasattr( self._values, "map"): # GH#23179 some EAs do not have `map` values = self._values if na_action is not None: raise NotImplementedError map_f = lambda values, f: values.map(f) else: # error: "IndexOpsMixin" has no attribute "astype" values = self.astype(object)._values # type: ignore[attr-defined] if na_action == "ignore": map_f = lambda values, f: lib.map_infer_mask( values, f, isna(values).view(np.uint8)) elif na_action is None: map_f = lib.map_infer else: msg = ("na_action must either be 'ignore' or None, " f"{na_action} was passed") raise ValueError(msg) # mapper is a function new_values = map_f(values, mapper) return new_values
def transform(
    obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs
) -> FrameOrSeriesUnion:
    """
    Transform a DataFrame or Series

    Parameters
    ----------
    obj : DataFrame or Series
        Object to compute the transform on.
    func : string, function, list, or dictionary
        Function(s) to compute the transform with.
    axis : {0 or 'index', 1 or 'columns'}
        Axis along which the function is applied:

        * 0 or 'index': apply function to each column.
        * 1 or 'columns': apply function to each row.

    Returns
    -------
    DataFrame or Series
        Result of applying ``func`` along the given axis of the
        Series or DataFrame.

    Raises
    ------
    ValueError
        If the transform function fails or does not transform.
    """
    is_series = obj.ndim == 1

    # Row-wise transforms are computed column-wise on the transpose.
    if obj._get_axis_number(axis) == 1:
        assert not is_series
        return transform(obj.T, func, 0, *args, **kwargs).T

    if is_list_like(func) and not is_dict_like(func):
        func_list = cast(List[AggFuncTypeBase], func)
        # Convert func equivalent dict
        if is_series:
            func = {com.get_callable_name(v) or v: v for v in func_list}
        else:
            func = {col: func_list for col in obj}

    if is_dict_like(func):
        func = cast(Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]], func)
        return transform_dict_like(obj, func, *args, **kwargs)

    # func is either str or callable
    func = cast(AggFuncTypeBase, func)
    try:
        result = transform_str_or_callable(obj, func, *args, **kwargs)
    except Exception:
        raise ValueError("Transform function failed")

    is_pandas_result = isinstance(result, (ABCSeries, ABCDataFrame))

    # Functions that transform may return empty Series/DataFrame
    # when the dtype is not appropriate
    if is_pandas_result and result.empty:
        raise ValueError("Transform function failed")
    if not is_pandas_result or not result.index.equals(obj.index):
        raise ValueError("Function did not transform")

    return result
def check_syntax(self):
    """
    Validate the structure of ``self.data_configs``.

    Checks, in order: the ``input`` section, the ``process_sequence``
    list, and for every listed process its ``action_sequence`` and the
    configuration of each action in it. On the first fatal problem an
    error is logged and the interpreter is terminated through ``exit(0)``.
    A ``clip_outliers`` configuration that is not a list is only logged,
    as in the original code.

    Returns
    -------
    None
        When no fatal problem was found.
    """
    # check input
    action_configs = self.data_configs.get('input')
    if not action_configs.get('fact_train', {}) or not action_configs.get(
            'fact_test', {}):
        logger.error(
            'Both fact_train and fact_test must be configured for {}!'.
            format('input'))
        exit(0)
    if not all(is_dict_like(v) for v in action_configs.values()):
        logger.error(
            'items and nested items for input must be dictionaries!')
        exit(0)

    # check process_sequence
    process_sequence = self.data_configs.get('process_sequence', [])
    if not process_sequence:
        logger.error('process_sequence is mandantory!')
        exit(0)

    # An action key is accepted when it contains one of these names as a
    # substring.
    possible_actions = [
        'aggregations', 'change_dtype', 'clip_outliers', 'drop_columns',
        'drop_rows', 'factorize_columns', 'get_data',
        'interaction_columns', 'kbins', 'one_hot_encoder',
        'onehot_encoding', 'pca', 'reduce_mem_usage', 'remove_duplicate',
        'replace_values', 'result', 'select_columns', 'simple_impute',
        'standardization'
    ]
    possible_interactions = [
        'add', 'subtract', 'subtract_positive', 'multiply', 'divide',
        'datetime', 'function'
    ]

    for process_key in process_sequence:
        process_configs = self.data_configs.get(process_key, {})
        if not process_configs:
            logger.error(
                'There is no configruations for process_key {}!'.format(
                    process_key))
            exit(0)

        # check action sequence
        action_sequence = process_configs.get('action_sequence', [])
        if not action_sequence:
            logger.error(
                'action_sequence is mandantory for process {}!'.format(
                    process_key))
            exit(0)
        if 'get_data' not in action_sequence or 'result' not in action_sequence:
            logger.error(
                'get_data and result must be in action_sequence for {}'.
                format(process_key))
            exit(0)

        # check actions
        for action_key in action_sequence:
            action_configs = process_configs.get(action_key)
            if action_configs is None:
                logger.error('No {} configuration for {}'.format(
                    action_key, process_key))
                exit(0)
            if not any(name in action_key for name in possible_actions):
                logger.error(
                    '{} is not supported. Only below actions are supported at the moment:{}'
                    .format(action_key, possible_actions))
                exit(0)

            # check every action
            if 'aggregations' in action_key:
                for action_config in action_configs:
                    if not action_config.get('groupby', []):
                        logger.error("No columns for groupby field")
                        exit(0)
                    has_output = (action_config.get('metrics', {})
                                  or action_config.get('count', False)
                                  or action_config.get('percent', False))
                    if not has_output:
                        logger.error(
                            "There should be at least one of below three: columns for metrics field , count or percent "
                        )
                        exit(0)
            elif 'clip_outliers' in action_key:
                # Fixed: the original tested `type(action_config) != 'list'`,
                # i.e. a stale (possibly unbound) loop variable compared
                # against a string, which could never report correctly.
                # Still log-only, matching the original's lack of exit().
                if not isinstance(action_configs, list):
                    logger.error("clip_outliers should be a list!")
            elif 'get_data' in action_key:
                if not all(is_dict_like(v) for v in action_configs):
                    logger.error(
                        'items in {} must be dictionaries!'.format(
                            action_key))
                    exit(0)
            elif 'replace_values' in action_key:
                if not all(is_dict_like(v) for v in action_configs.values()):
                    logger.error(
                        'items in replace_values must be dictionaries!')
                    exit(0)
            elif 'interaction_columns' in action_key:
                for v in action_configs:
                    interaction = v.get('mode', None)
                    if interaction not in possible_interactions:
                        logger.error(
                            "interaction {} is not supported. Only below interactions are supported at the moment:{}"
                            .format(v, possible_interactions))
                        exit(0)
    return
def transform(self) -> FrameOrSeriesUnion:
    """
    Transform a DataFrame or Series.

    Returns
    -------
    DataFrame or Series
        Result of applying ``func`` along the given axis of the
        Series or DataFrame.

    Raises
    ------
    ValueError
        If the transform function fails or does not transform.
    TypeError
        Re-raised unchanged when the str/callable transform raises it.
    """
    obj = self.obj
    func = self.orig_f
    axis = self.axis
    args = self.args
    kwargs = self.kwargs

    is_series = obj.ndim == 1

    # Row-wise transforms are computed column-wise on the transpose.
    if obj._get_axis_number(axis) == 1:
        assert not is_series
        return obj.T.transform(func, 0, *args, **kwargs).T

    if is_list_like(func) and not is_dict_like(func):
        func_list = cast(List[AggFuncTypeBase], func)
        # Convert func equivalent dict
        if is_series:
            func = {com.get_callable_name(v) or v: v for v in func_list}
        else:
            func = {col: func_list for col in obj}

    if is_dict_like(func):
        func = cast(AggFuncTypeDict, func)
        return self.transform_dict_like(func)

    # func is either str or callable
    func = cast(AggFuncTypeBase, func)
    try:
        result = self.transform_str_or_callable(func)
    except TypeError:
        raise
    except Exception as err:
        raise ValueError("Transform function failed") from err

    is_pandas_result = isinstance(result, (ABCSeries, ABCDataFrame))

    # Functions that transform may return empty Series/DataFrame
    # when the dtype is not appropriate
    if is_pandas_result and result.empty and not obj.empty:
        raise ValueError("Transform function failed")
    if not is_pandas_result or not result.index.equals(obj.index):
        raise ValueError("Function did not transform")

    return result
def transform(self) -> DataFrame | Series:
    """
    Transform a DataFrame or Series.

    Returns
    -------
    DataFrame or Series
        Result of applying ``func`` along the given axis of the
        Series or DataFrame.

    Raises
    ------
    ValueError
        If the transform function fails or does not transform.
    TypeError
        Re-raised unchanged when the str/callable transform raises it.
    """
    obj = self.obj
    func = self.orig_f
    axis = self.axis
    args = self.args
    kwargs = self.kwargs

    is_series = obj.ndim == 1

    # Row-wise transforms are computed column-wise on the transpose.
    if obj._get_axis_number(axis) == 1:
        assert not is_series
        return obj.T.transform(func, 0, *args, **kwargs).T

    if is_list_like(func) and not is_dict_like(func):
        func_list = cast(List[AggFuncTypeBase], func)
        # Convert func equivalent dict
        if is_series:
            func = {com.get_callable_name(v) or v: v for v in func_list}
        else:
            func = {col: func_list for col in obj}

    if is_dict_like(func):
        func = cast(AggFuncTypeDict, func)
        return self.transform_dict_like(func)

    # func is either str or callable
    func = cast(AggFuncTypeBase, func)
    try:
        result = self.transform_str_or_callable(func)
    except TypeError:
        raise
    except Exception as err:
        raise ValueError("Transform function failed") from err

    is_pandas_result = isinstance(result, (ABCSeries, ABCDataFrame))

    # Functions that transform may return empty Series/DataFrame
    # when the dtype is not appropriate
    if is_pandas_result and result.empty and not obj.empty:
        raise ValueError("Transform function failed")

    # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
    # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
    # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
    # Series]"
    if not is_pandas_result or not result.index.equals(
        obj.index  # type:ignore[arg-type]
    ):
        raise ValueError("Function did not transform")

    return result
def to_sql(self, frame, name, if_exists='fail', index=True,
           index_label=None, schema=None, chunksize=None, dtype=None,
           pkcs=None):
    """
    Write records stored in a DataFrame to a SQL database.

    Parameters
    ----------
    frame : DataFrame
    name : string
        Name of SQL table.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        - fail: If table exists, do nothing.
        - replace: If table exists, drop it, recreate it, and insert data.
        - append: If table exists, insert data. Create if does not exist.
    index : boolean, default True
        Write DataFrame index as a column.
    index_label : string or sequence, default None
        Column label for index column(s). If None is given (default) and
        `index` is True, then the index names are used.
        A sequence should be given if the DataFrame uses MultiIndex.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor
        supports this). If specified, this overwrites the default
        schema of the SQLDatabase object.
    chunksize : int, default None
        If not None, then rows will be written in batches of this size at a
        time. If None, all rows will be written at once.
    dtype : single type or dict of column name to SQL type, default None
        Optional specifying the datatype for columns. The SQL type should
        be a SQLAlchemy type. If all columns are of the same type, one
        single value can be used.
    pkcs : default None
        Forwarded unchanged to ``SQLTable_extend``.
        NOTE(review): presumably the primary-key column(s) - confirm
        against ``SQLTable_extend``.

    Raises
    ------
    ValueError
        If any entry of ``dtype`` is not a SQLAlchemy type.

    Warns
    -----
    UserWarning
        If, after writing, ``name`` is not found verbatim among the
        database's table names (checked only for names that are neither
        all digits nor all lower case).
    """
    # A single type is broadcast to every column of the frame.
    if dtype and not is_dict_like(dtype):
        dtype = {col_name: dtype for col_name in frame}

    if dtype is not None:
        from sqlalchemy.types import to_instance, TypeEngine
        for col, my_type in dtype.items():
            if not isinstance(to_instance(my_type), TypeEngine):
                # str.format instead of `%`: with `%`, a tuple column
                # label is unpacked as several arguments and raises
                # TypeError instead of this ValueError.
                raise ValueError('The type of {} is not a SQLAlchemy '
                                 'type '.format(col))

    table = SQLTable_extend(name, self, frame=frame, index=index,
                            if_exists=if_exists, index_label=index_label,
                            schema=schema, dtype=dtype, pkcs=pkcs)
    table.create()
    table.insert(chunksize)
    if (not name.isdigit() and not name.islower()):
        # check for potentially case sensitivity issues (GH7815)
        # Only check when name is not a number and name is not lower case
        engine = self.connectable.engine
        with self.connectable.connect() as conn:
            table_names = engine.table_names(
                schema=schema or self.meta.schema,
                connection=conn,
            )
        if name not in table_names:
            msg = (
                "The provided table name '{0}' is not found exactly as "
                "such in the database after writing the table, possibly "
                "due to case sensitivity issues. Consider using lower "
                "case table names.").format(name)
            warnings.warn(msg, UserWarning)