def inverse_transform(self, x):
    """
    Scale back the data to the original representation.

    Each column is paired with the shift and lambda stored at the same
    position in ``self._shift`` and ``self._lmd``.  A lambda of NaN or
    ``+inf`` marks a column that is returned unchanged; any other lambda is
    inverted with ``inv_boxcox`` and the stored shift is subtracted.

    Parameters
    ----------
    x: DataFrame, Series, ndarray, list
        The data used to scale along the features axis.

    Returns
    -------
    numpy.ndarray
        Inverse transformed data.  Flattened to 1-D when ``self._shape``
        has a single dimension, otherwise one column per processed feature.
    """
    # presumably normalises the input to a 2-D array -- TODO confirm
    x = self._check_type(x)

    columns = []
    for col, shift, lmd in zip(x.T, self._shift, self._lmd):
        # NaN never compares equal to itself, so it is tested explicitly
        # instead of through an equality/membership match.
        if np.isnan(lmd) or lmd == np.inf:
            restored = col
        else:
            restored = inv_boxcox(col, lmd) - shift
        columns.append(restored.reshape(-1, 1))

    restored_all = np.concatenate(columns, axis=1)
    if len(self._shape) == 1:
        return restored_all.ravel()
    return restored_all
def _handle_err(self, e): for c in Switch(self._on_err): if c(None): self._lmd.append(np.inf) break if c('log'): self._lmd.append(0.) break if c('nan'): self._lmd.append(np.nan) break if c('raise'): raise e if c(): raise RuntimeError( 'parameter on_err must be None "log", "nan" or "raise"')
def transform(self, x):
    """
    Apply the Box-Cox transformation to every column of ``x``.

    For each column the stored shift ``self._shift[i]`` is first updated:
    it is reset to 0 when all values are strictly positive, otherwise the
    smallest non-NaN value of the column is subtracted from it.  The column
    is then handled according to ``self._lmd[i]``:

    * ``+inf`` -- the column is passed through unchanged;
    * NaN -- the column is replaced by NaN values;
    * anything else -- ``boxcox(col + shift, lmd)`` is applied.

    Parameters
    ----------
    x: DataFrame, Series, ndarray, list
        The data to transform.  It is passed through ``self._check_type``
        and then processed column by column.

    Returns
    -------
    numpy.ndarray
        Box-Cox transformed data.  Flattened to 1-D when ``self._shape``
        has a single dimension, otherwise reshaped to
        ``(-1, self._shape[1])``.
    """
    # presumably normalises the input to a 2-D array -- TODO confirm
    x = self._check_type(x)

    columns = []
    for i, col in enumerate(x.T):
        # NOTE(review): the shift is stored on the instance and updated in
        # place on every call, so repeated calls with non-positive data keep
        # moving it -- confirm this is intended.
        if np.all(col > 0):
            self._shift[i] = 0.
        else:
            self._shift[i] -= col[~np.isnan(col)].min()

        lmd = self._lmd[i]
        shift = self._shift[i]

        # A separate name is used for the per-column result so the input
        # argument ``x`` is not rebound inside the loop.
        if lmd == np.inf:
            converted = col
        elif np.isnan(lmd):
            # NaN never compares equal to itself, hence the explicit test.
            converted = np.full(col.shape, np.nan)
        else:
            converted = boxcox(col + shift, lmd)
        columns.append(converted.reshape(-1, 1))

    result = np.concatenate(columns, axis=1)
    if len(self._shape) == 1:
        return result.ravel()
    return result.reshape(-1, self._shape[1])
def transform(self, entries: Sequence, *, return_type=None, **kwargs):
    """
    Featurize a list of entries.

    If `featurize` takes multiple inputs, supply inputs as a list of tuples.

    Args
    ----
    entries: list-like
        A list of entries to be featurized.
    return_type: str
        Specify the return type.
        Can be ``any``, ``array`` and ``df``.
        ``array`` and ``df`` force the return type to ``np.ndarray`` and
        ``pd.DataFrame`` respectively.
        If ``any``, the return type depends on the input type: a
        ``pd.Series``/``pd.DataFrame`` input gives a ``pd.DataFrame``, an
        ``np.ndarray`` input gives an ``np.ndarray``, anything else gives
        the raw featurization result.
        This is a temporary change that only has effect in the current
        transform. Default is ``None`` for using ``self.return_type``.
    **kwargs
        Stored on ``self._kwargs`` and forwarded to ``featurize`` when
        ``self._n_jobs`` is 0.

    Returns
    -------
    DataFrame, ndarray or list
        Features for each entry, in the container selected by
        ``return_type``.  An empty input returns an empty list; an
        unrecognised return type yields ``None``.

    Raises
    ------
    TypeError
        If ``entries`` is not iterable.
    """
    self._kwargs = kwargs

    # Check inputs
    if not isinstance(entries, Iterable):
        raise TypeError('parameter "entries" must be a iterable object')

    # Special case: empty input.  ``len`` is compared by value; truthiness is
    # avoided because it is ambiguous for arrays and data frames.
    if len(entries) == 0:
        return []

    if self._n_jobs == 0:
        # Run the actual featurization on the whole input at once
        ret = self.featurize(entries, **kwargs)
    elif self._n_jobs == 1:
        ret = [self._wrapper(x) for x in entries]
    else:
        ret = Parallel(n_jobs=self._n_jobs, verbose=self._parallel_verbose)(
            delayed(self._wrapper)(x) for x in entries)

    # Labels are optional: not every featurizer implements them
    try:
        labels = self.feature_labels
    except NotImplementedError:
        labels = None

    if return_type is None:
        return_type = self.return_type

    is_pandas = isinstance(entries, (pd.Series, pd.DataFrame))

    if return_type == 'any':
        if is_pandas:
            return pd.DataFrame(ret, index=entries.index, columns=labels)
        if isinstance(entries, np.ndarray):
            return np.array(ret)
        return ret

    if return_type == 'array':
        return np.array(ret)

    if return_type == 'df':
        if is_pandas:
            return pd.DataFrame(ret, index=entries.index, columns=labels)
        return pd.DataFrame(ret, columns=labels)

    # Unrecognised return types produce no result
    return None
def transform(self, entries: Sequence, *, return_type=None, target_col=None, **kwargs):
    """
    Featurize a list of entries.

    If `featurize` takes multiple inputs, supply inputs as a list of tuples,
    or use pd.DataFrame with parameter ``target_col`` to specify the column name(s).

    Args
    ----
    entries: list-like or pd.DataFrame
        A list of entries to be featurized or pd.DataFrame with one specified column.
        See detail of target_col if entries is pd.DataFrame.
        Also, make sure n_jobs=0 for pd.DataFrame.
    return_type: str
        Specify the return type.
        Can be ``any``, ``custom``, ``array`` or ``df``.
        ``array`` or ``df`` forces return type to ``np.ndarray`` or ``pd.DataFrame``, respectively.
        If ``any``, the return type follows these rules:
        (1) if input type is pd.Series or pd.DataFrame, returns pd.DataFrame;
        (2) else if input type is np.array, returns np.array;
        (3) else if other input type and n_jobs=0, follows the featurize function return;
        (4) otherwise, returns a list of objects (output of featurize function).
        If ``custom``, the return type depends on the featurize function if n_jobs=0,
        or the return type is a list of objects (output of featurize function)
        for other n_jobs values.
        This is a one-time change that only has effect in the current transformation.
        Default is ``None`` for using the setting at initialization step.
    target_col
        Only relevant when input is pd.DataFrame, otherwise ignored.
        Specify the column(s) to be used for transformation.
        Default is ``None`` for using ``self.target_col``; if that is also
        ``None``, all columns of the data frame are used.
    **kwargs
        Stored on ``self._kwargs`` and forwarded to ``featurize`` when
        ``self._n_jobs`` is 0.

    Returns
    -------
    DataFrame, ndarray or list
        Features for each entry, in the container selected by
        ``return_type``.  An empty input returns an empty list.

    Raises
    ------
    TypeError
        If ``entries`` is not iterable.
    ValueError
        If ``return_type`` is not one of the supported values.
    RuntimeError
        If ``self._n_jobs`` is not 0 and the selected entries are still a
        ``pd.DataFrame``.
    """
    self._kwargs = kwargs

    # Check inputs
    if not isinstance(entries, Iterable):
        raise TypeError('parameter "entries" must be a iterable object')

    # Extract relevant columns for pd.DataFrame input
    if isinstance(entries, pd.DataFrame):
        if target_col is None:
            target_col = self.target_col
        if target_col is None:
            target_col = entries.columns.values
        entries = entries[target_col]

    # Special case: empty input.  ``len`` is compared by value; truthiness is
    # avoided because it is ambiguous for arrays and data frames.
    if len(entries) == 0:
        return []

    # Check outputs
    if return_type not in {None, 'any', 'array', 'df', 'custom'}:
        raise ValueError('`return_type` must be None, `any`, `custom`, `array` or `df`')

    if self._n_jobs == 0:
        # Run the actual featurization on the whole input at once
        ret = self.featurize(entries, **kwargs)
    else:
        # Per-entry processing iterates over ``entries``, which is rejected
        # for a data frame.
        if isinstance(entries, pd.DataFrame):
            raise RuntimeError(
                "Auto-parallel can not be used when`entries` is `pandas.DataFrame`. "
                "Please set `n_jobs` to 0 and implements your algorithm in the `featurize` method"
            )
        if self._n_jobs == 1:
            ret = [self._wrapper(x) for x in entries]
        else:
            ret = Parallel(n_jobs=self._n_jobs, verbose=self._parallel_verbose)(
                delayed(self._wrapper)(x) for x in entries)

    # Labels are optional: not every featurizer implements them
    try:
        labels = self.feature_labels
    except NotImplementedError:
        labels = None

    if return_type is None:
        return_type = self.return_type

    is_pandas = isinstance(entries, (pd.Series, pd.DataFrame))

    if return_type == 'any':
        if is_pandas:
            return pd.DataFrame(ret, index=entries.index, columns=labels)
        if isinstance(entries, np.ndarray):
            return np.array(ret)
        return ret

    if return_type == 'array':
        return np.array(ret)

    if return_type == 'df':
        if is_pandas:
            return pd.DataFrame(ret, index=entries.index, columns=labels)
        return pd.DataFrame(ret, columns=labels)

    if return_type == 'custom':
        return ret

    # Only reachable when ``self.return_type`` holds an unsupported value
    return None