def encode(self, xs: pd.Series):
    """Return ``xs`` with its missing entries filled.

    The fill value is chosen as follows:

    * if ``self.by`` is not ``None`` it is treated as the name of a
      zero-argument method of ``xs`` (looked up with ``getattr``) and the
      result of calling it on the input is used;
    * otherwise ``self.value`` is used.

    When the ``keep_original`` config entry is truthy, the missing-value
    mask of the input (``xs.isna()``) is stored on ``self.mask`` before
    filling.
    """
    if get_config('keep_original'):
        self.mask = xs.isna()

    # Pick the replacement once, then fill in a single place.
    filler = self.value if self.by is None else getattr(xs, self.by)()
    return xs.fillna(filler)
def encode(self, xs:pd.Series)->pd.Series:
    """Map each value of ``xs`` to the position of the date bin containing it.

    The bin edges are ``self.date_range``; when that attribute is ``None``
    it is built on first use with ``pd.date_range`` from ``self.start``,
    ``self.end``, ``self.periods``, ``self.freq``, ``self.tz``,
    ``self.normalize`` and ``self.closed``, and stored back on ``self``.

    Bins are consecutive pairs of edges and are tested with
    ``Series.between`` (both ends inclusive), in order. A value that equals
    a shared edge therefore ends up with the code of the later bin, and a
    value matched by no bin keeps the initial code ``0``.

    The result is an ``int64`` series carrying the index and name of ``xs``.
    When the ``keep_original`` config entry is truthy, the input is stored
    on ``self.original_xs``.

    NOTE(review): the ``closed`` keyword of ``pd.date_range`` was replaced
    by ``inclusive`` in newer pandas releases -- confirm the pandas version
    this project pins.
    """
    if self.date_range is None:
        self.date_range = pd.date_range(
            self.start,
            self.end,
            self.periods,
            self.freq,
            self.tz,
            self.normalize,
            closed=self.closed,
        )

    edges = self.date_range
    codes = pd.Series(
        np.zeros_like(xs, dtype=np.int64),
        index=xs.index,
        name=xs.name,
    )

    # Later bins overwrite earlier ones, so the assignment order matters.
    for bin_id, (left, right) in enumerate(zip(edges, edges[1:])):
        in_bin = xs.between(left, right)
        codes[in_bin] = bin_id

    if get_config('keep_original'):
        self.original_xs = xs
    return codes
def encode(self, xs: pd.Series) -> pd.Series:
    """Replace values beyond the stored thresholds with the thresholds.

    If ``self.upper`` is ``None`` the thresholds are computed from the
    input as ``xs.quantile(self.percentile)`` (upper) and
    ``xs.quantile(1.0 - self.percentile)`` (lower) and stored on
    ``self.upper`` / ``self.lower``; otherwise the stored values are used.

    Values greater than the upper threshold are replaced by it, and values
    smaller than the lower threshold are replaced by the lower threshold.

    When the ``keep_original`` config entry is truthy, the two boolean
    masks and the values they select from the input are stored on
    ``self.lower_mask``, ``self.lower_original_values``,
    ``self.upper_mask`` and ``self.upper_original_values``.
    """
    if self.upper is None:
        # Both quantiles are evaluated before either attribute is written.
        self.upper, self.lower = (
            xs.quantile(self.percentile),
            xs.quantile(1.0 - self.percentile),
        )
    hi, lo = self.upper, self.lower

    too_high = xs > hi
    too_low = xs < lo

    if get_config('keep_original'):
        self.lower_mask = too_low
        self.lower_original_values = xs[self.lower_mask]
        self.upper_mask = too_high
        self.upper_original_values = xs[self.upper_mask]

    # Masks were computed on the untouched input, so chaining is safe.
    return xs.mask(too_high, hi).mask(too_low, lo)
def encode(self, df:pd.DataFrame):
    """Return ``df`` without the rows that contain any missing value.

    The index labels of the rows that have at least one missing value are
    always stored on ``self.original_dropped_index``. When the
    ``keep_original`` config entry is truthy, those rows themselves are
    additionally stored on ``self.original``.
    """
    # True for every row with at least one missing cell.
    has_missing = df.isna().any(axis=1)
    self.original_dropped_index = df.index[has_missing]

    if get_config('keep_original'):
        self.original = df.loc[has_missing]

    return df.dropna()
def decode(self, xs:pd.Series)->pd.Series:
    """Turn integer bin codes back into timestamps.

    When the ``keep_original`` config entry is truthy, the series stored on
    ``self.original_xs`` is returned unchanged and ``xs`` is ignored.

    Otherwise each code in ``xs`` is looked up positionally in
    ``self.date_range`` and the selected timestamps are returned as a
    series. The result carries the index and name of ``xs`` when it has
    them, so the decoded values stay aligned with the rows they came from;
    for inputs without ``index`` / ``name`` attributes (e.g. a plain array)
    a default index and no name are used.
    """
    if get_config('keep_original'):
        return self.original_xs

    # ``Index.take`` yields a bare DatetimeIndex; without an explicit index
    # the resulting series would be relabelled 0..n-1 and lose alignment
    # with ``xs``.
    return pd.Series(
        self.date_range.take(xs),
        index=getattr(xs, 'index', None),
        name=getattr(xs, 'name', None),
    )