def transform(self, X, y=None):
    """Dummy-encode ``X`` and align the result to the columns seen in ``fit``.

    Args:
        X: Two-dimensional input exposing ``.shape``; it is passed through
            ``todf`` before encoding.
        y: Unused; accepted so the signature matches ``fit``.

    Returns:
        ``X`` itself when it has zero columns, otherwise whatever
        ``align_columns`` returns for the freshly encoded frame and
        ``self.encoded.columns``.
    """
    has_no_columns = X.shape[1] == 0
    if has_no_columns:
        # Nothing to encode: hand the input back untouched.
        return X

    frame = todf(X)
    # drop_first is fixed to False here, independent of self.drop_first.
    # NOTE(review): presumably align_columns then keeps only the columns
    # recorded during fit -- confirm against its definition.
    dummies = pd.get_dummies(
        frame,
        dummy_na=True,
        drop_first=False,
        sparse=self.sparse,
    )
    logger.debug('dummy encoded shape {}'.format(dummies.shape))

    fitted_columns = self.encoded.columns
    return align_columns(dummies, fitted_columns)
def _check_params(self, X):
    """Resolve the target component count and the output feature names.

    Sets ``self.k_int`` (from ``ratio2int`` applied to the column count
    and ``self.k``), ``self.donothing`` and ``self.feature_names``.

    Args:
        X: Two-dimensional input exposing ``.shape``.
    """
    n_features = X.shape[1]
    # NOTE(review): ratio2int presumably turns a ratio-or-count ``k`` into an
    # absolute integer count -- confirm against its definition.
    self.k_int = ratio2int(n_features, self.k)

    # A count outside the open interval (0, n_features) disables reduction.
    self.donothing = self.k_int <= 0 or self.k_int >= n_features

    if not self.donothing:
        self.feature_names = ['svd' + str(idx) for idx in range(self.k_int)]
        return

    # Pass-through mode: keep the column labels of the input frame.
    self.feature_names = todf(X).columns
def fit(self, X, y=None):
    """Dummy-encode ``X`` and keep the result as ``self.encoded``.

    Args:
        X: Two-dimensional input exposing ``.shape``; it is passed through
            ``todf`` before encoding.
        y: Unused.

    Returns:
        ``self``.
    """
    if X.shape[1] != 0:
        self.encoded = pd.get_dummies(
            todf(X),
            dummy_na=True,
            drop_first=self.drop_first,
            sparse=self.sparse,
        )
        logger.debug('dummy encoded shape {}'.format(self.encoded.shape))
    else:
        # No columns to encode: store the input itself.
        self.encoded = X
    return self
def fit_transform(self, X, y=None):
    """Fit a ``TruncatedSVD`` on ``X`` and return the reduced data.

    Calls ``self._check_params(X)`` first; when that leaves
    ``self.donothing`` truthy, no model is created and ``X`` is returned
    unchanged.

    Args:
        X: Input data handed to ``TruncatedSVD.fit_transform``.
        y: Unused.

    Returns:
        ``X`` itself in pass-through mode, otherwise the SVD output wrapped
        by ``todf``.
    """
    self._check_params(X)
    if not self.donothing:
        self.svd = TruncatedSVD(n_components=self.k_int)
        reduced = self.svd.fit_transform(X)
        return todf(reduced)
    return X
def transform(self, X):
    """Project ``X`` with the fitted SVD, or return it as-is.

    Args:
        X: Input data handed to ``self.svd.transform``.

    Returns:
        ``X`` itself when ``self.donothing`` is truthy, otherwise the
        result of ``self.svd.transform(X)`` wrapped by ``todf``.
    """
    if not self.donothing:
        projected = self.svd.transform(X)
        return todf(projected)
    return X