def transform(self, y):
    """Encode ``y`` with the fitted classes, tolerating unknown values.

    Parameters
    ----------
    y : array_like, shape=(n_samples,)
        The array to encode.

    Returns
    -------
    e : array_like, shape=(n_samples,)
        One code per entry of ``y``: the ``np.searchsorted`` position of
        the entry within ``self.classes_`` when it is present there,
        otherwise the sentinel returned by ``_get_unseen()``.

    Raises
    ------
    ValueError
        If the number of distinct values in ``y`` is greater than or
        equal to the sentinel returned by ``_get_unseen()``.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    distinct = np.unique(y)
    _check_numpy_unicode_bug(distinct)

    # Check not too many:
    unseen_code = _get_unseen()
    if len(distinct) >= unseen_code:
        raise ValueError('Too many factor levels in feature. Max is %i' % unseen_code)

    known = self.classes_
    codes = []
    for value in y:
        if value in known:
            codes.append(np.searchsorted(known, value))
        else:
            # Values never seen during fit all share the sentinel code.
            codes.append(unseen_code)
    return np.array(codes)
def transform(self, y):
    """Encode ``y`` against the fitted classes, flagging unknown values.

    Parameters
    ----------
    y : array_like, shape=(n_samples,)
        The array to encode.

    Returns
    -------
    numpy.ndarray
        One code per entry of ``y``: its ``np.searchsorted`` position in
        ``self.classes_`` when the entry is present there, otherwise the
        sentinel returned by ``get_unseen()``.

    Raises
    ------
    ValueError
        If ``y`` has at least as many distinct values as the sentinel
        returned by ``get_unseen()``.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y, warn=True)

    levels = np.unique(y)
    _check_numpy_unicode_bug(levels)

    # Check not too many:
    sentinel = get_unseen()
    if len(levels) >= sentinel:
        raise ValueError('Too many factor levels in feature. Max is %i' % sentinel)

    fitted = self.classes_

    def encode(value):
        # Known values get their sorted position; everything else the sentinel.
        if value in fitted:
            return np.searchsorted(fitted, value)
        return sentinel

    return np.array(list(map(encode, y)))
def fit(self, X, y=None):
    """Fit label encoder.

    Parameters
    ----------
    X : array-like exposing ``ravel()``
        Values to learn from. The input is flattened with ``X.ravel()``
        and its distinct values are stored in ``self.classes_``.
    y : ignored
        Not used by this method.

    Returns
    -------
    self : returns an instance of self.
    """
    X = column_or_1d(X.ravel(), warn=True)
    _check_numpy_unicode_bug(X)
    self.classes_ = np.unique(X)
    # Check the size first: indexing element 0 of an empty array would
    # raise IndexError, so empty input simply yields an empty class set.
    if self.classes_.size and isinstance(self.classes_[0], np.float64):
        # Non-finite values (NaN, +/-inf) are not kept as classes.
        self.classes_ = self.classes_[np.isfinite(self.classes_)]
    return self
def transform(self, y):
    """Transform labels to normalized encoding.

    Parameters
    ----------
    y : array-like exposing ``ravel()``
        Target values. The input is flattened with ``y.ravel()``.

    Returns
    -------
    numpy.ndarray of shape (n_samples, 1)
        ``np.searchsorted`` position of each entry of ``y`` within
        ``self.classes_``, returned as a single column.

    Raises
    ------
    ValueError
        If ``y`` contains labels that are not in ``self.classes_``.
        Non-finite ``np.float64`` values are exempt from this check.
    """
    check_is_fitted(self, 'classes_')
    y = column_or_1d(y.ravel(), warn=True)
    classes = np.unique(y)
    # Check the size first: indexing element 0 of an empty array would
    # raise IndexError, so empty input yields an empty (0, 1) result.
    if classes.size and isinstance(classes[0], np.float64):
        # Non-finite values are dropped before the unseen-label check only;
        # they are still passed to np.searchsorted below.
        classes = classes[np.isfinite(classes)]
    _check_numpy_unicode_bug(classes)
    if len(np.intersect1d(classes, self.classes_)) < len(classes):
        diff = np.setdiff1d(classes, self.classes_)
        raise ValueError("y contains new labels: %s" % str(diff))
    return np.searchsorted(self.classes_, y).reshape(-1, 1)
def mapper(y):
    """Return the distinct values of ``y`` as produced by ``np.unique``.

    Parameters
    ----------
    y : array-like
        Input values; passed through ``column_or_1d`` (with ``warn=True``)
        and ``_check_numpy_unicode_bug`` before de-duplication.

    Returns
    -------
    numpy.ndarray
        Sorted unique values of the validated input.
    """
    flat = column_or_1d(y, warn=True)
    _check_numpy_unicode_bug(flat)
    distinct = np.unique(flat)
    return distinct