def fit(self, x, y, **kwargs):
    """Build a new model with `build_fn` and train it on `(x, y)`.

    The model is produced by `build_fn`, or by this object's own
    `__call__` when `build_fn` is `None`. The builder receives the
    parameters that `filter_sk_params` selects for it.

    Arguments:
        x : array-like, shape `(n_samples, n_features)`
            Training samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `Sequential.fit`.
            They take precedence over the stored fit parameters.

    Returns:
        history : object
            details about the training history at each epoch.
    """
    builder = self.build_fn
    if builder is None:
        # No builder supplied: this object itself acts as the builder.
        self.model = self.__call__(**self.filter_sk_params(self.__call__))
    elif isinstance(builder, (types.FunctionType, types.MethodType)):
        self.model = builder(**self.filter_sk_params(builder))
    else:
        # Any other callable object: filter against its `__call__`.
        self.model = builder(**self.filter_sk_params(builder.__call__))

    # The loss is inspected first; `y.shape` is only read when the loss
    # is categorical crossentropy.
    if losses.is_categorical_crossentropy(self.model.loss):
        if len(y.shape) != 2:
            y = to_categorical(y)

    training_options = copy.deepcopy(self.filter_sk_params(Sequential.fit))
    training_options.update(kwargs)
    return self.model.fit(x, y, **training_options)
def fit(self, y: np.ndarray) -> "ClassifierLabelEncoder":
    """Fit the encoder that matches the target type of ``y``.

    Binary and multiclass targets are reshaped and ordinal-encoded;
    multiclass targets are one-hot encoded instead when the loss is
    categorical crossentropy. Multilabel-indicator and
    multiclass-multioutput targets go through a ``FunctionTransformer``
    built with default arguments.

    Parameters
    ----------
    y : np.ndarray
        The target data to fit on.

    Returns
    -------
    ClassifierLabelEncoder
        This instance, with ``classes_``, ``n_classes_``, ``n_outputs_``
        and ``n_outputs_expected_`` set.

    Raises
    ------
    ValueError
        If the detected target type has no registered encoder.
    """
    target_type = self._type_of_target(y)
    keras_dtype = np.dtype(tf.keras.backend.floatx())

    def ordinal_pipeline():
        # Reshape the target, then map each class to an ordinal number.
        return make_pipeline(
            TargetReshaper(),
            OrdinalEncoder(dtype=keras_dtype, categories=self.categories),
        )

    encoders = {
        "binary": ordinal_pipeline(),
        "multiclass": ordinal_pipeline(),
        "multiclass-multioutput": FunctionTransformer(),
        "multilabel-indicator": FunctionTransformer(),
    }
    if is_categorical_crossentropy(self.loss):
        encoders.update(
            multiclass=make_pipeline(
                TargetReshaper(),
                OneHotEncoder(
                    sparse=False, dtype=keras_dtype, categories=self.categories
                ),
            )
        )

    chosen = encoders.get(target_type)
    if chosen is None:
        message = (
            f"Unknown label type: {target_type}."
            "\n\nTo implement support, subclass KerasClassifier and override"
            " `target_transformer` with a transformer that supports this"
            " label type."
            "\n\nFor information on sklearn target types, see:"
            " * https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html"
            " * https://scikit-learn.org/stable/modules/multiclass.html"
            "\n\nFor information on the SciKeras data transformation interface, see:"
            " * https://scikeras.readthedocs.io/en/latest/advanced.html#data-transformers"
        )
        raise ValueError(message)
    self._final_encoder = chosen.fit(y)

    # An indicator matrix whose minimum is 0 and whose rows each sum to 1
    # is recorded as one-hot encoded multiclass.
    if target_type == "multilabel-indicator":
        if y.min() == 0 and (y.sum(axis=1) == 1).all():
            target_type = "multiclass-onehot"

    self._target_type = target_type
    self._y_dtype = y.dtype
    self.n_outputs_ = 1
    self.n_outputs_expected_ = 1

    if target_type == "multiclass-multioutput":
        self.classes_ = None
        self.n_classes_ = None
    elif target_type in ("binary", "multiclass"):
        # Step 1 of the pipeline is the fitted ordinal/one-hot encoder.
        self.classes_ = self._final_encoder[1].categories_[0]
        self.n_classes_ = self.classes_.size
    elif target_type in ("multiclass-onehot", "multilabel-indicator"):
        n_columns = y.shape[1]
        self.classes_ = np.arange(0, n_columns)
        self.n_classes_ = n_columns
    return self
def fit(self, x, y, **kwargs):
    """Construct a new model with `build_fn` and fit it to `(x, y)`.

    Args:
        x : array-like, shape `(n_samples, n_features)`
            Training samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `Sequential.fit`.
            These override the stored fit parameters of the same name.

    Returns:
        history : object
            details about the training history at each epoch.
    """
    build_fn = self.build_fn
    is_plain_callable = isinstance(
        build_fn, (types.FunctionType, types.MethodType)
    )
    if build_fn is None:
        # Without a `build_fn`, the instance's own `__call__` builds the model.
        factory, signature_source = self.__call__, self.__call__
    elif is_plain_callable:
        factory, signature_source = build_fn, build_fn
    else:
        # Other callables are filtered against their `__call__` method.
        factory, signature_source = build_fn, build_fn.__call__
    self.model = factory(**self.filter_sk_params(signature_source))

    # `y.shape` is consulted only for a categorical crossentropy loss.
    if (losses.is_categorical_crossentropy(self.model.loss)
            and len(y.shape) != 2):
        y = to_categorical(y)

    fit_kwargs = copy.deepcopy(self.filter_sk_params(Sequential.fit))
    fit_kwargs.update(kwargs)
    return self.model.fit(x, y, **fit_kwargs)
def inverse_transform(self, y: np.ndarray, return_proba: bool = False) -> np.ndarray:
    """Convert raw model outputs back into class predictions.

    Parameters
    ----------
    y : np.ndarray
        Raw probability predictions produced by the model.
    return_proba : bool, default False
        If True, return the probabilities instead of class predictions.
        For binary targets predicted with a single output column, the
        returned array is expanded to two columns ``[1 - p, p]``.

    Returns
    -------
    np.ndarray
        Class predictions cast to the dtype of the fitted target, or the
        probabilities when ``return_proba`` is True. Single-column
        predictions are returned as a 1-D array; indicator predictions
        keep their ``(n_samples, n_classes)`` layout.

    Raises
    ------
    NotImplementedError
        For 'multiclass-multioutput' targets when ``return_proba`` is
        False.
    """
    if self._target_type == "binary":
        # array([0.9, 0.1], [.2, .8]) -> array(['yes', 'no'])
        if y.ndim == 1 or (y.shape[1] == 1 and self.n_classes_ == 2):
            # result from a single sigmoid output
            # reformat so that we have 2 columns
            y = np.column_stack([1 - y, y])
        class_predictions = np.argmax(y, axis=1).reshape(-1, 1)
        class_predictions = self._final_encoder.inverse_transform(
            class_predictions)
    elif self._target_type == "multiclass":
        # array([0.8, 0.1, 0.1], [.1, .8, .1]) ->
        # array(['apple', 'orange'])
        idx = np.argmax(y, axis=-1)
        if not is_categorical_crossentropy(self.loss):
            class_predictions = idx.reshape(-1, 1)
        else:
            class_predictions = np.zeros(y.shape, dtype=int)
            # Pair each row with its own argmax column; `[:, idx]` would
            # set every selected column on every row.
            class_predictions[np.arange(len(idx)), idx] = 1
        class_predictions = self._final_encoder.inverse_transform(
            class_predictions)
    elif self._target_type == "multiclass-onehot":
        # array([.8, .1, .1], [.1, .8, .1]) ->
        # array([[1, 0, 0], [0, 1, 0]])
        idx = np.argmax(y, axis=-1)
        class_predictions = np.zeros(y.shape, dtype=int)
        class_predictions[np.arange(len(idx)), idx] = 1
    elif self._target_type == "multilabel-indicator":
        class_predictions = np.around(y)
    else:
        if not return_proba:
            raise NotImplementedError(
                "Class-predictions are not clearly defined for"
                " 'multiclass-multioutput' target types."
                "\n\nTo implement support, subclass KerasClassifier and override"
                " `target_transformer` with a transformer that supports this"
                " label type."
                "\n\nFor information on sklearn target types, see:"
                " * https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html"
                " * https://scikit-learn.org/stable/modules/multiclass.html"
                "\n\nFor information on the SciKeras data transformation interface, see:"
                " * https://scikeras.readthedocs.io/en/latest/advanced.html#data-transformers"
            )

    if return_proba:
        return y

    class_predictions = np.asarray(class_predictions)
    if class_predictions.ndim == 2 and class_predictions.shape[1] == 1:
        # Flatten a single prediction column to 1-D. Multi-column indicator
        # output is left untouched: passing it through `np.column_stack`
        # would stack its rows as columns, i.e. transpose it.
        class_predictions = class_predictions.ravel()
    return class_predictions.astype(self._y_dtype, copy=False)
def _check_output_model_compatibility(self, y):
    """Check that the model's outputs and loss functions match ``y``.

    Before delegating to the parent implementation, any target paired
    with a categorical crossentropy loss that is 1-D or has a single
    column is replaced in ``y`` by its ``to_categorical`` encoding.
    """
    # Check the loss function to adjust the encoding of the input;
    # this is needed to mimic scikit-learn behavior.
    model_loss = self.model_.loss
    if isinstance(model_loss, list):
        per_output_losses = model_loss
    else:
        # A single loss applies to every output.
        per_output_losses = [model_loss] * self.n_outputs_

    for position, (loss, target) in enumerate(zip(per_output_losses, y)):
        if not is_categorical_crossentropy(loss):
            continue
        if target.ndim == 1 or target.shape[1] == 1:
            y[position] = to_categorical(target)

    return super()._check_output_model_compatibility(y)
def fit(self, x, y, **kwargs):
    """Record the classes found in `y`, build the model and train it.

    A 2-D `y` with more than one column is taken as one column per
    class, so `classes_` becomes the column indices. A 1-D or
    single-column `y` is replaced by the positions of its values within
    the sorted unique labels, which are stored in `classes_`.

    Args:
        x: Training samples, passed through to the model's `fit`.
        y: Array-like of labels, 1-D or 2-D.
        **kwargs: Extra arguments for the model's `fit`; they override
            the stored fit parameters of the same name.

    Returns:
        The value returned by the model's `fit`.

    Raises:
        ValueError: If `y` is neither 1-D nor 2-D, or is 2-D with zero
            columns.
    """
    y = np.array(y)
    n_dims = y.ndim
    if n_dims not in (1, 2) or (n_dims == 2 and y.shape[1] == 0):
        raise ValueError('Invalid shape for y: ' + str(y.shape))

    if n_dims == 2 and y.shape[1] > 1:
        self.classes_ = np.arange(y.shape[1])
    else:
        self.classes_ = np.unique(y)
        # Replace each label by its index in the sorted unique labels.
        y = np.searchsorted(self.classes_, y)
    self.n_classes_ = len(self.classes_)

    self.model = self.build_fn(**self.sk_params)

    if losses.is_categorical_crossentropy(self.model.loss):
        if len(y.shape) != 2:
            y = to_categorical(y)

    training_options = copy.deepcopy(
        self.filter_sk_params(models.Sequential.fit))
    training_options.update(kwargs)
    return self.model.fit(x, y, **training_options)
def _check_output_model_compatibility(self, y):
    """Check that the model's outputs and loss functions match ``y``.

    For each output whose loss is categorical crossentropy and whose
    target is 1-D or has a single column, the target is one-hot encoded
    in place and the fitted encoder is appended to that output's entry
    in ``self.encoders_``. The parent implementation is then called
    with the updated ``y``.
    """
    # Check the loss function to adjust the encoding of the input;
    # this is needed to mimic scikit-learn behavior.
    model_loss = self.model_.loss
    per_output_losses = (
        model_loss
        if isinstance(model_loss, list)
        else [model_loss] * self.n_outputs_
    )

    for position, loss in enumerate(per_output_losses):
        if not is_categorical_crossentropy(loss):
            continue
        if y[position].ndim != 1 and y[position].shape[1] != 1:
            # Already has several columns; leave it untouched.
            continue
        one_hot = OneHotEncoder(sparse=False)
        if y[position].ndim == 1:
            # The encoder is given a 2-D column.
            y[position] = y[position].reshape(-1, 1)
        y[position] = one_hot.fit_transform(y[position])
        # Chain the new encoder after the existing one for this output.
        self.encoders_[position] = make_pipeline(
            self.encoders_[position],
            one_hot,
            "passthrough",
        )

    return super()._check_output_model_compatibility(y)
def inverse_transform(self, y: np.ndarray, return_proba: bool = False) -> np.ndarray:
    """Restore the data types, shape and classes of the input y
    to the output of the Keras Model.

    Parameters
    ----------
    y : np.ndarray
        Raw probability predictions from the Keras Model.
    return_proba : bool, default False
        If True, return the prediction probabilities themselves.
        If False, return the class predictions.

    Returns
    -------
    np.ndarray
        Class predictions (reshaped to the trailing dimensions of the y
        seen in fit), or the squeezed class prediction probabilities.

    Raises
    ------
    NotImplementedError
        For 'multiclass-multioutput' targets when ``return_proba`` is
        False.
    """
    if self._target_type == "binary":
        # array([0.9, 0.1], [.2, .8]) -> array(['yes', 'no'])
        if y.ndim == 1 or (y.shape[1] == 1 and self.n_classes_ == 2):
            # result from a single sigmoid output
            # reformat so that we have 2 columns
            y = np.column_stack([1 - y, y])
        class_predictions = np.argmax(y, axis=1).reshape(-1, 1)
        class_predictions = self._final_encoder.inverse_transform(
            class_predictions)
    elif self._target_type == "multiclass":
        # array([0.8, 0.1, 0.1], [.1, .8, .1]) -> array(['apple', 'orange'])
        idx = np.argmax(y, axis=-1)
        if not is_categorical_crossentropy(self.loss):
            class_predictions = idx.reshape(-1, 1)
        else:
            class_predictions = np.zeros(y.shape, dtype=int)
            # Pair each row with its own argmax column; `[:, idx]` would
            # set every selected column on every row, so the result
            # would no longer be one-hot.
            class_predictions[np.arange(len(idx)), idx] = 1
        class_predictions = self._final_encoder.inverse_transform(
            class_predictions)
    elif self._target_type == "multiclass-onehot":
        # array([.8, .1, .1], [.1, .8, .1]) -> array([[1, 0, 0], [0, 1, 0]])
        idx = np.argmax(y, axis=-1)
        class_predictions = np.zeros(y.shape, dtype=int)
        class_predictions[np.arange(len(idx)), idx] = 1
    elif self._target_type == "multilabel-indicator":
        class_predictions = np.around(y).astype(int, copy=False)
    else:
        if not return_proba:
            raise NotImplementedError(
                "Class-predictions are not clearly defined for"
                " 'multiclass-multioutput' target types."
                "\n\nTo implement support, subclass KerasClassifier and override"
                " ``target_encoder`` with a transformer that supports this"
                " label type."
                "\n\nFor information on sklearn target types, see:"
                " * https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html"
                " * https://scikit-learn.org/stable/modules/multiclass.html"
                "\n\nFor information on the SciKeras data transformation interface, see:"
                " * https://www.adriangb.com/scikeras/stable/advanced.html#data-transformers"
            )

    if return_proba:
        return np.squeeze(y)
    # Give the predictions the trailing dimensions of the fitted target.
    return class_predictions.reshape(-1, *self._y_shape[1:])
def fit(self, y: np.ndarray) -> "ClassifierLabelEncoder":
    """Fit the estimator to the target y.

    Binary and multiclass targets are transformed into ordinal numbers.
    If the loss function is categorical_crossentropy, multiclass targets
    are one-hot encoded instead. Multilabel-indicator and
    multiclass-multioutput targets go through a ``FunctionTransformer``
    built with default arguments.

    Parameters
    ----------
    y : np.ndarray
        The target data to be transformed.

    Returns
    -------
    ClassifierLabelEncoder
        A reference to the current instance of ClassifierLabelEncoder.

    Raises
    ------
    ValueError
        If the detected target type has no registered encoder.
    """
    target_type = self._type_of_target(y)
    keras_dtype = np.dtype(tf.keras.backend.floatx())
    # Stored so predictions can later be reshaped like the fitted target.
    self._y_shape = y.shape

    def ordinal_pipeline():
        # Reshape the target, then map each class to an ordinal number.
        return make_pipeline(
            TargetReshaper(),
            OrdinalEncoder(dtype=keras_dtype, categories=self.categories),
        )

    encoders = {
        "binary": ordinal_pipeline(),
        "multiclass": ordinal_pipeline(),
        "multiclass-multioutput": FunctionTransformer(),
        "multilabel-indicator": FunctionTransformer(),
    }
    if is_categorical_crossentropy(self.loss):
        encoders.update(
            multiclass=make_pipeline(
                TargetReshaper(),
                OneHotEncoder(
                    sparse=False, dtype=keras_dtype, categories=self.categories
                ),
            )
        )

    chosen = encoders.get(target_type)
    if chosen is None:
        message = (
            f"Unknown label type: {target_type}."
            "\n\nTo implement support, subclass KerasClassifier and override"
            " ``target_encoder`` with a transformer that supports this"
            " label type."
            "\n\nFor information on sklearn target types, see:"
            " * https://scikit-learn.org/stable/modules/generated/sklearn.utils.multiclass.type_of_target.html"
            " * https://scikit-learn.org/stable/modules/multiclass.html"
            "\n\nFor information on the SciKeras data transformation interface, see:"
            " * https://www.adriangb.com/scikeras/stable/advanced.html#data-transformers"
        )
        raise ValueError(message)
    self._final_encoder = chosen.fit(y)

    # An indicator matrix whose minimum is 0 and whose rows each sum to 1
    # is recorded as one-hot encoded multiclass.
    if target_type == "multilabel-indicator":
        if y.min() == 0 and (y.sum(axis=1) == 1).all():
            target_type = "multiclass-onehot"

    self._target_type = target_type
    self._y_dtype = y.dtype
    self.n_outputs_ = 1
    self.n_outputs_expected_ = 1

    if target_type == "multiclass-multioutput":
        self.classes_ = None
        self.n_classes_ = None
    elif target_type in ("binary", "multiclass"):
        # Step 1 of the pipeline is the fitted ordinal/one-hot encoder.
        self.classes_ = self._final_encoder[1].categories_[0]
        self.n_classes_ = self.classes_.size
    elif target_type in ("multiclass-onehot", "multilabel-indicator"):
        n_columns = y.shape[1]
        self.classes_ = np.arange(0, n_columns)
        self.n_classes_ = n_columns
    return self