# Imports assumed by the tests below (in the full module these typically
# live in the file header alongside the shared `dynamic_classifier` helper).
from collections import defaultdict
from typing import DefaultDict

import numpy as np
import pytest
from sklearn.datasets import make_blobs
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

from scikeras.wrappers import KerasClassifier


def test_KerasClassifier_loss_invariance(y, y_type):
    """Test that KerasClassifier can use both categorical_crossentropy
    and sparse_categorical_crossentropy with either one-hot encoded
    targets or sparse targets.
    """
    X = np.arange(0, y.shape[0]).reshape(-1, 1)
    clf_1 = KerasClassifier(
        model=dynamic_classifier,
        hidden_layer_sizes=(100,),
        loss="categorical_crossentropy",
        random_state=0,
    )
    clf_1.fit(X, y)
    clf_1.partial_fit(X, y)
    y_1 = clf_1.predict(X)
    if y_type != "multilabel-indicator":
        # sparse_categorical_crossentropy is not compatible with one-hot
        # encoded targets, and one-hot encoded targets are not used in
        # sklearn. That use case does not natively succeed in Keras or
        # sklearn estimators, so SciKeras does not intend to auto-convert
        # data to support it.
        clf_2 = KerasClassifier(
            model=dynamic_classifier,
            hidden_layer_sizes=(100,),
            loss="sparse_categorical_crossentropy",
            random_state=0,
        )
        clf_2.fit(X, y)
        # predictions should be invariant to the choice of loss
        y_2 = clf_2.predict(X)
        np.testing.assert_equal(y_1, y_2)
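

# The tests in this section rely on a shared `dynamic_classifier` helper from
# the suite. As a hedged, minimal stand-in (not the suite's actual helper;
# the `meta` keys and `compile_kwargs` routing follow the SciKeras docs),
# such a builder could look roughly like this:
def dynamic_classifier(hidden_layer_sizes, meta, compile_kwargs):
    """Build an MLP whose output layer adapts to the target described in `meta`."""
    model = Sequential()
    model.add(keras.layers.Input(shape=(meta["n_features_in_"],)))
    for size in hidden_layer_sizes:
        model.add(Dense(size, activation="relu"))
    if meta["target_type_"] == "binary":
        model.add(Dense(1, activation="sigmoid"))
    else:
        model.add(Dense(meta["n_classes_"], activation="softmax"))
    # SciKeras passes the wrapper-level loss/optimizer via compile_kwargs
    model.compile(**compile_kwargs)
    return model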


def test_single_output_multilabel_indicator():
    """Tests that a multilabel-indicator target can be used without errors."""
    X = np.random.random(size=(100, 2))
    # random 0/1 labels; note that randint's upper bound is exclusive
    y = np.random.randint(0, 2, size=(100, 3))
    y[0, :] = 1  # i.e. not "one hot encoded"

    def build_fn():
        model = Sequential()
        model.add(Dense(10, input_shape=(2,), activation="relu"))
        model.add(Dense(3, activation="sigmoid"))
        return model

    clf = KerasClassifier(
        model=build_fn,
        loss="categorical_crossentropy",
    )
    # check that there are no errors
    clf.fit(X, y)
    clf.predict(X)
    # check the target type
    assert clf.target_type_ == "multilabel-indicator"
    # check classes
    np.testing.assert_equal(clf.classes_, np.arange(3))
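

# A hedged aside (illustrative, not from the original suite): the
# `target_type_` asserted above follows sklearn's `type_of_target`, which
# labels any 2D binary matrix with more than one column as
# "multilabel-indicator", whether or not each row happens to be one-hot.
def test_type_of_target_multilabel_sanity():
    from sklearn.utils.multiclass import type_of_target

    y = np.array([[1, 1, 0], [0, 0, 1]])
    assert type_of_target(y) == "multilabel-indicator"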


def test_not_fitted_error():
    """Tests error when trying to use predict before fit."""
    estimator = KerasClassifier(dynamic_classifier)
    X = np.random.rand(10, 20)
    with pytest.raises(NotFittedError):
        # This is in BaseWrapper so it covers
        # KerasRegressor as well
        estimator.predict(X)
    with pytest.raises(NotFittedError):
        estimator.predict_proba(X)
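

# A hedged aside (illustrative, not from the original suite): because the
# NotFittedError check lives in BaseWrapper, KerasRegressor behaves the same
# way. The trivial builder below is a stand-in, not a suite helper.
def test_not_fitted_error_regressor():
    from scikeras.wrappers import KerasRegressor

    def build_reg():
        model = Sequential()
        model.add(Dense(1, input_shape=(20,)))
        return model

    estimator = KerasRegressor(build_reg, loss="mse")
    with pytest.raises(NotFittedError):
        estimator.predict(np.random.rand(10, 20))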


def test_class_weight_param():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.
    """
    clf = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100,),
        epochs=50,
        random_state=0,
    )
    problems = (2, 3)
    for n_centers in problems:
        # create a very noisy dataset
        X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=0
        )
        n_centers = len(np.unique(y_train))
        if n_centers == 2:
            class_weight = {0: 1000, 1: 0.0001}
        else:
            class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}
        clf.set_params(class_weight=class_weight)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        # the heavy weight on class 0 should dominate the predictions
        assert np.mean(y_pred == 0) > 0.87
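

# A hedged aside (illustrative, not from the original suite): the
# class_weight dicts above follow the standard sklearn convention; one way to
# see their effect is sklearn's compute_sample_weight, which expands a
# class_weight dict into per-sample weights.
def test_class_weight_expands_to_sample_weight():
    from sklearn.utils.class_weight import compute_sample_weight

    y = np.array([0, 0, 1, 1])
    sw = compute_sample_weight(class_weight={0: 1000, 1: 0.0001}, y=y)
    np.testing.assert_allclose(sw, [1000, 1000, 0.0001, 0.0001])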


def test_callbacks_prefixes():
    """Test dispatching of callbacks using no prefix, the fit__ prefix,
    or the predict__ prefix.
    """

    class SentinelCallback(Callback):
        def __init__(self, call_logs: DefaultDict[str, int]):
            super().__init__()
            self.call_logs = call_logs

        def on_test_begin(self, logs=None):
            self.call_logs["on_test_begin"] += 1

        def on_train_begin(self, logs=None):
            self.call_logs["on_train_begin"] += 1

        def on_predict_begin(self, logs=None):
            self.call_logs["on_predict_begin"] += 1

    callbacks_call_logs = defaultdict(int)
    fit_callbacks_call_logs = defaultdict(int)
    predict_callbacks_call_logs = defaultdict(int)

    def get_clf() -> keras.Model:
        model = keras.models.Sequential()
        model.add(keras.layers.InputLayer((1,)))
        model.add(keras.layers.Dense(1, activation="sigmoid"))
        return model

    clf = KerasClassifier(
        model=get_clf,
        loss="binary_crossentropy",
        callbacks=SentinelCallback(callbacks_call_logs),
        fit__callbacks=SentinelCallback(fit_callbacks_call_logs),
        predict__callbacks=SentinelCallback(predict_callbacks_call_logs),
        validation_split=0.1,
    )
    clf.fit([[0]] * 100, [0] * 100)
    # un-prefixed callbacks run everywhere; fit__ callbacks run only during
    # fit (on_test_begin fires because validation_split triggers evaluation)
    assert callbacks_call_logs == {"on_train_begin": 1, "on_test_begin": 1}
    assert fit_callbacks_call_logs == {"on_train_begin": 1, "on_test_begin": 1}
    assert predict_callbacks_call_logs == {}
    clf.predict([[0]])
    # predict__ callbacks run only during predict
    assert callbacks_call_logs == {
        "on_train_begin": 1,
        "on_test_begin": 1,
        "on_predict_begin": 1,
    }
    assert fit_callbacks_call_logs == {"on_train_begin": 1, "on_test_begin": 1}
    assert predict_callbacks_call_logs == {"on_predict_begin": 1}
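

# A hedged aside (illustrative, not from the original suite): the same
# double-underscore routing is available through set_params, so callback
# scoping can also be changed after construction.
def test_callbacks_rescoped_via_set_params():
    call_logs = defaultdict(int)

    class CountingCallback(Callback):
        def on_predict_begin(self, logs=None):
            call_logs["on_predict_begin"] += 1

    def get_clf() -> keras.Model:
        model = keras.models.Sequential()
        model.add(keras.layers.InputLayer((1,)))
        model.add(keras.layers.Dense(1, activation="sigmoid"))
        return model

    clf = KerasClassifier(model=get_clf, loss="binary_crossentropy")
    clf.fit([[0]] * 10, [0] * 10)
    clf.set_params(predict__callbacks=CountingCallback())
    clf.predict([[0]])
    assert call_logs == {"on_predict_begin": 1}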


def test_class_weight_param_fit_and_partial_fit():
    """Backport of sklearn.utils.estimator_checks.check_class_weight_classifiers
    for sklearn <= 0.23.0.

    Tests that fit and partial_fit correctly handle the class_weight parameter.
    """
    clf = KerasClassifier(
        model=dynamic_classifier,
        model__hidden_layer_sizes=(100,),
        random_state=0,
    )
    problems = (2, 3)
    for n_centers in problems:
        # create a very noisy dataset
        X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
        X_train, X_test, y_train, _ = train_test_split(
            X, y, test_size=0.5, random_state=0
        )
        n_centers = len(np.unique(y_train))
        if n_centers == 2:
            class_weight = {0: 1000, 1: 0.0001}
            fit_epochs = 4
            partial_fit_epochs = 3
        else:
            class_weight = {0: 1000, 1: 0.0001, 2: 0.0001}
            fit_epochs = 8
            partial_fit_epochs = 6
        clf.set_params(class_weight=class_weight)
        # run `fit_epochs` epochs via fit, followed by several partial_fit
        # iterations; these numbers are purely empirical, just like they are
        # in the original sklearn test
        clf.set_params(fit__epochs=fit_epochs)
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        assert np.mean(y_pred == 0) > 0.8
        for _ in range(partial_fit_epochs):
            clf.partial_fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        assert np.mean(y_pred == 0) > 0.95
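

# A hedged aside (illustrative, not from the original suite): the loop above
# is needed because, per the SciKeras docs, each partial_fit call trains for
# exactly one epoch. The `history_` attribute is assumed here to accumulate
# one entry per partial_fit call, as described in those docs.
def test_partial_fit_trains_one_epoch_per_call():
    def get_clf() -> keras.Model:
        model = keras.models.Sequential()
        model.add(keras.layers.InputLayer((1,)))
        model.add(keras.layers.Dense(1, activation="sigmoid"))
        return model

    clf = KerasClassifier(model=get_clf, loss="binary_crossentropy")
    X, y = [[0]] * 10, [0] * 10
    for _ in range(3):
        clf.partial_fit(X, y)
    assert len(clf.history_["loss"]) == 3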