def test_grid_search_score_method(self):
    X, y = make_classification(
        n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {"C": [0.1]}

    search_no_scoring = TuneGridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = TuneGridSearchCV(
        clf, grid, scoring="accuracy").fit(X, y)
    search_no_score_method_auc = TuneGridSearchCV(
        LinearSVCNoScore(), grid, scoring="roc_auc").fit(X, y)
    search_auc = TuneGridSearchCV(clf, grid, scoring="roc_auc").fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter.
    score_no_scoring = search_no_scoring.score(X, y)
    score_accuracy = search_accuracy.score(X, y)
    score_no_score_auc = search_no_score_method_auc.score(X, y)
    score_auc = search_auc.score(X, y)

    # Ensure the test is sane.
    self.assertTrue(score_auc < 1.0)
    self.assertTrue(score_accuracy < 1.0)
    self.assertTrue(score_auc != score_accuracy)

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
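# The tests above and below rely on a LinearSVCNoScore fixture defined
# elsewhere in this module. A minimal sketch of what it is assumed to look
# like, mirroring the scikit-learn test helper these tests derive from
# (an assumption, not necessarily this module's exact definition): a
# LinearSVC whose score attribute raises, so the search must fall back to
# the scoring parameter.
class LinearSVCNoScore(LinearSVC):
    """A LinearSVC classifier that has no score method."""

    @property
    def score(self):
        raise AttributeError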
def test_grid_search_no_score(self):
    # Test grid search on a classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [0.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    # XXX: It seems there is some global shared state in LinearSVC: fitting
    # multiple `SVC` instances in parallel using threads sometimes gives
    # wrong results. This only happens with threads, not with
    # processes/sync. For now, fit using the sync scheduler.
    grid_search = TuneGridSearchCV(clf, {"C": Cs}, scoring="accuracy")
    grid_search.fit(X, y)

    grid_search_no_score = TuneGridSearchCV(
        clf_no_score, {"C": Cs}, scoring="accuracy")
    # Smoke test grid search with the score-less estimator.
    grid_search_no_score.fit(X, y)

    # Check that the best params are equal.
    self.assertEqual(grid_search_no_score.best_params_,
                     grid_search.best_params_)
    # Check that we can call score and that it gives the correct result.
    self.assertEqual(grid_search.score(X, y),
                     grid_search_no_score.score(X, y))

    # Giving no scoring function raises an error.
    grid_search_no_score = TuneGridSearchCV(clf_no_score, {"C": Cs})
    with self.assertRaises(TypeError) as exc:
        grid_search_no_score.fit([[1]])
    self.assertTrue("no scoring" in str(exc.exception))
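# test_grid_search below uses module-level X, y fixtures defined elsewhere
# in this file. A plausible minimal sketch, assuming the same tiny
# two-class dataset used by the scikit-learn search tests this suite is
# based on (the exact values here are an assumption):
import numpy as np

X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
y = np.array([1, 1, 2, 2])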
def test_grid_search(self):
    # Test that the best estimator contains the right value for foo_param.
    clf = MockClassifier()
    grid_search = TuneGridSearchCV(clf, {"foo_param": [1, 2, 3]}, cv=3)
    grid_search.fit(X, y)
    # MockClassifier only scores well for foo_param > 1, so the search
    # should select the smallest parameter in case of ties: 2, not 3.
    self.assertEqual(grid_search.best_estimator_.foo_param, 2)

    assert_array_equal(grid_search.cv_results_["param_foo_param"].data,
                       [1, 2, 3])

    # Smoke test the score etc.
    grid_search.score(X, y)
    grid_search.predict_proba(X)
    grid_search.decision_function(X)
    grid_search.transform(X)

    # Test exception handling on scoring.
    grid_search.scoring = "sklearn"
    with self.assertRaises(ValueError):
        grid_search.fit(X, y)
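# A minimal sketch of the MockClassifier fixture assumed by test_grid_search,
# modeled on the scikit-learn test helper of the same name (an assumption,
# not necessarily this module's exact definition). It records foo_param and
# scores well only when foo_param > 1, which is why the search above settles
# on the smallest winning value, 2. Uses the numpy import above.
class MockClassifier:
    """Dummy classifier to test the parameter search algorithms."""

    def __init__(self, foo_param=0):
        self.foo_param = foo_param

    def fit(self, X, Y):
        assert len(X) == len(Y)
        self.classes_ = np.unique(Y)
        return self

    def predict(self, T):
        return T.shape[0]

    # The searched wrapper only smoke tests these, so the mock can reuse
    # predict for all of them.
    predict_proba = predict
    decision_function = predict
    transform = predict

    def score(self, X=None, Y=None):
        # Only foo_param values greater than 1 score well, breaking the
        # tie among {2, 3} toward the smallest value.
        return 1.0 if self.foo_param > 1 else 0.0

    def get_params(self, deep=False):
        return {"foo_param": self.foo_param}

    def set_params(self, **params):
        self.foo_param = params["foo_param"]
        return self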