def test_grid_search_score_method():
    """The score() of a fitted search must honor the ``scoring`` parameter,
    falling back to the estimator's own score method only when scoring=None."""
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    # Four searches covering the scoring/score-method combinations.
    search_no_scoring = DaskGridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = DaskGridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = DaskGridSearchCV(LinearSVCNoScore(), grid,
                                                  scoring='roc_auc').fit(X, y)
    search_auc = DaskGridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = search_no_scoring.score(X, y)
    score_accuracy = search_accuracy.score(X, y)
    score_no_score_auc = search_no_score_method_auc.score(X, y)
    score_auc = search_auc.score(X, y)

    # Sanity: the noisy data keeps scores below 1.0, and the two metrics
    # actually produce different values (otherwise the test proves nothing).
    assert score_auc < 1.0
    assert score_accuracy < 1.0
    assert score_auc != score_accuracy

    # scoring=None should match the estimator's accuracy; roc_auc scoring
    # should match whether or not the estimator has its own score method.
    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)
def test_grid_search_no_score():
    """Grid search on an estimator lacking a score method works when an
    explicit ``scoring`` is given, and fails loudly when it is not."""
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    # XXX: It seems there's some global shared state in LinearSVC - fitting
    # multiple `SVC` instances in parallel using threads sometimes results in
    # wrong results. This only happens with threads, not processes/sync.
    # For now, we'll fit using the sync scheduler.
    search_with_score = DaskGridSearchCV(clf, {'C': Cs}, scoring='accuracy',
                                         get=dask.get)
    search_with_score.fit(X, y)

    # Smoke test: the score-less estimator fits fine with explicit scoring.
    search_without_score = DaskGridSearchCV(clf_no_score, {'C': Cs},
                                            scoring='accuracy', get=dask.get)
    search_without_score.fit(X, y)

    # Both searches must agree on the winning parameters and the test score.
    assert search_without_score.best_params_ == search_with_score.best_params_
    assert search_with_score.score(X, y) == search_without_score.score(X, y)

    # Without a scoring function anywhere, fitting must raise a TypeError
    # whose message mentions the missing scoring.
    grid_search_no_score = DaskGridSearchCV(clf_no_score, {'C': Cs})
    with pytest.raises(TypeError) as exc:
        grid_search_no_score.fit([[1]])
    assert "no scoring" in str(exc.value)
def test_grid_search():
    """Basic end-to-end grid search: best param selection, cv_results_
    contents, delegated prediction methods, and bad-scoring errors."""
    clf = MockClassifier()
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1, 2, 3]})

    # make sure it selects the smallest parameter in case of ties
    grid_search.fit(X, y)
    assert grid_search.best_estimator_.foo_param == 2

    # The parameter grid must be recorded in order in cv_results_.
    assert_array_equal(grid_search.cv_results_["param_foo_param"].data,
                       [1, 2, 3])

    # Smoke-test the delegated estimator methods.
    grid_search.score(X, y)
    grid_search.predict_proba(X)
    grid_search.decision_function(X)
    grid_search.transform(X)

    # An unrecognized scoring string must raise ValueError at fit time.
    grid_search.scoring = 'sklearn'
    with pytest.raises(ValueError):
        grid_search.fit(X, y)