Example #1
    def test_grid_search_failing_classifier(self):
        # ATGridSearchCV with error_score != 'raise'
        # Ensures that scores are reset to error_score for the parameter
        # settings whose fits fail.

        X, y = make_classification(n_samples=20, n_features=10, random_state=0)

        clf = FailingClassifier()

        # refit=False because we only want to check that errors caused by fits
        # to individual folds will be caught and warnings raised instead. If
        # refit was done, then an exception would be raised on refit and not
        # caught by grid_search (expected behavior), and this would cause an
        # error in this test.
        gs = ATGridSearchCV(clf,
                            [{'parameter': [0, 1, 2]}],
                            scoring='accuracy',
                            refit=False,
                            error_score=0.0,
                            webserver_url=self.live_server_url)

        # assert_warns(FitFailedWarning, gs.fit, X, y)
        wait(gs.fit(X, y))
        # Ensure that grid scores were set to zero as required for those fits
        # that are expected to fail.
        assert all(
            np.all(this_point.cv_validation_scores == 0.0)
            for this_point in gs.grid_scores_
            if this_point.parameters['parameter'] ==
            FailingClassifier.FAILING_PARAMETER)

        gs = ATGridSearchCV(clf,
                            [{'parameter': [0, 1, 2]}],
                            scoring='accuracy',
                            refit=False,
                            error_score=float('nan'),
                            webserver_url=self.live_server_url)
        # assert_warns(FitFailedWarning, gs.fit, X, y)
        wait(gs.fit(X, y))
        assert all(
            np.all(np.isnan(this_point.cv_validation_scores))
            for this_point in gs.grid_scores_
            if this_point.parameters['parameter'] ==
            FailingClassifier.FAILING_PARAMETER)
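
FailingClassifier is a test helper that is not shown in these snippets. A minimal sketch, assuming an interface like the stub used in scikit-learn's own search tests (the project's actual helper may differ):

import numpy as np
from sklearn.base import BaseEstimator

class FailingClassifier(BaseEstimator):
    """Classifier whose fit() raises for one specific parameter value."""

    FAILING_PARAMETER = 2

    def __init__(self, parameter=None):
        self.parameter = parameter

    def fit(self, X, y=None):
        if self.parameter == self.FAILING_PARAMETER:
            raise ValueError("Failing classifier failed as required")
        return self

    def predict(self, X):
        # A constant prediction is enough for the accuracy scorer to run.
        return np.zeros(X.shape[0], dtype=int)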
Example #2
    def test_unsupervised_grid_search(self):
        # test grid-search with unsupervised estimator
        X, y = make_blobs(random_state=0)
        km = KMeans(random_state=0)
        grid_search = ATGridSearchCV(km,
                                     param_grid=dict(n_clusters=[2, 3, 4]),
                                     scoring='adjusted_rand_score',
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X, y))
        # ARI can find the right number :)
        assert_equal(grid_search.best_params_["n_clusters"], 3)

        # Now without a score, and without y
        grid_search = ATGridSearchCV(km,
                                     param_grid=dict(n_clusters=[2, 3, 4]),
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X))
        assert_equal(grid_search.best_params_["n_clusters"], 4)
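
Why the second search lands on n_clusters=4: with no scorer and no y, grid search falls back to the estimator's own score method, and KMeans.score returns the negative inertia, which keeps improving as clusters are added. A standalone check of that behaviour with plain scikit-learn (no ATGridSearchCV involved):

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(random_state=0)
for k in (2, 3, 4):
    km = KMeans(n_clusters=k, random_state=0).fit(X)
    # score() is the negative inertia, so it grows towards zero as k increases
    print(k, km.score(X))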
Example #3
    def test_grid_search_sparse(self):
        # Test that grid search works with both dense and sparse matrices
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)

        clf = LinearSVC()
        cv = ATGridSearchCV(clf, {'C': [0.1, 1.0]},
                            webserver_url=self.live_server_url)
        wait(cv.fit(X_[:180], y_[:180]))
        y_pred = cv.best_estimator_.predict(X_[180:])
        C = cv.best_estimator_.C

        X_ = sp.csr_matrix(X_)
        clf = LinearSVC()
        cv = ATGridSearchCV(clf, {'C': [0.1, 1.0]},
                            webserver_url=self.live_server_url)
        wait(cv.fit(X_[:180].tocoo(), y_[:180]))
        y_pred2 = cv.best_estimator_.predict(X_[180:])
        C2 = cv.best_estimator_.C

        assert_true(np.mean(y_pred == y_pred2) >= .9)
        assert_equal(C, C2)
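
The sparse branch above assumes scipy.sparse is imported as sp; a quick illustration of the conversions it relies on:

import numpy as np
import scipy.sparse as sp

X_dense = np.eye(3)
X_csr = sp.csr_matrix(X_dense)   # compressed-sparse-row copy of the data
X_coo = X_csr.tocoo()            # COO form, as passed to fit() in the test
assert (X_coo.toarray() == X_dense).all()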
Example #4
    def test_grid_search_sparse_scoring(self):
        X_, y_ = make_classification(n_samples=200,
                                     n_features=100,
                                     random_state=0)

        clf = LinearSVC()
        cv = ATGridSearchCV(clf, {'C': [0.1, 1.0]},
                            scoring="f1",
                            webserver_url=self.live_server_url)
        wait(cv.fit(X_[:180], y_[:180]))
        y_pred = cv.predict(X_[180:])
        C = cv.best_estimator_.C

        X_ = sp.csr_matrix(X_)
        clf = LinearSVC()
        cv = ATGridSearchCV(clf, {'C': [0.1, 1.0]},
                            scoring="f1",
                            webserver_url=self.live_server_url)
        wait(cv.fit(X_[:180], y_[:180]))
        y_pred2 = cv.predict(X_[180:])
        C2 = cv.best_estimator_.C

        assert_array_equal(y_pred, y_pred2)
        assert_equal(C, C2)

        # Smoke test the score
        # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y_[:180]),
        #                            cv.score(X_[:180], y_[:180]))

        # test loss where greater is worse
        def f1_loss(y_true_, y_pred_):
            return -f1_score(y_true_, y_pred_)

        F1Loss = make_scorer(f1_loss, greater_is_better=False)
        cv = ATGridSearchCV(clf, {'C': [0.1, 1.0]},
                            scoring=F1Loss,
                            webserver_url=self.live_server_url)
        wait(cv.fit(X_[:180], y_[:180]))
        y_pred3 = cv.predict(X_[180:])
        C3 = cv.best_estimator_.C

        assert_equal(C, C3)
        assert_array_equal(y_pred, y_pred3)
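
make_scorer(..., greater_is_better=False) negates the loss so the search can keep maximising. A standalone check of that sign flip with plain scikit-learn (DummyClassifier only stands in for a fitted estimator):

from sklearn.dummy import DummyClassifier
from sklearn.metrics import f1_score, make_scorer

def f1_loss(y_true, y_pred):
    return -f1_score(y_true, y_pred)

neg_f1 = make_scorer(f1_loss, greater_is_better=False)

X = [[0], [1], [0], [1]]
y = [0, 1, 0, 1]
clf = DummyClassifier(strategy='constant', constant=1).fit(X, y)

# greater_is_better=False multiplies the loss by -1, so the scorer value
# equals the plain F1 score and larger is still better.
print(f1_score(y, clf.predict(X)))   # raw F1
print(neg_f1(clf, X, y))             # same value after the sign flip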
Example #5
    def test_grid_search_score_method(self):
        X, y = make_classification(n_samples=100,
                                   n_classes=2,
                                   flip_y=.2,
                                   random_state=0)
        clf = LinearSVC(random_state=0)
        grid = {'C': [.1]}

        search_no_scoring = ATGridSearchCV(clf,
                                           grid,
                                           scoring=None,
                                           webserver_url=self.live_server_url)
        wait(search_no_scoring.fit(X, y))
        search_accuracy = ATGridSearchCV(clf,
                                         grid,
                                         scoring='accuracy',
                                         webserver_url=self.live_server_url)
        wait(search_accuracy.fit(X, y))
        search_no_score_method_auc = ATGridSearchCV(
            LinearSVCNoScore(),
            grid,
            scoring='roc_auc',
            webserver_url=self.live_server_url)
        wait(search_no_score_method_auc.fit(X, y))
        search_auc = ATGridSearchCV(clf,
                                    grid,
                                    scoring='roc_auc',
                                    webserver_url=self.live_server_url)
        wait(search_auc.fit(X, y))

        # ChangedBehaviourWarning occurred previously (prior to #9005)
        score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
        score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
        score_no_score_auc = assert_no_warnings(
            search_no_score_method_auc.score, X, y)
        score_auc = assert_no_warnings(search_auc.score, X, y)

        # ensure the test is sane
        assert_true(score_auc < 1.0)
        assert_true(score_accuracy < 1.0)
        assert_not_equal(score_auc, score_accuracy)

        assert_almost_equal(score_accuracy, score_no_scoring)
        assert_almost_equal(score_auc, score_no_score_auc)
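
LinearSVCNoScore is another helper not shown here. A minimal sketch, assuming it mirrors the stub in scikit-learn's search tests (the project's own definition may differ):

from sklearn.svm import LinearSVC

class LinearSVCNoScore(LinearSVC):
    """LinearSVC without a usable score method, so the grid search must
    rely entirely on the explicit scoring='roc_auc' argument."""

    @property
    def score(self):
        raise AttributeError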
Example #6
    def test_grid_search(self):
        # Test that the best estimator contains the right value for foo_param
        clf = MockClassifier()
        grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]},
                                     verbose=3,
                                     webserver_url=self.live_server_url)
        # foo_param values 2 and 3 tie for the best score, so the winner
        # must be one of them
        old_stdout = sys.stdout
        sys.stdout = StringIO()
        wait(grid_search.fit(X, y))
        sys.stdout = old_stdout
        self.assertIn(grid_search.best_estimator_.foo_param, [2, 3])

        _mock_sort = partial(_sort_grid_scores, param='foo_param')

        for idx, tup in enumerate(
                sorted(grid_search.grid_scores_, key=cmp_to_key(_mock_sort))):
            self.assertEqual(tup[0], {'foo_param': idx + 1},
                             '%d%s' % (idx, tup))

        # Smoke test the score etc:
        grid_search.score(X, y)
        grid_search.predict_proba(X)
        grid_search.decision_function(X)
        grid_search.transform(X)

        # Test exception handling on scoring
        grid_search.scoring = 'sklearn'
        self.assertRaises(ValueError, grid_search.fit, X, y)
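
MockClassifier is the last undefined helper; it scores every foo_param greater than 1 as 1.0, which is why the best estimator can legitimately end up with foo_param 2 or 3. A minimal sketch, modelled on scikit-learn's own mock (the real helper also needs predict_proba, decision_function and transform for the smoke tests above):

import numpy as np

class MockClassifier:
    """Dummy classifier whose score depends only on foo_param."""

    def __init__(self, foo_param=0):
        self.foo_param = foo_param

    def fit(self, X, y):
        assert len(X) == len(y)
        return self

    def predict(self, X):
        return np.ones(len(X), dtype=int)

    def score(self, X=None, y=None):
        # foo_param 2 and 3 tie with a perfect score; foo_param 1 scores 0.
        return 1.0 if self.foo_param > 1 else 0.0

    def get_params(self, deep=False):
        return {'foo_param': self.foo_param}

    def set_params(self, **params):
        self.foo_param = params['foo_param']
        return self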