Example #1
0
def test_failing_classifier_fails():
    """A failing fit raises by default, then only warns once error_score is numeric."""
    param_grid = {
        "parameter": [
            FailingClassifier.FAILING_PARAMETER,
            FailingClassifier.FAILING_SCORE_PARAMETER,
        ]
    }
    search = dcv.GridSearchCV(
        FailingClassifier(),
        param_grid,
        refit=False,
        return_train_score=False,
    )

    X, y = make_classification()

    # Default error_score='raise': the estimator's failure propagates.
    with pytest.raises(ValueError, match="Failing"):
        search.fit(X, y)

    # A numeric error_score downgrades the failure to a FitFailedWarning.
    search = search.set_params(error_score=-1)
    with pytest.warns(FitFailedWarning):
        search.fit(X, y)

    # The fallback score must keep the aggregated results free of NaNs.
    for key in ("mean_fit_time", "mean_score_time", "mean_test_score"):
        assert not np.isnan(search.cv_results_[key]).any()
Example #2
0
def test_pipeline_fit_failure():
    """Failures inside a pipeline step propagate or warn depending on error_score."""
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    steps = [
        ("bad", FailingClassifier()),
        ("good1", MockClassifier()),
        ("good2", MockClassifier()),
    ]
    param_grid = {
        "bad__parameter": [
            0,
            FailingClassifier.FAILING_PARAMETER,
            FailingClassifier.FAILING_PREDICT_PARAMETER,
            FailingClassifier.FAILING_SCORE_PARAMETER,
        ]
    }
    gs = dcv.GridSearchCV(Pipeline(steps), param_grid, refit=False)

    # error_score defaults to 'raise', so the underlying ValueError surfaces.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # Switching to NaN turns the hard failure into a FitFailedWarning ...
    gs.error_score = float("nan")
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    # ... and the failed candidates' scores are recorded as NaN.
    check_scores_all_nan(gs, "bad__parameter")
Example #3
0
def test_feature_union_fit_failure_multiple_metrics():
    """A failing FeatureUnion member is handled per-metric under multi-scoring."""
    scoring = {"score_1": _passthrough_scorer, "score_2": _passthrough_scorer}
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    union = FeatureUnion(
        [("good", MockClassifier()), ("bad", FailingClassifier())],
        transformer_weights={"bad": 0.5},
    )
    pipe = Pipeline([("union", union), ("clf", MockClassifier())])

    gs = dcv.GridSearchCV(
        pipe,
        {"union__bad__parameter": [0, 1, 2]},
        refit=False,
        scoring=scoring,
    )

    # error_score defaults to 'raise', so the failure surfaces as ValueError.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # With error_score=NaN the failure is reported as a FitFailedWarning ...
    gs.error_score = float("nan")
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    # ... and every metric records NaN for the failed candidates.
    for key in scoring:
        check_scores_all_nan(gs, "union__bad__parameter", score_key=key)
Example #4
0
def test_grid_search_failing_classifier():
    """GridSearchCV substitutes error_score for candidates whose fit fails."""
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    estimator = FailingClassifier()

    # refit=False because we only exercise the grid-search machinery itself.
    gs = dcv.GridSearchCV(
        estimator,
        [{"parameter": [0, 1, 2]}],
        scoring="accuracy",
        refit=False,
        error_score=0.0,
    )

    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    n_candidates = len(gs.cv_results_["params"])

    def get_cand_scores(i):
        # Per-split test scores for candidate i, gathered across all CV splits.
        return np.array(
            [gs.cv_results_["split%d_test_score" % s][i] for s in range(gs.n_splits_)]
        )

    def failing_candidates():
        # Indices of candidates whose parameter value triggers a fit failure.
        # Reads n_candidates from the enclosing scope, so it tracks reassignment.
        for cand_i in range(n_candidates):
            if (
                gs.cv_results_["param_parameter"][cand_i]
                == FailingClassifier.FAILING_PARAMETER
            ):
                yield cand_i

    # With error_score=0.0 every failed fit must report exactly zero.
    assert all(np.all(get_cand_scores(i) == 0.0) for i in failing_candidates())

    gs = dcv.GridSearchCV(
        estimator,
        [{"parameter": [0, 1, 2]}],
        scoring="accuracy",
        refit=False,
        error_score=float("nan"),
    )

    if six.PY2:
        # The warning is not reliably emitted on Python 2.
        gs.fit(X, y)
    else:
        with pytest.warns(FitFailedWarning):
            gs.fit(X, y)

    n_candidates = len(gs.cv_results_["params"])
    # With error_score=NaN every failed fit must report NaN on every split.
    assert all(np.all(np.isnan(get_cand_scores(i))) for i in failing_candidates())
Example #5
0
def test_estimator_predict_failure(in_pipeline):
    """Fit/predict/score failures are all tolerated when error_score is NaN."""
    X, y = make_classification()

    if in_pipeline:
        estimator = Pipeline([("bad", FailingClassifier())])
        key = "bad__parameter"
    else:
        estimator = FailingClassifier()
        key = "parameter"

    # One healthy value plus one value for each failure mode.
    failure_modes = [
        0,
        FailingClassifier.FAILING_PARAMETER,
        FailingClassifier.FAILING_PREDICT_PARAMETER,
        FailingClassifier.FAILING_SCORE_PARAMETER,
    ]
    gs = dcv.GridSearchCV(
        estimator,
        param_grid={key: failure_modes},
        refit=False,
        error_score=float("nan"),
        cv=2,
    )
    gs.fit(X, y)
def test_grid_search_failing_classifier_raise():
    """error_score='raise' must re-raise the estimator's original exception."""
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)

    # refit=False because we only exercise the grid-search machinery itself.
    gs = dcv.GridSearchCV(
        FailingClassifier(),
        [{'parameter': [0, 1, 2]}],
        scoring='accuracy',
        refit=False,
        error_score='raise',
    )

    # FailingClassifier issues a ValueError so this is what we look for.
    with pytest.raises(ValueError):
        gs.fit(X, y)
Example #7
0
def test_pipeline_fit_failure():
    """A failing first pipeline step raises, then warns once error_score is NaN."""
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    pipe = Pipeline([
        ('bad', FailingClassifier()),
        ('good1', MockClassifier()),
        ('good2', MockClassifier()),
    ])

    gs = dcv.GridSearchCV(pipe, {'bad__parameter': [0, 1, 2]}, refit=False)

    # error_score defaults to 'raise', so the ValueError propagates.
    with pytest.raises(ValueError):
        gs.fit(X, y)

    # With error_score=NaN the failure becomes a FitFailedWarning ...
    gs.error_score = float('nan')
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    # ... and the failed candidates' scores are all NaN.
    check_scores_all_nan(gs, 'bad__parameter')