def test_grid_search_failing_classifier():
    """Check that failed fits are scored with the configured ``error_score``.

    The search is run twice, once with ``error_score=0.0`` and once with
    ``error_score=nan``.  Each run must emit ``FitFailedWarning``, and every
    per-split test score of a candidate whose ``parameter`` equals
    ``FailingClassifier.FAILING_PARAMETER`` must hold the error score.
    """
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    clf = FailingClassifier()
    param_grid = [{'parameter': [0, 1, 2]}]

    def failing_candidate_scores(search):
        """Yield the per-split test scores of each failing candidate."""
        results = search.cv_results_
        split_keys = ['split%d_test_score' % s
                      for s in range(search.n_splits_)]
        for idx in range(len(results['params'])):
            value = results['param_parameter'][idx]
            if value == FailingClassifier.FAILING_PARAMETER:
                yield np.array([results[key][idx] for key in split_keys])

    # refit=False: only the search phase is under test here.
    gs = dcv.GridSearchCV(clf, param_grid, scoring='accuracy',
                          refit=False, error_score=0.0)
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    # Scores of the fits that are expected to fail must be exactly zero.
    assert all(np.all(scores == 0.0)
               for scores in failing_candidate_scores(gs))

    gs = dcv.GridSearchCV(clf, param_grid, scoring='accuracy',
                          refit=False, error_score=float('nan'))
    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    # Same check with NaN as the error score.
    assert all(np.all(np.isnan(scores))
               for scores in failing_candidate_scores(gs))
def test_grid_search_failing_classifier_raise():
    """Check that ``error_score='raise'`` lets the fit error propagate."""
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    param_grid = [{'parameter': [0, 1, 2]}]

    # refit=False: only the search phase is under test here.
    search = dcv.GridSearchCV(FailingClassifier(), param_grid,
                              scoring='accuracy', refit=False,
                              error_score='raise')

    # Per the original test, FailingClassifier issues a ValueError, so that
    # is the exception expected to surface from fit().
    with pytest.raises(ValueError):
        search.fit(X, y)
def test_pipeline_fit_failure():
    """Check fit-failure handling when the failing step is inside a Pipeline.

    The first fit must raise ``ValueError``.  After ``error_score`` is set to
    NaN on the same search object, the second fit must warn with
    ``FitFailedWarning`` and pass ``check_scores_all_nan`` for
    ``'bad__parameter'``.
    """
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    steps = [
        ('bad', FailingClassifier()),
        ('good1', MockClassifier()),
        ('good2', MockClassifier()),
    ]
    param_grid = {'bad__parameter': [0, 1, 2]}
    search = dcv.GridSearchCV(Pipeline(steps), param_grid, refit=False)

    # error_score is not passed, so this relies on the default being
    # 'raise' -- NOTE(review): confirm against dcv.GridSearchCV's signature.
    with pytest.raises(ValueError):
        search.fit(X, y)

    # With a NaN error score the failure becomes a warning.
    search.error_score = float('nan')
    with pytest.warns(FitFailedWarning):
        search.fit(X, y)

    check_scores_all_nan(search, 'bad__parameter')
def test_feature_union_fit_failure_multiple_metrics():
    """Check fit-failure handling in a FeatureUnion with multiple scorers.

    The failing estimator sits inside a ``FeatureUnion`` that is the first
    step of a ``Pipeline``.  The first fit must raise ``ValueError``.  After
    ``error_score`` is set to NaN, the second fit must warn with
    ``FitFailedWarning`` and pass ``check_scores_all_nan`` for every key of
    the scoring dict.
    """
    scoring = {"score_1": _passthrough_scorer, "score_2": _passthrough_scorer}
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)

    union = FeatureUnion(
        [('good', MockClassifier()), ('bad', FailingClassifier())],
        transformer_weights={'bad': 0.5},
    )
    pipe = Pipeline([('union', union), ('clf', MockClassifier())])
    param_grid = {'union__bad__parameter': [0, 1, 2]}
    search = dcv.GridSearchCV(pipe, param_grid, refit=False, scoring=scoring)

    # error_score is not passed, so this relies on the default being
    # 'raise' -- NOTE(review): confirm against dcv.GridSearchCV's signature.
    with pytest.raises(ValueError):
        search.fit(X, y)

    # With a NaN error score the failure becomes a warning.
    search.error_score = float('nan')
    with pytest.warns(FitFailedWarning):
        search.fit(X, y)

    # Run the NaN check once per configured metric.
    for metric_name in scoring:
        check_scores_all_nan(search, 'union__bad__parameter',
                             score_key=metric_name)