def test_grid_search_sparse_scoring():
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]}, scoring="f1")
    cv.fit(X_[:180], y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert_array_equal(y_pred, y_pred2)
    assert C == C2
    # Smoke test the score
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # test loss where greater is worse
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    F1Loss = make_scorer(f1_loss, greater_is_better=False)
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]}, scoring=F1Loss)
    cv.fit(X_[:180], y_[:180])
    y_pred3 = cv.predict(X_[180:])
    C3 = cv.best_estimator_.C

    assert C == C3
    assert_array_equal(y_pred, y_pred3)

def test_grid_search_bad_param_grid():
    param_dict = {"C": 1.0}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        DaskGridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a sequence"
            "(but not a string) or np.ndarray.") in str(exc.value)

    param_dict = {"C": []}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        DaskGridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a non-empty "
            "sequence.") in str(exc.value)

    param_dict = {"C": "1,2,3"}
    clf = SVC()

    with pytest.raises(ValueError) as exc:
        DaskGridSearchCV(clf, param_dict)
    assert ("Parameter values for parameter (C) need to be a sequence"
            "(but not a string) or np.ndarray.") in str(exc.value)

    param_dict = {"C": np.ones(6).reshape(3, 2)}
    clf = SVC()
    with pytest.raises(ValueError):
        DaskGridSearchCV(clf, param_dict)

def test_grid_search_groups():
    # Check if ValueError (when groups is None) propagates to
    # DaskGridSearchCV, and also check if groups is correctly passed
    # to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=15, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 15)

    clf = LinearSVC(random_state=0)
    grid = {'C': [1]}

    group_cvs = [LeaveOneGroupOut(), LeavePGroupsOut(2), GroupKFold(),
                 GroupShuffleSplit()]
    for cv in group_cvs:
        gs = DaskGridSearchCV(clf, grid, cv=cv)
        with pytest.raises(ValueError) as exc:
            assert gs.fit(X, y)
        assert "The groups parameter should not be None" in str(exc.value)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit()]
    for cv in non_group_cvs:
        gs = DaskGridSearchCV(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)

def test_classes__property():
    # Test that classes_ property matches best_estimator_.classes_
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)
    Cs = [.1, 1, 10]

    grid_search = DaskGridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    grid_search.fit(X, y)
    assert_array_equal(grid_search.best_estimator_.classes_,
                       grid_search.classes_)

    # Test that regressors do not have a classes_ attribute
    grid_search = DaskGridSearchCV(Ridge(), {'alpha': [1.0, 2.0]})
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute before it's fit
    grid_search = DaskGridSearchCV(LinearSVC(random_state=0), {'C': Cs})
    assert not hasattr(grid_search, 'classes_')

    # Test that the grid searcher has no classes_ attribute without a refit
    grid_search = DaskGridSearchCV(LinearSVC(random_state=0), {'C': Cs},
                                   refit=False)
    grid_search.fit(X, y)
    assert not hasattr(grid_search, 'classes_')

def test_grid_search_score_method():
    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = LinearSVC(random_state=0)
    grid = {'C': [.1]}

    search_no_scoring = DaskGridSearchCV(clf, grid, scoring=None).fit(X, y)
    search_accuracy = DaskGridSearchCV(clf, grid, scoring='accuracy').fit(X, y)
    search_no_score_method_auc = DaskGridSearchCV(LinearSVCNoScore(), grid,
                                                  scoring='roc_auc').fit(X, y)
    search_auc = DaskGridSearchCV(clf, grid, scoring='roc_auc').fit(X, y)

    # Check warning only occurs in situation where behavior changed:
    # estimator requires score method to compete with scoring parameter
    score_no_scoring = search_no_scoring.score(X, y)
    score_accuracy = search_accuracy.score(X, y)
    score_no_score_auc = search_no_score_method_auc.score(X, y)
    score_auc = search_auc.score(X, y)

    # ensure the test is sane
    assert score_auc < 1.0
    assert score_accuracy < 1.0
    assert score_auc != score_accuracy

    assert_almost_equal(score_accuracy, score_no_scoring)
    assert_almost_equal(score_auc, score_no_score_auc)

def test_grid_search_no_score():
    # Test grid-search on classifier that has no score function.
    clf = LinearSVC(random_state=0)
    X, y = make_blobs(random_state=0, centers=2)
    Cs = [.1, 1, 10]
    clf_no_score = LinearSVCNoScore(random_state=0)

    # XXX: It seems there's some global shared state in LinearSVC - fitting
    # multiple `SVC` instances in parallel using threads sometimes results in
    # wrong results. This only happens with threads, not processes/sync.
    # For now, we'll fit using the sync scheduler.
    grid_search = DaskGridSearchCV(clf, {'C': Cs}, scoring='accuracy',
                                   get=dask.get)
    grid_search.fit(X, y)

    grid_search_no_score = DaskGridSearchCV(clf_no_score, {'C': Cs},
                                            scoring='accuracy', get=dask.get)
    # smoketest grid search
    grid_search_no_score.fit(X, y)

    # check that best params are equal
    assert grid_search_no_score.best_params_ == grid_search.best_params_
    # check that we can call score and that it gives the correct result
    assert grid_search.score(X, y) == grid_search_no_score.score(X, y)

    # giving no scoring function raises an error
    grid_search_no_score = DaskGridSearchCV(clf_no_score, {'C': Cs})
    with pytest.raises(TypeError) as exc:
        grid_search_no_score.fit([[1]])
    assert "no scoring" in str(exc.value)

def test_grid_search_failing_classifier():
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    clf = FailingClassifier()

    # refit=False because we want to test the behaviour of the grid search
    # part
    gs = DaskGridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                          refit=False, error_score=0.0)

    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    n_candidates = len(gs.cv_results_['params'])

    # Ensure that grid scores were set to zero as required for those fits
    # that are expected to fail.
    def get_cand_scores(i):
        return np.array(list(gs.cv_results_['split%d_test_score' % s][i]
                             for s in range(gs.n_splits_)))

    assert all((np.all(get_cand_scores(cand_i) == 0.0)
                for cand_i in range(n_candidates)
                if gs.cv_results_['param_parameter'][cand_i] ==
                FailingClassifier.FAILING_PARAMETER))

    gs = DaskGridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                          refit=False, error_score=float('nan'))

    with pytest.warns(FitFailedWarning):
        gs.fit(X, y)

    n_candidates = len(gs.cv_results_['params'])
    assert all(np.all(np.isnan(get_cand_scores(cand_i)))
               for cand_i in range(n_candidates)
               if gs.cv_results_['param_parameter'][cand_i] ==
               FailingClassifier.FAILING_PARAMETER)

def test_unsupervised_grid_search():
    # test grid-search with unsupervised estimator
    X, y = make_blobs(random_state=0)
    km = KMeans(random_state=0)
    grid_search = DaskGridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]),
                                   scoring='adjusted_rand_score')
    grid_search.fit(X, y)
    # ARI can find the right number :)
    assert grid_search.best_params_["n_clusters"] == 3

    # Now without a score, and without y
    grid_search = DaskGridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]))
    grid_search.fit(X)
    assert grid_search.best_params_["n_clusters"] == 4

def test_grid_search_cv_results():
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    n_grid_points = 6
    params = [dict(kernel=['rbf', ], C=[1, 10], gamma=[0.1, 1]),
              dict(kernel=['poly', ], degree=[1, 2])]
    grid_search = DaskGridSearchCV(SVC(), cv=n_splits, iid=False,
                                   param_grid=params)
    grid_search.fit(X, y)
    grid_search_iid = DaskGridSearchCV(SVC(), cv=n_splits, iid=True,
                                       param_grid=params)
    grid_search_iid.fit(X, y)

    param_keys = ('param_C', 'param_degree', 'param_gamma', 'param_kernel')
    score_keys = ('mean_test_score', 'mean_train_score',
                  'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score',
                  'split0_train_score', 'split1_train_score',
                  'split2_train_score',
                  'std_test_score', 'std_train_score')
    n_candidates = n_grid_points

    for search, iid in zip((grid_search, grid_search_iid), (False, True)):
        assert iid == search.iid
        cv_results = search.cv_results_
        # Check if score and timing are reasonable
        assert all(cv_results['rank_test_score'] >= 1)
        assert all(all(cv_results[k] >= 0) for k in score_keys
                   if k != 'rank_test_score')
        assert all(all(cv_results[k] <= 1) for k in score_keys
                   if 'time' not in k and k != 'rank_test_score')
        # Check cv_results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates)
        # Check masking: 'poly' candidates set degree but mask C and gamma;
        # 'rbf' candidates set C and gamma but mask degree
        cv_results = grid_search.cv_results_
        n_candidates = len(grid_search.cv_results_['params'])
        assert all((cv_results['param_C'].mask[i] and
                    cv_results['param_gamma'].mask[i] and
                    not cv_results['param_degree'].mask[i])
                   for i in range(n_candidates)
                   if cv_results['param_kernel'][i] == 'poly')
        assert all((not cv_results['param_C'].mask[i] and
                    not cv_results['param_gamma'].mask[i] and
                    cv_results['param_degree'].mask[i])
                   for i in range(n_candidates)
                   if cv_results['param_kernel'][i] == 'rbf')

def test_grid_search_precomputed_kernel():
    # Test that grid search works when the input features are given in the
    # form of a precomputed kernel matrix
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert cv.best_score_ >= 0

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert np.mean(y_pred == y_test) >= 0

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    with pytest.raises(ValueError):
        cv.fit(K_train.tolist(), y_train)

def test_search_cv_results_rank_tie_breaking():
    X, y = make_blobs(n_samples=50, random_state=42)

    # The two C values are close enough to give similar models
    # which would result in a tie of their mean cv-scores
    param_grid = {'C': [1, 1.001, 0.001]}

    grid_search = DaskGridSearchCV(SVC(), param_grid=param_grid)
    random_search = DaskRandomizedSearchCV(SVC(), n_iter=3,
                                           param_distributions=param_grid)

    for search in (grid_search, random_search):
        search.fit(X, y)
        cv_results = search.cv_results_
        # Check tie breaking strategy -
        # Check that there is a tie in the mean scores between
        # candidates 1 and 2 alone
        assert_almost_equal(cv_results['mean_test_score'][0],
                            cv_results['mean_test_score'][1])
        assert_almost_equal(cv_results['mean_train_score'][0],
                            cv_results['mean_train_score'][1])
        try:
            assert_almost_equal(cv_results['mean_test_score'][1],
                                cv_results['mean_test_score'][2])
        except AssertionError:
            pass
        try:
            assert_almost_equal(cv_results['mean_train_score'][1],
                                cv_results['mean_train_score'][2])
        except AssertionError:
            pass
        # 'min' rank should be assigned to the tied candidates
        assert_almost_equal(search.cv_results_['rank_test_score'], [1, 1, 3])

def test_predict_proba_disabled():
    # Test predict_proba when disabled on estimator.
    X = np.arange(20).reshape(5, -1)
    y = [0, 0, 1, 1, 1]
    clf = SVC(probability=False)
    gs = DaskGridSearchCV(clf, {}, cv=2).fit(X, y)
    assert not hasattr(gs, "predict_proba")

def test_bad_error_score():
    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    gs = DaskGridSearchCV(MockClassifier(), {'foo_param': [0, 1, 2]},
                          error_score='badparam')

    with pytest.raises(ValueError):
        gs.fit(X, y)

def test_grid_search_dask_inputs():
    # Numpy versions
    np_X, np_y = make_classification(n_samples=15, n_classes=2, random_state=0)
    np_groups = np.random.RandomState(0).randint(0, 3, 15)
    # Dask array versions
    da_X = da.from_array(np_X, chunks=5)
    da_y = da.from_array(np_y, chunks=5)
    da_groups = da.from_array(np_groups, chunks=5)
    # Delayed versions
    del_X = delayed(np_X)
    del_y = delayed(np_y)
    del_groups = delayed(np_groups)

    cv = GroupKFold()
    clf = SVC(random_state=0)
    grid = {'C': [1]}

    sol = SVC(C=1, random_state=0).fit(np_X, np_y).support_vectors_

    for X, y, groups in product([np_X, da_X, del_X],
                                [np_y, da_y, del_y],
                                [np_groups, da_groups, del_groups]):
        gs = DaskGridSearchCV(clf, grid, cv=cv)

        with pytest.raises(ValueError) as exc:
            gs.fit(X, y)
        assert "The groups parameter should not be None" in str(exc.value)

        gs.fit(X, y, groups=groups)
        np.testing.assert_allclose(sol, gs.best_estimator_.support_vectors_)

def test_grid_search_error():
    # Test that grid search captures errors when X and y have mismatched
    # lengths
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]})
    with pytest.raises(ValueError):
        cv.fit(X_[:180], y_)

def test_search_train_scores_set_to_false():
    X = np.arange(6).reshape(6, -1)
    y = [0, 0, 0, 1, 1, 1]
    clf = LinearSVC(random_state=0)

    gs = DaskGridSearchCV(clf, param_grid={'C': [0.1, 0.2]},
                          return_train_score=False)
    gs.fit(X, y)

def test_grid_search_precomputed_kernel_error_nonsquare():
    # Test that grid search returns an error with a non-square precomputed
    # training kernel matrix
    K_train = np.zeros((10, 20))
    y_train = np.ones((10, ))
    clf = SVC(kernel='precomputed')
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]})
    with pytest.raises(ValueError):
        cv.fit(K_train, y_train)

def test_gridsearch_nd():
    # Test grid-search with 4-d X and 3-d y
    X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2)
    y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11)
    clf = CheckingClassifier(check_X=lambda x: x.shape[1:] == (5, 3, 2),
                             check_y=lambda x: x.shape[1:] == (7, 11))
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1, 2, 3]})
    grid_search.fit(X_4d, y_3d).score(X, y)
    assert hasattr(grid_search, "cv_results_")

def test_grid_search_allows_nans():
    # Test DaskGridSearchCV with Imputer
    X = np.arange(20, dtype=np.float64).reshape(5, -1)
    X[2, :] = np.nan
    y = [0, 0, 1, 1, 1]
    p = Pipeline([
        ('imputer', Imputer(strategy='mean', missing_values='NaN')),
        ('classifier', MockClassifier()),
    ])
    DaskGridSearchCV(p, {'classifier__foo_param': [1, 2, 3]}, cv=2).fit(X, y)

def test_search_cv_results_none_param():
    X, y = [[1], [2], [3], [4], [5]], [0, 0, 0, 0, 1]
    estimators = (DecisionTreeRegressor(), DecisionTreeClassifier())
    est_parameters = {"random_state": [0, None]}
    cv = KFold(random_state=0)

    for est in estimators:
        grid_search = DaskGridSearchCV(est, est_parameters, cv=cv).fit(X, y)
        assert_array_equal(grid_search.cv_results_['param_random_state'],
                           [0, None])

def test_grid_search_sparse():
    # Test that grid search works with both dense and sparse matrices
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    clf = LinearSVC()
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180], y_[:180])
    y_pred = cv.predict(X_[180:])
    C = cv.best_estimator_.C

    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    cv = DaskGridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(X_[:180].tocoo(), y_[:180])
    y_pred2 = cv.predict(X_[180:])
    C2 = cv.best_estimator_.C

    assert np.mean(y_pred == y_pred2) >= .9
    assert C == C2

def test_y_as_list():
    # Pass y as list in DaskGridSearchCV
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = CheckingClassifier(check_y=lambda x: isinstance(x, list))
    cv = KFold(n_splits=3)
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv)
    grid_search.fit(X, y.tolist()).score(X, y)
    assert hasattr(grid_search, "cv_results_")

def test_refit():
    # Regression test for bug in refitting
    # Simulates re-fitting a broken estimator; this used to break with
    # sparse SVMs.
    X = np.arange(100).reshape(10, 10)
    y = np.array([0] * 5 + [1] * 5)

    clf = DaskGridSearchCV(BrokenClassifier(), [{'parameter': [0, 1]}],
                           scoring="precision", refit=True)
    clf.fit(X, y)

def test_grid_search_failing_classifier_raise():
    X, y = make_classification(n_samples=20, n_features=10, random_state=0)
    clf = FailingClassifier()

    # refit=False because we want to test the behaviour of the grid search
    # part
    gs = DaskGridSearchCV(clf, [{'parameter': [0, 1, 2]}], scoring='accuracy',
                          refit=False, error_score='raise')

    # FailingClassifier issues a ValueError so this is what we look for.
    with pytest.raises(ValueError):
        gs.fit(X, y)

def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by DaskGridSearchCV.
    clf = MockClassifier()
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert hasattr(grid_search, "cv_results_")

    random_search = DaskRandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    assert hasattr(random_search, "cv_results_")

def test_visualize():
    pytest.importorskip('graphviz')

    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2,
                               random_state=0)
    clf = SVC(random_state=0)
    grid = {'C': [.1, .5, .9]}
    gs = DaskGridSearchCV(clf, grid).fit(X, y)

    assert hasattr(gs, 'dask_graph_')

    with tmpdir() as d:
        gs.visualize(filename=os.path.join(d, 'mydask'))
        assert os.path.exists(os.path.join(d, 'mydask.png'))

    # Doesn't work if not fitted
    gs = DaskGridSearchCV(clf, grid)
    with pytest.raises(NotFittedError):
        gs.visualize()

def test_grid_search_one_grid_point():
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    param_dict = {"C": [1.0], "kernel": ["rbf"], "gamma": [0.1]}

    clf = SVC()
    cv = DaskGridSearchCV(clf, param_dict)
    cv.fit(X_, y_)

    clf = SVC(C=1.0, kernel="rbf", gamma=0.1)
    clf.fit(X_, y_)

    assert_array_equal(clf.dual_coef_, cv.best_estimator_.dual_coef_)

def test_gridsearch_no_predict():
    # test grid-search with an estimator without predict.
    # slight duplication of a test from KDE
    def custom_scoring(estimator, X):
        return 42 if estimator.bandwidth == .1 else 0
    X, _ = make_blobs(cluster_std=.1, random_state=1,
                      centers=[[0, 1], [1, 0], [0, 0]])
    search = DaskGridSearchCV(KernelDensity(),
                              param_grid=dict(bandwidth=[.01, .1, 1]),
                              scoring=custom_scoring)
    search.fit(X)
    assert search.best_params_['bandwidth'] == .1
    assert search.best_score_ == 42

def test_pickle():
    # Test that a fit search can be pickled
    clf = MockClassifier()
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    grid_search_pickled = pickle.loads(pickle.dumps(grid_search))
    assert_array_almost_equal(grid_search.predict(X),
                              grid_search_pickled.predict(X))

    random_search = DaskRandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                           refit=True, n_iter=3)
    random_search.fit(X, y)
    random_search_pickled = pickle.loads(pickle.dumps(random_search))
    assert_array_almost_equal(random_search.predict(X),
                              random_search_pickled.predict(X))

def test_no_refit():
    # Test that GSCV can be used for model selection alone without refitting
    clf = MockClassifier()
    grid_search = DaskGridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=False)
    grid_search.fit(X, y)
    assert (not hasattr(grid_search, "best_estimator_") and
            hasattr(grid_search, "best_index_") and
            hasattr(grid_search, "best_params_"))

    # Make sure the predict/transform etc. functions raise a meaningful
    # error message
    for fn_name in ('predict', 'predict_proba', 'predict_log_proba',
                    'transform', 'inverse_transform'):
        with pytest.raises(NotFittedError) as exc:
            getattr(grid_search, fn_name)(X)
        assert (('refit=False. %s is available only after refitting on the '
                 'best parameters' % fn_name) in str(exc.value))