def test_check_cv():
    """Exercise ``check_cv`` with integer, iterable and splitter ``cv`` inputs.

    Targets are supplied both as NumPy arrays and as dask arrays. Each dask
    case runs inside ``assert_dask_compute(flag)``.
    NOTE(review): ``assert_dask_compute`` is defined outside this block; it
    presumably asserts whether a dask computation was triggered -- confirm.
    """
    # No y, classifier=False: an integer cv gives a KFold with that many splits.
    for n_folds in (3, 5):
        splitter = check_cv(n_folds, classifier=False)
        assert isinstance(splitter, KFold)
        assert splitter.n_splits == n_folds

    # Dask y, classifier=False.
    dask_target = da.from_array(np.array([1, 0, 1, 0, 1]), chunks=2)
    with assert_dask_compute(False):
        splitter = check_cv(y=dask_target, classifier=False)
        assert isinstance(splitter, KFold)

    # Binary and multi-class y: a classifier gets a StratifiedKFold.
    binary_target = np.array([0, 1, 0, 1, 0, 0, 1, 1, 1])
    multiclass_target = np.array([0, 1, 0, 1, 2, 1, 2, 0, 2])
    for target in (binary_target, multiclass_target):
        splitter = check_cv(5, target, classifier=True)
        assert isinstance(splitter, StratifiedKFold)
        assert splitter.n_splits == 5

        dask_target = da.from_array(target, chunks=2)
        with assert_dask_compute(True):
            splitter = check_cv(5, dask_target, classifier=True)
        assert isinstance(splitter, StratifiedKFold)
        assert splitter.n_splits == 5

    # Non-binary/multi-class y: two-dimensional target gives a plain KFold.
    target_2d = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    splitter = check_cv(y=target_2d, classifier=True)
    assert isinstance(splitter, KFold)

    dask_target = da.from_array(target_2d, chunks=2)
    with assert_dask_compute(True):
        splitter = check_cv(y=dask_target, classifier=True)
        assert isinstance(splitter, KFold)

    # Old style: a list of boolean masks is wrapped in _CVIterableWrapper.
    mask_splits = [
        np.array([True, False, True]),
        np.array([False, True, False]),
    ]
    with assert_dask_compute(False):
        wrapped = check_cv(mask_splits, y=dask_target, classifier=True)
        assert isinstance(wrapped, _CVIterableWrapper)

    # CV instance passes through unchanged.
    ones_target = da.ones(5, chunks=2)
    shuffle_cv = ShuffleSplit()
    with assert_dask_compute(False):
        for classifier_flag in (True, False):
            result = check_cv(shuffle_cv, ones_target,
                              classifier=classifier_flag)
            assert result is shuffle_cv
def test_check_cv():
    """Check the splitter type ``check_cv`` returns for each kind of input.

    Integer, mask-list and splitter-instance ``cv`` values are combined with
    NumPy and dask targets. Dask cases run under ``assert_dask_compute``.
    NOTE(review): ``assert_dask_compute`` lives outside this block; its flag
    presumably states whether a dask compute is expected -- confirm.
    """
    # No y, classifier=False
    for expected_splits in (3, 5):
        folds = check_cv(expected_splits, classifier=False)
        assert isinstance(folds, KFold)
        assert folds.n_splits == expected_splits

    # y, classifier = False
    lazy_y = da.from_array(np.array([1, 0, 1, 0, 1]), chunks=2)
    with assert_dask_compute(False):
        folds = check_cv(y=lazy_y, classifier=False)
        assert isinstance(folds, KFold)

    # Binary and multi-class y
    label_sets = [
        np.array([0, 1, 0, 1, 0, 0, 1, 1, 1]),
        np.array([0, 1, 0, 1, 2, 1, 2, 0, 2]),
    ]
    for labels in label_sets:
        folds = check_cv(5, labels, classifier=True)
        assert isinstance(folds, StratifiedKFold)
        assert folds.n_splits == 5

        lazy_y = da.from_array(labels, chunks=2)
        with assert_dask_compute(True):
            folds = check_cv(5, lazy_y, classifier=True)
        assert isinstance(folds, StratifiedKFold)
        assert folds.n_splits == 5

    # Non-binary/multi-class y
    labels = np.array([[1, 2], [0, 3], [0, 0], [3, 1], [2, 0]])
    folds = check_cv(y=labels, classifier=True)
    assert isinstance(folds, KFold)

    lazy_y = da.from_array(labels, chunks=2)
    with assert_dask_compute(True):
        folds = check_cv(y=lazy_y, classifier=True)
        assert isinstance(folds, KFold)

    # Old style
    masks = [
        np.array([True, False, True]),
        np.array([False, True, False]),
    ]
    with assert_dask_compute(False):
        folds = check_cv(masks, y=lazy_y, classifier=True)
        assert isinstance(folds, _CVIterableWrapper)

    # CV instance passes through
    lazy_y = da.ones(5, chunks=2)
    splitter = ShuffleSplit()
    with assert_dask_compute(False):
        as_classifier = check_cv(splitter, lazy_y, classifier=True)
        assert as_classifier is splitter
        as_regressor = check_cv(splitter, lazy_y, classifier=False)
        assert as_regressor is splitter
def fit(self, X, y=None, groups=None, **fit_params):
    """Run the parent search ``fit``, creating a progress bar first if verbose.

    When ``self.verbose`` is truthy, the number of fitting tasks is computed
    as ``candidates * splits`` plus one if a refit is requested, and passed
    to ``self.estimator._create_progress_bar`` with ``self.scheduler``.

    Parameters
    ----------
    X, y, groups
        Forwarded unchanged to the parent class ``fit``; also handed to the
        splitter's ``get_n_splits`` when counting tasks.
    **fit_params
        Extra keyword arguments forwarded to the parent class ``fit``.

    Returns
    -------
    self
    """
    if self.verbose:
        n_candidates = len(self._get_param_iterator())
        # Use the public get_n_splits() API instead of the ``n_splits``
        # attribute: splitters such as LeaveOneOut or PredefinedSplit, and
        # wrapped iterables of splits, have no ``n_splits`` attribute.
        n_splits = check_cv(self.cv).get_n_splits(X, y, groups)
        # bool() first so a non-boolean truthy ``refit`` counts as one extra
        # task instead of failing in int().
        # NOTE(review): assumes refit may be a scorer name or callable, as
        # in scikit-learn multi-metric search -- confirm.
        n_refit_tasks = int(bool(self.refit))
        n_fitting_tasks = n_candidates * n_splits + n_refit_tasks
        self.estimator._create_progress_bar(n_fitting_tasks, self.scheduler)

    super(ModelSearchCV, self).fit(X, y, groups, **fit_params)
    self._modify_grid_search_attrs()
    return self