Пример #1
0
    def test_get_splitter(self):
        """Check which scikit-learn splitter ``get_splitter`` returns.

        Every case builds a fresh mocked data manager exposing ``Y_train``
        and the task type, constructs an ``ExecuteTaFuncWithQueue`` with the
        given resampling strategy and asserts the class of the splitter
        returned by ``get_splitter``.  Cases run inside ``subTest`` so a
        failure names the scenario and does not hide the remaining ones.
        """
        ta_args = dict(backend=BackendMock(),
                       autosklearn_seed=1,
                       logger=self.logger,
                       stats=self.stats,
                       memory_limit=3072,
                       metric=accuracy)

        two_class_labels = [0, 0, 0, 1, 1, 1]
        # Same labels plus one sample of a third label.  The holdout case
        # expects a plain ShuffleSplit here ("fallback"); presumably a class
        # with a single member cannot be stratified -- confirm against
        # get_splitter.
        singleton_class_labels = [0, 0, 0, 1, 1, 1, 2]
        continuous_targets = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
        cv_kwargs = dict(folds=5)

        # Splitter classes are referenced through the public
        # ``sklearn.model_selection`` namespace rather than the private
        # ``_split`` module, whose layout is not part of the public API.
        #
        # (description, task, Y_train, resampling strategy,
        #  extra constructor kwargs, expected splitter class)
        cases = [
            ('holdout, binary classification',
             BINARY_CLASSIFICATION, two_class_labels, 'holdout', {},
             sklearn.model_selection.StratifiedShuffleSplit),
            ('holdout, binary classification, fallback to shuffle split',
             BINARY_CLASSIFICATION, singleton_class_labels, 'holdout', {},
             sklearn.model_selection.ShuffleSplit),
            ('cv, binary classification',
             BINARY_CLASSIFICATION, two_class_labels, 'cv', cv_kwargs,
             sklearn.model_selection.StratifiedKFold),
            ('cv, binary classification, no fallback anticipated',
             BINARY_CLASSIFICATION, singleton_class_labels, 'cv', cv_kwargs,
             sklearn.model_selection.StratifiedKFold),
            ('regression, shuffle split',
             REGRESSION, continuous_targets, 'holdout', {},
             sklearn.model_selection.ShuffleSplit),
            ('regression cv, KFold',
             REGRESSION, continuous_targets, 'cv', cv_kwargs,
             sklearn.model_selection.KFold),
        ]

        for description, task, targets, strategy, extra, expected in cases:
            with self.subTest(msg=description):
                # Fresh mock per case so no state leaks between scenarios.
                D = unittest.mock.Mock(spec=AbstractDataManager)
                D.data = dict(Y_train=np.array(targets))
                D.info = dict(task=task)

                ta = ExecuteTaFuncWithQueue(resampling_strategy=strategy,
                                            **dict(ta_args, **extra))
                cv = ta.get_splitter(D)
                self.assertIsInstance(cv, expected)