def test_disable_repeats_when_disabled_shuffle(self):
    """Verify that the repeat count falls back to 1 when shuffle is off.

    A 4-row dataset is dumped under ``self._results_path`` and a
    ``KFoldValidator`` is built with ``shuffle=False`` and ``repeats=10``.
    The validator must still report the requested number of folds, but
    only a single repeat.
    """
    features = pd.DataFrame(
        np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
        columns=["a", "b"],
    )
    target = pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"])

    # Persist the data first; the validator is given paths, not frames.
    features_file = os.path.join(self._results_path, "X.data")
    target_file = os.path.join(self._results_path, "y.data")
    dump_data(features_file, features)
    dump_data(target_file, target)

    params = dict(
        shuffle=False,
        stratify=False,
        k_folds=2,
        repeats=10,
        results_path=self._results_path,
        X_path=features_file,
        y_path=target_file,
        random_seed=1,
    )
    validator = KFoldValidator(params)

    self.assertEqual(params["k_folds"], validator.get_n_splits())
    # repeats=10 was requested, but with shuffle disabled only 1 is expected.
    self.assertEqual(1, validator.get_repeats())
def test_repeats(self):
    """Verify repeated k-fold splitting when shuffle is enabled.

    A 4-row dataset is dumped under ``self._results_path`` and a
    ``KFoldValidator`` is built with ``shuffle=True``, ``k_folds=2`` and
    ``repeats=10``. The validator must report the requested fold and repeat
    counts, and every (fold, repeat) split must contain 2 rows in each of
    the train and validation X/y parts.
    """
    features = pd.DataFrame(
        np.array([[0, 0], [0, 1], [1, 0], [1, 1]]),
        columns=["a", "b"],
    )
    target = pd.DataFrame(np.array([0, 0, 1, 1]), columns=["target"])

    # Persist the data first; the validator is given paths, not frames.
    features_file = os.path.join(self._results_path, "X.data")
    target_file = os.path.join(self._results_path, "y.data")
    dump_data(features_file, features)
    dump_data(target_file, target)

    params = dict(
        shuffle=True,
        stratify=False,
        k_folds=2,
        repeats=10,
        results_path=self._results_path,
        X_path=features_file,
        y_path=target_file,
        random_seed=1,
    )
    validator = KFoldValidator(params)

    self.assertEqual(params["k_folds"], validator.get_n_splits())
    self.assertEqual(params["repeats"], validator.get_repeats())

    for repeat_idx in range(validator.get_repeats()):
        for fold_idx in range(validator.get_n_splits()):
            train, validation = validator.get_split(fold_idx, repeat_idx)
            # Fetch all four parts before asserting, in X/y order.
            parts = (
                train.get("X"),
                train.get("y"),
                validation.get("X"),
                validation.get("y"),
            )
            for part in parts:
                self.assertEqual(part.shape[0], 2)