class ValidationStep:
    """Thin wrapper that picks a validator from ``params`` and delegates to it.

    Only the "kfold" validation type is handled; any other value raises.
    """

    def __init__(self, params):
        """Create the underlying validator.

        Args:
            params: Mapping read via ``.get``. Its "validation_type" entry
                defaults to "kfold"; the whole mapping is handed to
                ``KFoldValidator``.

        Raises:
            Exception: If the validation type is anything other than "kfold".
        """
        # k-fold is the default validation technique
        self.validation_type = params.get("validation_type", "kfold")

        # The "split" and "with_dataset" validation types are planned but
        # not wired in yet, so everything except "kfold" is rejected.
        if self.validation_type != "kfold":
            raise Exception("Other validation types are not implemented yet!")

        self.validator = KFoldValidator(params)

    def get_split(self, k):
        """Return whatever the underlying validator gives for split ``k``."""
        chosen = self.validator.get_split(k)
        return chosen

    def split(self):
        """Return the result of the underlying validator's ``split()``."""
        folds = self.validator.split()
        return folds

    def get_n_splits(self):
        """Return the number of splits reported by the underlying validator."""
        count = self.validator.get_n_splits()
        return count
def test_create_with_target_as_labels(self):
    """Stratified 2-fold split over string labels gives 2 rows per part."""
    features = pd.DataFrame(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
    labels = pd.DataFrame(np.array(["a", "b", "a", "b"]))
    data = {"train": {"X": features, "y": labels}}
    params = {"shuffle": True, "stratify": True, "k_folds": 2}

    vl = KFoldValidator(params, data)

    self.assertEqual(params["k_folds"], vl.get_n_splits())
    for train, validation in vl.split():
        # Fetch all four frames first, then check them in the order
        # X_train, y_train, X_validation, y_validation.
        frames = [
            train.get("X"),
            train.get("y"),
            validation.get("X"),
            validation.get("y"),
        ]
        for frame in frames:
            self.assertEqual(frame.shape[0], 2)
def test_missing_target_values(self):
    """Six rows, two with a missing-looking target, split 3/3 over 2 folds."""
    # rows with missing target will be distributed equally among folds
    features = pd.DataFrame(
        np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]])
    )
    # NOTE(review): np.array() on a mixed str/float list produces a string
    # array, so np.nan ends up as the text "nan" rather than a real missing
    # value -- confirm whether genuine NaN targets were intended here.
    labels = pd.DataFrame(np.array(["a", "b", "a", "b", np.nan, np.nan]))
    data = {"train": {"X": features, "y": labels}}
    params = {"shuffle": True, "stratify": True, "k_folds": 2}

    vl = KFoldValidator(params, data)

    self.assertEqual(params["k_folds"], vl.get_n_splits())
    for train, validation in vl.split():
        # Fetch all four frames first, then check them in the order
        # X_train, y_train, X_validation, y_validation.
        frames = [
            train.get("X"),
            train.get("y"),
            validation.get("X"),
            validation.get("y"),
        ]
        for frame in frames:
            self.assertEqual(frame.shape[0], 3)