def check_cv(pipeline, X, y=None, n_folds=2, groups=None,
             split_start='before_transforms', expected_metrics=None,
             **params):
    """Cross-validate ``pipeline`` with ``CV`` and check the results.

    Every ``CV.fit`` result is handed to ``check_cv_results`` together
    with the CV object's ``_learner_type``, ``n_folds`` and
    ``expected_metrics``.

    :param pipeline: object passed to ``CV``; its ``nodes`` attribute is
        read when ``split_start == 'try_all'``.
    :param X: data forwarded to ``CV.fit``.
    :param y: optional labels forwarded to ``CV.fit``.
    :param n_folds: forwarded to ``CV.fit`` as ``cv`` and to
        ``check_cv_results``.
    :param groups: forwarded to ``CV.fit``.
    :param split_start: forwarded to ``CV.fit``. The special value
        ``'try_all'`` runs one fit per candidate: ``'after_transforms'``,
        ``'before_transforms'``, every index in ``range(len(nodes))`` and
        every negative index in ``range(-len(nodes), 0)``.
    :param expected_metrics: forwarded to ``check_cv_results``; ``None``
        (the default) means an empty dict.
    :param params: extra keyword arguments forwarded to ``CV.fit``. They
        are only used when ``split_start`` is not ``'try_all'``.
    :return: the result of the last ``CV.fit`` call.
    """
    # A literal ``{}`` default would be one dict shared by every call;
    # build a fresh one so state cannot leak between tests.
    if expected_metrics is None:
        expected_metrics = {}

    cv = CV(pipeline)

    if split_start != 'try_all':
        results = cv.fit(X, y, cv=n_folds, groups=groups,
                         split_start=split_start, **params)
        check_cv_results(cv._learner_type, results, n_folds,
                         expected_metrics)
        return results

    len_pipeline = len(pipeline.nodes)
    values_to_test = ['after_transforms', 'before_transforms']
    values_to_test.extend(range(len_pipeline))
    values_to_test.extend(range(-len_pipeline, 0))

    for s in values_to_test:
        # Each candidate gets its own graph_id derived from the value.
        # NOTE(review): **params is intentionally not forwarded here, as
        # in the original code; forwarding could clash with graph_id.
        graph_id = '_split_start={}'.format(str(s))
        results = cv.fit(X, y, cv=n_folds, groups=groups,
                         split_start=s, graph_id=graph_id)
        check_cv_results(cv._learner_type, results, n_folds,
                         expected_metrics)

    return results
def test_unseen_classes(self):
    """CV.fit must raise ``Warning`` when folds miss some classes."""
    # Build a dataset whose CV splits will not all contain every class:
    # the tail of the label series is overwritten with the values 0..4.
    # (presumably the series has 100 entries, so each value appears
    # once there -- verify against random_series)
    features = random_df()
    labels = random_series()
    labels[95:] = range(5)

    failure_msg = (
        "CV didn't raise Warning exception b/c of minority class issue"
    )
    with self.assertRaises(Warning, msg=failure_msg):
        CV([FastLinearClassifier()]).fit(features, labels, cv=3)
def check_cv_with_non_defaults(self, label_name='label', group_id='groupid',
                               features='Features_1', **params):
    """Run 4-fold CV on a ToKey + LightGbmRanker pipeline and check it.

    The pipeline is a ``ToKey`` step built with
    ``columns={'groupid2': group_id, 'label2': label_name}`` followed by
    a ``LightGbmRanker`` whose group-id, label and feature roles are set
    to ``'groupid2'``, ``'label2'`` and ``[features]``. The data comes
    from ``self.data(label_name, group_id, features)``; the CV results
    are passed to ``check_cv_results`` with no expected metrics.

    ``params`` is accepted but not used.
    """
    fold_count = 4

    key_step = ToKey(columns={'groupid2': group_id, 'label2': label_name})
    ranker_step = LightGbmRanker() << {
        Role.GroupId: 'groupid2',
        Role.Label: 'label2',
        Role.Feature: [features],
    }

    dataset = self.data(label_name, group_id, features)
    cross_validator = CV([key_step, ranker_step])

    # NOTE(review): groups is the literal 'groupid', not the group_id
    # argument -- confirm this is intended for non-default group ids.
    cv_results = cross_validator.fit(dataset, groups='groupid',
                                     cv=fold_count)
    check_cv_results(cross_validator._learner_type, cv_results,
                     n_folds=4, expected_metrics={})