def test_X_as_list(): # Pass X as list in GridSearchCV X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_X=lambda x: isinstance(x, list)) cv = KFold(n_splits=3) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X.tolist(), y).score(X, y) assert_true(hasattr(grid_search, "results_"))
def test_gridsearch_nd(): # Pass X as list in GridSearchCV X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2) y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11) check_X = lambda x: x.shape[1:] == (5, 3, 2) check_y = lambda x: x.shape[1:] == (7, 11) clf = CheckingClassifier(check_X=check_X, check_y=check_y) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}) grid_search.fit(X_4d, y_3d).score(X, y) assert_true(hasattr(grid_search, "results_"))
def test_y_as_list(): # Pass y as list in GridSearchCV X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_y=lambda x: isinstance(x, list)) cv = KFold(n=len(X), n_folds=3) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv) grid_search.fit(X, y.tolist()).score(X, y) assert_true(hasattr(grid_search, "grid_scores_"))
def test_gridsearch_nd(self): # Pass X as list in GridSearchCV X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2) y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11) check_X = lambda x: x.shape[1:] == (5, 3, 2) check_y = lambda x: x.shape[1:] == (7, 11) clf = CheckingClassifier(check_X=check_X, check_y=check_y) grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]}, webserver_url=self.live_server_url) wait(grid_search.fit(X_4d, y_3d)) assert_true(hasattr(grid_search, "grid_scores_"))
def test_cross_val_score(): clf = MockClassifier() for a in range(-10, 10): clf.a = a # Smoke test scores = cval.cross_val_score(clf, X, y) assert_array_equal(scores, clf.score(X, y)) # test with multioutput y scores = cval.cross_val_score(clf, X_sparse, X) assert_array_equal(scores, clf.score(X_sparse, X)) scores = cval.cross_val_score(clf, X_sparse, y) assert_array_equal(scores, clf.score(X_sparse, y)) # test with multioutput y scores = cval.cross_val_score(clf, X_sparse, X) assert_array_equal(scores, clf.score(X_sparse, X)) # test with X and y as list list_check = lambda x: isinstance(x, list) clf = CheckingClassifier(check_X=list_check) scores = cval.cross_val_score(clf, X.tolist(), y.tolist()) clf = CheckingClassifier(check_y=list_check) scores = cval.cross_val_score(clf, X, y.tolist()) assert_raises(ValueError, cval.cross_val_score, clf, X, y, scoring="sklearn") # test with 3d X and X_3d = X[:, :, np.newaxis] clf = MockClassifier(allow_nd=True) scores = cval.cross_val_score(clf, X_3d, y) clf = MockClassifier(allow_nd=False) assert_raises(ValueError, cval.cross_val_score, clf, X_3d, y)
def test_X_as_list(self): # Pass X as list in GridSearchCV X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(check_X=lambda x: isinstance(x, list)) cv = KFold(n=len(X), n_folds=3) grid_search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]}, cv=cv, webserver_url=self.live_server_url) wait(grid_search.fit(X.tolist(), y)) assert_true(hasattr(grid_search, "grid_scores_"))
def test_gridsearch_feature_extractor(): X = data y = np.ones((X.shape[0], )) # dummy labels pipe = Pipeline([ ('FE', FeatureExtractor(sfreq=sfreq, selected_funcs=['higuchi_fd'])), ('clf', CheckingClassifier( check_X=lambda arr: arr.shape[1:] == (X.shape[1], ))) ]) params_grid = {'FE__higuchi_fd__kmax': [5, 10]} gs = GridSearchCV(estimator=pipe, param_grid=params_grid) gs.fit(X, y) assert_equal(hasattr(gs, 'cv_results_'), True)
def test_cross_val_predict_input_types(): iris = load_iris() X, y = iris.data, iris.target X_sparse = coo_matrix(X) multioutput_y = np.column_stack([y, y[::-1]]) clf = Ridge(fit_intercept=False, random_state=0) # 3 fold cv is used --> atleast 3 samples per class # Smoke test predictions = cross_val_predict(clf, X, y) assert_equal(predictions.shape, (150, )) # test with multioutput y predictions = cross_val_predict(clf, X_sparse, multioutput_y) assert_equal(predictions.shape, (150, 2)) predictions = cross_val_predict(clf, X_sparse, y) assert_array_equal(predictions.shape, (150, )) # test with multioutput y predictions = cross_val_predict(clf, X_sparse, multioutput_y) assert_array_equal(predictions.shape, (150, 2)) # test with X and y as list list_check = lambda x: isinstance(x, list) clf = CheckingClassifier(check_X=list_check) predictions = cross_val_predict(clf, X.tolist(), y.tolist()) clf = CheckingClassifier(check_y=list_check) predictions = cross_val_predict(clf, X, y.tolist()) # test with 3d X and X_3d = X[:, :, np.newaxis] check_3d = lambda x: x.ndim == 3 clf = CheckingClassifier(check_X=check_3d) predictions = cross_val_predict(clf, X_3d, y) assert_array_equal(predictions.shape, (150, ))
def test_cross_val_score_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: pass for TargetType, InputFeatureType in types: # X dataframe, y series X_df, y_ser = InputFeatureType(X), TargetType(y) check_df = lambda x: isinstance(x, InputFeatureType) check_series = lambda x: isinstance(x, TargetType) clf = CheckingClassifier(check_X=check_df, check_y=check_series) cval.cross_val_score(clf, X_df, y_ser)
def test_intercept_model(): data = patsy.demo_data("x1", "x2", "x3", "y") def check_X_no_intercept(X): return X.shape[1] == 2 # check wether X contains only the two features, no intercept est = PatsyModel(CheckingClassifier(check_X=check_X_no_intercept), "y ~ x1 + x2") est.fit(data) # predict checks applying to new data est.predict(data) def check_X_intercept(X): shape_correct = X.shape[1] == 3 first_is_intercept = np.all(X[:, 0] == 1) return shape_correct and first_is_intercept # check wether X does contain intercept est = PatsyModel(CheckingClassifier(check_X=check_X_intercept), "y ~ x1 + x2", add_intercept=True) est.fit(data) est.predict(data)
def test_stateful_model(): data_train = patsy.demo_data("x1", "x2", "y") data_train['x1'][:] = 1 # mean of x1 is 1 data_test = patsy.demo_data("x1", "x2", "y") data_test['x1'][:] = 0 # center x1 est = PatsyModel(CheckingClassifier(), "y ~ center(x1) + x2") est.fit(data_train) def check_centering(X): return np.all(X[:, 0] == -1) est.estimator_.check_X = check_centering # make sure that mean of training, not test data was removed est.predict(data_test)
def test_permutation_test_score_pandas(): # check permutation_test_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: from pandas import Series, DataFrame types.append((Series, DataFrame)) except ImportError: pass for TargetType, InputFeatureType in types: # X dataframe, y series iris = load_iris() X, y = iris.data, iris.target X_df, y_ser = InputFeatureType(X), TargetType(y) check_df = lambda x: isinstance(x, InputFeatureType) check_series = lambda x: isinstance(x, TargetType) clf = CheckingClassifier(check_X=check_df, check_y=check_series) permutation_test_score(clf, X_df, y_ser)
def test_scope_model(): data = patsy.demo_data("x1", "x2", "x3", "y") def myfunc(x): tmp = np.ones_like(x) tmp.fill(42) return tmp def check_X(X): return np.all(X[:, 1] == 42) # checking classifier raises error if check_X doesn't return true. # this checks that myfunc was actually applied est = PatsyModel(CheckingClassifier(check_X=check_X), "y ~ x1 + myfunc(x2)") est.fit(data) # test feature names assert_equal(est.feature_names_, ["x1", "myfunc(x2)"])
def check_hyperparameter_searcher_with_fit_params(klass, **klass_kwargs): X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) clf = CheckingClassifier(expected_fit_params=['spam', 'eggs']) searcher = klass(clf, {'foo_param': [1, 2, 3]}, cv=2, **klass_kwargs) # The CheckingClassifier generates an assertion error if # a parameter is missing or has length != len(X). assert_raise_message(AssertionError, "Expected fit parameter(s) ['eggs'] not seen.", searcher.fit, X, y, spam=np.ones(10)) assert_raise_message(AssertionError, "Fit parameter spam has length 1; expected 4.", searcher.fit, X, y, spam=np.ones(1), eggs=np.zeros(10)) searcher.fit(X, y, spam=np.ones(10), eggs=np.zeros(10))
def test_pandas_input(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: from pandas import Series, DataFrame types.append((DataFrame, Series)) except ImportError: pass X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) for InputFeatureType, TargetType in types: # X dataframe, y series X_df, y_ser = InputFeatureType(X), TargetType(y) check_df = lambda x: isinstance(x, InputFeatureType) check_series = lambda x: isinstance(x, TargetType) clf = CheckingClassifier(check_X=check_df, check_y=check_series) grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}) grid_search.fit(X_df, y_ser).score(X_df, y_ser) grid_search.predict(X_df) assert_true(hasattr(grid_search, "grid_scores_"))