def test_grid_search_precomputed_kernel(self):
    # Grid search must accept input features supplied as a precomputed
    # kernel (Gram) matrix instead of raw feature vectors.
    split = 180
    features, labels = make_classification(n_samples=200, n_features=100,
                                           random_state=0)
    train_rows = features[:split]
    train_labels = labels[:split]

    # Linear kernel between every pair of training samples.
    gram_train = np.dot(train_rows, train_rows.T)

    search = ATGridSearchCV(SVC(kernel='precomputed'), {'C': [0.1, 1.0]},
                            webserver_url=self.live_server_url)
    # The value returned by fit() is handed to wait() before any fitted
    # attribute is read.
    wait(search.fit(gram_train, train_labels))

    assert_true(search.best_score_ >= 0)

    # Linear kernel between the held-out samples and the training samples.
    gram_test = np.dot(features[split:], train_rows.T)
    test_labels = labels[split:]
    predicted = search.predict(gram_test)
    accuracy = np.mean(predicted == test_labels)

    assert_true(accuracy >= 0)

    # A precomputed kernel given as a plain nested list (neither array-like
    # nor sparse) is expected to be rejected with a ValueError.
    assert_raises(ValueError, search.fit, gram_train.tolist(), train_labels)
def test_grid_search_sparse_scoring(self):
    # Grid search with an f1-based scorer must pick the same C and yield the
    # same predictions for dense input, sparse input, and a custom
    # "greater is worse" scorer.
    split = 180
    data, target = make_classification(n_samples=200, n_features=100,
                                       random_state=0)

    # 1) Dense input, built-in "f1" scoring.
    dense_search = ATGridSearchCV(LinearSVC(), {'C': [0.1, 1.0]},
                                  scoring="f1",
                                  webserver_url=self.live_server_url)
    wait(dense_search.fit(data[:split], target[:split]))
    dense_pred = dense_search.predict(data[split:])
    dense_c = dense_search.best_estimator_.C

    # 2) Same data converted to CSR, built-in "f1" scoring.
    data = sp.csr_matrix(data)
    estimator = LinearSVC()
    sparse_search = ATGridSearchCV(estimator, {'C': [0.1, 1.0]},
                                   scoring="f1",
                                   webserver_url=self.live_server_url)
    wait(sparse_search.fit(data[:split], target[:split]))
    sparse_pred = sparse_search.predict(data[split:])
    sparse_c = sparse_search.best_estimator_.C

    assert_array_equal(dense_pred, sparse_pred)
    assert_equal(dense_c, sparse_c)

    # Smoke test the score (left disabled; NOTE(review): `y` is not defined
    # in this test, so these lines would need `target` to be re-enabled).
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # 3) Loss-style scorer: negated f1 registered with
    # greater_is_better=False. The estimator instance from step 2 is reused.
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    loss_scorer = make_scorer(f1_loss, greater_is_better=False)
    loss_search = ATGridSearchCV(estimator, {'C': [0.1, 1.0]},
                                 scoring=loss_scorer,
                                 webserver_url=self.live_server_url)
    wait(loss_search.fit(data[:split], target[:split]))
    loss_pred = loss_search.predict(data[split:])
    loss_c = loss_search.best_estimator_.C

    assert_equal(dense_c, loss_c)
    assert_array_equal(dense_pred, loss_pred)