import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal

from sklearn.kernel_approximation import Nystroem
from sklearn.metrics.pairwise import (chi2_kernel, kernel_metrics,
                                      polynomial_kernel, rbf_kernel)


def test_nystroem_callable():
    # Test Nystroem on a callable.
    rnd = np.random.RandomState(42)
    n_samples = 10
    X = rnd.uniform(size=(n_samples, 4))

    def logging_histogram_kernel(x, y, log):
        """Histogram kernel that writes to a log."""
        log.append(1)
        return np.minimum(x, y).sum()

    kernel_log = []
    X = list(X)  # test input validation
    Nystroem(kernel=logging_histogram_kernel,
             n_components=(n_samples - 1),
             kernel_params={'log': kernel_log}).fit(X)
    # the symmetric kernel is evaluated once per unordered pair of the
    # m = n_samples - 1 basis samples (diagonal included): m * (m + 1) / 2
    assert len(kernel_log) == n_samples * (n_samples - 1) / 2

    def linear_kernel(X, Y):
        return np.dot(X, Y.T)

    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({'gamma': 1}, {'coef0': 1}, {'degree': 2})
    for param in params:
        ny = Nystroem(kernel=linear_kernel, **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(X)
def test_nystroem_approximation():
    # some basic tests
    rnd = np.random.RandomState(0)
    X = rnd.uniform(size=(10, 4))

    # With n_components = n_samples this is exact
    X_transformed = Nystroem(n_components=X.shape[0]).fit_transform(X)
    K = rbf_kernel(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    trans = Nystroem(n_components=2, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)

    # test callable kernel
    def linear_kernel(X, Y):
        return np.dot(X, Y.T)

    trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd)
    X_transformed = trans.fit(X).transform(X)
    assert X_transformed.shape == (X.shape[0], 2)

    # test that available kernels fit and transform
    kernels_available = kernel_metrics()
    for kern in kernels_available:
        trans = Nystroem(n_components=2, kernel=kern, random_state=rnd)
        X_transformed = trans.fit(X).transform(X)
        assert X_transformed.shape == (X.shape[0], 2)
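# Why the n_components == n_samples case above is exact: the Nystroem map is
# built from the kernel between the data and m sampled landmarks, normalized
# by the (pseudo-)inverse square root of the landmark kernel. A minimal NumPy
# sketch of that construction (an illustration of the technique, not the
# estimator's actual code):
def nystroem_sketch(X, landmarks):
    K_mm = rbf_kernel(landmarks, landmarks)
    K_nm = rbf_kernel(X, landmarks)
    u, s, vt = np.linalg.svd(K_mm, hermitian=True)
    s = np.maximum(s, 1e-12)  # guard against singular landmark kernels
    normalization = np.dot(u / np.sqrt(s), vt)
    # rows are feature vectors whose Gram matrix is
    # K_nm @ pinv(K_mm) @ K_nm.T, which equals K exactly when landmarks == X
    return np.dot(K_nm, normalization)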
def test_nystroem_poly_kernel_params():
    # Non-regression: Nystroem should pass other parameters besides gamma.
    rnd = np.random.RandomState(37)
    X = rnd.uniform(size=(10, 4))

    K = polynomial_kernel(X, degree=3.1, coef0=.1)
    nystroem = Nystroem(kernel="polynomial", n_components=X.shape[0],
                        degree=3.1, coef0=.1)
    X_transformed = nystroem.fit_transform(X)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)
def test_nystroem_singular_kernel():
    # test that nystroem works with singular kernel matrix
    rng = np.random.RandomState(0)
    X = rng.rand(10, 20)
    X = np.vstack([X] * 2)  # duplicate samples make the kernel singular

    gamma = 100
    N = Nystroem(gamma=gamma, n_components=X.shape[0]).fit(X)
    X_transformed = N.transform(X)

    K = rbf_kernel(X, gamma=gamma)

    assert_array_almost_equal(K, np.dot(X_transformed, X_transformed.T))
    assert np.all(np.isfinite(X_transformed))
def test_nystroem_precomputed_kernel():
    # Non-regression: test Nystroem on precomputed kernel (PR #14706).
    rnd = np.random.RandomState(12)
    X = rnd.uniform(size=(10, 4))

    K = polynomial_kernel(X, degree=2, coef0=.1)
    nystroem = Nystroem(kernel='precomputed', n_components=X.shape[0])
    X_transformed = nystroem.fit_transform(K)
    assert_array_almost_equal(np.dot(X_transformed, X_transformed.T), K)

    # if degree, gamma or coef0 is passed, we raise a ValueError
    msg = "Don't pass gamma, coef0 or degree to Nystroem"
    params = ({'gamma': 1}, {'coef0': 1}, {'degree': 2})
    for param in params:
        ny = Nystroem(kernel='precomputed', n_components=X.shape[0], **param)
        with pytest.raises(ValueError, match=msg):
            ny.fit(K)
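# The precomputed path should agree with passing the kernel by name: with
# n_components == n_samples both reconstruct K exactly (compare
# test_nystroem_poly_kernel_params above). A hedged equivalence sketch, using
# the X and K from the test body:
#     direct = Nystroem(kernel='polynomial', n_components=X.shape[0],
#                       degree=2, coef0=.1).fit_transform(X)
#     assert_array_almost_equal(np.dot(direct, direct.T), K)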
def test_nystroem_default_parameters():
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))

    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    nystroem = Nystroem(n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = rbf_kernel(X, gamma=None)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)

    # chi2 kernel should behave as gamma=1 by default
    nystroem = Nystroem(kernel='chi2', n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = chi2_kernel(X, gamma=1)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)
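# gamma=None in rbf_kernel resolves to 1 / n_features; a quick sanity check
# of that equivalence (an added illustration, not part of the original suite):
def test_rbf_default_gamma_equivalence():
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))
    assert_array_almost_equal(rbf_kernel(X, gamma=None),
                              rbf_kernel(X, gamma=1 / X.shape[1]))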
    # (tail of the data-loading helper)
    n_train = 60000
    X_train = X[:n_train]
    y_train = y[:n_train]
    X_test = X[n_train:]
    y_test = y[n_train:]
    return X_train, X_test, y_train, y_test


ESTIMATORS = {
    'dummy': DummyClassifier(),
    'CART': DecisionTreeClassifier(),
    'ExtraTrees': ExtraTreesClassifier(),
    'RandomForest': RandomForestClassifier(),
    'Nystroem-SVM': make_pipeline(
        Nystroem(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    'SampledRBF-SVM': make_pipeline(
        RBFSampler(gamma=0.015, n_components=1000), LinearSVC(C=100)),
    'LogisticRegression-SAG': LogisticRegression(solver='sag', tol=1e-1,
                                                 C=1e4),
    'LogisticRegression-SAGA': LogisticRegression(solver='saga', tol=1e-1,
                                                  C=1e4),
    'MultilayerPerceptron': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        solver='sgd', learning_rate_init=0.2, momentum=0.9, verbose=1,
        tol=1e-4, random_state=1),
    'MLP-adam': MLPClassifier(
        hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
        solver='adam', learning_rate_init=0.001, verbose=1,
        tol=1e-4, random_state=1)
}
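# A minimal driver sketch for the ESTIMATORS table above: fit, predict, and
# time each estimator. Illustrative only -- the original benchmark script also
# reports memory usage and prints a formatted results table; `load_data` is a
# stand-in name for the helper whose tail appears above.
from time import time

from sklearn.metrics import zero_one_loss


def run_benchmark():
    X_train, X_test, y_train, y_test = load_data()
    for name, estimator in sorted(ESTIMATORS.items()):
        t0 = time()
        estimator.fit(X_train, y_train)
        train_time = time() - t0
        t0 = time()
        y_pred = estimator.predict(X_test)
        test_time = time() - t0
        error = zero_one_loss(y_test, y_pred)
        print("%s: train %.2fs, test %.2fs, error %.4f"
              % (name, train_time, test_time, error))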
data_train, targets_train = (data[:n_samples // 2],
                             digits.target[:n_samples // 2])

# Now predict the value of the digit on the second half:
data_test, targets_test = (data[n_samples // 2:],
                           digits.target[n_samples // 2:])
# data_test = scaler.transform(data_test)

# Create a classifier: a support vector classifier
kernel_svm = svm.SVC(gamma=.2)
linear_svm = svm.LinearSVC()

# create pipeline from kernel approximation
# and linear svm
feature_map_fourier = RBFSampler(gamma=.2, random_state=1)
feature_map_nystroem = Nystroem(gamma=.2, random_state=1)
fourier_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fourier),
                                        ("svm", svm.LinearSVC())])

nystroem_approx_svm = pipeline.Pipeline([("feature_map", feature_map_nystroem),
                                         ("svm", svm.LinearSVC())])

# fit and predict using linear and kernel svm:
kernel_svm_time = time()
kernel_svm.fit(data_train, targets_train)
kernel_svm_score = kernel_svm.score(data_test, targets_test)
kernel_svm_time = time() - kernel_svm_time

linear_svm_time = time()
linear_svm.fit(data_train, targets_train)
linear_svm_score = linear_svm.score(data_test, targets_test)
linear_svm_time = time() - linear_svm_time
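# Sketch of the comparison this example builds toward: sweep the number of
# sampled components and score both approximate pipelines against the exact
# kernel SVM fitted above (illustrative; the original example additionally
# times each fit and plots the accuracy/timing curves; assumes numpy is
# imported as np):
sample_sizes = 30 * np.arange(1, 10)
fourier_scores, nystroem_scores = [], []
for D in sample_sizes:
    fourier_approx_svm.set_params(feature_map__n_components=D)
    nystroem_approx_svm.set_params(feature_map__n_components=D)
    fourier_approx_svm.fit(data_train, targets_train)
    nystroem_approx_svm.fit(data_train, targets_train)
    fourier_scores.append(fourier_approx_svm.score(data_test, targets_test))
    nystroem_scores.append(nystroem_approx_svm.score(data_test, targets_test))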