def test_lle_simple_grid():
    # LLE on a regular 5x5 grid embedded into a space of the same
    # dimensionality (out_dim == n_dim): the barycenter weights must
    # reconstruct both the input and the embedding with a small error.
    rng = np.random.RandomState(42)
    out_dim = 2
    tol = .1
    X = np.array(list(product(range(5), repeat=2)))
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=out_dim,
                                          random_state=rng)

    # dense barycenter weight matrix computed on the input grid
    weights = neighbors.kneighbors_graph(
        X, clf.n_neighbors, mode='barycenter').todense()
    input_error = np.linalg.norm(np.dot(weights, X) - X, 'fro')
    assert_lower(input_error, tol)

    for solver in eigen_solvers:
        clf.fit(X, eigen_solver=solver)
        embedding = clf.embedding_
        assert embedding.shape[1] == out_dim

        # squared Frobenius norm of the residual in the embedded space
        residual = np.dot(weights, embedding) - embedding
        embedding_error = np.linalg.norm(residual, 'fro') ** 2

        # FIXME: ARPACK fails this test ...
        if solver != 'arpack':
            assert_lower(embedding_error, tol)
            assert_almost_equal(clf.reconstruction_error_,
                                embedding_error, decimal=4)

    # transforming a slightly perturbed copy of X should land close to
    # the embedding learned during the last fit
    noise = rng.randn(*X.shape) / 100
    X_reembedded = clf.transform(X + noise)
    assert_lower(np.linalg.norm(X_reembedded - clf.embedding_), tol)
def test_kneighbors_iris():
    # Barycenter weights should reconstruct a real dataset (iris) with a
    # small per-sample error. The neighbor counts cover both
    # n_neighbors < n_dim and n_neighbors > n_dim.
    n_samples = iris.data.shape[0]
    for n_neighbors in range(1, 8):
        graph = neighbors.kneighbors_graph(iris.data, n_neighbors,
                                           mode='barycenter')
        reconstructed = np.dot(graph.todense(), iris.data)
        error = np.linalg.norm(reconstructed - iris.data) / n_samples
        assert error < 0.1
def test_kneighbors_iris():
    # Barycenter weights should reconstruct a real dataset (iris) with a
    # small per-sample error. The neighbor counts cover both
    # n_neighbors < n_dim and n_neighbors > n_dim.
    for n_neighbors in range(1, 8):
        # exercise both accepted inputs: a plain array and a BallTree
        for source in (iris.data, neighbors.BallTree(iris.data)):
            graph = neighbors.kneighbors_graph(source, n_neighbors,
                                               mode='barycenter')
            # objects exposing ``query`` are unwrapped through their
            # ``data`` attribute before comparing against the raw points
            points = source.data if hasattr(source, 'query') else source
            reconstructed = np.dot(graph.todense(), points)
            error = np.linalg.norm(reconstructed - points) / points.shape[0]
            assert error < 0.1
def test_isomap_reconstruction_error():
    # Same grid as test_isomap_simple_grid, plus a noisy third coordinate.
    # The reconstruction error reported by Isomap must agree with the one
    # computed by hand from the centered input and output kernels.
    n_per_side = 5
    n_points = n_per_side ** 2
    n_neighbors = n_points - 1

    # regular 2D grid of equidistant points
    grid = np.array(list(product(range(n_per_side), repeat=2)))

    # append a third, noisy dimension
    rng = np.random.RandomState(0)
    noise = 0.1 * rng.randn(n_points, 1)
    X = np.concatenate((grid, noise), 1)

    # input kernel: centered -0.5 * (squared pairwise distances)
    centerer = preprocessing.KernelCenterer()
    dist_in = neighbors.kneighbors_graph(X, n_neighbors,
                                         mode='distance').toarray()
    kernel_in = centerer.fit_transform(-0.5 * dist_in ** 2)

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, out_dim=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            # output kernel, built the same way from the embedding
            dist_out = neighbors.kneighbors_graph(
                clf.embedding_, n_neighbors, mode='distance').toarray()
            kernel_out = centerer.fit_transform(-0.5 * dist_out ** 2)

            # both error estimates must agree
            expected_error = np.linalg.norm(kernel_in - kernel_out) / n_points
            assert_almost_equal(expected_error, clf.reconstruction_error())
def test_kneighbors_graph():
    """Check the graphs built by kneighbors_graph on three 2D points."""
    X = [[0, 1], [1.01, 1.], [2, 0]]

    # ---- n_neighbors = 1 ----
    graph = neighbors.kneighbors_graph(X, 1, mode='connectivity')
    assert_array_equal(graph.todense(), np.eye(graph.shape[0]))

    expected_distance = [[0., 1.01, 0.],
                         [1.01, 0., 0.],
                         [0., 1.40716026, 0.]]
    graph = neighbors.kneighbors_graph(X, 1, mode='distance')
    assert_array_almost_equal(graph.todense(), expected_distance)

    expected_barycenter = [[0., 1., 0.],
                           [1., 0., 0.],
                           [0., 1., 0.]]
    graph = neighbors.kneighbors_graph(X, 1, mode='barycenter')
    assert_array_almost_equal(graph.todense(), expected_barycenter)

    # ---- n_neighbors = 2 ----
    expected_connectivity = [[1., 1., 0.],
                             [1., 1., 0.],
                             [0., 1., 1.]]
    graph = neighbors.kneighbors_graph(X, 2, mode='connectivity')
    assert_array_equal(graph.todense(), expected_connectivity)

    expected_distance = [[0., 1.01, 2.23606798],
                         [1.01, 0., 1.40716026],
                         [2.23606798, 1.40716026, 0.]]
    graph = neighbors.kneighbors_graph(X, 2, mode='distance')
    assert_array_almost_equal(graph.todense(), expected_distance)

    graph = neighbors.kneighbors_graph(X, 2, mode='barycenter')
    dense = graph.todense()
    # the weights of each row (summed along axis 1) must add up to one
    assert_array_almost_equal(np.sum(dense, 1), np.ones((3, 1)))
    expected_barycenter = [[0., 1.5049745, -0.5049745],
                           [0.596, 0., 0.404],
                           [-0.98019802, 1.98019802, 0.]]
    assert_array_almost_equal(graph.todense(), expected_barycenter)

    # ---- n_neighbors = 3 ----
    graph = neighbors.kneighbors_graph(X, 3, mode='connectivity')
    assert_array_almost_equal(graph.todense(),
                              [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
def test_kneighbors_graph():
    """Check the graphs built by kneighbors_graph on three 2D points."""
    X = [[0, 1], [1.01, 1.], [2, 0]]

    # ---- n_neighbors = 1 ----
    knn = neighbors.kneighbors_graph(X, 1, mode='connectivity')
    assert_array_equal(knn.todense(), np.eye(knn.shape[0]))

    knn = neighbors.kneighbors_graph(X, 1, mode='distance')
    want = [[0., 1.01, 0.],
            [1.01, 0., 0.],
            [0., 1.40716026, 0.]]
    assert_array_almost_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 1, mode='barycenter')
    want = [[0., 1., 0.],
            [1., 0., 0.],
            [0., 1., 0.]]
    assert_array_almost_equal(knn.todense(), want)

    # ---- n_neighbors = 2 ----
    knn = neighbors.kneighbors_graph(X, 2, mode='connectivity')
    want = [[1., 1., 0.],
            [1., 1., 0.],
            [0., 1., 1.]]
    assert_array_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 2, mode='distance')
    want = [[0., 1.01, 2.23606798],
            [1.01, 0., 1.40716026],
            [2.23606798, 1.40716026, 0.]]
    assert_array_almost_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 2, mode='barycenter')
    # summing along axis 1: every row of weights must add up to one
    row_sums = np.sum(knn.todense(), 1)
    assert_array_almost_equal(row_sums, np.ones((3, 1)))
    want = [[0., 1.5049745, -0.5049745],
            [0.596, 0., 0.404],
            [-0.98019802, 1.98019802, 0.]]
    assert_array_almost_equal(knn.todense(), want)

    # ---- n_neighbors = 3 ----
    knn = neighbors.kneighbors_graph(X, 3, mode='connectivity')
    want = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    assert_array_almost_equal(knn.todense(), want)
def test_kneighbors_graph():
    """Check kneighbors_graph on three 1D points for every ``weight``."""
    X = [[0], [1.01], [2]]

    # unweighted graph, with and without the first neighbor dropped
    graph = neighbors.kneighbors_graph(X, 2, weight=None)
    assert_array_equal(graph.todense(),
                       [[1, 1, 0], [0, 1, 1], [0, 1, 1]])

    graph = neighbors.kneighbors_graph(X, 2, weight=None, drop_first=True)
    assert_array_equal(graph.todense(),
                       [[0, 1, 0], [0, 0, 1], [0, 1, 0]])

    # distance weights: the same matrix is expected with or without
    # drop_first
    expected_distance = [[0, 1.01, 0], [0, 0, 0.99], [0, 0.99, 0]]
    graph = neighbors.kneighbors_graph(X, 2, weight="distance")
    assert_array_almost_equal(graph.todense(), expected_distance, 4)

    graph = neighbors.kneighbors_graph(X, 2, weight="distance",
                                       drop_first=True)
    assert_array_almost_equal(graph.todense(), expected_distance, 4)

    # barycenter weights, compared to two decimals
    graph = neighbors.kneighbors_graph(X, 2, weight='barycenter')
    assert_array_almost_equal(
        graph.todense(), [[0.99, 0, 0], [0, 0.99, 0], [0, 0, 0.99]], 2)

    graph = neighbors.kneighbors_graph(X, 2, weight='barycenter',
                                       drop_first=True)
    assert_array_almost_equal(
        graph.todense(), [[0, 1, 0], [0, 0, 1], [0, 1, 0]], 2)

    # Corner cases: as many neighbors as there are points
    # TODO: result should be compared
    graph = neighbors.kneighbors_graph(X, 3, weight=None)
    assert_array_almost_equal(graph.todense(),
                              [[1, 1, 1], [1, 1, 1], [1, 1, 1]])

    graph = neighbors.kneighbors_graph(X, 3, weight="distance")
    assert_array_almost_equal(graph.todense(),
                              [[0., 1.01, 2.],
                               [1.01, 0., 0.99],
                               [2., 0.99, 0.]])

    # only checks that the call succeeds; the result is not compared
    graph = neighbors.kneighbors_graph(X, 3, weight="barycenter")
def test_isomap_simple_grid():
    # With every other point used as a neighbor, Isomap is expected to
    # preserve the pairwise distances of the input.
    n_per_side = 5
    n_points = n_per_side ** 2
    n_neighbors = n_points - 1

    # regular 2D grid of equidistant points, out_dim = n_dim
    X = np.array(list(product(range(n_per_side), repeat=2)))

    # distances from each point to all the others in the input space
    dist_in = neighbors.kneighbors_graph(X, n_neighbors,
                                         mode='distance').toarray()

    for eigen_solver in eigen_solvers:
        for path_method in path_methods:
            clf = manifold.Isomap(n_neighbors=n_neighbors, out_dim=2,
                                  eigen_solver=eigen_solver,
                                  path_method=path_method)
            clf.fit(X)

            # the same distances, measured in the embedded space
            dist_out = neighbors.kneighbors_graph(
                clf.embedding_, n_neighbors, mode='distance').toarray()
            assert_array_almost_equal(dist_in, dist_out)
def test_kneighbors_graph():
    """Check the graphs built by kneighbors_graph on three 2D points."""
    X = np.array([[0, 1], [1.01, 1.], [2, 0]])

    # ---- n_neighbors = 1 ----
    graph = neighbors.kneighbors_graph(X, 1, mode='connectivity')
    assert_array_equal(graph.todense(), np.eye(graph.shape[0]))

    expected_distance = [[0., 1.01, 0.],
                         [1.01, 0., 0.],
                         [0., 1.40716026, 0.]]
    graph = neighbors.kneighbors_graph(X, 1, mode='distance')
    assert_array_almost_equal(graph.todense(), expected_distance)

    expected_barycenter = [[0., 1., 0.],
                           [1., 0., 0.],
                           [0., 1., 0.]]
    graph = neighbors.kneighbors_graph(X, 1, mode='barycenter')
    assert_array_almost_equal(graph.todense(), expected_barycenter)

    # ---- n_neighbors = 2 ----
    expected_connectivity = [[1., 1., 0.],
                             [1., 1., 0.],
                             [0., 1., 1.]]
    graph = neighbors.kneighbors_graph(X, 2, mode='connectivity')
    assert_array_equal(graph.todense(), expected_connectivity)

    expected_distance = [[0., 1.01, 2.23606798],
                         [1.01, 0., 1.40716026],
                         [2.23606798, 1.40716026, 0.]]
    graph = neighbors.kneighbors_graph(X, 2, mode='distance')
    assert_array_almost_equal(graph.todense(), expected_distance)

    graph = neighbors.kneighbors_graph(X, 2, mode='barycenter')
    # the weights of each row (summed along axis 1) must add up to one
    assert_array_almost_equal(np.sum(graph.todense(), 1), np.ones((3, 1)))
    # loose bound on the per-sample reconstruction error
    reconstructed = np.dot(graph.todense(), X)
    error = np.linalg.norm(reconstructed - X) / X.shape[0]
    assert error < 1

    # ---- n_neighbors = 3 ----
    graph = neighbors.kneighbors_graph(X, 3, mode='connectivity')
    assert_array_almost_equal(graph.todense(),
                              [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
def locally_linear_embedding(X, n_neighbors, out_dim, tol=1e-6, max_iter=200):
    """Embed X with locally linear embedding solved by preconditioned LOBPCG.

    Parameters
    ----------
    X : data handed to ``neighbors.kneighbors_graph``
    n_neighbors : number of neighbors used to build the barycenter graph
    out_dim : number of eigenvectors requested (columns of the initial guess)
    tol : tolerance forwarded to ``linalg.lobpcg``
    max_iter : forwarded to ``linalg.lobpcg`` as ``maxiter``

    Returns
    -------
    (eigen_vectors, error) : the eigenvectors with columns ordered by
        ascending eigenvalue, and the sum of the computed eigenvalues.
    """
    weights = neighbors.kneighbors_graph(X, n_neighbors=n_neighbors,
                                         mode="barycenter")

    # M = (I-W)' (I-W)
    identity_minus_w = eye(*weights.shape, format=weights.format) - weights
    M = (identity_minus_w.T).dot(identity_minus_w).tocsr()

    # random starting block for the eigenvectors (global numpy RNG)
    initial_guess = np.random.rand(weights.shape[0], out_dim)

    # multilevel solver wrapped as a preconditioner for LOBPCG
    multilevel = smoothed_aggregation_solver(M, symmetry="symmetric")
    preconditioner = multilevel.aspreconditioner()

    # smallest eigenpairs of M (largest=False)
    eigen_values, eigen_vectors = linalg.lobpcg(
        M, initial_guess, M=preconditioner, largest=False,
        tol=tol, maxiter=max_iter)

    order = np.argsort(eigen_values)
    return eigen_vectors[:, order], np.sum(eigen_values)
def test_lle_manifold():
    # Same check as on the simple grid, on a slightly more complex
    # manifold: a 20x20 grid lifted by a quadratic third coordinate.
    grid = np.array(list(product(range(20), repeat=2)))
    # NOTE(review): on an integer array this division floors under
    # Python 2 unless true division is enabled -- confirm which is intended.
    X = np.c_[grid, grid[:, 0]**2 / 20]
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=2)
    tol = .5

    # dense barycenter weights computed on the input
    weights = neighbors.kneighbors_graph(X, clf.n_neighbors,
                                         mode='barycenter').todense()
    input_error = np.linalg.norm(np.dot(weights, X) - X)
    assert input_error < tol

    for solver in ('dense', 'lobpcg'):
        clf.fit(X, eigen_solver=solver)
        embedding = clf.embedding_
        # squared Frobenius norm of the residual in the embedded space
        residual = np.dot(weights, embedding) - embedding
        embedding_error = np.linalg.norm(residual, 'fro') ** 2
        assert embedding_error < tol
        assert_array_almost_equal(clf.reconstruction_error_, embedding_error)
def test_lle_simple_grid():
    # LLE on a regular 5x5 grid of equidistant points in 2D, embedded
    # with out_dim = n_dim: the barycenter weights must reconstruct both
    # the input and the embedding with an error below ``tol``.
    #
    # A seeded RandomState provides the perturbation so that the test is
    # reproducible from run to run.
    rng = np.random.RandomState(42)
    X = np.array(list(product(range(5), repeat=2)))
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=2)
    tol = .1

    # dense barycenter weight matrix computed on the input grid
    N = neighbors.kneighbors_graph(X, clf.n_neighbors,
                                   mode='barycenter').todense()
    reconstruction_error = np.linalg.norm(np.dot(N, X) - X, 'fro')
    assert reconstruction_error < tol

    for solver in ('dense', 'lobpcg'):
        clf.fit(X, eigen_solver=solver)
        # squared Frobenius norm of the residual in the embedded space
        reconstruction_error = np.linalg.norm(
            np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
        assert reconstruction_error < tol
        assert_array_almost_equal(clf.reconstruction_error_,
                                  reconstruction_error, decimal=4)

    # re-embed a slightly noisy copy of X with transform(); it must land
    # close to the embedding learned during the last fit
    noise = rng.randn(*X.shape) / 100
    assert np.linalg.norm(clf.transform(X + noise) - clf.embedding_) < tol
def locally_linear_embedding(X, n_neighbors, out_dim, tol=1e-6, max_iter=200):
    """Compute a locally linear embedding of X using LOBPCG.

    The barycenter neighbor graph W of ``X`` is built, then the smallest
    eigenpairs of M = (I-W)' (I-W) are computed with a preconditioned
    LOBPCG started from a random block with ``out_dim`` columns.

    ``tol`` and ``max_iter`` are forwarded to ``linalg.lobpcg``.

    Returns the eigenvectors (columns sorted by ascending eigenvalue)
    and the sum of the eigenvalues.
    """
    W = neighbors.kneighbors_graph(X, n_neighbors=n_neighbors,
                                   mode='barycenter')
    n_rows = W.shape[0]

    # M = (I-W)' (I-W)
    diff = eye(*W.shape, format=W.format) - W
    M = (diff.T).dot(diff).tocsr()

    # starting approximation of the eigenvectors, drawn from the global
    # numpy RNG
    start = np.random.rand(n_rows, out_dim)

    # preconditioner derived from a smoothed-aggregation multilevel solver
    prec = smoothed_aggregation_solver(M, symmetry='symmetric').aspreconditioner()

    # smallest eigenvalues and their eigenvectors via LOBPCG
    eigen_values, eigen_vectors = linalg.lobpcg(
        M, start, M=prec, largest=False, tol=tol, maxiter=max_iter)

    ascending = np.argsort(eigen_values)
    return eigen_vectors[:, ascending], np.sum(eigen_values)
def test_lle_manifold():
    # LLE reconstruction check on a slightly more complex manifold:
    # a 20x20 grid with a quadratic third coordinate appended.
    base = np.array(list(product(range(20), repeat=2)))
    X = np.c_[base, base[:, 0]**2 / 20]
    tol = .5
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=2)

    # barycenter weights of the input, as a dense matrix
    bary = neighbors.kneighbors_graph(X, clf.n_neighbors,
                                      mode='barycenter').todense()
    error_in = np.linalg.norm(np.dot(bary, X) - X)
    assert error_in < tol

    for solver in ('dense', 'lobpcg'):
        clf.fit(X, eigen_solver=solver)
        embedded = clf.embedding_
        # squared Frobenius norm of the residual on the embedding
        error_out = np.linalg.norm(np.dot(bary, embedded) - embedded,
                                   'fro')**2
        assert error_out < tol
        assert_array_almost_equal(clf.reconstruction_error_, error_out)
def test_lle_simple_grid():
    # LLE on a regular 5x5 grid of equidistant points in 2D, embedded
    # with out_dim = n_dim: the barycenter weights must reconstruct both
    # the input and the embedding with an error below ``tol``.
    #
    # The perturbation used at the end comes from a seeded RandomState
    # so the test gives the same result on every run.
    rng = np.random.RandomState(42)
    X = np.array(list(product(range(5), repeat=2)))
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=2)
    tol = .1

    # dense barycenter weight matrix computed on the input grid
    N = neighbors.kneighbors_graph(X, clf.n_neighbors,
                                   mode='barycenter').todense()
    reconstruction_error = np.linalg.norm(np.dot(N, X) - X, 'fro')
    assert reconstruction_error < tol

    for solver in ('dense', 'lobpcg'):
        clf.fit(X, eigen_solver=solver)
        # squared Frobenius norm of the residual in the embedded space
        reconstruction_error = np.linalg.norm(
            np.dot(N, clf.embedding_) - clf.embedding_, 'fro')**2
        assert reconstruction_error < tol
        assert_array_almost_equal(clf.reconstruction_error_,
                                  reconstruction_error, decimal=4)

    # transform() applied to a slightly noisy copy of X must stay close
    # to the embedding learned during the last fit
    noise = rng.randn(*X.shape) / 100
    assert np.linalg.norm(clf.transform(X + noise) - clf.embedding_) < tol
def test_lle_manifold():
    # LLE reconstruction check on a slightly more complex manifold:
    # a 20x20 grid with a quadratic third coordinate appended.
    grid = np.array(list(product(range(20), repeat=2)))
    X = np.c_[grid, grid[:, 0] ** 2 / 20]
    out_dim = 2
    tol = .5
    clf = manifold.LocallyLinearEmbedding(n_neighbors=5, out_dim=out_dim,
                                          random_state=42)

    # barycenter weights of the input, as a dense array
    weights = neighbors.kneighbors_graph(X, clf.n_neighbors,
                                         mode='barycenter').toarray()
    input_error = np.linalg.norm(np.dot(weights, X) - X)
    assert_lower(input_error, tol)

    for solver in eigen_solvers:
        clf.fit(X, eigen_solver=solver)
        embedding = clf.embedding_
        assert embedding.shape[1] == out_dim

        # squared Frobenius norm of the residual on the embedding
        residual = np.dot(weights, embedding) - embedding
        embedding_error = np.linalg.norm(residual, 'fro') ** 2

        # name the solver in the failure message
        details = "solver: " + solver
        assert_lower(embedding_error, tol, details=details)

        # the estimator's own error must match within a relative tolerance
        gap = np.abs(clf.reconstruction_error_ - embedding_error)
        assert_lower(gap, tol * embedding_error, details=details)
def test_kneighbors_graph():
    """Check the graphs built by kneighbors_graph on three 2D points."""
    X = np.array([[0, 1], [1.01, 1.], [2, 0]])

    # ---- n_neighbors = 1 ----
    knn = neighbors.kneighbors_graph(X, 1, mode='connectivity')
    assert_array_equal(knn.todense(), np.eye(knn.shape[0]))

    knn = neighbors.kneighbors_graph(X, 1, mode='distance')
    want = [[0., 1.01, 0.],
            [1.01, 0., 0.],
            [0., 1.40716026, 0.]]
    assert_array_almost_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 1, mode='barycenter')
    want = [[0., 1., 0.],
            [1., 0., 0.],
            [0., 1., 0.]]
    assert_array_almost_equal(knn.todense(), want)

    # ---- n_neighbors = 2 ----
    knn = neighbors.kneighbors_graph(X, 2, mode='connectivity')
    want = [[1., 1., 0.],
            [1., 1., 0.],
            [0., 1., 1.]]
    assert_array_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 2, mode='distance')
    want = [[0., 1.01, 2.23606798],
            [1.01, 0., 1.40716026],
            [2.23606798, 1.40716026, 0.]]
    assert_array_almost_equal(knn.todense(), want)

    knn = neighbors.kneighbors_graph(X, 2, mode='barycenter')
    # summing along axis 1: every row of weights must add up to one
    row_sums = np.sum(knn.todense(), 1)
    assert_array_almost_equal(row_sums, np.ones((3, 1)))
    # loose bound on the per-sample reconstruction error
    pred = np.dot(knn.todense(), X)
    assert np.linalg.norm(pred - X) / X.shape[0] < 1

    # ---- n_neighbors = 3 ----
    knn = neighbors.kneighbors_graph(X, 3, mode='connectivity')
    want = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
    assert_array_almost_equal(knn.todense(), want)
import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3
from scikits.learn.neighbors import kneighbors_graph
from scikits.learn.cluster import Ward
from scikits.learn.datasets.samples_generator import swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 5000
noise = 0.05
X = swiss_roll(n_samples, noise)

###############################################################################
# Define the connectivity structure of the data: a 10-nearest-neighbors graph
connectivity = kneighbors_graph(X, n_neighbors=10)

###############################################################################
# Compute clustering: Ward with 10 clusters, constrained by the graph above
# NOTE(review): `time` is used below but is not imported in this fragment;
# presumably it is imported earlier in the file -- confirm.
print "Compute structured hierarchical clustering..."
st = time.time()
ward = Ward(n_clusters=10).fit(X, connectivity=connectivity)
label = ward.labels_
print "Elapsed time: ", time.time() - st
print "Number of points: ", label.size
print "Number of clusters: ", np.unique(label).size

###############################################################################
# Plot result on a 3D axes
fig = pl.figure()
ax = p3.Axes3D(fig)
############################################################################### # Plot result fig = pl.figure() ax = p3.Axes3D(fig) ax.view_init(7, -80) for l in np.unique(label): ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2], 'o', color=pl.cm.jet(np.float(l) / np.max(label + 1))) pl.title('Without connectivity constraints') ############################################################################### # Define the structure A of the data. Here a 10 nearest neighbors from scikits.learn.neighbors import kneighbors_graph connectivity = kneighbors_graph(X, n_neighbors=10) ############################################################################### # Compute clustering print "Compute structured hierarchical clustering..." st = time.time() ward = Ward(n_clusters=6).fit(X, connectivity=connectivity) label = ward.labels_ print "Elapsed time: ", time.time() - st print "Number of points: ", label.size ############################################################################### # Plot result fig = pl.figure() ax = p3.Axes3D(fig) ax.view_init(7, -80)