def test_ward_clustering():
    """Check that Ward clustering yields the requested number of clusters."""
    np.random.seed(0)
    # np.bool was a deprecated alias for the builtin bool (removed in
    # NumPy 1.24); the builtin is equivalent on every NumPy version.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    # Exactly 10 distinct labels must have been assigned.
    assert np.size(np.unique(clustering.labels_)) == 10
def test_height_ward_tree():
    """Check that the ward tree has the expected total number of nodes.

    NOTE(review): the original docstring claimed the tree height is
    sorted, but only the node count is actually verified here.
    """
    np.random.seed(0)
    # Use builtin bool instead of the deprecated np.bool alias.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    # The second return value was bound to a name that was immediately
    # overwritten below; discard it explicitly instead of shadowing it.
    children, _, n_leaves = ward_tree(X.T, connectivity)
    # A full binary merge tree over m leaves has 2*m - 1 nodes in total.
    n_nodes = 2 * X.shape[1] - 1
    assert len(children) + n_leaves == n_nodes
def test_structured_ward_tree():
    """Check that we obtain the correct solution for a structured ward tree."""
    np.random.seed(0)
    # Use builtin bool instead of the deprecated np.bool alias
    # (removed in NumPy 1.24); behavior is identical.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    children, n_components, n_leaves = ward_tree(X.T, connectivity)
    # A full binary merge tree over m leaves has 2*m - 1 nodes in total.
    n_nodes = 2 * X.shape[1] - 1
    assert len(children) + n_leaves == n_nodes
def test_cluster_permutation_t_test_with_connectivity():
    """Test cluster level permutations T-test with connectivity matrix."""
    try:
        from scikits.learn.feature_extraction.image import grid_to_graph
    except ImportError:
        # Optional dependency missing: the test is a no-op in that case.
        pass
    else:
        # Run the permutation test twice: once without and once with an
        # explicit connectivity structure over the samples.
        out = permutation_cluster_1samp_test(condition1, n_permutations=500)
        connectivity = grid_to_graph(1, condition1.shape[1])
        out_connectivity = permutation_cluster_1samp_test(
            condition1, n_permutations=500, connectivity=connectivity)
        # The T-statistic map must not depend on the connectivity argument.
        assert_array_equal(out[0], out_connectivity[0])
        # Every cluster found with connectivity must coincide with the
        # corresponding cluster found without it.
        for clu_conn, clu_plain in zip(out_connectivity[1], out[1]):
            assert_true(np.sum(out[0][clu_conn]) == np.sum(out[0][clu_plain]))
            assert_true(np.all(clu_conn[clu_plain]))
def test_ward_agglomeration():
    """Check that WardAgglomeration works in a simplistic case."""
    np.random.seed(0)
    # Use builtin bool instead of the deprecated np.bool alias
    # (removed in NumPy 1.24); behavior is identical.
    mask = np.ones([10, 10], dtype=bool)
    X = np.random.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert np.size(np.unique(ward.labels_)) == 5
    # transform reduces the features to one value per cluster ...
    Xred = ward.transform(X)
    assert Xred.shape[1] == 5
    # ... and inverse_transform maps each feature back to its cluster value.
    Xfull = ward.inverse_transform(Xred)
    assert np.unique(Xfull[0]).size == 5
import pylab as pl from scikits.learn.feature_extraction.image import grid_to_graph from scikits.learn.cluster import Ward ############################################################################### # Generate data lena = sp.lena() # Downsample the image by a factor of 4 lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2] lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2] mask = np.ones_like(lena).astype(bool) X = np.atleast_2d(lena[mask]).T ############################################################################### # Define the structure A of the data. Pixels connected to their neighbors. connectivity = grid_to_graph(*lena.shape) ############################################################################### # Compute clustering print "Compute structured hierarchical clustering..." st = time.time() n_clusters = 15 # number of regions ward = Ward(n_clusters=n_clusters).fit(X, connectivity=connectivity) label = np.reshape(ward.labels_, mask.shape) print "Elaspsed time: ", time.time() - st print "Number of pixels: ", label.size print "Number of clusters: ", np.unique(label).size ############################################################################### # Plot the results on an image pl.figure(figsize=(5, 5))
# Standardize the design matrix and build a noisy linear target.
X -= X.mean(axis=0)
X /= X.std(axis=0)
y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
# Scale the noise so the signal-to-noise ratio matches `snr` (in dB).
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2)  # cross-validation generator for model selection
ridge = BayesianRidge()
mem = Memory(cachedir='.', verbose=1)

# Ward agglomeration followed by BayesianRidge
A = grid_to_graph(n_x=size, n_y=size)
ward = WardAgglomeration(n_clusters=10, connectivity=A, memory=mem,
                         n_components=1)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, {'ward__n_clusters': [10, 20, 30]}, n_jobs=1)
clf.fit(X, y, cv=cv)  # set the best parameters
# Recover the per-pixel coefficients from the fitted pipeline steps.
coef_ = clf.best_estimator.steps[-1][1].coef_
coef_ = clf.best_estimator.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
anova = feature_selection.SelectPercentile(f_regression)
clf = Pipeline([('anova', anova), ('ridge', ridge)])
# Select the optimal percentage of features with grid search
size = 4 # image size roi_size = 2 X = np.zeros(size**2) X2 = X #Generating two convexe parts mask = np.zeros((size, size), dtype=bool) mask[0:roi_size, 0:roi_size] = True mask[-roi_size:, -roi_size:] = True mask = mask.reshape(size**2) X = X[mask] # making n_samples X2 = X2 + np.zeros((n_samples, 1)) X = X + np.arange(n_samples).reshape((n_samples, 1)) Y = np.arange(n_samples) # Generating the connectivity grids and ward trees A = grid_to_graph(n_x=size, n_y=size, mask=mask) children, n_components, n_leaves = ward_tree(X.T, connectivity=A, n_components=2) children = children.tolist() A2 = grid_to_graph(n_x=size, n_y=size) children2, n_components2, n_leaves2 = ward_tree(X2.T, connectivity=A2, n_components=1) children2 = children2.tolist() ############################################################################### # Test functions def test_tree_roots(): """
# Standardize the design matrix and build a noisy linear target.
X -= X.mean(axis=0)
X /= X.std(axis=0)
y = np.dot(X, coef.ravel())
noise = np.random.randn(y.shape[0])
# Scale the noise so the signal-to-noise ratio matches `snr` (in dB).
noise_coef = (linalg.norm(y, 2) / np.exp(snr / 20.)) / linalg.norm(noise, 2)
y += noise_coef * noise  # add noise

###############################################################################
# Compute the coefs of a Bayesian Ridge with GridSearch
cv = KFold(len(y), 2)  # cross-validation generator for model selection
ridge = BayesianRidge()
mem = Memory(cachedir='.', verbose=1)

# Ward agglomeration followed by BayesianRidge
A = grid_to_graph(n_x=size, n_y=size)
ward = WardAgglomeration(n_clusters=10, connectivity=A, memory=mem,
                         n_components=1)
clf = Pipeline([('ward', ward), ('ridge', ridge)])
parameters = {'ward__n_clusters': [10, 20, 30]}
# Select the optimal number of parcels with grid search
clf = GridSearchCV(clf, parameters, n_jobs=1)
clf.fit(X, y, cv=cv)  # set the best parameters
# Recover the per-pixel coefficients from the fitted pipeline steps.
coef_ = clf.best_estimator.steps[-1][1].coef_
coef_ = clf.best_estimator.steps[0][1].inverse_transform(coef_)
coef_agglomeration_ = coef_.reshape(size, size)

# Anova univariate feature selection followed by BayesianRidge
f_regression = mem.cache(feature_selection.f_regression)  # caching function
import pylab as pl from scikits.learn.feature_extraction.image import grid_to_graph from scikits.learn.cluster import Ward ############################################################################### # Generate data lena = sp.lena() # Downsample the image by a factor of 4 lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2] lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2] mask = np.ones_like(lena).astype(bool) X = np.atleast_2d(lena[mask]).T ############################################################################### # Define the structure A of the data. Pixels connected to their neighbors. connectivity = grid_to_graph(*lena.shape) ############################################################################### # Compute clustering print "Compute structured hierarchical clustering..." st = time.time() n_clusters = 15 # number of regions ward = Ward(n_clusters=n_clusters).fit(X, connectivity=connectivity) label = np.reshape(ward.labels_, mask.shape) print "Elaspsed time: ", time.time() - st print "Number of pixels: ", label.size print "Number of clusters: ", np.unique(label).size ############################################################################### # Plot the results on an image pl.figure(figsize=(5, 5))
import numpy as np
import pylab as pl

from scikits.learn import datasets, cluster
from scikits.learn.feature_extraction.image import grid_to_graph

digits = datasets.load_digits()
images = digits.images
# Flatten each image into a one-dimensional feature vector.
X = np.reshape(images, (len(images), -1))
connectivity = grid_to_graph(*images[0].shape)

# Agglomerate the pixels down to 32 spatially-connected feature clusters,
# then reconstruct the images from the reduced representation.
agglo = cluster.WardAgglomeration(connectivity=connectivity, n_clusters=32)
agglo.fit(X)
X_reduced = agglo.transform(X)
X_restored = agglo.inverse_transform(X_reduced)
images_restored = np.reshape(X_restored, images.shape)

# Plot the first four originals on the top row.
pl.figure(1, figsize=(4, 3.5))
pl.clf()
pl.subplots_adjust(left=.01, right=.99, bottom=.01, top=.91)
for idx in range(4):
    pl.subplot(3, 4, idx + 1)
    pl.imshow(images[idx], cmap=pl.cm.gray, vmax=16, interpolation='nearest')
    pl.xticks(())
    pl.yticks(())
    if idx == 1:
        pl.title('Original data')
    pl.subplot(3, 4, 4 + idx + 1)
import numpy as np import pylab as pl from scikits.learn import datasets, cluster from scikits.learn.feature_extraction.image import grid_to_graph digits = datasets.load_digits() images = digits.images X = np.reshape(images, (len(images), -1)) connectivity = grid_to_graph(*images[0].shape) agglo = cluster.WardAgglomeration(connectivity=connectivity, n_clusters=32) agglo.fit(X) X_reduced = agglo.transform(X) X_restored = agglo.inverse_transform(X_reduced) images_restored = np.reshape(X_restored, images.shape) pl.figure(1, figsize=(4, 3.5)) pl.clf() pl.subplots_adjust(left=.01, right=.99, bottom=.01, top=.91) for i in range(4): pl.subplot(3, 4, i + 1) pl.imshow(images[i], cmap=pl.cm.gray, vmax=16, interpolation='nearest') pl.xticks(()) pl.yticks(()) if i == 1: pl.title('Original data') pl.subplot(3, 4, 4 + i + 1) pl.imshow(images_restored[i], cmap=pl.cm.gray,
size = 4  # image size
roi_size = 2
X = np.zeros(size ** 2)
X2 = X
# Build a mask with two convex regions in opposite corners.
mask = np.zeros((size, size), dtype=bool)
mask[0:roi_size, 0:roi_size] = True
mask[-roi_size:, -roi_size:] = True
mask = mask.reshape(size ** 2)
X = X[mask]
# Replicate the feature vectors over n_samples rows.
X2 = X2 + np.zeros((n_samples, 1))
X = X + np.arange(n_samples).reshape((n_samples, 1))
Y = np.arange(n_samples)
# Generate the connectivity grids and the corresponding ward trees.
A = grid_to_graph(n_x=size, n_y=size, mask=mask)
children, n_components, n_leaves = ward_tree(X.T, connectivity=A,
                                             n_components=2)
children = children.tolist()
A2 = grid_to_graph(n_x=size, n_y=size)
children2, n_components2, n_leaves2 = ward_tree(X2.T, connectivity=A2,
                                                n_components=1)
children2 = children2.tolist()

###############################################################################
# Test functions


def test_tree_roots():
    """
    Tests that the function returns the right roots.
    """