def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    np.random.seed(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = np.random.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert np.size(np.unique(clustering.labels_)) == 10
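###############################################################################
# Usage sketch (not part of the test above): cluster the pixels of a single
# 10 x 10 image with the same grid connectivity, so that merges are restricted
# to neighboring pixels. The grid_to_graph import path is an assumption here.
import numpy as np
from scikits.learn.cluster import Ward
from scikits.learn.feature_extraction.image import grid_to_graph  # assumed path

image = np.random.randn(10, 10)
X_pixels = image.reshape(-1, 1)               # 100 samples, one feature per pixel
connectivity = grid_to_graph(*image.shape)    # sparse adjacency of the pixel grid
ward = Ward(n_clusters=10, connectivity=connectivity).fit(X_pixels)
pixel_labels = ward.labels_.reshape(image.shape)  # one cluster label per pixel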
""" Bench the scikit's ward implement compared to scipy's """ import time import numpy as np from scipy.cluster import hierarchy import pylab as pl from scikits.learn.cluster import Ward ward = Ward(n_clusters=15) n_samples = np.logspace(.5, 3, 9) n_features = np.logspace(1, 3.5, 7) N_samples, N_features = np.meshgrid(n_samples, n_features) scikits_time = np.zeros(N_samples.shape) scipy_time = np.zeros(N_samples.shape) for i, n in enumerate(n_samples): for j, p in enumerate(n_features): X = np.random.normal(size=(n, p)) t0 = time.time() ward.fit(X) scikits_time[j, i] = time.time() - t0 t0 = time.time() hierarchy.ward(X) scipy_time[j, i] = time.time() - t0 ratio = scikits_time / scipy_time
import time

import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3

from scikits.learn.cluster import Ward
from scikits.learn.neighbors import kneighbors_graph
from scikits.learn.datasets.samples_generator import swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 5000
noise = 0.05
X = swiss_roll(n_samples, noise)

###############################################################################
# Define the structure A of the data. Here a 10 nearest neighbors graph.
connectivity = kneighbors_graph(X, n_neighbors=10)

###############################################################################
# Compute clustering
print "Compute structured hierarchical clustering..."
st = time.time()
ward = Ward(n_clusters=10, connectivity=connectivity).fit(X)
label = ward.labels_
print "Elapsed time: ", time.time() - st
print "Number of points: ", label.size
print "Number of clusters: ", np.unique(label).size

###############################################################################
# Plot result
fig = pl.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2], 'o',
              color=pl.cm.jet(float(l) / np.max(label + 1)))  # one color per cluster
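###############################################################################
# Sanity check and finishing touches (a sketch; the original snippet stops
# mid-figure): kneighbors_graph returns a sparse n_samples x n_samples
# adjacency matrix with roughly n_neighbors stored entries per row.
print "Connectivity shape: ", connectivity.shape
print "Stored edges: ", connectivity.nnz
pl.title('Structured hierarchical clustering on the swiss roll')
pl.show()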
""" Bench the scikit's ward implement compared to scipy's """ import time import numpy as np from scipy.cluster import hierarchy import pylab as pl from scikits.learn.cluster import Ward ward = Ward(n_clusters=15) n_samples = np.logspace(.5, 3, 9) n_features = np.logspace(1, 3.5, 7) N_samples, N_features = np.meshgrid(n_samples, n_features) scikits_time = np.zeros(N_samples.shape) scipy_time = np.zeros(N_samples.shape) for i, n in enumerate(n_samples): for j, p in enumerate(n_features): X = np.random.normal(size=(n, p)) t0 = time.time() ward.fit(X) scikits_time[j, i] = time.time() - t0 t0 = time.time() hierarchy.ward(X) scipy_time[j, i] = time.time() - t0
import time

import numpy as np
import pylab as pl
import mpl_toolkits.mplot3d.axes3d as p3

from scikits.learn.cluster import Ward
from scikits.learn.datasets.samples_generator import swiss_roll

###############################################################################
# Generate data (swiss roll dataset)
n_samples = 1000
noise = 0.05
X = swiss_roll(n_samples, noise)

###############################################################################
# Compute clustering
print "Compute unstructured hierarchical clustering..."
st = time.time()
ward = Ward(n_clusters=5).fit(X)
label = ward.labels_
print "Elapsed time: ", time.time() - st
print "Number of points: ", label.size
print "Number of clusters: ", np.unique(label).size

###############################################################################
# Plot result
fig = pl.figure()
ax = p3.Axes3D(fig)
ax.view_init(7, -80)
for l in np.unique(label):
    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2], 'o',
              color=pl.cm.jet(float(l) / np.max(label + 1)))  # one color per cluster
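###############################################################################
# Finishing touches (an assumption; the original snippet stops mid-figure).
# Without a connectivity constraint, Ward merges the nearest clusters in
# feature space, so clusters may span across the folds of the roll; compare
# with the structured example above.
pl.title('Unstructured hierarchical clustering on the swiss roll')
pl.show()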