# estimate bandwidth for mean shift bandwidth = cluster.estimate_bandwidth(X, quantile=0.3) # connectivity matrix for structured Ward connectivity = kneighbors_graph(X, n_neighbors=10) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) # Compute distances # distances = np.exp(-euclidean_distances(X)) distances = euclidean_distances(X) # create clustering estimators ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True) two_means = cluster.MiniBatchKMeans(n_clusters=25) ward = cluster.Ward(n_clusters=25) # ward = cluster.AgglomerativeClustering(n_clusters=2,linkage='ward', connectivity=connectivity) spectral = cluster.SpectralClustering(n_clusters=25, eigen_solver='arpack', affinity="nearest_neighbors") dbscan = cluster.DBSCAN(eps=.2) affinity_propagation = cluster.AffinityPropagation(damping=.9, preference=-200) # average_linkage = cluster.AgglomerativeClustering(linkage="average", affinity="cityblock", n_clusters=2, connectivity=connectivity) for name, algorithm in [ ('MiniBatchKMeans', two_means), ('AffinityPropagation', affinity_propagation), ('MeanShift', ms), ('SpectralClustering', spectral),
# estimate bandwidth for mean shift bandwidth = cluster.estimate_bandwidth(X, quantile=0.3) # connectivity matrix for structured Ward connectivity = kneighbors_graph(X, n_neighbors=10) # make connectivity symmetric connectivity = 0.5 * (connectivity + connectivity.T) # Compute distances #distances = np.exp(-euclidean_distances(X)) distances = euclidean_distances(X) # create clustering estimators ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True) two_means = cluster.MiniBatchKMeans(n_clusters=2) ward_five = cluster.Ward(n_clusters=2, connectivity=connectivity) spectral = cluster.SpectralClustering(n_clusters=2, eigen_solver='arpack', affinity="nearest_neighbors") dbscan = cluster.DBSCAN(eps=.2) affinity_propagation = cluster.AffinityPropagation(damping=.9, preference=-200) for algorithm in [ two_means, affinity_propagation, ms, spectral, ward_five, dbscan ]: # predict cluster memberships t0 = time.time() algorithm.fit(X) t1 = time.time() if hasattr(algorithm, 'labels_'):
def ward(matrix): ward = skcluster.Ward(n_clusters=50, compute_full_tree=False) ward.fit(matrix) return ward.labels_