def __kmedoids_compression(self, clustering, matrix_handler):
    """
    Selects representative elements of every cluster by running a k-medoids
    clustering inside each of them and keeping the resulting medoids.

    The number of representatives requested for a cluster is its share of
    self.parameters["final_number_of_frames"], proportional to the cluster size
    and rounded up. Because of the rounding, the total number of representatives
    can be bigger than "final_number_of_frames".

    @param clustering: Object exposing 'clusters' and 'total_number_of_elements'.
    @param matrix_handler: Object exposing the 'distance_matrix' of the whole dataset.

    @return: A list with the representatives of all clusters, appended in the
    order the clusters are visited.
    """
    selected = []

    for cluster in clustering.clusters:
        # Guess the 'correct' number of elements for this cluster
        size = cluster.get_size()
        frames_fraction = float(self.parameters["final_number_of_frames"]) / clustering.total_number_of_elements
        k = int(math.ceil(size * frames_fraction))

        # Distances restricted to the elements of this cluster (remapped indices)
        submatrix = get_submatrix(matrix_handler.distance_matrix, cluster.all_elements)

        # Prepare and run the k-medoids algorithm
        kmedoids_args = {
            "k": k,
            "seeding_type": "EQUIDISTANT"
        }
        inner_clustering = KMedoidsAlgorithm(submatrix).perform_clustering(kmedoids_args)

        # Reverse the remapping and store the representatives
        medoids = inner_clustering.get_medoids(submatrix)
        medoids_cluster = Cluster(None, medoids)
        original_ids_cluster = Refiner.redefine_cluster_with_map(cluster, medoids_cluster)
        selected.extend(original_ids_cluster.all_elements)

    return selected
def perform_clustering(self, kwargs):
    """
    Does the actual clustering over the rows of the matrix formed by the first k
    eigenvectors (the first k columns of self.eigenvectors). K-medoids is used if
    self.use_k_medoids is set, scipy's k-means otherwise.

    @param kwargs: Dictionary with this mandatory keys:
        - 'k': Number of clusters to generate. Must be <= than max_clusters

    @return: a Clustering instance with the clustered data. Its details describe
    the k and sigma squared values that were used.
    """
    # Mandatory parameter
    k = int(kwargs["k"])

    if k > self.max_clusters:
        # Only reported: the clustering is attempted anyway.
        print("[ERROR SpectralClusteringAlgorithm::perform_clustering] this algorithm was defined to generate at most %d clusters." % self.max_clusters)

    algorithm_details = "Spectral algorithm with k = %d and sigma squared = %.3f" % (k, self.sigma_sq)

    if self.use_k_medoids:
        # The row vectors we have are in R^k (so k length)
        eigen_distances = CondensedMatrix(pdist(self.eigenvectors[:, :k]))
        k_medoids_args = {
            "k": k,
            "seeding_max_cutoff": -1,
            "seeding_type": "RANDOM"
        }
        k_medoids_alg = KMedoidsAlgorithm(eigen_distances)
        clustering = k_medoids_alg.perform_clustering(k_medoids_args)
        clustering.details = algorithm_details
        # Return the clustering we have just annotated. Running the algorithm a
        # second time would drop the details and, as seeding is random, could
        # even produce a different partition.
        return clustering
    else:
        # kmeans2 returns (centroids, labels); only the labels are needed
        labels = scipy.cluster.vq.kmeans2(self.eigenvectors[:, :k], k, iter=1000, minit='random')[1]
        clusters = gen_clusters_from_class_list(labels)
        return Clustering(clusters, details=algorithm_details)
def perform_clustering(self, kwargs):
    """
    Clusters the rows of the matrix formed by the first k eigenvectors (the first
    k columns of self.eigenvectors), using k-medoids when self.use_k_medoids is
    set and scipy's k-means otherwise.

    @param kwargs: Dictionary with this mandatory keys:
        - 'k': Number of clusters to generate. Must be <= than max_clusters

    @return: a Clustering instance with the clustered data, whose details hold
    the k and sigma squared values used.
    """
    # Mandatory parameter
    k = int(kwargs["k"])

    if k > self.max_clusters:
        # The problem is only reported; execution continues.
        print("[ERROR SpectralClusteringAlgorithm::perform_clustering] this algorithm was defined to generate at most %d clusters." % self.max_clusters)

    algorithm_details = "Spectral algorithm with k = %d and sigma squared = %.3f" % (
        k, self.sigma_sq)

    first_k_eigenvectors = self.eigenvectors[:, :k]

    if self.use_k_medoids:
        # The row vectors we have are in R^k (so k length)
        eigen_distances = CondensedMatrix(pdist(first_k_eigenvectors))
        k_medoids_args = {
            "k": k,
            "seeding_max_cutoff": -1,
            "seeding_type": "RANDOM"
        }
        clustering = KMedoidsAlgorithm(eigen_distances).perform_clustering(k_medoids_args)
        clustering.details = algorithm_details
        # The annotated result is the one to return: a second run would lose the
        # details and, with random seeding, might not even be the same partition.
        return clustering

    # kmeans2 returns (centroids, labels); the centroids are not used
    _, labels = scipy.cluster.vq.kmeans2(
        first_k_eigenvectors, k, iter=1000, minit='random')
    clusters = gen_clusters_from_class_list(labels)
    return Clustering(clusters, details=algorithm_details)
def test_naive_case(self):
    """
    Clusters ten 2D points with k = 3 and random seeding, and checks every
    resulting cluster against the expected prototypes and memberships.
    """
    # Point indices and their layout:
    #
    #  1       5         8
    #  |       |         |
    #  0 - 3   4     6 - 7
    #  |                 |
    #  2                 9
    left_group = [(0, 0), (0, 1), (0, -1), (1, 0)]
    middle_group = [(3, 0), (3, 1)]
    right_group = [(6, 0), (7, 0), (7, 1), (7, -1)]
    points = left_group + middle_group + right_group

    expected_prototypes = [0, 4, 6]
    expected_memberships = [[0, 1, 2, 3], [6, 7, 8, 9], [4, 5]]

    algorithm = KMedoidsAlgorithm(CondensedMatrix(pdist(points)), 10)
    clustering = algorithm.perform_clustering({'k': 3, 'seeding_type': 'RANDOM'})

    for cluster in clustering.clusters:
        self.assertIn(cluster.prototype, expected_prototypes)
        self.assertIn(cluster.all_elements, expected_memberships)
def test_naive_case(self):
    """
    Runs k-medoids (k = 3, random seeding) over three small groups of 2D points
    and verifies the prototype and the elements of each cluster obtained.
    """
    # Layout of the points (numbers are their indices):
    #
    #  1       5         8
    #  |       |         |
    #  0 - 3   4     6 - 7
    #  |                 |
    #  2                 9
    points = [
        (0, 0), (0, 1), (0, -1), (1, 0),   # elements 0 to 3
        (3, 0), (3, 1),                    # elements 4 and 5
        (6, 0), (7, 0), (7, 1), (7, -1),   # elements 6 to 9
    ]
    valid_prototypes = [0, 4, 6]
    valid_clusters = [[0, 1, 2, 3], [6, 7, 8, 9], [4, 5]]

    distances = CondensedMatrix(pdist(points))
    s_algo = KMedoidsAlgorithm(distances, 10)
    parameters = {
        'k': 3,
        'seeding_type': 'RANDOM'
    }
    result = s_algo.perform_clustering(parameters)

    for found in result.clusters:
        self.assertIn(found.prototype, valid_prototypes)
        self.assertIn(found.all_elements, valid_clusters)
def __kmedoids_compression(self, clustering, matrix_handler):
    """
    Compresses a clustering by keeping, for each cluster, the medoids of a
    k-medoids clustering performed over the elements of that cluster.

    For each cluster, k is the cluster size multiplied by the ratio between
    self.parameters["final_number_of_frames"] and the total number of elements of
    the clustering, rounded up. As k is rounded up per cluster, the number of
    returned elements may exceed "final_number_of_frames".

    @param clustering: Object exposing 'clusters' and 'total_number_of_elements'.
    @param matrix_handler: Object exposing the 'distance_matrix' of the whole dataset.

    @return: The list of representatives of all the clusters, in cluster order.
    """
    all_representatives = []

    for current_cluster in clustering.clusters:
        # Guess the 'correct' number of elements for this cluster
        number_of_elements = current_cluster.get_size()
        proportion = float(self.parameters["final_number_of_frames"]) / clustering.total_number_of_elements
        wanted_medoids = int(math.ceil(number_of_elements * proportion))

        # Distance matrix of the elements of this cluster only (indices get remapped)
        cluster_matrix = get_submatrix(matrix_handler.distance_matrix,
                                       current_cluster.all_elements)

        # Prepare and run the k-medoids algorithm
        algorithm = KMedoidsAlgorithm(cluster_matrix)
        sub_clustering = algorithm.perform_clustering({
            "k": wanted_medoids,
            "seeding_type": "EQUIDISTANT"
        })

        # Reverse the remapping and add the medoids to the representatives
        remapped_medoids = sub_clustering.get_medoids(cluster_matrix)
        restored_cluster = Refiner.redefine_cluster_with_map(
            current_cluster, Cluster(None, remapped_medoids))
        all_representatives.extend(restored_cluster.all_elements)

    return all_representatives