def test_get_submatrix(self):
    """
    Checks that get_submatrix extracts the expected condensed submatrix
    for a given subset of element indices.
    """
    # 6x6 condensed matrix; upper-triangle rows laid out flat.
    source_matrix = CondensedMatrix([0.2, 1.0, 0.3, 4.0, 6.1,
                                     0.5, 0.6, 0.7, 0.5,
                                     0.9, 0.8,
                                     0.3, 0.4,
                                     1.4,
                                     2.9])
    numpy.testing.assert_array_almost_equal(
        [0.9, 0.3, 1.4],
        get_submatrix(source_matrix, [2, 3, 5]).get_data(),
        6)

    # 7x7 condensed matrix with easily traceable sequential values.
    sequential_matrix = CondensedMatrix([float(v) for v in range(1, 22)])
    extracted = get_submatrix(sequential_matrix, [2, 4, 6])
    numpy.testing.assert_array_equal([13, 15, 20], extracted.get_data())
def evaluate(self, clustering, condensed_distance_matrix):
    """
    Returns the cohesion value of a clustering, normalized by the
    maximum cohesion (the one we would get with a single cluster
    holding all elements). Each cluster's pairwise-distance sum is
    weighted by the inverse of its size; the 2x symmetry factor is
    skipped for performance since it cancels out.
    """
    elements = sorted(clustering.get_all_clustered_elements())
    num_elements = len(elements)
    if num_elements == 0:
        return 0.

    # Distances restricted to the clustered elements only.
    submatrix = get_submatrix(condensed_distance_matrix, elements)
    max_cohesion = numpy.sum(submatrix.get_data()) / num_elements

    total_cohesion = 0
    for cluster in clustering.clusters:
        cluster_size = cluster.get_size()
        weight = 1. / cluster_size
        pair_sum = 0.
        for first in range(cluster_size - 1):
            for second in range(first + 1, cluster_size):
                pair_sum = pair_sum + condensed_distance_matrix[
                    cluster[first], cluster[second]]
        total_cohesion += weight * pair_sum
    return 1 - total_cohesion / max_cohesion
def evaluate(self, clustering, condensed_distance_matrix):
    """
    Computes a normalized cohesion score for a clustering. The score is
    1 minus the size-weighted sum of intra-cluster pair distances divided
    by the maximum possible cohesion (all elements in one cluster). The
    symmetric 2x factor is omitted because it cancels in the ratio.
    """
    all_clustered = sorted(clustering.get_all_clustered_elements())
    total_elements = len(all_clustered)
    if total_elements == 0:
        return 0.

    # Restrict the distance matrix to the clustered elements.
    pruned = get_submatrix(condensed_distance_matrix, all_clustered)
    normalizer = numpy.sum(pruned.get_data()) / total_elements

    accumulated = 0
    for current in clustering.clusters:
        n = current.get_size()
        intra = 0.
        i = 0
        while i < n - 1:
            j = i + 1
            while j < n:
                intra = intra + condensed_distance_matrix[current[i],
                                                          current[j]]
                j += 1
            i += 1
        accumulated += (1. / n) * intra
    return 1 - accumulated / normalizer
def __kmedoids_compression(self, clustering, matrix_handler):
    """
    Builds a reduced set of representative frames by running a k-medoids
    partition inside every cluster and keeping the resulting medoids
    (mapped back to global frame ids).
    """
    medoid_elements = []
    total = clustering.total_number_of_elements
    target_frames = float(self.parameters["final_number_of_frames"])

    for cluster in clustering.clusters:
        # Number of representatives proportional to this cluster's size.
        proportion = cluster.get_size() * (target_frames / total)
        wanted = int(math.ceil(proportion))

        # Submatrix indexed with local (remapped, 0-based) element ids.
        local_matrix = get_submatrix(matrix_handler.distance_matrix,
                                     cluster.all_elements)

        clusterer = KMedoidsAlgorithm(local_matrix)
        local_clustering = clusterer.perform_clustering({
            "k": wanted,
            "seeding_type": "EQUIDISTANT"
        })

        # Medoids come back in local ids; map them to global frame ids.
        local_medoids = local_clustering.get_medoids(local_matrix)
        medoid_holder = Cluster(None, local_medoids)
        medoid_elements.extend(
            Refiner.redefine_cluster_with_map(cluster,
                                              medoid_holder).all_elements)
    return medoid_elements
def test_get_submatrix(self):
    """Verifies extraction of a condensed submatrix given element indices."""
    complete = CondensedMatrix([
        0.2, 1., 0.3, 4.0, 6.1,
        0.5, 0.6, 0.7, 0.5,
        0.9, 0.8,
        0.3, 0.4,
        1.4,
        2.9,
    ])
    sub = complete and get_submatrix(complete, [2, 3, 5])
    numpy.testing.assert_array_almost_equal([0.9, 0.3, 1.4],
                                            sub.get_data(), 6)

    # Second case: 7x7 matrix filled with 1.0 .. 21.0.
    sequential = CondensedMatrix([1.0 * k for k in range(1, 22)])
    picked = get_submatrix(sequential, [2, 4, 6])
    numpy.testing.assert_array_equal([13, 15, 20], picked.get_data())
def run(self, clustering):
    """
    Refines a clustering by repartitioning each of its clusters with
    k-medoids (several values of k) and keeping, per cluster, the
    best-scored repartition. The unrefined cluster itself also competes
    for the best clustering prize.

    @param clustering: The Clustering instance to refine.

    @return: A dict holding the refined Clustering plus the refinement
    parameters used to produce it.
    """
    max_partitions = self.refinement_parameters["max_partitions"]
    try_step = int(max(1, float(max_partitions) /
                       self.refinement_parameters["tries_per_cluster"]))
    matrix = self.matrixHandler.distance_matrix
    new_clusters = []
    for cluster in clustering.clusters:
        base_id = cluster.id
        # The base (unrefined) cluster enters the candidate pool too.
        clusterings = {
            base_id: {
                "type": "refined_base",
                "clustering": Clustering([cluster]),
                "parameters": {}
            }
        }
        submatrix = get_submatrix(matrix, cluster.all_elements)

        # Try some K-Medoids partitions.
        # TODO: Generate parameters with parameter generator
        for k in range(2, max_partitions, try_step):
            # FIX: the original rebound the 'clustering' parameter here,
            # shadowing the clustering whose clusters are being iterated;
            # a distinct name removes that hazard.
            repartition = self.repartition_with_kmedoids(cluster, k,
                                                         submatrix)
            clusterings["%s_%d" % (base_id, k)] = {
                "type": "refined",
                "clustering": repartition,
                "parameters": {"k": k}
            }

        # Evaluate all candidate clusterings and pick the best one.
        AnalysisRunner(
            scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer),
            clusterings,
            AnalysisPopulator(self.matrixHandler, self.trajectoryHandler,
                              self.clustering_parameters)).evaluate()
        best_clustering_id, all_scores = BestClusteringSelector(
            self.clustering_parameters).choose_best(clusterings)  # @UnusedVariable
        new_clusters.extend(
            clusterings[best_clustering_id]["clustering"].clusters)

    # Gather all winning clusters into the refined clustering.
    return {
        "type": "refined_clustering",
        "clustering": Clustering(new_clusters),
        "parameters": self.refinement_parameters
    }
def __kmedoids_compression(self, clustering, matrix_handler):
    """
    Compresses a clustering by substituting each cluster with the medoids
    obtained from an internal k-medoids partition of that cluster.
    """
    chosen = []
    for current_cluster in clustering.clusters:
        # Share of the requested final frames this cluster contributes.
        share = current_cluster.get_size() * (
            float(self.parameters["final_number_of_frames"]) /
            clustering.total_number_of_elements)
        k_value = int(math.ceil(share))

        # Distance submatrix restricted to this cluster's elements;
        # its element ids are remapped to local 0-based ids.
        submatrix = get_submatrix(matrix_handler.distance_matrix,
                                  current_cluster.all_elements)

        inner_clustering = KMedoidsAlgorithm(submatrix).perform_clustering({
            "k": k_value,
            "seeding_type": "EQUIDISTANT"
        })

        # Undo the remapping so representatives refer to global frames.
        local_representatives = inner_clustering.get_medoids(submatrix)
        remapped = Refiner.redefine_cluster_with_map(
            current_cluster, Cluster(None, local_representatives))
        chosen.extend(remapped.all_elements)
    return chosen
parameters["data"]["files"] = [sys.argv[1], sys.argv[2]] frames_ini = get_number_of_frames(sys.argv[1]) frames_proto = get_number_of_frames(sys.argv[2]) print sys.argv[1],"->",frames_ini print sys.argv[2],"->",frames_proto try: Driver(Observer()).run(parameters) except SystemExit: # Expected improductive search # Load again the matrix handler = MatrixHandler({ "method": "load", "parameters": { "path": parameters["workspace"]["base"]+"/matrix/matrix" } }) matrix = handler.create_matrix(None) submatrix = get_submatrix(matrix, range(frames_ini,frames_ini+frames_proto)) matrixToImage(submatrix, parameters["workspace"]["base"] +"/submatrix.png") print "Original mean:",get_submatrix(matrix, range(0,frames_ini)).calculateMean() values = [] for i in range(0,frames_ini): for j in range(frames_ini,frames_ini+frames_proto): values.append((handler.distance_matrix[i,j],i,j-frames_ini)) for d,i,j in sorted(values): print "%d %d %.2f"% (i,j,d) print "Combined mean:", numpy.mean(values)
print sys.argv[1], "->", frames_ini print sys.argv[2], "->", frames_proto try: Driver(Observer()).run(parameters) except SystemExit: # Expected improductive search # Load again the matrix handler = MatrixHandler({ "method": "load", "parameters": { "path": parameters["workspace"]["base"] + "/matrix/matrix" } }) matrix = handler.create_matrix(None) submatrix = get_submatrix(matrix, range(frames_ini, frames_ini + frames_proto)) matrixToImage(submatrix, parameters["workspace"]["base"] + "/submatrix.png") print "Original mean:", get_submatrix(matrix, range( 0, frames_ini)).calculateMean() values = [] for i in range(0, frames_ini): for j in range(frames_ini, frames_ini + frames_proto): values.append((handler.distance_matrix[i, j], i, j - frames_ini)) for d, i, j in sorted(values): print "%d %d %.2f" % (i, j, d) print "Combined mean:", numpy.mean(values)
def run(self, clustering):
    """
    Refines a clustering: each cluster is repartitioned with k-medoids
    for several values of k, every candidate (including the original,
    unrefined cluster) is scored, and the best-scored candidate's
    clusters are kept.

    @param clustering: The Clustering instance to refine.

    @return: A dict with the refined Clustering and the refinement
    parameters that produced it.
    """
    max_partitions = self.refinement_parameters["max_partitions"]
    try_step = int(
        max(
            1,
            float(max_partitions) /
            self.refinement_parameters["tries_per_cluster"]))
    matrix = self.matrixHandler.distance_matrix
    new_clusters = []
    for cluster in clustering.clusters:
        base_id = cluster.id
        # Seed the candidate pool with the unrefined cluster so it also
        # competes for the best clustering prize.
        clusterings = {
            base_id: {
                "type": "refined_base",
                "clustering": Clustering([cluster]),
                "parameters": {}
            }
        }
        submatrix = get_submatrix(matrix, cluster.all_elements)

        # Proceed with some K-Medoids partitions.
        # TODO: Generate parameters with parameter generator
        for k in range(2, max_partitions, try_step):
            # FIX: avoid rebinding the 'clustering' parameter (it used to
            # shadow the clustering whose clusters are being iterated).
            repartition = self.repartition_with_kmedoids(
                cluster, k, submatrix)
            clusterings["%s_%d" % (base_id, k)] = {
                "type": "refined",
                "clustering": repartition,
                "parameters": {
                    "k": k
                }
            }

        # Evaluate all candidate clusterings and pick the best one.
        AnalysisRunner(
            scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer), clusterings,
            AnalysisPopulator(self.matrixHandler, self.trajectoryHandler,
                              self.clustering_parameters)).evaluate()
        best_clustering_id, all_scores = BestClusteringSelector(
            self.clustering_parameters).choose_best(
                clusterings)  # @UnusedVariable
        new_clusters.extend(
            clusterings[best_clustering_id]["clustering"].clusters)

    # Collect every winning cluster into the refined clustering.
    return {
        "type": "refined_clustering",
        "clustering": Clustering(new_clusters),
        "parameters": self.refinement_parameters
    }