def run(self, clustering_parameters, matrix_handler, data_handler, workspaceHandler):
    """
    Drive the whole clustering search: explore, filter, score and select.

    The four stages are timed separately through ``self.timer`` and the
    exploration / filtering results are reported through ``self.notify``.

    :param clustering_parameters: nested parameters; this method reads
        ``["global"]["control"]`` (scheduler settings) and
        ``["clustering"]["evaluation"]`` (filter settings) and forwards the
        whole object to the collaborators it builds.
    :param matrix_handler: forwarded to the explorer, the parameter
        generator, the filter and the analysis populator.
    :param data_handler: forwarded to the analysis populator.
    :param workspaceHandler: forwarded to the explorer.

    :return: ``None`` when the filter leaves no clustering selected,
        otherwise the tuple ``(best_clustering_id, selected_clusterings,
        not_selected_clusterings, all_scores)``.
    """
    # --- Stage 1: generate the candidate clusterings ---
    self.notify("Exploration Started", [])
    self.timer.start("Clustering Exploration")
    exploration_scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"],
        self.observer)
    run_parameters_generator = AlgorithmRunParametersGenerator(
        clustering_parameters,
        matrix_handler)
    explorer = ClusteringExplorer(
        clustering_parameters,
        matrix_handler,
        workspaceHandler,
        exploration_scheduler,
        run_parameters_generator,
        self.observer)
    clusterings = explorer.run()
    self.notify("Clusterings Created",
                {"number_of_clusters": len(clusterings)})
    self.timer.stop("Clustering Exploration")

    # --- Stage 2: split candidates into selected / not selected ---
    self.timer.start("Clustering Filtering")
    clustering_filter = ClusteringFilter(
        clustering_parameters["clustering"]["evaluation"],
        matrix_handler)
    selected_clusterings, not_selected_clusterings = clustering_filter.filter(clusterings)
    self.notify("Filter",
                {"selected": len(selected_clusterings),
                 "not_selected": len(not_selected_clusterings)})
    self.timer.stop("Clustering Filtering")

    # Nothing passed the filter, so there is nothing to score or select.
    if not selected_clusterings:
        return None

    # --- Stage 3: evaluate every selected clustering ---
    self.timer.start("Evaluation")
    evaluation_scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"],
        self.observer)
    populator = AnalysisPopulator(matrix_handler,
                                  data_handler,
                                  clustering_parameters)
    AnalysisRunner(evaluation_scheduler,
                   selected_clusterings,
                   populator).evaluate()
    self.timer.stop("Evaluation")

    # --- Stage 4: pick the winner among the selected clusterings ---
    self.timer.start("Selection")
    selector = BestClusteringSelector(clustering_parameters)
    best_clustering_id, all_scores = selector.choose_best(selected_clusterings)
    self.timer.stop("Selection")

    return (best_clustering_id,
            selected_clusterings,
            not_selected_clusterings,
            all_scores)
def run(self, clustering):
    """
    Refine a clustering by trying to re-partition each of its clusters.

    For every cluster, a set of candidate clusterings is built: the cluster
    on its own (so that leaving it untouched is also an option) plus the
    results of ``self.repartition_with_kmedoids`` for several values of k.
    The candidates are evaluated and the clusters of the best candidate are
    kept.

    :param clustering: the clustering whose ``clusters`` are refined.

    :return: a dictionary with keys ``"type"`` (``"refined_clustering"``),
        ``"clustering"`` (a ``Clustering`` built from all kept clusters) and
        ``"parameters"`` (``self.refinement_parameters``).
    """
    max_partitions = self.refinement_parameters["max_partitions"]
    tries_per_cluster = self.refinement_parameters["tries_per_cluster"]
    # Spread the tries over the k range; the step is never smaller than 1.
    try_step = int(max(1, float(max_partitions) / tries_per_cluster))

    distance_matrix = self.matrixHandler.distance_matrix
    refined_clusters = []

    for cluster in clustering.clusters:
        base_id = cluster.id

        # The untouched cluster is registered as a candidate too, so it
        # competes with its own re-partitions for the best score.
        candidates = {
            base_id: {
                "type": "refined_base",
                "clustering": Clustering([cluster]),
                "parameters": {}
            }
        }

        submatrix = get_submatrix(distance_matrix, cluster.all_elements)

        # One candidate per k in [2, max_partitions), advancing by try_step.
        # TODO: Generate parameters with parameter generator
        for k in range(2, max_partitions, try_step):
            repartition = self.repartition_with_kmedoids(cluster, k, submatrix)
            candidate_id = "%s_%d" % (base_id, k)
            candidates[candidate_id] = {
                "type": "refined",
                "clustering": repartition,
                "parameters": {"k": k}
            }

        # Score all candidates for this cluster, then keep the best one.
        scheduler = scheduling_tools.build_scheduler(
            self.clustering_parameters["clustering"]["control"],
            self.observer)
        populator = AnalysisPopulator(self.matrixHandler,
                                      self.trajectoryHandler,
                                      self.clustering_parameters)
        AnalysisRunner(scheduler, candidates, populator).evaluate()

        selector = BestClusteringSelector(self.clustering_parameters)
        best_candidate_id, _scores = selector.choose_best(candidates)
        refined_clusters.extend(candidates[best_candidate_id]["clustering"].clusters)

    # Gather every kept cluster into a single new clustering.
    return {
        "type": "refined_clustering",
        "clustering": Clustering(refined_clusters),
        "parameters": self.refinement_parameters
    }
def run(self, clustering_parameters, matrixHandler, workspaceHandler, trajectoryHandler):
    """
    Run the clustering search pipeline and return the best clustering found.

    Stages, each timed with ``self.timer``:
      1. exploration - build the candidate clusterings,
      2. filtering   - split them into selected and not selected,
      3. evaluation  - score the selected ones,
      4. selection   - choose the best of the selected ones.

    :param clustering_parameters: nested parameters; ``["global"]["control"]``
        is handed to the scheduler builder and
        ``["clustering"]["evaluation"]`` to the filter. The whole object is
        also forwarded to the other collaborators.
    :param matrixHandler: forwarded to the explorer, the parameter
        generator, the filter and the analysis populator.
    :param workspaceHandler: forwarded to the explorer.
    :param trajectoryHandler: forwarded to the analysis populator.

    :return: ``None`` if no clustering was selected by the filter; otherwise
        ``(best_clustering_id, selected_clusterings,
        not_selected_clusterings, all_scores)``.
    """
    # 1. Exploration
    self.notify("Exploration Started", [])
    self.timer.start("Clustering Exploration")
    explorer_scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"], self.observer)
    parameters_generator = AlgorithmRunParametersGenerator(
        clustering_parameters, matrixHandler)
    clusterings = ClusteringExplorer(
        clustering_parameters,
        matrixHandler,
        workspaceHandler,
        explorer_scheduler,
        parameters_generator,
        self.observer).run()
    created_report = {"number_of_clusters": len(clusterings)}
    self.notify("Clusterings Created", created_report)
    self.timer.stop("Clustering Exploration")

    # 2. Filtering
    self.timer.start("Clustering Filtering")
    evaluation_parameters = clustering_parameters["clustering"]["evaluation"]
    filtered = ClusteringFilter(evaluation_parameters,
                                matrixHandler).filter(clusterings)
    selected_clusterings, not_selected_clusterings = filtered
    filter_report = {
        "selected": len(selected_clusterings),
        "not_selected": len(not_selected_clusterings)
    }
    self.notify("Filter", filter_report)
    self.timer.stop("Clustering Filtering")

    # Stop here when the filter rejected every candidate.
    if not selected_clusterings:
        return None

    # 3. Evaluation
    self.timer.start("Evaluation")
    analysis_scheduler = scheduling_tools.build_scheduler(
        clustering_parameters["global"]["control"], self.observer)
    analysis_populator = AnalysisPopulator(
        matrixHandler, trajectoryHandler, clustering_parameters)
    analyzer = AnalysisRunner(
        analysis_scheduler, selected_clusterings, analysis_populator)
    analyzer.evaluate()
    self.timer.stop("Evaluation")

    # 4. Selection
    self.timer.start("Selection")
    choice = BestClusteringSelector(
        clustering_parameters).choose_best(selected_clusterings)
    best_clustering_id, all_scores = choice
    self.timer.stop("Selection")

    return best_clustering_id, selected_clusterings, not_selected_clusterings, all_scores
def run(self, clustering):
    """
    Build a refined clustering by re-partitioning clusters one at a time.

    Each cluster of ``clustering`` is put in competition with alternative
    partitions of itself produced by ``self.repartition_with_kmedoids``;
    whichever candidate the selector ranks best contributes its clusters to
    the result.

    :param clustering: object exposing the ``clusters`` to refine.

    :return: ``{"type": "refined_clustering", "clustering": <Clustering of
        all retained clusters>, "parameters": self.refinement_parameters}``.
    """
    parameters = self.refinement_parameters
    max_partitions = parameters["max_partitions"]
    # Step between consecutive k values, clamped so it is at least 1.
    try_step = int(
        max(1, float(max_partitions) / parameters["tries_per_cluster"]))

    matrix = self.matrixHandler.distance_matrix
    kept_clusters = []

    for cluster in clustering.clusters:
        base_id = cluster.id
        submatrix = get_submatrix(matrix, cluster.all_elements)

        # Start the candidate table with the cluster as it is, so that
        # "no refinement" can win the comparison as well.
        options = {}
        options[base_id] = {
            "type": "refined_base",
            "clustering": Clustering([cluster]),
            "parameters": {}
        }

        # Add one re-partition for each k in range(2, max_partitions, try_step).
        # TODO: Generate parameters with parameter generator
        for k in range(2, max_partitions, try_step):
            options["%s_%d" % (base_id, k)] = {
                "type": "refined",
                "clustering": self.repartition_with_kmedoids(
                    cluster, k, submatrix),
                "parameters": {"k": k}
            }

        # Evaluate the candidates and keep the clusters of the winner.
        runner = AnalysisRunner(
            scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer),
            options,
            AnalysisPopulator(self.matrixHandler,
                              self.trajectoryHandler,
                              self.clustering_parameters))
        runner.evaluate()

        winner_id, _all_scores = BestClusteringSelector(
            self.clustering_parameters).choose_best(options)
        winner = options[winner_id]["clustering"]
        kept_clusters.extend(winner.clusters)

    # Wrap all retained clusters into the refined clustering description.
    refined = Clustering(kept_clusters)
    return {
        "type": "refined_clustering",
        "clustering": refined,
        "parameters": self.refinement_parameters
    }