Exemplo n.º 1
0
    def run(self, clustering_parameters, matrix_handler, data_handler, workspaceHandler):
        """
        Explore clusterings, filter them, score the survivors and pick the best.

        Every stage is bracketed by self.timer.start / self.timer.stop and
        progress is reported through self.notify.

        :param clustering_parameters: Parameters structure. This method reads its
            ["global"]["control"] and ["clustering"]["evaluation"] entries and
            hands the whole structure to the helper objects it builds.
        :param matrix_handler: Handed to the explorer, the run parameters
            generator, the filter and the analysis populator.
        :param data_handler: Handed to the analysis populator.
        :param workspaceHandler: Handed to the clustering explorer.

        :return: None when the filter selects no clustering. Otherwise a tuple
            with the id of the best clustering, the selected clusterings, the
            not selected clusterings and the second value returned by the
            selector's choose_best.
        """
        # ---- Stage 1: clustering exploration ----
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        exploration_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"],
            self.observer)
        run_parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters,
            matrix_handler)
        explorer = ClusteringExplorer(clustering_parameters,
                                      matrix_handler,
                                      workspaceHandler,
                                      exploration_scheduler,
                                      run_parameters_generator,
                                      self.observer)
        explored = explorer.run()
        self.notify("Clusterings Created", {"number_of_clusters": len(explored)})
        self.timer.stop("Clustering Exploration")

        # ---- Stage 2: first filtering ----
        self.timer.start("Clustering Filtering")
        clustering_filter = ClusteringFilter(
            clustering_parameters["clustering"]["evaluation"],
            matrix_handler)
        accepted, rejected = clustering_filter.filter(explored)
        filter_summary = {"selected": len(accepted.keys()),
                          "not_selected": len(rejected.keys())}
        self.notify("Filter", filter_summary)
        self.timer.stop("Clustering Filtering")

        # With nothing selected there is nothing to score or to choose from.
        if accepted == {}:
            return None

        # ---- Stage 3: clustering scoring ----
        self.timer.start("Evaluation")
        evaluation_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"],
            self.observer)
        populator = AnalysisPopulator(matrix_handler,
                                      data_handler,
                                      clustering_parameters)
        AnalysisRunner(evaluation_scheduler, accepted, populator).evaluate()
        self.timer.stop("Evaluation")

        # ---- Stage 4: choose the best clustering ----
        self.timer.start("Selection")
        selector = BestClusteringSelector(clustering_parameters)
        winner_id, scores = selector.choose_best(accepted)
        self.timer.stop("Selection")

        return winner_id, accepted, rejected, scores
Exemplo n.º 2
0
    def run(self, clustering):
        """
        Refine a clustering by trying to repartition each of its clusters.

        For every cluster a set of candidates is built: the cluster on its own
        plus the results of self.repartition_with_kmedoids for several values
        of k. The candidates are evaluated and the clusters of the winning
        candidate are kept.

        :param clustering: Object whose 'clusters' attribute is iterated; each
            cluster must expose 'id' and 'all_elements'.

        :return: Dictionary with the keys "type" (always "refined_clustering"),
            "clustering" (a Clustering built from all kept clusters) and
            "parameters" (self.refinement_parameters).
        """
        partitions_limit = self.refinement_parameters["max_partitions"]
        raw_step = float(partitions_limit) / self.refinement_parameters["tries_per_cluster"]
        # The step is never smaller than 1, so the range below always advances.
        k_step = int(max(1, raw_step))
        distance_matrix = self.matrixHandler.distance_matrix

        kept_clusters = []
        for cluster in clustering.clusters:
            cluster_id = cluster.id

            # The untouched cluster is a candidate too, so it competes with
            # its own repartitions and is kept if none of them scores better.
            candidates = {}
            candidates[cluster_id] = {"type": "refined_base",
                                      "clustering": Clustering([cluster]),
                                      "parameters": {}}

            cluster_matrix = get_submatrix(distance_matrix, cluster.all_elements)

            # One repartition per tried k (k = 2, 2 + step, ... below the limit).
            # TODO: Generate parameters with parameter generator
            for k in range(2, partitions_limit, k_step):
                repartition = self.repartition_with_kmedoids(cluster, k, cluster_matrix)
                candidate_id = "%s_%d" % (cluster_id, k)
                candidates[candidate_id] = {"type": "refined",
                                            "clustering": repartition,
                                            "parameters": {"k": k}}

            # Score every candidate.
            # NOTE(review): the scheduler settings are read from the
            # ["clustering"]["control"] section - confirm this is the intended one.
            scheduler = scheduling_tools.build_scheduler(
                self.clustering_parameters["clustering"]["control"],
                self.observer)
            populator = AnalysisPopulator(self.matrixHandler,
                                          self.trajectoryHandler,
                                          self.clustering_parameters)
            AnalysisRunner(scheduler, candidates, populator).evaluate()

            # Only the id of the winner is needed; the scores are discarded.
            selector = BestClusteringSelector(self.clustering_parameters)
            winner_id, _scores = selector.choose_best(candidates)
            kept_clusters.extend(candidates[winner_id]["clustering"].clusters)

        # Wrap all kept clusters into a single new clustering.
        refined = Clustering(kept_clusters)
        return {"type": "refined_clustering",
                "clustering": refined,
                "parameters": self.refinement_parameters}
Exemplo n.º 3
0
    def run(self, clustering_parameters, matrixHandler, workspaceHandler,
            trajectoryHandler):
        """
        Generate clusterings, filter them, evaluate the selected ones and
        choose the best.

        Each phase is timed with self.timer (start/stop with a phase label) and
        reported with self.notify.

        :param clustering_parameters: Parameters structure. Its
            ["global"]["control"] and ["clustering"]["evaluation"] entries are
            read here; the whole structure is also given to the helpers.
        :param matrixHandler: Given to the explorer, the run parameters
            generator, the filter and the analysis populator.
        :param workspaceHandler: Given to the clustering explorer.
        :param trajectoryHandler: Given to the analysis populator.

        :return: None if no clustering was selected by the filter. Otherwise a
            4-tuple: id of the best clustering, selected clusterings, not
            selected clusterings, and the second value returned by choose_best.
        """
        # Phase: clustering exploration
        self.notify("Exploration Started", [])
        self.timer.start("Clustering Exploration")
        scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        parameters_generator = AlgorithmRunParametersGenerator(
            clustering_parameters, matrixHandler)
        all_clusterings = ClusteringExplorer(clustering_parameters,
                                             matrixHandler,
                                             workspaceHandler,
                                             scheduler,
                                             parameters_generator,
                                             self.observer).run()
        created_info = {"number_of_clusters": len(all_clusterings)}
        self.notify("Clusterings Created", created_info)
        self.timer.stop("Clustering Exploration")

        # Phase: first filtering
        self.timer.start("Clustering Filtering")
        evaluation_parameters = clustering_parameters["clustering"]["evaluation"]
        filter_result = ClusteringFilter(evaluation_parameters,
                                         matrixHandler).filter(all_clusterings)
        chosen, discarded = filter_result
        self.notify("Filter", {"selected": len(chosen.keys()),
                               "not_selected": len(discarded.keys())})
        self.timer.stop("Clustering Filtering")

        # Stop here when the filter kept nothing: there is nothing to rank.
        if chosen == {}:
            return None

        # Phase: clustering scoring
        self.timer.start("Evaluation")
        scoring_scheduler = scheduling_tools.build_scheduler(
            clustering_parameters["global"]["control"], self.observer)
        analysis_populator = AnalysisPopulator(matrixHandler,
                                               trajectoryHandler,
                                               clustering_parameters)
        runner = AnalysisRunner(scoring_scheduler, chosen, analysis_populator)
        runner.evaluate()
        self.timer.stop("Evaluation")

        # Phase: choose the best clustering
        self.timer.start("Selection")
        best_id, scores = BestClusteringSelector(
            clustering_parameters).choose_best(chosen)
        self.timer.stop("Selection")

        return best_id, chosen, discarded, scores
Exemplo n.º 4
0
    def run(self, clustering):
        """
        Refine a clustering, cluster by cluster.

        Each cluster competes against several repartitions of itself (obtained
        from self.repartition_with_kmedoids with different k values). All the
        competitors are evaluated and the clusters of the best one are
        collected into the resulting clustering.

        :param clustering: Object providing a 'clusters' iterable whose items
            have 'id' and 'all_elements' attributes.

        :return: A dictionary holding "type" ("refined_clustering"),
            "clustering" (Clustering made of every collected cluster) and
            "parameters" (self.refinement_parameters).
        """
        max_k = self.refinement_parameters["max_partitions"]
        step_as_float = float(max_k) / self.refinement_parameters["tries_per_cluster"]
        # Clamp to 1 so that the k values below are always increasing.
        step = int(max(1, step_as_float))
        full_matrix = self.matrixHandler.distance_matrix

        collected = []
        for cluster in clustering.clusters:
            original_id = cluster.id

            # Entering the original cluster as one more competitor means it is
            # preserved whenever no repartition is chosen over it.
            original_entry = {"type": "refined_base",
                              "clustering": Clustering([cluster]),
                              "parameters": {}}
            competitors = {original_id: original_entry}

            submatrix = get_submatrix(full_matrix, cluster.all_elements)

            # Try k = 2, 2 + step, ... (max_k itself is excluded by range).
            # TODO: Generate parameters with parameter generator
            for k in range(2, max_k, step):
                partitioned = self.repartition_with_kmedoids(cluster, k, submatrix)
                competitors["%s_%d" % (original_id, k)] = {"type": "refined",
                                                           "clustering": partitioned,
                                                           "parameters": {"k": k}}

            # Evaluate all the competitors.
            # NOTE(review): scheduler options come from the
            # ["clustering"]["control"] section - confirm that is intended.
            control_options = self.clustering_parameters["clustering"]["control"]
            scheduler = scheduling_tools.build_scheduler(control_options,
                                                         self.observer)
            populator = AnalysisPopulator(self.matrixHandler,
                                          self.trajectoryHandler,
                                          self.clustering_parameters)
            analysis = AnalysisRunner(scheduler, competitors, populator)
            analysis.evaluate()

            # The scores are not used here, only the winning id.
            best_id, _ignored_scores = BestClusteringSelector(
                self.clustering_parameters).choose_best(competitors)
            winner = competitors[best_id]["clustering"]
            collected.extend(winner.clusters)

        # Build the final clustering out of everything collected.
        result = {"type": "refined_clustering",
                  "clustering": Clustering(collected),
                  "parameters": self.refinement_parameters}
        return result