示例#1
0
    def test_get_submatrix(self):
        """
        Check the condensed data returned by get_submatrix for two matrices.

        The first case compares floating point values with a tolerance of 6
        decimals; the second one uses whole numbers and compares exactly.
        """
        # Case 1: 15 condensed values, written one upper-triangle row per line
        # (this layout is consistent with the expected values below).
        condensed_values = [0.2, 1.0, 0.3, 4.0, 6.1,
                            0.5, 0.6, 0.7, 0.5,
                            0.9, 0.8, 0.3,
                            0.4, 1.4,
                            2.9]
        source_matrix = CondensedMatrix(condensed_values)
        reduced = get_submatrix(source_matrix, [2, 3, 5])
        numpy.testing.assert_array_almost_equal([0.9, 0.3, 1.4], reduced.get_data(), 6)

        # Case 2: the condensed data is simply 1.0, 2.0, ..., 21.0.
        sequential_values = [float(number) for number in range(1, 22)]
        selected_elements = [2, 4, 6]
        reduced_sequential = get_submatrix(CondensedMatrix(sequential_values), selected_elements)
        numpy.testing.assert_array_equal([13, 15, 20], reduced_sequential.get_data())
示例#2
0
 def evaluate(self, clustering, condensed_distance_matrix):
     """
     Compute a normalised cohesion score for a clustering.

     For every cluster, the distances of all its unordered element pairs are
     summed and multiplied by 1/size. The accumulated value is divided by a
     normalising term: the sum of the condensed submatrix data restricted to
     the clustered elements, divided by the number of clustered elements.

     Returns 1 - (accumulated cohesion / normalising term), or 0. when the
     clustering has no clustered elements.
     """
     elements = sorted(clustering.get_all_clustered_elements())
     element_count = len(elements)
     if element_count <= 0:
         # Nothing was clustered, so there is nothing to measure.
         return 0.

     submatrix = get_submatrix(condensed_distance_matrix, elements)
     max_cohesion = numpy.sum(submatrix.get_data()) / element_count

     total_cohesion = 0
     for cluster in clustering.clusters:
         size = cluster.get_size()
         weight = 1. / size
         pair_distance_sum = 0.
         # Visit each unordered pair (first < second) exactly once.
         for first in range(size - 1):
             for second in range(first + 1, size):
                 pair_distance_sum += condensed_distance_matrix[cluster[first], cluster[second]]
         total_cohesion += weight * pair_distance_sum
     return 1 - total_cohesion / max_cohesion
示例#3
0
    def evaluate(self, clustering, condensed_distance_matrix):
        """
        Return a cohesion-based score for the given clustering.

        Each cluster contributes the sum of the distances between all of its
        element pairs (every pair counted once), scaled by 1/size. The sum of
        these contributions is normalised by the total of the condensed
        distances among all clustered elements divided by how many clustered
        elements there are.

        The result is 1 minus that normalised value. If no element is
        clustered at all, 0. is returned.
        """
        all_clustered = sorted(clustering.get_all_clustered_elements())
        how_many = len(all_clustered)
        if not how_many > 0:
            return 0.

        # Normalising term, computed over the clustered elements only.
        restricted_matrix = get_submatrix(condensed_distance_matrix, all_clustered)
        restricted_sum = numpy.sum(restricted_matrix.get_data())
        max_cohesion = restricted_sum / how_many

        accumulated = 0
        for current in clustering.clusters:
            members = current.get_size()
            scale = 1. / members
            intra_sum = 0.
            for a in range(members - 1):
                for b in range(a + 1, members):
                    pair = (current[a], current[b])
                    intra_sum = intra_sum + condensed_distance_matrix[pair]
            accumulated += scale * intra_sum
        return 1 - accumulated / max_cohesion
    def __kmedoids_compression(self, clustering, matrix_handler):
        """
        Choose representative elements for every cluster using k-medoids.

        Each cluster gets a number of representatives equal to its size times
        the ratio self.parameters["final_number_of_frames"] /
        clustering.total_number_of_elements, rounded up. A k-medoids run with
        that k ("EQUIDISTANT" seeding) is done over the cluster's own distance
        submatrix and the resulting medoids are collected.

        Returns a flat list with the representatives of all clusters.
        """
        selected = []
        for cluster in clustering.clusters:
            # Proportional share of the final frames for this cluster (rounded up).
            size = cluster.get_size()
            ratio = float(self.parameters["final_number_of_frames"]) / clustering.total_number_of_elements
            k = int(math.ceil(size * ratio))

            # Distances among the elements of this cluster only.
            local_matrix = get_submatrix(matrix_handler.distance_matrix, cluster.all_elements)

            # Run k-medoids over the local matrix.
            algorithm = KMedoidsAlgorithm(local_matrix)
            local_clustering = algorithm.perform_clustering({"k": k, "seeding_type": "EQUIDISTANT"})

            # Medoids are expressed in submatrix indices; wrap them in a cluster
            # so that the remapping can be reversed before storing them.
            local_medoids = local_clustering.get_medoids(local_matrix)
            medoids_cluster = Cluster(None, local_medoids)
            restored = Refiner.redefine_cluster_with_map(cluster, medoids_cluster)
            selected.extend(restored.all_elements)

        return selected
示例#5
0
    def test_get_submatrix(self):
        """
        Verify get_submatrix against two hand-computed expectations.

        First a matrix with fractional distances (checked up to 6 decimals),
        then a matrix whose condensed data is 1.0 to 21.0 (checked exactly).
        """
        fractional_data = [0.2, 1., 0.3, 4.0, 6.1, 0.5, 0.6, 0.7, 0.5, 0.9,
                           0.8, 0.3, 0.4, 1.4, 2.9]
        fractional_result = get_submatrix(CondensedMatrix(fractional_data), [2, 3, 5])
        numpy.testing.assert_array_almost_equal(
            [0.9, 0.3, 1.4], fractional_result.get_data(), 6)

        # Same values as writing 1.0, 2.0, ..., 21.0 explicitly.
        counting_data = [float(position) for position in range(1, 22)]
        counting_matrix = CondensedMatrix(counting_data)
        kept_elements = [2, 4, 6]
        counting_result = get_submatrix(counting_matrix, kept_elements)
        numpy.testing.assert_array_equal([13, 15, 20], counting_result.get_data())
示例#6
0
    def run(self, clustering):
        """
        Refine a clustering by trying to repartition each cluster with k-medoids.

        For every cluster a set of candidate clusterings is built: the cluster
        alone (type "refined_base") plus one k-medoids repartition (type
        "refined") for each k in range(2, max_partitions, try_step). All the
        candidates are evaluated and the clusters of the one chosen by
        BestClusteringSelector are kept.

        Returns a dictionary with the keys "type" ("refined_clustering"),
        "clustering" (built from all kept clusters) and "parameters".
        """
        partitions_limit = self.refinement_parameters["max_partitions"]
        tries = self.refinement_parameters["tries_per_cluster"]
        k_stride = int(max(1, float(partitions_limit) / tries))
        distance_matrix = self.matrixHandler.distance_matrix

        kept_clusters = []
        for cluster in clustering.clusters:
            base_id = cluster.id

            # The untouched cluster is a candidate too, so it competes with
            # its own repartitions for the best score.
            candidates = {}
            candidates[base_id] = {"type":"refined_base",
                                   "clustering": Clustering([cluster]),
                                   "parameters": {}}

            cluster_matrix = get_submatrix(distance_matrix, cluster.all_elements)

            # K-medoids candidates.
            # TODO: Generate parameters with parameter generator
            for k in range(2, partitions_limit, k_stride):
                repartitioned = self.repartition_with_kmedoids(cluster, k, cluster_matrix)
                candidate_id = "%s_%d"%(base_id,k)
                candidates[candidate_id] = {"type":"refined",
                                            "clustering": repartitioned,
                                            "parameters": {"k":k}}

            # Score every candidate, then ask the selector for the winner.
            scheduler = scheduling_tools.build_scheduler(
                                        self.clustering_parameters["clustering"]["control"],
                                        self.observer)
            populator = AnalysisPopulator(self.matrixHandler,
                                          self.trajectoryHandler,
                                          self.clustering_parameters)
            AnalysisRunner(scheduler, candidates, populator).evaluate()

            selector = BestClusteringSelector(self.clustering_parameters)
            winner_id, _scores = selector.choose_best(candidates)  # @UnusedVariable
            kept_clusters.extend(candidates[winner_id]["clustering"].clusters)

        # Wrap all the kept clusters into a single new clustering.
        return {"type":"refined_clustering",
                "clustering": Clustering(kept_clusters),
                "parameters": self.refinement_parameters}
示例#7
0
    def __kmedoids_compression(self, clustering, matrix_handler):
        """
        Compress a clustering into a list of representative elements.

        A k-medoids clustering ("EQUIDISTANT" seeding) is run inside each
        cluster, using only the distances among that cluster's elements. The
        value of k is the cluster size scaled by
        self.parameters["final_number_of_frames"] over
        clustering.total_number_of_elements, rounded up to an integer.
        The medoids found for every cluster are gathered into one list,
        which is the return value.
        """
        representatives = []
        for current_cluster in clustering.clusters:
            # Number of representatives this cluster should provide.
            members = current_cluster.get_size()
            frames_per_element = (
                float(self.parameters["final_number_of_frames"]) /
                clustering.total_number_of_elements)
            wanted = int(math.ceil(members * frames_per_element))

            # Distance matrix restricted to this cluster.
            cluster_matrix = get_submatrix(matrix_handler.distance_matrix,
                                           current_cluster.all_elements)

            # K-medoids over the restricted matrix.
            kmedoids_parameters = {
                "k": wanted,
                "seeding_type": "EQUIDISTANT"
            }
            inner_clustering = KMedoidsAlgorithm(
                cluster_matrix).perform_clustering(kmedoids_parameters)

            # The medoids refer to the restricted matrix; reverse the
            # remapping before adding them to the result.
            medoid_cluster = Cluster(
                None, inner_clustering.get_medoids(cluster_matrix))
            remapped_back = Refiner.redefine_cluster_with_map(
                current_cluster, medoid_cluster)
            representatives.extend(remapped_back.all_elements)

        return representatives
示例#8
0
    # The two trajectory files to compare are taken from the command line.
    parameters["data"]["files"] = [sys.argv[1], sys.argv[2]]

    frames_ini = get_number_of_frames(sys.argv[1])
    frames_proto = get_number_of_frames(sys.argv[2])
    # Single-argument print(...) gives the same output as the Python 2 print
    # statement and is also valid Python 3 syntax.
    print("%s -> %s" % (sys.argv[1], frames_ini))
    print("%s -> %s" % (sys.argv[2], frames_proto))

    try:
        Driver(Observer()).run(parameters)
    except SystemExit:
        # Expected outcome: the search is unproductive and the driver exits.
        # The matrix is loaded again from the workspace to analyse it.
        handler = MatrixHandler({
            "method": "load",
            "parameters": {
                "path": parameters["workspace"]["base"]+"/matrix/matrix"
            }
        })
        matrix = handler.create_matrix(None)

        # Distances among the frames_proto elements that follow the first
        # frames_ini ones (presumably the frames of the second file - verify).
        submatrix = get_submatrix(matrix, range(frames_ini, frames_ini + frames_proto))
        matrixToImage(submatrix, parameters["workspace"]["base"] +"/submatrix.png")
        print("Original mean: %s" % (get_submatrix(matrix, range(0, frames_ini)).calculateMean(),))

        # Cross distances: (distance, index in first range, index in second range).
        values = []
        for i in range(0, frames_ini):
            for j in range(frames_ini, frames_ini + frames_proto):
                values.append((handler.distance_matrix[i, j], i, j - frames_ini))
        for d, i, j in sorted(values):
            print("%d %d %.2f" % (i, j, d))

        # Average only the distance component. numpy.mean(values) would flatten
        # the (distance, i, j) tuples and mix the frame indices into the mean.
        cross_distances = [entry[0] for entry in values]
        print("Combined mean: %s" % (numpy.mean(cross_distances),))
示例#9
0
    # Single-argument print(...) gives the same output as the Python 2 print
    # statement and is also valid Python 3 syntax.
    print("%s -> %s" % (sys.argv[1], frames_ini))
    print("%s -> %s" % (sys.argv[2], frames_proto))

    try:
        Driver(Observer()).run(parameters)
    except SystemExit:
        # Expected outcome: the search is unproductive and the driver exits.
        # The matrix is loaded again from the workspace to analyse it.
        handler = MatrixHandler({
            "method": "load",
            "parameters": {
                "path": parameters["workspace"]["base"] + "/matrix/matrix"
            }
        })
        matrix = handler.create_matrix(None)

        # Distances among the frames_proto elements that follow the first
        # frames_ini ones (presumably the frames of the second file - verify).
        submatrix = get_submatrix(matrix,
                                  range(frames_ini, frames_ini + frames_proto))
        matrixToImage(submatrix,
                      parameters["workspace"]["base"] + "/submatrix.png")
        original_mean = get_submatrix(matrix, range(0, frames_ini)).calculateMean()
        print("Original mean: %s" % (original_mean,))

        # Cross distances: (distance, index in first range, index in second range).
        values = []
        for i in range(0, frames_ini):
            for j in range(frames_ini, frames_ini + frames_proto):
                values.append((handler.distance_matrix[i, j], i, j - frames_ini))
        for d, i, j in sorted(values):
            print("%d %d %.2f" % (i, j, d))

        # Average only the distance component. numpy.mean(values) would flatten
        # the (distance, i, j) tuples and mix the frame indices into the mean.
        cross_distances = [entry[0] for entry in values]
        print("Combined mean: %s" % (numpy.mean(cross_distances),))
示例#10
0
    def run(self, clustering):
        """
        Try to improve every cluster of a clustering by splitting it with k-medoids.

        Each cluster competes against k-medoids repartitions of itself (one
        per k in range(2, max_partitions, try_step)). The competitors are
        evaluated through an AnalysisRunner, and BestClusteringSelector
        decides which one contributes its clusters to the result.

        Returns a dictionary of the form
        {"type": "refined_clustering", "clustering": ..., "parameters": ...}.
        """
        max_partitions = self.refinement_parameters["max_partitions"]
        raw_step = (float(max_partitions) /
                    self.refinement_parameters["tries_per_cluster"])
        try_step = int(max(1, raw_step))
        full_matrix = self.matrixHandler.distance_matrix

        result_clusters = []
        for original_cluster in clustering.clusters:
            base_id = original_cluster.id

            # Start the competition with the cluster as it is, so that it can
            # also be selected as the best option.
            competitors = {
                base_id: {
                    "type": "refined_base",
                    "clustering": Clustering([original_cluster]),
                    "parameters": {}
                }
            }

            own_matrix = get_submatrix(full_matrix,
                                       original_cluster.all_elements)

            # Add the k-medoids partitions.
            # TODO: Generate parameters with parameter generator
            for k in range(2, max_partitions, try_step):
                partition = self.repartition_with_kmedoids(
                    original_cluster, k, own_matrix)
                competitors["%s_%d" % (base_id, k)] = {
                    "type": "refined",
                    "clustering": partition,
                    "parameters": {
                        "k": k
                    }
                }

            # Evaluate every competitor.
            runner = AnalysisRunner(
                scheduling_tools.build_scheduler(
                    self.clustering_parameters["clustering"]["control"],
                    self.observer),
                competitors,
                AnalysisPopulator(self.matrixHandler, self.trajectoryHandler,
                                  self.clustering_parameters))
            runner.evaluate()

            # Keep the clusters of the best one; the scores are not needed.
            best_id, _all_scores = BestClusteringSelector(
                self.clustering_parameters).choose_best(competitors)
            best_clustering = competitors[best_id]["clustering"]
            result_clusters.extend(best_clustering.clusters)

        # Build the refined clustering from all the selected clusters.
        refined = Clustering(result_clusters)
        return {
            "type": "refined_clustering",
            "clustering": refined,
            "parameters": self.refinement_parameters
        }