Пример #1
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Checks that distance between clustering features of two clusters is symmetric and agrees with reference calculation.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Distance measurement that is verified.

     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # Distance from 1 to 2 must be the same as distance from 2 to 1.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Compare with calculation that is performed by utils for the same measurement.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     # Measurements below are compared with tolerance (numpy.isclose) instead of exact equality.
     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
Пример #2
0
    def __merge_by_signle_link(self):
        """!
        @brief Merges the pair of clusters whose nearest objects are closest to each other (single link).
        @details Objects of the second cluster of the chosen pair are appended to the first cluster,
                 after that the second cluster is removed from the list of clusters.

        """

        infinity = float('Inf')
        best_distance = infinity
        indexes = None

        cluster_count = len(self.__clusters)
        for first in range(0, cluster_count):
            for second in range(first + 1, cluster_count):

                # Link between two clusters is defined by their nearest pair of objects.
                nearest = infinity
                for object1 in self.__clusters[first]:
                    for object2 in self.__clusters[second]:
                        distance = euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])
                        nearest = min(nearest, distance)

                if nearest < best_distance:
                    best_distance = nearest
                    indexes = [first, second]

        receiver, absorbed = indexes[0], indexes[1]
        self.__clusters[receiver] += self.__clusters[absorbed]
        self.__clusters.pop(absorbed)   # absorbed cluster is not required anymore.
Пример #3
0
 def __update_clusters(self):
     """!
     @brief Assigns each point to the center for which euclidean_distance_sqrt() returns the smallest value.
     @details Clusters that did not capture any point are removed from the result.

     @return (list) Updated clusters as list of clusters. Each cluster contains indexes of objects from data.

     """

     clusters = [[] for _ in range(len(self.__centers))]
     for index_point in range(len(self.__pointer_data)):
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(self.__centers)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__centers[index])

             # The first center is always taken as the initial candidate. Value equality is used
             # because identity comparison with an integer literal ('index is 0') depends on
             # interpreter internals and triggers SyntaxWarning on Python 3.8+.
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     # If cluster is not able to capture object it should be removed
     clusters = [cluster for cluster in clusters if len(cluster) > 0]

     return clusters
Пример #4
0
 def __update_clusters(self, centers, available_indexes = None):
     """!
     @brief Assigns each considered point to the center for which euclidean_distance_sqrt() returns the smallest value.
     @details Empty clusters are kept in the result, so the returned list has one cluster per center.

     @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
     @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used.

     @return (list) Updated clusters, each cluster contains indexes of captured points.

     """

     if available_indexes is None:
         bypass = range(len(self.__pointer_data))
     else:
         bypass = available_indexes

     clusters = [[] for _ in range(len(centers))]
     for index_point in bypass:
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(centers)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index])

             # The first center always becomes the initial candidate. Value equality replaces
             # 'index is 0': identity comparison with an integer literal depends on interpreter
             # internals and triggers SyntaxWarning on Python 3.8+.
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     return clusters
Пример #5
0
 def __improve_parameters(self, centers, available_indexes = None):
     """!
     @brief Performs k-means clustering in the specified region.

     @param[in] centers (list): Centers of clusters.
     @param[in] available_indexes (list): Indexes that defines which points can be used for k-means clustering, if None - then all points are used.

     @return (tuple) Allocated clusters and their centers: (clusters, centers). Each cluster contains indexes of objects in list of data.

     """

     # Built-in infinity is used instead of 'numpy.Inf': that alias was removed in NumPy 2.0.
     changes = float('inf')

     # Tolerance is squared, presumably because euclidean_distance_sqrt() returns squared distance - confirm against utils.
     stop_condition = self.__tolerance * self.__tolerance

     clusters = []

     while changes > stop_condition:
         clusters = self.__update_clusters(centers, available_indexes)
         clusters = [cluster for cluster in clusters if len(cluster) > 0]

         updated_centers = self.__update_centers(clusters)

         # NOTE(review): when empty clusters are dropped above, positions in 'updated_centers' may not
         # correspond to positions in 'centers' - confirm that pairing by index is intended.
         changes = max(euclidean_distance_sqrt(centers[index], updated_centers[index])
                       for index in range(len(updated_centers)))

         centers = updated_centers

     return (clusters, centers)
Пример #6
0
    def __update_clusters(self, medoids):
        """!
        @brief Forms clusters in line with specified medoids by calculation distance from each point to medoids.
        @details Updates internal list of clusters and internal list that stores for each point index of the cluster
                 that captured it. Clusters without points are removed from the list of clusters.

        @param[in] medoids (list): Indexes of points from the input data that are used as medoids.

        """

        self.__belong = [0] * len(self.__pointer_data)
        self.__clusters = [[] for _ in range(len(medoids))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0

            for index in range(len(medoids)):
                dist = euclidean_distance_sqrt(
                    self.__pointer_data[index_point],
                    self.__pointer_data[medoids[index]])

                # The first medoid always becomes the initial candidate. Value equality replaces
                # 'index is 0': identity comparison with an integer literal depends on interpreter
                # internals and triggers SyntaxWarning on Python 3.8+.
                if (index == 0) or (dist < dist_optim):
                    index_optim = index
                    dist_optim = dist

            self.__clusters[index_optim].append(index_point)
            self.__belong[index_point] = index_optim

        # If cluster is not able to capture object it should be removed.
        # NOTE(review): values stored in '__belong' refer to cluster positions before this removal - confirm with users of '__belong'.
        self.__clusters = [
            cluster for cluster in self.__clusters if len(cluster) > 0
        ]
Пример #7
0
    def __update_clusters(self):
        """!
        @brief Assigns each point to the median for which euclidean_distance_sqrt() returns the smallest value.
        @details Nearest points are captured by according clusters and as a result clusters are updated.
                 Clusters that did not capture any point are removed from the result.

        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

        """

        clusters = [[] for _ in range(len(self.__medians))]
        for index_point in range(len(self.__pointer_data)):
            index_optim = -1
            dist_optim = 0.0

            for index in range(len(self.__medians)):
                dist = euclidean_distance_sqrt(
                    self.__pointer_data[index_point], self.__medians[index])

                # The first median always becomes the initial candidate. Value equality replaces
                # 'index is 0': identity comparison with an integer literal depends on interpreter
                # internals and triggers SyntaxWarning on Python 3.8+.
                if (index == 0) or (dist < dist_optim):
                    index_optim = index
                    dist_optim = dist

            clusters[index_optim].append(index_point)

        # If cluster is not able to capture object it should be removed
        clusters = [cluster for cluster in clusters if len(cluster) > 0]

        return clusters
Пример #8
0
    def __improve_parameters(self, centers, available_indexes = None):
        """!
        @brief Performs k-means clustering in the specified region.

        @param[in] centers (list): Centers of clusters.
        @param[in] available_indexes (list): Indexes that defines which points can be used for k-means clustering, if None - then all points are used.

        @return (tuple) Allocated clusters and their centers: (clusters, centers). Each cluster contains indexes of objects in list of data.

        """

        # Built-in infinity is used instead of 'numpy.Inf': that alias was removed in NumPy 2.0.
        changes = float('inf')

        # Tolerance is squared, presumably because euclidean_distance_sqrt() returns squared distance - confirm against utils.
        stop_condition = self.__tolerance * self.__tolerance

        clusters = []

        while changes > stop_condition:
            clusters = self.__update_clusters(centers, available_indexes)
            clusters = [cluster for cluster in clusters if len(cluster) > 0]

            updated_centers = self.__update_centers(clusters)

            # NOTE(review): when empty clusters are dropped above, positions in 'updated_centers' may not
            # correspond to positions in 'centers' - confirm that pairing by index is intended.
            changes = max(euclidean_distance_sqrt(centers[index], updated_centers[index])
                          for index in range(len(updated_centers)))

            centers = updated_centers

        return (clusters, centers)
Пример #9
0
    def __update_clusters(self):
        """!
        @brief Assigns every non-medoid point to the medoid for which euclidean_distance_sqrt() returns the smallest value.
        @details Each cluster initially contains index of its own medoid. Points whose indexes are medoid indexes
                 are skipped, nearest points are captured by according clusters.

        @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

        """

        clusters = [[self.__medoid_indexes[i]] for i in range(len(self.__medoids))]

        # Set is built once so that membership check does not scan the list of medoid indexes for every point.
        medoid_indexes = set(self.__medoid_indexes)

        for index_point in range(len(self.__pointer_data)):
            if index_point in medoid_indexes:
                continue

            index_optim = -1
            dist_optim = float('Inf')

            for index in range(len(self.__medoids)):
                dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medoids[index])

                # The first medoid always becomes the initial candidate. Value equality replaces
                # 'index is 0': identity comparison with an integer literal depends on interpreter
                # internals and triggers SyntaxWarning on Python 3.8+.
                if (index == 0) or (dist < dist_optim):
                    index_optim = index
                    dist_optim = dist

            clusters[index_optim].append(index_point)

        return clusters
Пример #10
0
 def __update_clusters(self, centers, available_indexes = None):
     """!
     @brief Assigns each considered point to the center for which euclidean_distance_sqrt() returns the smallest value.
     @details Empty clusters are kept in the result, so the returned list has one cluster per center.

     @param[in] centers (list): Coordinates of centers of clusters that are represented by list: [center1, center2, ...].
     @param[in] available_indexes (list): Indexes that define which points can be used from input data, if None - then all points are used.

     @return (list) Updated clusters, each cluster contains indexes of captured points.

     """

     if available_indexes is None:
         bypass = range(len(self.__pointer_data))
     else:
         bypass = available_indexes

     clusters = [[] for _ in range(len(centers))]
     for index_point in bypass:
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(centers)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], centers[index])

             # The first center always becomes the initial candidate. Value equality replaces
             # 'index is 0': identity comparison with an integer literal depends on interpreter
             # internals and triggers SyntaxWarning on Python 3.8+.
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     return clusters
Пример #11
0
    def get_distance_matrix(self):
        """!
        @brief Calculates distance matrix (U-matrix).
        @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
                 Each element is the average of euclidean_distance_sqrt() between weights of the neuron and weights of
                 its neighbors. Element of a neuron that has no neighbors is left equal to 0.0.

        @return (list) Distance matrix (U-matrix).

        @see show_distance_matrix()
        @see get_density_matrix()

        """
        if self.__ccore_som_pointer is not None:
            self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

            if self._conn_type != type_conn.func_neighbor:
                self._neighbors = wrapper.som_get_neighbors(
                    self.__ccore_som_pointer)

        # Connections are created once before the traversal instead of being re-created for every neuron.
        # NOTE(review): assumes that _create_connections() produces the same neighbors on every call - confirm.
        if self._conn_type == type_conn.func_neighbor:
            self._create_connections(type_conn.grid_eight)

        distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

        for i in range(self._rows):
            for j in range(self._cols):
                neuron_index = i * self._cols + j
                neighbors = self._neighbors[neuron_index]

                # Neuron without neighbors would lead to division by zero, its value stays 0.0.
                if len(neighbors) == 0:
                    continue

                for neighbor_index in neighbors:
                    distance_matrix[i][j] += euclidean_distance_sqrt(
                        self._weights[neuron_index],
                        self._weights[neighbor_index])

                distance_matrix[i][j] /= len(neighbors)

        return distance_matrix
Пример #12
0
 def __merge_by_average_link(self):
     """!
     @brief Merges the pair of clusters that has the smallest average link value.
     @details For each pair of clusters values of euclidean_distance_sqrt() between all their objects are summed
              and divided by total amount of objects in both clusters. Objects of the second cluster of the best
              pair are appended to the first cluster, after that the second cluster is removed.

     """

     best_average = float('Inf')

     cluster_count = len(self.__clusters)
     for first in range(0, cluster_count):
         for second in range(first + 1, cluster_count):

             # Accumulate distances between every object of the first cluster and every object of the second one.
             accumulated = 0.0
             for object1 in self.__clusters[first]:
                 for object2 in self.__clusters[second]:
                     accumulated += euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])

             # NOTE(review): divisor is sum of cluster sizes, not amount of object pairs - confirm that it is intended.
             total_objects = len(self.__clusters[first]) + len(self.__clusters[second])
             accumulated /= total_objects

             if accumulated < best_average:
                 best_average = accumulated
                 indexes = [first, second]

     receiver, absorbed = indexes[0], indexes[1]
     self.__clusters[receiver] += self.__clusters[absorbed]
     self.__clusters.pop(absorbed)   # absorbed cluster is not required anymore.
Пример #13
0
 def get_distance_matrix(self):
     """!
     @brief Calculates distance matrix (U-matrix).
     @details The U-Matrix visualizes based on the distance in input space between a weight vector and its neighbors on map.
              Each element is the average of euclidean_distance_sqrt() between weights of the neuron and weights of
              its neighbors. Element of a neuron that has no neighbors is left equal to 0.0.

     @return (list) Distance matrix (U-matrix).

     @see show_distance_matrix()
     @see get_density_matrix()

     """
     if self.__ccore_som_pointer is not None:
         self._weights = wrapper.som_get_weights(self.__ccore_som_pointer)

         if self._conn_type != type_conn.func_neighbor:
             self._neighbors = wrapper.som_get_neighbors(self.__ccore_som_pointer)

     # Connections are created once before the traversal instead of being re-created for every neuron.
     # NOTE(review): assumes that _create_connections() produces the same neighbors on every call - confirm.
     if self._conn_type == type_conn.func_neighbor:
         self._create_connections(type_conn.grid_eight)

     distance_matrix = [[0.0] * self._cols for _ in range(self._rows)]

     for i in range(self._rows):
         for j in range(self._cols):
             neuron_index = i * self._cols + j
             neighbors = self._neighbors[neuron_index]

             # Neuron without neighbors would lead to division by zero, its value stays 0.0.
             if len(neighbors) == 0:
                 continue

             for neighbor_index in neighbors:
                 distance_matrix[i][j] += euclidean_distance_sqrt(self._weights[neuron_index], self._weights[neighbor_index])

             distance_matrix[i][j] /= len(neighbors)

     return distance_matrix
Пример #14
0
 def __recursive_nearest_nodes(self, point, distance, sqrt_distance, node, best_nodes):
     """!
     @brief Collects neighbors as tuples (distance, node) that are located in area that is covered by distance.
     @details Subtrees are visited first (right, then left), the node itself is checked last.

     @param[in] point (list): Coordinates that are considered as a centroid for searching.
     @param[in] distance (double): Distance from the center where searching is performed.
     @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
     @param[in] node (node): Node from that searching is performed.
     @param[in|out] best_nodes (list): List of found nodes, it is extended in place.

     """

     # Borders of the search area along the coordinate by which this node splits the space.
     split_value = node.data[node.disc]
     lower_border = split_value - distance
     upper_border = split_value + distance

     if (node.right is not None) and (point[node.disc] >= lower_border):
         self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.right, best_nodes)

     if (node.left is not None) and (point[node.disc] < upper_border):
         self.__recursive_nearest_nodes(point, distance, sqrt_distance, node.left, best_nodes)

     distance_to_node = euclidean_distance_sqrt(point, node.data)
     if distance_to_node <= sqrt_distance:
         best_nodes.append((distance_to_node, node))
Пример #15
0
 def process(self):
     """!
     @brief Performs cluster analysis in line with rules of K-Means algorithm.
     @details Clusters and centers are updated until the largest value of euclidean_distance_sqrt() between
              previous and updated centers does not exceed squared tolerance.

     @remark Results of clustering can be obtained using corresponding get methods.

     @see get_clusters()
     @see get_centers()

     """

     if self.__ccore is True:
         # Clusters are obtained from wrapper.kmeans(), centers are calculated for them afterwards.
         self.__clusters = wrapper.kmeans(self.__pointer_data, self.__centers, self.__tolerance)
         self.__centers = self.__update_centers()
         return

     max_shift = float('inf')
     threshold = self.__tolerance * self.__tolerance

     # Points and initial centers must have the same dimension.
     if len(self.__pointer_data[0]) != len(self.__centers[0]):
         raise NameError('Dimension of the input data and dimension of the initial cluster centers must be equal.')

     while max_shift > threshold:
         self.__clusters = self.__update_clusters()

         # Shift is measured against previous centers, therefore they are replaced only after that.
         next_centers = self.__update_centers()

         max_shift = max(euclidean_distance_sqrt(self.__centers[position], next_centers[position])
                         for position in range(len(next_centers)))

         self.__centers = next_centers
Пример #16
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Checks that distance between clustering features of two clusters is symmetric and agrees with reference calculation.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Distance measurement that is verified.

     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # Distance from 1 to 2 must be the same as distance from 2 to 1.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Compare with calculation that is performed by utils for the same measurement.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     # Measurements below are compared with tolerance (numpy.isclose) instead of exact equality.
     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
Пример #17
0
 def get_distance(self, entry, type_measurement):
     """!
     @brief Calculates distance between two clusters in line with measurement type.

     @details In case of usage CENTROID_EUCLIDIAN_DISTANCE square euclidian distance will be returned.
              Square root should be taken from the result for obtaining real euclidian distance between
              entries.

     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

     @return (double) Distance between two clusters.

     @exception AssertionError If type of measurement is not supported.

     """

     # Measurement types are compared by value: identity comparison works only while
     # the constants happen to be the very same objects.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         return manhattan_distance(entry.get_centroid(), self.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         return self.__get_average_inter_cluster_distance(entry)

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         return self.__get_average_intra_cluster_distance(entry)

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         return self.__get_variance_increase_distance(entry)

     # Raised explicitly: 'assert 0' is removed when Python runs with -O and the method would silently return None.
     raise AssertionError("Unsupported type of distance measurement '%s'." % str(type_measurement))
Пример #18
0
    def __merge_by_average_link(self):
        """!
        @brief Merges the pair of clusters that has the smallest average link value.
        @details For each pair of clusters values of euclidean_distance_sqrt() between all their objects are summed
                 and divided by total amount of objects in both clusters. Objects of the second cluster of the best
                 pair are appended to the first cluster, after that the second cluster is removed.

        """

        best_average = float('Inf')

        cluster_count = len(self.__clusters)
        for first in range(0, cluster_count):
            for second in range(first + 1, cluster_count):

                # Accumulate distances between every object of the first cluster and every object of the second one.
                accumulated = 0.0
                for object1 in self.__clusters[first]:
                    for object2 in self.__clusters[second]:
                        accumulated += euclidean_distance_sqrt(
                            self.__pointer_data[object1],
                            self.__pointer_data[object2])

                # NOTE(review): divisor is sum of cluster sizes, not amount of object pairs - confirm that it is intended.
                total_objects = len(self.__clusters[first]) + len(self.__clusters[second])
                accumulated /= total_objects

                if accumulated < best_average:
                    best_average = accumulated
                    indexes = [first, second]

        receiver, absorbed = indexes[0], indexes[1]
        self.__clusters[receiver] += self.__clusters[absorbed]
        self.__clusters.pop(absorbed)
Пример #19
0
    def __recursive_nearest_nodes(self, point, distance, sqrt_distance,
                                  node_head, best_nodes):
        """!
        @brief Collects neighbors as tuples (distance, node) that are located in area that is covered by distance.
        @details Subtrees are visited first (right, then left), the node itself is checked last.

        @param[in] point (list): Coordinates that are considered as a centroid for searching.
        @param[in] distance (double): Distance from the center where searching is performed.
        @param[in] sqrt_distance (double): Square distance from the center where searching is performed.
        @param[in] node_head (node): Node from that searching is performed.
        @param[in|out] best_nodes (list): List of found nodes, it is extended in place.

        """

        # Borders are evaluated only when the corresponding child exists.
        if (node_head.right is not None) and \
                (point[node_head.disc] >= node_head.data[node_head.disc] - distance):
            self.__recursive_nearest_nodes(point, distance, sqrt_distance,
                                           node_head.right, best_nodes)

        if (node_head.left is not None) and \
                (point[node_head.disc] < node_head.data[node_head.disc] + distance):
            self.__recursive_nearest_nodes(point, distance, sqrt_distance,
                                           node_head.left, best_nodes)

        distance_to_node = euclidean_distance_sqrt(point, node_head.data)
        if distance_to_node <= sqrt_distance:
            best_nodes.append((distance_to_node, node_head))
Пример #20
0
    def __merge_by_signle_link(self):
        """!
        @brief Merges the pair of clusters whose nearest objects are closest to each other (single link).
        @details Objects of the second cluster of the chosen pair are appended to the first cluster,
                 after that the second cluster is removed from the list of clusters.

        """

        infinity = float('Inf')
        best_distance = infinity
        indexes = None

        cluster_count = len(self.__clusters)
        for first in range(0, cluster_count):
            for second in range(first + 1, cluster_count):

                # Link between two clusters is defined by their nearest pair of objects.
                nearest = infinity
                for object1 in self.__clusters[first]:
                    for object2 in self.__clusters[second]:
                        distance = euclidean_distance_sqrt(self.__pointer_data[object1], self.__pointer_data[object2])
                        nearest = min(nearest, distance)

                if nearest < best_distance:
                    best_distance = nearest
                    indexes = [first, second]

        receiver, absorbed = indexes[0], indexes[1]
        self.__clusters[receiver] += self.__clusters[absorbed]
        self.__clusters.pop(absorbed)   # absorbed cluster is not required anymore.
Пример #21
0
 def __update_clusters(self):
     """!
     @brief Assigns each point to the median for which euclidean_distance_sqrt() returns the smallest value.
     @details Nearest points are captured by according clusters and as a result clusters are updated.
              Clusters that did not capture any point are removed from the result.

     @return (list) updated clusters as list of clusters where each cluster contains indexes of objects from data.

     """

     clusters = [[] for _ in range(len(self.__medians))]
     for index_point in range(len(self.__pointer_data)):
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(self.__medians)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__medians[index])

             # The first median always becomes the initial candidate. Value equality replaces
             # 'index is 0': identity comparison with an integer literal depends on interpreter
             # internals and triggers SyntaxWarning on Python 3.8+.
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         clusters[index_optim].append(index_point)

     # If cluster is not able to capture object it should be removed
     clusters = [cluster for cluster in clusters if len(cluster) > 0]

     return clusters
Пример #22
0
 def get_distance(self, entry, type_measurement):
     """!
     @brief Calculates distance between two clusters in line with measurement type.

     @param[in] entry (cfentry): Clustering feature to which distance should be obtained.
     @param[in] type_measurement (measurement_type): Distance measurement algorithm between two clusters.

     @return (double) Distance between two clusters.

     @exception AssertionError If type of measurement is not supported.

     """

     # Measurement types are compared by value: identity comparison works only while
     # the constants happen to be the very same objects.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         return euclidean_distance_sqrt(entry.get_centroid(), self.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         return manhattan_distance(entry.get_centroid(), self.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         return self.__get_average_inter_cluster_distance(entry)

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         return self.__get_average_intra_cluster_distance(entry)

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         return self.__get_variance_increase_distance(entry)

     # Raised explicitly: 'assert 0' is removed when Python runs with -O and the method would silently return None.
     raise AssertionError("Unsupported type of distance measurement '%s'." % str(type_measurement))
Пример #23
0
 def process(self):
     """!
     @brief Performs cluster analysis in line with rules of K-Medoids algorithm.
     @details Clusters and medoids are updated until the largest value of euclidean_distance_sqrt() between
              previous and updated medoids does not exceed squared tolerance.

     @remark Results of clustering can be obtained using corresponding get methods.

     @see get_clusters()
     @see get_medoids()

     """

     if self.__ccore is True:
         # Clusters are obtained from wrapper.kmedoids(), medoids are calculated for them afterwards.
         self.__clusters = wrapper.kmedoids(self.__pointer_data, self.__medoid_indexes, self.__tolerance)
         self.__medoids, self.__medoid_indexes = self.__update_medoids()
         return

     max_shift = float('inf')
     threshold = self.__tolerance * self.__tolerance

     while max_shift > threshold:
         self.__clusters = self.__update_clusters()

         # Shift is measured against previous medoids, therefore they are replaced only after that.
         next_medoids, next_medoid_indexes = self.__update_medoids()

         max_shift = max(euclidean_distance_sqrt(self.__medoids[position], next_medoids[position])
                         for position in range(len(next_medoids)))

         self.__medoids = next_medoids
         self.__medoid_indexes = next_medoid_indexes
Пример #24
0
 def process(self):
     """!
     @brief Performs cluster analysis in line with rules of K-Medians algorithm.
     @details Clusters and medians are updated until the largest value of euclidean_distance_sqrt() between
              previous and updated medians does not exceed squared tolerance.

     @remark Results of clustering can be obtained using corresponding get methods.

     @see get_clusters()
     @see get_medians()

     """

     max_shift = float('inf')
     threshold = self.__tolerance * self.__tolerance

     # Points and initial medians must have the same dimension.
     if len(self.__pointer_data[0]) != len(self.__medians[0]):
         raise NameError('Dimension of the input data and dimension of the initial cluster medians must be equal.')

     while max_shift > threshold:
         self.__clusters = self.__update_clusters()

         # Shift is measured against previous medians, therefore they are replaced only after that.
         next_medians = self.__update_medians()

         max_shift = max(euclidean_distance_sqrt(self.__medians[position], next_medians[position])
                         for position in range(len(next_medians)))

         self.__medians = next_medians
Пример #25
0
 def __init__(self, rows, cols, conn_type = type_conn.grid_eight, parameters = None, ccore = False):
     """!
     @brief Constructor of self-organized map.
     @details When CCORE is not used, locations of neurons, awards, captured objects, distances between
              locations of neurons and (except function neighbour type) connections are prepared here.

     @param[in] rows (uint): Number of neurons in the column (number of rows).
     @param[in] cols (uint): Number of neurons in the row (number of columns).
     @param[in] conn_type (type_conn): Type of connection between oscillators in the network (grid four, grid eight, honeycomb, function neighbour).
     @param[in] parameters (som_parameters): Other specific parameters, default parameters are created if None is specified.
     @param[in] ccore (bool): If True simulation is performed by CCORE library (C++ implementation of pyclustering).

     """

     # Map geometry is stored regardless of core implementation, for example, for network demonstration.
     self._cols = cols
     self._rows = rows
     self._size = cols * rows
     self._conn_type = conn_type

     self._params = parameters if (parameters is not None) else som_parameters()

     # Initial radius is chosen from the size of the map when it is not specified.
     if self._params.init_radius is None:
         if (cols + rows) / 4.0 > 1.0:
             radius = 2.0
         elif (cols > 1) and (rows > 1):
             radius = 1.5
         else:
             radius = 1.0

         self._params.init_radius = radius

     if ccore is True:
         self.__ccore_som_pointer = wrapper.som_create(rows, cols, conn_type, self._params)
         return

     # location: neurons are enumerated row by row.
     self._location = [[float(row), float(col)] for row in range(self._rows) for col in range(self._cols)]

     # awards
     self._award = [0] * self._size
     self._capture_objects = [[] for _ in range(self._size)]

     # distances: matrix is symmetric, so each pair of neurons is calculated once.
     self._sqrt_distances = [[[] for _ in range(self._size)] for _ in range(self._size)]
     for first in range(self._size):
         for second in range(first, self._size):
             value = euclidean_distance_sqrt(self._location[first], self._location[second])
             self._sqrt_distances[first][second] = value
             self._sqrt_distances[second][first] = value

     # connections
     if conn_type != type_conn.func_neighbor:
         self._create_connections(conn_type)
Пример #26
0
 def _competition(self, x):
     """!
     @brief Finds the neuron whose weights are the closest to the input pattern.

     @param[in] x (list): Input pattern from the input data set, for example, coordinates of a point.

     @return (uint) Index of the winner neuron; the neuron with the lowest index wins a tie.

     """

     # Start from the first neuron and keep the best candidate seen so far.
     winner, winner_distance = 0, euclidean_distance_sqrt(self._weights[0], x)

     for neuron in range(1, self._size):
         neuron_distance = euclidean_distance_sqrt(self._weights[neuron], x)
         if neuron_distance < winner_distance:
             winner, winner_distance = neuron, neuron_distance

     return winner
Пример #27
0
    def _competition(self, x):
        """!
        @brief Finds the neuron whose weights are the closest to the input pattern.

        @param[in] x (list): Input pattern from the input data set, for example, coordinates of a point.

        @return (uint) Index of the winner neuron; the neuron with the lowest index wins a tie.

        """

        # The first neuron is always evaluated, the remaining ones up to the size of the network.
        distances = [euclidean_distance_sqrt(self._weights[0], x)]
        for neuron in range(1, self._size):
            distances.append(euclidean_distance_sqrt(self._weights[neuron], x))

        # index() returns the first occurrence, so the lowest index wins a tie.
        return distances.index(min(distances))
Пример #28
0
 def __neighbor_indexes(self, point):
     """!
     @brief Returns indexes of the points that are located within the connectivity radius of the specified point.

     @details Only the point itself is excluded from the result. Other points that have the same coordinates
              are returned as neighbors: excluding them by comparing coordinates would hide duplicates
              of the point from its own neighborhood.

     @param[in] point (uint): Index of the point for which neighbors should be returned.

     @return (list) Indexes of neighbors in line with the connectivity radius.

     """

     # The threshold is self.__sqrt_eps, presumably the squared radius to match a squared
     # distance returned by euclidean_distance_sqrt() (TODO confirm).
     center = self.__pointer_data[point]
     return [index for index, candidate in enumerate(self.__pointer_data)
             if (index != point) and (euclidean_distance_sqrt(center, candidate) <= self.__sqrt_eps)]
Пример #29
0
    def __calculate_weight(self, stimulus1, stimulus2):
        """!
        @brief Computes weight of the connection between two neurons from their external stimuli.

        @details The weight is exp(-d / (2 * a)), where d is the value returned by euclidean_distance_sqrt()
                 for the stimuli and a is the stored average distance.

        @param[in] stimulus1 (list): External stimulus of the first neuron.
        @param[in] stimulus2 (list): External stimulus of the second neuron.

        @return (double) Weight between neurons that are under specified stimulus.

        """

        separation = euclidean_distance_sqrt(stimulus1, stimulus2)
        decay_scale = 2.0 * self.__average_distance
        return math.exp(-separation / decay_scale)
Пример #30
0
    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using bayesian information criterion.

        @param[in] clusters (list): Clusters (lists of indexes of points) for which the criterion is calculated.
        @param[in] centers (list): Centers of the clusters.

        @return (double) Sum of the scores of all clusters. When the amount of points does not exceed
                the amount of clusters every score stays infinite.
                High value of splitting criterion means that current structure is much better.

        @see __minimum_noiseless_description_length(clusters, centers)

        """

        amount_clusters = len(clusters)
        dimension = len(self.__pointer_data[0])

        # Estimation of the noise variance: accumulate distances to centers and count points.
        variance = 0.0
        amount_points = 0.0
        for position, cluster in enumerate(clusters):
            for index_point in cluster:
                variance += euclidean_distance_sqrt(
                    self.__pointer_data[index_point], centers[position])

            amount_points += len(cluster)

        if not (amount_points - amount_clusters > 0):
            # The variance cannot be estimated, every cluster keeps an infinite score.
            return sum([float('inf')] * amount_clusters)

        variance /= (amount_points - amount_clusters)
        amount_parameters = (amount_clusters - 1) + dimension * amount_clusters + 1

        # In case of the same points the variance can be zero (issue: #407).
        if (variance <= 0.0):
            variance_term = float('-inf')
        else:
            variance_term = dimension * 0.5 * log(variance)

        log_points = log(amount_points)
        log_two_pi = log(2.0 * numpy.pi)
        penalty = amount_parameters * 0.5 * log_points

        scores = []
        for cluster in clusters:
            n = len(cluster)

            likelihood = n * log(n) - n * log_points - n * 0.5 * log_two_pi - n * variance_term - (
                n - amount_clusters) * 0.5

            # BIC of the cluster: likelihood reduced by the penalty for the amount of parameters.
            scores.append(likelihood - penalty)

        return sum(scores)
Пример #31
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for input clusters using minimum noiseless description length criterion.

        @param[in] clusters (list): Clusters (lists of indexes of points) for which the criterion is calculated.
        @param[in] centers (list): Centers of the clusters.

        @return (double) Splitting criterion in line with minimum noiseless description length;
                0.0 is returned when the amount of points is equal to the amount of clusters.
                Low value of splitting criterion means that current structure is much better.

        @see __bayesian_information_criterion(clusters, centers)

        """

        alpha = 0.9
        betta = 0.9

        amount_clusters = len(clusters)
        amount_points = 0.0

        dispersion = 0.0    # sum of per-cluster average distances to the center
        variance = 0.0      # sum of all distances, normalized below

        for position, cluster in enumerate(clusters):
            cluster_size = len(cluster)
            cluster_sum = 0.0
            for index_point in cluster:
                cluster_sum += euclidean_distance_sqrt(
                    self.__pointer_data[index_point], centers[position])

            variance += cluster_sum
            dispersion += cluster_sum / cluster_size
            amount_points += cluster_size

        if (amount_points - amount_clusters == 0):
            return 0.0

        variance /= (amount_points - amount_clusters)
        deviation = variance**0.5

        Kw = (1.0 - amount_clusters / amount_points) * variance
        Ks = (2.0 * alpha * deviation / (amount_points**0.5)) * (
            (alpha**2.0) * variance / amount_points + dispersion - Kw / 2.0)**0.5

        root_two_k = (2 * amount_clusters)**0.5
        return variance * root_two_k * (
            root_two_k + betta
        ) / amount_points + dispersion - variance + Ks + 2 * alpha**0.5 * variance / amount_points
Пример #32
0
 def __calculate_estimation(self):
     """!
     @brief Calculates estimation (cost) of the current clusters as the sum of euclidean_distance_sqrt()
            values between every point and the medoid of its cluster. The lower the estimation,
            the better the configuration of clusters.

     @return (double) Estimation of the current clusters.

     """
     total_cost = 0.0
     for position, members in enumerate(self.__clusters):
         medoid = self.__current[position]
         for member in members:
             total_cost += euclidean_distance_sqrt(self.__pointer_data[member], self.__pointer_data[medoid])

     return total_cost
Пример #33
0
 def __calculate_estimation(self):
     """!
     @brief Calculates estimation (cost) of the current clusters as the sum of euclidean_distance_sqrt()
            values between every point and the medoid of its cluster. The lower the estimation,
            the better the configuration of clusters.

     @return (double) Estimation of the current clusters.

     """
     cost = 0.0
     position = 0
     while position < len(self.__clusters):
         # The medoid of the cluster is stored at the same position in the list of current medoids.
         medoid = self.__current[position]
         for point in self.__clusters[position]:
             cost = cost + euclidean_distance_sqrt(self.__pointer_data[point], self.__pointer_data[medoid])
         position += 1

     return cost
Пример #34
0
    def __neighbor_indexes(self, point):
        """!
        @brief Returns indexes of the points that are located within the connectivity radius of the specified point.

        @param[in] point (uint): Index of the point for which neighbors should be returned.

        @return (list) Indexes of neighbors in line with the connectivity radius; the point itself is not included.

        """

        neighbors = []
        for candidate in range(len(self.__pointer_data)):
            # The threshold is self.__sqrt_eps, presumably the squared radius (TODO confirm).
            within_radius = euclidean_distance_sqrt(
                self.__pointer_data[point],
                self.__pointer_data[candidate]) <= self.__sqrt_eps

            if within_radius and (candidate != point):
                neighbors.append(candidate)

        return neighbors
Пример #35
0
    def __initialize_distances(self, size, location):
        """!
        @brief Builds symmetric matrix with euclidean_distance_sqrt() values between neurons of the SOM grid.

        @param[in] size (uint): Amount of neurons in the network.
        @param[in] location (list): List of coordinates of each neuron in the network.

        @return (list) Matrix (list of lists) of size 'size' x 'size' with distances between neurons.

        """
        matrix = [[[] for _ in range(size)] for _ in range(size)]

        # Only the upper triangle is calculated, the lower one is filled by symmetry.
        for row in range(size):
            for column in range(row, size):
                matrix[row][column] = matrix[column][row] = euclidean_distance_sqrt(
                    location[row], location[column])

        return matrix
Пример #36
0
 def __calculate_nearest_distance(self, index_cluster1, index_cluster2):
     """!
     @brief Finds two nearest objects in two specified clusters and returns distance between them.

     @param[in] index_cluster1 (uint): Index of the first cluster.
     @param[in] index_cluster2 (uint): Index of the second cluster.

     @return (double) The smallest euclidean_distance_sqrt() value over all pairs of objects of the clusters,
             or infinity if there are no pairs.

     """
     # Lazily produce the distance for every pair (object of the first cluster, object of the second cluster).
     pair_distances = (
         euclidean_distance_sqrt(self.__pointer_data[first_object], self.__pointer_data[second_object])
         for first_object in self.__clusters[index_cluster1]
         for second_object in self.__clusters[index_cluster2]
     )

     nearest = float('Inf')
     for pair_distance in pair_distances:
         if pair_distance < nearest:
             nearest = pair_distance

     return nearest
Пример #37
0
 def __find_another_nearest_medoid(self, point_index, current_medoid_index):
     """!
     @brief Finds the medoid that is the nearest to the specified point and differs from the specified medoid.

     @param[in] point_index (uint): Index of the point in the data for which the medoid is searched in the list of current medoids.
     @param[in] current_medoid_index (uint): Index of the medoid that must not be considered as the nearest one.

     @return (int) Index (in the data) of the nearest other medoid, or -1 if there is no other medoid.

     """
     other_medoid_index = -1
     other_distance_nearest = float('inf')
     for index_medoid in self.__current:
         if index_medoid == current_medoid_index:
             continue

         # The distance is measured to the candidate medoid. Measuring it to the excluded medoid gives
         # the same value for every candidate, so the first candidate would always be returned.
         other_distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[index_medoid])

         if other_distance_candidate < other_distance_nearest:
             other_distance_nearest = other_distance_candidate
             other_medoid_index = index_medoid

     return other_medoid_index
Пример #38
0
    def __has_object_connection(self, oscillator_index1, oscillator_index2):
        """!
        @brief Checks whether the specified oscillators encode at least one pair of objects that are connected in line with connectivity radius.

        @param[in] oscillator_index1 (uint): Index of the first oscillator in the second layer.
        @param[in] oscillator_index2 (uint): Index of the second oscillator in the second layer.

        @return (bool) True - if there is pair of connected objects encoded by specified oscillators.

        """
        # Translate oscillators to the SOM neurons they correspond to.
        first_neuron = self._som_osc_table[oscillator_index1]
        second_neuron = self._som_osc_table[oscillator_index2]

        # any() stops at the first pair of captured objects that satisfies the radius.
        return any(
            euclidean_distance_sqrt(self._data[first_object], self._data[second_object]) <= self._radius
            for first_object in self._som.capture_objects[first_neuron]
            for second_object in self._som.capture_objects[second_neuron]
        )
Пример #39
0
    def __calculate_farthest_distance(self, index_cluster1, index_cluster2):
        """!
        @brief Finds two farthest objects in two specified clusters and returns distance between them.

        @param[in] index_cluster1 (uint): Index of the first cluster.
        @param[in] index_cluster2 (uint): Index of the second cluster.

        @return (double) The largest euclidean_distance_sqrt() value over all pairs of objects of the clusters,
                or 0.0 if there are no pairs.

        """
        farthest = 0.0
        for first_object in self.__clusters[index_cluster1]:
            for second_object in self.__clusters[index_cluster2]:
                pair_distance = euclidean_distance_sqrt(
                    self.__pointer_data[first_object],
                    self.__pointer_data[second_object])

                # Keep the current value unless the new pair is strictly farther.
                farthest = pair_distance if (pair_distance > farthest) else farthest

        return farthest
Пример #40
0
 def __merge_by_centroid_link(self):
     """!
     @brief Merges two clusters whose centers are the closest to each other (centroid link).

     @details The cluster with the higher index is appended to the cluster with the lower index,
              the center of the merged cluster is recalculated, and the absorbed cluster
              and its center are removed.

     """

     closest_pair = None
     closest_distance = float('Inf')

     amount_centers = len(self.__centers)
     for first in range(amount_centers):
         for second in range(first + 1, amount_centers):
             gap = euclidean_distance_sqrt(self.__centers[first], self.__centers[second])
             if gap < closest_distance:
                 closest_distance, closest_pair = gap, [first, second]

     kept, absorbed = closest_pair[0], closest_pair[1]

     self.__clusters[kept] += self.__clusters[absorbed]
     self.__centers[kept] = self.__calculate_center(self.__clusters[kept])

     # The absorbed cluster and its center are not required anymore.
     self.__clusters.pop(absorbed)
     self.__centers.pop(absorbed)
Пример #41
0
 def __find_another_nearest_medoid(self, point_index, current_medoid_index):
     """!
     @brief Finds the medoid that is the nearest to the specified point and differs from the specified medoid.

     @param[in] point_index (uint): Index of the point in the data for which the medoid is searched in the list of current medoids.
     @param[in] current_medoid_index (uint): Index of the medoid that must not be considered as the nearest one.

     @return (int) Index (in the data) of the nearest other medoid, or -1 if there is no other medoid.

     """
     other_medoid_index = -1
     other_distance_nearest = float('inf')
     for index_medoid in self.__current:
         if index_medoid == current_medoid_index:
             continue

         # The distance is measured to the candidate medoid. Measuring it to the excluded medoid gives
         # the same value for every candidate, so the first candidate would always be returned.
         other_distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[index_medoid])

         if other_distance_candidate < other_distance_nearest:
             other_distance_nearest = other_distance_candidate
             other_medoid_index = index_medoid

     return other_medoid_index
Пример #42
0
 def __merge_by_centroid_link(self):
     """!
     @brief Merges two clusters whose centers are the closest to each other (centroid link).

     @details The cluster with the higher index is appended to the cluster with the lower index,
              the center of the merged cluster is recalculated, and the absorbed cluster
              and its center are removed.

     """

     best_distance = float('Inf')
     best_pair = None

     for index_left, center_left in enumerate(self.__centers):
         for index_right in range(index_left + 1, len(self.__centers)):
             candidate_distance = euclidean_distance_sqrt(center_left, self.__centers[index_right])
             if candidate_distance < best_distance:
                 best_distance = candidate_distance
                 best_pair = [index_left, index_right]

     target = best_pair[0]
     source = best_pair[1]

     # Join the clusters and refresh the center of the joined cluster.
     self.__clusters[target] += self.__clusters[source]
     self.__centers[target] = self.__calculate_center(self.__clusters[target])

     # Drop the merged cluster together with its center.
     self.__clusters.pop(source)
     self.__centers.pop(source)
Пример #43
0
 def __calculate_initial_clusters(self, centers):
     """!
     @brief Assigns every point of the sample to the cluster of its nearest center.

     @param[in] centers (list): Centers of the clusters, one entry per cluster.

     @return (list) Clusters as lists of indexes of points from the sample. Position of a cluster
             corresponds to position of its center.

     """

     clusters = [[] for _ in range(len(centers))]
     for index_point, point in enumerate(self.__sample):
         index_optim, dist_optim = -1, 0.0

         for index, center in enumerate(centers):
             dist = euclidean_distance_sqrt(point, center)

             # The first center is always accepted. The check uses '==': 'is' on an integer literal tests
             # object identity, which is an implementation detail and a SyntaxWarning since Python 3.8.
             if (index == 0) or (dist < dist_optim):
                 index_optim, dist_optim = index, dist

         clusters[index_optim].append(index_point)

     return clusters
Пример #44
0
 def __update_clusters(self, medoids):
     """!
     @brief Forms clusters in line with specified medoids by assigning every point to its nearest medoid.

     @details Rebuilds the cluster index of every point (self.__belong) and the list of clusters
              (self.__clusters). Clusters that captured no points are removed from the list.

     @param[in] medoids (list): Indexes of the points in the data that are used as medoids.

     """

     self.__belong = [0] * len(self.__pointer_data)
     self.__clusters = [[] for _ in range(len(medoids))]
     for index_point in range(len(self.__pointer_data)):
         index_optim = -1
         dist_optim = 0.0

         for index in range(len(medoids)):
             dist = euclidean_distance_sqrt(self.__pointer_data[index_point], self.__pointer_data[medoids[index]])

             # The first medoid is always accepted. The check uses '==': 'is' on an integer literal tests
             # object identity, which is an implementation detail and a SyntaxWarning since Python 3.8.
             if (index == 0) or (dist < dist_optim):
                 index_optim = index
                 dist_optim = dist

         self.__clusters[index_optim].append(index_point)
         self.__belong[index_point] = index_optim

     # If cluster is not able to capture object it should be removed.
     # NOTE(review): self.__belong keeps positions from before this filtering - confirm that callers
     # tolerate it when an empty cluster is dropped.
     self.__clusters = [cluster for cluster in self.__clusters if len(cluster) > 0]
Пример #45
0
    def process(self):
        """!
        @brief Runs K-Medoids clustering until the medoids stop moving.

        @details Each iteration rebuilds the clusters and recalculates the medoids. The loop stops as soon as
                 the largest value returned by euclidean_distance_sqrt() for an old and a new medoid is not
                 greater than the squared tolerance.

        @remark Results of clustering can be obtained using corresponding get methods.

        @see get_clusters()
        @see get_medoids()

        """

        # The tolerance is squared, presumably because euclidean_distance_sqrt() returns a squared
        # distance (TODO confirm against its definition).
        threshold = self.__tolerance * self.__tolerance

        # The input points and the initial medoids must have the same dimension.
        if len(self.__pointer_data[0]) != len(self.__medoids[0]):
            raise NameError(
                'Dimension of the input data and dimension of the initial cluster medians must be equal.'
            )

        displacement = float('inf')
        while displacement > threshold:
            self.__clusters = self.__update_clusters()
            fresh_medoids = self.__update_medoids()

            # The displacement has to be measured before the stored medoids are overwritten.
            shifts = []
            for position in range(len(fresh_medoids)):
                shifts.append(
                    euclidean_distance_sqrt(self.__medoids[position],
                                            fresh_medoids[position]))
            displacement = max(shifts)

            self.__medoids = fresh_medoids
Пример #46
0
 def __optimize_configuration(self):
     """!
     @brief Finds quasi-optimal medoids by random swaps and updates clusters in line with them.

     @details On every attempt a random current medoid and a random point that is not a medoid are chosen,
              and the cost of replacing the medoid by the point is accumulated over all non-medoid points.
              If the cost is negative the replacement is applied, clusters are recalculated and the counter
              of attempts is reset, otherwise the counter is increased. The search stops when the counter
              reaches the maximum amount of neighbors.

     """
     index_neighbor = 0
     while index_neighbor < self.__maxneighbor:
         # get random current medoid that is to be replaced
         current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)]
         current_medoid_cluster_index = self.__belong[current_medoid_index]

         # get new candidate to be medoid; it is drawn again until it is not one of the current medoids
         candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)
         while candidate_medoid_index in self.__current:
             candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1)

         candidate_cost = 0.0
         for point_index in range(0, len(self.__pointer_data)):
             # medoids themselves do not contribute to the cost
             if point_index in self.__current:
                 continue

             # get non-medoid point and its medoid
             point_cluster_index = self.__belong[point_index]
             point_medoid_index = self.__current[point_cluster_index]

             # get other medoid that is nearest to the point (the medoid that is to be replaced is excluded);
             # -1 is returned when there is no other medoid and is then used as an index as is
             other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index)
             other_medoid_cluster_index = self.__belong[other_medoid_index]

             # for optimization calculate all required distances
             # from the point to current medoid
             distance_current = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[current_medoid_index])

             # from the point to candidate medoid
             distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[candidate_medoid_index])

             # from the point to nearest (own) medoid
             # NOTE(review): point_medoid_index is an index of a point while current_medoid_cluster_index
             # is an index of a cluster - confirm whether current_medoid_index was intended here.
             distance_nearest = float('inf')
             if (point_medoid_index != candidate_medoid_index) and (point_medoid_index != current_medoid_cluster_index):
                 distance_nearest = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[point_medoid_index])

             # apply rules for cost calculation
             if point_cluster_index == current_medoid_cluster_index:
                 # case 1:
                 if distance_candidate >= distance_nearest:
                     candidate_cost += distance_nearest - distance_current

                 # case 2:
                 else:
                     candidate_cost += distance_candidate - distance_current

             elif point_cluster_index == other_medoid_cluster_index:
                 # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                 if distance_candidate > distance_nearest:
                     pass

                 # case 4:
                 else:
                     candidate_cost += distance_candidate - distance_nearest

         if candidate_cost < 0:
             # set candidate that has won
             self.__current[current_medoid_cluster_index] = candidate_medoid_index

             # recalculate clusters
             self.__update_clusters(self.__current)

             # reset iterations and start investigation from the beginning
             index_neighbor = 0

         else:
             index_neighbor += 1
Пример #47
0
    def __init__(self,
                 rows,
                 cols,
                 conn_type=type_conn.grid_eight,
                 parameters=None,
                 ccore=False):
        """!
        @brief Creates a self-organized map with the requested grid geometry.

        @param[in] rows (uint): Number of rows of the neuron grid.
        @param[in] cols (uint): Number of columns of the neuron grid.
        @param[in] conn_type (type_conn): Type of connection between neurons (grid four, grid eight, honeycomb, function neighbour).
        @param[in] parameters (som_parameters): Network parameters; a default som_parameters object is created when None.
        @param[in] ccore (bool): If True the network is created through the CCORE wrapper instead of the Python structures.

        """

        # Geometry is stored regardless of the implementation that is used.
        self._cols = cols
        self._rows = rows
        self._size = cols * rows
        self._conn_type = conn_type

        if parameters is None:
            parameters = som_parameters()
        self._params = parameters

        # Choose the initial radius from the grid size when it is not specified.
        if self._params.init_radius is None:
            if (cols + rows) / 4.0 > 1.0:
                radius = 2.0
            elif (cols > 1) and (rows > 1):
                radius = 1.5
            else:
                radius = 1.0
            self._params.init_radius = radius

        if ccore is True:
            self.__ccore_som_pointer = wrapper.som_create(
                rows, cols, conn_type, self._params)

        else:
            # Coordinates of every neuron on the grid, row by row.
            self._location = [[float(row), float(column)]
                              for row in range(self._rows)
                              for column in range(self._cols)]

            # Award counter and list of captured objects for every neuron.
            self._award = [0] * self._size
            self._capture_objects = [[] for _ in range(self._size)]

            # Symmetric matrix with the euclidean_distance_sqrt() value for every pair of neuron locations.
            self._sqrt_distances = [[[] for _ in range(self._size)]
                                    for _ in range(self._size)]
            for first in range(self._size):
                for second in range(first, self._size):
                    self._sqrt_distances[first][second] = self._sqrt_distances[second][first] = \
                        euclidean_distance_sqrt(self._location[first],
                                                self._location[second])

            # Connections are created for every structure except the function neighbour one.
            if conn_type != type_conn.func_neighbor:
                self._create_connections(conn_type)
Пример #48
0
 def __optimize_configuration(self):
     """!
     @brief Finds quasi-optimal medoids by random swaps and updates clusters in line with them.
     
     @details On every attempt a random current medoid and a random point that is not a medoid are chosen,
              and the cost of replacing the medoid by the point is accumulated over all non-medoid points.
              If the cost is negative the replacement is applied, clusters are recalculated and the counter
              of attempts is reset, otherwise the counter is increased. The search stops when the counter
              reaches the maximum amount of neighbors.
     
     """
     index_neighbor = 0;
     while (index_neighbor < self.__maxneighbor):
         # get random current medoid that is to be replaced
         current_medoid_index = self.__current[random.randint(0, self.__number_clusters - 1)];
         current_medoid_cluster_index = self.__belong[current_medoid_index];
         
         # get new candidate to be medoid
         candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1);
         
         # draw the candidate again until it is not one of the current medoids
         while (candidate_medoid_index in self.__current):
             candidate_medoid_index = random.randint(0, len(self.__pointer_data) - 1);
         
         candidate_cost = 0.0;
         for point_index in range(0, len(self.__pointer_data)):
             # medoids themselves do not contribute to the cost
             if (point_index not in self.__current):
                 # get non-medoid point and its medoid
                 point_cluster_index = self.__belong[point_index];
                 point_medoid_index = self.__current[point_cluster_index];
                 
                 # get other medoid that is nearest to the point (the medoid that is to be replaced is excluded)
                 other_medoid_index = self.__find_another_nearest_medoid(point_index, current_medoid_index);
                 other_medoid_cluster_index = self.__belong[other_medoid_index];
                 
                 # for optimization calculate all required distances
                 # from the point to current medoid
                 distance_current = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[current_medoid_index]);
                 
                 # from the point to candidate medoid
                 distance_candidate = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[candidate_medoid_index]);
                 
                 # from the point to nearest (own) medoid
                 # NOTE(review): point_medoid_index is an index of a point while current_medoid_cluster_index
                 # is an index of a cluster - confirm whether current_medoid_index was intended here.
                 distance_nearest = float('inf');
                 if ( (point_medoid_index != candidate_medoid_index) and (point_medoid_index != current_medoid_cluster_index) ):
                     distance_nearest = euclidean_distance_sqrt(self.__pointer_data[point_index], self.__pointer_data[point_medoid_index]);
                 
                 # apply rules for cost calculation
                 if (point_cluster_index == current_medoid_cluster_index):
                     # case 1:
                     if (distance_candidate >= distance_nearest):
                         candidate_cost += distance_nearest - distance_current;
                     
                     # case 2:
                     else:
                         candidate_cost += distance_candidate - distance_current;
                 
                 elif (point_cluster_index == other_medoid_cluster_index):
                     # case 3 ('nearest medoid' is the representative object of that cluster and object is more similar to 'nearest' than to 'candidate'):
                     if (distance_candidate > distance_nearest):
                         pass;
                     
                     # case 4:
                     else:
                         candidate_cost += distance_candidate - distance_nearest;
         
         # a negative total cost means that the replacement improves the configuration
         if (candidate_cost < 0):
             # set candidate that has won
             self.__current[current_medoid_cluster_index] = candidate_medoid_index;
             
             # recalculate clusters
             self.__update_clusters(self.__current);
             
             # reset iterations and start investigation from the beginning
             index_neighbor = 0;
             
         else:
             index_neighbor += 1;