示例#1
0
 def testEuclideanDistance(self):
     # Integer 2D points whose pairwise distances are whole numbers.
     point1 = [1, 2]
     point2 = [1, 3]
     point3 = [4, 6]
     
     # (other point, expected distance from point1)
     expectations = [(point2, 1), (point1, 0), (point3, 5)]
     
     for other_point, expected in expectations:
         assert euclidean_distance(point1, other_point) == expected
示例#2
0
    def testEuclideanDistance(self):
        # base -> unit_step differs by one along a single axis,
        # base -> far is the hypotenuse of a 3-4-5 triangle.
        base, unit_step, far = [1, 2], [1, 3], [4, 6]

        assert euclidean_distance(base, unit_step) == 1
        assert euclidean_distance(base, base) == 0
        assert euclidean_distance(base, far) == 5
示例#3
0
 def __merge_clusters(self, cluster1, cluster2):
     """!
     @brief Builds a new CURE cluster from the union of two clusters and calculates its mean and representative points.
     
     @param[in] cluster1 (cure_cluster): The first cluster to merge.
     @param[in] cluster2 (cure_cluster): The second cluster to merge.
     
     @return (cure_cluster) Newly created cluster that holds points and indexes of both input clusters.
     
     """
     
     merged = cure_cluster(None, None)
     merged.points = cluster1.points + cluster2.points
     merged.indexes = cluster1.indexes + cluster2.indexes
     
     dimension = len(cluster1.mean)
     
     if merged.points[1:] == merged.points[:-1]:
         # All points coincide, therefore any of them is the mean.
         merged.mean = merged.points[0]
     else:
         # Mean of the union is the size-weighted average of both means.
         merged.mean = [
             (len(cluster1.points) * cluster1.mean[axis] + len(cluster2.points) * cluster2.mean[axis]) / (len(cluster1.points) + len(cluster2.points))
             for axis in range(dimension)
         ]
     
     # Well-scattered points: the first one is the farthest from the mean, each next one
     # is the point whose distance to the nearest already chosen point is the largest.
     scattered = []
     
     for step in range(self.__number_represent_points):
         farthest_point, farthest_distance = None, 0
         
         for candidate in merged.points:
             if step == 0:
                 score = euclidean_distance(candidate, merged.mean)
             else:
                 score = min([euclidean_distance(candidate, chosen) for chosen in scattered])
             
             # '>=' keeps the last of equally distant candidates.
             if score >= farthest_distance:
                 farthest_distance, farthest_point = score, candidate
         
         if farthest_point not in scattered:
             scattered.append(farthest_point)
     
     # Representative points are the scattered points moved toward the mean by the compression factor.
     for anchor in scattered:
         merged.rep.append([anchor[axis] + self.__compression * (merged.mean[axis] - anchor[axis]) for axis in range(dimension)])
     
     return merged
示例#4
0
 def __merge_clusters(self, cluster1, cluster2):
     """!
     @brief Merges two clusters and returns new merged cluster. Representation points and mean points are calculated for the new cluster.
     
     @details Representative points are chosen as well-scattered points: the first one is the point that is the farthest
               from the mean, each following one is the point whose distance to the nearest already chosen point
               is maximal. The chosen points are then moved toward the mean by the compression factor.
     
     @param[in] cluster1 (cure_cluster): Cluster that should be merged.
     @param[in] cluster2 (cure_cluster): Cluster that should be merged.
     
     @return (cure_cluster) New merged CURE cluster.
     
     """
     
     merged_cluster = cure_cluster()
     
     merged_cluster.points = cluster1.points + cluster2.points
     
     dimension = len(cluster1.mean)
     amount1, amount2 = len(cluster1.points), len(cluster2.points)
     
     if merged_cluster.points[1:] == merged_cluster.points[:-1]:
         # All points coincide, so the mean is that point itself.
         merged_cluster.mean = merged_cluster.points[0]
     else:
         # Mean of the union is the size-weighted average of the means of the merged clusters.
         merged_cluster.mean = [
             (amount1 * cluster1.mean[index] + amount2 * cluster2.mean[index]) / (amount1 + amount2)
             for index in range(dimension)
         ]
     
     # A list is used instead of a set because points are lists and lists are not hashable.
     temporary = []
     
     for index in range(self.__number_represent_points):
         maximal_distance = 0
         maximal_point = None
         
         for point in merged_cluster.points:
             if index == 0:
                 minimal_distance = euclidean_distance(point, merged_cluster.mean)
             else:
                 # Distance to the nearest of ALL already chosen points, not only to the first chosen one,
                 # otherwise the chosen points are not scattered over the whole cluster.
                 minimal_distance = min(euclidean_distance(point, chosen) for chosen in temporary)
             
             if minimal_distance >= maximal_distance:
                 maximal_distance = minimal_distance
                 maximal_point = point
         
         if maximal_point not in temporary:
             temporary.append(maximal_point)
     
     # Shrink every chosen point toward the mean to obtain representative points.
     for point in temporary:
         representative_point = [
             point[index] + self.__compression * (merged_cluster.mean[index] - point[index])
             for index in range(dimension)
         ]
         merged_cluster.rep.append(representative_point)
     
     return merged_cluster
示例#5
0
 def __merge_clusters(self, cluster1, cluster2):
     """!
     @brief Merges two clusters and returns new merged cluster. Representation points and mean points are calculated for the new cluster.
     
     @param[in] cluster1 (cure_cluster): Cluster that should be merged.
     @param[in] cluster2 (cure_cluster): Cluster that should be merged.
     
     @return (cure_cluster) New merged CURE cluster whose representative points are scattered points
              shrunk toward the mean by the compression factor.
     
     """
     
     merged_cluster = cure_cluster()
     merged_cluster.points = cluster1.points + cluster2.points
     
     dimension = len(cluster1.mean)
     merged_cluster.mean = [0] * dimension
     if merged_cluster.points[1:] == merged_cluster.points[:-1]:
         # Every point is the same, the mean is equal to any of them.
         merged_cluster.mean = merged_cluster.points[0]
     else:
         total_amount = len(cluster1.points) + len(cluster2.points)
         for index in range(dimension):
             # Weighted average of the means of the merged clusters.
             merged_cluster.mean[index] = (len(cluster1.points) * cluster1.mean[index] + len(cluster2.points) * cluster2.mean[index]) / total_amount
     
     # Points are lists (not hashable), therefore chosen points are stored in a list rather than in a set.
     temporary = []
     
     for index in range(self.__number_represent_points):
         maximal_distance = 0
         maximal_point = None
         
         for point in merged_cluster.points:
             if index == 0:
                 # The first scattered point is the one that is the farthest from the mean.
                 minimal_distance = euclidean_distance(point, merged_cluster.mean)
             else:
                 # Each next scattered point maximizes the distance to its nearest already chosen point;
                 # all chosen points must take part in it, not only the first one.
                 minimal_distance = min([euclidean_distance(point, chosen_point) for chosen_point in temporary])
             
             if minimal_distance >= maximal_distance:
                 maximal_distance = minimal_distance
                 maximal_point = point
         
         if maximal_point not in temporary:
             temporary.append(maximal_point)
     
     for point in temporary:
         representative_point = [0] * dimension
         for index in range(dimension):
             representative_point[index] = point[index] + self.__compression * (merged_cluster.mean[index] - point[index])
         
         merged_cluster.rep.append(representative_point)
     
     return merged_cluster
    def __calc_distance_to_nearest_center(self, data, centers):
        """!
        @brief Finds for every data point how far away its closest center is.
        
        @param[in] data (list): Points, where each point is represented by list of coordinates.
        @param[in] centers (list): Centers, where each center is represented by list of coordinates.
        
        @return (list) One distance per data point in the order of 'data'; infinity if 'centers' is empty.
        
        """

        unreachable = float('inf')

        # The infinite sentinel goes first, so a point keeps it when there is no closer center.
        return [
            min([unreachable] + [euclidean_distance(center, point) for center in centers])
            for point in data
        ]
示例#7
0
    def __minimum_noiseless_description_length(self, clusters, centers):
        """!
        @brief Evaluates the minimum noiseless description length (MNDL) splitting criterion for the specified clusters.
        
        @param[in] clusters (list): Clusters (lists of point indexes) for which the criterion should be calculated.
        @param[in] centers (list): Centers of the clusters, one per cluster.
        
        @return (double) Value of the splitting criterion, low value means that current structure is much better.
                Infinity is returned if a cluster is empty or if amount of points does not exceed amount of clusters.
        
        @see __bayesian_information_criterion(clusters, centers)
        
        """

        alpha = 0.9
        betta = 0.9

        K = len(clusters)
        N = 0.0
        W = 0.0
        sigma_sqrt = 0.0

        for cluster_index, cluster in enumerate(clusters):
            Ni = len(cluster)
            if Ni == 0:
                return float('inf')

            # Plain euclidean distance is accumulated instead of the squared one: the original
            # author notes that the squared distance gives very poor results here.
            Wi = 0.0
            for index_object in cluster:
                Wi += euclidean_distance(self.__pointer_data[index_object], centers[cluster_index])

            sigma_sqrt += Wi
            W += Wi / Ni
            N += Ni

        # The estimation below is defined only when there are more points than clusters.
        if not (N - K > 0):
            return float('inf')

        sigma_sqrt /= (N - K)
        sigma = sigma_sqrt**0.5

        Kw = (1.0 - K / N) * sigma_sqrt
        Ks = (2.0 * alpha * sigma / (N**0.5)) * ((alpha**2.0) * sigma_sqrt / N + W - Kw / 2.0)**0.5

        root_2k = (2 * K)**0.5
        return sigma_sqrt * root_2k * (root_2k + betta) / N + W - sigma_sqrt + Ks + 2 * alpha**0.5 * sigma_sqrt / N
    def __create_adjacency_matrix(self):
        """!
        @brief Builds symmetric 0/1 matrix (list of lists) that marks pairs of points whose euclidean distance does not exceed eps.
        
        """

        points = self.__pointer_data
        amount = len(points)

        matrix = [[0] * amount for _ in range(amount)]
        self.__adjacency_matrix = matrix

        # Only the upper triangle is traversed, each link is mirrored to the lower one.
        for row in range(amount):
            for column in range(row + 1, amount):
                if euclidean_distance(points[row], points[column]) <= self.__eps:
                    matrix[row][column] = 1
                    matrix[column][row] = 1
示例#9
0
def cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a sample by the agglomerative algorithm and prints every supported distance
            between each pair of obtained clusters.
    
    @param[in] path_sample: Sample location that is passed to utils.read_sample().
    @param[in] amount_clusters: Amount of clusters that is passed to the agglomerative algorithm.
    
    """
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()

    print("Measurements for:", path_sample)

    for index_cluster, cluster1 in enumerate(obtained_clusters):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster2 = obtained_clusters[index_neighbor]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            for distance_type in distances:
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                # Reported inside the loop so that every measure is printed,
                # not only the last calculated one.
                print("\tDistance", distance_type, "from", index_cluster, "to",
                      index_neighbor, "is:", distance)
示例#10
0
 def __create_adjacency_matrix(self):
     """!
     @brief Fills square adjacency matrix (list of lists) where 1 marks a pair of points that are neighbors,
             i.e. whose euclidean distance does not exceed eps, and 0 marks absence of the link.
     
     """
     
     data = self.__pointer_data
     size_data = len(data)
     
     adjacency = []
     for _ in range(size_data):
         adjacency.append([0] * size_data)
     self.__adjacency_matrix = adjacency
     
     for i, point in enumerate(data):
         for j in range(i + 1, size_data):
             if euclidean_distance(point, data[j]) <= self.__eps:
                 # The link is symmetric.
                 adjacency[i][j] = adjacency[j][i] = 1
示例#11
0
 def __create_adjacency_matrix(self):
     """!
     @brief Creates 2D adjacency matrix (list of lists) whose element is 1 if two points are located
             within eps of each other (points are neighbors) and 0 otherwise.
     
     """
     
     size_data = len(self.__pointer_data)
     self.__adjacency_matrix = [[0] * size_data for _ in range(size_data)]
     
     # Every unordered pair of different points is visited once.
     index_pairs = ((i, j) for i in range(size_data) for j in range(i + 1, size_data))
     
     for i, j in index_pairs:
         distance = euclidean_distance(self.__pointer_data[i], self.__pointer_data[j])
         if not (distance <= self.__eps):
             continue
         
         self.__adjacency_matrix[i][j] = 1
         self.__adjacency_matrix[j][i] = 1
示例#12
0
 def _create_connections(self, radius):
     """!
     @brief Connects every pair of oscillators whose euclidean distance does not exceed the radius. If connection
             weights are enabled, pairwise distances normalized by the observed distance range are stored as weights.
     
     @param[in] radius (double): Connectivity radius between oscillators.
     
     """
     
     weights_enabled = (self._ena_conn_weight is True)
     amount = self._num_osc
     
     if weights_enabled:
         self._conn_weight = [[0] * amount for _ in range(amount)]
     
     maximum_distance, minimum_distance = 0, float('inf')
     
     for i in range(amount):
         for j in range(i + 1, amount):
             dist = euclidean_distance(self._osc_loc[i], self._osc_loc[j])
             
             if weights_enabled:
                 self._conn_weight[i][j] = dist
                 self._conn_weight[j][i] = dist
                 
                 # Distance range is tracked for the normalization below.
                 maximum_distance = max(maximum_distance, dist)
                 minimum_distance = min(minimum_distance, dist)
             
             if not (dist <= radius):
                 continue
             
             if self._conn_represent == conn_represent.LIST:
                 self._osc_conn[i].append(j)
                 self._osc_conn[j].append(i)
             else:
                 self._osc_conn[i][j] = True
                 self._osc_conn[j][i] = True
     
     if not weights_enabled:
         return
     
     # When all distances are equal the weights are left unscaled to avoid division by zero.
     if maximum_distance != minimum_distance:
         multiplier, subtractor = maximum_distance - minimum_distance, minimum_distance
     else:
         multiplier, subtractor = 1, 0
     
     for i in range(amount):
         for j in range(i + 1, amount):
             normalized = (self._conn_weight[i][j] - subtractor) / multiplier
             
             self._conn_weight[i][j] = normalized
             self._conn_weight[j][i] = normalized
示例#13
0
 def getCityDistance(self, result, object_locations, citiesDistRepresent):
     """!
     @brief Calculates length of the closed tour described by the result and checks that no object is visited twice.
     
     @param[in] result: Object whose 'object_sequence' contains indexes of objects in the order they are visited.
     @param[in] object_locations (list): Distance matrix if 'citiesDistRepresent' is equal to
                 wrapper.CITIES_DISTANCE_SET_BY_MATRIX, otherwise coordinates of the objects.
     @param[in] citiesDistRepresent: Defines how 'object_locations' is interpreted.
     
     @return (double) Sum of distances between consecutive objects including the way from the last object back to the first one.
     
     """
     
     sequence = result.object_sequence
     visited_objects = set()
     current_distance = 0.0
     
     for position, index1 in enumerate(sequence):
         # A valid tour contains each object only once.
         assert index1 not in visited_objects
         visited_objects.add(index1)
         
         # Modulo closes the tour: the last object is followed by the first one.
         index2 = sequence[(position + 1) % len(sequence)]
         
         if citiesDistRepresent == wrapper.CITIES_DISTANCE_SET_BY_MATRIX:
             current_distance += object_locations[index1][index2]
         else:
             current_distance += euclidean_distance(object_locations[index1], object_locations[index2])
     
     return current_distance
示例#14
0
    def _create_connections(self, radius):
        """!
        @brief Links oscillators that are located within the connectivity radius of each other and, when connection
                weights are enabled, fills the weight matrix with pairwise distances scaled by the observed distance range.
        
        @param[in] radius (double): Connectivity radius between oscillators.
        
        """

        use_weights = self._ena_conn_weight is True

        if use_weights:
            self._conn_weight = [[0] * self._num_osc for _ in range(self._num_osc)]

        longest = 0
        shortest = float('inf')

        # Every unordered pair of oscillators is processed once.
        for first in range(self._num_osc):
            for second in range(first + 1, self._num_osc):
                separation = euclidean_distance(self._osc_loc[first], self._osc_loc[second])

                if use_weights:
                    self._conn_weight[first][second] = self._conn_weight[second][first] = separation

                    if separation > longest:
                        longest = separation
                    if separation < shortest:
                        shortest = separation

                if separation <= radius:
                    self.set_connection(first, second)

        if not use_weights:
            return

        # Identical distances are kept unscaled, otherwise the divisor would be zero.
        has_spread = longest != shortest
        multiplier = (longest - shortest) if has_spread else 1
        subtractor = shortest if has_spread else 0

        for first in range(self._num_osc):
            for second in range(first + 1, self._num_osc):
                scaled = (self._conn_weight[first][second] - subtractor) / multiplier

                self._conn_weight[first][second] = scaled
                self._conn_weight[second][first] = scaled
示例#15
0
    def getCityDistance(self, result, object_locations, citiesDistRepresent):
        """!
        @brief Returns length of the closed tour stored in the result and asserts that every object is visited only once.
        
        @param[in] result: Object whose 'object_sequence' contains indexes of objects in visiting order.
        @param[in] object_locations (list): Distance matrix if 'citiesDistRepresent' is equal to
                    wrapper.CITIES_DISTANCE_SET_BY_MATRIX, otherwise coordinates of the objects.
        @param[in] citiesDistRepresent: Defines how 'object_locations' is interpreted.
        
        @return (double) Total distance along the tour, the step from the last object back to the first one is included.
        
        """

        visited_objects = set()
        current_distance = 0.0
        amount_objects = len(result.object_sequence)

        for i in range(amount_objects):
            index1 = result.object_sequence[i]
            # The successor of the last object is the first one (closed tour).
            index2 = result.object_sequence[(i + 1) % amount_objects]

            # Repeated object means that the tour is not valid.
            assert index1 not in visited_objects
            visited_objects.add(index1)

            if citiesDistRepresent == wrapper.CITIES_DISTANCE_SET_BY_MATRIX:
                current_distance += object_locations[index1][index2]
            else:
                current_distance += euclidean_distance(
                    object_locations[index1], object_locations[index2])

        return current_distance
示例#16
0
    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Evaluates the bayesian information criterion (BIC) splitting criterion for the specified clusters.
        
        @param[in] clusters (list): Clusters (lists of point indexes) for which the criterion should be calculated.
        @param[in] centers (list): Centers of the clusters, one per cluster.
        
        @return (double) Sum of scores of the clusters, high value means that current structure is much better.
                Scores stay zero if amount of points is equal to amount of clusters or if the estimated sigma is not positive.
                
        @see __minimum_noiseless_description_length(clusters, centers)
        
        """

        K = len(clusters)
        scores = [0.0] * K
        dimension = len(self.__pointer_data[0])

        # Noise estimation: distances from points to the centers of their clusters are accumulated.
        sigma = 0.0
        N = 0.0

        for index_cluster, cluster in enumerate(clusters):
            for index_object in cluster:
                sigma += euclidean_distance(self.__pointer_data[index_object], centers[index_cluster])

            N += len(cluster)

        if N - K != 0:
            sigma /= (N - K)

            # The logarithm of sigma is defined for positive values only.
            if sigma > 0.0:
                for index_cluster, cluster in enumerate(clusters):
                    n = len(cluster)
                    scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0

        return sum(scores)
示例#17
0
 def __cluster_distance(self, cluster1, cluster2):
     """!
     @brief Measures distance between two clusters as the smallest euclidean distance between their representative points.
     
     @param[in] cluster1 (cure_cluster): The first cluster.
     @param[in] cluster2 (cure_cluster): The second cluster.
     
     @return (double) Minimum distance over all pairs of representative points, infinity if there are no such pairs.
     
     """
     
     # The infinite sentinel goes first and is returned when no pair exists.
     candidates = [float('inf')]
     for point1 in cluster1.rep:
         candidates.extend(euclidean_distance(point1, point2) for point2 in cluster2.rep)
     
     return min(candidates)
示例#18
0
 def __cluster_distance(self, cluster1, cluster2):
     """!
     @brief Calculates distance between clusters that is defined by the closest pair of their representative points.
     
     @param[in] cluster1 (cure_cluster): The first cluster.
     @param[in] cluster2 (cure_cluster): The second cluster.
     
     @return (double) The smallest euclidean distance between a representative point of the first cluster
              and a representative point of the second cluster, infinity if such pair does not exist.
     
     """
     
     closest = float('inf')
     
     for representative1 in cluster1.rep:
         for representative2 in cluster2.rep:
             closest = min(closest, euclidean_distance(representative1, representative2))
     
     return closest
示例#19
0
 def __neighbor_indexes(self, optic_object):
     """!
     @brief Collects neighbors of the specified object: other points of the sample that are located within the connectivity radius.
     
     @param[in] optic_object (optics_descriptor): Object whose neighbors are searched, its 'index_object' identifies the point in the sample.
     
     @return (list) Pairs [index, distance] for every other point whose distance to the object does not exceed eps.
     
     """
     
     neighbors = []
     
     for index, candidate in enumerate(self.__sample_pointer):
         # The object is not a neighbor of itself.
         if index != optic_object.index_object:
             distance = euclidean_distance(self.__sample_pointer[optic_object.index_object], candidate)
             if distance <= self.__eps:
                 neighbors.append([index, distance])
     
     return neighbors
示例#20
0
 def __neighbor_indexes(self, optic_object):
     """!
     @brief Returns description of neighbors of the specified point in line with the connectivity radius.
     
     @param[in] optic_object (optics_descriptor): Object for which neighbors should be returned, its 'index_object' is used to locate the point.
     
     @return (list) List of [index, distance] pairs, one for each other point that is not farther than eps from the object.
     
     """
     
     # Distances from the object to every other point of the sample.
     measured = [
         [index, euclidean_distance(self.__sample_pointer[optic_object.index_object], self.__sample_pointer[index])]
         for index in range(len(self.__sample_pointer))
         if index != optic_object.index_object
     ]
     
     # Only points inside the connectivity radius are neighbors.
     return [description for description in measured if description[1] <= self.__eps]
示例#21
0
def cluster_distances(path_sample, amount_clusters):
    """!
    @brief Performs agglomerative clustering of a sample and prints each supported distance for every pair of obtained clusters.
    
    @param[in] path_sample: Sample location that is passed to utils.read_sample().
    @param[in] amount_clusters: Amount of clusters that is passed to the agglomerative algorithm.
    
    """
    # (name, function) pairs; each function receives both clusters and both centers and returns the distance.
    measures = (
        ('euclidian', lambda c1, c2, m1, m2: utils.euclidean_distance(m1, m2)),
        ('manhattan', lambda c1, c2, m1, m2: utils.manhattan_distance(m1, m2)),
        ('avr-inter', lambda c1, c2, m1, m2: utils.average_inter_cluster_distance(c1, c2, sample)),
        ('avr-intra', lambda c1, c2, m1, m2: utils.average_intra_cluster_distance(c1, c2, sample)),
        ('variance', lambda c1, c2, m1, m2: utils.variance_increase_distance(c1, c2, sample)),
    )
    
    sample = utils.read_sample(path_sample)
    
    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()
    
    obtained_clusters = agglomerative_instance.get_clusters()
    
    print("Measurements for:", path_sample)
    
    for index_cluster in range(len(obtained_clusters)):
        for index_neighbor in range(index_cluster + 1, len(obtained_clusters)):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor]
            
            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)
            
            for distance_type, measure in measures:
                distance = measure(cluster1, cluster2, center_cluster1, center_cluster2)
                
                # Every measure is reported, not only the last calculated one.
                print("\tDistance", distance_type, "from", index_cluster, "to", index_neighbor, "is:", distance)
示例#22
0
    def __bayesian_information_criterion(self, clusters, centers):
        """!
        @brief Calculates splitting criterion for the specified clusters using bayesian information criterion.
        
        @param[in] clusters (list): Clusters (lists of point indexes) that should be evaluated.
        @param[in] centers (list): Centers of the clusters, one per cluster.
        
        @return (double) Splitting criterion as a sum of scores of the clusters.
                High value of splitting criterion means that current structure is much better.
                
        @see __minimum_noiseless_description_length(clusters, centers)
        
        """

        amount_clusters = len(clusters)
        scores = [0.0] * amount_clusters
        dimension = len(self.__pointer_data[0])

        # Distances from every point to the center of its cluster are accumulated to estimate the noise.
        sigma = 0.0
        K = amount_clusters
        N = 0.0

        for index_cluster in range(amount_clusters):
            members = clusters[index_cluster]
            for index_object in members:
                sigma += euclidean_distance(self.__pointer_data[index_object], centers[index_cluster])

            N += len(members)

        # Scores stay zero when the estimation is impossible (N equals K) or sigma is not positive.
        if N - K == 0:
            return sum(scores)

        sigma /= (N - K)
        if not (sigma > 0.0):
            return sum(scores)

        for index_cluster in range(amount_clusters):
            n = len(clusters[index_cluster])
            scores[index_cluster] = n * math.log(n) - n * math.log(N) - n * math.log(2.0 * numpy.pi) / 2.0 - n * dimension * math.log(sigma) / 2.0 - (n - K) / 2.0

        return sum(scores)
示例#23
0
 def testFloatEuclideanDistance(self):
     # Scalars are passed instead of coordinate lists; the first result is compared exactly.
     assert euclidean_distance(0.5, 1.5) == 1
     
     # Remaining results are checked by float_comparasion (presumably a tolerance-based comparison).
     for first, second, expected in [(1.6, 1.4, 0.2), (4.23, 2.14, 2.09)]:
         assert self.float_comparasion(euclidean_distance(first, second), expected)
示例#24
0
 def testFloatEuclideanDistance(self):
     # Distance between scalar values: exact comparison for the first pair,
     # comparison via the float_comparasion helper for the others.
     unit_distance = euclidean_distance(0.5, 1.5)
     assert unit_distance == 1
     
     small_distance = euclidean_distance(1.6, 1.4)
     assert self.float_comparasion(small_distance, 0.2)
     
     large_distance = euclidean_distance(4.23, 2.14)
     assert self.float_comparasion(large_distance, 2.09)
示例#25
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a two-dimensional sample by the agglomerative algorithm, draws the clusters and annotates each pair of
            cluster centers with a double-headed arrow and with values of every supported distance between the two clusters.
    
    @param[in] path_sample: Sample location that is passed to utils.read_sample().
    @param[in] amount_clusters (uint): Amount of clusters that is passed to the agglomerative algorithm;
                pairs among the first 'amount_clusters' obtained clusters are displayed.
    
    """
    distances = [
        'euclidian', 'manhattan', 'avr-inter', 'avr-intra', 'variance'
    ]

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()
    stage = utils.draw_clusters(sample,
                                obtained_clusters,
                                display_result=False)

    # Every unordered pair of clusters is visited exactly once, so no bookkeeping of processed pairs is required.
    for index_cluster in range(amount_clusters):
        for index_neighbor_cluster in range(index_cluster + 1, amount_clusters):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            # Bounds of the segment between the centers and its direction along each axis
            # (1 if the second center has the greater coordinate, otherwise -1).
            if center_cluster2[0] > center_cluster1[0]:
                x_maximum, x_minimum, x_index_maximum = center_cluster2[0], center_cluster1[0], 1
            else:
                x_maximum, x_minimum, x_index_maximum = center_cluster1[0], center_cluster2[0], -1

            if center_cluster2[1] > center_cluster1[1]:
                y_maximum, y_minimum, y_index_maximum = center_cluster2[1], center_cluster1[1], 1
            else:
                y_maximum, y_minimum, y_index_maximum = center_cluster1[1], center_cluster2[1], -1

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The annotation text is passed positionally: the keyword name of this argument
            # differs between matplotlib versions ('s' was renamed to 'text').
            stage.annotate('',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, distance_type in enumerate(distances):
                distance = None

                if distance_type == 'euclidian':
                    distance = utils.euclidean_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'manhattan':
                    distance = utils.manhattan_distance(
                        center_cluster1, center_cluster2)

                elif distance_type == 'avr-inter':
                    distance = utils.average_inter_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'avr-intra':
                    distance = utils.average_intra_cluster_distance(
                        cluster1, cluster2, sample)

                elif distance_type == 'variance':
                    distance = utils.variance_increase_distance(
                        cluster1, cluster2, sample)

                print("\tCluster distance -", distance_type, ":", distance)

                # Labels are spread along the segment between the centers; the order is
                # reversed on an axis where the first center has the greater coordinate.
                x_multiplier = index_distance_type + 3
                if x_index_maximum < 0:
                    x_multiplier = len(distances) - index_distance_type + 3

                y_multiplier = index_distance_type + 3
                if y_index_maximum < 0:
                    y_multiplier = len(distances) - index_distance_type + 3

                x_text = x_multiplier * (x_maximum - x_minimum) / (
                    len(distances) + 6) + x_minimum
                y_text = y_multiplier * (y_maximum - y_minimum) / (
                    len(distances) + 6) + y_minimum

                stage.text(x_text,
                           y_text,
                           distance_type + " {:.3f}".format(distance),
                           fontsize=9,
                           color='blue')

    plt.show()
示例#26
0
def display_two_dimensional_cluster_distances(path_sample, amount_clusters):
    """!
    @brief Clusters a two-dimensional sample and visualizes distances between every pair of obtained clusters.
    @details The sample is read from the file and processed by the agglomerative algorithm. For each pair of
              clusters a double-headed arrow is drawn between their centroids, five distances are calculated
              (euclidean and manhattan between centroids, average inter-cluster, average intra-cluster and
              variance increase), printed to the console and written as text labels along the arrow.

    @param[in] path_sample (string): Path to the file with the sample that is passed to 'utils.read_sample'.
    @param[in] amount_clusters (uint): Amount of clusters that is requested from the agglomerative algorithm;
                it is also used as the amount of clusters whose pairs are visited.

    """

    # Ordered dispatch table: label -> calculator. Labels are displayed on the plot and printed to the console,
    # so they are kept exactly as they were (including the historical 'euclidian' spelling).
    distance_calculators = (
        ('euclidian', lambda: utils.euclidean_distance(center_cluster1, center_cluster2)),
        ('manhattan', lambda: utils.manhattan_distance(center_cluster1, center_cluster2)),
        ('avr-inter', lambda: utils.average_inter_cluster_distance(cluster1, cluster2, sample)),
        ('avr-intra', lambda: utils.average_intra_cluster_distance(cluster1, cluster2, sample)),
        ('variance', lambda: utils.variance_increase_distance(cluster1, cluster2, sample)),
    )
    amount_distances = len(distance_calculators)

    def label_coordinate(first, second, position):
        """Return coordinate (along one axis) of the label with index 'position' between two centroid coordinates."""
        if second > first:
            minimum, maximum = first, second
            multiplier = position + 3
        else:
            # The second centroid is not greater along this axis: labels are placed in the reversed order.
            minimum, maximum = second, first
            multiplier = amount_distances - position + 3

        # Labels are spread over the inner part of the segment between centroids.
        return multiplier * (maximum - minimum) / (amount_distances + 6) + minimum

    sample = utils.read_sample(path_sample)

    agglomerative_instance = agglomerative(sample, amount_clusters)
    agglomerative_instance.process()

    obtained_clusters = agglomerative_instance.get_clusters()
    stage = utils.draw_clusters(sample, obtained_clusters, display_result=False)

    # The inner loop starts after the outer index, therefore each unordered pair is visited exactly once
    # and no additional bookkeeping of processed pairs is required.
    for index_cluster in range(amount_clusters):
        for index_neighbor_cluster in range(index_cluster + 1, amount_clusters):
            cluster1 = obtained_clusters[index_cluster]
            cluster2 = obtained_clusters[index_neighbor_cluster]

            center_cluster1 = utils.centroid(sample, cluster1)
            center_cluster2 = utils.centroid(sample, cluster2)

            print("Cluster 1:", cluster1, ", center:", center_cluster1)
            print("Cluster 2:", cluster2, ", center:", center_cluster2)

            # The annotation text is passed positionally: the keyword was renamed from 's' to 'text'
            # in matplotlib 3.3, so the positional form is the only one accepted by all versions.
            stage.annotate('',
                           xy=(center_cluster1[0], center_cluster1[1]),
                           xytext=(center_cluster2[0], center_cluster2[1]),
                           arrowprops=dict(arrowstyle='<->'))

            for index_distance_type, (distance_type, calculate) in enumerate(distance_calculators):
                distance = calculate()

                print("\tCluster distance -", distance_type, ":", distance)

                x_text = label_coordinate(center_cluster1[0], center_cluster2[0], index_distance_type)
                y_text = label_coordinate(center_cluster1[1], center_cluster2[1], index_distance_type)

                stage.text(x_text, y_text, distance_type + " {:.3f}".format(distance), fontsize=9, color='blue')

    plt.show()