Пример #1
0
def os(X, labels, n_clusters):
    """Return the negated ratio of an ``ov``-based sum to a centroid-distance sum.

    Numerator: ``ov(X, labels, X[i], k)`` summed over every point ``i`` whose
    label is ``k``, visiting clusters in ascending order.
    Denominator: for each cluster, the sum of the ``ceil(0.1 * |Ck|)`` largest
    point-to-centroid distances, scaled by ``10 / |Ck|``.

    :param X: indexable collection of points (``X[i]`` is one point).
    :param labels: cluster label of every point, same order as ``X``.
    :param n_clusters: number of clusters; labels are compared to ``0..n-1``.
    :return: ``-numerator / denominator``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
    n_points = len(labels)

    overlap_total = 0.0
    for cluster in range(n_clusters):
        for idx in range(n_points):
            if labels[idx] == cluster:
                overlap_total += ov(X, labels, X[idx], cluster)

    separation_total = 0.0
    for cluster in range(n_clusters):
        member_dists = [
            utils.euclidian_dist(X[idx], centroids[cluster])
            for idx in range(n_points)
            if labels[idx] == cluster
        ]

        # Sum of the 0.1 * |Ck| largest distances to the centroid.
        top_count = int(math.ceil(0.1 * cluster_sizes[cluster]))
        top_sum = 0.0
        for dist in heapq.nlargest(top_count, member_dists):
            top_sum += dist

        separation_total += top_sum * 10.0 / cluster_sizes[cluster]

    return -overlap_total / separation_total
Пример #2
0
def dunn43(X, labels, n_clusters):
    """Return the negated generalized Dunn index built from centroid distances.

    Separation is the smallest centroid-to-centroid distance between two
    populated clusters; the diameter of a cluster is twice the mean distance
    of its points to its centroid.

    The previous implementation accumulated every point-to-centroid distance
    once per same-cluster partner (so the "mean" was really a sum) and then
    *added* 2.0 instead of multiplying by 2. Both are corrected here, which
    also matches the class-based variant that uses ``sums * (2 / size)``.

    :param X: indexable collection of points.
    :param labels: integer cluster label of every point, in ``0..n_clusters-1``.
    :param n_clusters: number of clusters.
    :return: ``-min_separation / max_diameter``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)

    point_in_c = [0] * n_clusters
    dist_sums = [0.0] * n_clusters
    for i in range(len(labels)):
        point_in_c[labels[i]] += 1
        dist_sums[labels[i]] += utils.euclidian_dist(X[i],
                                                     centroids[labels[i]])

    # Empty clusters have no points, hence no diameter and no separation.
    populated = [k for k in range(n_clusters) if point_in_c[k] > 0]

    minimum_dif_c = sys.float_info.max  # min dist in different clusters
    for pos, k in enumerate(populated):
        for l in populated[pos + 1:]:
            minimum_dif_c = min(
                minimum_dif_c,
                utils.euclidian_dist(centroids[k], centroids[l]))

    # Smallest positive float keeps the final division defined when every
    # point coincides with its centroid.
    maximum_same_c = sys.float_info.min  # max diameter over clusters
    for k in populated:
        maximum_same_c = max(maximum_same_c,
                             2.0 * dist_sums[k] / point_in_c[k])
    return -minimum_dif_c / maximum_same_c
Пример #3
0
    def find(self, X, labels, n_clusters):
        """Compute ``(s_c - s_min) / (s_max - s_min)`` over pairwise distances.

        ``s_c`` is the sum of distances between points sharing a label,
        ``n_w`` the number of such pairs derived from the cluster sizes, and
        ``s_min`` / ``s_max`` the ``n_w`` smallest / largest of all pairwise
        distances. Intermediate values are kept on ``self``.

        :param X: 2-D array of points (``X.shape`` must unpack to two values).
        :param labels: cluster label of every point.
        :param n_clusters: number of clusters.
        :return: the ratio described above.
        """
        self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
        self.diameter = utils.find_diameter(X)
        self.cluster_sizes = []
        self.distances = []
        self.s_c = 0
        self.n_w = 0

        n_rows, _ = X.shape
        for a in range(n_rows - 1):
            for b in range(a + 1, n_rows):
                if labels[a] != labels[b]:
                    continue
                self.s_c += utils.euclidian_dist(X[a], X[b])
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)

        # Number of unordered same-cluster pairs: sum of |Ck| * (|Ck| - 1) / 2.
        for k in range(n_clusters):
            size = self.cluster_sizes[k]
            self.n_w += size * (size - 1) / 2

        n_labels = len(labels)
        for a in range(n_labels - 1):
            for b in range(a + 1, n_labels):
                self.distances.append(utils.euclidian_dist(X[a], X[b]))

        pair_count = int(self.n_w)
        self.s_min = heapq.nsmallest(pair_count, self.distances)
        self.s_max = heapq.nlargest(pair_count, self.distances)

        s_min_c = sum(self.s_min)
        s_max_c = sum(self.s_max)
        return (self.s_c - s_min_c) / (s_max_c - s_min_c)
Пример #4
0
def cs_index(X, labels, n_clusters):
    """Return the CS index: intra-cluster spread over nearest-centroid gaps.

    Numerator: for every point, the distance to the farthest point of its own
    cluster, divided by the size of that cluster, summed over all points.
    Denominator: for every cluster, the distance from its centroid to the
    nearest *other* centroid, summed over all clusters.

    Fixes over the previous version:
    * the pair loop used ``range(i, elements - 1)``, so the last point and
      every partner with a smaller index were never considered;
    * the centroid loop only looked at ``j > i``, so the last cluster added
      ``sys.float_info.max`` to the denominator.

    :param X: 2-D array of points.
    :param labels: integer cluster label of every point.
    :param n_clusters: number of clusters.
    :return: ``numerator / denominator``.
    :raises AssertionError: if the denominator is exactly zero.
    """
    elements, _ = X.shape
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)

    # max_dists[i]: distance from point i to the farthest point of its cluster.
    max_dists = [0.0] * elements
    for i in range(elements - 1):
        for j in range(i + 1, elements):
            if labels[i] != labels[j]:
                continue  # only pairs inside the same cluster matter
            dist = utils.euclidian_dist(X[i], X[j])
            max_dists[i] = max(max_dists[i], dist)
            max_dists[j] = max(max_dists[j], dist)

    numerator = 0.0
    for i in range(elements):
        numerator += max_dists[i] / cluster_sizes[labels[i]]

    denominator = 0.0
    for i in range(n_clusters):
        min_centroids_dist = sys.float_info.max
        for j in range(n_clusters):
            if i == j:
                continue
            min_centroids_dist = min(
                utils.euclidian_dist(centroids[i], centroids[j]),
                min_centroids_dist)
        denominator += min_centroids_dist

    # Kept as an assert so callers see the same exception type as before.
    assert denominator != 0.0
    return numerator / denominator
Пример #5
0
 def find(self, X, labels, n_clusters):
     """Return minus (smallest mean inter-cluster distance / largest diameter).

     ``self.delta[a][b]`` holds the mean distance between points of cluster
     ``a`` and points of cluster ``b``; a cluster's diameter is twice the mean
     distance of its points to its centroid (stored in ``self.sums``).

     Fix: cross-cluster distances used to be added only to
     ``delta[labels[i]][labels[j]]`` for ``i < j``, which split each cluster
     pair's total between ``delta[a][b]`` and ``delta[b][a]`` depending on
     point order. Both entries now receive every cross pair.
     NOTE(review): if another method of this class updates ``self.delta``
     incrementally, it must use the same symmetric convention - confirm.

     :param X: 2-D array of points.
     :param labels: integer cluster label of every point.
     :param n_clusters: number of clusters.
     :return: ``-(min_delta / max_diameter)``.
     """
     self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
     self.dists = [[0. for _ in range(len(labels))] for _ in range(len(labels))]
     self.sums = [0 for _ in range(n_clusters)]
     rows, _ = X.shape
     self.point_in_c = cluster_centroid.count_cluster_sizes(labels, n_clusters)
     self.delta_l = [[0.0] * n_clusters] * n_clusters
     # np.array copies the nested list, so the aliased rows above are harmless.
     self.delta = np.array(self.delta_l)
     self.centroid_dists = [0 for _ in range(len(labels))]
     minimum_dif_c = sys.float_info.max
     for i in range(len(labels)):
         self.centroid_dists[i] = utils.euclidian_dist(X[i], self.centroids[labels[i]])
         self.sums[labels[i]] += self.centroid_dists[i]
     for i in range(rows - 1):
         for j in range(i + 1, rows):
             dist = utils.euclidian_dist(X[i], X[j])
             self.dists[i][j] = dist
             self.dists[j][i] = dist
             if labels[i] != labels[j]:
                 self.delta[labels[i]][labels[j]] += dist
                 self.delta[labels[j]][labels[i]] += dist
     for i in range(n_clusters):
         for j in range(n_clusters):
             self.delta[i][j] /= float(self.point_in_c[i] * self.point_in_c[j])
             # Zero entries (the diagonal, or pairs with no recorded distance)
             # are not candidates for the minimum.
             if self.delta[i][j] != 0:
                 minimum_dif_c = min(minimum_dif_c, self.delta[i][j])
         self.sums[i] *= (2 / self.point_in_c[i])
     return -(minimum_dif_c / max(self.sums))
Пример #6
0
def sv(X, labels, n_clusters):
    """Return the negated SV index (nearest-centroid gaps over border spread).

    Numerator: for every cluster, the distance from its centroid to the
    nearest *other* centroid, summed over clusters.
    Denominator: for every cluster, the sum of the ``ceil(0.1 * |Ck|)``
    largest point-to-centroid distances, scaled by ``10 / |Ck|``.

    Fix: the nearest-centroid search used to look only at clusters with a
    higher index and skipped the last cluster entirely; it now considers
    every other cluster, as the class-based variant does. The local that
    shadowed the builtin ``list`` is renamed.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters.
    :return: ``-numerator / denominator``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)

    numerator = 0.0
    for k in range(n_clusters):
        gaps = [
            utils.euclidian_dist(centroids[k], centroids[l])
            for l in range(n_clusters)
            if l != k
        ]
        # With a single cluster there is no neighbour; contribute nothing,
        # which is what the old loop did in that case.
        if gaps:
            numerator += min(gaps)

    denominator = 0.0
    for k in range(n_clusters):
        member_dists = [
            utils.euclidian_dist(X[i], centroids[k])
            for i in range(len(labels))
            if labels[i] == k
        ]

        # Sum of the 0.1 * |Ck| largest distances to the centroid.
        acc = 0.0
        top_count = int(math.ceil(0.1 * cluster_sizes[k]))
        for dist in heapq.nlargest(top_count, member_dists):
            acc += dist
        denominator += acc * 10.0 / cluster_sizes[k]
    return -numerator / denominator
Пример #7
0
 def find(self, X, labels, n_clusters):
     """Return minus (smallest mean inter-cluster distance / largest pair distance).

     ``self.delta[a][b]`` is the mean distance between points of clusters
     ``a`` and ``b``; the denominator is the largest distance between two
     points that share a label.

     Fix: cross-cluster distances were added to
     ``delta[labels[i]][labels[j]]`` for ``i < j`` only, while the reduction
     reads just the upper triangle - pairs whose lower-index point sits in
     the higher-numbered cluster were silently dropped. Distances are now
     accumulated symmetrically and both mirror entries are normalised.
     NOTE(review): confirm that any incremental-update method of this class
     agrees with the symmetric ``self.delta``.

     :param X: 2-D array of points.
     :param labels: integer cluster label of every point.
     :param n_clusters: number of clusters.
     :return: ``-min_delta / max_same_cluster_distance``.
     """
     self.centroids = cluster_centroid.cluster_centroid(
         X, labels, n_clusters)
     self.cluster_sizes = cluster_centroid.count_cluster_sizes(
         labels, n_clusters)
     self.diameter = utils.find_diameter(X)
     self.dists = [[0. for _ in range(len(labels))]
                   for _ in range(len(labels))]
     self.dist_same_c = []
     rows, _ = X.shape
     self.delta = np.array([[0.0] * n_clusters for _ in range(n_clusters)])
     minimum_dif_c = sys.float_info.max  # min dist in different clusters
     maximum_same_c = sys.float_info.min  # max dist in the same cluster
     for i in range(rows - 1):
         for j in range(i + 1, rows):
             dist = utils.euclidian_dist(X[i], X[j])
             self.dists[i][j] = dist
             self.dists[j][i] = dist
             if labels[i] != labels[j]:
                 self.delta[labels[i]][labels[j]] += dist
                 self.delta[labels[j]][labels[i]] += dist
             else:
                 self.dist_same_c.append([i, j])
                 maximum_same_c = max(dist, maximum_same_c)
     for i in range(n_clusters - 1):
         for j in range(i + 1, n_clusters):
             self.delta[i][j] /= float(self.cluster_sizes[i] *
                                       self.cluster_sizes[j])
             self.delta[j][i] = self.delta[i][j]
             if self.delta[i][j] != 0:
                 minimum_dif_c = min(minimum_dif_c, self.delta[i][j])
     return -minimum_dif_c / maximum_same_c
Пример #8
0
    def find(self, X, labels, n_clusters):
        """Return minus (smallest centroid gap / largest cluster diameter).

        The centroid gap is taken over every pair of points with different
        labels (looked up in ``self.centers``); a cluster's diameter is its
        summed point-to-centroid distance times ``2 / size``. Intermediate
        values are stored on ``self``.

        :param X: 2-D array of points.
        :param labels: integer cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``-(min_gap / max_diameter)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(
            X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(
            labels, n_clusters)
        n_rows, _ = X.shape
        n_points = len(labels)

        self.sums = [0] * n_clusters
        # Off-diagonal cells are overwritten below; the diagonal keeps the
        # sentinel maximum.
        self.centers = np.array(
            [[sys.float_info.max] * n_clusters] * n_clusters)
        self.centroid_dists = [0] * n_points

        for idx in range(n_points):
            own = labels[idx]
            self.centroid_dists[idx] = utils.euclidian_dist(
                X[idx], self.centroids[own])
            self.sums[own] += self.centroid_dists[idx]

        for first in range(n_clusters):
            for second in range(n_clusters):
                if first == second:
                    continue
                self.centers[first][second] = utils.euclidian_dist(
                    self.centroids[first], self.centroids[second])

        closest_gap = sys.float_info.max
        for a in range(n_rows):
            for b in range(n_rows):
                if labels[a] == labels[b]:
                    continue
                gap = self.centers[labels[a]][labels[b]]
                closest_gap = min(gap, closest_gap)

        diameters = [
            self.sums[k] * (2 / self.cluster_sizes[k])
            for k in range(n_clusters)
        ]
        return -(closest_gap / max(diameters))
Пример #9
0
    def find(self, X, labels, n_clusters):
        """Return minus (sum of nearest-centroid gaps / border-spread sum).

        Numerator: for each cluster, the smallest entry of its row in
        ``self.centroid_dists`` (distances to the other centroids; the
        diagonal holds ``sys.float_info.max``).
        Denominator: for each cluster, the sum of the ``ceil(0.1 * |Ck|)``
        largest values of its row in ``self.dists``, scaled by ``10 / |Ck|``.

        :param X: indexable collection of points.
        :param labels: cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``-(numerator / denominator)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
        n_points = len(labels)

        self.centroid_dists = [[sys.float_info.max] * n_clusters for _ in range(n_clusters)]
        self.dists = [[0] * n_points for _ in range(n_clusters)]

        for first in range(n_clusters - 1):
            for second in range(first + 1, n_clusters):
                gap = utils.euclidian_dist(self.centroids[first], self.centroids[second])
                self.centroid_dists[first][second] = gap
                self.centroid_dists[second][first] = gap

        numerator = 0.0
        for row in self.centroid_dists:
            numerator += np.amin(row)

        # Row k keeps 0 for points outside cluster k.
        for cluster in range(n_clusters):
            for idx in range(n_points):
                if labels[idx] == cluster:
                    self.dists[cluster][idx] = utils.euclidian_dist(X[idx], self.centroids[cluster])

        denominator = 0.0
        for cluster in range(n_clusters):
            # Sum of the 0.1 * |Ck| largest distances to the centroid.
            top_count = int(math.ceil(0.1 * self.cluster_sizes[cluster]))
            top_sum = 0.0
            for dist in heapq.nlargest(top_count, self.dists[cluster]):
                top_sum += dist
            denominator += top_sum * 10.0 / self.cluster_sizes[cluster]
        return -(numerator / denominator)
Пример #10
0
    def find(self, X, labels, n_clusters):
        """Return minus (smallest centroid gap / largest same-cluster distance).

        Every unordered pair of points is visited once: pairs with different
        labels contribute the distance between their centroids to the minimum,
        pairs with equal labels contribute their own distance to the maximum
        and are recorded in ``self.dist_same_c``.

        :param X: 2-D array of points.
        :param labels: integer cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``-(min_gap / max_same_cluster_distance)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(
            X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(
            labels, n_clusters)
        self.dist_same_c = []
        n_rows, _ = X.shape
        self.dists = [[0.] * n_rows for _ in range(n_rows)]

        # Diagonal cells keep the sentinel maximum.
        self.centers = np.array(
            [[sys.float_info.max] * n_clusters] * n_clusters)
        for first in range(n_clusters):
            for second in range(n_clusters):
                if first == second:
                    continue
                self.centers[first][second] = utils.euclidian_dist(
                    self.centroids[first], self.centroids[second])

        closest_gap = sys.float_info.max
        widest_within = sys.float_info.min
        for a in range(n_rows - 1):
            for b in range(a + 1, n_rows):
                self.dists[a][b] = utils.euclidian_dist(X[a], X[b])
                self.dists[b][a] = self.dists[a][b]
                if labels[a] == labels[b]:
                    self.dist_same_c.append([a, b])
                    widest_within = max(self.dists[a][b], widest_within)
                else:
                    gap = self.centers[labels[a]][labels[b]]
                    closest_gap = min(gap, closest_gap)
        return -(closest_gap / widest_within)
Пример #11
0
 def find(self, X, labels, n_clusters):
     """Return the mean, over clusters, of the worst pairwise scatter ratio.

     ``self.s_clusters[k]`` is obtained from ``self.s`` for every cluster.
     For each ordered pair of distinct clusters with a non-zero centroid
     distance, ``self.sums[i][j]`` is ``(s_i + s_j) / distance``; pairs with
     coincident centroids keep the initial 0. The result is the average of
     the row maxima of ``self.sums``.

     :param X: indexable collection of points.
     :param labels: cluster label of every point.
     :param n_clusters: number of clusters.
     :return: the averaged row maxima.
     """
     self.diameter = utils.find_diameter(X)
     self.s_clusters = [0.] * n_clusters
     self.centroids = cluster_centroid.cluster_centroid(
         X, labels, n_clusters)
     self.points_in_clusters = cluster_centroid.count_cluster_sizes(
         labels, n_clusters)
     for cluster in range(n_clusters):
         self.s_clusters[cluster] = self.s(X, cluster,
                                           self.points_in_clusters, labels,
                                           self.centroids)

     self.sums = [[0] * n_clusters for _ in range(n_clusters)]
     total = 0
     for first in range(n_clusters):
         for second in range(n_clusters):
             if first == second:
                 continue
             gap = utils.euclidian_dist(self.centroids[first],
                                        self.centroids[second])
             if gap != 0:
                 self.sums[first][second] = (self.s_clusters[first] +
                                             self.s_clusters[second]) / gap
         total += np.amax(self.sums[first])
     total /= float(n_clusters)
     return total
Пример #12
0
def dunn53(X, labels, n_clusters):
    """Return the negated generalized Dunn index built from centroid scatter.

    Separation between clusters ``k`` and ``l`` is the pooled mean distance
    of their points to their own centroids:
    ``(sum_k + sum_l) / (|Ck| + |Cl|)``. A cluster's diameter is twice the
    mean distance of its points to its centroid.

    Fixes over the previous version:
    * only the first half of the points was iterated;
    * the minimum included the zero diagonal ``delta[i][i]``, so the function
      always returned ``-0.0``;
    * the diameter was a sum rather than a mean and had 2.0 *added* instead
      of being doubled.
    The formulas now match the class-based variant in this file.

    :param X: indexable collection of points.
    :param labels: integer cluster label of every point, in ``0..n_clusters-1``.
    :param n_clusters: number of clusters.
    :return: ``-min_separation / max_diameter``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)

    point_in_c = [0] * n_clusters
    dist_sums = [0.0] * n_clusters
    for i in range(len(labels)):
        point_in_c[labels[i]] += 1
        dist_sums[labels[i]] += utils.euclidian_dist(X[i],
                                                     centroids[labels[i]])

    # Empty clusters have neither a diameter nor a separation.
    populated = [k for k in range(n_clusters) if point_in_c[k] > 0]

    maximum_same_c = sys.float_info.min  # max diameter over clusters
    for k in populated:
        maximum_same_c = max(maximum_same_c,
                             2.0 * dist_sums[k] / point_in_c[k])

    minimum_dif_c = sys.float_info.max  # min separation over cluster pairs
    for pos, k in enumerate(populated):
        for l in populated[pos + 1:]:
            delta = (dist_sums[k] + dist_sums[l]) / float(
                point_in_c[k] + point_in_c[l])
            minimum_dif_c = min(minimum_dif_c, delta)
    return -minimum_dif_c / maximum_same_c
Пример #13
0
 def find(self, X, labels, n_clusters):
     """Return minus (smallest pooled centroid scatter / largest pair distance).

     Separation between clusters ``i`` and ``j`` is
     ``(sums[i] + sums[j]) / (size_i + size_j)`` where ``sums[k]`` is the
     summed point-to-centroid distance of cluster ``k``. The denominator is
     the largest distance between two points sharing a label.

     Fixes over the previous version:
     * ``self.dists_same_c`` was appended to without being initialised in
       this method;
     * every pair - not only same-cluster pairs - was recorded and fed into
       the "same cluster" maximum. The label check used by the sibling
       ``find`` implementations is restored.

     :param X: indexable collection of points.
     :param labels: integer cluster label of every point.
     :param n_clusters: number of clusters.
     :return: ``-(min_separation / max_same_cluster_distance)``.
     """
     self.diameter = utils.find_diameter(X)
     self.centroids = cluster_centroid.cluster_centroid(
         X, labels, n_clusters)
     self.cluster_sizes = cluster_centroid.count_cluster_sizes(
         labels, n_clusters)
     self.dists = [[0 for _ in range(len(labels))]
                   for _ in range(len(labels))]
     self.centroid_dists = [0 for _ in range(len(labels))]
     self.delta = [[0 for _ in range(n_clusters)]
                   for _ in range(n_clusters)]
     # Attribute name kept as in the original in case other methods read it.
     self.dists_same_c = []
     minimum_dif_c = sys.float_info.max  # min dist in different clusters
     maximum_same_c = sys.float_info.min  # max dist in the same cluster
     self.sums = [0 for _ in range(n_clusters)]
     for i in range(len(labels)):
         self.centroid_dists[i] = utils.euclidian_dist(
             X[i], self.centroids[labels[i]])
         self.sums[labels[i]] += self.centroid_dists[i]
     for i in range(len(labels) - 1):
         for j in range(i + 1, len(labels)):
             self.dists[i][j] = utils.euclidian_dist(X[i], X[j])
             self.dists[j][i] = self.dists[i][j]
             if labels[i] == labels[j]:
                 self.dists_same_c.append([i, j])
                 maximum_same_c = max(self.dists[i][j], maximum_same_c)
     for i in range(n_clusters):
         for j in range(n_clusters):
             if i != j:
                 self.delta[i][j] = (self.sums[i] + self.sums[j]) / float(
                     self.cluster_sizes[i] + self.cluster_sizes[j])
                 minimum_dif_c = min(minimum_dif_c, self.delta[i][j])
     return -(minimum_dif_c / maximum_same_c)
Пример #14
0
def sf(X, labels, n_clusters):
    """Return ``-(1 - 1 / exp(exp(-bcd - wcd)))``.

    ``bcd`` and ``wcd`` come from ``bcd_score`` and ``wcd_score``.
    NOTE(review): the original author marked the exponent with "?????";
    the sign of the ``bcd`` term should be checked against the intended
    definition of the score function.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters.
    :return: the negated score.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)

    between = bcd_score(X, labels, n_clusters, centroids, cluster_sizes)
    within = wcd_score(X, labels, n_clusters, centroids, cluster_sizes)
    exponent = math.exp(-between - within)

    score = 1.0 - 1.0 / math.exp(exponent)
    return -score
Пример #15
0
def sym(X, labels, n_clusters):
    """Return minus (largest centroid gap / summed ``d_ps`` / ``n_clusters``).

    The largest centroid-to-centroid distance is searched over pairs
    ``k <= l`` with ``k < n_clusters - 1``; the divisor is ``d_ps`` summed
    over every point with its own label.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters.
    :return: the negated ratio.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)

    widest_gap = sys.float_info.min
    for first in range(n_clusters - 1):
        for second in range(first, n_clusters):
            gap = utils.euclidian_dist(centroids[first], centroids[second])
            widest_gap = max(widest_gap, gap)

    symmetry_total = 0.0
    for idx in range(len(labels)):
        symmetry_total += d_ps(X, labels, X[idx], labels[idx], centroids)
    return -(widest_gap / symmetry_total / n_clusters)
Пример #16
0
def sym_db(X, labels, n_clusters):
    """Return the Davies-Bouldin-style index built on ``sym_s`` scatter.

    For each cluster ``k`` the worst ratio
    ``(sym_s_k + sym_s_l) / dist(centroid_k, centroid_l)`` over all other
    clusters ``l`` is taken; the result is the mean of these per-cluster
    maxima.

    Fix: the running maximum was initialised once outside the cluster loop,
    so every cluster after the first inherited the largest ratio seen so
    far. It is now reset per cluster, as in the class-based variant.
    ``sym_s`` is also evaluated once per cluster instead of once per pair.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters.
    :return: mean of the per-cluster maximum ratios.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
    scatter = [
        sym_s(X, labels, k, cluster_sizes, centroids)
        for k in range(n_clusters)
    ]

    db = 0
    for k in range(n_clusters):
        max_fraction = 0.0
        for l in range(n_clusters):
            if k == l:
                continue
            fraction = ((scatter[k] + scatter[l]) /
                        utils.euclidian_dist(centroids[k], centroids[l]))
            max_fraction = max(max_fraction, fraction)
        db += max_fraction
    db /= float(n_clusters)
    return db
Пример #17
0
    def find(self, X, labels, n_clusters):
        """Return the mean over clusters of (max scatter sum / min centroid gap).

        For each cluster ``k`` the largest ``s_k + s_l`` and the smallest
        centroid distance are taken over all *other* clusters ``l``; their
        ratio is averaged over clusters.

        Fix: only pairs with ``l > k`` were filled in, so each row saw just
        the higher-numbered clusters and the last row kept its sentinels
        (contributing roughly zero). Both mirror cells are now filled.
        NOTE(review): confirm any incremental-update method of this class
        expects the symmetric ``max_s_sum`` / ``min_centroids_dist``.

        :param X: indexable collection of points.
        :param labels: cluster label of every point.
        :param n_clusters: number of clusters.
        :return: the averaged ratio.
        """
        self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
        # Diagonal cells keep their sentinels so they never win max/min.
        self.max_s_sum = [[sys.float_info.min for _ in range(n_clusters)] for _ in range(n_clusters)]
        self.min_centroids_dist = [[sys.float_info.max for _ in range(n_clusters)] for _ in range(n_clusters)]
        self.s_clusters = [0 for _ in range(n_clusters)]
        self.diameter = utils.find_diameter(X)
        for i in range(n_clusters):
            self.s_clusters[i] = self.s(X, i, self.cluster_sizes, labels, self.centroids)

        for k in range(n_clusters):
            for l in range(k + 1, n_clusters):
                scatter_sum = self.s_clusters[k] + self.s_clusters[l]
                gap = utils.euclidian_dist(self.centroids[k], self.centroids[l])
                self.max_s_sum[k][l] = scatter_sum
                self.max_s_sum[l][k] = scatter_sum
                self.min_centroids_dist[k][l] = gap
                self.min_centroids_dist[l][k] = gap

        numerator = 0.0
        for k in range(n_clusters):
            numerator += np.max(self.max_s_sum[k]) / np.min(self.min_centroids_dist[k])
        return numerator / n_clusters
Пример #18
0
def db_star_index(X, labels, n_clusters):
    """Return the mean over clusters of (max scatter sum / min centroid gap).

    For each cluster ``k`` the largest ``s_k + s_l`` and the smallest
    centroid distance are taken over all *other* clusters ``l``.

    Fix: the inner loop only visited ``l > k``, so each cluster ignored the
    lower-numbered ones and the last cluster contributed
    ``float_info.min / float_info.max``. ``s()`` is now evaluated once per
    cluster rather than twice per pair.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters.
    :return: the averaged ratio.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
    scatter = [
        s(X, k, cluster_sizes, labels, centroids) for k in range(n_clusters)
    ]

    numerator = 0.0
    for k in range(n_clusters):
        # Sentinels are kept so a single-cluster input behaves as before.
        max_s_sum = sys.float_info.min
        min_centroids_dist = sys.float_info.max
        for l in range(n_clusters):
            if l == k:
                continue
            max_s_sum = max(max_s_sum, scatter[k] + scatter[l])
            min_centroids_dist = min(
                min_centroids_dist,
                utils.euclidian_dist(centroids[k], centroids[l]))
        numerator += max_s_sum / min_centroids_dist
    return numerator / n_clusters
Пример #19
0
    def get_nearest_centroids(self):
        """Find, for every point of ``self.X``, its nearest cluster centroid.

        Centroids are computed from ``self.X``, ``self.labels`` and
        ``self.n_clusters``.

        Fix: the comparison used ``centroid_distances[j]`` (indexed by the
        centroid) instead of ``centroid_distances[i]`` (the best distance
        found so far for the current point), so the stored "nearest" centroid
        was not actually the closest one.

        :return: ``(centroids_numbers, centroid_distances)`` - two lists with
            one entry per point: the index of the nearest centroid and the
            distance to it. On ties the later centroid wins.
        """
        row, _ = self.X.shape
        centroid_distances = [sys.float_info.max] * row
        centroids_numbers = [0] * row

        default_centroids = cluster_centroid.cluster_centroid(
            self.X, self.labels, self.n_clusters)

        for i in range(len(self.X)):
            for j in range(len(default_centroids)):
                distance = euclidian_dist(self.X[i], default_centroids[j])
                if distance <= centroid_distances[i]:
                    centroid_distances[i] = distance
                    centroids_numbers[i] = j

        return centroids_numbers, centroid_distances
Пример #20
0
def davies_bouldin(X, n_clusters, labels):
    """Return the Davies-Bouldin index of a clustering.

    For each cluster ``i`` the worst ratio
    ``(s_i + s_j) / dist(centroid_i, centroid_j)`` over all other clusters
    ``j`` is taken; the index is the mean of these per-cluster maxima.

    Fixes over the previous version:
    * the running maximum was never reset, so every cluster inherited the
      largest ratio seen for earlier clusters;
    * when two centroids coincided the ratio variable was left unset (or
      stale from a previous pair); such pairs are now skipped, matching the
      class-based variant;
    * ``s()`` is evaluated once per cluster instead of twice per pair.

    Note the parameter order: ``n_clusters`` comes before ``labels``.

    :param X: indexable collection of points.
    :param n_clusters: number of clusters.
    :param labels: cluster label of every point.
    :return: the index value.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    point_in_c = cluster_centroid.count_cluster_sizes(labels, n_clusters)
    scatter = [
        s(X, k, point_in_c, labels, centroids) for k in range(n_clusters)
    ]

    db = 0
    for i in range(n_clusters):
        worst_ratio = 0.0
        for j in range(n_clusters):
            if i == j:
                continue
            tm = utils.euclidian_dist(centroids[i], centroids[j])
            if tm == 0:
                continue  # coincident centroids: ratio undefined
            worst_ratio = max(worst_ratio, (scatter[i] + scatter[j]) / tm)
        db += worst_ratio
    db /= float(n_clusters)
    return db
Пример #21
0
    def find(self, X, labels, n_clusters):
        """Return the sum of per-cluster (cohesion / separation) over ``len(labels)``.

        Cohesion of a cluster is the summed distance of its points to its
        centroid (``self.numerators``). Separation is, over all points
        outside the cluster, the smallest of "largest distance from that
        point to any member of the cluster" (``self.outer_min_dists``).

        :param X: indexable collection of points.
        :param labels: integer cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``sum(self.accumulator) / len(labels)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(
            X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(
            labels, n_clusters)
        n_points = len(labels)

        self.numerators = [0.0] * n_clusters
        for idx in range(n_points):
            own = labels[idx]
            self.numerators[own] += utils.euclidian_dist(
                X[idx], self.centroids[own])

        self.inner_max_dists = [[0] * n_points for _ in range(n_points)]
        self.outer_min_dists = [[sys.float_info.max] * n_points
                                for _ in range(n_clusters)]
        self.accumulator = [0] * n_clusters

        # Distances between every outside point and every cluster member.
        for cluster in range(n_clusters):
            for outside in range(n_points):
                if labels[outside] == cluster:
                    continue
                for member in range(n_points):
                    if labels[member] == cluster:
                        self.inner_max_dists[outside][member] = \
                            utils.euclidian_dist(X[outside], X[member])
                        self.inner_max_dists[member][outside] = \
                            self.inner_max_dists[outside][member]

        for cluster in range(n_clusters):
            for outside in range(n_points):
                if labels[outside] == cluster:
                    continue
                farthest = 0
                for member in range(len(self.inner_max_dists[outside])):
                    if labels[member] != cluster:
                        continue
                    farthest = max(farthest,
                                   self.inner_max_dists[outside][member])
                # A zero maximum leaves the sentinel in place.
                if farthest != 0:
                    self.outer_min_dists[cluster][outside] = farthest
            nearest = np.amin(self.outer_min_dists[cluster])
            self.accumulator[cluster] = self.numerators[cluster] / nearest
        return sum(self.accumulator) / len(labels)
Пример #22
0
    def find(self, X, labels, n_clusters):
        """Return minus (largest centroid gap / (summed ``d_ps`` * ``n_clusters``)).

        Centroid distances are stored in the upper triangle of
        ``self.dist_centroids``; ``self.dist_ps[i]`` holds ``utils.d_ps`` for
        point ``i`` with its own label.

        :param X: indexable collection of points.
        :param labels: cluster label of every point.
        :param n_clusters: number of clusters.
        :return: the negated ratio.
        """
        self.diameter = utils.find_diameter(X)
        n_points = len(labels)
        self.dist_centroids = [[0] * n_clusters for _ in range(n_clusters)]
        self.dist_ps = [0] * n_points
        self.centroids = cluster_centroid.cluster_centroid(
            X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(
            labels, n_clusters)

        for first in range(n_clusters - 1):
            for second in range(first + 1, n_clusters):
                self.dist_centroids[first][second] = utils.euclidian_dist(
                    self.centroids[first], self.centroids[second])
        widest_gap = np.amax(self.dist_centroids)

        for idx in range(n_points):
            self.dist_ps[idx] = utils.d_ps(X, labels, X[idx], labels[idx],
                                           self.centroids)
        symmetry_total = sum(self.dist_ps)
        return -(widest_gap / (symmetry_total * n_clusters))
Пример #23
0
def dunn41(X, labels, n_clusters):
    """Return the negated Dunn-style index: centroid gap over point diameter.

    Separation is the smallest centroid-to-centroid distance between the
    clusters of any two differently labelled points; the diameter is the
    largest distance between two points sharing a label.

    Fix: the outer loop covered only the first half of the points, so pairs
    lying entirely in the second half were never examined. Every unordered
    pair is now visited exactly once, as in the class-based variant.

    :param X: 2-D array of points.
    :param labels: integer cluster label of every point.
    :param n_clusters: number of clusters.
    :return: ``-min_separation / max_diameter``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    rows, _ = X.shape
    minimum_dif_c = sys.float_info.max  # min dist in different clusters
    maximum_same_c = sys.float_info.min  # max dist in the same cluster

    centers = [[
        utils.euclidian_dist(centroids[i], centroids[j])
        for j in range(n_clusters)
    ] for i in range(n_clusters)]

    for i in range(rows - 1):
        for j in range(i + 1, rows):
            if labels[i] != labels[j]:
                dist = centers[labels[i]][labels[j]]
                minimum_dif_c = min(dist, minimum_dif_c)
            else:
                dist = utils.euclidian_dist(X[i], X[j])
                maximum_same_c = max(dist, maximum_same_c)
    return -minimum_dif_c / maximum_same_c
Пример #24
0
 def find(self, X, labels, n_clusters):
     """Return minus (smallest cross-cluster distance / largest same-cluster distance).

     Every unordered pair of points is measured once and stored
     symmetrically in ``self.dist``; the pair's indices are recorded in
     ``self.dist_dif_c`` or ``self.dist_same_c`` according to its labels.

     :param X: 2-D array of points.
     :param labels: cluster label of every point.
     :param n_clusters: number of clusters.
     :return: ``-(min_between / max_within)``.
     """
     self.diameter = utils.find_diameter(X)
     self.centroids = cluster_centroid.cluster_centroid(
         X, labels, n_clusters)
     n_rows, _ = X.shape
     self.dist = [[0.] * n_rows for _ in range(n_rows)]
     self.dist_dif_c = []
     self.dist_same_c = []
     closest_between = sys.float_info.max
     widest_within = sys.float_info.min
     for a in range(n_rows - 1):
         for b in range(a + 1, n_rows):
             self.dist[a][b] = utils.euclidian_dist(X[a], X[b])
             self.dist[b][a] = self.dist[a][b]
             if labels[a] == labels[b]:
                 self.dist_same_c.append([a, b])
                 widest_within = max(self.dist[a][b], widest_within)
             else:
                 self.dist_dif_c.append([a, b])
                 closest_between = min(self.dist[a][b], closest_between)
     return -(closest_between / widest_within)
Пример #25
0
def s_dbw(X, labels, n_clusters):
    """Return the S_Dbw-style score: normalised scatter plus inter-cluster density.

    Scatter: mean of ``normed_cluster_sigma`` over clusters, divided by
    ``normed_sigma(X)``.
    Density: for every ordered pair of *distinct* clusters,
    ``den2(k, l) / max(den1(k), den1(l))``, averaged over the
    ``n_clusters * (n_clusters - 1)`` pairs.

    Changes over the previous version:
    * the ``k == l`` terms were included in the density sum although the
      normaliser counts only distinct pairs; they are now skipped;
    * leftover debugging ``print`` calls are removed;
    * ``den1`` is evaluated once per cluster rather than twice per pair.

    :param X: indexable collection of points.
    :param labels: cluster label of every point.
    :param n_clusters: number of clusters (must be at least 2, otherwise the
        density normaliser is zero).
    :return: ``scatter + density``.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)

    sigmas = 0.0
    for k in range(n_clusters):
        sigmas += normed_cluster_sigma(X, labels, k)
    sigmas /= n_clusters
    sigmas /= normed_sigma(X)

    stdev_val = stdev(X, labels, n_clusters)
    cluster_density = [
        den1(X, labels, centroids, k, stdev_val) for k in range(n_clusters)
    ]

    dens = 0.0
    for k in range(n_clusters):
        for l in range(n_clusters):
            if k == l:
                continue
            dens += (den2(X, labels, centroids, k, l, stdev_val) /
                     max(cluster_density[k], cluster_density[l]))

    dens /= n_clusters * (n_clusters - 1)
    return sigmas + dens
Пример #26
0
    def find(self, X, labels, n_clusters):
        """Return minus (smallest pooled centroid scatter / largest diameter).

        ``self.sums[k]`` is the summed point-to-centroid distance of cluster
        ``k``. For distinct clusters ``i`` and ``j``,
        ``self.delta[i][j] = (sums[i] + sums[j]) / (size_i + size_j)``.
        A cluster's diameter is ``sums[k] * (2 / size_k)``.

        :param X: indexable collection of points.
        :param labels: integer cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``-(min_delta / max_diameter)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
        n_points = len(labels)
        self.centroid_dists = [0] * n_points
        self.delta = [[0] * n_clusters for _ in range(n_clusters)]
        self.sums = [0] * n_clusters

        for idx in range(n_points):
            own = labels[idx]
            self.centroid_dists[idx] = utils.euclidian_dist(X[idx], self.centroids[own])
            self.sums[own] += self.centroid_dists[idx]

        closest = sys.float_info.max
        for first in range(n_clusters):
            for second in range(n_clusters):
                if first == second:
                    continue
                pooled_size = float(self.cluster_sizes[first] + self.cluster_sizes[second])
                self.delta[first][second] = (self.sums[first] + self.sums[second]) / pooled_size
                closest = min(closest, self.delta[first][second])

        diameters = [
            self.sums[k] * (2 / self.cluster_sizes[k])
            for k in range(n_clusters)
        ]
        return -(closest / max(diameters))
Пример #27
0
    def find(self, X, labels, n_clusters):
        """Return minus (summed ``self.ov`` / border-spread sum).

        Fills the pairwise distance table ``self.dists_e`` first, then
        ``self.a_ss`` / ``self.b_ss`` via ``self.a`` and ``self.b``, then
        sums ``self.ov(i)`` cluster by cluster. The denominator is, per
        cluster, the sum of the ``ceil(0.1 * |Ck|)`` largest values of its
        row in ``self.dists``, scaled by ``10 / |Ck|``.
        NOTE(review): ``self.a``, ``self.b`` and ``self.ov`` presumably read
        the tables prepared here, so the statement order is kept as-is.

        :param X: indexable collection of points.
        :param labels: cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``-(numerator / denominator)``.
        """
        self.diameter = utils.find_diameter(X)
        self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)
        n_points = len(labels)

        self.dists = [[0] * n_points for _ in range(n_clusters)]
        self.dists_e = [[0] * n_points for _ in range(n_points)]
        self.dists_for_b = [0] * n_points
        self.max_b_ss = [0] * n_points
        self.b_ss_size = [0] * n_points
        for a in range(n_points):
            for b in range(n_points):
                self.dists_e[a][b] = utils.euclidian_dist(X[a], X[b])
                self.dists_e[b][a] = self.dists_e[a][b]

        self.a_ss = [0] * n_points
        self.b_ss = [0] * n_points
        for idx in range(n_points):
            self.a_ss[idx] = self.a(X, labels, idx, labels[idx])
            self.b_ss[idx] = self.b(X, labels, idx, labels[idx])

        numerator = 0.0
        for cluster in range(n_clusters):
            for idx in range(n_points):
                if labels[idx] == cluster:
                    numerator += self.ov(idx)

        # Row k keeps 0 for points outside cluster k.
        for cluster in range(n_clusters):
            for idx in range(n_points):
                if labels[idx] == cluster:
                    self.dists[cluster][idx] = utils.euclidian_dist(X[idx], self.centroids[cluster])

        denominator = 0.0
        for cluster in range(n_clusters):
            # Sum of the 0.1 * |Ck| largest distances to the centroid.
            top_count = int(math.ceil(0.1 * self.cluster_sizes[cluster]))
            top_sum = 0.0
            for dist in heapq.nlargest(top_count, self.dists[cluster]):
                top_sum += dist
            denominator += top_sum * 10.0 / self.cluster_sizes[cluster]
        return -(numerator / denominator)
Пример #28
0
 def find(self, X, labels, n_clusters):
     """Return the mean, over clusters, of the worst ``sym_s`` scatter ratio.

     ``self.fractions[k][l]`` is
     ``(sym_s_k + sym_s_l) / dist(centroid_k, centroid_l)`` for distinct
     clusters and 0 on the diagonal; the result averages the row maxima.
     ``self.dist_ps`` is filled with ``utils.d_ps`` for every point before
     ``self.sym_s`` is called.

     :param X: indexable collection of points.
     :param labels: cluster label of every point.
     :param n_clusters: number of clusters.
     :return: the averaged row maxima.
     """
     self.diameter = utils.find_diameter(X)
     n_points = len(labels)
     self.dist_ps = [0] * n_points
     self.sym_s_clusters = [0] * n_clusters
     self.fractions = [[0] * n_clusters for _ in range(n_clusters)]
     self.centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
     self.cluster_sizes = cluster_centroid.count_cluster_sizes(labels, n_clusters)

     for idx in range(n_points):
         self.dist_ps[idx] = utils.d_ps(X, labels, X[idx], labels[idx], self.centroids)
     for cluster in range(n_clusters):
         self.sym_s_clusters[cluster] = self.sym_s(X, labels, cluster, self.cluster_sizes, self.centroids)

     for first in range(n_clusters):
         for second in range(n_clusters):
             if first == second:
                 continue
             scatter_sum = self.sym_s_clusters[first] + self.sym_s_clusters[second]
             self.fractions[first][second] = (
                 scatter_sum /
                 utils.euclidian_dist(self.centroids[first], self.centroids[second]))

     total = 0
     for cluster in range(n_clusters):
         total += np.amax(self.fractions[cluster])
     total /= float(n_clusters)
     return total
Пример #29
0
def cop(X, labels, n_clusters):
    """Return the COP index: per-cluster cohesion over separation, averaged.

    Cohesion of cluster ``k`` is the summed distance of its points to its
    centroid. Separation is the minimum, over points outside ``k``, of the
    largest distance from that point to any member of ``k``. The ratios are
    summed and divided by the number of points.

    Fix: the member loop started at the outside point's index, so members
    with a smaller index were ignored and the inner maximum was often never
    updated (the situation the old TODO asked about). All members of the
    cluster are now considered.

    :param X: indexable collection of points.
    :param labels: integer cluster label of every point.
    :param n_clusters: number of clusters.
    :return: the index value.
    """
    centroids = cluster_centroid.cluster_centroid(X, labels, n_clusters)
    n_points = len(labels)

    numerators = [0.0] * n_clusters
    for i in range(n_points):
        numerators[labels[i]] += utils.euclidian_dist(X[i],
                                                      centroids[labels[i]])

    accumulator = 0.0
    for k in range(n_clusters):
        members = [j for j in range(n_points) if labels[j] == k]
        # Sentinel is kept when the cluster is empty or has no outside
        # points, as before.
        outer_min_dist = sys.float_info.max
        if members:
            for i in range(n_points):
                if labels[i] == k:
                    continue
                inner_max_dist = max(
                    utils.euclidian_dist(X[i], X[j]) for j in members)
                outer_min_dist = min(outer_min_dist, inner_max_dist)
        accumulator += numerators[k] / outer_min_dist
    return accumulator / n_points
Пример #30
0
    def find(self, X, labels, n_clusters):
        """Return (intra-cluster spread) / (sum of nearest-centroid gaps).

        Numerator: for every point, the largest same-cluster distance in its
        row of ``self.dists`` divided by the size of its cluster.
        Denominator: for every cluster, the smallest entry of its row in
        ``self.centroids_dist`` (the diagonal holds ``sys.float_info.max``).

        :param X: 2-D array of points.
        :param labels: integer cluster label of every point.
        :param n_clusters: number of clusters.
        :return: ``numerator / denominator``.
        """
        self.diameter = utils.find_diameter(X)
        elements, _ = X.shape
        self.centroids = cluster_centroid.cluster_centroid(
            X, labels, n_clusters)
        self.cluster_sizes = cluster_centroid.count_cluster_sizes(
            labels, n_clusters)

        # Only same-cluster pairs are measured; other cells stay 0.
        self.dists = [[0] * elements for _ in range(elements)]
        for a in range(elements - 1):
            for b in range(a + 1, elements):
                if labels[a] == labels[b]:
                    self.dists[a][b] = utils.euclidian_dist(X[a], X[b])
                    self.dists[b][a] = self.dists[a][b]

        numerator = 0.0
        for idx in range(elements):
            farthest = np.amax(self.dists[idx])
            numerator += farthest / self.cluster_sizes[labels[idx]]

        self.centroids_dist = [[sys.float_info.max] * n_clusters
                               for _ in range(n_clusters)]
        for first in range(n_clusters):
            for second in range(n_clusters):
                if first == second:
                    continue
                self.centroids_dist[first][second] = utils.euclidian_dist(
                    self.centroids[first], self.centroids[second])

        denominator = 0.0
        for row in self.centroids_dist:
            denominator += np.amin(row)

        return numerator / denominator