Example #1
def CMeans_CIQ(S, K, n_iterations=None, init_array=None):
    """
    CIQ (color image quantization) via fuzzy C-Means.
    Only when an initial membership matrix and a maximum iteration count are
    supplied (e.g. when applying PSO-CIQ or CQ-ABC) are they passed through
    to cmeans.
    :param S: input image, reshaped to (n_pixels, 1, 3)
    :param K: number of palette colors (clusters)
    :param n_iterations: optional maximum number of iterations
    :param init_array: optional initial membership matrix for cmeans
    :return: (K, 3) integer array of cluster centers
    """
    if n_iterations:
        cmeans_output = cmeans(data=S.T,  # transposed like the default branch: (features, samples)
                               c=K,
                               m=2,
                               error=0.001,      # cmeans requires a stopping tolerance
                               maxiter=n_iterations,
                               init=init_array)  # pass the supplied initialization through

    else:
        cmeans_output = cmeans(data=S.T,
                               c=K,
                               m=2.0,
                               maxiter=300,
                               error=0.001)

    centers = cmeans_output[0]
    centers = np.reshape(centers, (K, 3)).astype(int)  # np.int was removed in NumPy 1.24
    return centers
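
A minimal usage sketch for the function above (hypothetical data; it assumes skfuzzy is installed and passes a flat (n_pixels, 3) array so that S.T has the (features, samples) layout cmeans expects — the docstring's (n_pixels, 1, 3) shape would need an extra squeeze first):

import numpy as np
from skfuzzy.cluster import cmeans  # used inside CMeans_CIQ

# Random stand-in for an RGB image.
img = np.random.randint(0, 256, size=(64, 64, 3)).astype(float)

# Flatten to (n_pixels, 3); the function transposes this internally.
S = img.reshape(-1, 3)

palette = CMeans_CIQ(S, K=8)   # default run: maxiter=300, error=0.001
print(palette.shape)           # -> (8, 3), one RGB row per palette color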
Example #2
    def fit_predict(self, X: np.ndarray) -> np.ndarray:
        '''
        Train and predict.

        - params:
            - X : np.ndarray (S, N) : S is the number of features,
                                      N is the number of samples
        - return:
            - Y : np.ndarray (N, 1) : cluster label per sample
        '''

        cntr, u, u0, d, jm, p, fpc = cmeans(data=X,
                                            c=self.params['n_clusters'],
                                            m=self.params['m'],
                                            error=self.params['tol'],
                                            maxiter=self.params['max_iter'],
                                            init=None,
                                            seed=self.params['random_state'])

        self.params['centers'] = cntr

        self.results['u'] = u
        self.results['u0'] = u0
        self.results['d'] = d
        self.results['jm'] = jm
        self.results['p'] = p
        self.results['fpc'] = fpc

        y = np.argmax(u, axis=0)  # hard label per sample (transpose of a 1-D array is a no-op)

        return np.expand_dims(y, axis=1)
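
For context, the same call outside the class, with illustrative parameter values; skfuzzy's cmeans returns, in order, the centers cntr, the final membership u, the initial membership u0, the distance matrix d, the objective history jm, the iteration count p, and the fuzzy partition coefficient fpc:

import numpy as np
from skfuzzy.cluster import cmeans

X = np.random.rand(4, 200)   # (features, samples) -- the orientation cmeans expects
cntr, u, u0, d, jm, p, fpc = cmeans(X, c=3, m=2.0, error=1e-4,
                                    maxiter=300, init=None, seed=0)
labels = np.argmax(u, axis=0)   # shape (200,), one hard label per sample
print(labels[:10], fpc)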
Example #3
def fcm(data, c, m=2):
    # data: (n_samples, n_features); cmeans expects the transpose
    center, u, u0, d, jm, p, fpc = cmeans(data.T, c, m=m, error=1e-6, maxiter=20)
    # Memberships are used unweighted here; use u ** m for the fuzzified weights instead.
    u_m = u
    # Euclidean distance of every sample to every center: shape (n_samples, c)
    D = np.sqrt(np.sum(np.square(data[:, None] - center), axis=2))
    # Radius of each cluster: membership-weighted mean distance to its center
    radius = np.asarray([u_m[i] @ D[:, i] / np.sum(u_m[i]) for i in range(c)])
    return center, radius
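
A quick hypothetical check of this fcm on two synthetic 2-D blobs (numpy as np, cmeans imported as above):

import numpy as np

rng = np.random.default_rng(0)
pts = np.vstack([rng.normal(0.0, 0.5, size=(100, 2)),
                 rng.normal(5.0, 0.5, size=(100, 2))])

centers, radii = fcm(pts, c=2)
print(centers)   # two rows, near (0, 0) and (5, 5)
print(radii)     # membership-weighted mean distance per cluster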
Example #4
def dimensionReductionandClustering(word_data_points):
    word_data_array = numpy.array(word_data_points)
    pca = PCA(n_components=2)
    result = pca.fit_transform(word_data_array)
    result_T = result.transpose()      # cmeans expects (features, samples)
    k = 6
    cntr, mem_matr, u0, d, jm, p, fpc = cmeans(result_T, k, 2, error=0.005, maxiter=1000, init=None)
    return mem_matr, k
Example #5
def iteration(X, U, V, labels, p, logger):

    from skfuzzy.cluster import cmeans

    # Pass the previous membership matrix as `init` explicitly: as a sixth
    # positional argument it would land on cmeans' `metric` parameter.
    V, U, _, _, _, t, _ = cmeans(X.T, len(V), 1.05, 1e-1, 200, init=U.T)
    metric_now = nmi_acc(U.T, labels)

    return U.T, V.T, t, metric_now
Example #6
def fcm(n_clusters, vector):
    cntr, u, u0, d, jm, p, fpc = cmeans(vector.T,
                                        n_clusters,
                                        2.5,
                                        0.0001,
                                        1000,
                                        seed=0)
    return {"center": cntr, "membership": u, "label": u.argmax(axis=0)}
Example #7
    def fkmeans(data_matrix, number_of_clusters, fuzzy_parameter, error,
                maximum_iterations):
        # Run fuzzy C-means, then refine its centroids with hard K-Means.
        fmeans = cmeans(data_matrix.T, number_of_clusters, fuzzy_parameter,
                        error, maximum_iterations)
        centroids = fmeans[0]
        kmeans = KMeans(n_clusters=number_of_clusters,
                        algorithm='full').fit(centroids)  # 'full' was renamed 'lloyd' in newer scikit-learn

        return kmeans.cluster_centers_, kmeans.predict(
            data_matrix), davies_bouldin_score(data_matrix,
                                               kmeans.predict(data_matrix))
Example #8
def CMeans_cluster(useful_feature, n, data_id, data_score, __m=2.0):
    data_columns = useful_feature.columns
    KFCM_result = np.asarray(useful_feature)   # np.matrix is deprecated
    KFCM_result = KFCM_result.T
    center, u, u0, d, jm, p, fpc = cmeans(KFCM_result,
                                          m=__m,
                                          c=n,
                                          error=0.00000001,
                                          maxiter=100000)
    # print('end KFCM')
    u = u.T
    final_location = normalise_U(u)
    label = []
    for i in final_location:
        i = list(i)
        temp = i.index(1)
        label.append(temp)
    score_sil = metrics.silhouette_score(useful_feature,
                                         label,
                                         metric='euclidean')
    print("With %d clusters, KFCM Silhouette Coefficient: %f" %
          (n, score_sil))
    score_cal = metrics.calinski_harabasz_score(useful_feature, label)
    print("With %d clusters, KFCM Calinski-Harabasz Index: %f" % (n, score_cal))
    KFCM_result = KFCM_result.T
    KFCM_result = pd.DataFrame(KFCM_result)
    KFCM_result.columns = data_columns
    KFCM_result['label'] = label
    KFCM_result['overall'] = data_score
    # Rank clusters by their total 'overall' score, then relabel from 1 (highest) to n.
    center_overall_sum = {}
    for i in range(n):
        temp = (KFCM_result[KFCM_result.label == i])['overall'].sum()
        center_overall_sum[i] = temp
    center_overall_sum = sorted(center_overall_sum.items(),
                                key=lambda x: x[1],
                                reverse=True)
    Old_label = []
    New_label = []
    num = 0
    for i in center_overall_sum:
        num += 1
        Old_label.append(int(i[0]))
        New_label.append(num)
    label = list(deepcopy(KFCM_result['label']))
    temp = []
    for i in range(len(label)):
        for j in range(n):
            if label[i] == Old_label[j]:
                temp.append(New_label[j])
    KFCM_result.drop('label', axis=1, inplace=True)
    KFCM_result['label'] = temp
    KFCM_result.insert(0, 'eventid', data_id)
    return KFCM_result
Example #9
def findClusters_cmeans(data):
    '''
        Cluster data using fuzzy c-means clustering 
        algorithm
    '''
    # create the classifier object
    return cl.cmeans(
        data,
        c=5,  # number of clusters
        m=2,  # exponentiation factor

        # stopping criteria
        error=0.01,
        maxiter=300)
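
findClusters_cmeans returns skfuzzy's raw seven-element result tuple, so the caller unpacks it; a hedged sketch (cl assumed to be skfuzzy.cluster as above, random values standing in for real (features, samples) input):

import numpy as np

data = np.random.rand(3, 500)
centroids, u, u0, d, jm, n_iter, fpc = findClusters_cmeans(data)
hard_labels = np.argmax(u, axis=0)   # most likely cluster per sample
print('FPC: {:.3f} after {} iterations'.format(fpc, n_iter))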
Example #10
 def fit(self, x, n_rules=5):
     """
     Initialize the rules: each rule is constructed from a combination of
     cluster center, width (std) and labels.
     :param x: the data from which rules are generated
     :param n_rules: number of rules, i.e. the number of cluster centers
     """
     # cmeans works on numpy arrays, so detach the torch tensor first
     center_list, data_partition, _, _, _, _, _ = \
         cmeans(x.t().detach().cpu().numpy(), n_rules, 2, error=0.005, maxiter=1000)
     self.n_rules = n_rules
     self.center_list = torch.tensor(center_list)
     self.data_partition = torch.tensor(data_partition).t()
     self.consequent_list = None
     self.widths_list = self.get_widths_list(x)
Example #11
 def fit(self, X, Y=None):
     if self.n_clusters is None:
         self.n_clusters = int(
             (len(X) / 2)**0.5)  # heuristic from pevec2013
     self.labels_ = np.arange(self.n_clusters)
     centroids, u, _, dists, _, _, fpc = cmeans(X.T, self.n_clusters,
                                                self.exp, self.error,
                                                self.max_iter)
     self.centroids = centroids
     self.fpc = fpc
     # print(n_clusters, "#clusters", self.ref.N, "trN")
     # print(centroids.shape, "centroids") # (n_clusters, n_features)
     # print(u.shape, "u") # (n_clusters, N), membership grades
     # print(dists.shape, "dists") # (n_clusters, N)
     return self
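
To make the pevec2013 heuristic above concrete, a small illustrative loop (the cluster count grows with the square root of the training-set size):

for n in (50, 200, 1000, 5000):
    print(n, int((n / 2) ** 0.5))   # -> 5, 10, 22, 50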
Example #13
def FCM(preprocessing='PCA', M=1.5, Error=0.005, Maxiter=1000, pre_kernel='rbf'):
    if preprocessing == 'PCA':
        X, y = use_PCA('iris_data.txt')
    elif preprocessing == 'KPCA':
        X, y = use_KPCA('iris_data.txt', kernel = pre_kernel)
    elif preprocessing == 'LDA':
        X, y = use_LDA('iris_data.txt')
    elif preprocessing == 'None':
        loader = datasets.load_iris()
        X, y = loader['data'], loader['target']
    else:
        print('Please choose a data preprocessing method from the following:\n')
        print('1.PCA, 2.KPCA, 3.LDA, 4.None')
        return
    
    X = X.T
    center, u, u0, d, jm, p, fpc = cmeans(X, m=M, c=3, error=Error, maxiter=Maxiter)

    label = np.argmax(u, axis=0)   # hard cluster label per sample

    fig1 = plt.subplot(1,2,1)
    fig1.set_title('Data after preprocessing')
    for i, tag in enumerate(y):
        if tag == 0:
            fig1.scatter(X[0][i], X[1][i], c='r')
        elif tag == 1:
            fig1.scatter(X[0][i], X[1][i], c='g')
        elif tag == 2:
            fig1.scatter(X[0][i], X[1][i], c='b')
    
    fig2 = plt.subplot(1,2,2)
    fig2.set_title('Clustering result')
    for i, lab in enumerate(label):
        if lab == 0:
            fig2.scatter(X[0][i], X[1][i], c='r')
        elif lab == 1:
            fig2.scatter(X[0][i], X[1][i], c='g')
        elif lab == 2:
            fig2.scatter(X[0][i], X[1][i], c='b')
 
    plt.show()
Example #14
def process_data(data, centers):
    cntr, u, u0, d, jm, p, fpc = cmeans(data,
                                        centers,
                                        2,
                                        error=0.005,
                                        maxiter=10000,
                                        init=None)
    cluster_membership = np.argmax(u, axis=0)

    data = pd.DataFrame(data=data)
    data['target'] = cluster_membership

    g = sns.FacetGrid(data, hue='target', palette='tab20', height=5)  # `size` was renamed `height` in seaborn 0.9
    g.map(plt.scatter, 0, 1, s=100, linewidth=.5, edgecolor='white')
    g.add_legend()
    # for i in range(centers):
    #     plt.scatter(data[cluster_membership == i])
    # for pt in cntr:
    #     plt.plot(pt[0], pt[1], 'rs')
    plt.show()
Example #15
def classify(schoolList, schoolname, num1, num2):
    schoolArray = np.array(schoolList)
    # astype casts the values; assigning to .dtype would reinterpret the raw buffer
    schoolArray = schoolArray.astype(np.float64)
    schoolArray = schoolArray.T

    center, u, u0, d, jm, p, fpc = cmeans(schoolArray,
                                          m=1.5,
                                          c=3,
                                          error=0.005,
                                          maxiter=1000)

    print(center)
    print(fpc)

    label = np.argmax(u, axis=0)   # hard cluster label per sample

    kind1 = []
    kind2 = []
    kind3 = []

    targetList1 = []
    targetList2 = []
    for row in center:
        targetList1.append(row[num1])
        targetList2.append(row[num2])

    kind_num1 = targetList1.index(max(targetList1))
    kind_num2 = targetList2.index(max(targetList2))

    for i in range(0, len(schoolList)):
        if label[i] == 0:
            kind1.append([schoolname[i], label[i]])
        elif label[i] == 1:
            kind2.append([schoolname[i], label[i]])
        else:
            kind3.append([schoolname[i], label[i]])

    return kind1, kind2, kind3, kind_num1, kind_num2
Example #16
    def fmeans(data_matrix, number_of_clusters, fuzzy_parameter, error,
               maximum_iterations):  #alter
        # Run fuzzy C-means, then hard-assign every point to its nearest centroid.
        fmeans = cmeans(data_matrix.T, number_of_clusters, fuzzy_parameter,
                        error, maximum_iterations)
        centroids = fmeans[0]
        datapoint_no = 0
        cluster_assignment_list = np.zeros(
            data_matrix.shape[0]).astype('int32')
        while (datapoint_no < data_matrix.shape[0]):
            distance = np.linalg.norm(data_matrix[datapoint_no] - centroids[0])
            assigned_cluster_no = 0
            centroid_no = 1
            while centroid_no < number_of_clusters:
                tmp_distance = np.linalg.norm(data_matrix[datapoint_no] -
                                              centroids[centroid_no])
                if tmp_distance < distance:
                    distance = tmp_distance
                    assigned_cluster_no = centroid_no
                centroid_no = centroid_no + 1
            cluster_assignment_list[datapoint_no] = assigned_cluster_no
            datapoint_no = datapoint_no + 1

        return centroids, cluster_assignment_list
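
The nearest-centroid while-loops above can be collapsed into one vectorized step; a sketch reusing the function's local names (numpy as np):

import numpy as np

# dists[i, j] = Euclidean distance from sample i to centroid j
dists = np.linalg.norm(data_matrix[:, None, :] - centroids[None, :, :], axis=2)
cluster_assignment_list = dists.argmin(axis=1).astype('int32')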
Example #17
 def process(self, img, **kwargs):
     n_clusters = kwargs.get('n_clusters', 3)
     m = kwargs.get('m', 2)
     eps = kwargs.get('eps', 0.01)
     max_it = kwargs.get('max_it', 100)
     numpass = kwargs.get('numpass', 5)
     median_radius = kwargs.get('median_radius', 10)
     if isinstance(img, Dataset):
         img = img.pixel_array
     img, _ = median_otsu(img, numpass=numpass, median_radius=median_radius)  # brain masking (dipy's median_otsu)
     flat = img.reshape((1, -1))         # one feature (intensity), every voxel a sample
     c, u, a1, a2, a3, a4, a5 = cmeans(flat, n_clusters, m, eps, max_it)
     tumor_index = np.argmax(c, axis=0)  # assumes the cluster with the brightest center is the tumor
     defuz = np.argmax(u, axis=0)        # hard label per voxel
     mask = np.full(defuz.shape[0], 0, dtype=np.uint16)
     mask[defuz == tumor_index] = 1
     mask = mask.reshape(img.shape)
     k1 = np.ones((3, 3), np.uint16)
     k2 = np.ones((5, 5), np.uint16)
     mask = cv.erode(mask, k2, iterations=1)
     mask = cv.dilate(mask, k1, iterations=1)
     mask = cv.erode(mask, k2, iterations=2)
     mask = cv.dilate(mask, k1, iterations=5)
     return mask
Example #18
    def class3_output(hz_fft3, num_fft3):
        max_sum = 0
        max_index = -1
        min_sum = 0
        min_index = -1
        fft = []
        # Combine the peak-frequency and peak-count lists into one list
        for i in range(len(hz_fft3)):
            fft.append([hz_fft3[i], num_fft3[i]])
        fft = np.array(fft)

        # Standardize the list with z-scores
        normal_fft = scipy.stats.zscore(fft).tolist()
        # From the standardized list, extract the entries with the smallest and largest sums
        for i in range(len(normal_fft)):
            sum_fft = normal_fft[i][0] + normal_fft[i][1]
            if max_sum < sum_fft:
                max_sum = sum_fft
                max_index = [normal_fft[i][0], normal_fft[i][1]]
            if min_sum > sum_fft:
                min_sum = sum_fft
                min_index = [normal_fft[i][0], normal_fft[i][1]]

        # Data to classify; each element is a list of floats
        vectors = normal_fft
        # Cluster the data into 3 clusters
        centers = cmeans(np.array(vectors).T, 3, m, 0.003, 10000)
        u = centers[1].T

        class_list = []
        label = []

        for i in u:
            #print(i, np.amax(i))
            if np.amax(i) < fuzzyValue:
                class_list.append(-1)
            else:
                class_list.append(np.argmax(i))
            label.append(np.argmax(i))

        plot_label = class_list
        '''
        for i in normal_fft:
            label_input = (k_means.near(i, centers))
            plot_label.append(label_input)
            # 0,1: noise -> 0   2: feature -> 1
            #if label_input==0 or label_input==1:
            #    label.append(0)
            #else:
            #    label.append(1)
            label.append(label_input)
        '''
        '''
        # Plot each feature point's mean value according to its label
        fft_0x = []
        fft_0y = []
        fft_1x = []
        fft_1y = []
        fft_2x = []
        fft_2y = []
        print(label)
        for i in range(len(normal_fft)):
            if label[i]==0:
                fft_0x.append(normal_fft[i][0])
                fft_0y.append(normal_fft[i][1])
            elif label[i]==1:
                fft_1x.append(normal_fft[i][0])
                fft_1y.append(normal_fft[i][1])
            else:
                fft_2x.append(normal_fft[i][0])
                fft_2y.append(normal_fft[i][1])


        # figure
        fig = plt.figure(figsize=(14,10))
        ax = fig.add_subplot(1, 1, 1)

        # plot
        ax.scatter(fft_0x, fft_0y, color='g', s=36)
        ax.scatter(fft_1x, fft_1y, color='b', s=36)
        ax.scatter(fft_2x, fft_2y, color='r', s=36)

        plt.title('Method-3', fontsize=36)
        plt.xlabel('vector in x', fontsize=36)
        plt.ylabel('vector in y', fontsize=36)
        plt.tick_params(labelsize=36)
        plt.savefig('D:/opticalflow/cmeans2/plt/class3/' + videoName[:-4] + '_figure.png')
        '''
        fileName = 'D:/opticalflow/cmeans2/plt/class3/' + videoName[:-4] + '_Cmeans_figure.png'
        plot_data(fft, u, filename=fileName)

        return class_list, u
Example #19
    def class1_output(k_err, zahyou):
        x_err = []
        y_err = []
        err = []
        max_err = 0
        min_err = 0
        max_index = -1
        min_index = -1
        # Collect the absolute x/y errors of every feature point into flat arrays
        for i in range(len(k_err)):
            for j in range(len(k_err[0])):
                x_err.append(k_err[i][j][0])
                y_err.append(k_err[i][j][1])
        # Standardize with z-scores
        x_err_normal = scipy.stats.zscore(x_err).tolist()
        y_err_normal = scipy.stats.zscore(y_err).tolist()
        # Combine the standardized x/y absolute-error lists into one
        for i in range(len(x_err)):
            err.append([x_err_normal[i], y_err_normal[i]])
            # Track the entry whose x+y sum is largest
            if max_err < x_err_normal[i] + y_err_normal[i]:
                max_err = x_err_normal[i] + y_err_normal[i]
                max_index = [x_err_normal[i], y_err_normal[i]]
            # Track the entry whose x+y sum is smallest
            elif min_err > x_err_normal[i] + y_err_normal[i]:
                min_err = x_err_normal[i] + y_err_normal[i]
                min_index = [x_err_normal[i], y_err_normal[i]]
        # Data to classify; each element is a list of floats
        vectors = err
        # Cluster the data into 3 clusters (vectors is a list, so convert to an array first)
        centers = cmeans(np.array(vectors).T, 3, 2, 0.003, 10000)
        u = centers[1].T

        label = u
        plot_label = u
        '''
        for i in normal_fft:
            label_input = (k_means.near(i, centers))
            plot_label.append(label_input)
            # 0,1: noise -> 0   2: feature -> 1
            #if label_input==0 or label_input==1:
            #    label.append(0)
            #else:
            #    label.append(1)
            label.append(label_input)
        '''

        # Per feature point: classify all frames with k-means and adopt the most frequent class
        label = []
        k_err = scipy.stats.zscore(k_err).tolist()
        zahyou_ave = []

        # Cluster each feature point using the classified data
        for frame in k_err:
            tmp = []
            sum_x = 0
            sum_y = 0
            # Cluster frame by frame
            for i in frame:
                tmp.append(k_means.near(i, centers))
                sum_x += i[0]
                sum_y += i[1]
            # Use the mode of the clustering results as the label
            label_input = mode(tmp)
            # Compute the mean of the absolute errors
            zahyou_ave.append([sum_x / len(frame), sum_y / len(frame)])
            # 0: noise -> 0   1,2: feature -> 1
            #if label_input == 2 or label_input == 1:
            #    label_input = 1
            #else:
            #    label_input = 0
            label.append(label_input)  # append the most frequent class

        x0 = []
        y0 = []
        x1 = []
        y1 = []
        x2 = []
        y2 = []
        #print(label)
        # Plot each feature point's mean value according to its label
        for index, zahyou_data in enumerate(zahyou_ave):
            if label[index] == 0:
                x0.append(zahyou_data[0])
                y0.append(zahyou_data[1])
            elif label[index] == 1:
                x1.append(zahyou_data[0])
                y1.append(zahyou_data[1])
            else:
                x2.append(zahyou_data[0])
                y2.append(zahyou_data[1])

        # figure
        fig = plt.figure(figsize=(14, 10))
        ax = fig.add_subplot(1, 1, 1)

        # plot
        ax.scatter(x0, y0, color='r')
        ax.scatter(x1, y1, color='b')
        ax.scatter(x2, y2, color='g')

        #plt.title('Method-1', fontsize=36)
        plt.xlabel('vector in x', fontsize=36)
        plt.ylabel('vector in y', fontsize=36)
        plt.tick_params(labelsize=36)
        # Save the plotted figure
        plt.savefig('D:/opticalflow/cmeans2/plt/class1/' + videoName[:-4] +
                    '_figure.png')

        return label
Example #20
import numpy as np
import matplotlib.pyplot as plt
from skfuzzy.cluster import cmeans
a = np.array([[1, 3], [1.5, 3.2], [1.3, 2.8], [3, 1]])
cntr, u, _, _, _, _, _ = cmeans(a.T, c=2, m=2, error=0.005, maxiter=1000)
print(cntr, '\n-----------------\n', u)
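
Reading the output: u has one row per cluster and one column per point, so a hard assignment for the four points above is a one-liner (the exact labels depend on cluster ordering):

labels = u.argmax(axis=0)
print(labels)   # e.g. [0 0 0 1]: the three nearby points share a cluster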
Example #21
clustering.labels_

# Plot the dendrogram; distance_threshold must be specified for this to work
plt.title('Hierarchical Clustering Dendrogram')
# plot the top three levels of the dendrogram
plot_dendrogram(clustering, truncate_mode='level')
plt.xlabel("Number of points in node (or index of point if no parenthesis).")
plt.show()

# 2. Fuzzy clustering
data=pd.read_csv(r"D:\书籍资料整理\多元统计分析\表3-7.csv")
train=data[['人均国内生产总值','粗死亡率','粗出生率','城镇人口比重','平均预期寿命','65岁及以上人口比重']].apply(
    lambda x: (x - np.mean(x)) / (np.std(x)))

train =np.asarray(train)
# train=preprocessing(train)
train=train.T

center, u, u0, d, jm, p, fpc = cmeans(train, m=2, c=3, error=0.0001, maxiter=1000)
# center: the cluster centers
# u: the final membership matrix
# u0: the initial membership matrix
# d: the final Euclidean distance matrix from every point to every center
# jm: the objective-function optimization history
# p: the number of iterations run
# fpc: the fuzzy partition coefficient, a clustering-quality score in [0, 1]
#      (1 is best); it can later be used to choose the number of clusters
result=u.T
result=pd.DataFrame(result,columns=['[,1]','[,2]','[,3]'])
result['country']=data['国家和地区']
# The book uses R's fanny function; the results here differ somewhat from those.
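
As the fpc comment above notes, the partition coefficient can guide the choice of cluster count; a sketch of that selection over the same train array (the range of c is illustrative):

fpcs = []
for c in range(2, 8):
    *_, fpc_c = cmeans(train, m=2, c=c, error=0.0001, maxiter=1000)
    fpcs.append(fpc_c)
best_c = int(np.argmax(fpcs)) + 2     # the highest FPC wins
print(best_c, fpcs)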
Example #22
# For reproducibility
np.random.seed(1000)


if __name__ == '__main__':
    # Load the dataset
    digits = load_digits()
    X = digits['data'] / 255.0
    Y = digits['target']

    # Perform a preliminary analysis
    Ws = []
    pcs = []

    for m in np.linspace(1.05, 1.5, 5):
        fc, W, _, _, _, _, pc = cmeans(X.T, c=10, m=m, error=1e-6, maxiter=20000, seed=1000)
        Ws.append(W)
        pcs.append(pc)

    # Show the results
    sns.set()

    fig, ax = plt.subplots(1, 5, figsize=(20, 4))

    for i, m in enumerate(np.linspace(1.05, 1.5, 5)):
        ax[i].bar(np.arange(10), -np.log(Ws[i][:, 0]))
        ax[i].set_xticks(np.arange(10))
        ax[i].set_title(r'$m={}, P_C={:.2f}$'.format(m, pcs[i]))

    ax[0].set_ylabel(r'$-\log(w_{0j})$')
Example #23
# x = agg[['player_assists', 'player_dbno', 'player_dist_ride',
agg_se = agg.loc[(agg["party_size"] == 2)].copy()
x = agg_se[[
    'player_assists', 'player_dbno', 'player_dist_ride', 'player_dist_walk',
    'player_dmg', 'player_kills', 'player_survive_time'
]].copy()
x_correlaton = x.corr()

# x = x.apply (lambda x: (x-x.min())/(x.max()-x.min()))
x = x.apply(lambda x: (x - x.mean()) / x.std(), axis=0)
x_T = x.T

cntr, u, u_0, d, obj_value, num_of_iter, fpc = sc.cmeans(data=x_T,
                                                         c=3,
                                                         m=2,
                                                         maxiter=100,
                                                         error=0.005)

print(u.shape)
print(obj_value[-1])
# plt.plot(obj_value)
# plt.show()
cluster_list = u.argmax(axis=0)
agg_se["class"] = cluster_list
num_class = np.bincount(cluster_list)
print(num_class)
print(cluster_list)

agg1 = agg_se.loc[(agg_se['class'] == 0)]
agg2 = agg_se.loc[(agg_se['class'] == 1)]
Example #24
import numpy as np

from skfuzzy.cluster import cmeans

from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
data = scale(iris.data)

n_samples, n_features = data.shape
n_iris = len(np.unique(iris.target))
target = iris.target

estimator = KMeans(n_clusters=3)

labels = estimator.fit_predict(data)
print ('K-Means Algorithm Accuracy:', accuracy_score(target, labels))

centr, u_origin, _, _, _, _, fpc = cmeans(data.T, c=3, m=2,  # (features, samples); 3 clusters to match iris
                                          error=0.005,
                                          maxiter=1000)

print('Fuzzy C-Means FPC:', fpc)  # fpc is the partition coefficient, not a classification accuracy
Example #25
File: testing.py Project: Semen52/FSA2
    tsne = TSNE(n_components=n_components, learning_rate=200)  # a learning rate of 0 would leave the embedding stuck at its initialization
    tsne_vectors = tsne.fit_transform(np.asfarray(word_vectors, dtype='float64'))

    num_clusters = 2
    start = time.time()  # Start time

    print('INFO: Clustering: ', num_clusters, ' clusters')
    # if not soft:
    kmeans_clustering = KMeans(n_clusters=num_clusters)
    kclusters = kmeans_clustering.fit_predict(word_vectors)
    # else:
    word_vectors_transpose = word_vectors.transpose()
    cntr, u, u0, d, jm, p, fpc = cluster.cmeans(word_vectors_transpose,
                                                num_clusters,
                                                2,
                                                error=1e-4,
                                                maxiter=300,
                                                init=None)
    # cclusters = np.argmax(u, axis=0)
    # print(cclusters, cclusters.shape)
    # cclusters_fuzzy = get_clusters(u, limit=1/num_clusters)
    cclusters_fuzzy = get_clusters(u, limit=0.35)
    # cl = get_clusters(u, n_components=1)
    # print(cclusters_fuzzy[-4], cclusters_fuzzy.shape)
    # exit(0)

    end = time.time()
    elapsed = end - start
    print("INFO: Time of clustering: ", elapsed, "seconds")

    jet = cm = plt.get_cmap('jet')
Example #26
plt.imshow(I0,cmap='gray'),plt.title('original image')
plt.show()

img1 = I0.reshape(I0.size,1)

# K-Means
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER,10,1.0)
flags = cv2.KMEANS_RANDOM_CENTERS
compactness,labels,centers = cv2.kmeans(img1,4,None,criteria,10,flags)

I1 = labels.reshape(I0.shape)
plt.imshow(I1),plt.title('kmeans')
plt.show()

# Fuzzy CMeans
center, u, u0, d, jm, p, fpc = cmeans(img1.T, m=3, c=3, error=0.005, maxiter=1000)
# A hard label per pixel: argmax over memberships (equivalent to the max-membership search)
Labels = np.argmax(u, axis=0)
I2 = Labels.reshape(I0.shape)
plt.imshow(I2),plt.title('cmeans')
plt.show()

# Binary mask of the cluster containing reference pixel (100, 94)
WM = np.where(I1==I1[100,94],1.0,0)
plt.imshow(WM)
plt.show()
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(5,5))
erosion = cv2.erode(WM,kernel,iterations = 1)
dilation = cv2.dilate(WM,kernel,iterations = 1)
Example #27
def perform_fuzzy_clustering(training: np.ndarray, test: np.ndarray,
                             clusters: int, m: int) -> tuple:
    # Fit on the training data, then project the test data onto the trained centers.
    center, train_labels = cmeans(training.T, clusters, m, 0.005, 1000)[0:2]
    test_labels = cmeans_predict(test.T, center, m, 0.005, 1000)[0]
    return tuple(np.argmax(label, 0) for label in (train_labels, test_labels))
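
A hypothetical round trip on random data (both arrays in (samples, features) layout, since the function transposes internally; assumes cmeans and cmeans_predict are imported from skfuzzy.cluster):

import numpy as np

rng = np.random.default_rng(0)
training = rng.normal(size=(120, 5))
test = rng.normal(size=(30, 5))

train_y, test_y = perform_fuzzy_clustering(training, test, clusters=4, m=2)
print(train_y.shape, test_y.shape)   # (120,) and (30,)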
Example #28
train = np.array(tr)
train = train.T
fpc_all = []
# Loop over c to tune the number of clusters
#for j in range(2,40):
#    center, u1, u0, d, jm, p, fpc = cmeans(train, m=1.5, c=j, error=0.005, maxiter=1000000)
#    print(j)
#    fpc_all.append(fpc)
#plt.figure()
#plt.plot(range(2,40),fpc_all)
#plt.grid(True, linestyle = "-.", color = "b", linewidth = "1")
#plt.xlabel('Clustering number') 
#plt.ylabel('The fuzzy partition coefficient (FPC)')  
#plt.show()
c_num = 3  # number of clusters: 3/5/30
center, u1, u0, d, jm, p, fpc = cmeans(train, m=1.5, c=c_num, error=0.005, maxiter=1000000)
label_1 = np.argmax(u1, axis=0)   # hard label per series

# Visualize the classes that share the same trend
for j in range(c_num):
    t1 = np.where(label_1==j)[0]
    print("类别:%d,数量:%d"%(j,len(t1)))
    plt.figure()
    for i in range(len(t1)):
        plt.plot(range(w_size),tr[t1][i])
    plt.ylim(-1000,2000)
    plt.show()
# Save the label data
label_1 = pd.DataFrame(data = label_1)
label_1.to_csv('label_3.csv')
Example #29
def fuzzy_cmeans(data, n_of_clusters, *, m=1.07):
    # Move the cluster-count tensor to the CPU and make it a plain int for cmeans.
    n_of_clusters = int(n_of_clusters.detach().cpu().numpy())
    result = cmeans(data.T, n_of_clusters, m, 0.001, 10000, seed=0)
    fuzzy_means = result[1].T
    result = torch.FloatTensor(np.array(fuzzy_means)).cuda()
    return result
Example #30
import numpy as np
import matplotlib.pyplot as plt

from skfuzzy.cluster import cmeans, cmeans_predict

from sklearn.datasets import load_digits

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load the dataset
    digits = load_digits()
    X_train = digits['data'] / np.max(digits['data'])

    # Perform Fuzzy C-Means
    fc, W, _, _, _, _, pc = cmeans(X_train.T,
                                   c=10,
                                   m=1.25,
                                   error=1e-6,
                                   maxiter=10000,
                                   seed=1000)

    print('Partition coefficient: {}'.format(pc))

    # Plot the centroids
    fig, ax = plt.subplots(1, 10, figsize=(10, 10))

    for i in range(10):
        c = fc[i]
        ax[i].matshow(c.reshape(8, 8) * 255.0, cmap='gray')
        ax[i].set_xticks([])
        ax[i].set_yticks([])

    plt.show()
Example #31
# End of for
labels = np.asarray(labels) # Convert back to numpy array

X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.4, random_state=0)

print(X_train.shape)
print(X_test.shape)
print(y_train)
print(y_test)

n_samples, n_features = data.shape
n_digits = len(np.unique(labels))
print("n_digits: %d, \t n_samples %d, \t n_features %d"
      % (n_digits, n_samples, n_features))

cntr, u, u0, d, jm, p, fpc = cmeans(data, 2, 2, error=0.005, maxiter=1000, init=None, seed=None)

print(cntr.shape)
# Predict
u,u0,d,jm,p,fpc = cmeans_predict(data, cntr, 2, error=0.005, maxiter=1000, init=None, seed=None)

# print('------ actual ----------')
# print(y_train.shape)
# print('------ predict ----------')
# print(u.shape)

# outputline = ','+ str(accuracy_score(y_train,u))+','+str(precision_score(y_train,u))+','+str(recall_score(y_train,u))+','+str(f1_score(y_train,u))

# f=open("out.csv", "a+")
# f.write(outputline)
# f.close()
Example #32
 def clustering(self):
     cntr, U, U0, d, Jm, p, fpc = cmeans(self.data, self.c, m=10, error=0, maxiter=1000)
     print(cntr)
     print('=======')
     print(U, p)
Example #33
    model = gensim.models.Word2Vec(copus,
                                   size=2,        # pre-gensim-4.0 names (now vector_size / epochs)
                                   min_count=1,
                                   window=5,
                                   iter=1000)
    # model = gensim.models.FastText(copus, size=100, min_count=10, window=5, iter=100)

    wordvector = []

    for word in model.wv.vocab.keys():
        wordvector.append(model.wv[word])
    print(model.wv.vectors)

    wordvector = np.array(wordvector)

    fuzzy_cmeans = cmeans(wordvector.T, 10, 2.5, 0.0001, 1000)
    cntr, u, u0, d, jm, p, fpc = fuzzy_cmeans

    print("クラスタ数 {}".format(10))
    print("クラスタ中心 {}".format(cntr))

    print("クラスタメンバーシップ")
    pprint(u)
    print("クラスタ割当")
    pprint(u.argmax(axis=0))

    # kmeans = KMeans(n_clusters=10)
    # kmeans.fit(wordvector)

    # print("クラスタ数 {}".format(kmeans.n_clusters))
    # print(kmeans.labels_)