Example #1
import numpy as np
import skfuzzy as fuzz
from sklearn.cluster import KMeans


def GetClusterValues(clusterAlg, comparebothclusters, data1, df1Norm, nclusters):
    cntr = None
    means = None
    if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
        # fuzzy c-means: data1 is (n_features, n_samples)
        cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data1, nclusters, 2, error=0.005, maxiter=1500)

        # maximum membership value (and full membership vector) for each sample
        Cluster_Values = getMaximumCmeans(u)
        AllCluster_Values = getMaximumCmeans(u, True)
        cluster_maximum_indices = np.argmax(u, axis=0)
        Kmeanslabels = ['NA' for x in range(data1.shape[1])]
        if 'b' in comparebothclusters or 'c' in comparebothclusters:
            # also fit k-means so both clusterings can be compared
            means = KMeans(n_clusters=nclusters).fit(df1Norm)
            Kmeanslabels = means.labels_
    elif 'kmeans' in clusterAlg or 'k-means' in clusterAlg or 'means' in clusterAlg:
        means = KMeans(n_clusters=nclusters).fit(df1Norm)
        Kmeanslabels = means.labels_
        Cluster_Values = ['NA' for x in range(data1.shape[1])]
        AllCluster_Values = [data1.shape[0] * ['NA'] for x in range(data1.shape[1])]
        cluster_maximum_indices = ['NA' for x in range(data1.shape[1])]
        if 'b' in comparebothclusters or 'c' in comparebothclusters:
            # also run fuzzy c-means for the comparison
            cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data1, nclusters, 2, error=0.005, maxiter=1500)
            Cluster_Values = getMaximumCmeans(u)
            AllCluster_Values = getMaximumCmeans(u, True)
            cluster_maximum_indices = np.argmax(u, axis=0)
    else:
        # default to k-means when no algorithm name matches
        means = KMeans(n_clusters=nclusters).fit(df1Norm)
        Kmeanslabels = means.labels_
        Cluster_Values = ['NA' for x in range(data1.shape[1])]
        AllCluster_Values = [data1.shape[0] * ['NA'] for x in range(data1.shape[1])]
        cluster_maximum_indices = ['NA' for x in range(data1.shape[1])]
        if 'b' in comparebothclusters or 'c' in comparebothclusters:
            cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data1, nclusters, 2, error=0.005, maxiter=1500)
            Cluster_Values = getMaximumCmeans(u)
            AllCluster_Values = getMaximumCmeans(u, True)
            cluster_maximum_indices = np.argmax(u, axis=0)
    # labels from whichever algorithm was named
    if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
        Trained_labels = cluster_maximum_indices
    else:
        Trained_labels = means.labels_
    return (AllCluster_Values, Cluster_Values, Kmeanslabels,
            cluster_maximum_indices, cntr, means, Trained_labels)
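The helper getMaximumCmeans is not shown on this page; a minimal sketch consistent with the calls above (the flag argument returning each sample's full membership vector is an assumption) could be:

import numpy as np

def getMaximumCmeans(u, return_all=False):
    # u: fuzzy partition matrix, shape (n_clusters, n_samples)  [assumed]
    if return_all:
        # full membership vector for each sample
        return [u[:, i].tolist() for i in range(u.shape[1])]
    # highest membership value per sample
    return np.max(u, axis=0).tolist()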
Example #2
import numpy as np
import cv2 as cv


def get_dominent_colors(img,
                        cluster_num=5,
                        cluster='kmeans',
                        if_show=False,
                        name=None):
    """Return the cluster_num dominant colors of a BGR image."""
    pixels = img.reshape(-1, 3).astype(np.float32)

    if cluster == 'kmeans':
        criteria = (cv.TERM_CRITERIA_MAX_ITER, 10, 0.1)
        flags = cv.KMEANS_RANDOM_CENTERS
        _, _, centers = cv.kmeans(pixels, cluster_num, None, criteria, 1,
                                  flags)
    elif cluster == 'fcm':
        import skfuzzy as skf
        pixels = np.transpose(pixels, (1, 0))
        cmeans_res = skf.cmeans(pixels, cluster_num, 2, 1e-4, 100)
        centers = cmeans_res[0]
    else:
        raise NotImplementedError('Unrecognised cluster method.')

    color_list = np.zeros((cluster_num, 3), dtype='int')
    for c in range(cluster_num):
        # reverse the channel order (BGR -> RGB for an OpenCV image)
        color_list[c] = centers[c, ::-1].astype('int')

    if if_show:
        show_plate(img, name, cluster_num, color_list)

    return color_list
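A quick usage sketch with a synthetic image (the random array is a stand-in for real input):

import numpy as np

img = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)
palette_km = get_dominent_colors(img, cluster_num=5, cluster='kmeans')
palette_fcm = get_dominent_colors(img, cluster_num=5, cluster='fcm')
print(palette_km.shape)  # (5, 3), channel order flipped by the [::-1] above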
Example #3
import numpy as np
import skfuzzy as fuzz

import getCCIM  # project-local helper for connected-component images


def MyFCM(img, ImageType, numClust):
    k = numClust
    num_features = img.shape[2]
    img_vector = np.zeros([img.shape[0] * img.shape[1], num_features])
    img_pixel = np.zeros([img.shape[0] * img.shape[1], 2], int)
    count = 0
    for i in range(0, img.shape[0]):
        for j in range(0, img.shape[1]):
            img_vector[count] = img[i][j]
            img_pixel[count] = np.array([i, j])
            count += 1
    num_samples = img_vector.shape[0]
    img_vector_T = img_vector.T
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(img_vector_T,
                                             k,
                                             2.,
                                             error=0.05,
                                             maxiter=20)
    # hard-assign each pixel to its highest-membership cluster (1-based)
    ClusterIm = np.zeros([img.shape[0], img.shape[1]], int)
    uT = u.T
    for i in range(0, num_samples):
        row = img_pixel[i][0]
        col = img_pixel[i][1]
        max_val = 0.
        max_cluster = 0
        for j in range(0, k):
            if uT[i][j] > max_val:
                max_val = uT[i][j]
                max_cluster = j + 1
        ClusterIm[row][col] = max_cluster

    ccImOneBase = getCCIM.getCCIM(ClusterIm, 4)
    return ClusterIm, ccImOneBase
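The per-pixel loops above can be vectorized; a sketch of the same hard assignment (keeping the 1-based labels) via reshape and argmax:

import numpy as np
import skfuzzy as fuzz

def my_fcm_vectorized(img, num_clust):
    h, w, num_features = img.shape
    img_vector = img.reshape(h * w, num_features).astype(float)
    cntr, u, *_ = fuzz.cmeans(img_vector.T, num_clust, 2.,
                              error=0.05, maxiter=20)
    # highest-membership cluster per pixel, +1 for 1-based labels
    return (np.argmax(u, axis=0) + 1).reshape(h, w)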
Example #4
import numpy as np
from skfuzzy import cmeans


def fuzzy_kmeans(features, num_cluster, m=1.1, num_repeat=20):
    """
    Args:
        features (np.array): shape=(n, *other_dimension)
        num_cluster (int): number of clusters
        m (float): fuzziness exponent, strictly greater than 1
        num_repeat (int): number of restarts

    Returns:
        best_y_pred (np.array): shape=(n,), dtype=np.int32, predicted labels
        best_centroid (np.array): shape=(num_cluster, *other_dimension), centroid features

    """
    input_shape = features.shape
    best_y_pred = None
    best_centroid = None
    best_loss = None
    for i in range(num_repeat):
        returns = cmeans(data=features.reshape((input_shape[0], -1)).T,
                         c=num_cluster,
                         m=m,
                         error=1e-4,
                         maxiter=300)
        # centers, partition matrix, initial matrix, distance matrix,
        # objective-function history, iterations run, fuzzy partition coefficient
        cntr, u, u0, d, jm, p, fpc = returns
        y_pred = np.argmax(u, axis=0)
        if best_y_pred is None or jm[-1] < best_loss:
            best_y_pred = y_pred
            best_centroid = cntr.T
            best_loss = jm[-1]
    best_centroid = best_centroid.reshape((num_cluster, *input_shape[1:]))
    return best_y_pred, best_centroid
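A usage sketch with random data, to show the shapes involved:

import numpy as np

features = np.random.rand(200, 8)   # 200 samples, 8 features
y_pred, centroids = fuzzy_kmeans(features, num_cluster=3, m=1.1, num_repeat=5)
print(y_pred.shape, centroids.shape)  # (200,) (3, 8)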
Example #5
import numpy as np


def separate_syn_dsyn(S, f, low=[4, 12], high=[30, 80], return_all=False):
    # band power in the low and high frequency ranges
    L = np.trapz(S[(low[0] <= f) & (f <= low[1]), :], axis=0)
    H = np.trapz(S[(high[0] <= f) & (f <= high[1]), :], axis=0)
    ##  Fuzzy c-means clustering
    import skfuzzy as fuzz
    data = np.vstack([np.log(L), np.log(H)])
    center, u = fuzz.cmeans(data,
                            c=2,
                            m=2.,
                            error=0.005,
                            maxiter=1000,
                            init=None)[:2]
    cluster_membership = np.argmax(u, axis=0)
    index1 = np.where(cluster_membership == 0)
    index2 = np.where(cluster_membership == 1)
    if center[0, 1] > center[1, 1]:
        dsync_idxs = index1
        sync_idxs = index2
    else:
        dsync_idxs = index2
        sync_idxs = index1
    ## Power ratio
    ratio = np.log(L) / np.log(H)
    ratio_syn_mean = np.nanmean(ratio[sync_idxs])
    ratio_syn_std = np.nanstd(ratio[sync_idxs])
    ratio_dsyn_mean = np.nanmean(ratio[dsync_idxs])
    ratio_dsyn_std = np.nanstd(ratio[dsync_idxs])
    d_idcs = np.where(ratio < ratio_dsyn_mean + ratio_dsyn_std)[0]
    s_idcs = np.where(ratio > ratio_syn_mean - ratio_syn_std)[0]
    if return_all:
        return dsync_idxs, sync_idxs, L, H
    else:
        return d_idcs, s_idcs
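For reference, skfuzzy.cluster.cmeans returns a 7-tuple, which the examples on this page unpack, index, or slice in various ways; the conventional unpacking is:

import numpy as np
import skfuzzy as fuzz

data = np.random.rand(2, 100)  # (n_features, n_samples)
cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data, c=2, m=2.0,
                                         error=0.005, maxiter=1000)
# cntr: centers, u: final memberships, u0: initial memberships, d: distances,
# jm: objective-function history, p: iterations run, fpc: fuzzy partition coefficient
labels = np.argmax(u, axis=0)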
Example #6
def fuzzy(predict_non, noise, label, zeros, thres):
    """
    Run fuzzy clustering.
    Although this is unsupervised, entropy concentrates in the voiced regions, so the cluster
    with the higher values is taken as the speech segment and classification proceeds from there.
    input) predict_non - 2D array holding the entropy and noise values; noise - speech signal with noise;
    label - labels from the first-stage classifier (energy, entropy, etc.);
    zeros - indices where the noisy speech signal equals 0; thres - threshold reference value
    output) nono - VAD'd speech signal after fuzzy clustering; labels - resulting labels
    """

    data_pu = np.array(predict_non).T
    final = skfuzzy.cmeans(data_pu, 2, 2, 0.00000000001, 2000)

    labels = label.copy()
    clustering = final[1]
    if final[0][0, 0] > final[0][1, 0]:
        n = 0
    else:
        n = 1
    for i in range(len(zeros)):

        if clustering[n, i] >= thres:
            labels[zeros[i]] = 1

    noise_1 = noise.copy()
    for i in range(len(label)):
        if labels[i] == 0:
            noise_1[i] = 0
    nono = []
    for i in noise_1:
        if i != 0:
            nono.append(i)

    nono = np.array(nono)
    return nono, labels
Example #7
    def students_cluster(self, **kwargs):
        sf_df = self.students_features
        sf_df = sf_df.fillna(0)
        data = sf_df[self.STUDENTS_F_LABELS].to_numpy()  # as_matrix() was removed from pandas
        if kwargs == {}:
            C, m, error, maxiter = self._C_f, self._m, 1.e-10, 100
            cntr, U, _, _, _, _, fpc = cmeans(data.T,
                                              C,
                                              m,
                                              error,
                                              maxiter)
        else:
            # cmeans expects (data, c, m, error, maxiter); forward the keyword overrides
            cntr, U, _, _, _, _, fpc = cmeans(data.T, **kwargs)
        L = U.T.argmax(axis=1)
        self.students_features['fcm_cluster_ID'] = L
        self.cntr_sf = cntr
Example #8
def fuzzy(predict_non, noise, label, zeros, thres):
    data_pu = np.array(predict_non).T
    final = skfuzzy.cmeans(data_pu, 2, 2, 0.000000001, 2000)

    labels = label.copy()
    clustering = final[1]
    # pick the cluster whose center has the larger first coordinate
    if final[0][0, 0] > final[0][1, 0]:
        n = 0
    else:
        n = 1
    for i in range(len(zeros)):
        if clustering[n, i] >= thres:
            labels[zeros[i]] = 1

    noise_1 = noise.copy()
    for i in range(len(label)):
        if labels[i] == 0:
            noise_1[i] = 0
    nono = [i for i in noise_1 if i != 0]
    nono = np.array(nono)
    return nono, labels
Example #9
def fcm_class_mask(img, brain_mask=None, hard_seg=False):
    """
    creates a mask of tissue classes for a target brain with fuzzy c-means

    Args:
        img (nibabel.nifti1.Nifti1Image): target image (must be T1w)
        brain_mask (nibabel.nifti1.Nifti1Image): mask covering the brain of img
            (none if already skull-stripped)
        hard_seg (bool): pick the maximum membership as the true class in output

    Returns:
        mask (np.ndarray): membership values for each of three classes in the image
            (or class determinations w/ hard_seg)
    """
    img_data = img.get_data()
    if brain_mask is not None:
        mask_data = brain_mask.get_data() > 0
    else:
        mask_data = img_data > img_data.mean()
    [t1_cntr, t1_mem, _, _, _, _,
     _] = cmeans(img_data[mask_data].reshape(-1, len(mask_data[mask_data])), 3,
                 2, 0.005, 50)
    t1_mem_list = [
        t1_mem[i] for i, _ in sorted(enumerate(t1_cntr), key=lambda x: x[1])
    ]  # CSF/GM/WM
    mask = np.zeros(img_data.shape + (3, ))
    for i in range(3):
        mask[..., i][mask_data] = t1_mem_list[i]
    if hard_seg:
        tmp_mask = np.zeros(img_data.shape)
        tmp_mask[mask_data] = np.argmax(mask[mask_data], axis=1) + 1
        mask = tmp_mask
    return mask
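Note the orientation skfuzzy.cmeans expects: (S, N), features by samples, which is why the masked voxels above are reshaped into a single row; a minimal illustration with synthetic values:

import numpy as np
from skfuzzy import cmeans

intensities = np.random.rand(1000)        # stand-in for masked voxel values
samples = intensities.reshape(1, -1)      # 1 feature x 1000 samples
cntr, mem, *_ = cmeans(samples, 3, 2, 0.005, 50)
print(cntr.shape, mem.shape)              # (3, 1) (3, 1000)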
Example #10
def cluster_fcm(dataset):

    # skfuzzy expects (n_features, n_samples)
    dataset = np.transpose(dataset)

    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(dataset, c=12, m=2, error=0.0001, maxiter=10000, init=None)

    labels = np.argmax(u, axis=0)

    infer_results(labels, "Fuzzy C-Means")
Example #11
def clustering(data):
    s_avg = []
    for i in range(2, len(data)):
        cntr, u, u0, distant, fObj, iterasi, fpc = fuzz.cmeans(
            np.asarray(data).T, i, 2, 0.00001, 1000, seed=0)
        membership = np.argmax(u, axis=0)

        #silhouette = silhouette_samples(tfidf, membership)
        s_avg.append(silhouette_score(data, membership, random_state=10))
    return s_avg
Example #12
def fuzzyruleset(data, labels, k=8, m=2):
    centroids, u, u0, d, jm, p, fpc = fuzzy.cmeans(data.T,
                                                   k,
                                                   m,
                                                   error=0.005,
                                                   maxiter=1000,
                                                   metric='euclidean',
                                                   init=None)
    # Obtain each sample's group
    groups = np.argmax(u, axis=0)

    # Calculate membership for each class
    u_class = []

    try:
        nclass = labels.nunique()[0]
    except TypeError:
        nclass = labels.nunique()

    for c in range(nclass):
        index = np.argwhere(np.array(labels).flatten() == c).ravel()
        u_class.append(np.sum(u[:, index], axis=1))
        u_class[c] = u_class[c] / np.linalg.norm(u_class[c])

    # Obtain each group's class
    group_class = np.argmax(np.array(u_class), axis=0)

    # Calculate standard deviation of each group
    group_std = []

    for i in range(k):
        index = np.argwhere(groups == i).ravel()
        if len(index) == 0:
            try:
                group_std.append(np.mean(group_std))
            except RuntimeWarning:
                group_std.append(0.25)
        else:
            group_std.append(u_class[group_class[i]][i] * np.mean(
                euclidean_distances(data.iloc[index, :], [centroids[i, :]])))

    group_std = group_std / np.linalg.norm(group_std)

    # Create set of fuzzy rules
    # Rule: If X1 is A1 and X2 is A2 then Y = C, C = {0, 1}
    rule_set = []

    for r in range(len(group_class)):
        rule_set.append({
            'mu': centroids[r, :],
            'std': group_std[r],
            'con': group_class[r]
        })

    return rule_set, groups
Example #13
import numpy as np
import skfuzzy as fuzz


def fcm_init(x_train, n_rules):
    n_samples, n_features = x_train.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(
        x_train.T, n_rules, 2.0, error=1e-5, maxiter=200)
    delta = np.zeros([n_rules, n_features])
    for i in range(n_rules):
        d = (x_train - centers[i, :]) ** 2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(mem[i, :])
    delta = np.sqrt(delta)
    delta = np.where(delta < 0.05, 0.05, delta)
    return centers.T, delta.T
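fcm_init returns centers and spreads shaped (n_features, n_rules), suitable for seeding Gaussian membership functions; a short sketch of evaluating such memberships (the Gaussian firing-strength step is an assumption about downstream use, not part of the example):

import numpy as np

x_train = np.random.rand(100, 4)
centers, delta = fcm_init(x_train, n_rules=5)        # each (4, 5)

x = x_train[:, :, None]                              # (100, 4, 1)
mf = np.exp(-(x - centers) ** 2 / (2 * delta ** 2))  # (100, 4, 5)
firing = mf.prod(axis=1)                             # product over features -> (100, 5)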
Example #14
def cluster(data_filename, labels_filename):
    data = np.load(data_filename)
    labels = np.load(labels_filename)
    data = data[:, 1:]
    clf = fuzz.cmeans(data, c=6, m=10, error=0.0001, maxiter=2000)
    recipe_labels = json.load(open("recipesNutrients.txt"))
    nutritions = []
    for recipe in recipe_labels:
        nutrient_vector = filter_by_nutrients(list(labels), recipe)
        nutritions.append(nutrient_vector)

    testClustering(data, np.array(nutritions).T)
Example #15
    def _train_model(self, data, num_clusters):
        """
        Train the model with the number of clusters that evaluated best
        :param data: Dataframe with train data
        :param num_clusters: Number of clusters to use
        :return: Trained model
        """
        super()._train_model(data, num_clusters)
        data_array = data.to_numpy()
        return fuzz.cmeans(data=data_array,
                           c=num_clusters,
                           m=2,
                           error=0.005,
                           maxiter=1000)
Example #16
def fcmcluster(points, maxclust):
    import skfuzzy as fuzz
    # 1. Fit the fuzzy c-means model
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(points.T,
                                             maxclust,
                                             2,
                                             error=0.005,
                                             maxiter=1000)
    # Plot assigned clusters, for each data point in training set
    # 2. Compute the cluster assignments
    cluster_membership = np.argmax(u, axis=0)
    # 3. Add the subplot (position 9 in a 2x6 grid)
    plt.subplot(269)
    plt.scatter(points[:, 0], points[:, 1], c=cluster_membership)
    plt.title('fcm')
Example #17
def cl_fuzzyCMeans(pivot, original, pixels, metrics, k, cc):
    cntr, u, u0, d, jm, p, fpc = cmeans(pivot.T,
                                        k,
                                        2,
                                        error=0.005,
                                        maxiter=1000,
                                        init=None)
    method = 'F_{0}_{1}'.format(k, cc * 10)
    assignments = np.argmax(u, axis=0)
    metrics.loc[['FPC'], method] = fpc
    # unpack the chained return for readability
    computed = add_computed_typed(method, assignments, original)
    computed_pixels = add_computed_typed_pixels(method, assignments, pixels)
    computed_metrics = add_metrics_typed(method, metrics, assignments, pivot)
    return computed, computed_pixels, computed_metrics
Example #18
    def estimate_centers(self):
        """
        Helper method for finding centers when using automatic modelling
        (designed for Genetic Programming in this case).
        """
        self.seed = randint(1, 10)  # from random import randint
        cluster_centers, _, _, _, _, p, fpc = cmeans(self._X.T,
                                                     c=self.centers,
                                                     m=1.75,
                                                     error=0.005,
                                                     maxiter=1000,
                                                     seed=self.seed)
        self.centers = cluster_centers
Example #19
    def cluster_data(self, data, c=1, m=1.00, err=1.0, maxiter=1):
        """
        data : 2d array, size (S, N)
            Data to be clustered. N is the number of data sets; S is the number
            of features within each sample vector.
        c : int
            Desired number of clusters or classes.
        m : float
            Array exponentiation applied to the membership function U_old at each
            iteration, where U_new = U_old ** m. Must be greater than 1.
        err : float
            Stopping criterion; stop early if the norm of (U[p] - U[p-1]) < err.
        maxiter : int
            Maximum number of iterations allowed.
        """
        (cntr, U, U0, d, Jm, p, fpc) = skfuzzy.cmeans(
            data, c, m, err,
            maxiter)  #!FIXME the source of this function is broken
        return cntr
Example #20
import numpy as np
import skfuzzy as fuzz
from sklearn.decomposition import PCA

import getCCIM  # project-local helper for connected-component images


def MyFCM05(img1, ImageType, numClust):
    #normalizing image data
    img2 = img1.astype(float)
    img = img2 / 255

    #diff constants for use
    num_features = img.shape[2]
    num_samples = img.shape[0] * img.shape[1]
    img_height = img.shape[0]
    img_width = img.shape[1]

    #reshape img
    img_vector = np.reshape(img, (num_samples, num_features))
    #for hyper simply change the img_vector after pca with 3 dimensions
    if (ImageType == 'Hyper'):
        pca = PCA(3)
        principalComponents = pca.fit_transform(img_vector)
        img_vector = principalComponents
    # transpose only after the optional PCA step so the reduced data is actually used
    img_vector_T = img_vector.T
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(img_vector_T,
                                             numClust,
                                             2.,
                                             error=0.05,
                                             maxiter=20)
    uT = u.T
    output = np.zeros(num_samples)
    for i in range(0, num_samples):
        max_val = 0.
        max_cluster = 0
        for j in range(0, numClust):
            if uT[i][j] > max_val:
                max_val = uT[i][j]
                max_cluster = j + 1
        output[i] = max_cluster
    ClusterIm = np.reshape(output, (img_height, img_width))
    ClusterIm = ClusterIm.astype(int)
    #     imgplot = plt.imshow(img)
    #     plt.pause(2)
    #     plt.imshow(ClusterIm)
    #     plt.pause(5)

    ccImOneBase = getCCIM.getCCIM(ClusterIm, 4)
    return ClusterIm, ccImOneBase
Example #21
def plot_fuzzyCMeans_elbow(pivot):
    maxK = 2
    maxSil = -1
    for i in range(2, 20):
        # transpose so cmeans sees (n_features, n_samples), as in cl_fuzzyCMeans above,
        # and so the labels line up with the rows scored by silhouette_score
        cntr, u, u0, d, jm, p, fpc = cmeans(pivot.T,
                                            i,
                                            2,
                                            error=0.005,
                                            maxiter=1000,
                                            init=None)
        silhouette = silhouette_score(pivot,
                                      np.argmax(u, axis=0),
                                      sample_size=10000)
        print("For n_clusters =", i, "The average silhouette_score is :",
              silhouette, "FPC is :", fpc)
        if silhouette > maxSil:
            maxSil = silhouette
            maxK = i
    return maxK
Example #22
def fuzzy(predict_non, noise, label, zeros, thres):
    data_pu = np.array(predict_non).T
    final = skfuzzy.cmeans(data_pu, 2, 2, error=0.00000000001, maxiter=2000)

    plt.figure(figsize=(12, 6))
    plt.grid()
    plt.title("fuzzy clustering")
    plt.xlabel("Entropy value")
    plt.ylabel("Amplitude")
    plt.scatter(data_pu[0], data_pu[1], color="red", label="sample")
    plt.scatter(final[0][:, 0], final[0][:, 1], color="black", label="center")
    plt.legend()
    plt.show()

    labels = label.copy()
    clustering = final[1]
    if final[0][0, 0] > final[0][1, 0]:
        n = 0
    else:
        n = 1

    for i in range(len(zeros)):
        if clustering[n, i] >= thres:
            labels[zeros[i]] = 1

    noise_1 = noise.copy()
    for i in range(len(label)):
        if labels[i] == 0:
            noise_1[i] = 0
    nono = []
    for i in noise_1:
        if i != 0:
            nono.append(i)
    plt.figure(figsize=(20, 2))
    plt.grid()
    plt.title("vad label")
    plt.plot(labels)

    plt.show()
    nono = np.array(nono)
    return nono, labels
Example #23
def find_tissue_memberships(
    image: Array,
    mask: Optional[Array] = None,
    hard_segmentation: bool = False,
) -> Array:
    """Tissue memberships for a T1-w brain image with fuzzy c-means

    Args:
        image: image to find tissue masks for (must be T1-w)
        mask: mask covering the brain of image (none if already skull-stripped)
        hard_segmentation: pick the maximum membership as the true class in output

    Returns:
        tissue_mask: membership values for each of three classes in the image
            (or class determinations w/ hard_seg)
    """
    if mask is None:
        mask = image > 0.0
    else:
        mask = mask > 0.0
    assert isinstance(mask, Array)
    foreground_size = mask.sum()
    foreground = image[mask].reshape(-1, foreground_size)
    centers, memberships_, *_ = cmeans(foreground, 3, 2, 0.005, 50)

    def get_center(element: Tuple[float, Array]) -> float:
        center: float = element[0]
        return center

    # sort the tissue memberships to CSF/GM/WM (assuming T1-w image)
    sorted_memberships = sorted(zip(centers, memberships_), key=get_center)
    memberships = [m for _, m in sorted_memberships]
    tissue_mask = np.zeros(image.shape + (3, ))
    for i in range(3):
        tissue_mask[..., i][mask] = memberships[i]
    if hard_segmentation:
        tmp_mask = np.zeros(image.shape)
        masked = tissue_mask[mask]
        tmp_mask[mask] = np.argmax(masked, axis=1) + 1
        tissue_mask = tmp_mask
    return tissue_mask
Example #24
    def estimate_missing_values(self):
        estimated_data = []
        complete_data = np.array([self.data[x] for x in self.complete_rows])
        centers, _, _, _, _, _, _ = cmeans(data=complete_data.transpose(),
                                           c=self.c,
                                           m=self.m,
                                           error=FCMParam.ERROR,
                                           maxiter=FCMParam.MAX_ITR,
                                           init=None)

        # Calculate distance between two points based on euclidean distance
        def calculate_distance(data_1, data_2):
            return np.linalg.norm(data_1 - data_2)

        # Calculate the membership value for a given point using the standard
        # FCM weighting u_k = d_k^(-2/(m-1)) / sum_j d_j^(-2/(m-1))
        def calculate_membership(dist_matrix, distance, m):
            numerator = np.power(distance, -2 / (m - 1))
            denominator = np.array(
                [np.power(x, -2 / (m - 1)) for x in dist_matrix]).sum()
            return numerator / denominator

        for i in self.incomplete_rows:
            estimated = 0
            dist, membership_value = [], []
            miss_ind = np.where(self.data[i] == NAN)[0][0]

            for center in centers:
                dist.append(
                    calculate_distance(
                        data_1=np.delete(np.array(center), miss_ind),
                        data_2=np.delete(np.array(self.data[i]), miss_ind)))

            for d in dist:
                membership_value.append(calculate_membership(dist, d, self.m))

            for k in range(self.c):
                estimated += centers[k][miss_ind] * membership_value[k]

            estimated_data.append(estimated)

        return np.array(estimated_data)
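The weighting in calculate_membership above is the textbook FCM membership; written out for distances d_k to the c centers and fuzzifier m > 1:

u_k = \frac{d_k^{-2/(m-1)}}{\sum_{j=1}^{c} d_j^{-2/(m-1)}}

so nearer centers dominate the weighted estimate of the missing feature.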
Example #25
import numpy as np
import skfuzzy as fuzz


def fcm(data, n_cluster):
    """
    Compute data centers and membership of each point by FCM, and compute the variance of each feature
    :param data: n_Samples * n_Features
    :param n_cluster: number of centers
    :return: centers: data centers, delta: variance of each feature
    """
    n_samples, n_features = data.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(data.T,
                                              n_cluster,
                                              2.0,
                                              error=1e-5,
                                              maxiter=200)

    # compute delta compute the variance of each feature
    delta = np.zeros([n_cluster, n_features])
    for i in range(n_cluster):
        d = (data - centers[i, :])**2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(
            mem[i, :])
    return centers, delta
Example #26
def fuzzy_clustering(train, train_labels, test, test_labels, size, plot,
                     plot_dims):

    # the fuzziness exponent m must be > 1 and should match between fit and predict
    [center, u, u0, d, jm, p, fpc] = skf.cmeans(train.T,
                                                c=2,
                                                m=2,
                                                error=.001,
                                                maxiter=100)

    # cmeans_predict returns (u, u0, d, jm, p, fpc); avoid shadowing numpy's np
    [nu, nu0, nd, njm, n_p, nfpc] = skf.cmeans_predict(test.T,
                                                       center,
                                                       2,
                                                       error=0.005,
                                                       maxiter=1000)
    results_train = u.argmax(axis=0)
    results_test = nu.argmax(axis=0)

    if plot:
        plot_results(test, test_labels, results_test, size, "Fuzzy Clustering")

    return results_train, results_test
Example #27
def fcm_init(x_train, n_rules, m=None, scale=1.):
    if m is not None:
        assert m > 1, "m must be larger than 1, received: {}".format(m)
    else:
        # heuristic: m = k / (k - 2) with k = min(n_samples, n_features - 1);
        # e.g. k = 5 gives m = 5/3 ~ 1.67, while small k falls back to m = 2
        if min(x_train.shape[0], x_train.shape[1] - 1) >= 3:
            m = min(x_train.shape[0], x_train.shape[1] -
                    1) / (min(x_train.shape[0], x_train.shape[1] - 1) - 2)
        else:
            m = 2
    n_samples, n_features = x_train.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(x_train.T,
                                              n_rules,
                                              m,
                                              error=1e-5,
                                              maxiter=200)
    delta = np.zeros([n_rules, n_features])
    for i in range(n_rules):
        d = (x_train - centers[i, :])**2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(
            mem[i, :])
    delta = np.sqrt(delta) * scale
    delta = np.where(delta < 0.05, 0.05, delta)
    return centers.T, delta.T
Example #28
write_csv("tfidf%d.csv" % n, tfidf, 'a')

xBaru2 = seleksiFiturPearson(tfidf, 0.9)
xBaru1 = seleksiFiturPearson(xBaru2, 0.8)
'''
    Scenarios:
    1. Clustering without feature selection
    2. Clustering with Pearson selection, threshold 0.8
    3. Clustering with Pearson selection, threshold 0.9
    4. Clustering with Pearson selection, threshold 0.95
'''

print("Clustering without feature selection")
cntr, u, u0, distant, fObj, iterasi, fpc = fuzz.cmeans(tfidf.T,
                                                       3,
                                                       2,
                                                       0.00001,
                                                       1000,
                                                       seed=0)
membership = np.argmax(u, axis=0)

silhouette = silhouette_samples(tfidf, membership)
s_avg = silhouette_score(tfidf, membership, random_state=10)

for i in range(total_doc):
    print("c " + str(membership[i]))  #+"\t" + str(silhouette[i]))
print(s_avg)

#kmeans = KMeans(n_clusters=3, random_state=0).fit(tfidf)
#print(kmeans.labels_)

write_csv("Cluster%d.csv" % n, [["Cluster"]])
Example #29
from sklearn import datasets
import numpy as np
from sklearn.metrics import confusion_matrix
import skfuzzy

iris = datasets.load_iris()

r = skfuzzy.cmeans(data=iris.data.T,
                   c=3,
                   m=2,
                   error=0.005,
                   maxiter=1000,
                   init=None)
previsoes_porcentagem = r[1]  # fuzzy partition matrix, shape (3, 150)

# membership of the first sample in each of the three clusters
previsoes_porcentagem[0][0]
previsoes_porcentagem[1][0]
previsoes_porcentagem[2][0]

previsoes = previsoes_porcentagem.argmax(axis=0)
resultados = confusion_matrix(iris.target, previsoes)
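Cluster indices from cmeans are arbitrary, so the raw confusion matrix above can come out permuted; a hedged post-hoc alignment by majority vote (scipy.optimize.linear_sum_assignment would be the more rigorous tool) might look like:

import numpy as np
from sklearn.metrics import confusion_matrix

# map each cluster to its most frequent true class before scoring
previsoes_alinhadas = np.zeros_like(previsoes)
for cluster in np.unique(previsoes):
    mask = previsoes == cluster
    previsoes_alinhadas[mask] = np.bincount(iris.target[mask]).argmax()

print(confusion_matrix(iris.target, previsoes_alinhadas))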
Example #30
def fair_clustering(dataset, config_file, max_points, cluster_num, m, epsilon,
                    maxiter, eta):
    # read the data
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)
    df = dp.read_data(config, dataset)

    # cap the dataset size
    if max_points and len(df) > max_points:
        df = dp.subsample_data(df, max_points)

    df, _ = dp.clean_data(df, config, dataset)

    # get the fairness attribute(s)
    fairness_variable = config[dataset].getlist("fairness_variable")

    # model the sensitive attributes:
    # attributes stores the indices of the points in each color class
    # color_flag maps each point to its color class (the inverse of "attributes")
    attributes, color_flag = {}, {}
    for variable in fairness_variable:
        colors = defaultdict(list)
        this_color_flag = [0] * len(df)
        condition_str = variable + "_conditions"
        bucket_conditions = config[dataset].getlist(condition_str)
        for i, row in df.iterrows():
            for bucket_idx, bucket in enumerate(bucket_conditions):
                if eval(bucket)(row[variable]):
                    colors[bucket_idx].append(i)
                    this_color_flag[i] = bucket_idx

        attributes[variable] = colors
        color_flag[variable] = this_color_flag
    # proportion of each sensitive-attribute value in the whole dataset
    representation = {}
    for var, bucket_dict in attributes.items():
        representation[var] = {
            k: (len(bucket_dict[k]) / len(df))
            for k in bucket_dict.keys()
        }
    # select the columns used to define distances
    selected_columns = config[dataset].getlist("columns")
    df1 = df[[col for col in selected_columns]]
    df = df1.iloc[:, :].values
    centers, u_fz, u0, d_fz, jm, p, fpc = cmeans(df.T,
                                                 c=cluster_num,
                                                 m=m,
                                                 error=epsilon,
                                                 maxiter=1000)

    # adjust u with the fairness loss

    u = u_fz.T
    label = np.argmax(u, axis=1)
    p = 0
    loss_old = 999999999
    while p < maxiter - 1:
        c = update_c(u, m, df)
        u = update_u(df, c, m, label, attributes, representation, df1, eta)
        label = np.argmax(u, axis=1)
        sizes = cal_sizes(label, cluster_num)
        ratios = cal_ratios(attributes, df1, label, cluster_num, sizes)
        loss = cal_loss(attributes, cluster_num, ratios, representation, eta)
        p += 1
        if np.max(np.abs(loss - loss_old)) < epsilon:
            break
        loss_old = loss
Example #31
for k in K:
    T = img.copy()
    print(img.shape)
    width = img.shape[0]
    height = img.shape[1]
    dim = img.shape[2]
    print(img[0][0])
    X = np.zeros((width * height, 3))
    index = 0
    for i in range(width):
        for j in range(height):
            X[index] = img[i][j]
            index += 1
    coef = np.max(X)
    X /= coef
    results = cmeans(X.T, k, m=3, error=0.01, maxiter=100)
    U = results[1].T
    index = 0
    for i in range(width):
        for j in range(height):
            center_index = np.argmax(U[index])
            T[i][j] = colors[center_index]
            index += 1
    plt.subplot(1, 2, 1)
    plt.imshow(T)
    C = T.copy()
    plt.imsave(fname=dir_name + 'human_fcm_' + str(k) + '.png', arr=T / 255)
    gamma = 0.01
    delta = 0.1
    lam1 = 0.1
    lam2 = 0.01