def GetClusterValues(clusterAlg, comparebothclusters, data1, df1Norm, nclusters):
    cntr = None
    means = None
    if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
        cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data1, nclusters, 2,
                                                 error=0.005, maxiter=1500)
        # get maximum membership value for each sample
        Cluster_Values = getMaximumCmeans(u)
        AllCluster_Values = getMaximumCmeans(u, True)
        cluster_maximum_indices = np.argmax(u, axis=0)
        Kmeanslabels = ['NA' for x in range(data1.shape[1])]
        if 'b' in comparebothclusters or 'c' in comparebothclusters:
            # additionally run k-means for comparison
            means = KMeans(n_clusters=nclusters).fit(df1Norm)
            Kmeanslabels = means.labels_
    else:
        # k-means (also the default when no known algorithm is named)
        means = KMeans(n_clusters=nclusters).fit(df1Norm)
        Kmeanslabels = means.labels_
        Cluster_Values = ['NA' for x in range(data1.shape[1])]
        AllCluster_Values = [data1.shape[0] * ['NA'] for x in range(data1.shape[1])]
        cluster_maximum_indices = ['NA' for x in range(data1.shape[1])]
        if 'b' in comparebothclusters or 'c' in comparebothclusters:
            # additionally run fuzzy c-means for comparison
            cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data1, nclusters, 2,
                                                     error=0.005, maxiter=1500)
            Cluster_Values = getMaximumCmeans(u)
            AllCluster_Values = getMaximumCmeans(u, True)
            cluster_maximum_indices = np.argmax(u, axis=0)
    if 'cmeans' in clusterAlg or 'c-means' in clusterAlg or 'fuzzy' in clusterAlg:
        Trained_labels = cluster_maximum_indices
    else:
        Trained_labels = means.labels_
    return (AllCluster_Values, Cluster_Values, Kmeanslabels,
            cluster_maximum_indices, cntr, means, Trained_labels)
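# For orientation: a minimal, self-contained sketch of the skfuzzy.cmeans call
# these snippets share, on synthetic data (not from any of the codebases here).
# Note that skfuzzy expects data of shape (features, samples).
import numpy as np
import skfuzzy as fuzz

rng = np.random.default_rng(0)
data = np.hstack([rng.normal(0, 0.5, (2, 50)), rng.normal(3, 0.5, (2, 50))])
cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(data, c=2, m=2, error=0.005, maxiter=1000)
labels = np.argmax(u, axis=0)    # hard assignment per sample
print(cntr.shape, u.shape, fpc)  # (2, 2), (2, 100), fuzzy partition coefficient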
def get_dominant_colors(img, cluster_num=5, cluster='kmeans', if_show=False, name=None):
    pixels = img.reshape(-1, 3).astype(np.float32)
    if cluster == 'kmeans':
        # OpenCV k-means expects samples as rows
        criteria = (cv.TERM_CRITERIA_MAX_ITER, 10, 0.1)
        flags = cv.KMEANS_RANDOM_CENTERS
        _, _, centers = cv.kmeans(pixels, cluster_num, None, criteria, 1, flags)
    elif cluster == 'fcm':
        import skfuzzy as skf
        # skfuzzy.cmeans expects features as rows, samples as columns
        pixels = np.transpose(pixels, (1, 0))
        cmeans_res = skf.cmeans(pixels, cluster_num, 2, 1e-4, 100)
        centers = cmeans_res[0]
    else:
        raise NotImplementedError('Unrecognised cluster method.')
    color_list = np.zeros((cluster_num, 3), dtype='int')
    for c in range(cluster_num):
        # convert BGR centers to RGB integer triples
        color_list[c] = centers[c, ::-1].astype('int')
    if if_show:
        show_plate(img, name, cluster_num, color_list)
    return color_list
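# Hypothetical call of get_dominant_colors on a BGR image loaded with OpenCV
# ('plate.jpg' is a placeholder path).
import cv2 as cv
img = cv.imread('plate.jpg')
palette = get_dominant_colors(img, cluster_num=5, cluster='fcm')
print(palette)  # five RGB rows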
def MyFCM(img, ImageType, numClust):
    k = numClust
    num_features = img.shape[2]
    img_vector = np.zeros([img.shape[0] * img.shape[1], num_features])
    img_pixel = np.zeros([img.shape[0] * img.shape[1], 2], int)
    count = 0
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            img_vector[count] = img[i][j]
            img_pixel[count] = np.array([i, j])
            count += 1
    num_samples = img_vector.shape[0]
    num_features = img_vector.shape[1]
    # skfuzzy.cmeans expects data of shape (features, samples)
    img_vector_T = img_vector.T
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(img_vector_T, k, 2.,
                                             error=0.05, maxiter=20)
    ClusterIm = np.zeros([img.shape[0], img.shape[1]], int)
    uT = u.T
    for i in range(num_samples):
        row = img_pixel[i][0]
        col = img_pixel[i][1]
        # assign each pixel to its highest-membership cluster (1-based)
        max_val = 0.
        max_cluster = 0
        for j in range(k):
            if uT[i][j] > max_val:
                max_val = uT[i][j]
                max_cluster = j + 1
        ClusterIm[row][col] = max_cluster
    ccImOneBase = getCCIM.getCCIM(ClusterIm, 4)
    return ClusterIm, ccImOneBase
def fuzzy_kmeans(features, num_cluster, m=1.1, num_repeat=20):
    """
    Args:
        features (np.array): shape=(n, *other_dimension)
        num_cluster (int): number of clusters
        m (float): fuzzifier exponent, strictly greater than 1
        num_repeat (int): number of random restarts
    Returns:
        best_y_pred (np.array): shape=(n,), dtype=np.int32, predicted labels
        best_centroid (np.array): shape=(num_cluster, *other_dimension), centroid features
    """
    input_shape = features.shape
    best_y_pred = None
    best_centroid = None
    best_loss = None
    for i in range(num_repeat):
        returns = cmeans(data=features.reshape((input_shape[0], -1)).T,
                         c=num_cluster, m=m, error=1e-4, maxiter=300)
        # centers, partition matrix, initial partition, distance matrix,
        # objective-function history, iteration count, partition coefficient
        cntr, u, u0, d, jm, p, fpc = returns
        y_pred = np.argmax(u, axis=0)
        if best_y_pred is None or jm[-1] < best_loss:
            best_y_pred = y_pred
            # cntr already has shape (num_cluster, n_features); transposing it
            # here would scramble the reshape below
            best_centroid = cntr
            best_loss = jm[-1]
    best_centroid = best_centroid.reshape((num_cluster, *input_shape[1:]))
    return best_y_pred, best_centroid
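# Hypothetical usage of fuzzy_kmeans above (assumes numpy as np and
# skfuzzy's cmeans are already imported, as the function body requires).
import numpy as np
features = np.random.rand(200, 8)
y_pred, centroids = fuzzy_kmeans(features, num_cluster=4)
print(y_pred.shape, centroids.shape)  # (200,), (4, 8)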
def separate_syn_dsyn(S, f, low=[4, 12], high=[30, 80], return_all=False):
    L = np.trapz(S[(low[0] <= f) & (f <= low[1]), :], axis=0)
    H = np.trapz(S[(high[0] <= f) & (f <= high[1]), :], axis=0)
    ## Fuzzy c-means clustering
    import skfuzzy as fuzz
    data = np.vstack([np.log(L), np.log(H)])
    center, u = fuzz.cmeans(data, c=2, m=2., error=0.005,
                            maxiter=1000, init=None)[:2]
    cluster_membership = np.argmax(u, axis=0)
    index1 = np.where(cluster_membership == 0)
    index2 = np.where(cluster_membership == 1)
    if center[0, 1] > center[1, 1]:
        dsync_idxs = index1
        sync_idxs = index2
    else:
        dsync_idxs = index2
        sync_idxs = index1
    ## Power ratio
    ratio = np.log(L) / np.log(H)
    ratio_syn_mean = np.nanmean(ratio[sync_idxs])
    ratio_syn_std = np.nanstd(ratio[sync_idxs])
    ratio_dsyn_mean = np.nanmean(ratio[dsync_idxs])
    ratio_dsyn_std = np.nanstd(ratio[dsync_idxs])
    d_idcs = np.where(ratio < ratio_dsyn_mean + ratio_dsyn_std)[0]
    s_idcs = np.where(ratio > ratio_syn_mean - ratio_syn_std)[0]
    if return_all:
        return dsync_idxs, sync_idxs, L, H
    else:
        return d_idcs, s_idcs
def fuzzy(predict_non, noise, label, zeros, thres):
    """
    Perform fuzzy c-means clustering.

    Although this is unsupervised, entropy tends to be concentrated at high
    values for speech, so the cluster with the higher center of the two is
    identified as the speech region and classified accordingly.

    Inputs:
        predict_non - 2-D array containing entropy values and noise values
        noise - speech signal containing noise
        label - labels from the first-stage classification (energy, entropy, etc.)
        zeros - indices where the noisy speech signal equals 0
        thres - threshold reference value
    Outputs:
        nono - VAD-filtered speech signal after fuzzy clustering
        labels - resulting labels
    """
    data_pu = np.array(predict_non).T
    final = skfuzzy.cmeans(data_pu, 2, 2, 1e-11, 2000)
    labels = label.copy()
    clustering = final[1]
    # pick the cluster whose center has the larger first coordinate as speech
    if final[0][0, 0] > final[0][1, 0]:
        n = 0
    else:
        n = 1
    for i in range(len(zeros)):
        if clustering[n, i] >= thres:
            labels[zeros[i]] = 1
    noise_1 = noise.copy()
    for i in range(len(label)):
        if labels[i] == 0:
            noise_1[i] = 0
    nono = np.array([v for v in noise_1 if v != 0])
    return nono, labels
def students_cluster(self, **kwargs):
    sf_df = self.students_features
    sf_df = sf_df.fillna(0)
    data = sf_df[self.STUDENTS_F_LABELS].to_numpy()
    if not kwargs:
        C = self._C_f
        m = self._m
        error = 1.e-10
        maxiter = 100
        cntr, U, _, _, _, _, fpc = cmeans(data.T, C, m, error, maxiter)
    else:
        # cmeans expects (features, samples) and keyword arguments unpacked
        cntr, U, _, _, _, _, fpc = cmeans(data.T, **kwargs)
    L = U.T.argmax(axis=1)
    self.students_features['fcm_cluster_ID'] = L
    self.cntr_sf = cntr
def fcm_class_mask(img, brain_mask=None, hard_seg=False):
    """
    Creates a mask of tissue classes for a target brain with fuzzy c-means.

    Args:
        img (nibabel.nifti1.Nifti1Image): target image (must be T1-w)
        brain_mask (nibabel.nifti1.Nifti1Image): mask covering the brain of img
            (None if already skull-stripped)
        hard_seg (bool): pick the maximum membership as the true class in output

    Returns:
        mask (np.ndarray): membership values for each of three classes in the
            image (or class determinations w/ hard_seg)
    """
    img_data = img.get_fdata()
    if brain_mask is not None:
        mask_data = brain_mask.get_fdata() > 0
    else:
        mask_data = img_data > img_data.mean()
    # cmeans expects (features, samples); here one intensity feature per voxel
    t1_cntr, t1_mem, _, _, _, _, _ = cmeans(
        img_data[mask_data].reshape(-1, len(mask_data[mask_data])), 3, 2, 0.005, 50)
    # sort memberships by center intensity -> CSF/GM/WM for a T1-w image
    t1_mem_list = [t1_mem[i] for i, _ in
                   sorted(enumerate(t1_cntr), key=lambda x: x[1])]
    mask = np.zeros(img_data.shape + (3,))
    for i in range(3):
        mask[..., i][mask_data] = t1_mem_list[i]
    if hard_seg:
        tmp_mask = np.zeros(img_data.shape)
        tmp_mask[mask_data] = np.argmax(mask[mask_data], axis=1) + 1
        mask = tmp_mask
    return mask
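# Hypothetical call, assuming a skull-stripped T1-w NIfTI on disk
# ('t1.nii.gz' is a placeholder path; nib is nibabel).
import nibabel as nib
t1 = nib.load('t1.nii.gz')
soft = fcm_class_mask(t1)                 # CSF/GM/WM memberships, shape + (3,)
hard = fcm_class_mask(t1, hard_seg=True)  # 0=background, 1=CSF, 2=GM, 3=WM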
def cluster_fcm(dataset):
    # skfuzzy expects (features, samples)
    dataset = np.transpose(dataset)
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(dataset, c=12, m=2, error=0.0001,
                                             maxiter=10000, init=None)
    labels = np.argmax(u, axis=0)
    infer_results(labels, "Fuzzy C-Means")
def clustering(data):
    s_avg = []
    for i in range(2, len(data)):
        cntr, u, u0, distant, fObj, iterasi, fpc = fuzz.cmeans(
            np.asarray(data).T, i, 2, 0.00001, 1000, seed=0)
        membership = np.argmax(u, axis=0)
        # silhouette = silhouette_samples(tfidf, membership)
        s_avg.append(silhouette_score(data, membership, random_state=10))
    return s_avg
def fuzzyruleset(data, labels, k=8, m=2):
    centroids, u, u0, d, jm, p, fpc = fuzzy.cmeans(data.T, k, m, error=0.005,
                                                   maxiter=1000,
                                                   metric='euclidean', init=None)
    # Obtain each sample's group
    groups = np.argmax(u, axis=0)
    # Calculate membership for each class
    u_class = []
    try:
        nclass = labels.nunique()[0]
    except TypeError:
        nclass = labels.nunique()
    for c in range(nclass):
        index = np.argwhere(np.array(labels).flatten() == c).ravel()
        u_class.append(np.sum(u[:, index], axis=1))
        u_class[c] = u_class[c] / np.linalg.norm(u_class[c])
    # Obtain each group's class
    group_class = np.argmax(np.array(u_class), axis=0)
    # Calculate standard deviation of each group
    group_std = []
    for i in range(k):
        index = np.argwhere(groups == i).ravel()
        if len(index) == 0:
            # fall back to the running mean of previous stds,
            # or 0.25 if this is the first empty group
            group_std.append(np.mean(group_std) if group_std else 0.25)
        else:
            group_std.append(u_class[group_class[i]][i] * np.mean(
                euclidean_distances(data.iloc[index, :], [centroids[i, :]])))
    group_std = group_std / np.linalg.norm(group_std)
    # Create set of fuzzy rules
    # Rule: If X1 is A1 and X2 is A2 then Y = C, C = {0, 1}
    rule_set = []
    for r in range(len(group_class)):
        rule_set.append({
            'mu': centroids[r, :],
            'std': group_std[r],
            'con': group_class[r],
        })
    return rule_set, groups
def fcm_init(x_train, n_rules):
    n_samples, n_features = x_train.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(
        x_train.T, n_rules, 2.0, error=1e-5, maxiter=200)
    delta = np.zeros([n_rules, n_features])
    for i in range(n_rules):
        d = (x_train - centers[i, :]) ** 2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(mem[i, :])
    delta = np.sqrt(delta)
    delta = np.where(delta < 0.05, 0.05, delta)
    return centers.T, delta.T
def cluster(data_filename, labels_filename):
    data = np.load(data_filename)
    labels = np.load(labels_filename)
    data = data[:, 1:]
    clf = fuzz.cmeans(data, c=6, m=10, error=0.0001, maxiter=2000)
    recipe_labels = json.load(open("recipesNutrients.txt"))
    nutritions = []
    for recipe in recipe_labels:
        nutrient_vector = filter_by_nutrients(list(labels), recipe)
        nutritions.append(nutrient_vector)
    testClustering(data, np.array(nutritions).T)
def _train_model(self, data, num_clusters):
    """
    Train the model with the number of clusters that evaluated best.

    :param data: Dataframe with train data
    :param num_clusters: Number of clusters to use
    :return: Trained model
    """
    super()._train_model(data, num_clusters)
    data_array = data.to_numpy()
    return fuzz.cmeans(data=data_array, c=num_clusters, m=2,
                       error=0.005, maxiter=1000)
def fcmcluster(points, maxclust):
    import skfuzzy as fuzz
    # 1. fit the fuzzy c-means model
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(points.T, maxclust, 2,
                                             error=0.005, maxiter=1000)
    # Plot assigned clusters, for each data point in training set
    # 2. compute the cluster assignments
    cluster_membership = np.argmax(u, axis=0)
    # 3. add the subplot
    plt.subplot(269)
    plt.scatter(points[:, 0], points[:, 1], c=cluster_membership)
    plt.title('fcm')
def cl_fuzzyCMeans(pivot, original, pixels, metrics, k, cc):
    cntr, u, u0, d, jm, p, fpc = cmeans(pivot.T, k, 2, error=0.005,
                                        maxiter=1000, init=None)
    method = 'F_{0}_{1}'.format(k, cc * 10)
    assignments = np.argmax(u, axis=0)
    metrics.loc[['FPC'], method] = fpc
    return (add_computed_typed(method, assignments, original),
            add_computed_typed_pixels(method, assignments, pixels),
            add_metrics_typed(method, metrics, assignments, pivot))
def estimate_centers(self):
    """
    Helper method for finding centers when using automatic modelling
    (designed for Genetic Programming in this case).
    """
    self.seed = randint(1, 10)
    cluster_centers, _, _, _, _, p, fpc = cmeans(self._X.T, c=self.centers,
                                                 m=1.75, error=0.005,
                                                 maxiter=1000, seed=self.seed)
    self.centers = cluster_centers
def cluster_data(self, data, c=1, m=1.00, err=1.0, maxiter=1):
    """
    data : 2d array, size (S, N)
        Data to be clustered. N is the number of data sets;
        S is the number of features within each sample vector.
    c : int
        Desired number of clusters or classes.
    m : float
        Array exponentiation applied to the membership function U_old
        at each iteration, where U_new = U_old ** m.
    err : float
        Stopping criterion; stop early if the norm of (U[p] - U[p-1]) < err.
    maxiter : int
        Maximum number of iterations allowed.
    """
    (cntr, U, U0, d, Jm, p, fpc) = skfuzzy.cmeans(
        data, c, m, err, maxiter)  # !FIXME the source of this function is broken
    return cntr
def MyFCM05(img1, ImageType, numClust):
    # normalize image data
    img2 = img1.astype(float)
    img = img2 / 255
    # constants used below
    num_features = img.shape[2]
    num_samples = img.shape[0] * img.shape[1]
    img_height = img.shape[0]
    img_width = img.shape[1]
    # reshape img into a (samples, features) matrix
    img_vector = np.reshape(img, (num_samples, num_features))
    # for hyperspectral input, simply reduce img_vector to 3 dimensions with PCA
    if ImageType == 'Hyper':
        pca = PCA(3)
        img_vector = pca.fit_transform(img_vector)
    # transpose after the (optional) PCA so cmeans sees the reduced features
    img_vector_T = img_vector.T
    cntr, u, u0, d, jm, p, fpc = fuzz.cmeans(img_vector_T, numClust, 2.,
                                             error=0.05, maxiter=20)
    # assign each pixel to its highest-membership cluster (1-based)
    output = np.argmax(u.T, axis=1) + 1
    ClusterIm = np.reshape(output, (img_height, img_width)).astype(int)
    # imgplot = plt.imshow(img)
    # plt.pause(2)
    # plt.imshow(ClusterIm)
    # plt.pause(5)
    ccImOneBase = getCCIM.getCCIM(ClusterIm, 4)
    return ClusterIm, ccImOneBase
def plot_fuzzyCMeans_elbow(pivot):
    maxK = 2
    maxSil = -1
    for i in range(2, 20):
        # cmeans expects (features, samples), hence the transpose;
        # silhouette_score takes the untransposed (samples, features) matrix
        cntr, u, u0, d, jm, p, fpc = cmeans(pivot.T, i, 2, error=0.005,
                                            maxiter=1000, init=None)
        silhouette = silhouette_score(pivot, np.argmax(u, axis=0),
                                      sample_size=10000)
        print("For n_clusters =", i, "The average silhouette_score is :",
              silhouette, "FPC is :", fpc)
        if silhouette > maxSil:
            maxSil = silhouette
            maxK = i
    return maxK
def fuzzy(predict_non, noise, label, zeros, thres):
    data_pu = np.array(predict_non).T
    final = skfuzzy.cmeans(data_pu, 2, 2, error=1e-11, maxiter=2000)
    plt.figure(figsize=(12, 6))
    plt.grid()
    plt.title("fuzzy clustering")
    plt.xlabel("Entropy value")
    plt.ylabel("Amplitude")
    plt.scatter(data_pu[0], data_pu[1], color="red", label="sample")
    plt.scatter(final[0][:, 0], final[0][:, 1], color="black", label="center")
    plt.legend()
    plt.show()
    labels = label.copy()
    clustering = final[1]
    if final[0][0, 0] > final[0][1, 0]:
        n = 0
    else:
        n = 1
    for i in range(len(zeros)):
        if clustering[n, i] >= thres:
            labels[zeros[i]] = 1
    noise_1 = noise.copy()
    for i in range(len(label)):
        if labels[i] == 0:
            noise_1[i] = 0
    nono = []
    for i in noise_1:
        if i != 0:
            nono.append(i)
    plt.figure(figsize=(20, 2))
    plt.grid()
    plt.title("vad label")
    plt.plot(labels)
    plt.show()
    nono = np.array(nono)
    return nono, labels
def find_tissue_memberships(
    image: Array,
    mask: Array = None,
    hard_segmentation: bool = False,
) -> Array:
    """Tissue memberships for a T1-w brain image with fuzzy c-means

    Args:
        image: image to find tissue masks for (must be T1-w)
        mask: mask covering the brain of image (none if already skull-stripped)
        hard_segmentation: pick the maximum membership as the true class in output

    Returns:
        tissue_mask: membership values for each of three classes in the image
            (or class determinations w/ hard_seg)
    """
    if mask is None:
        mask = image > 0.0
    else:
        mask = mask > 0.0
    assert isinstance(mask, Array)
    foreground_size = mask.sum()
    foreground = image[mask].reshape(-1, foreground_size)
    centers, memberships_, *_ = cmeans(foreground, 3, 2, 0.005, 50)

    def get_center(element: Tuple[float, Array]) -> float:
        center: float = element[0]
        return center

    # sort the tissue memberships to CSF/GM/WM (assuming T1-w image)
    sorted_memberships = sorted(zip(centers, memberships_), key=get_center)
    memberships = [m for _, m in sorted_memberships]
    tissue_mask = np.zeros(image.shape + (3,))
    for i in range(3):
        tissue_mask[..., i][mask] = memberships[i]
    if hard_segmentation:
        tmp_mask = np.zeros(image.shape)
        masked = tissue_mask[mask]
        tmp_mask[mask] = np.argmax(masked, axis=1) + 1
        tissue_mask = tmp_mask
    return tissue_mask
def estimate_missing_values(self):
    estimated_data = []
    complete_data = np.array([self.data[x] for x in self.complete_rows])
    centers, _, _, _, _, _, _ = cmeans(data=complete_data.transpose(),
                                       c=self.c, m=self.m,
                                       error=FCMParam.ERROR,
                                       maxiter=FCMParam.MAX_ITR, init=None)

    # Euclidean distance between two points
    def calculate_distance(data_1, data_2):
        return np.linalg.norm(data_1 - data_2)

    # Standard FCM membership for a given point: u = d**(-2/(m-1)) normalized
    # over all centers (assumes m > 1; the exponent is written as 2/(1-m))
    def calculate_membership(dist_matrix, distance, m):
        numerator = np.power(distance, 2 / (1 - m))
        denominator = np.array(
            [np.power(x, 2 / (1 - m)) for x in dist_matrix]).sum()
        return numerator / denominator

    for i in self.incomplete_rows:
        estimated = 0
        dist, membership_value = [], []
        miss_ind = np.where(self.data[i] == NAN)[0][0]
        for center in centers:
            dist.append(calculate_distance(
                data_1=np.delete(np.array(center), miss_ind),
                data_2=np.delete(np.array(self.data[i]), miss_ind)))
        for d in dist:
            membership_value.append(calculate_membership(dist, d, self.m))
        for k in range(self.c):
            estimated += centers[k][miss_ind] * membership_value[k]
        estimated_data.append(estimated)
    return np.array(estimated_data)
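# The membership helper above implements the standard FCM weighting
# u_k = d_k**(-2/(m-1)) / sum_j d_j**(-2/(m-1)), valid for m > 1;
# a compact vectorized equivalent for reference (illustrative name):
import numpy as np

def fcm_memberships(distances, m):
    w = np.power(np.asarray(distances, dtype=float), -2.0 / (m - 1.0))
    return w / w.sum()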
def fcm(data, n_cluster):
    """
    Compute data centers and the membership of each point by FCM,
    and compute the variance of each feature.

    :param data: n_Samples * n_Features
    :param n_cluster: number of centers
    :return: centers: data centers, delta: variance of each feature
    """
    n_samples, n_features = data.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(data.T, n_cluster, 2.0,
                                              error=1e-5, maxiter=200)
    # delta is the membership-weighted variance of each feature per cluster
    delta = np.zeros([n_cluster, n_features])
    for i in range(n_cluster):
        d = (data - centers[i, :]) ** 2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(mem[i, :])
    return centers, delta
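# Illustrative use of fcm above on random data (assumes numpy as np and
# skfuzzy imported as fuzz, as the function body requires).
import numpy as np
data = np.random.rand(100, 5)
centers, delta = fcm(data, n_cluster=3)
print(centers.shape, delta.shape)  # (3, 5), (3, 5)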
def fuzzy_clustering(train, train_labels, test, test_labels, size, plot, plot_dims):
    # the fuzzifier m must be greater than 1, and fit and predict
    # must share the same value for the memberships to be comparable
    m = 2.0
    center, u, u0, d, jm, p, fpc = skf.cmeans(train.T, c=2, m=m,
                                              error=.001, maxiter=100)
    # avoid unpacking into a name that shadows numpy's np
    nu, nu0, nd, njm, n_p, nfpc = skf.cmeans_predict(test.T, center, m,
                                                     error=0.005, maxiter=1000)
    results_train = u.argmax(axis=0)
    results_test = nu.argmax(axis=0)
    if plot:
        plot_results(test, test_labels, results_test, size, "Fuzzy Clustering")
    return results_train, results_test
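# For reference, skfuzzy's predict path mirrors the training call; a minimal
# sketch on synthetic (features, samples) data, assuming skfuzzy as skf.
import numpy as np
import skfuzzy as skf
train = np.random.rand(2, 80)
test = np.random.rand(2, 20)
cntr, u, *_ = skf.cmeans(train, c=2, m=2.0, error=1e-3, maxiter=100)
u_test, *_ = skf.cmeans_predict(test, cntr, 2.0, error=1e-3, maxiter=100)
print(u_test.argmax(axis=0))  # hard labels for the 20 test samples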
def fcm_init(x_train, n_rules, m=None, scale=1.):
    if m is not None:
        assert m > 1, "m must be larger than 1, received: {}".format(m)
    else:
        # heuristic fuzzifier based on sample and feature counts
        if min(x_train.shape[0], x_train.shape[1] - 1) >= 3:
            m = min(x_train.shape[0], x_train.shape[1] - 1) / (
                min(x_train.shape[0], x_train.shape[1] - 1) - 2)
        else:
            m = 2
    n_samples, n_features = x_train.shape
    centers, mem, _, _, _, _, _ = fuzz.cmeans(x_train.T, n_rules, m,
                                              error=1e-5, maxiter=200)
    delta = np.zeros([n_rules, n_features])
    for i in range(n_rules):
        d = (x_train - centers[i, :]) ** 2
        delta[i, :] = np.sum(d * mem[i, :].reshape(-1, 1), axis=0) / np.sum(mem[i, :])
    delta = np.sqrt(delta) * scale
    delta = np.where(delta < 0.05, 0.05, delta)
    return centers.T, delta.T
write_csv("tfidf%d.csv" % n, tfidf, 'a') xBaru2 = seleksiFiturPearson(tfidf, 0.9) xBaru1 = seleksiFiturPearson(xBaru2, 0.8) ''' Skenario: 1. Cluster tanpa diseleksi 2. Cluster dengan Seleksi Pearson, dengan threshold 0.8 2. Cluster dengan Seleksi Pearson, dengan threshold 0.9 2. Cluster dengan Seleksi Pearson, dengan threshold 0.95 ''' print("Cluster Tanpa Seleksi fitur") cntr, u, u0, distant, fObj, iterasi, fpc = fuzz.cmeans(tfidf.T, 3, 2, 0.00001, 1000, seed=0) membership = np.argmax(u, axis=0) silhouette = silhouette_samples(tfidf, membership) s_avg = silhouette_score(tfidf, membership, random_state=10) for i in range(total_doc): print("c " + str(membership[i])) #+"\t" + str(silhouette[i])) print(s_avg) #kmeans = KMeans(n_clusters=3, random_state=0).fit(tfidf) #print(kmeans.labels_) write_csv("Cluster%d.csv" % n, [["Cluster"]])
from sklearn import datasets
import numpy as np
from sklearn.metrics import confusion_matrix
import skfuzzy

iris = datasets.load_iris()
r = skfuzzy.cmeans(data=iris.data.T, c=3, m=2, error=0.005,
                   maxiter=1000, init=None)
previsoes_porcentagem = r[1]
# notebook-style inspection of the first sample's membership in each cluster
previsoes_porcentagem[0][0]
previsoes_porcentagem[1][0]
previsoes_porcentagem[2][0]
previsoes = previsoes_porcentagem.argmax(axis=0)
resultados = confusion_matrix(iris.target, previsoes)
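# Cluster IDs from cmeans are arbitrary, so the confusion matrix above is only
# readable after aligning each cluster with its majority true label; an
# illustrative post-processing step:
mapping = {c: np.bincount(iris.target[previsoes == c]).argmax()
           for c in np.unique(previsoes)}
aligned = np.vectorize(mapping.get)(previsoes)
print(confusion_matrix(iris.target, aligned))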
def fair_clustering(dataset, config_file, max_points, cluster_num, m, epsilon, maxiter, eta):
    # read the data
    config = configparser.ConfigParser(converters={'list': read_list})
    config.read(config_file)
    df = dp.read_data(config, dataset)
    # cap the dataset size
    if max_points and len(df) > max_points:
        df = dp.subsample_data(df, max_points)
    df, _ = dp.clean_data(df, config, dataset)
    # get the fairness (balance) attributes
    fairness_variable = config[dataset].getlist("fairness_variable")
    # model the sensitive attributes:
    # attributes stores the indices of the points in each color class;
    # color_flag maps each point to its color class (the inverse of attributes)
    attributes, color_flag = {}, {}
    for variable in fairness_variable:
        colors = defaultdict(list)
        this_color_flag = [0] * len(df)
        condition_str = variable + "_conditions"
        bucket_conditions = config[dataset].getlist(condition_str)
        for i, row in df.iterrows():
            for bucket_idx, bucket in enumerate(bucket_conditions):
                if eval(bucket)(row[variable]):
                    colors[bucket_idx].append(i)
                    this_color_flag[i] = bucket_idx
        attributes[variable] = colors
        color_flag[variable] = this_color_flag
    # proportion of each sensitive attribute in the whole dataset
    representation = {}
    for var, bucket_dict in attributes.items():
        representation[var] = {k: (len(bucket_dict[k]) / len(df))
                               for k in bucket_dict.keys()}
    # select the columns used to define distances
    selected_columns = config[dataset].getlist("columns")
    df1 = df[[col for col in selected_columns]]
    df = df1.iloc[:, :].values
    centers, u_fz, u0, d_fz, jm, p, fpc = cmeans(df.T, c=cluster_num, m=m,
                                                 error=epsilon, maxiter=1000)
    # adjust u with the fairness loss
    u = u_fz.T
    label = np.argmax(u, axis=1)
    p = 0
    loss_old = 999999999
    while p < maxiter - 1:
        c = update_c(u, m, df)
        u = update_u(df, c, m, label, attributes, representation, df1, eta)
        label = np.argmax(u, axis=1)
        sizes = cal_sizes(label, cluster_num)
        ratios = cal_ratios(attributes, df1, label, cluster_num, sizes)
        loss = cal_loss(attributes, cluster_num, ratios, representation, eta)
        p += 1
        if np.max(np.abs(loss - loss_old)) < epsilon:
            break
        loss_old = loss
for k in K:
    T = img.copy()
    print(img.shape)
    width = img.shape[0]
    height = img.shape[1]
    dim = img.shape[2]
    print(img[0][0])
    # flatten the image into an (n_pixels, 3) matrix and scale to [0, 1]
    X = np.zeros((width * height, 3))
    index = 0
    for i in range(width):
        for j in range(height):
            X[index] = img[i][j]
            index += 1
    coef = np.max(X)
    X /= coef
    results = cmeans(X.T, k, m=3, error=0.01, maxiter=100)
    U = results[1].T
    # color each pixel by its highest-membership cluster
    index = 0
    for i in range(width):
        for j in range(height):
            center_index = np.argmax(U[index])
            T[i][j] = colors[center_index]
            index += 1
    plt.subplot(1, 2, 1)
    plt.imshow(T)
    C = T.copy()
    plt.imsave(fname=dir_name + 'human_fcm_' + str(k) + '.png', arr=T / 255)

gamma = 0.01
delta = 0.1
lam1 = 0.1
lam2 = 0.01