class FCmeansDaily:
    """Fuzzy c-means clustering over a sequence of daily records.

    Every record is read through four attributes: ``upper_shadow_length``,
    ``lower_shadow_length``, ``body_length`` and ``color``. Those four values
    form one feature row. A 5-cluster ``FCM`` model (``random_state=0``) is
    fitted on the resulting matrix when the object is constructed.
    """

    def __init__(self, training_data):
        """Store ``training_data`` and fit the FCM model on its features."""
        self.training_data = training_data
        data_for_clustering = self.preprocess_daily_data_to_fit_fcmeans_library(
            training_data)
        self.fcmeans = FCM(n_clusters=5, random_state=0)
        self.fcmeans.fit(data_for_clustering)

    def preprocess_daily_data_to_fit_fcmeans_library(self, training_data):
        """Return the feature matrix for ``training_data`` as a NumPy array.

        One row per record, columns in the order upper shadow length,
        lower shadow length, body length, color.
        """
        feature_rows = [
            [
                record.upper_shadow_length,
                record.lower_shadow_length,
                record.body_length,
                record.color,
            ]
            for record in training_data
        ]
        return np.array(feature_rows)

    def get_clusters(self):
        """Return the cluster centers of the fitted model."""
        return self.fcmeans.centers

    def get_labels_list(self):
        """Return the model's predicted label for every training record."""
        X = self.preprocess_daily_data_to_fit_fcmeans_library(
            self.training_data)
        return self.fcmeans.predict(X)

    def get_labels_for_each_data_point(self, data_point_index):
        """Return the predicted label of the record at ``data_point_index``.

        Note: this re-runs prediction over the whole training set on every
        call.
        """
        return self.get_labels_list()[data_point_index]
def fuzzy_clustering(data):
    """Fit a 2-cluster FCM on the SVD decomposition of ``data`` and plot it.

    The decomposition comes from ``svd_decomposition``; the resulting
    cluster centers are handed to ``plot``. Nothing is returned.
    """
    decomposed = svd_decomposition(data)

    model = FCM(n_clusters=2, m=1.2, max_iter=250)
    model.fit(decomposed)

    centers = model.centers
    # Hard labels (index of the largest membership per row); not used below.
    hard_labels = model.u.argmax(axis=1)

    plot(centers, 'fuzzy', 'Decomposition using Fuzzy clustering')
def fuzzy(X):
    """Cluster ``X`` into four fuzzy clusters, print and plot the result.

    The left panel shows the raw points (first two columns of ``X``); the
    right panel shows the same points coloured by hard label together with
    the cluster centers. After the figure is shown, the labels and ``X``
    are passed to ``show_clusters``.
    """
    # Example input:
    # X = np.array([(1,1),(1,2),(1,3),(2,2),(10,10),(100,100),(101,101),(102,102)])
    model = FCM(n_clusters=4)
    model.fit(X)

    centers = model.centers
    hard_labels = model.u.argmax(axis=1)
    print(hard_labels)

    xs, ys = X[:, 0], X[:, 1]
    _, (raw_ax, clustered_ax) = plt.subplots(1, 2, figsize=(11, 5))
    scatter(xs, ys, ax=raw_ax)
    scatter(xs, ys, ax=clustered_ax, hue=hard_labels)
    scatter(centers[:, 0], centers[:, 1], ax=clustered_ax, marker="s", s=200)
    plt.show()

    show_clusters(hard_labels, X)
def fcm_alg(dataset_num,
            num_of_samples=None,
            num_of_clusters=None,
            get_figure=False,
            calc_ami_score=True,
            plot_anomaly=False):
    """Run fuzzy c-means on data set ``dataset_num`` and optionally plot/score it.

    :param dataset_num: Identifier passed to ``d.get_data``
    :param num_of_samples: Sample count passed to ``d.get_data``; capped at
        6000 (or set to 6000 when ``None``) if ``plot_anomaly`` is true
    :param num_of_clusters: Number of clusters; when ``None`` it is taken
        from ``d.get_num_of_clusters(tag)``
    :param get_figure: Plot the clustering
    :param calc_ami_score: Return the adjusted mutual information score
    :param plot_anomaly: Relabel points with a negative silhouette value as
        ``-1`` and plot them as anomalies (this relabelling also feeds the
        returned score)
    :return: The AMI score when ``calc_ami_score`` is true, otherwise ``None``
    """
    sample_cap = 6000
    if plot_anomaly and (num_of_samples is None or num_of_samples > sample_cap):
        num_of_samples = sample_cap

    data = d.get_data(dataset_num, n_samples=num_of_samples)
    # Data frame to cluster (per the original note: a 2-D PCA reduction of
    # the original data set).
    df = d.get_df_to_cluster(data)
    tag = d.get_tag(data)

    # Fall back to the "real" number of clusters according to the tag.
    if num_of_clusters is None:
        num_of_clusters = d.get_num_of_clusters(tag)

    model = FCM(n_clusters=num_of_clusters)
    model.fit(df)
    labels = model.predict(df)

    if get_figure or plot_anomaly:
        if plot_anomaly:
            silhouettes = silhouette_samples(df, labels)
            labels[silhouettes < 0] = -1

        regular = labels != -1
        anomalous = labels == -1
        # Clustered points, anomalies and the centroid of each cluster.
        plt.scatter(df['PC1'][regular], df['PC2'][regular], c=labels[regular])
        plt.scatter(df['PC1'][anomalous],
                    df['PC2'][anomalous],
                    c=['black'] * len(labels[anomalous]),
                    label='Anomaly')
        plt.scatter(model.centers["PC1"],
                    model.centers["PC2"],
                    marker="*",
                    label='centroid',
                    c='black')
        plt.legend()
        title = 'DS{} - Fuzzy C Means'.format(dataset_num)
        plt.title(title)
        plt.show()

    if calc_ami_score:
        labels_true = d.get_labels(tag)
        return adjusted_mutual_info_score(labels_true=labels_true,
                                          labels_pred=labels)
def fuzzy_cmeans_clustering(dataset, tags, k, show_plt=True):
    """Cluster ``dataset`` into ``k`` fuzzy clusters and score against ``tags``.

    When ``show_plt`` is true, the first two columns of ``dataset`` are
    scattered, coloured by predicted label. Returns the adjusted mutual
    information score between ``tags`` and the predicted labels.
    """
    model = FCM(n_clusters=k)
    model.fit(dataset)
    predicted = model.predict(dataset)

    if show_plt:
        xs, ys = dataset[:, 0], dataset[:, 1]
        plt.title('Fuzzy C Means')
        plt.scatter(xs, ys, c=predicted, s=7, cmap='rainbow')
        plt.show()

    return metrics.adjusted_mutual_info_score(tags, predicted)
def fuzzy_cmeans_clustering(self, show_plt=True):
    """Cluster ``self.data`` into ``self.k`` fuzzy clusters and score the result.

    When ``show_plt`` is true, the first two columns of ``self.data`` are
    scattered, coloured by predicted label. Returns the adjusted mutual
    information score between ``self.tags`` and the predicted labels.
    """
    model = FCM(n_clusters=self.k)
    model.fit(self.data)
    predicted = model.predict(self.data)

    if show_plt:
        xs, ys = self.data[:, 0], self.data[:, 1]
        plt.title('Fuzzy C Means')
        plt.scatter(xs, ys, c=predicted, s=7, cmap='rainbow')
        plt.show()

    return metrics.adjusted_mutual_info_score(self.tags, predicted)
def fcm(train_features):
    """Fit a 2-cluster FCM on ``train_features``.

    Returns a tuple ``(centers, labels)`` where ``labels`` holds, for each
    row of the membership matrix, the index of its largest entry.
    """
    model = FCM(n_clusters=2)
    model.fit(train_features)
    # savetxt('fcm_pp.csv', labels, fmt='%i', delimiter=',')
    return model.centers, model.u.argmax(axis=1)
def main():
    """Compare k-means and fuzzy c-means (2 clusters each) on the resampled data.

    Reads ``kddcup_normal.csv`` via ``read_dataset``, resamples it with
    ``resample_dataset``, fits both models on the resampled data and passes
    each model's labels, together with the resampled reference labels, to
    ``performance_test``.
    """
    normal_dataset_url = 'kddcup_normal.csv'
    dataframe, labelsframe = read_dataset(normal_dataset_url)
    data_resampled, labels_resampled = resample_dataset(dataframe, labelsframe)

    kmeans = KMeans(n_clusters=2)
    kmeans.fit(data_resampled)
    performance_test(kmeans.labels_, labels_resampled.values)

    fcm = FCM(n_clusters=2)
    fcm.fit(data_resampled)
    fcm_labels = fcm.u.argmax(axis=1)
    # Fixed: this previously referenced the undefined name `label_resampled`,
    # which raised NameError before the FCM result could be evaluated.
    performance_test(fcm_labels, labels_resampled.values)
def decompose(
    path_to_features: str,
    path_to_images: str,
    path_to_decomposed_images_1: str,
    path_to_decomposed_images_2: str,
    class_name: str,
    fc: int
):
    """
    Split a folder of images into two folders using fuzzy c-means labels.

    The features stored at ``path_to_features`` are clustered into ``fc``
    clusters. The i-th entry returned by ``os.listdir(path_to_images)`` is
    paired with the i-th predicted label: images labelled ``1`` are written
    to ``path_to_decomposed_images_1``, all others to
    ``path_to_decomposed_images_2``.

    params:
        <string> path_to_features
        <string> path_to_images
        <string> path_to_decomposed_images_1
        <string> path_to_decomposed_images_2
        <string> class_name: Used only in the progress-bar description
        <int> fc: Number of clusters
    """
    # NOTE(review): assumes the listing order of `path_to_images` matches the
    # row order of the feature file - confirm against the feature extractor.
    features = np.load(path_to_features)

    model = FCM(n_clusters=fc)
    model.fit(features)
    idx = model.predict(features)  # cluster index per feature row

    images = list(os.listdir(path_to_images))

    progress_bar = tqdm(range(len(images)))
    progress_bar.set_description(f"Composing {class_name} images")
    for i in progress_bar:
        image_name = images[i]
        I = plt.imread(os.path.join(path_to_images, image_name))

        if idx[i] == 1:
            target_dir = path_to_decomposed_images_1
        else:
            target_dir = path_to_decomposed_images_2
        plt.imsave(os.path.join(target_dir, image_name), I)
def c_mean_cluster_graph(Ehull, Form_eng):
    """Plot k-means and fuzzy c-means labels (6 clusters each) side by side.

    K-means is fitted on a two-column frame built from ``Ehull`` and
    ``Form_eng``; FCM is fitted on their column-wise concatenation. The left
    panel is coloured by the k-means labels, the right one by the FCM hard
    labels.
    """
    kmeans_frame = pd.DataFrame({'x': Ehull, 'y': Form_eng})
    kmeans = KMeans(n_clusters=6)
    kmeans.fit(kmeans_frame)
    kmeans_labels = kmeans.predict(kmeans_frame)

    fcm_frame = pd.concat([Ehull, Form_eng], join='outer', axis=1)
    model = FCM(n_clusters=6)
    model.fit(fcm_frame)
    fcm_labels = model.u.argmax(axis=1)

    _, (kmeans_ax, fcm_ax) = plt.subplots(1, 2, figsize=(11, 5))
    scatter(Ehull, Form_eng, ax=kmeans_ax, hue=kmeans_labels)
    plt.title('fuzzy-c-means algorithm')
    scatter(Ehull, Form_eng, ax=fcm_ax, hue=fcm_labels)
    plt.show()
def _train_model(self, data, num_clusters):
    """
    Fit a fuzzy c-means model with the requested number of clusters

    :param data: Dataframe with train data
    :param num_clusters: Number of clusters to use
    :return: Trained model
    """
    super()._train_model(data, num_clusters)

    # The model is fitted on the transpose of the frame's values.
    # NOTE(review): confirm the transpose is intended for this FCM version.
    samples = data.to_numpy().T

    model = FCM(
        n_clusters=num_clusters,
        max_iter=1000,
        m=2,
        error=0.005,
        random_state=self._seed,
    )
    model.fit(samples)
    return model
def get_colors_cluster(self, image, no_clusters, clustering_method):
    """Cluster the rows of ``image`` and return label counts and center colors.

    :param image: Data passed unchanged to the clusterer's fit/predict
    :param no_clusters: Number of clusters
    :param clustering_method: ``'kmeans'`` or ``'fcmeans'``
    :return: ``(counts, colors_centers)`` - a ``Counter`` of the predicted
        labels and the rounded cluster centers cast to ``uint8``
    :raises SystemExit: for any other ``clustering_method`` (after printing
        an error message), as before
    """
    if clustering_method == 'kmeans':
        # Higher n_init / max_iter are used for better convergence.
        # `n_jobs` is no longer passed: scikit-learn removed that parameter
        # from KMeans (1.0), where it raises TypeError; it never affected
        # the clustering result.
        clf = KMeans(n_clusters=no_clusters,
                     n_init=15,
                     tol=1e-4,
                     max_iter=300,
                     init='k-means++')
        pred_labels = clf.fit_predict(image)
        centers = clf.cluster_centers_
    elif clustering_method == 'fcmeans':
        clf = FCM(n_clusters=no_clusters)
        clf.fit(image)
        pred_labels = clf.predict(image)
        centers = clf.centers
    else:
        print('Error choosing clustering method: either kmeans or fcmeans')
        # Same effect as the `exit()` helper, without depending on the
        # `site` module having installed it.
        raise SystemExit

    counts = Counter(pred_labels)
    colors_centers = np.uint8(centers.round())
    return counts, colors_centers
def fuuzyc(train_x1, train_y):
    """Project to 2 PCA components, cluster with 7-cluster FCM and score.

    Returns ``(purity, weighted_f1, silhouette)`` of the predicted labels,
    with purity and F1 measured against ``train_y``.
    """
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    projected = reducer.transform(train_x1)

    model = FCM(n_clusters=7)
    model.fit(projected)
    predicted = model.predict(projected)

    purity = purity_score(train_y, predicted)
    silhouette = silhouette_score(projected, predicted)
    weighted_f1 = f1_score(train_y.flatten(), predicted, average='weighted')
    # plot_clusters(projected, predicted)
    return purity, weighted_f1, silhouette
def segmentation(img, n_clusters, sigma=0.3):
    """Segment ``img`` with fuzzy c-means and return a labelled image.

    The FCM membership matrix is reshaped to one plane per cluster. Each
    plane gets an Otsu threshold (computed before smoothing), is Gaussian
    smoothed in place and thresholded into a mask. Pixels claimed by more
    than one mask are cleared. Each mask is then opened, its connected
    regions labelled, and the labels offset by the largest label of the
    previously processed plane before all planes are summed.
    """
    model = FCM(n_clusters=n_clusters, max_iter=10000, m=2)
    model.fit(utils.flat(img))

    n_rows, n_cols = img.shape[0], img.shape[1]
    abun = model.u.reshape((n_rows, n_cols, n_clusters))
    masks = np.empty(abun.shape, dtype=bool)

    for k in range(n_clusters):
        plane = abun[:, :, k]
        # The threshold must be taken before the in-place smoothing.
        thresh = filters.threshold_otsu(plane)
        filters.gaussian(plane, sigma=sigma, output=plane)
        masks[:, :, k] = plane > thresh

    # Drop pixels that belong to more than one cluster mask.
    overlapping = masks.sum(axis=2) > 1
    masks[overlapping, :] = 0

    label_imgs = [np.zeros(img.shape, dtype=np.uint8)]
    for k in range(n_clusters):
        mask = masks[:, :, k]
        binary_opening(mask, out=mask)
        label_img = label(mask)
        # Shift this plane's region ids past those of the previous plane.
        label_img[label_img > 0] += np.max(label_imgs[-1])
        label_imgs.append(label_img)

    return np.dstack(label_imgs).sum(axis=2)
def fcm_alg(dataset_num,
            num_of_samples=10000,
            num_of_clusters=None,
            get_figure=False,
            calc_ami_score=True):
    """Run fuzzy c-means on data set ``dataset_num`` and optionally plot/score it.

    :param dataset_num: Identifier passed to ``d.get_data``
    :param num_of_samples: Sample count passed to ``d.get_data``
    :param num_of_clusters: Number of clusters; when ``None`` it is taken
        from ``d.get_num_of_clusters(tag)``
    :param get_figure: Plot the clustering, save the figure and show it
    :param calc_ami_score: Return the adjusted mutual information score
    :return: The AMI score when ``calc_ami_score`` is true, otherwise ``None``
    """
    data = d.get_data(dataset_num, n_samples=num_of_samples)
    # Data frame to cluster (per the original note: a 2-D PCA reduction of
    # the original data set).
    df = d.get_df_to_cluster(data)
    tag = d.get_tag(data)

    # Fall back to the "real" number of clusters according to the tag.
    if num_of_clusters is None:
        num_of_clusters = d.get_num_of_clusters(tag)

    model = FCM(n_clusters=num_of_clusters)
    model.fit(df)
    labels = model.predict(df)

    if get_figure:
        centers = model.centers
        # Clustered points and the centroid of each cluster.
        plt.scatter(df["PC1"], df["PC2"], c=labels)
        plt.scatter(centers["PC1"],
                    centers["PC2"],
                    marker="*",
                    label='centroid',
                    c='black')
        plt.legend()

        title = 'DS{} - Fuzzy C Means'.format(dataset_num)
        folder = 'images/dataset {}/'.format(dataset_num)
        suffix = " ({} clusters)".format(num_of_clusters)
        fig_name = folder + title + suffix
        plt.title(title)
        plt.savefig(fig_name)
        plt.show()

    if calc_ami_score:
        labels_true = d.get_labels(tag)
        return adjusted_mutual_info_score(labels_true=labels_true,
                                          labels_pred=labels)
def fit(self, X, y=None):
    """Fit the model: pick reference points via FCM and solve for ``self.B``.

    Sets ``self.rp_X``, ``self.rp_y``, ``self.D_in``, ``self.D_out`` and
    ``self.B``. Nothing is returned.

    NOTE(review): ``y`` defaults to ``None`` but the body reads ``y.shape``,
    so a real label array appears to be required - confirm with callers.
    """
    # Cluster X into at most `self.max_rp_number` groups.
    fcm = FCM(n_clusters=self.max_rp_number)
    fcm.fit(X)
    # Hard assignment: index of the largest membership per sample.
    c = fcm.u.argmax(axis=1)
    # Row positions (in the per-cluster mean table) whose mean label is
    # itself one of the label values in y.
    # NOTE(review): these positions equal cluster ids only if every cluster
    # id occurs in `c` - confirm.
    homongenious_clusters = np.where(
        pd.DataFrame({
            'c': c,
            'y': y
        }).groupby('c').mean().isin(np.unique(y)))[0]
    # convert outputs to one-hot encoding
    y = one_hot(y) if len(y.shape) == 1 else y
    # Reference points are the centers of the selected clusters.
    self.rp_X = fcm.centers[homongenious_clusters, :]
    self.rp_y = np.eye(y.shape[1])
    # Pairwise distances from every sample to every reference point.
    self.D_in = sp.spatial.distance.cdist(X, self.rp_X)
    # Element-wise 1 - y.
    self.D_out = (y * (-1)) + 1
    # Solve D_in @ B ~= D_out through the pseudo-inverse.
    self.B = np.linalg.pinv(self.D_in) @ self.D_out
class FCmeans:
    """Fuzzy c-means clustering over a sequence of monthly records.

    Every record is read through four attributes: ``upper_shadow_length``,
    ``lower_shadow_length``, ``body_length`` and ``color``. Those four values
    form one feature row. A 5-cluster ``FCM`` model (``random_state=0``) is
    fitted on the resulting matrix when the object is constructed.
    """

    def __init__(self, monthly_data):
        """Fit the FCM model on the feature matrix of ``monthly_data``."""
        data_for_clustering = self.preprocess_monthly_data_to_fit_fcmeans_library(
            monthly_data)
        self.fcmeans = FCM(n_clusters=5, random_state=0)
        self.fcmeans.fit(data_for_clustering)

    def preprocess_monthly_data_to_fit_fcmeans_library(self, monthly_data):
        """Return the feature matrix for ``monthly_data`` as a NumPy array.

        One row per record, columns in the order upper shadow length,
        lower shadow length, body length, color.
        """
        feature_rows = [
            [
                record.upper_shadow_length,
                record.lower_shadow_length,
                record.body_length,
                record.color,
            ]
            for record in monthly_data
        ]
        return np.array(feature_rows)

    def get_clusters(self):
        """Return the cluster centers of the fitted model."""
        return self.fcmeans.centers
def fuuzyc(train_x1, vtype, weekend, rev, c):
    """Project to 2 PCA components, cluster with ``c``-cluster FCM and score.

    Purity and weighted F1 are computed against ``vtype``, ``weekend`` and
    ``rev`` (in that order). The scores are printed, the clusters plotted,
    and ``(purities, f1_scores, silhouette)`` is returned.
    """
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    projected = reducer.transform(train_x1)

    model = FCM(n_clusters=c)
    model.fit(projected)
    predicted = model.predict(projected)

    references = (vtype, weekend, rev)

    purities = [purity_score(ref, predicted) for ref in references]
    silhouette = silhouette_score(projected, predicted)
    print(silhouette)
    print(purities)

    f1_scores = [
        f1_score(ref.flatten(), predicted, average='weighted')
        for ref in references
    ]
    print(f1_scores)

    plot_clusters(projected, predicted)
    return purities, f1_scores, silhouette
def fuuzyc(train_x1, tag, gender, race):
    """Project to 2 PCA components, cluster with 9-cluster FCM and score.

    Purity and weighted F1 are computed against ``tag``, ``gender`` and
    ``race`` (in that order). Returns ``(purities, f1_scores, silhouette)``.
    """
    reducer = PCA(n_components=2)
    reducer.fit(train_x1)
    projected = reducer.transform(train_x1)

    model = FCM(n_clusters=9)
    model.fit(projected)
    predicted = model.predict(projected)

    references = (tag, gender, race)

    purities = [purity_score(ref, predicted) for ref in references]
    silhouette = silhouette_score(projected, predicted)
    f1_scores = [
        f1_score(ref.flatten(), predicted, average='weighted')
        for ref in references
    ]
    # plot_clusters(projected, predicted)
    return purities, f1_scores, silhouette
def p_values():
    """Run five clusterers on samples of ``credit`` and print ``randomize`` results.

    Each algorithm (k-means, FCM, Gaussian mixture, spectral, DBSCAN) gets
    its own sample drawn with a different ``ratio``. For the three 0/1
    labelings that are flipped below, the labels are inverted when more than
    half of them are 1. ``randomize(labels, ground_truth)`` is printed per
    algorithm.
    """

    def _flip_if_majority_ones(labels):
        # Invert 0/1 labels when they sum to more than half their count.
        if sum(labels) > len(labels) / 2:
            return 1 - np.array(labels)
        return labels

    vecs_kmeans, gt_kmeans = sample_vecs(credit, ratio=100)
    vecs_fcm, gt_fcm = sample_vecs(credit, ratio=7)
    vecs_gmm, gt_gmm = sample_vecs(credit, ratio=2)
    vecs_spectral, gt_spectral = sample_vecs(credit, ratio=3)
    vecs_dbscan, gt_dbscan = sample_vecs(credit, ratio=1)

    # k-means
    kmeans = KMeans(n_clusters=2, random_state=0).fit(vecs_kmeans)
    kmeans_labels = _flip_if_majority_ones(kmeans.labels_)
    kmeans_centers = kmeans.cluster_centers_  # kept for the disabled plots

    # fuzzy c-means, hard labels via a 0.6 membership cutoff
    fcm = FCM(n_clusters=2, m=1.1).fit(vecs_fcm)
    fcm_centers = fcm.centers  # kept for the disabled plots
    fcm_labels = _flip_if_majority_ones(cutoff(fcm.u, 0.6))

    # Gaussian mixture
    gmm = GaussianMixture(n_components=2, random_state=0).fit(vecs_gmm)
    gmm_labels = _flip_if_majority_ones(gmm.predict(vecs_gmm))

    # spectral clustering on the pairwise `dist` matrix; warnings are
    # silenced process-wide from here on
    warnings.filterwarnings('ignore')
    spectral = SpectralClustering(n_clusters=2,
                                  affinity='nearest_neighbors',
                                  random_state=0)
    spectral_labels = spectral.fit(dist(vecs_spectral, vecs_spectral)).labels_

    # DBSCAN
    db_labels = dbscan_func(vecs_dbscan, 11, 4.4)

    print("kmeans")
    print(randomize(kmeans_labels, gt_kmeans))
    print("fcm")
    print(randomize(fcm_labels, gt_fcm))
    print("gmm")
    print(randomize(gmm_labels, gt_gmm))
    print("spectral")
    print(randomize(spectral_labels, gt_spectral))
    print("dbscan")
    print(randomize(db_labels, gt_dbscan))
def merge_clusters(rgb_array_in, counts_from_cluster, clsuter_1D_method='Diff'): use_std = True # This is not working as expected, so we are setting it to False rgb_array = rgb_array_in.copy( ) # we need to make a copy, and it has to be of float type to prevent overflow rgb_array = np.expand_dims(rgb_array, axis=0) hsv = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2HSV) # how about COLOR_BGR2HLS? hsv = np.squeeze(hsv, axis=0) if clsuter_1D_method == 'Diff': labels = differntial_1D_cluster( hsv[:, 0]) # hsv[:, 0] is the hue component elif clsuter_1D_method == 'MeanSift': result, ms = cluster_1D( hsv[:, 0:1] ) # hsv[:, 0:1] # getting the h value to be used in clustering labels = result['labels'] elif clsuter_1D_method == '2nd_fcm': # not so good clf = FCM(n_clusters=13) clf.fit(rgb_array_in) labels = clf.predict(rgb_array_in) rgb_array = clf.centers.round() rgb_array = np.expand_dims(rgb_array, axis=0) counts_from_cluster = Counter(labels) else: print('Incorrect choice of clsuter_1D_method') labels = 0 if use_std and clsuter_1D_method != '2nd_fcm': # second stage, decompose similar hue(s) labels = decompose_hue(labels, rgb_array_in.copy()) # now, average simliar colors rgb_array = average_similar_colors_pix_cnt(rgb_array, counts_from_cluster, labels) return rgb_array, labels
def fcm():
    """Cluster a sample of ``credit`` with 2-cluster FCM, plot and score it.

    ``sample_vecs`` returns the feature vectors ``X`` and their reference
    labels ``s``. Hard labels come from ``cutoff(fcm.u, 0.6)`` and are
    inverted when more than half of them are 1. The left panel is coloured
    by ``s``, the right one by the FCM labels. Precision, recall and F1 of
    the FCM labels against ``s`` are printed before the figure is shown.
    """
    X, s = sample_vecs(credit, ratio=7)

    model = FCM(n_clusters=2, m=2.5).fit(X)
    fcm_labels = cutoff(model.u, 0.6)
    # fcm_labels = model.u.argmax(axis=1)
    if sum(fcm_labels) > len(fcm_labels) / 2:
        fcm_labels = 1 - np.array(fcm_labels)

    f, axes = plt.subplots(1, 2)
    scatter(X[:, 0], X[:, 1], ax=axes[0], hue=s)
    scatter(X[:, 0], X[:, 1], ax=axes[1], hue=fcm_labels)

    # Fixed: scikit-learn expects (y_true, y_pred). The arguments were passed
    # the other way round, which swapped the printed precision and recall
    # (F1 is symmetric and was unaffected).
    precision, recall, f1, _ = precision_recall_fscore_support(
        s, fcm_labels, average="binary")
    print("precision:", precision)
    print("recall:", recall)
    print("f1:", f1)
    plt.show()
5451485.625738382, 21955.05634316005, 166.3279399863208, 116.3242207005973, 1.1114355393300457, 186.2265678694155, 119.16421198018449, 921.3124659206626, 407.06764737948333, 229.59300510278956 ], [ 633.5453868005061, 13.311492579165701, 35.45257318447153, 13.051967221349628, 4.065577055191767, 29.922603051099713, 7.58443241651873, 25286.235250015743, 17660.494008300826, 21851.365482692607, 14800.013759329424, 69.73815264740048, 5.022356752792227, 1233.9959969564013, 4.065577055191767, 7.58443241651873, 13.311492579165701, 35.45257318447153, 1053.2675415630874 ]] # fit the fuzzy-c-means fcm = FCM(n_clusters=2, first_center=centers, max_iter=100) fcm.fit(X_train) # outputs fcm_centers = fcm.centers # 첫번째는 'Bengin' cetroid, 두번쨰는 'attack' centroid fcm_labels = fcm.u.argmax(axis=1) probability = fcm.predict(X_test) result_df = pd.DataFrame(data=probability, columns=[0, 1, 'pre_class']) result_df['class'] = y_test print(color.BOLD + "\nValidate records in cluster(find invalid record)" + color.END) dif_data = check_different(result_df) print(dif_data.shape)
def GetFCM(data, classNum):
    """Fit an FCM with ``classNum`` clusters on ``data``.

    Returns ``(centers, labels)`` where ``labels`` holds, for each row of
    the membership matrix, the index of its largest entry.
    """
    model = FCM(n_clusters=classNum)
    model.fit(data)
    centers = model.centers
    hard_labels = model.u.argmax(axis=1)
    return centers, hard_labels
if __name__ == "__main__": # create artifitial dataset n_samples = 50000 n_bins = 3 # use 3 bins for calibration_curve as we have 3 clusters here centers = [(-5, -5), (0, 0), (5, 5)] X, _ = make_blobs(n_samples=n_samples, n_features=3, cluster_std=1.0, centers=centers, shuffle=False, random_state=42) # fit the fuzzy-c-means fcm = FCM(n_clusters=3) fcm.fit(X) # outputs fcm_centers = fcm.centers fcm_labels = fcm.u.argmax(axis=1) print(len(X)) print(len(fcm_labels)) # plot result f, axes = plt.subplots(1, 2, figsize=(11, 5)) scatter(X[:, 0], X[:, 1], ax=axes[0]) scatter(X[:, 0], X[:, 1], ax=axes[1], hue=fcm_labels) scatter(fcm_centers[:, 0], fcm_centers[:, 1],
s14 = 'C' s15 = 'C++' s16 = 'java' ####Select area by adding in Area_choice Area_choice = [s1, s2, s12, s14, s15] #Area_choice=[s1] #Area_choice=[s1,s2,s12,s14,s15,s7,s9] C_means_model_1 = FCM(n_clusters=5, random_state=123) distances_1 = C_means_model_1.fit(df[Area_choice]) labels_1 = C_means_model_1.u.argmax(axis=1) df['cluster_1'] = labels_1 df['cluster_1_label'] = df['cluster_1'].apply(cluster_1_label) Index_label = df[df['cluster_1_label'] == 3].index.tolist() Cluster = Index_label Index_label = np.array(Index_label) t = 1
import time


def RGBXY(image):
    """Build a 5-feature (R, G, B, row, column) description of every pixel.

    The first three channels of the result are ``image / 255``; the row and
    column indices are divided by the image height and width respectively.

    Returns ``(features, indexes)``: ``features`` is the feature array
    reshaped to five columns, ``indexes`` the un-normalised ``[row, col]``
    pairs in row-major order.
    """
    feature_shape = list(image.shape)
    feature_shape[2] = 5
    n_rows, n_cols = feature_shape[0], feature_shape[1]

    features = np.zeros(feature_shape)
    features[:, :, :3] = image / 255

    indexes = np.array(
        [[row, col] for row in range(n_rows) for col in range(n_cols)])
    features[:, :, 3:] = indexes.reshape((n_rows, n_cols, 2))
    # Scale the coordinates by the image size.
    features[:, :, 3] /= n_rows
    features[:, :, 4] /= n_cols

    return features.reshape([-1, 5]), indexes


# Script: quantise an image to `c` colors with fuzzy c-means on RGBXY features.
c = int(input("Numbers of clusters(c): "))
img = np.asarray(Image.open('1558014721_E7jyWs_iiit_d.jpg')).copy()

t1 = time.time()
features, indexes = RGBXY(img)

fcm = FCM(n_clusters=c)
fcm.fit(features)
fcm_centers = fcm.centers
fcm_labels = fcm.predict(features)
print(fcm_centers, fcm_centers.shape)

# Replace every pixel by the RGB part of its cluster center.
img = fcm_centers[fcm_labels, :3].reshape((img.shape[0], img.shape[1], 3))
print("Time taken: %f" % (time.time() - t1))

plt.imshow(img)
plt.axis('off')
plt.show()
for elem in parts: predVec[row, col] = int(elem) col += 1 row += 1 predVecList = predVec.tolist() # Clustering NC = list(range(1, 11)) #numOfModels+1)) # list of numbers of clusters accuracies = np.zeros((numOfEPS, len(NC))) clusteringResult = {} with open("FCM_clustering_result.txt", "w") as fp: for numOfClusters in NC: # clustering into c groups print("Clustering: {} clusters".format(numOfClusters)) fcm = FCM(n_clusters=numOfClusters) fcm.fit(predVec) clusteringResult[numOfClusters] = fcm.u fp.write("\n## number of clusters: " + str(numOfClusters) + "\n") fp.write(str(fcm.u) + "\n") fp.write("\n") def vote2(pred, mem): ''' pred: numOfModels X numOfAEs X 2 mem : numOfModels X numOfClusters voteResult: numOfAEs X 2 ''' numOfModels = pred.shape[0] numOfAEs = pred[0].shape[0]
import numpy as np import matplotlib.pyplot as plt import pandas as pd from fcmeans import FCM dataset = pd.read_csv( 'D://Visual Exercise//Python//New folder//Fuzzy-C-Means Clustering//Fuzzy-C-Means Clustering//Mall_Customers.csv' ) #Getting data set X = dataset.iloc[:, [3, 4]].values print(X) # fit the fuzzy-c-means fcm = FCM(n_clusters=3, max_iter=150, random_state=0) fcm.fit(X) y_pred = fcm.predict(X) print(y_pred) # outputs #predict and labels are same fcm_centers = fcm.centers fcm_labels = fcm.u.argmax(axis=1) print(fcm_labels) # Visualising the clusters plt.scatter(X[y_pred == 0, 0],
subcategoryGet = trainData[:, 1].astype(float) # print(subcategoryGet) labelGet = trainData[:, 2].astype(float) # print(labelGet) cscGet = trainData[:, 0:2].astype(float) # print(cscGet) # plt.scatter(subcategoryGet, labelGet) # plt.show() testData = np.array(datasetTest) # print(testData) testDataGet = testData[:, 0:2].astype(float) # print(testDataGet) fcm = FCM(n_clusters=3) fcm.fit(cscGet) fcm_centers = fcm.centers fcm_labels = fcm.u.argmax(axis=1) f, axes = plt.subplots(1, 2, figsize=(11, 5)) scatter(cscGet[:, 0], cscGet[:, 1], ax=axes[0]) scatter(cscGet[:, 0], cscGet[:, 1], ax=axes[1], hue=fcm_labels) scatter(fcm_centers[:, 0], fcm_centers[:, 1], ax=axes[1], marker="*", s=200) closest_centroid = [] for x in range(len(testDataGet)): diff = fcm_centers - testDataGet[x, :] # print(diff) dist = np.sqrt(np.sum(diff**2, axis=-1))