def select_RPs(self):
    """Choose reference points (RPs) as the samples nearest to cluster centres.

    When ``self.rp_number`` is at most 1 it is read as a fraction of the
    number of rows in ``self.X`` and overwritten in place with the resulting
    integer count.  An ``FCM`` model with that many clusters is fitted on
    ``self.X``.  Centres of clusters that pair with exactly one ``argmax`` of
    ``self.y`` are preferred; if no cluster qualifies, every centre is used.

    Sets ``self.rp_X``, ``self.rp_y`` (the selected rows of ``X`` and ``y``)
    and ``self.D_x``, ``self.D_y`` (distances from every row to the RPs).
    """
    n_samples = self.X.shape[0]
    if self.rp_number <= 1:
        self.rp_number = int(self.rp_number * n_samples)

    clusterer = FCM(k=self.rp_number, random_state=self.random_state,
                    max_iter=50)
    clusterer.fit(self.X)
    cluster_of_sample = clusterer.labels_
    # presumably self.y is one-hot, making this the class index -- confirm
    target_of_sample = self.y.argmax(axis=1)

    # Distinct (cluster, target) columns: a cluster id that shows up exactly
    # once among them is paired with a single target value only.
    distinct_pairs = np.unique(
        np.vstack((cluster_of_sample, target_of_sample)), axis=1)
    targets_per_cluster = np.bincount(distinct_pairs[0, :])
    homogeneous_ids = np.where(targets_per_cluster == 1)[0]

    centers = clusterer.cluster_centers_[homogeneous_ids, :]
    if len(centers) == 0:
        # Every cluster is heterogeneous: fall back to all centres.
        centers = clusterer.cluster_centers_

    # Index of the sample closest to each retained centre.
    nearest_sample = cdist(centers, self.X).argmin(axis=1)
    self.rp_X = self.X[nearest_sample, :]
    self.rp_y = self.y[nearest_sample, :]
    self.D_x = cdist(self.X, self.rp_X)
    self.D_y = cdist(self.y, self.rp_y)
def simulate(self):
    """Draw a synthetic data set and store it on the instance.

    Steps, in the order the random draws are made:

    1. Draw ``self.M`` inverse-gamma scales and a Poisson point count.
    2. For every point, draw a component from ``self.beta`` and a sample
       from that component's normal (``self.mu[z]``, ``self.Sigma[z]``).
    3. Fit fuzzy k-means with ``self.N`` clusters on the points and draw a
       group for every point from its ``fuzzy_labels_`` row.
    4. Accumulate per-group/per-component sums (``self.x_hat``), group
       counts (``self.P``) and component-by-group counts (``self.pi``).

    Finally ``self.pi`` is row-normalised, ``self.y`` is produced by
    ``self.draw`` and ``self.x`` is passed through ``self.itransform``.
    """
    scales = np.random.gamma(1, 10, self.M) ** -1
    all_P = np.random.poisson(100 * self.N)

    self.x = np.zeros((all_P, self.M))
    self.assignments = np.zeros((all_P, 2), dtype=int)
    for row in range(all_P):
        # A single multinomial trial is one-hot; argmax is the drawn index.
        component = np.random.multinomial(1, self.beta).argmax()
        self.x[row] = np.random.multivariate_normal(
            self.mu[component], self.Sigma[component])
        self.assignments[row, 1] = component

    # Cluster the points into N groups.
    fuzzy_kmeans = FuzzyKMeans(k=self.N, m=2)
    fuzzy_kmeans.fit(self.x)

    self.pi = np.zeros((self.K, self.N))
    self.P = np.zeros(self.N)
    self.x_hat = np.zeros((self.N, self.K, self.M))
    for row in range(all_P):
        group = np.random.multinomial(
            1, fuzzy_kmeans.fuzzy_labels_[row]).argmax()
        component = self.assignments[row, 1]
        self.assignments[row, 0] = group
        self.x_hat[group, component] += self.x[row]
        self.P[group] += 1
        self.pi[component, group] += 1

    # Normalise every row of pi by its own sum.
    self.pi = self.pi / self.pi.sum(axis=1, keepdims=True)
    group_means = self.x_hat.sum(1) / self.P[:, np.newaxis]
    self.y = self.draw(group_means, scales)
    self.x = self.itransform(self.x)
def __init__(self, n_clusters=8, m=1, init='k-means++', n_init=10,
             max_iter=300, tol=1e-4, precompute_distances='auto',
             verbose=0, random_state=None, copy_x=True, n_jobs=None,
             algorithm='auto'):
    """Initialise both parent estimators from a single argument list.

    ``FuzzyKMeans.__init__`` receives the cluster count (as ``k``), ``m``
    and the shared ``max_iter`` / ``tol`` / ``random_state``;
    ``KMeans.__init__`` receives every remaining argument plus the same
    shared values.
    """
    # Settings forwarded unchanged to both parents.
    shared = dict(max_iter=max_iter, tol=tol, random_state=random_state)

    FuzzyKMeans.__init__(self, k=n_clusters, m=m, **shared)
    KMeans.__init__(
        self,
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        precompute_distances=precompute_distances,
        verbose=verbose,
        copy_x=copy_x,
        n_jobs=n_jobs,
        algorithm=algorithm,
        **shared
    )
def __init__(self, data, c):
    """Store the data and build the fuzzy k-means model.

    data: object later handed to the model's ``fit``.
    c: number of clusters, or ``None`` to keep the model's own default.
    """
    self.data = data
    self.n_clusters = c
    # The fuzzy index is fixed at 2 to stay aligned with the paper.
    self.fuzzy_means = (
        FuzzyKMeans(m=2) if c is None
        else FuzzyKMeans(self.n_clusters, m=2)
    )
def plot_compare_mu(self, feature1, feature2):
    """Plot two features of the data with the centres found by each model.

    Fits Fuzzy K-Means, K-Means and a Gaussian mixture (each with
    ``self.settings.K`` components) on ``self.data.obs``, then draws the
    observations, the last stored slice of ``self.inference_mu``, the true
    centres ``self.data.mu`` and each fitted model's centres.  The figure is
    saved as ``Comparison_Graph.png`` inside ``self.settings.outdir``.

    feature1, feature2: column indices used for the x and y axes.
    """
    def columns(points):
        # (x, y) columns of a 2-D array for the two requested features.
        return points[:, feature1], points[:, feature2]

    # Fit the comparison models.
    fuzzy_model = FuzzyKMeans(k=self.settings.K)
    fuzzy_model.fit(self.data.obs[:])

    kmeans = KMeans(k=self.settings.K)
    kmeans.fit(self.data.obs[:])

    gmm = GaussianMixture(n_components=self.settings.K)
    gmm.fit(self.data.obs[:])

    last = len(self.iterations) - 1

    plt.clf()
    # Observed data.
    plt.plot(*columns(self.data.obs), 'ko', markersize=2, label='Data')
    # Inferred centres at the last stored iteration.
    plt.plot(self.inference_mu[:, feature1, last],
             self.inference_mu[:, feature2, last],
             'b*', markersize=6, label='NDMs Inferred')
    # True centres, then the segment joining the last one back to the first.
    plt.plot(*columns(self.data.mu), 'ro--', markersize=6, label='True')
    plt.plot(self.data.mu[[-1, 0], feature1],
             self.data.mu[[-1, 0], feature2], 'r--')
    # Centres found by the comparison models.
    plt.plot(*columns(fuzzy_model.cluster_centers_), 'ks', markersize=6,
             label='Fuzzy K-Means')
    plt.plot(*columns(kmeans.cluster_centers_), 'kv', markersize=6,
             label='K-Means')
    plt.plot(*columns(gmm.means_), 'kh', markersize=4, label='GMM')
    plt.legend()

    plt.savefig(os.path.join(self.settings.outdir, 'Comparison_Graph.png'))
class FuzzyMeansAlgorithm:
    """Wrap a ``FuzzyKMeans`` model together with the data it is fitted on."""

    def __init__(self, data, c):
        """Keep ``data`` and create the model.

        c: number of clusters; ``None`` leaves the model's default in place.
        """
        self.data = data
        self.n_clusters = c
        # Fuzzy index 2 keeps the setup aligned with the paper.
        positional = () if c is None else (self.n_clusters,)
        self.fuzzy_means = FuzzyKMeans(*positional, m=2)

    def model_name(self):
        """Return the display name of this algorithm."""
        return "Fuzzy-Means"

    def clusterize(self):
        """Announce the run, fit the model on the data, return ``fit``'s result."""
        announcement = " * Clustering data with {}...".format(
            self.model_name())
        print(announcement)
        return self.fuzzy_means.fit(self.data)

    def get_model(self):
        """Return the wrapped model object."""
        return self.fuzzy_means
# Pick the hard-clustering estimator requested on the command line.
km = (
    MiniBatchKMeans(n_clusters=true_k, init='k-means++', n_init=1,
                    init_size=1000, batch_size=1000, verbose=opts.verbose)
    if opts.minibatch
    else KMeans(n_clusters=true_k, init='k-means++', max_iter=100,
                n_init=1, verbose=opts.verbose)
)

# Fuzzy k-means is fitted on a dense copy of X (max_iter left at its default).
fuzzy_kmeans = FuzzyKMeans(k=true_k, m=1.01)
X1 = np.asarray(X.todense())
fuzzy_kmeans.fit(X1)


def print_metrics(km):
    """Print clustering scores for ``km.labels_``.

    The first four scores compare against the module-level ``labels``; the
    silhouette coefficient is computed on the module-level ``X``.
    """
    predicted = km.labels_
    print("Homogeneity: %0.3f"
          % metrics.homogeneity_score(labels, predicted))
    print("Completeness: %0.3f"
          % metrics.completeness_score(labels, predicted))
    print("V-measure: %0.3f"
          % metrics.v_measure_score(labels, predicted))
    print("Adjusted Rand-Index: %.3f"
          % metrics.adjusted_rand_score(labels, predicted))
    print("Silhouette Coefficient: %0.3f"
          % metrics.silhouette_score(X, predicted, sample_size=1000))
def get_FuzzyKmeans(dataframe,K):
    """Fit fuzzy k-means with ``K`` clusters on ``dataframe``.

    Returns the fitted model's ``labels_`` attribute.
    """
    clusterer = FuzzyKMeans(k=K)
    clusterer.fit(dataframe)
    return clusterer.labels_
""" import numpy as np from sklearn_extensions.fuzzy_kmeans import KMedians, FuzzyKMeans, KMeans from sklearn.datasets.samples_generator import make_blobs np.random.seed(0) batch_size = 45 centers = [[1, 1], [-1, -1], [1, -1]] n_clusters = len(centers) X, labels_true = make_blobs(n_samples=1200, centers=centers, cluster_std=0.3) kmeans = KMeans(k=3) kmeans.fit(X) kmedians = KMedians(k=3) kmedians.fit(X) fuzzy_kmeans = FuzzyKMeans(k=3, m=2) fuzzy_kmeans.fit(X) print('KMEANS') print(kmeans.cluster_centers_) print('KMEDIANS') print(kmedians.cluster_centers_) print('FUZZY_KMEANS') print(fuzzy_kmeans.cluster_centers_)
# Pool the positive and negative scores, shuffle them, and shape them into a
# single-column array as expected by fit().
sentimentScore = []
sentimentScore.extend(pos_tweets.flatten())
sentimentScore.extend(neg_tweets.flatten())
shuffle(sentimentScore)
sentimentScore = np.array(sentimentScore)
alldata = sentimentScore.reshape(-1, 1)

# FuzzyKMeans is used because skfuzzy's fuzzy c-means needs two-dimensional
# data and the scores here are one-dimensional.
# NOTE(review): the original comment said FuzzyKMeans comes "from skfuzzy";
# confirm the actual import at the top of the file.
# http://pythonhosted.org/scikit-fuzzy/auto_examples/plot_cmeans.html
fuzzy_kmeans = FuzzyKMeans(k=6, m=2)
fuzzy_kmeans.fit(alldata)

# print() as a call with one argument behaves the same on Python 2 and 3;
# the former print statement was a SyntaxError on Python 3.
print(np.sort(np.array(fuzzy_kmeans.cluster_centers_).flatten()))

# Predicted centroids for all 6 emoticons (written to fuzzy_output.txt
# later on), sorted ascending: the three smallest are treated as negative,
# the three largest as positive.
my_cluster_centers = np.array(fuzzy_kmeans.cluster_centers_).flatten().tolist()
my_cluster_centers = sorted(my_cluster_centers)
negative_centers = np.array(my_cluster_centers[:3]).flatten()
positive_centers = np.array(my_cluster_centers[3:]).flatten()

# Next: cluster all data into 6 groups depending on which centroid each
# value is closest to.
# Draw n samples from each of the three Gaussians, pool them and shuffle
# the rows in place.
data1 = np.random.multivariate_normal(miu1, cov1, size=n)
data2 = np.random.multivariate_normal(miu2, cov2, size=n)
data3 = np.random.multivariate_normal(miu3, cov3, size=n)
data = np.concatenate([data1, data2, data3])
np.random.shuffle(data)
print(data.shape)

plt.xlim(-40, 50)
plt.ylim(-40, 50)

# Time model construction, fitting and reading the centres back.
start_time = time.time()
fuzzy = FuzzyKMeans(k=3)
fuzzy.fit(data)
centers = fuzzy.cluster_centers_
end_time = time.time()
print(end_time - start_time)

# Samples in the default colour, fitted centres in red.
plt.plot(data[:, 0], data[:, 1], linestyle="", marker="o")
plt.plot(centers[:, 0], centers[:, 1], linestyle="", marker="o", color='red')
plt.show()
# create kmeans object kmeans = KMeans(k=5) # fit kmeans object to data kmeans.fit(points) # print location of clusters learned by kmeans object print('K-means cluster centroids:\n', kmeans.cluster_centers_) ########################################################################################################################### # create kmedians object kmedians = KMedians(k=5) # fit kmeans object to data kmedians.fit(points) # print location of clusters learned by kmedians object print('K-medians cluster centroids:\n', kmedians.cluster_centers_) ########################################################################################################################### # create kmeansf (fuzzy) object kmeansf = FuzzyKMeans(k=5) # fit kmeans object to data kmeansf.fit(points) # print location of clusters learned by kmeans fuzzy object print('K-means fuzzy cluster centroids:\n', kmeansf.cluster_centers_) ########################################################################################################################### fig = plt.figure() colors = ['#4EACC5', '#FF9C34', '#4E9A06', '#8B0000', '#FFEF00'] objects = (kmeans, kmedians, kmeansf) X = points for i, obj in enumerate(objects): ax = fig.add_subplot(1, len(objects), i + 1) for k, col in zip(range(obj.k), colors):
clustering_df = df.drop(columns=["Unnamed: 0", "Player", "final_team", "Pos"]) # we keep the interesting value df_fcm = df[[ 'Player', 'TRB', 'PTS', 'AST', "DWS", '3PA', "OWS", "USG%", "Height" ]] # we keep the players name for later players_name = df_fcm["Player"] # we remove the player column for the computation df_fcm = df_fcm.loc[:, (df_fcm.columns != "Player")] # Computation nb_cluster_fuzzy = 35 fuzzy_kmeans = FuzzyKMeans(k=nb_cluster_fuzzy, m=1.1) fuzzy_kmeans.fit(df_fcm) fuzzy_clusters = pd.DataFrame(fuzzy_kmeans.fuzzy_labels_) # we add the players name back fuzzy_clusters = pd.concat([players_name, fuzzy_clusters], axis=1) nb_max_players_per_cluster_fcm = 3 final_clusters = pd.DataFrame() for i in range(nb_cluster_fuzzy): # lets keep the coresponding col of membership degree sets = fuzzy_clusters[["Player", i]] # lets sort
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans

# Fit a single 7-cluster model on the train rows followed by the test rows,
# then split the resulting labels back at the train/test boundary.
n_train_rows = x_km.shape[0]
mdl = FuzzyKMeans(k=7, m=2)
mdl.fit(np.concatenate([x_km, x_km_test], axis=0))

all_fcm_pred = list(mdl.labels_)
train_fcm_pred = all_fcm_pred[:n_train_rows]
test_fcm_pred = all_fcm_pred[n_train_rows:]
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans


def import_cluster(epsilon=0.9, minPoints=2, NoiseProp=0.59):
    """Open a computed-cluster file and join it with the player statistics.

    The cluster file name is built from the three parameters and read from
    ``../ComputedClusters/``; its columns ``'0'`` and ``'1'`` are renamed to
    ``'Player'`` and ``'Cluster'``.  The result is joined on ``'Player'``
    with ``../csv/players_stats.csv`` (team, PER, etc.).

    Returns the joined DataFrame.
    """
    pathCluster = ("../ComputedClusters/" + "epsilon_" + str(epsilon)
                   + "_MinPoints_" + str(minPoints)
                   + "_NoiseProp_" + str(NoiseProp) + ".csv")
    # The separator is passed by keyword: recent pandas releases accept only
    # the path positionally in read_csv.
    df = pd.read_csv(pathCluster, sep=';')
    df.rename(columns={'0': 'Player', '1': 'Cluster'}, inplace=True)
    df_PS = pd.read_csv('../csv/players_stats.csv', sep=',')
    df_join = df.join(df_PS.set_index('Player'), on='Player')
    return df_join


df = import_cluster()
# Keep only the noise points (cluster -1).
df_noise = df[df.Cluster == -1]
df_noise = df_noise.reset_index(drop=True)
player_names = pd.DataFrame(df_noise["Player"])

# Features used for the fuzzy clustering of the noise points.
df = df_noise[['TRB', 'PTS', 'AST', 'DWS', 'TS%', "3PA", "OWS", "USG%"]]
fuzzy_kmeans = FuzzyKMeans(k=5, m=2)
fuzzy_kmeans.fit(df)

# One column per cluster from fuzzy_labels_, placed next to the names.
clusters = pd.DataFrame(fuzzy_kmeans.fuzzy_labels_)
res = pd.concat([player_names, clusters], axis=1)

print('FUZZY_KMEANS')
print(fuzzy_kmeans.cluster_centers_)
# Store the "pf" assignments and the transposed "pf" components.
fout.create_dataset("pf/z", data=pf_z)
fout.create_dataset("pf/components", data=pf_components.T)

# K-Means: store the predicted cluster index per observation and the centres.
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=args.K)
k_z = kmeans.fit_predict(obs)
k_components = kmeans.cluster_centers_
fout.create_dataset("kmeans/z", data=k_z)
fout.create_dataset("kmeans/components", data=k_components)

# Fuzzy K-Means: store the model's fuzzy_labels_ and its centres.
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans
fkmeans = FuzzyKMeans(k=args.K, m=2)
fkmeans.fit(obs)
fk_z = fkmeans.fuzzy_labels_
fk_components = fkmeans.cluster_centers_
fout.create_dataset("fuzzy_kmeans/z", data=fk_z)
fout.create_dataset("fuzzy_kmeans/components", data=fk_components)

# GMM: store per-component probabilities, means and covariances.
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=args.K)
gmm.fit(obs)
gmm_z = gmm.predict_proba(obs)
fout.create_dataset("gmm/z", data=gmm_z)
fout.create_dataset("gmm/components", data=gmm.means_)
fout.create_dataset("gmm/covariances", data=gmm.covariances_)
def create_fuzzy(self, number_of_clusters, data):
    """Build a fuzzy k-means model (m=2, at most 100 iterations) and fit it.

    number_of_clusters: value passed as ``k`` to ``FuzzyKMeans``.
    data: object handed to ``fit``.

    Returns the fitted model.
    """
    model = FuzzyKMeans(k=number_of_clusters, m=2, max_iter=100)
    model.fit(data)
    return model
def fuzzy_means(dados):
    """Fit a two-cluster fuzzy k-means model (m=1000) on ``dados``.

    Returns the fitted model.
    """
    model = FuzzyKMeans(k=2, m=1000)
    model.fit(dados)
    return model
def sklearn(self):
    """Cluster the two-parameter data, then relabel a second frame by box.

    1. Build a two-column frame from ``data_handler_1.param1`` / ``param2``,
       fit fuzzy k-means (k=4, m=4) on it and attach the labels.
    2. Plot the fitted centres and compute, per label, the min/max of both
       parameters (an axis-aligned bounding box per group).
    3. Build a second frame, give it the same labels, and for the first 40
       rows overwrite the label with the position of every bounding box
       that strictly contains the row (the last matching box wins).
    4. Print the frame and show a scatter plot titled with its silhouette
       score.
    """
    def two_column_frame(handler):
        # One row per sample: [param1, param2].
        rows = [[handler.param1[i], handler.param2[i]]
                for i in range(len(handler.param1))]
        return pd.DataFrame(np.array(rows))

    datapd = two_column_frame(data_handler_1)

    k = 4
    fuzzy_kmeans = FuzzyKMeans(k=k, m=4, max_iter=300)
    fuzzy_kmeans.fit(datapd)
    datapd['labels'] = pd.Series(fuzzy_kmeans.labels_)
    # Silhouette score of the first clustering; kept so the call still fails
    # early on a degenerate labelling, the value itself is not displayed.
    score = ss(datapd[[0, 1]], labels=datapd['labels'])

    for center in fuzzy_kmeans.cluster_centers_:
        plt.plot(center[0], center[1], 'ro')

    group_data = datapd.groupby(pd.Series(fuzzy_kmeans.labels_),
                                group_keys=datapd['labels'].unique())

    second_data = DataHandler()
    second_data.open_file("files\\sdmt3.txt")
    # NOTE(review): second_data is opened but the second frame is still
    # built from data_handler_1 -- looks like a copy/paste slip; confirm
    # the intended source before changing it.
    sec_datapd = two_column_frame(data_handler_1)

    # Per-group bounding box of (param1, param2), in order of first
    # appearance of each label.
    max_values = []
    min_values = []
    for group_label in datapd['labels'].unique():
        group_param1 = group_data[0].get_group(group_label)
        group_param2 = group_data[1].get_group(group_label)
        max_values.append((np.max(group_param1), np.max(group_param2)))
        min_values.append((np.min(group_param1), np.min(group_param2)))

    sec_datapd['labels'] = pd.Series(fuzzy_kmeans.labels_)
    # DataFrame.set_value no longer exists in current pandas; .at is the
    # scalar setter that replaces it.
    sec_datapd.at[2, 'labels'] = 50
    print(sec_datapd)

    for i in range(0, 40):
        # x and y do not change while labels are rewritten, so the row is
        # read once per i instead of once per candidate box.
        x, y, _ = sec_datapd.values[i]
        for j in range(len(max_values)):
            max1, max2 = max_values[j]
            min1, min2 = min_values[j]
            if min1 < x < max1 and min2 < y < max2:
                sec_datapd.at[i, 'labels'] = j

    sec_score = ss(sec_datapd[[0, 1]], labels=sec_datapd['labels'])
    print(sec_datapd)

    sec_datapd.plot.scatter(x=0, y=1, c='labels', colormap='viridis')
    plt.xlabel("Param 1")
    plt.ylabel("Param2")
    plt.title(f'K = {k}, Silhouette score = {sec_score}')
    plt.show()