Example #1
    def select_RPs(self):
        N = self.X.shape[0]

        if self.rp_number <= 1: self.rp_number = int(self.rp_number * N)

        # fcm = FCM(n_clusters=self.rp_number, random_state=self.random_state, max_iter=50)
        fcm = FCM(k=self.rp_number,
                  random_state=self.random_state,
                  max_iter=50)
        fcm.fit(self.X)
        # c = fcm.u.argmax(axis=1)
        c = fcm.labels_
        # clusters whose members all belong to a single class (homogeneous clusters)
        homongenious_clusters = np.where(
            np.bincount(
                np.unique(np.vstack((c, self.y.argmax(
                    axis=1))), axis=1)[0, :]) == 1)[0]
        # centers = fcm.centers[homongenious_clusters,:]
        centers = fcm.cluster_centers_[homongenious_clusters, :]
        # if all clusters are heterogeneous, use all of them
        if len(centers) == 0:
            # centers = fcm.centers
            centers = fcm.cluster_centers_

        # take the sample closest to each selected center
        rp_id = cdist(centers, self.X).argmin(axis=1)

        self.rp_X = self.X[rp_id, :]
        self.rp_y = self.y[rp_id, :]

        self.D_x = cdist(self.X, self.rp_X)
        self.D_y = cdist(self.y, self.rp_y)
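The homogeneous-cluster test above is terse. Here is the same idea on a tiny made-up example (plain NumPy, nothing from the original class):

import numpy as np

c = np.array([0, 0, 1, 1, 2, 2])   # cluster assignment of 6 samples
y = np.array([0, 0, 0, 1, 1, 1])   # class label of the same samples

# unique (cluster, class) pairs; a cluster that appears in exactly one pair
# contains a single class, i.e. it is homogeneous
pairs = np.unique(np.vstack((c, y)), axis=1)
homogeneous = np.where(np.bincount(pairs[0, :]) == 1)[0]
print(homogeneous)   # [0 2] -- cluster 1 mixes both classes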
Example #2
    def simulate(self):
        scales = np.random.gamma(1, 10, self.M)**-1
        all_P = np.random.poisson(100*self.N)
        self.x = np.zeros((all_P, self.M))
        self.assignments = np.zeros((all_P, 2), dtype=int)
        for i in range(all_P):
            z = list(np.random.multinomial(1, self.beta)).index(1)
            x = np.random.multivariate_normal(self.mu[z], self.Sigma[z])
            self.x[i] = x
            self.assignments[i,1] = z

        # cluster x into N groups
        fuzzy_kmeans = FuzzyKMeans(k=self.N, m=2)
        fuzzy_kmeans.fit(self.x)

        self.pi = np.zeros((self.K, self.N))
        self.P = np.zeros(self.N)
        self.x_hat = np.zeros((self.N, self.K, self.M))
        for i in range(all_P):
            n = list(np.random.multinomial(1, fuzzy_kmeans.fuzzy_labels_[i])).index(1)
            self.assignments[i,0] = n
            self.x_hat[n][self.assignments[i,1]] += self.x[i]
            self.P[n] += 1
            self.pi[self.assignments[i,1],n] += 1
        # normalize pi
        self.pi = (self.pi.T / self.pi.sum(1).T).T
        self.y = self.draw(self.x_hat.sum(1) / self.P[:,np.newaxis], scales)
        self.x = self.itransform(self.x)
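The second loop above works because each row of fuzzy_labels_ behaves as a membership distribution over the clusters (it sums to 1), so it can be fed straight into np.random.multinomial. A tiny standalone check of that, with made-up data and assuming the sklearn_extensions implementation used elsewhere in these examples:

import numpy as np
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans

pts = np.random.randn(200, 3)                 # made-up data
fk = FuzzyKMeans(k=4, m=2)
fk.fit(pts)
print(fk.fuzzy_labels_[0], fk.fuzzy_labels_[0].sum())        # memberships of sample 0, ~1.0
n = np.random.multinomial(1, fk.fuzzy_labels_[0]).argmax()   # draw a cluster index from them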
Example #3
    def __init__(self,
                 n_clusters=8,
                 m=1,
                 init='k-means++',
                 n_init=10,
                 max_iter=300,
                 tol=1e-4,
                 precompute_distances='auto',
                 verbose=0,
                 random_state=None,
                 copy_x=True,
                 n_jobs=None,
                 algorithm='auto'):

        FuzzyKMeans.__init__(self,
                             k=n_clusters,
                             m=m,
                             max_iter=max_iter,
                             random_state=random_state,
                             tol=tol)
        KMeans.__init__(self,
                        n_clusters=n_clusters,
                        init=init,
                        n_init=n_init,
                        max_iter=max_iter,
                        tol=tol,
                        precompute_distances=precompute_distances,
                        verbose=verbose,
                        random_state=random_state,
                        copy_x=copy_x,
                        n_jobs=n_jobs,
                        algorithm=algorithm)
Example #4
    def __init__(self, data, c):
        self.data = data
        self.n_clusters = c
        # Fuzzy index (m) set to 2 to match the paper
        if c is None: self.fuzzy_means = FuzzyKMeans(m=2)
        else:
            self.fuzzy_means = FuzzyKMeans(self.n_clusters, m=2)
Example #5
    def plot_compare_mu(self, feature1, feature2):

        ## Run Other Models and plot results
        # Fuzzy K-Means
        mdl = FuzzyKMeans(k=self.settings.K)
        mdl.fit(self.data.obs[:])

        # K-Means
        kmeans = KMeans(k=self.settings.K)
        kmeans.fit(self.data.obs[:])

        # Gaussian Mixture Model
        gmm = GaussianMixture(n_components=self.settings.K)
        gmm.fit(self.data.obs[:])

        # Plot Results
        plt.clf()
        plt.plot(self.data.obs[:, feature1],
                 self.data.obs[:, feature2],
                 'ko',
                 markersize=2,
                 label='Data')  # Observed Data
        plt.plot(self.inference_mu[:, feature1,
                                   len(self.iterations) - 1],
                 self.inference_mu[:, feature2,
                                   len(self.iterations) - 1],
                 'b*',
                 markersize=6,
                 label='NDMs Inferred')  # Inferred Centers
        plt.plot(self.data.mu[:, feature1],
                 self.data.mu[:, feature2],
                 'ro--',
                 markersize=6,
                 label='True')  # True Centers
        plt.plot(self.data.mu[[-1, 0], feature1], self.data.mu[[-1, 0],
                                                               feature2],
                 'r--')  # Complete True Polygon
        plt.plot(mdl.cluster_centers_[:, feature1],
                 mdl.cluster_centers_[:, feature2],
                 'ks',
                 markersize=6,
                 label='Fuzzy K-Means')
        plt.plot(kmeans.cluster_centers_[:, feature1],
                 kmeans.cluster_centers_[:, feature2],
                 'kv',
                 markersize=6,
                 label='K-Means')
        plt.plot(gmm.means_[:, feature1],
                 gmm.means_[:, feature2],
                 'kh',
                 markersize=4,
                 label='GMM')

        plt.legend()
        plt.savefig(os.path.join(self.settings.outdir, 'Comparison_Graph.png'))
Example #6
class FuzzyMeansAlgorithm:

    def __init__(self, data, c):
        self.data = data
        self.n_clusters = c
        # Fuzzy index (m) set to 2 to match the paper
        if c is None: self.fuzzy_means = FuzzyKMeans(m=2)
        else:
            self.fuzzy_means = FuzzyKMeans(self.n_clusters, m=2)

    def model_name(self):
        title = "Fuzzy-Means"
        return title

    def clusterize(self):
        print(" * Clustering data with {}...".format(self.model_name()))
        return self.fuzzy_means.fit(self.data)

    def get_model(self):
        return self.fuzzy_means
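A hypothetical usage of this wrapper (the data array is made up, not from the source):

import numpy as np

data = np.random.rand(100, 4)
algo = FuzzyMeansAlgorithm(data, c=3)
algo.clusterize()
print(algo.get_model().cluster_centers_)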
Example #7
if opts.minibatch:
    km = MiniBatchKMeans(n_clusters=true_k,
                         init='k-means++',
                         n_init=1,
                         init_size=1000,
                         batch_size=1000,
                         verbose=opts.verbose)
else:
    km = KMeans(n_clusters=true_k,
                init='k-means++',
                max_iter=100,
                n_init=1,
                verbose=opts.verbose)

#############
fuzzy_kmeans = FuzzyKMeans(k=true_k, m=1.01)  #, max_iter=1000)

X1 = np.asarray(X.todense())

fuzzy_kmeans.fit(X1)


def print_metrics(km):
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
    print("Completeness: %0.3f" %
          metrics.completeness_score(labels, km.labels_))
    print("V-measure: %0.3f" % metrics.v_measure_score(labels, km.labels_))
    print("Adjusted Rand-Index: %.3f" %
          metrics.adjusted_rand_score(labels, km.labels_))
    print("Silhouette Coefficient: %0.3f" %
          metrics.silhouette_score(X, km.labels_, sample_size=1000))
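The helper is not called in this snippet; hypothetical calls could look like the following (labels and X come from the surrounding script). It also works for the fuzzy model, since this FuzzyKMeans exposes hard labels_ as well:

km.fit(X)
print_metrics(km)            # (mini-batch) k-means
print_metrics(fuzzy_kmeans)  # fuzzy k-means, evaluated through its hard labels_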
Example #8
def get_FuzzyKmeans(dataframe,K):
    mdl = FuzzyKMeans(k=K)
    mdl.fit(dataframe)
    return mdl.labels_
"""

import numpy as np
from sklearn_extensions.fuzzy_kmeans import KMedians, FuzzyKMeans, KMeans
from sklearn.datasets.samples_generator import make_blobs

np.random.seed(0)

batch_size = 45
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=1200, centers=centers, cluster_std=0.3)

kmeans = KMeans(k=3)
kmeans.fit(X)

kmedians = KMedians(k=3)
kmedians.fit(X)

fuzzy_kmeans = FuzzyKMeans(k=3, m=2)
fuzzy_kmeans.fit(X)

print('KMEANS')
print(kmeans.cluster_centers_)

print('KMEDIANS')
print(kmedians.cluster_centers_)

print('FUZZY_KMEANS')
print(fuzzy_kmeans.cluster_centers_)
Example #10
sentimentScore = []

sentimentScore.extend(pos_tweets.flatten())
sentimentScore.extend(neg_tweets.flatten())
shuffle(sentimentScore)
sentimentScore = np.array(sentimentScore)
alldata = sentimentScore.reshape(-1, 1)

#### We use FuzzyKMeans (from sklearn_extensions) rather than skfuzzy's cmeans,
#### because skfuzzy's cmeans needs 2-dimensional data and we have one-dimensional data.
#### http://pythonhosted.org/scikit-fuzzy/auto_examples/plot_cmeans.html

#### Fuzzy KMeans
#print('FUZZY_KMEANS')

fuzzy_kmeans = FuzzyKMeans(k=6, m=2)
fuzzy_kmeans.fit(alldata)
print(np.sort(np.array(fuzzy_kmeans.cluster_centers_).flatten()))
# print(fuzzy_kmeans.cluster_centers_)
# print (kmeans.labels_)
# print (kmeans.cluster_centers_)
# print (kmeans.n_clusters)

### contains predicted centroids for all 6 emoticons, will be printed in fuzzy_output.txt
my_cluster_centers = np.array(fuzzy_kmeans.cluster_centers_).flatten().tolist()
my_cluster_centers = sorted(my_cluster_centers)
negative_centers = np.array(my_cluster_centers[:3]).flatten()
positive_centers = np.array(my_cluster_centers[3:]).flatten()
# print(positive_centers, negative_centers)

## cluster all data into 6 groups depending on which centroid it is closest to
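The snippet stops here; a minimal sketch of that last step under the same setup (not from the source):

# assign every score to the nearest of the 6 centroids (1-D data)
centroids = np.array(my_cluster_centers)                      # already sorted above, shape (6,)
assignments = np.argmin(np.abs(alldata - centroids), axis=1)  # index of the closest centroid per sample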
Example #11
data1 = np.random.multivariate_normal(miu1, cov1, size=n)
data2 = np.random.multivariate_normal(miu2, cov2, size=n)
data3 = np.random.multivariate_normal(miu3, cov3, size=n)

data = np.concatenate((data1, data2, data3))
np.random.shuffle(data)

print(data.shape)

plt.xlim(-40, 50)
plt.ylim(-40, 50)

start_time = time.time()

fuzzy = FuzzyKMeans(k=3)
fuzzy.fit(data)

centers = fuzzy.cluster_centers_

end_time = time.time()

print((end_time - start_time))


plt.plot(data[:, 0], data[:, 1], marker="o", linestyle="")
plt.plot(centers[:, 0], centers[:, 1], marker="o", linestyle="",color='red')
plt.show()


Example #12
# create kmeans object
kmeans = KMeans(k=5)
# fit kmeans object to data
kmeans.fit(points)
# print location of clusters learned by kmeans object
print('K-means cluster centroids:\n', kmeans.cluster_centers_)
###########################################################################################################################
# create kmedians object
kmedians = KMedians(k=5)
# fit kmedians object to data
kmedians.fit(points)
# print location of clusters learned by kmedians object
print('K-medians cluster centroids:\n', kmedians.cluster_centers_)
###########################################################################################################################
# create kmeansf (fuzzy) object
kmeansf = FuzzyKMeans(k=5)
# fit fuzzy k-means object to data
kmeansf.fit(points)
# print location of clusters learned by kmeans fuzzy object
print('K-means fuzzy cluster centroids:\n', kmeansf.cluster_centers_)
###########################################################################################################################

fig = plt.figure()
colors = ['#4EACC5', '#FF9C34', '#4E9A06', '#8B0000', '#FFEF00']

objects = (kmeans, kmedians, kmeansf)
X = points

for i, obj in enumerate(objects):
    ax = fig.add_subplot(1, len(objects), i + 1)
    for k, col in zip(range(obj.k), colors):
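        # (the source example is cut off here; a plausible continuation, not from
        #  the original: plot each cluster's members and its center)
        my_members = obj.labels_ == k
        ax.plot(X[my_members, 0], X[my_members, 1], '.', color=col, markersize=4)
        ax.plot(obj.cluster_centers_[k, 0], obj.cluster_centers_[k, 1], 'o',
                markerfacecolor=col, markeredgecolor='k', markersize=8)
    ax.set_title(type(obj).__name__)
plt.show()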
Example #13
clustering_df = df.drop(columns=["Unnamed: 0", "Player", "final_team", "Pos"])

# we keep the interesting columns
df_fcm = df[[
    'Player', 'TRB', 'PTS', 'AST', "DWS", '3PA', "OWS", "USG%", "Height"
]]

# we keep the player names for later
players_name = df_fcm["Player"]

# we remove the player column for the computation
df_fcm = df_fcm.loc[:, (df_fcm.columns != "Player")]

# Computation
nb_cluster_fuzzy = 35
fuzzy_kmeans = FuzzyKMeans(k=nb_cluster_fuzzy, m=1.1)
fuzzy_kmeans.fit(df_fcm)
fuzzy_clusters = pd.DataFrame(fuzzy_kmeans.fuzzy_labels_)

# we add the player names back
fuzzy_clusters = pd.concat([players_name, fuzzy_clusters], axis=1)

nb_max_players_per_cluster_fcm = 3

final_clusters = pd.DataFrame()

for i in range(nb_cluster_fuzzy):
    # let's keep the corresponding column of membership degrees
    sets = fuzzy_clusters[["Player", i]]

    # let's sort
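    # (the source example is cut off here; a plausible continuation, not from the
    #  original: sort cluster i by membership degree and keep its top players)
    sets = sets.sort_values(by=i, ascending=False)
    final_clusters[i] = sets["Player"].head(nb_max_players_per_cluster_fcm).values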
Example #14
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans

mdl = FuzzyKMeans(k=7, m=2)
mdl.fit(np.concatenate([x_km, x_km_test], axis=0))
train_fcm_pred = list(mdl.labels_)[0:x_km.shape[0]]
test_fcm_pred = list(mdl.labels_)[x_km.shape[0]:]
Example #15
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans


### Function used to open a ComputedCluster file and rebuild a dataframe with the player information (team, PER, etc.)
def import_cluster(epsilon=0.9, minPoints=2, NoiseProp=0.59):
    pathCluster = "../ComputedClusters/" + "epsilon_" + str(
        epsilon) + "_MinPoints_" + str(minPoints) + "_NoiseProp_" + str(
            NoiseProp) + ".csv"
    df = pd.read_csv(pathCluster, sep=';')
    df.rename(columns={'0': 'Player', '1': 'Cluster'}, inplace=True)
    df_PS = pd.read_csv('../csv/players_stats.csv', sep=',')
    df_join = df.join(df_PS.set_index('Player'), on='Player')
    return (df_join)


df = import_cluster()
df_noise = df[df.Cluster == -1]  # we keep only the noise
df_noise = df_noise.reset_index(drop=True)
player_names = pd.DataFrame(df_noise["Player"])
#player_names = player_names.loc[~player_names.index.duplicated(keep='first')]
df = df_noise[['TRB', 'PTS', 'AST', 'DWS', 'TS%', "3PA", "OWS", "USG%"]]

fuzzy_kmeans = FuzzyKMeans(k=5, m=2)
fuzzy_kmeans.fit(df)

clusters = pd.DataFrame(fuzzy_kmeans.fuzzy_labels_)
res = pd.concat([player_names, clusters], axis=1)

print('FUZZY_KMEANS')
print(fuzzy_kmeans.cluster_centers_)
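A small follow-up that is not in the source: reduce the membership matrix to one hard cluster per player.

res['best_cluster'] = clusters.values.argmax(axis=1)
print(res[['Player', 'best_cluster']].head())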
Example #16
    fout.create_dataset("pf/z", data=pf_z)
    fout.create_dataset("pf/components", data=pf_components.T)

# K-Means
from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=args.K)
k_z = kmeans.fit_predict(obs)
k_components = kmeans.cluster_centers_
fout.create_dataset("kmeans/z", data=k_z)
fout.create_dataset("kmeans/components", data=k_components)

# Fuzzy K-Means
from sklearn_extensions.fuzzy_kmeans import FuzzyKMeans

fkmeans = FuzzyKMeans(k=args.K, m=2)
fkmeans.fit(obs)
fk_z = fkmeans.fuzzy_labels_
fk_components = fkmeans.cluster_centers_
fout.create_dataset("fuzzy_kmeans/z", data=fk_z)
fout.create_dataset("fuzzy_kmeans/components", data=fk_components)

# GMM
from sklearn.mixture import GaussianMixture

gmm = GaussianMixture(n_components=args.K)
gmm.fit(obs)
gmm_z = gmm.predict_proba(obs)
fout.create_dataset("gmm/z", data=gmm_z)
fout.create_dataset("gmm/components", data=gmm.means_)
fout.create_dataset("gmm/covariances", data=gmm.covariances_)
Example #17
    def create_fuzzy(self, number_of_clusters, data):
        fuzzy_kmeans = FuzzyKMeans(k=number_of_clusters, m=2, max_iter=100)
        fuzzy_kmeans.fit(data)
        return fuzzy_kmeans
Example #18
def fuzzy_means(dados):
    fuzzy_kmeans = FuzzyKMeans(k=2, m=1000)
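    # note (not in the source): with such a large fuzzifier (m=1000) the membership
    # degrees become nearly uniform (about 1/k per cluster), i.e. a very soft clustering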
    fuzzy_kmeans.fit(dados)

    return fuzzy_kmeans
Example #19
    def sklearn(self):
        all_data = list()

        for x in range(0, len(data_handler_1.param1)):
            all_data.append(
                list([data_handler_1.param1[x], data_handler_1.param2[x]]))

        data = np.array(all_data)
        datapd = pd.DataFrame(data)
        scores = []

        # for k in range(2, 10):
        k = 4
        fuzzy_kmeans = FuzzyKMeans(k=k, m=4, max_iter=300)
        fuzzy_kmeans.fit(datapd)
        datapd['labels'] = pd.Series(fuzzy_kmeans.labels_)
        score = ss(datapd[[0, 1]], labels=datapd['labels'])
        scores.append(score)
        #
        # datapd.plot.scatter(x=0, y=1, c='labels', colormap='viridis')
        # plt.xlabel("Param 1")
        # plt.ylabel("Param2")
        # plt.title(f'K = {k}, Silhouette score = {score}')

        for center in fuzzy_kmeans.cluster_centers_:
            plt.plot(center[0], center[1], 'ro')

        # for datapd['labels']
        group_data = datapd.groupby(pd.Series(fuzzy_kmeans.labels_))
        # print(datapd)
        # print(group_data[1].get_group(1))

        # n = [i for i in range(2, 10)]
        # plt.figure()
        # plt.plot(n, scores)
        # plt.xlabel("K")
        # plt.ylabel("Silhouette score")
        # plt.show()

        second_data = DataHandler()
        second_data.open_file("files\\sdmt3.txt")
        second = list()

        for x in range(0, len(data_handler_1.param1)):
            second.append(
                list([data_handler_1.param1[x], data_handler_1.param2[x]]))

        sec_data = np.array(second)
        sec_datapd = pd.DataFrame(sec_data)
        scores = []
        # print(second_data.param1)
        max_values = []
        min_values = []
        for group_label in datapd['labels'].unique():
            max_param1 = np.max(group_data[0].get_group(group_label))
            max_param2 = np.max(group_data[1].get_group(group_label))
            min_param1 = np.min(group_data[0].get_group(group_label))
            min_param2 = np.min(group_data[1].get_group(group_label))
            tuple_max = (max_param1, max_param2)
            tuple_min = (min_param1, min_param2)
            max_values.append(tuple_max)
            min_values.append(tuple_min)
        # print(max_values)
        # print(min_values)
        # print(sec_datapd)

        sec_datapd['labels'] = pd.Series(fuzzy_kmeans.labels_)

        sec_datapd.at[2, 'labels'] = 50

        print(sec_datapd)
        for i in range(0, 40):
            for j in range(len(max_values)):
                x, y, z = sec_datapd.values[i]
                max1, max2 = max_values[j]
                min1, min2 = min_values[j]
                # print(x,y)
                if min1 < x < max1 and min2 < y < max2:
                    sec_datapd.at[i, 'labels'] = j

        sec_score = ss(sec_datapd[[0, 1]], labels=sec_datapd['labels'])

        print(sec_datapd)
        sec_datapd.plot.scatter(x=0, y=1, c='labels', colormap='viridis')
        plt.xlabel("Param 1")
        plt.ylabel("Param2")
        plt.title(f'K = {k}, Silhouette score = {sec_score}')
        plt.show()