Пример #1
0
# Benchmark driver: for every configured dataset, run Cluster once per rho
# value and collect the returned scores in `results`.
#
# Each entry of `a` is a (file name, K) pair; the file is looked up under
# ../datasets/ and K is forwarded unchanged to Cluster.
# Other pairs this script has been pointed at:
#   ('spiral.txt', 3), ('R15.txt', 15), ('Compound.txt', 6), ('pathbased.txt', 3)
# Files that appeared in earlier name-only lists (no K recorded here):
#   'Aggregation.txt', 'D31.txt', 'jain.txt'
# A commented-out alternative built the database with DatabaseIris() instead.
a = [('flame.txt', 2)]

# rho grid: 1.0, 1.2, ..., 3.2 (12 values). linspace is used instead of
# arange(1.0, 3.4, 0.2) because, with a fractional step, the number of
# elements arange produces depends on floating-point rounding of
# (stop - start) / step. To probe a single value, use e.g. [2.8] instead.
rho_values = np.linspace(1.0, 3.2, 12)

# One (name, K, rho, score, score_normalized) tuple per run.
results = []

for f, K in a:
    fi = '../datasets/{}'.format(f)
    database = TwoDimensionData(fi, '\t')

    # Dataset label for reporting: file name without directory or extension.
    base_name = os.path.basename(fi)
    name = os.path.splitext(base_name)[0]

    for rho in rho_values:
        dissimilarity = DensityDistance(rho=rho)

        cluster = Cluster(database, dissimilarity, P_size=50, K=K, max_iterations=50)

        # compute() returns a pair, unpacked here as (score, score_normalized).
        score, score_normalized = cluster.compute()
        results.append((name, K, rho, score, score_normalized))


for name, K, rho, score, score_normalized in results:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn import metrics

from Dissimilarity import DensityDistance
from Database import TwoDimensionData

# Baseline: run scikit-learn K-means on the D31 dataset, report a silhouette
# score and show the resulting assignment as a scatter plot.
# DensityDistance is imported by this file but not used in this baseline; an
# earlier, disabled line constructed DensityDistance(rho=1.2) here.
database = TwoDimensionData("../datasets/D31.txt", '\t')

data = np.asarray(database.data)
labels = np.asarray(database.labels)

# fit_predict returns the cluster index assigned to each sample, so `kmeans`
# holds per-point assignments (used below as scatter colours), not the
# fitted estimator. random_state is fixed so repeated runs give the same result.
kmeans = KMeans(n_clusters=31, random_state=1).fit_predict(data)

# The score used to be computed and immediately discarded; keep and report it.
# NOTE(review): this scores `database.labels`, not the K-means assignment in
# `kmeans` -- confirm that is the intended measurement.
silhouette = metrics.silhouette_score(data, labels, metric='euclidean')
print("Silhouette score (database labels): {:.4f}".format(silhouette))

# Column 0 is plotted on the x axis and column 1 on the y axis.
plt.scatter(data[:, 0], data[:, 1], c=kmeans)
plt.title("K-means")
plt.show()