Exemplo n.º 1
0
def kmedoidsWithScores(filenameData, filenameSilhMean, filenameDBS,
                       filenameCHS, kClusters):
    """Cluster a sample file with K-Medoids and record three quality scores.

    The sample is read from ``filenameData`` inside ``root``, clustered into
    ``kClusters`` groups starting from the medoids returned by
    ``randomCenters`` (using the ``metricResearch`` metric), and the mean
    silhouette, ``dbs`` and ``chs`` scores are each handed to ``witTXT``
    together with a ``'k: <kClusters>'`` note.

    :param filenameData: name of the sample file, relative to ``root``.
    :param filenameSilhMean: name passed to ``witTXT`` for the mean
        silhouette score.
    :param filenameDBS: name passed to ``witTXT`` for the ``dbs`` score.
    :param filenameCHS: name passed to ``witTXT`` for the ``chs`` score.
    :param kClusters: number of clusters / initial medoids.
    :return: None.
    """
    import os  # local import: the file's import block is not visible here

    # os.path.join instead of a hard-coded '\\' so the path also resolves on
    # non-Windows platforms.
    data = read_sample(os.path.join(str(root), filenameData))

    initial_medoids = randomCenters(len(data), kClusters)
    kmedoids_instance = kmedoids(data, initial_medoids, metric=metricResearch)
    kmedoids_instance.process()

    clusters = kmedoids_instance.get_clusters()
    predicted = kmedoids_instance.predict(data)

    # The same annotation accompanies every score that is written out.
    note = 'k: ' + str(kClusters)

    silhouetteScore = silhouette(data, clusters).process().get_score()
    witTXT(np.mean(silhouetteScore), filenameSilhMean, filepath=root, note=note)

    # NOTE(review): dbs/chs are presumably the Davies-Bouldin and
    # Calinski-Harabasz scores -- confirm against the file's imports.
    witTXT(dbs(data, predicted), filenameDBS, filepath=root, note=note)
    witTXT(chs(data, predicted), filenameCHS, filepath=root, note=note)
Exemplo n.º 2
0
def kmediansWithScore(nameData, nameSilhouetteMean, nameDBS, nameCHS,
                      k_clusters, measure, kmin, kmax):
    """Cluster a sample file with K-Medians and compute quality scores.

    The sample is read from ``nameData`` inside ``root`` and clustered into
    ``k_clusters`` groups starting from the medians produced by
    ``kppi(data, k_clusters).initialize()``. The mean silhouette, ``dbs`` and
    ``chs`` scores are computed, and an ``elbow`` analysis is run over
    ``kmin``..``kmax``.

    NOTE: none of the computed values is written out or returned at the
    moment; ``nameSilhouetteMean``, ``nameDBS``, ``nameCHS`` and ``measure``
    are accepted but not used by the body.

    :param nameData: name of the sample file, relative to ``root``.
    :param nameSilhouetteMean: unused (reserved for the silhouette output).
    :param nameDBS: unused (reserved for the ``dbs`` output).
    :param nameCHS: unused (reserved for the ``chs`` output).
    :param k_clusters: number of clusters / initial medians.
    :param measure: unused.
    :param kmin: first argument after the data passed to ``elbow``.
    :param kmax: second argument after the data passed to ``elbow``.
    :return: None.
    """
    import os  # local import: the file's import block is not visible here

    # os.path.join instead of a hard-coded '\\' so the path also resolves on
    # non-Windows platforms.
    data = read_sample(os.path.join(str(root), nameData))

    initial_medians = kppi(data, k_clusters).initialize()
    kmedians_instance = kmedians(data, initial_medians)
    kmedians_instance.process()

    clusters = kmedians_instance.get_clusters()
    predicted = kmedians_instance.predict(data)

    # Quality scores. TODO: persist them (the CSV writers were disabled).
    silhouetteScore = silhouette(data, clusters).process().get_score()
    meanSilhouetteScore = np.mean(silhouetteScore)
    dbsScore = dbs(data, predicted)
    chsScore = chs(data, predicted)

    # Elbow analysis over the requested range of cluster counts.
    elbow_instance = elbow(data, kmin, kmax)
    elbow_instance.process()
    amount_clusters = elbow_instance.get_amount()  # most probable amount of clusters
    wce = elbow_instance.get_wce()
Exemplo n.º 3
0
def get_silhouette(args_dict):
    """
    Compute the average silhouette width of a clustering result.

    :param args_dict: dictionary with the keys
        ``"cl"`` - clustering object exposing ``get_clusters()``,
        ``"distance_metric"`` - data handed to ``silhouette``; its length is
        the number of comparable chunks,
        ``"log"`` - logger exposing ``info()``.
    :return: tuple ``(silhouette_width, cluster_indicator, cl)``:
        the mean of the per-element silhouette scores, a list mapping every
        chunk index to the id of its cluster, and the clustering object that
        was passed in.
    :raises ZeroDivisionError: if ``silhouette`` yields no scores.
    """
    cl = args_dict["cl"]
    distance_metric = args_dict["distance_metric"]
    log = args_dict["log"]

    # Clusters come as lists of chunk indexes, e.g. [[0, 4], [1, 2, 3]] for
    # two clusters; every index refers to a row of distance_metric.
    clusters = cl.get_clusters()

    # cluster_indicator[row] holds the id (position in `clusters`) of the
    # cluster that row belongs to; rows in no cluster keep the default 0.
    cluster_indicator = [0] * len(distance_metric)
    for cluster_id, cluster in enumerate(clusters):
        for chunk_index in cluster:
            cluster_indicator[chunk_index] = cluster_id

    silhouette_width_list = silhouette(distance_metric,
                                       clusters).process().get_score()
    silhouette_width = (float(sum(silhouette_width_list)) /
                        len(silhouette_width_list))

    log.info("K={num_clusters}".format(num_clusters=str(len(clusters))))
    log.info("{result}".format(result=str(clusters)))
    log.info("Silhouette width={sil}".format(sil=str(silhouette_width)))
    return silhouette_width, cluster_indicator, cl
Exemplo n.º 4
0
def get_silhouette(samples1, samples2):
    """Return the mean silhouette score of two sample sets seen as two clusters.

    Each sample set is clustered on its own via ``run_kmedoids(samples, 1)``.
    The two sets are then stacked, the indexes of the second set are shifted
    by ``len(samples1)`` so they address the stacked array, and the silhouette
    scores of the combined clustering are averaged.

    :param samples1: first sample set (stacked first).
    :param samples2: second sample set (stacked after ``samples1``).
    :return: sum of the silhouette scores divided by the total sample count.
    """
    # Only the clusters are needed; run_kmedoids returns two further values.
    clusters1, _, _ = run_kmedoids(samples1, 1)
    clusters2, _, _ = run_kmedoids(samples2, 1)

    # Re-base the second set's indexes onto the concatenated sample array.
    offset = len(samples1)
    shifted = [offset + index for index in clusters2[0]]

    samples = np.concatenate((samples1, samples2), axis=0)

    # Keep the clusters as a plain list of index lists: stacking them with
    # np.concatenate fails as soon as the two sets differ in size, because
    # the index rows then have different lengths.
    clusters = [list(cluster) for cluster in clusters1] + [shifted]

    scores = silhouette(samples, clusters).process().get_score()
    return sum(scores) / len(samples)
Exemplo n.º 5
0
    def template_correct_scores(self, sample_path, answer_path):
        """Check the silhouette scores of a reference clustering.

        Reads the points from ``sample_path`` and the clusters from
        ``answer_path``, computes the silhouette scores and verifies that
        there is one score per point and that each score passes the
        ``assertion.le(-1.0, ...)`` / ``assertion.ge(1.0, ...)`` bound checks.
        """
        points = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        analysis = silhouette(points, reference_clusters).process()
        all_scores = analysis.get_score()

        # One score is expected for every point of the sample.
        assertion.eq(len(points), len(all_scores))
        for value in all_scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)
Exemplo n.º 6
0
    def correct_scores(sample_path, answer_path, ccore_flag):
        """Check the silhouette scores of a reference clustering.

        Same check as a plain silhouette run, but ``ccore_flag`` is forwarded
        as the ``ccore`` argument of ``silhouette``. Verifies one score per
        point and runs the ``assertion.le(-1.0, ...)`` /
        ``assertion.ge(1.0, ...)`` bound checks on every score.
        """
        points = read_sample(sample_path)
        reference_clusters = answer_reader(answer_path).get_clusters()

        analysis = silhouette(points, reference_clusters, ccore=ccore_flag).process()
        all_scores = analysis.get_score()

        # One score is expected for every point of the sample.
        assertion.eq(len(points), len(all_scores))
        for value in all_scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)
Exemplo n.º 7
0
    def correct_scores(sample_path, answer_path, ccore_flag, **kwargs):
        """Compute, check and return the silhouette scores of a reference clustering.

        Keyword arguments:
            data_type: ``'points'`` (default) or ``'distance_matrix'``. For
                ``'distance_matrix'`` the sample is first converted with
                ``calculate_distance_matrix`` using the ``EUCLIDEAN_SQUARE``
                metric.

        Returns the list of scores after verifying that there is one score
        per input row and running the ``assertion.le(-1.0, ...)`` /
        ``assertion.ge(1.0, ...)`` bound checks on every score.
        """
        data_type = kwargs.get('data_type', 'points')

        sample = read_sample(sample_path)
        if data_type == 'distance_matrix':
            square_euclidean = distance_metric(type_metric.EUCLIDEAN_SQUARE)
            sample = calculate_distance_matrix(sample, square_euclidean)

        reference_clusters = answer_reader(answer_path).get_clusters()

        analysis = silhouette(sample,
                              reference_clusters,
                              ccore=ccore_flag,
                              data_type=data_type).process()
        scores = analysis.get_score()

        # One score is expected for every row of the (possibly converted) sample.
        assertion.eq(len(sample), len(scores))
        for value in scores:
            assertion.le(-1.0, value)
            assertion.ge(1.0, value)

        return scores
Exemplo n.º 8
0
# Script section: K-Medoids clustering of `sample`, mean silhouette score,
# and the smallest non-zero distance of every point.
initial_medoids = ut.initial_medoids_paper_method(sample, k, distances)

# Create the K-Medoids instance, run the cluster analysis, fetch the clusters.
kmedoids_instance = kmedoids(sample, initial_medoids)
kmedoids_instance.process()
clusters = kmedoids_instance.get_clusters()

# Show the clusters, one per line.
for clusteri in clusters:
    print(clusteri)
print("\n")

# Silhouette scores; NaN entries are dropped before averaging.
# np.isnan is used rather than comparing the string form against 'nan'.
dirtyscore = silhouette(sample, clusters).process().get_score()
score = [x for x in dirtyscore if not np.isnan(x)]

print("score promedio de silhoette")
print(np.mean(np.asarray(score)))
print("\n")

# Minimum non-zero distance per point: zero entries of row i are skipped
# before taking the minimum. np.min raises ValueError for a row that has
# no non-zero entry.
minimo = np.asarray([
    np.min(
        np.asarray([
            distances[i][j] for j in range(number_of_data_points)
            if distances[i][j] != 0
        ])) for i in range(number_of_data_points)
])
Exemplo n.º 9
0
def meanSilh(data, clusters):
    """Return ``np.mean`` of the silhouette scores of ``clusters`` over ``data``."""
    analysis = silhouette(data, clusters).process()
    return np.mean(analysis.get_score())
Exemplo n.º 10
0
# Script section: run K-Medoids on the matrix X for every cluster count in
# `options`, store the result, print the mean silhouette score and plot.
metric = distance_metric(type_metric.USER_DEFINED, func=weighted_distance)
random_state = check_random_state(None)
print('success')

# Cluster counts to try; one full clustering round per entry.
options = [30]
for n_clusters in options:
    print("enter round " + str(n_clusters))
    initial_medoids = kpp_init(np.array(X), n_clusters, random_state)
    print(initial_medoids)

    # X is handed to K-Medoids as a distance matrix.
    kmedoids_instance = kmedoids(X, initial_medoids, data_type='distance_matrix')
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()
    store_clusters(clusters, data, length, n_clusters)
    medoids = kmedoids_instance.get_medoids()
    medoids_vectors = store_medoids(medoids, data, n_clusters)

    # The silhouette is evaluated on `data` with the user-defined metric.
    score = silhouette(data, clusters, metric=metric).process().get_score()
    print(sum(score) / len(score))

    # Plot first restricted to selected dimension pairs, then unrestricted.
    visual = cluster_visualizer_multidim()
    visual.append_clusters(clusters, data)
    visual.show(pair_filter=[[0, 10], [1, 10], [3, 10], [4, 10], [5, 10],
                             [6, 10], [7, 10], [9, 10], [0, 1]])
    visual.show()

def append_id_on_vector():
	with open('clean_ids.csv', newline='') as csvfile:
		id_d = list(csv.reader(csvfile))
	id_data = []
	length = []
	for line in id_d:
		playlist = list(filter(None, line))
		if len(playlist)>0:
Exemplo n.º 11
0
def main_fun():
    """Compare K-Means and K-Medoids on ``sample`` with two clusters each.

    Runs both algorithms, prints the K-Means centers and the K-Medoids medoid
    points, draws all four clusters plus both sets of centers on one canvas,
    and finally prints the summed silhouette scores of both clusterings.
    """
    # K-Means, started from the centers given by kmeans_plusplus_initializer.
    kmeans_seeds = kmeans_plusplus_initializer(sample, 2).initialize()
    kmeans_instance = kmeans(sample, kmeans_seeds)
    kmeans_instance.process()
    clusters = kmeans_instance.get_clusters()
    final_centers = kmeans_instance.get_centers()

    # K-Medoids, started from two distinct randomly drawn point indexes.
    medoid_seeds = random.sample(range(0, len(sample)), 2)
    kmedoids_instance = kmedoids(sample, medoid_seeds, tolerance=10, ccore=True)
    kmedoids_instance.process()
    clusters1 = kmedoids_instance.get_clusters()
    medoid_ids = kmedoids_instance.get_medoids()

    print('K-Mean Centroids: ' + str(final_centers))
    # Medoids are returned as indexes; look up the actual points.
    final_center = [sample[medoid_ids[0]], sample[medoid_ids[1]]]
    print('K-Medoid Medoid' + str(final_center))

    # Order the K-Medoids clusters by the first coordinate of their medoid:
    # c3 belongs to the medoid with the larger first coordinate.
    if sample[medoid_ids[0]][0] > sample[medoid_ids[1]][0]:
        c3, c4 = clusters1[0], clusters1[1]
    else:
        c4, c3 = clusters1[0], clusters1[1]

    # Same ordering for K-Means, based on the first coordinate of the centers.
    if final_centers[0][0] > final_centers[1][0]:
        c1, c2 = clusters[0], clusters[1]
    else:
        c2, c1 = clusters[0], clusters[1]

    visualizer = cluster_visualizer()
    layers = [
        dict(cluster=c1, data=sample, color='red', markersize=8),
        dict(cluster=c2, data=sample, color='black', markersize=8),
        dict(cluster=c3, data=sample, color='yellow'),
        dict(cluster=c4, data=sample, color='lime'),
        # The centers are passed as coordinates, hence no `data` argument.
        dict(cluster=final_centers, marker='*', markersize=10, color='purple'),
        dict(cluster=final_center, marker='*', markersize=10, color='pink'),
    ]
    for layer in layers:
        visualizer.append_cluster(**layer)
    visualizer.set_canvas_title(text='k-mean vs k-medoids')
    visualizer.show(invisible_axis=False)

    kmeoids_score = silhouette.silhouette(sample, clusters1).process().get_score()
    kmean_score = silhouette.silhouette(sample, clusters).process().get_score()
    # The printed value is the SUM of the per-point scores, not their mean.
    print('K-Medoid Score:' + str(sum(kmeoids_score)))
    print('K-Means Score:' + str(sum(kmean_score)))
Exemplo n.º 12
0
    return medoidsToInit


def kmedoidsWithScore(nameData, nameSilhouetteMean, nameDBS, nameCHS, k_clusters, measure, kmin, kmax):
    """Cluster a sample file with K-Medoids and compute quality scores.

    The sample is read from ``nameData`` inside ``root``. The number of
    clusters is taken from ``canoc(data, kmin, kmax)``, the initial medoids
    from ``rci(data, kClusters).initialize()``. The mean silhouette, ``dbs``
    and ``chs`` scores are then computed.

    NOTE: ``k_clusters`` is currently ignored in favour of the ``canoc``
    result; ``nameSilhouetteMean``, ``nameDBS``, ``nameCHS`` and ``measure``
    are accepted but not used, and none of the computed values is written
    out or returned.

    :param nameData: name of the sample file, relative to ``root``.
    :param nameSilhouetteMean: unused (reserved for the silhouette output).
    :param nameDBS: unused (reserved for the ``dbs`` output).
    :param nameCHS: unused (reserved for the ``chs`` output).
    :param k_clusters: unused, see the note above.
    :param measure: unused.
    :param kmin: second argument passed to ``canoc``.
    :param kmax: third argument passed to ``canoc``.
    :return: None.
    """
    import os  # local import: the file's import block is not visible here

    # The parameter is `nameData`; the undefined name `filenameData` used
    # before raised NameError. os.path.join replaces the hard-coded '\\'.
    data = read_sample(os.path.join(str(root), nameData))

    kClusters = canoc(data, kmin, kmax)

    initial_medoids = rci(data, kClusters).initialize()

    kmedoids_instance = kmedoids(data, initial_medoids)
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()
    predicted = kmedoids_instance.predict(data)

    # Quality scores. TODO: persist them (the CSV writers were disabled).
    silhouetteScore = silhouette(data, clusters).process().get_score()
    meanSilhouetteScore = np.mean(silhouetteScore)
    dbsScore = dbs(data, predicted)
    chsScore = chs(data, predicted)