def random_state(kmin, kmax, algorithm, random_state, ccore_flag):
    """Check that two silhouette K-searches with the same random state agree.

    Runs ``silhouette_ksearch`` twice on ``SIMPLE_SAMPLES.SAMPLE_SIMPLE4`` with
    identical arguments and asserts that the chosen amount, the best score,
    the number of scores and every individual score are equal.

    Args:
        kmin: Lower K bound forwarded to ``silhouette_ksearch``.
        kmax: Upper K bound forwarded to ``silhouette_ksearch``.
        algorithm: Clustering algorithm selector forwarded to the search.
        random_state: Seed forwarded to both searches.
        ccore_flag: Forwarded as the ``ccore`` argument of the search.
    """
    sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE4)

    def run_search():
        # Both runs must be built from exactly the same arguments.
        return silhouette_ksearch(
            sample,
            kmin,
            kmax,
            algorithm=algorithm,
            random_state=random_state,
            ccore=ccore_flag,
        ).process()

    first = run_search()
    second = run_search()

    assertion.eq(first.get_amount(), second.get_amount())
    assertion.eq(first.get_score(), second.get_score())
    assertion.eq(len(first.get_scores()), len(second.get_scores()))

    first_scores = first.get_scores()
    second_scores = second.get_scores()
    for raw_key in first_scores:
        k = int(raw_key)
        # NaN never compares equal to itself, so a pair of NaN scores is
        # accepted as matching without going through assertion.eq.
        both_nan = math.isnan(first_scores[k]) and math.isnan(second_scores[k])
        if not both_nan:
            assertion.eq(first_scores[k], second_scores[k])
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Check that silhouette K-search finds roughly the expected cluster count.

    The search is repeated up to 15 times. Every attempt asserts the score
    bounds and the number of collected scores; the test passes as soon as one
    attempt reports an amount within one of the reference cluster count.

    Args:
        sample_path: Path passed to ``read_sample``.
        answer_path: Path passed to ``answer_reader`` for reference clusters.
        kmin: Lower K bound forwarded to ``silhouette_ksearch``.
        kmax: Upper K bound forwarded to ``silhouette_ksearch``.
        algorithm: Clustering algorithm selector forwarded to the search.
        ccore_flag: Forwarded as the ``ccore`` argument of the search.
    """
    attempts = 15
    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    testing_result = False
    for _ in range(attempts):
        result = silhouette_ksearch(
            sample, kmin, kmax, algorithm=algorithm, ccore=ccore_flag
        ).process()

        amount = result.get_amount()
        score = result.get_score()
        scores = result.get_scores()

        # The score is checked against -1.0 and 1.0 on every attempt.
        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        assertion.eq(kmax - kmin, len(scores))

        # Accept the expected amount +/- 1, never going below one cluster.
        lower_limit = max(1, len(clusters) - 1)
        upper_limit = len(clusters) + 1
        if lower_limit <= amount <= upper_limit:
            testing_result = True
            break

    assertion.true(testing_result)
def correct_ksearch(sample_path, answer_path, kmin, kmax, algorithm, ccore_flag):
    """Verify the K found by silhouette search against reference clusters.

    Up to 10 attempts are made. Each attempt asserts the score bounds and the
    number of per-K scores, and the check succeeds on the first attempt whose
    amount differs from the reference cluster count by at most one.

    Args:
        sample_path: Path passed to ``read_sample``.
        answer_path: Path passed to ``answer_reader`` for reference clusters.
        kmin: Lower K bound forwarded to ``silhouette_ksearch``.
        kmax: Upper K bound forwarded to ``silhouette_ksearch``.
        algorithm: Clustering algorithm selector forwarded to the search.
        ccore_flag: Forwarded as the ``ccore`` argument of the search.
    """
    attempts = 10
    points = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    testing_result = False
    for _ in range(attempts):
        search = silhouette_ksearch(
            points, kmin, kmax, algorithm=algorithm, ccore=ccore_flag
        ).process()

        amount = search.get_amount()
        score = search.get_score()
        scores = search.get_scores()

        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        assertion.eq(kmax - kmin, len(scores))

        # Tolerance window of one cluster around the reference answer,
        # clamped so the lower edge is never less than 1.
        upper_limit = len(clusters) + 1
        lower_limit = max(1, len(clusters) - 1)
        within_tolerance = lower_limit <= amount <= upper_limit
        if within_tolerance:
            testing_result = True
            break

    assertion.true(testing_result)
def find_best_k(samples):
    """Return the cluster amount chosen by a K-Medoids silhouette search.

    Runs ``silhouette_ksearch`` on ``samples`` with bounds 2 and 10, prints
    the score stored for the chosen amount and returns that amount.

    Args:
        samples: Data forwarded to ``silhouette_ksearch``.

    Returns:
        The value reported by ``get_amount()``.
    """
    result = silhouette_ksearch(
        samples,
        2,
        10,
        algorithm=silhouette_ksearch_type.KMEDOIDS,
    ).process()

    best_k = result.get_amount()
    best_score = result.get_scores()[best_k]
    print(f"Best Silhouette Score for k = {best_k}: {best_score}")
    return best_k
def find_best_k(samples):
    """Return the cluster amount chosen by a K-Medoids silhouette search.

    Logs the start of the search, runs ``silhouette_ksearch`` on ``samples``
    with bounds 2 and 10, logs the score stored for the chosen amount and
    returns that amount.

    Args:
        samples: Data forwarded to ``silhouette_ksearch``.

    Returns:
        The value reported by ``get_amount()``.
    """
    logging.info("Searching best k for clustering.")

    result = silhouette_ksearch(
        samples,
        2,
        10,
        algorithm=silhouette_ksearch_type.KMEDOIDS,
    ).process()

    best_k = result.get_amount()
    best_score = result.get_scores()[best_k]
    logging.info(f"Best Silhouette Score for k = {best_k}: {best_score}")
    return best_k
def use_experiment_with_pyclustering_kmedoids(file):
    """Run a K-Medoids silhouette search on a matrix file and log the scores.

    The matrix is loaded with ``read_matrix``, searched with
    ``silhouette_ksearch`` using 2 as the lower bound and the number of rows
    as the upper bound, and every (K, score) pair plus the chosen amount is
    written to the log. The run is bracketed by ``TimingLogger.start`` and
    ``TimingLogger.stop``.

    Args:
        file: Matrix source passed to ``read_matrix``.
    """
    TimingLogger.start('pyclustering.kmedoids', 'kmedoids')
    x = read_matrix(file)
    X = np.array(x)
    clusters = len(x)
    kmin = 2
    search_instance = silhouette_ksearch(
        X, kmin, clusters,
        algorithm=silhouette_ksearch_type.KMEDOIDS).process()
    scores = search_instance.get_scores()

    # The previous loop ran K over range(2, len(scores)) while reading
    # scores[K - 2]: it never logged the last two scores and, for a result
    # keyed by K, looked up the non-existent key 0. Iterate the (K, score)
    # pairs directly instead.
    if hasattr(scores, 'items'):
        scored = scores.items()
    else:
        # Positional result: the first entry belongs to K == kmin.
        scored = enumerate(scores, start=kmin)

    for k, score in scored:
        logging.info(
            'For n_clusters = %s The average silhouette_score is : %s',
            k, score)

    logging.info('Better choice is %s clusters', search_instance.get_amount())
    TimingLogger.stop('pyclustering.kmedoids')
def find_optimal_amout_clusters(sample_path, kmin, kmax, algorithm):
    """Find a cluster amount by silhouette search and show a K-Means result.

    Prints the per-K scores of the search, then clusters the sample with
    K-Means (centers initialised by ``kmeans_plusplus_initializer`` for the
    found amount) and displays the clusters with ``cluster_visualizer``.

    Args:
        sample_path: Path passed to ``read_sample``.
        kmin: Lower K bound forwarded to ``silhouette_ksearch``.
        kmax: Upper K bound forwarded to ``silhouette_ksearch``.
        algorithm: Clustering algorithm selector forwarded to the search.
    """
    sample = read_sample(sample_path)

    search = silhouette_ksearch(sample, kmin, kmax, algorithm=algorithm).process()
    amount = search.get_amount()
    scores = search.get_scores()
    print("Sample: '%s', Scores: '%s'" % (sample_path, str(scores)))

    # Cluster with the amount the search selected.
    start_centers = kmeans_plusplus_initializer(sample, amount).initialize()
    clusters = kmeans(sample, start_centers).process().get_clusters()

    view = cluster_visualizer()
    view.append_clusters(clusters, sample)
    view.show()
def template_correct_ksearch(self, sample_path, answer_path, kmin, kmax, algorithm):
    """Check that silhouette K-search finds exactly the expected cluster count.

    The search is repeated up to 5 times. Every attempt asserts the score
    bounds and the number of per-K scores; the check passes on the first
    attempt whose amount equals the number of reference clusters.

    Args:
        sample_path: Path passed to ``read_sample``.
        answer_path: Path passed to ``answer_reader`` for reference clusters.
        kmin: Lower K bound forwarded to ``silhouette_ksearch``.
        kmax: Upper K bound forwarded to ``silhouette_ksearch``.
        algorithm: Clustering algorithm selector forwarded to the search.
    """
    attempts = 5
    sample = read_sample(sample_path)
    clusters = answer_reader(answer_path).get_clusters()

    testing_result = False
    for _ in range(attempts):
        result = silhouette_ksearch(
            sample, kmin, kmax, algorithm=algorithm
        ).process()

        amount = result.get_amount()
        score = result.get_score()
        scores = result.get_scores()

        assertion.le(-1.0, score)
        assertion.ge(1.0, score)
        assertion.eq(kmax - kmin, len(scores))

        # Only an exact match with the reference answer counts as success.
        if amount == len(clusters):
            testing_result = True
            break

    assertion.true(testing_result)
from pyclustering.cluster.kmeans import kmeans
from pyclustering.cluster.silhouette import silhouette
from pyclustering.cluster.silhouette import silhouette_ksearch_type, silhouette_ksearch
from pyclustering.utils.metric import distance_metric, type_metric

# Prepare initial centers
# Four centers are requested from kmeans_plusplus_initializer, then K-Means is
# run on `data` with the Manhattan distance metric.
# NOTE(review): `data` and `kmeans_plusplus_initializer` are not defined or
# imported in the visible lines - presumably provided by an earlier cell; confirm.
centers = kmeans_plusplus_initializer(data, 4).initialize()
manhattan_metric = distance_metric(type_metric.MANHATTAN)
kmeans_instance = kmeans(data, centers, metric = manhattan_metric)
kmeans_instance.process()
clusters = kmeans_instance.get_clusters()

# # Calculate Silhouette score
# score = silhouette(data, clusters).process().get_score()

# %%
# Silhouette search over `data` with bounds 2 and 10 using K-Means; the
# selected amount and the per-K scores are kept, and the scores are printed.
search_instance = silhouette_ksearch(data, 2, 10, algorithm=silhouette_ksearch_type.KMEANS).process()
amount = search_instance.get_amount()
scores = search_instance.get_scores()
print("Scores: '%s'" % str(scores))

#%%
# Bare expression - presumably evaluated only to display `scores` when the
# cell is run interactively.
scores

# %%
import numpy as np
import matplotlib.pyplot as plt
# Integers 2..9 - presumably the K values used as the plot axis; TODO confirm.
x = np.arange(2, 10, 1)