def main():
    """Interactively benchmark K-means / K-means++ over a range of K.

    Prompts for the algorithm (1 or 2), the number of rounds ``N`` and an
    inclusive K range ``K1 K2``.  For every round and every K the chosen
    clusterer is run until no sample changes cluster or the iteration cap
    is passed; elapsed time, silhouette score and the last recorded
    sum-of-square-error are collected, reported through ``outputResult``
    and finally plotted with ``outputPlot``.

    Any exception raised while clustering or writing results is reported
    on stdout (type, file, line, message) instead of propagating.
    """
    timeData = []
    silhouette_scoreData = []
    sum_of_square_errorData = []
    sourceData = inputDataset()
    choose = int(
        input('Determine the clustering algorithm, 1) K-means, 2) K-means++:'))
    N = int(input('Determine running time:'))
    K1, K2 = map(
        int,
        input('Determine K range, (ex1:2 10, K=2~10; ex2:3 3, K=3):').split())
    try:
        # Validate the menu choice up front: previously an invalid value only
        # surfaced as an unbound-variable error after the old output files
        # had already been deleted.
        algorithms = {1: K_means, 2: K_meansPP}
        if choose not in algorithms:
            raise ValueError(
                'choose must be 1 (K-means) or 2 (K-means++), got '
                + str(choose))
        make_cluster = algorithms[choose]

        deletePreviousOutputFile()
        for n_th in range(N):
            timeData.append([])
            silhouette_scoreData.append([])
            sum_of_square_errorData.append([])
            for K in range(K1, K2 + 1):
                k_cluster = make_cluster(sourceData, K)
                start = time.time()
                k_cluster.initCentroid()  # Step 1.
                iteration = 0  # named so the builtin iter() is not shadowed
                while True:
                    countChangingCluster = k_cluster.fitting()  # Step 2.
                    k_cluster.updateCentroid()  # Step 3.
                    # Stop on convergence or once the counter has passed 50.
                    if iteration > 50 or countChangingCluster == 0:
                        break
                    iteration += 1
                end = time.time()
                elapsed = end - start
                print('Round: ' + str(n_th + 1) + ', K: ' + str(K) + ' done.')

                # The silhouette score is costly; compute it once and reuse
                # it for both the collected data and the per-run report.
                silhouette = metrics.silhouette_score(
                    sourceData, k_cluster.belongCluster)

                timeData[n_th].append(elapsed)
                silhouette_scoreData[n_th].append(silhouette)
                sum_of_square_errorData[n_th].append(
                    k_cluster.sum_of_square_error[-1])
                outputResult(K, iteration, elapsed, silhouette,
                             k_cluster.sum_of_square_error)
        outputPlot(K1, K2, numpy.array(timeData),
                   numpy.array(silhouette_scoreData),
                   numpy.array(sum_of_square_errorData))
    except Exception as e:
        # Best-effort reporting: keep the original "type file line" output
        # and append the message so the cause is not lost.
        exc_tb = e.__traceback__
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(type(e), fname, exc_tb.tb_lineno, e)
def img_seg(img_a, k_points, max_gen):
    """Rebuild an image from the output of ``KM.k_means``.

    The image is flattened to one row per pixel and handed to
    ``KM.k_means`` together with ``k_points`` and ``max_gen``.  The result
    is indexed as ``result[0]`` (one row per cluster) and ``result[1][i]``
    (an array broadcastable to the flattened image).
    NOTE(review): presumably ``result[0]`` holds the centroids and
    ``result[1][i]`` is non-zero exactly where a pixel belongs to cluster
    ``i`` -- confirm against ``KM.k_means``.

    Returns an array with the shape of ``img_a`` and dtype ``uint8``.
    """
    pixel_count = img_a.shape[0] * img_a.shape[1]
    array = np.reshape(img_a, (pixel_count, img_a.shape[2]))
    result = KM.k_means(array, k_points, max_gen)

    centroids = result[0]
    members = result[1]
    segmented = np.zeros(array.shape, dtype=np.int64)
    for idx in range(centroids.shape[0]):
        # x / x is 1 for non-zero entries and NaN for zeros; nan_to_num maps
        # the NaNs to 0, which turns the array into a 0/1 mask.
        mask = np.nan_to_num(members[idx] / members[idx]).astype(np.int64)
        segmented += mask * centroids[idx].astype(np.int64)

    return np.reshape(segmented, img_a.shape).astype(np.uint8)
def compare_algorithms(data, k):
    """Check the project K_means against scikit-learn on the same data.

    Runs the own implementation, then fits scikit-learn's KMeans seeded
    with the very same initial centroids (``n_init=1``, ``tol=0``,
    ``max_iter=300``).  For every instance the centroid chosen by
    scikit-learn must be numerically close to the centroid chosen by the
    own implementation.

    Args:
        data: sequence of instances, accepted by both implementations.
        k: number of clusters.

    Raises:
        AssertionError: if any instance is assigned to different centroids.
    """
    sys.stdout.write(
        f"Compare scikit and own implentation for k={k} on {len(data)} instances... "
    )
    sys.stdout.flush()

    kmeans = K_means(k, m=2, max_iterations=300, verbose=False)
    kmeans.run(data)

    ndinit = np.array(kmeans.initial_centroids)
    sk = scikit_KMeans(n_clusters=k, init=ndinit, max_iter=300, tol=0, n_init=1)
    sk.fit(data)

    # Predict all labels in one call instead of one predict() per instance.
    sk_labels = sk.predict(data)

    for i, instance in enumerate(data):
        sk_centroid = sk.cluster_centers_[sk_labels[i]]
        own_centroid = kmeans.centroids[kmeans.closest_centroid(i)[0]]
        assert np.allclose(
            sk_centroid, own_centroid
        ), f"FAIL, instance {instance} has different centroids! {sk_centroid} (scikit) and {own_centroid} (own algorithm)"
    print("✔")
if __name__ == "__main__":
    # Interactive mode so the figure can be redrawn between steps.
    plt.ion()
    plt.clf()

    def plot_k_means(k_means):
        """Wait for a key/mouse press, then redraw all clusters.

        Each centroid is drawn as an "X" marker; every instance is drawn
        in the colour selected by its cluster index.
        """
        plt.waitforbuttonpress()
        plt.clf()
        palette = ["r", "g", "c", "b", "k", "y"] * 10
        for cluster_id in k_means.instances_by_cluster:
            member_color = palette[cluster_id]
            centre = k_means.centroids[cluster_id]
            plt.scatter(centre[0], centre[1], marker="X", s=50)
            for member_idx in k_means.instances_by_cluster[cluster_id]:
                point = k_means.instances[member_idx]
                plt.scatter(point[0], point[1], color=member_color, s=30)
        plt.draw()

    # execute k-means clustering
    k_means = K_means(k=4, m=2, init_strategy=3)

    def plot(k_means, cycle):
        """Callback adapter: redraw the clusters, ignoring ``cycle``."""
        plot_k_means(k_means)

    # The same callback is registered for both hooks of run().
    k_means.run(
        data,
        after_centroid_calculation=plot,
        after_cluster_membership=plot,
    )
    plot_k_means(k_means)
    plt.show()
color=get_color(k), marker="o", label=f"source $X_{k}$") elif N == 3: ax.scatter(X[N_ranges[k]:N_ranges[k + 1], 0], X[N_ranges[k]:N_ranges[k + 1], 1], X[N_ranges[k]:N_ranges[k + 1], 2], color=get_color(k), marker="o", label=f"source $X_{k}$") plt.grid(True) ax.legend(loc="best", ncol=1, scatterpoints=1, numpoints=1) ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f')) plt.show() r = K_means(X, K=tK, v=1, title_str="K-means of original data") A = np.empty([M, M]) for i in range(M): for j in range(M): A[i, j] = np.exp(-(((X[i, :] - X[j, :])**2).sum()) / (2.0 * sigma**2)) for i in range(M): A[i, i] = 0 D = np.zeros([M, M]) for i in range(M): D[i, i] = A[i, :].sum() D2 = np.diag(A.dot(np.ones(M))) if not np.allclose(D, D2, eps): print("D - D2 =\n", D - D2)
def run_kmeans(data, k):
    """Print the cluster count, then run the project K_means on ``data``.

    The clusterer is built with ``m=2``, ``max_iterations=300`` and
    ``verbose=False``.  Nothing is returned.
    """
    print("K", k)
    clusterer = K_means(k, m=2, max_iterations=300, verbose=False)
    clusterer.run(data)
from k_means import K_means
import numpy as np
import matplotlib.pyplot as plt

# Demo: cluster random points with the project K_means and plot the result.
dataset = np.random.rand(100, 2).tolist()  # not limited to 2D dataset
K = 5
k_mean = K_means(dataset, K)
n_iter = 100  # max number of iterations

centroids = k_mean.begin()  # initialize centroids
clusters = k_mean.find_dist(centroids)  # initialize clusters

# Alternate centroid update and re-assignment until cluster_change() signals
# the stop condition or n_iter passes have run.  The counter is incremented
# before the check so that n_iter really is the maximum number of passes
# (the previous `i > n_iter` test allowed n_iter + 2).
# NOTE(review): cluster_change() is treated as "True means stop", exactly as
# before -- confirm against its implementation.
i = 0
done = False
while not done:
    centroids = k_mean.find_new_centroids(clusters)
    old_clusters = clusters
    clusters = k_mean.find_dist(centroids)
    i += 1
    done = k_mean.cluster_change(old_clusters, clusters) or i >= n_iter

# Assigning colors to each clusters and combining them.
# Points and labels are gathered per cluster and concatenated once; this
# avoids the dummy seed row whose fixed width of 3 only matched 2-D data,
# and skips empty clusters, which have no rows to stack.
point_parts = []
label_parts = []
for cluster_idx in range(K):
    members = np.array(clusters[cluster_idx])
    if members.size == 0:
        continue
    point_parts.append(members)
    label_parts.append(np.full(len(members), float(cluster_idx)))
plot_dataset = np.column_stack(
    (np.concatenate(point_parts), np.concatenate(label_parts)))

# The first two coordinates are plotted; the label is always the last column.
plt.scatter(plot_dataset[:, 0], plot_dataset[:, 1], c=plot_dataset[:, -1],
            s=50, cmap='viridis')
centroid_array = np.array(centroids)
plt.scatter(centroid_array[:, 0], centroid_array[:, 1], c='black', s=200,
            alpha=0.5)
# Load the comma-separated train/test files located relative to the current
# working directory.
train_data = np.genfromtxt(os.getcwd() + train_file_name, delimiter=',')
test_data = np.genfromtxt(os.getcwd() + test_file_name, delimiter=',')

# The last column is split off as the label; the rest are the inputs.
train_label = train_data[:, -1]
test_label = test_data[:, -1]
p_train_data = train_data[:, :-1]
p_test_data = test_data[:, :-1]
size_of_input = len(p_train_data[0])

for k in k_list:
    # Train `repeat` independent models and remember each one with its
    # training mean-square-error.
    model_table = []
    for run_idx in range(repeat):
        print("Repeat: ", run_idx + 1)
        model = K_means(k, range_of_input, size_of_input)
        model.train(p_train_data)
        train_mse = model.mse(p_train_data)
        model_table.append((train_mse, model))

    # Pick the run with the smallest error; ties keep the earliest run.
    best_run = 0
    for run_idx in range(1, len(model_table)):
        if model_table[best_run][0] > model_table[run_idx][0]:
            best_run = run_idx

    print("<< k set to: {} >>".format(k))
    print("Best run: ", best_run)
    best_error, best_model = model_table[best_run]
    print("Average mean-square-error: ", best_error)
    print("Mean-square-separation: ", best_model.mss())
    print("Mean entropy: ",
          best_model.m_entropy(number_of_classes, train_label))
cv2.createTrackbar('scale', 'image', 1, 100, f)
# Live loop: grab a frame, shrink it by the 'scale' trackbar, segment it with
# k-means, enlarge the result again and show it until 'q' is pressed.
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        # read() failed (device gone / stream ended): the frame is unusable,
        # so stop instead of crashing on frame.shape.
        break

    # +1 keeps the divisor non-zero when the trackbar sits at 0.
    scale = cv2.getTrackbarPos('scale', 'image') + 1
    # Clamp to one pixel: a large scale on a small frame would otherwise
    # yield a zero-sized target, which cv2.resize rejects.
    small_width = max(1, int(frame.shape[1] * (1 / scale)))
    small_height = max(1, int(frame.shape[0] * (1 / scale)))
    img_array = cv2.resize(frame,
                           dsize=(small_width, small_height),
                           interpolation=cv2.INTER_AREA)  # same value as 3

    # Our operations on the frame come here
    k = cv2.getTrackbarPos('k points', 'image')
    kp = KM.make_k_points(k, 3, 0, 255)
    gray = IS.img_seg(img_array, kp,
                      cv2.getTrackbarPos('max iteration', 'image'))
    gray = cv2.resize(gray,
                      dsize=(int(gray.shape[1] * scale),
                             int(gray.shape[0] * scale)),
                      interpolation=cv2.INTER_AREA)

    # Display the resulting frame
    cv2.imshow('image', gray)
    if cv2.waitKey(50) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
import pickle
import numpy as np
# Imported explicitly: `pd` is used below and previously only resolved if
# the star import from cluster_eval happened to re-export it.
import pandas as pd
from k_means import K_means
from cluster_eval import *
from sklearn import datasets
from sklearn.cluster import KMeans

## Iris run ###
iris = datasets.load_iris()
X = iris.data
Y = iris.target
Y_df = pd.DataFrame(Y)

# NOTE(review): the label says "k12" while 14 is passed here and to K_means
# below -- confirm which value is intended.
give_external_eval(
    Y,
    X,
    14,
    2.0,
    print_message="iris_run_m2_k12",
    iris=True,
)

k_means = K_means(k=14, m=2)
k_means.run(X)
give_tendency_eval(k_means)

best_run = table_class_vs_cluster(Y, k_means, iris=True)
# Total of the first three columns, reduced in a single array operation.
total_nbr_instances = best_run.iloc[:, :3].values.sum()
# Bare expression kept: it displays the value when run as a notebook cell.
total_nbr_instances