def apply_kmeans(do_pca, x_train, y_train, x_test, y_test, kmeans_max_iter, kmeans_max_k):
    """Fit k-means repeatedly for each k, average the metrics, and plot them.

    For every k in ``range(1, kmeans_max_k)`` the model is fitted six times.
    The sequence returned by ``KMeans.fit`` (treated as SSE per iteration,
    judging by the variable names -- confirm against ``KMeans``), its
    minimum, and the purity are averaged over the repeats and handed to the
    plotting helpers.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        x_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Exclusive upper bound of the k sweep.
    """
    print('kmeans\n')

    # Six repeats, the same count as the original ``range(1, 7)`` loop.
    num_repeats = 6
    k_values = range(1, kmeans_max_k)

    train_sses_vs_iter = []
    train_sses_vs_k = []
    train_purities_vs_k = []

    # Repeats stay in the outer loop so the fits run in the original order.
    for repeat in range(num_repeats):
        for idx, k in enumerate(k_values):
            kmeans = KMeans(k, kmeans_max_iter)
            sse_vs_iter = kmeans.fit(x_train)
            purity = kmeans.get_purity(x_train, y_train)
            if repeat == 0:
                # Copy so later accumulation never touches fit()'s own object.
                train_sses_vs_iter.append(list(sse_vs_iter))
                train_purities_vs_k.append(purity)
                train_sses_vs_k.append(min(sse_vs_iter))
            else:
                # Element-wise sum of the curves. The previous code added a
                # single scalar (indexed by k) to the whole curve.
                # NOTE(review): zip() truncates to the shorter curve; this
                # assumes fit() returns the same length every time.
                train_sses_vs_iter[idx] = [
                    total + sse
                    for total, sse in zip(train_sses_vs_iter[idx], sse_vs_iter)
                ]
                train_purities_vs_k[idx] += purity
                train_sses_vs_k[idx] += min(sse_vs_iter)

    # Turn the accumulated sums into means.
    train_sses_vs_iter = [
        [total / num_repeats for total in curve] for curve in train_sses_vs_iter
    ]
    train_sses_vs_k = [total / num_repeats for total in train_sses_vs_k]
    train_purities_vs_k = [total / num_repeats for total in train_purities_vs_k]

    for purity in train_purities_vs_k:
        print("Purity: ", purity)

    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(train_sses_vs_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(train_purities_vs_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Fit k-means with six clusters five times and plot the SSE curves.

    Plots each run's curve (the sequence returned by ``KMeans.fit``) as a
    subplot, plus the element-wise average of the five curves.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Unused; the cluster count is fixed at 6 here.
    """
    print('kmeans\n')

    num_runs = 5
    num_clusters = 6

    train_sses_vs_iter = []
    # Collected per run as before, but not plotted by this variant.
    train_sses_vs_k = []
    train_purities_vs_k = []

    sse_totals = None
    for _ in range(num_runs):
        kmeans = KMeans(num_clusters, kmeans_max_iter)
        sse_vs_iter = kmeans.fit(x_train)
        train_sses_vs_iter.append(sse_vs_iter)
        train_purities_vs_k.append(kmeans.get_purity(x_train, y_train))
        train_sses_vs_k.append(min(sse_vs_iter))

        if sse_totals is None:
            sse_totals = list(sse_vs_iter)
        else:
            # NOTE(review): zip() truncates to the shorter curve; this
            # assumes fit() returns the same length on every run.
            sse_totals = [total + sse for total, sse in zip(sse_totals, sse_vs_iter)]

    # The previous code plotted the raw sum; divide to get the actual mean.
    avg_list = [total / num_runs for total in sse_totals]

    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(avg_list, x_label='iterations', y_label='sse',
                save_path='plot_sse_vs_iter_%d' % do_pca)
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Sweep k over ``range(1, kmeans_max_k)``, logging progress, then plot.

    One ``KMeans`` model is fitted per k. The sequence returned by ``fit``,
    its minimum, and the purity on the training data are collected and
    passed to the plotting helpers.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Exclusive upper bound of the k sweep.
    """
    print('kmeans\n')

    sse_curves = []
    min_sses = []
    purities = []

    t0 = time.time()
    for k in range(1, kmeans_max_k):
        # Progress line showing seconds elapsed since the sweep began.
        elapsed_text = "\telapsed time: %.2f" % (time.time() - t0)
        print("On step k =", k, "of", kmeans_max_k, elapsed_text, "s")

        model = KMeans(k, kmeans_max_iter)
        curve = model.fit(x_train)
        sse_curves.append(curve)
        purities.append(model.get_purity(x_train, y_train))
        min_sses.append(min(curve))

    plot_y_vs_x_list(
        sse_curves,
        x_label='iter',
        y_label='sse',
        save_path='plot_sse_vs_k_subplots_%d' % do_pca,
    )
    plot_y_vs_x(
        min_sses,
        x_label='k',
        y_label='sse',
        save_path='plot_sse_vs_k_%d' % do_pca,
    )
    plot_y_vs_x(
        purities,
        x_label='k',
        y_label='purities',
        save_path='plot_purity_vs_k_%d' % do_pca,
    )
def apply_kmeans(do_pca, x_train, y_train, x_test, y_test, kmeans_max_iter, kmeans_max_k):
    """Fit one k-means model per k and plot SSE and purity against k.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        x_test: Not used by this function.
        y_test: Not used by this function.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Exclusive upper bound of the k sweep (starts at 1).
    """
    print('kmeans\n')

    curves_per_k = []
    lowest_sse_per_k = []
    purity_per_k = []

    k = 1
    while k < kmeans_max_k:
        clusterer = KMeans(k, kmeans_max_iter)
        fit_curve = clusterer.fit(x_train)
        curves_per_k.append(fit_curve)
        purity_per_k.append(clusterer.get_purity(x_train, y_train))
        # Smallest value in the sequence returned by fit() for this k.
        lowest_sse_per_k.append(min(fit_curve))
        k += 1

    plot_y_vs_x_list(curves_per_k, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(lowest_sse_per_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(purity_per_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans3(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot purity against k for k = 1..10, averaged over five fits per k.

    Prints the five purities of each k, the list of per-k averages, and the
    largest average before plotting.

    Args:
        do_pca: Formatted with ``%d`` into the plot save path.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Not read; the k range is hard-coded to 1..10.
    """
    print('kmeans\n')

    # Gathered across every fit; this variant never plots them.
    all_curves = []
    all_min_sses = []

    mean_purities = []
    for k in range(1, 11):
        print('k:', k)

        # A fresh list per k, so each average covers only this k's five runs.
        purities_this_k = []
        for _ in range(5):
            model = KMeans(k, kmeans_max_iter)
            curve = model.fit(x_train)
            all_curves.append(curve)
            purities_this_k.append(model.get_purity(x_train, y_train))
            all_min_sses.append(min(curve))

        print(purities_this_k)
        mean_purities.append(sum(purities_this_k) / len(purities_this_k))

    print(mean_purities)
    print('max purity', max(mean_purities))
    plot_y_vs_x(mean_purities, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans1(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Average the per-iteration curve of five k=6 fits and plot it.

    The averaged curve is wrapped in a one-element list, printed, and passed
    to ``plot_y_vs_x_list``.

    Args:
        do_pca: Formatted with ``%d`` into the plot save path.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Not read; the cluster count is hard-coded to 6.
    """
    print('kmeans\n')

    run_curves = []
    # Collected per run; not used further in this variant.
    run_min_sses = []
    run_purities = []

    for _ in range(0, 5):
        model = KMeans(6, kmeans_max_iter)
        curve = model.fit(x_train)
        run_curves.append(curve)
        run_purities.append(model.get_purity(x_train, y_train))
        run_min_sses.append(min(curve))

    # Column-wise mean; the first run's length decides how many columns exist.
    mean_curve = []
    for position in range(len(run_curves[0])):
        column_total = 0
        for run_index in range(0, 5):
            column_total += run_curves[run_index][position]
        mean_curve.append(column_total / 5)

    result = [mean_curve]
    print(result)
    plot_y_vs_x_list(result, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Sweep k, averaging five fits per k, and plot SSE and purity.

    For each k in ``range(1, kmeans_max_k)`` the model is fitted five times
    to reduce the effect of the random start. The sequences returned by
    ``KMeans.fit`` are averaged element-wise; the minimum of that averaged
    sequence and the mean purity are recorded per k.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Exclusive upper bound of the k sweep.
    """
    print('kmeans\n')

    num_runs = 5

    train_sses_vs_iter = []
    train_sses_vs_k = []
    train_purities_vs_k = []

    for k in range(1, kmeans_max_k):
        sse_totals = None
        purity_total = 0.

        for _ in range(num_runs):
            kmeans = KMeans(k, kmeans_max_iter)
            sse = kmeans.fit(x_train)
            # ``is None`` instead of ``== None``: an equality test against an
            # array-like return value would be element-wise, not a None check.
            if sse_totals is None:
                # Copy so the accumulation does not mutate fit()'s own object.
                sse_totals = list(sse)
            else:
                # NOTE(review): assumes every run returns a curve no longer
                # than the first one; a longer curve raises IndexError.
                for j, value in enumerate(sse):
                    sse_totals[j] += value
            purity_total += kmeans.get_purity(x_train, y_train)

        avg_sses = [total / 5.0 for total in sse_totals]
        avg_purity = purity_total / 5.

        train_sses_vs_iter.append(avg_sses)
        train_purities_vs_k.append(avg_purity)
        train_sses_vs_k.append(min(avg_sses))

    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(train_sses_vs_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(train_purities_vs_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans_3(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot the mean purity of five fits for each k in 1..10.

    Args:
        do_pca: Formatted with ``%d`` into the plot save path.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Unused; the k range is fixed at 1..10 in this variant.
    """
    runs_per_k = 5

    averg_list = []
    for k in range(1, 11):
        # Start a fresh list for every k. Previously one list was shared by
        # all k, so each plotted point was the running average over every
        # k seen so far rather than the average for that k alone.
        purities_for_k = []
        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            kmeans.fit(x_train)
            purities_for_k.append(kmeans.get_purity(x_train, y_train))
        averg_list.append(sum(purities_for_k) / len(purities_for_k))

    # Plot the average purity.
    plot_y_vs_x(averg_list, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans_2(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot the mean of the minimum SSE over five fits for each k in 1..10.

    The per-fit value is ``min`` of the sequence returned by ``KMeans.fit``.

    Args:
        do_pca: Formatted with ``%d`` into the plot save path.
        x_train: Data passed to ``KMeans.fit``.
        y_train: Unused by this variant; kept for interface compatibility.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Unused; the k range is fixed at 1..10 in this variant.
    """
    print('kmeans\n')

    runs_per_k = 5

    avg_me = []
    for k in range(1, 11):
        # Start a fresh list for every k. Previously one list was shared by
        # all k, so each plotted point was the running average over every
        # k seen so far rather than the average for that k alone.
        min_sses_for_k = []
        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            sse_vs_iter = kmeans.fit(x_train)
            min_sses_for_k.append(min(sse_vs_iter))
        avg_me.append(sum(min_sses_for_k) / len(min_sses_for_k))

    plot_y_vs_x(avg_me, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
def apply_kmeans_avg(x_train, y_train, kmeans_max_iter, k, iterations=5):
    """Fit k-means ``iterations`` times for one k and return averaged metrics.

    Args:
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        k: First argument of the ``KMeans`` constructor.
        iterations: Number of independent fits to average over.

    Returns:
        A 3-tuple: the element-wise mean of the arrays built from ``fit``'s
        return value (as a list), the mean of their minima, and the mean
        purity.
    """
    curve_total = None
    min_total = 0
    purity_total = 0

    print("")
    for run_number in range(1, iterations + 1):
        print("On step ", run_number, "of", iterations, "for k =", k)

        model = KMeans(k, kmeans_max_iter)
        curve = np.array(model.fit(x_train))

        # Size the accumulator lazily from the first curve that comes back.
        if curve_total is None:
            curve_total = np.zeros(len(curve))
        curve_total += curve

        purity_total += model.get_purity(x_train, y_train)
        min_total += curve.min()

    mean_curve = (curve_total / iterations).tolist()
    mean_min = min_total / iterations
    mean_purity = purity_total / iterations
    return mean_curve, mean_min, mean_purity
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Sweep k, averaging minimum SSE and purity over five fits per k, and plot.

    Args:
        do_pca: Formatted with ``%d`` into the plot save paths.
        x_train: Data passed to ``KMeans.fit`` and ``KMeans.get_purity``.
        y_train: Labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second argument of the ``KMeans`` constructor.
        kmeans_max_k: Exclusive upper bound of the k sweep.
    """
    runs_per_k = 5

    # NOTE(review): never populated in this variant, so plot_y_vs_x_list
    # below receives an empty list -- confirm that is intended.
    train_sses_vs_iter = []
    train_sses_vs_k = []
    train_purities_vs_k = []

    for k in range(1, kmeans_max_k):
        # Reset the accumulators for every k. Previously they were
        # initialised once before the sweep and divided in place, so the
        # previous k's average leaked into the next k's total.
        sses_sum = 0
        purities_sum = 0
        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            sse_vs_iter = kmeans.fit(x_train)
            sses_sum += min(sse_vs_iter)
            purities_sum += kmeans.get_purity(x_train, y_train)
        print(k)
        train_sses_vs_k.append(sses_sum / runs_per_k)
        train_purities_vs_k.append(purities_sum / runs_per_k)

    print(train_sses_vs_k)
    print(train_purities_vs_k)

    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(train_sses_vs_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(train_purities_vs_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)