def experiments(data, labellist, labelidxs, num_clusters):
    """Estimate how often cluster prototypes recover a cluster's majority label.

    Every point of ``data`` is assigned to one of ``num_clusters`` clusters
    uniformly at random (global NumPy random state). For each non-empty
    cluster three prototypes are computed with ``ca``: a flag mean, a sine
    median and a maximum-cosine prototype. Each prototype is matched to the
    nearest point of ``data``; the trial counts as a hit when the cluster's
    most frequent label is among the labels registered for that point.

    Args:
        data: Sequence of 2-D arrays that can be multiplied as ``Y.T @ x``
            with a prototype ``Y``. Presumably orthonormal subspace bases
            -- TODO confirm against the caller.
        labellist: Sequence of labels. It is indexed by position in ``data``
            to find a cluster's majority label, and by position in
            ``labelidxs`` to collect the labels of the nearest point.
        labelidxs: Sequence aligned with ``labellist``; entries equal to the
            index of the nearest point select which labels are collected.
        num_clusters: Number of random clusters; also the denominator of the
            returned fractions.

    Returns:
        dict with keys ``'Flag Median'``, ``'Sine Median'`` and
        ``'Max Cosine'`` mapping to hits divided by ``num_clusters``. The
        ``'Flag Median'`` entry is fed by ``ca.flag_mean``; the key is kept
        as-is for existing callers. Empty clusters contribute no hit.
    """
    from collections import Counter

    n_its = 30
    # Convert once so that the element-wise comparison below also works when
    # a plain list is passed (list == int would be a single False).
    label_positions = np.asarray(labelidxs)

    def find_closest_point(data, Y, labellist, labelidx):
        # Labels attached to the data point nearest to prototype Y.
        err = []
        for x in data:
            r = x.shape[1]
            gap = r - np.trace(Y.T @ x @ x.T @ Y)
            # Round-off can push the radicand marginally below zero when Y
            # and x span (almost) the same subspace; clamp to avoid NaN.
            err.append(np.sqrt(np.maximum(gap, 0)))
        idx = np.argmin(err)
        return [labellist[ii] for ii in np.where(labelidx == idx)[0]]

    p_correct = {'Flag Median': 0, 'Sine Median': 0, 'Max Cosine': 0}

    clusters = np.random.randint(0, num_clusters, len(data))

    for ii in range(num_clusters):
        idx = np.where(clusters == ii)[0]
        print(len(idx))

        # The random assignment may leave a cluster without members; there is
        # nothing to average then, so it simply scores no hit.
        if len(idx) > 0:
            X = [data[i] for i in idx]
            labels = [labellist[i] for i in idx]

            # Majority label; ties go to the label seen first.
            most_common = Counter(labels).most_common(1)[0][0]

            k = X[0].shape[1]

            flagmean = ca.flag_mean(X, k, fast=False)
            print('Flag Mean finished')

            sin_median = ca.irls_flag(X, k, n_its, 'sine', fast=False)[0]
            print('Sine Median finished')

            max_cosine = ca.irls_flag(X, k, n_its, 'cosine', fast=False)[0]
            print('Max Cos finished')

            p_correct['Flag Median'] += int(most_common in find_closest_point(
                data, flagmean, labellist, label_positions))
            p_correct['Sine Median'] += int(most_common in find_closest_point(
                data, sin_median, labellist, label_positions))
            p_correct['Max Cosine'] += int(most_common in find_closest_point(
                data, max_cosine, labellist, label_positions))

        print('Iteration ' + str(ii + 1) + ' out of ' + str(num_clusters) +
              ' finished')
        print('--------------------------------------------')

    for name in p_correct:
        p_correct[name] = p_correct[name] / num_clusters

    return p_correct
def getCentroids(dataSet, labels, centroids, med):
    """Recompute one prototype for every cluster that still has members.

    Args:
        dataSet: Sequence of 2-D arrays; the column count of the first one
            is passed to ``ca`` as the prototype dimension.
        labels: Cluster index of each entry of ``dataSet``.
        centroids: Current centroids; only their number is used.
        med: ``'flag'`` (``ca.flag_mean``), ``'sine'`` or ``'cosine'``
            (``ca.irls_flag`` with 5 iterations). Any other value yields no
            centroids at all.

    Returns:
        List of new centroids in cluster order. Clusters without members are
        skipped (not re-initialised), so the list can be shorter than
        ``centroids``.
    """
    # Unpacking also insists that the first data point is two-dimensional.
    _, r = dataSet[0].shape

    # Deferred calls so that only the estimator selected by ``med`` runs.
    estimators = {
        'flag': lambda pts: ca.flag_mean(pts, r, fast=False),
        'sine': lambda pts: ca.irls_flag(pts, r, 5, 'sine')[0],
        'cosine': lambda pts: ca.irls_flag(pts, r, 5, 'cosine')[0],
    }
    estimate = estimators.get(med) if isinstance(med, str) else None

    assignments = np.array(labels)

    new_centroids = []
    for cluster_id in range(len(centroids)):
        members = np.where(assignments == cluster_id)[0]
        if len(members) == 0 or estimate is None:
            continue
        new_centroids.append(estimate([dataSet[i] for i in members]))

    return new_centroids
center = np.random.rand(n,k)*10 center_rep = np.linalg.qr(center)[0][:,:k] #generate dataset of points in Gr(k,n) data = [] for i in range(num_points): Y_raw = center_rep + (np.random.rand(n,k)-.5)*.01 Y = np.linalg.qr(Y_raw)[0][:,:k] data.append(Y) np.random.seed(1) Y_init = np.linalg.qr(np.random.rand(n,n))[0][:,:k] start = time.time() errors = ca.irls_flag(data, k, n_its, 'sine', opt_err = 'sine', fast = False, init = Y_init)[1] trial_times.append(time.time()- start) mean_times.append(np.mean(trial_times)) std_times.append(np.std(trial_times)) # iterations.append(len(errors)) print(str(j)+' done.') import pandas trial_stats = pandas.DataFrame(columns = ['n','k','Iterations', 'Time']) trial_stats['k'] = ks trial_stats['Mean'] = mean_times
def lbg_subspace(X, epsilon, n_centers=17, opt_type='sine', n_its=10, seed=1,
                 r=48):
    """Cluster subspace data by alternating assignment and center updates.

    Centers start as ``n_centers`` data points drawn with replacement after
    seeding NumPy's global random state with ``seed``. Each round assigns
    every point to its closest center (per ``distance_matrix``), recomputes
    the center of every non-empty cluster and measures the distortion. The
    loop stops once the relative change in distortion is at most
    ``epsilon``, or the distortion falls below 1e-11.

    Args:
        X: Sequence of data points accepted by ``distance_matrix`` and ``ca``.
        epsilon: Threshold on the relative change in distortion. Because the
            initial error is 1, values >= 1 return the initial centers.
        n_centers: Number of initial centers.
        opt_type: ``'sinesq'`` updates centers with ``ca.flag_mean``; any
            other value is forwarded to ``ca.irls_flag`` (as both the fourth
            and fifth positional argument) and to ``distance_matrix``.
        n_its: Iteration count handed to ``ca.irls_flag``.
        seed: Seed for ``np.random.seed``.
        r: Dimension handed to ``ca.flag_mean`` / ``ca.irls_flag`` for the
            centers. Defaults to 48, the value that used to be hard-coded.

    Returns:
        Tuple ``(centers, errors, distortions)``: the final centers (clusters
        that became empty are dropped, so there may be fewer than
        ``n_centers``), the relative change recorded after every round, and
        the distortion after the initial assignment and after every round.
    """
    n_pts = len(X)
    error = 1

    def assign(current_centers):
        # Closest center per point, plus the resulting total distortion.
        d_mat = distance_matrix(X, current_centers, opt_type)
        # argmin over axis 0 gives one center index per column, i.e. per point.
        closest = np.argmin(d_mat, axis=0)
        # Distortion is each point's distance to its own assigned center.
        # Indexing with ``d_mat[closest]`` alone would pull entire rows and
        # sum distances to unrelated points.
        own_distance = d_mat[closest, np.arange(d_mat.shape[1])]
        return closest, np.sum(own_distance)

    # init centers
    np.random.seed(seed)
    centers = [X[np.random.randint(n_pts)] for _ in range(n_centers)]

    index, new_distortion = assign(centers)
    distortions = [new_distortion]

    errors = []
    while error > epsilon:
        old_distortion = new_distortion

        # Recompute a center for every cluster that still has members;
        # clusters that lost all their points are dropped.
        m = len(centers)
        centers = []
        for c in range(m):
            idx = np.where(index == c)[0]
            if len(idx) > 0:
                members = [X[i] for i in idx]
                if opt_type == 'sinesq':
                    centers.append(ca.flag_mean(members, r, fast=False))
                else:
                    centers.append(
                        ca.irls_flag(members, r, n_its, opt_type,
                                     opt_type)[0])

        index, new_distortion = assign(centers)
        distortions.append(new_distortion)

        if new_distortion < 0.00000000001:
            # Essentially perfect fit: stop regardless of the relative change.
            error = 0
        else:
            error = np.abs(new_distortion - old_distortion) / old_distortion
        errors.append(error)
        print(error)

    return centers, errors, distortions
def visual_2D(num1, num2):
    """Plot noisy 2-D lines from two processes together with four prototypes.

    ``num1`` points are sampled around (0, 1) and, when ``num2`` is non-zero,
    ``num2`` points around (1, 0), both with standard deviation 0.2. Each
    point is normalised to unit length and drawn as a dashed grey line
    through the origin. The flag mean, sine median, geodesic median and
    maximum-cosine prototype of the normalised points (from ``ca``) are drawn
    on top, and the figure is written to
    ``./Figures/2example_2D_<num1>_<num2>.png`` and closed.

    Args:
        num1: Number of samples from the first process.
        num2: Number of samples from the second process; 0 skips it.

    Returns:
        The four ``plt.plot`` results for the flag mean, sine median,
        geodesic median and maximum cosine, in that order.
    """
    k = 1
    n_its = 20

    # Sampling order is kept fixed so results match under a seeded generator.
    cloud = np.vstack([np.random.normal(0, .2, num1),
                       np.random.normal(1, .2, num1)])
    if num2 != 0:
        second = np.vstack([np.random.normal(1, .2, num2),
                            np.random.normal(0, .2, num2)])
        cloud = np.hstack([cloud, second])

    def draw_through_origin(direction, **style):
        # A 1-D subspace is shown as the segment from -direction to +direction.
        return plt.plot([-direction[0, 0], direction[0, 0]],
                        [-direction[1, 0], direction[1, 0]], **style)

    gr_list = []
    for col in range(cloud.shape[1]):
        sample = cloud[:, [col]]  # list index keeps the column 2-D
        unit = sample / np.linalg.norm(sample)
        gr_list.append(unit)
        draw_through_origin(unit, color='.5', linestyle='dashed')

    flagmean = ca.flag_mean(gr_list, k, fast=False)
    print('Flag Mean finished')

    sin_median = ca.irls_flag(gr_list, k, n_its, 'sine', fast=False)[0]
    print('Sine Median finished')

    geodesic_median = ca.irls_flag(gr_list, k, n_its, 'geodesic',
                                   fast=False)[0]
    print('Geodesic finished')

    max_cosine = ca.irls_flag(gr_list, k, n_its, 'cosine', fast=False)[0]
    print('Max Cos finished')

    l0 = draw_through_origin(flagmean, label='Flag Mean', color='b')
    l1 = draw_through_origin(sin_median, label='Sine Median', color='g')
    l2 = draw_through_origin(geodesic_median, label='Geodesic Median',
                             color='r')
    l3 = draw_through_origin(max_cosine, label='Maximum Cosine', color='y')

    plt.xlim(-1, 1)
    plt.ylim(-1, 1)

    plt.savefig('./Figures/2example_2D_' + str(num1) + '_' + str(num2) +
                '.png')
    plt.close()

    return l0, l1, l2, l3
def convergence_check(gr_list, n_its):
    """Compare the convergence of ``ca.irls_flag`` and ``ca.gradient_descent``.

    Runs ten trials. Each trial records the second return value of both
    solvers for the ``'sine'`` and ``'cosine'`` objectives, all started from
    the same random orthonormal matrix. The stacked curves go to ``add_line``
    and two figures are saved: ``./Figures/sin_median_convergence.png`` and
    ``./Figures/max_cosine_convergence.png``.

    Args:
        gr_list: Sequence of 2-D arrays; the shape of the first one fixes the
            size of the random starting point and the ``k`` given to solvers.
        n_its: Iteration count forwarded to both solvers.

    Returns:
        None. Output is the two saved figures plus progress prints.
    """
    n, k = gr_list[0].shape

    # NOTE(review): the starting point is drawn once, before the trial loop,
    # so all ten trials share it -- confirm whether a fresh draw per trial
    # was intended.
    start = np.linalg.qr(np.random.rand(n, k))[0][:, :k]

    runs = {'irls_sine': [], 'gd_sine': [], 'irls_cosine': [], 'gd_cosine': []}

    for _ in range(10):
        runs['irls_sine'].append(
            ca.irls_flag(gr_list, k, n_its, 'sine', opt_err='sine',
                         fast=False, init=start)[1])
        # -.01 is the third positional argument (presumably the step size).
        runs['gd_sine'].append(
            ca.gradient_descent(gr_list, k, -.01, n_its, 'sine',
                                init=start)[1])
        print('Sine Median finished')

        runs['irls_cosine'].append(
            ca.irls_flag(gr_list, k, n_its, 'cosine', opt_err='cosine',
                         fast=False, init=start)[1])
        runs['gd_cosine'].append(
            ca.gradient_descent(gr_list, k, -.01, n_its, 'cosine',
                                init=start)[1])
        print('Max Cos finished')

    # One row per trial; stack everything before any plotting starts.
    curves = {name: np.vstack(rows) for name, rows in runs.items()}

    def finish_figure(title, path):
        # Shared axis decoration, then write the figure out and reset pyplot.
        plt.xticks([0, 1, 2, 3, 4, 5])
        plt.legend()
        plt.xlabel('Iteration')
        plt.ylabel('Objective function value')
        plt.title(title)
        plt.savefig(path)
        plt.close()

    solid, dotted, dash_dot = "-", ":", "-."

    add_line(curves['irls_sine'], solid, None, 'FlagIRLS', 'b')
    add_line(curves['gd_sine'], dotted, None, 'Gradient Descent', 'k')
    finish_figure('Sine Median', './Figures/sin_median_convergence.png')

    add_line(curves['irls_cosine'], dash_dot, None, 'FlagIRLS', 'g')
    add_line(curves['gd_cosine'], dotted, None, 'Gradient Descent', 'k')
    finish_figure('Maximum Cosine', './Figures/max_cosine_convergence.png')