def mnist_eucl_proc(digits, num_points, num_avg):
    """Print the mean k-means accuracy of five data/distance pairings.

    Every round draws a fresh sample with ``pick_data`` and clusters it
    into ``len(digits)`` groups once per pairing:

        d_E      originals,  Euclidean norm of the difference
        d_{P_0}  ext_shapes, procrustes.procrustes
        d_{P_3}  shapes,     procrustes.procrustes3 (third argument 50)
        d_{P}    shapes,     procrustes.procrustes
        d_{P_l}  shapes,     procrustes.procrustes2

    Args:
        digits: sequence forwarded to ``pick_data``; its length is used as
            the number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data`` (presumably the sample size per digit -- confirm
            against ``pick_data``).
        num_avg: number of rounds whose accuracies are averaged.

    Returns:
        None; the five averages are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)
    # One accuracy list per pairing, in the order of the report below.
    scores = [[], [], [], [], []]
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        pairings = (
            (originals, eucl_dist),
            (ext_shapes, proc_dist1),
            (shapes, proc_dist3),
            (shapes, proc_dist1),
            (shapes, proc_dist2),
        )
        # Run every clustering first and score afterwards, so the call
        # order seen by kmeans (and any random state it uses) is unchanged.
        found = []
        for data, dist in pairings:
            assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
            found.append(assigned)
        for bucket, assigned in zip(scores, found):
            bucket.append(kmeans.accuracy(labels, assigned))

    report = (
        "d_E = %f",
        "d_{P_0} = %f",
        "d_{P_3} = %f",
        "d_{P} = %f",
        "d_{P_l} = %f",
    )
    for template, bucket in zip(report, scores):
        # Single-argument parenthesised print: same output on Python 2,
        # and valid syntax on Python 3.
        print(template % np.mean(bucket))
def mnist_eucl_proc(digits, num_points, num_avg):
    """Print the mean k-means accuracy of five data/distance pairings.

    Every round draws a fresh sample with ``pick_data`` and clusters it
    into ``len(digits)`` groups once per pairing:

        d_E      originals,  Euclidean norm of the difference
        d_{P_0}  ext_shapes, procrustes.procrustes
        d_{P_3}  shapes,     procrustes.procrustes3 (third argument 50)
        d_{P}    shapes,     procrustes.procrustes
        d_{P_l}  shapes,     procrustes.procrustes2

    Args:
        digits: sequence forwarded to ``pick_data``; its length is used as
            the number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data`` (presumably the sample size per digit -- confirm
            against ``pick_data``).
        num_avg: number of rounds whose accuracies are averaged.

    Returns:
        None; the five averages are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        return procrustes.procrustes3(a, b, 50)

    k = len(digits)
    # One accuracy list per pairing, in the order of the report below.
    scores = [[], [], [], [], []]
    for _round in range(num_avg):
        originals, shapes, ext_shapes, labels = pick_data([num_points] * k,
                                                          digits)
        pairings = (
            (originals, eucl_dist),
            (ext_shapes, proc_dist1),
            (shapes, proc_dist3),
            (shapes, proc_dist1),
            (shapes, proc_dist2),
        )
        # Run every clustering first and score afterwards, so the call
        # order seen by kmeans (and any random state it uses) is unchanged.
        found = []
        for data, dist in pairings:
            assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
            found.append(assigned)
        for bucket, assigned in zip(scores, found):
            bucket.append(kmeans.accuracy(labels, assigned))

    report = (
        "d_E = %f",
        "d_{P_0} = %f",
        "d_{P_3} = %f",
        "d_{P} = %f",
        "d_{P_l} = %f",
    )
    for template, bucket in zip(report, scores):
        # Single-argument parenthesised print: same output on Python 2,
        # and valid syntax on Python 3.
        print(template % np.mean(bucket))
def clustering_eucl(nrange, digits, num_sample, outfile):
    """Cluster originals and binaries with K-means/Euclidean and plot accuracy.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * len(digits), digits)``, clusters both the
    ``originals`` and the ``shapes`` returned by it using the Euclidean norm
    of the difference, and records ``kmeans.accuracy`` for each run.  The
    individual accuracies are drawn as dots and their per-``n`` mean as a
    line; the figure is written with ``fig.savefig(outfile)``.

    Args:
        nrange: indexable sequence of sizes; ``nrange[-1]`` is only used in
            the progress message.
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters and its items appear in the plot title.
        num_sample: number of samples drawn for every ``n``.
        outfile: target passed to ``fig.savefig``.

    Returns:
        None.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    # (legend label, colour) per series, in drawing order.
    series = (
        (r'$d_E$', 'b'),
        (r'$d_{E_b}$', 'r'),
    )
    k = len(digits)
    rows_orig, rows_bin = [], []
    for n in nrange:
        print("Doing %i of %i" % (n, nrange[-1]))
        ns = [n] * k
        for _round in range(num_sample):
            originals, shapes, _ext_shapes, labels = pick_data(ns, digits)
            l1, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
            l2, _, _, _ = kmeans.kmeans_(k, shapes, eucl_dist)
            ac1 = kmeans.accuracy(labels, l1)
            ac2 = kmeans.accuracy(labels, l2)
            rows_orig.append([n, ac1])
            rows_bin.append([n, ac2])
            # Space-joined str() values reproduce the Python 2 multi-item
            # print statement exactly and also parse on Python 3.
            print(" ".join([' ', str(ac1), str(ac2)]))
    points = [np.array(rows_orig), np.array(rows_bin)]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (label, colour) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=label)
    # Mean accuracy per n; column 0 holds n, column 1 the accuracy.
    averages = [
        np.array([[n, pts[pts[:, 0] == n][:, 1].mean()] for n in nrange])
        for pts in points
    ]
    for avg, (_label, colour) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)
    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join([str(d) for d in digits])))
    fig.savefig(outfile)
def clustering_eucl(nrange, digits, num_sample, outfile):
    """Cluster originals and binaries with K-means/Euclidean and plot accuracy.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * len(digits), digits)``, clusters both the
    ``originals`` and the ``shapes`` returned by it using the Euclidean norm
    of the difference, and records ``kmeans.accuracy`` for each run.  The
    individual accuracies are drawn as dots and their per-``n`` mean as a
    line; the figure is written with ``fig.savefig(outfile)``.

    Args:
        nrange: indexable sequence of sizes; ``nrange[-1]`` is only used in
            the progress message.
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters and its items appear in the plot title.
        num_sample: number of samples drawn for every ``n``.
        outfile: target passed to ``fig.savefig``.

    Returns:
        None.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    # (legend label, colour) per series, in drawing order.
    series = (
        (r'$d_E$', 'b'),
        (r'$d_{E_b}$', 'r'),
    )
    k = len(digits)
    rows_orig, rows_bin = [], []
    for n in nrange:
        print("Doing %i of %i" % (n, nrange[-1]))
        ns = [n] * k
        for _round in range(num_sample):
            originals, shapes, _ext_shapes, labels = pick_data(ns, digits)
            l1, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
            l2, _, _, _ = kmeans.kmeans_(k, shapes, eucl_dist)
            ac1 = kmeans.accuracy(labels, l1)
            ac2 = kmeans.accuracy(labels, l2)
            rows_orig.append([n, ac1])
            rows_bin.append([n, ac2])
            # Space-joined str() values reproduce the Python 2 multi-item
            # print statement exactly and also parse on Python 3.
            print(" ".join([' ', str(ac1), str(ac2)]))
    points = [np.array(rows_orig), np.array(rows_bin)]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (label, colour) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=label)
    # Mean accuracy per n; column 0 holds n, column 1 the accuracy.
    averages = [
        np.array([[n, pts[pts[:, 0] == n][:, 1].mean()] for n in nrange])
        for pts in points
    ]
    for avg, (_label, colour) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)
    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join([str(d) for d in digits])))
    fig.savefig(outfile)
def kmeans_procrustes(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.procrustes``.

    Args:
        k: number of clusters passed to ``kmeans.kmeans_``.
        data: items to cluster, forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: reference labels compared against the clustering.

    Returns:
        Whatever ``clusval.class_error(true_labels, labels)`` yields for the
        labels found by the clustering.
    """
    def dist_func(X, Y):
        d = distance.procrustes(X, Y)
        # Trace output: every evaluated distance is echoed to stdout.
        # Parenthesised form prints the same on Python 2 and parses on 3.
        print(d)
        return d

    # NOTE(review): the trailing 30 is presumably an iteration cap --
    # confirm against kmeans.kmeans_.
    labels, _mus, _obj, _count = kmeans.kmeans_(k, data, dist_func, 30)
    return clusval.class_error(true_labels, labels)
def mnist_procrustes(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the Procrustes distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``shapes`` into ``len(digits)`` groups using ``procrustes.procrustes``.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        _originals, shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, shapes, proc_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_{P} = %f" % np.mean(accuracies))
def mnist_euclidean(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the Euclidean distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``originals`` into ``len(digits)`` groups using the norm of the
    difference as distance.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, _shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_E = %f" % np.mean(accuracies))
def mnist_procrustes(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the Procrustes distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``shapes`` into ``len(digits)`` groups using ``procrustes.procrustes``.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        _originals, shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, shapes, proc_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_{P} = %f" % np.mean(accuracies))
def mnist_euclidean(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the Euclidean distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``originals`` into ``len(digits)`` groups using the norm of the
    difference as distance.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, _shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, eucl_dist)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_E = %f" % np.mean(accuracies))
def mnist_procrustes_filling(digits, num_points, num_avg):
    """Print mean k-means accuracy for Euclidean, Procrustes and filling runs.

    Every round draws a sample with ``pick_data`` and clusters it into
    ``len(digits)`` groups three times:

        d_{E}  originals,  Euclidean norm of the difference
        d_{P}  ext_shapes, procrustes.procrustes
        d_{F}  ext_shapes, fill.procrustes_filling with N=40, scale=200

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds whose accuracies are averaged.

    Returns:
        None; the three averages are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist_filling(a, b):
        return fill.procrustes_filling(a, b, N=40, scale=200)

    k = len(digits)
    # One accuracy list per run, in the order of the report below.
    scores = [[], [], []]
    for _round in range(num_avg):
        originals, _shapes, ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        pairings = (
            (originals, eucl_dist),
            (ext_shapes, proc_dist),
            (ext_shapes, proc_dist_filling),
        )
        # Cluster everything first, then score, keeping the original
        # call order towards kmeans.
        found = []
        for data, dist in pairings:
            assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
            found.append(assigned)
        for bucket, assigned in zip(scores, found):
            bucket.append(kmeans.accuracy(labels, assigned))

    report = ("d_{E} = %f", "d_{P} = %f", "d_{F} = %f")
    for template, bucket in zip(report, scores):
        # Single-argument parenthesised print: same output on Python 2,
        # and valid syntax on Python 3.
        print(template % np.mean(bucket))
def mnist_procrustes_filling(digits, num_points, num_avg):
    """Print mean k-means accuracy for Euclidean, Procrustes and filling runs.

    Every round draws a sample with ``pick_data`` and clusters it into
    ``len(digits)`` groups three times:

        d_{E}  originals,  Euclidean norm of the difference
        d_{P}  ext_shapes, procrustes.procrustes
        d_{F}  ext_shapes, fill.procrustes_filling with N=40, scale=200

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds whose accuracies are averaged.

    Returns:
        None; the three averages are written to stdout.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist_filling(a, b):
        return fill.procrustes_filling(a, b, N=40, scale=200)

    k = len(digits)
    # One accuracy list per run, in the order of the report below.
    scores = [[], [], []]
    for _round in range(num_avg):
        originals, _shapes, ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        pairings = (
            (originals, eucl_dist),
            (ext_shapes, proc_dist),
            (ext_shapes, proc_dist_filling),
        )
        # Cluster everything first, then score, keeping the original
        # call order towards kmeans.
        found = []
        for data, dist in pairings:
            assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
            found.append(assigned)
        for bucket, assigned in zip(scores, found):
            bucket.append(kmeans.accuracy(labels, assigned))

    report = ("d_{E} = %f", "d_{P} = %f", "d_{F} = %f")
    for template, bucket in zip(report, scores):
        # Single-argument parenthesised print: same output on Python 2,
        # and valid syntax on Python 3.
        print(template % np.mean(bucket))
def mnist_alignment(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the alignment distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``originals`` into ``len(digits)`` groups, measuring distance with
    ``fill.euclidean_alignment``.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def dist_func(im1, im2):
        return fill.euclidean_alignment(im1, im2)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, _shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, dist_func)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_A = %f" % np.mean(accuracies))
def mnist_alignment(digits, num_points, num_avg):
    """Print per-round and mean k-means accuracy under the alignment distance.

    Each round draws a sample with ``pick_data`` and clusters the returned
    ``originals`` into ``len(digits)`` groups, measuring distance with
    ``fill.euclidean_alignment``.

    Args:
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters.
        num_points: value repeated once per digit and forwarded to
            ``pick_data``.
        num_avg: number of rounds to run.

    Returns:
        None; accuracies and their mean are written to stdout.
    """
    def dist_func(im1, im2):
        return fill.euclidean_alignment(im1, im2)

    k = len(digits)
    accuracies = []
    for _round in range(num_avg):
        originals, _shapes, _ext_shapes, labels = pick_data(
            [num_points] * k, digits)
        assigned, _, _, _ = kmeans.kmeans_(k, originals, dist_func)
        accu = kmeans.accuracy(labels, assigned)
        accuracies.append(accu)
        # Parenthesised single-argument prints behave identically on
        # Python 2 and are valid syntax on Python 3.
        print(accu)
    print("")  # blank separator line before the summary
    print("d_A = %f" % np.mean(accuracies))
def kmeans_euclidean(k, data, true_labels):
    """Cluster ``data`` with k-means under ``distance.euclidean``.

    Args:
        k: number of clusters passed to ``kmeans.kmeans_``.
        data: items to cluster, forwarded unchanged to ``kmeans.kmeans_``.
        true_labels: reference labels compared against the clustering.

    Returns:
        Whatever ``clusval.class_error(true_labels, labels)`` yields for the
        labels found by the clustering.
    """
    # NOTE(review): the trailing 30 is presumably an iteration cap --
    # confirm against kmeans.kmeans_.
    outcome = kmeans.kmeans_(k, data, distance.euclidean, 30)
    # Only the first of the four returned values is needed here.
    found_labels, _centers, _objective, _iterations = outcome
    return clusval.class_error(true_labels, found_labels)
def mnist_standard_vs_procrustes(nrange, digits, num_sample, outfile):
    """Plot accuracy when clustering MNIST digits, using procrustes and
    Euclidean distance.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * len(digits), digits)`` and clusters each sample
    five times:

        d_E      originals,  Euclidean norm of the difference
        d_{P_0}  ext_shapes, procrustes.procrustes
        d_{P_3}  shapes,     procrustes.procrustes3 (third argument 50)
        d_{P}    shapes,     procrustes.procrustes
        d_{P_l}  shapes,     procrustes.procrustes2

    Individual accuracies are drawn as dots and their per-``n`` mean as a
    line; the figure is written with ``fig.savefig(outfile)``.

    Args:
        nrange: indexable sequence of sizes; ``nrange[-1]`` is only used in
            the progress message.
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters and its items appear in the plot title.
        num_sample: number of samples drawn for every ``n``.
        outfile: target passed to ``fig.savefig``.

    Returns:
        None.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        return procrustes.procrustes3(a, b, 50)

    # (legend label, colour) per series, in clustering and drawing order.
    series = (
        (r'$d_E$', 'b'),
        (r'$d_{P_0}$', 'r'),
        (r'$d_{P_3}$', 'g'),
        (r'$d_{P}$', 'c'),
        (r'$d_{P_l}$', 'm'),
    )
    k = len(digits)
    rows = [[], [], [], [], []]
    for n in nrange:
        print("Doing %i of %i" % (n, nrange[-1]))
        ns = [n] * k
        for _round in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            pairings = (
                (originals, eucl_dist),
                (ext_shapes, proc_dist1),
                (shapes, proc_dist3),
                (shapes, proc_dist1),
                (shapes, proc_dist2),
            )
            # Cluster everything first, then score, keeping the original
            # call order towards kmeans.
            found = []
            for data, dist in pairings:
                assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
                found.append(assigned)
            accs = [kmeans.accuracy(labels, assigned) for assigned in found]
            for bucket, acc in zip(rows, accs):
                bucket.append([n, acc])
            # Space-joined str() values reproduce the Python 2 multi-item
            # print statement exactly and also parse on Python 3.
            print(" ".join([' '] + [str(acc) for acc in accs]))
    points = [np.array(bucket) for bucket in rows]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (label, colour) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=label)
    # Mean accuracy per n; column 0 holds n, column 1 the accuracy.
    averages = [
        np.array([[n, pts[pts[:, 0] == n][:, 1].mean()] for n in nrange])
        for pts in points
    ]
    for avg, (_label, colour) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)
    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join([str(d) for d in digits])))
    fig.savefig(outfile)
def mnist_standard_vs_procrustes(nrange, digits, num_sample, outfile):
    """Plot accuracy when clustering MNIST digits, using procrustes and
    Euclidean distance.

    For every ``n`` in ``nrange`` the function draws ``num_sample`` samples
    with ``pick_data([n] * len(digits), digits)`` and clusters each sample
    five times:

        d_E      originals,  Euclidean norm of the difference
        d_{P_0}  ext_shapes, procrustes.procrustes
        d_{P_3}  shapes,     procrustes.procrustes3 (third argument 50)
        d_{P}    shapes,     procrustes.procrustes
        d_{P_l}  shapes,     procrustes.procrustes2

    Individual accuracies are drawn as dots and their per-``n`` mean as a
    line; the figure is written with ``fig.savefig(outfile)``.

    Args:
        nrange: indexable sequence of sizes; ``nrange[-1]`` is only used in
            the progress message.
        digits: sequence forwarded to ``pick_data``; its length is the
            number of clusters and its items appear in the plot title.
        num_sample: number of samples drawn for every ``n``.
        outfile: target passed to ``fig.savefig``.

    Returns:
        None.
    """
    def eucl_dist(a, b):
        return np.linalg.norm(a - b)

    def proc_dist1(a, b):
        return procrustes.procrustes(a, b)

    def proc_dist2(a, b):
        return procrustes.procrustes2(a, b)

    def proc_dist3(a, b):
        return procrustes.procrustes3(a, b, 50)

    # (legend label, colour) per series, in clustering and drawing order.
    series = (
        (r'$d_E$', 'b'),
        (r'$d_{P_0}$', 'r'),
        (r'$d_{P_3}$', 'g'),
        (r'$d_{P}$', 'c'),
        (r'$d_{P_l}$', 'm'),
    )
    k = len(digits)
    rows = [[], [], [], [], []]
    for n in nrange:
        print("Doing %i of %i" % (n, nrange[-1]))
        ns = [n] * k
        for _round in range(num_sample):
            originals, shapes, ext_shapes, labels = pick_data(ns, digits)
            pairings = (
                (originals, eucl_dist),
                (ext_shapes, proc_dist1),
                (shapes, proc_dist3),
                (shapes, proc_dist1),
                (shapes, proc_dist2),
            )
            # Cluster everything first, then score, keeping the original
            # call order towards kmeans.
            found = []
            for data, dist in pairings:
                assigned, _, _, _ = kmeans.kmeans_(k, data, dist)
                found.append(assigned)
            accs = [kmeans.accuracy(labels, assigned) for assigned in found]
            for bucket, acc in zip(rows, accs):
                bucket.append([n, acc])
            # Space-joined str() values reproduce the Python 2 multi-item
            # print statement exactly and also parse on Python 3.
            print(" ".join([' '] + [str(acc) for acc in accs]))
    points = [np.array(bucket) for bucket in rows]

    # plotting results
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for pts, (label, colour) in zip(points, series):
        ax.plot(pts[:, 0], pts[:, 1], 'o', color=colour, alpha=.5,
                label=label)
    # Mean accuracy per n; column 0 holds n, column 1 the accuracy.
    averages = [
        np.array([[n, pts[pts[:, 0] == n][:, 1].mean()] for n in nrange])
        for pts in points
    ]
    for avg, (_label, colour) in zip(averages, series):
        ax.plot(avg[:, 0], avg[:, 1], '-', color=colour)
    ax.set_xlabel(r'$N_i$')
    ax.set_ylabel(r'$A$')
    leg = ax.legend(loc=0)
    leg.get_frame().set_alpha(0.6)
    ax.set_title(r'$\{%s\}$' % (','.join([str(d) for d in digits])))
    fig.savefig(outfile)