def showExpo(z, v, aux=1, T=4):
    '''Displays elements on the exponential curve exp_z(t*v) for time t in [0,1].'''
    Time = np.arange(T + 1) / (aux * T)
    path = np.zeros((T + 1, N), dtype=complex)  # T+1 samples, including the endpoint t = 1/aux
    for i in range(T + 1):
        t = Time[i]
        path[i] = expo(z, t * v)
    drawMany(path)
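# Hedged usage sketch (not in the original code): showExpo() can visualize the
# geodesic shooting from one preshape towards another via the log map. The
# indices and the name demo_showExpo are illustrative; `shapes` and log() are
# assumed to be defined in this module.
def demo_showExpo():
    '''Illustrative only: shoot from shapes[0] towards shapes[1].'''
    z, w = shapes[0], shapes[1]
    showExpo(z, log(z, w), T=8)  # samples t = 0, 1/8, ..., 1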
def multi_tangLog(m, dataset):
    '''Same as log() but for several shapes.'''
    K = len(dataset)
    v_log = np.zeros((K, N), dtype=complex)
    for k in range(K):
        z = dataset[k]
        v_log[k] = log(m, z)
    print('\n m + v_k on the tangent space are')
    drawMany(m + v_log)
    return v_log
def showGeo(z, w):  # preshapes
    '''Shows the geodesic curve returned by geo(z,w).
    The results are similar to showExpo(z, log(z,w)).'''
    path = geo(z, w)
    T = len(path) - 1  # same as in geo(z,w)
    for i in range(T + 1):
        draw(path[i], i=i)
    drawMany(path)
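# Hedged sanity check (not in the original code): the docstring above claims
# that geo(z, w) and showExpo(z, log(z, w)) give similar results. This sketch
# measures the deviation numerically; demo_geo_vs_expo is an illustrative name
# and assumes `shapes`, geo(), log(), expo() and norm() from this module.
def demo_geo_vs_expo():
    z, w = shapes[0], shapes[1]
    path = geo(z, w)
    T = len(path) - 1
    v = log(z, w)
    dev = max(norm(path[i] - expo(z, i / T * v)) for i in range(T + 1))
    print('max deviation between geo() and expo():', dev)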
def multi_tangProj(m, dataset):
    '''Same as tangProj() but for several shapes.'''
    K = len(dataset)
    v_proj = np.zeros((K, N), dtype=complex)
    for k in range(K):
        z = dataset[k]
        v_proj[k] = tangProj(m, z)
    global X
    X = v_proj.T  # stored globally, with the tangent vectors in columns
    print('\n m + v_k on the tangent space are')
    drawMany(m + v_proj)
    return v_proj
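# Hedged usage sketch (not in the original code): lifting a dataset to the
# tangent space at a base point m with the two helpers above. demo_tangent_lift
# is an illustrative name; m would typically be a mean of the preshapes.
def demo_tangent_lift(m, dataset):
    v_log = multi_tangLog(m, dataset)    # exact log map, shape (K, N)
    v_proj = multi_tangProj(m, dataset)  # linear projection, shape (K, N)
    # both live in the tangent space at m and agree to first order near m
    print('max |log - proj| :', np.abs(v_log - v_proj).max())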
def alignfirst_dico(dataset, N0, J, init=None, save=False, directory=None,
                    verbose=False):
    '''Performs (real) dictionary learning on the dataset, after its shapes
    are optimally rotated to align with their mean. Relies on the SPAMS
    toolbox of Mairal et al.'''
    K = len(dataset)
    dataset = align_rot(dataset)
    dataset_r = multi_complex2real(dataset)
    X = sqrtPsi @ dataset_r.T
    X = np.asfortranarray(X)  # necessary for using the SPAMS toolbox
    D = spams.trainDL(X, K=J, D=init, mode=3, modeD=0, lambda1=N0,
                      verbose=verbose)
    A = spams.omp(X, D=D, L=N0)
    Ad = np.array(A.todense()).reshape(J, K)
    D_c = multi_real2complex((sqrtPsi_inv @ D).T).T
    drawMany(D_c.T, show=False)
    plt.title('Align-first dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_alignfirst.png', dpi=200)
    plt.show()
    if verbose:
        DA = D_c @ Ad
        for k in test_k_set:
            display_res(dataset, DA, k, save=save, directory=directory)
    diffs = dataset.T - D_c @ Ad
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:  # avoid building a huge K x K matrix
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
    print('final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        text_file = open(directory + '/readme_alignfirst.txt', 'a')
        text_file.write('Final loss: {}\n'.format(E))
        text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))
        text_file.close()
    return D_c, Ad, E
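# Minimal sketch (not in the original code) of the Phi-weighted loss computed
# above and reused by the KSD functions below:
#     E = sum_k (z_k - D a_k)^H @ Phi @ (z_k - D a_k)
# The helper name recon_loss is an assumption; diffs is (n, K) with the
# residuals in columns, as in the functions of this file.
def recon_loss(diffs):
    # einsum sums d_k^H @ Phi @ d_k over k without forming the K x K matrix
    return np.einsum('nk,nm,mk->', diffs.conj(), Phi, diffs).real

# e.g. after alignfirst_dico: recon_loss(dataset.T - D_c @ Ad)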
def complex_PCA(dataset, J):
    '''Computes the first J modes of complex PCA and the RMSE rate of the
    N0-term reconstruction, for 1 <= N0 <= J.'''
    K = len(dataset)
    average = np.mean(dataset, axis=0)
    dataset_a = dataset - average
    sum_norms = np.diag(dataset_a.conj() @ Phi @ dataset_a.T).sum().real
    Z = dataset_a.T  # dataset in columns
    Y = Z.T.conj() @ sqrtPhi  # deforming through sqrtPhi to recast to a standard form
    Lambdas, Vmodes = np.linalg.eig(Y.T.conj() @ Y)  # complex values!
    Lambdas = Lambdas.real
    sorting = np.argsort(Lambdas)[::-1]  # eigenvalues ordered by decreasing value
    Lambdas = Lambdas[sorting[:J]]
    Vmodes = Vmodes[:, sorting[:J]]
    Atoms = np.linalg.inv(sqrtPhi) @ Vmodes  # recasting to our form
    cumul = np.cumsum(Lambdas)
    E_rate = sum_norms - cumul
    RMSE_rate = np.sqrt(E_rate / K)
    plt.plot(np.arange(J), RMSE_rate)
    drawMany(Atoms.T, force=True, show=False)
    plt.title('PCA modes J = {}'.format(J))
    plt.show()
    Recs = Atoms @ (Atoms.T.conj() @ Phi @ Z)
    Recs = (Recs.T + average).T
    for k in test_k_set:
        display_res(dataset, Recs, k)
    return RMSE_rate
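# Hedged usage sketch (not in the original code): running complex PCA and
# reading the N0-term reconstruction error off the returned curve. The values
# are illustrative; `shapes` is the module-level dataset.
def demo_complex_PCA():
    '''Illustrative only: J = 10 modes on the module-level `shapes`.'''
    RMSE_rate = complex_PCA(shapes, J=10)
    print('RMSE of the 5-term PCA reconstruction:', RMSE_rate[4])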
    ax.plot(xaxis, PCA_RMSE, marker='o')
    ax.plot(xaxis, AF_RMSE, marker='^')
    ax.plot(xaxis, KSD_RMSE, marker='*')
    ax.set_yscale('log')
    ax.set_xlabel('N0')
    ax.set_ylabel('RMSE')
    #ax.set_ylim(top=None, bottom=None)
    ax.legend(['PCA', 'A-F', 'KSD'])
    plt.savefig(filename, dpi=200)
    plt.show()


if __name__ == '__main__':
    print('Aligning the shapes...')
    aligned = align_rot(shapes)
    drawMany(aligned)

    learn_dico = True  # if True then launches dictionary learning
    N0, J = 5, 10  # N0: nb of atoms picked to reconstruct (l0 penalty)
                   # J: number of atoms to learn
    SAVE = True  # do we save the results into the directory?
    if learn_dico:
        if SAVE:
            directory = ('RESULTS/' + database[choice]
                         + '/N0_' + str(N0) + '_J_' + str(J))
            if not os.path.exists(directory):
                print('CREATING THE FOLDER')
                os.makedirs(directory)
def KSD_optimal_directions_multiproc_ORMP(dataset, N0, J, init=None,
                                          Ntimes=100, batch_size=1024,
                                          verbose=False, save=False,
                                          directory=None):
    '''See KSD_optimal_directions(). THIS FUNCTION IS INTERESTING ONLY FOR
    LARGE DATASETS (K > 4000 for instance). In this function, the ORMP sparse
    coding step is computed in parallel and independently on the different
    z_k, over randomly chosen batches of size batch_size, thanks to the
    multiprocessing library run with the function OMRP_multiproc_helper().'''
    K = len(dataset)
    if type(init) == np.ndarray:
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
    lossCurve = np.array([])
    A_c = np.zeros((J, K), dtype=complex)
    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()
    for t in range(Ntimes):
        if t % 5 == 0:
            print('t =', t)
        indices = np.arange(K)
        random.shuffle(indices)
        indices = indices[:batch_size]
        # parallel ORMP: avoids distorted atoms but increases run-time
        G = D_c.T.conj() @ Phi @ D_c
        pool = mp.Pool(mp.cpu_count())
        results = pool.starmap(
            OMRP_multiproc_helper,
            zip(indices, repeat(D_c), repeat(dataset), repeat(G), repeat(N0)))
        pool.close()
        A_c[:, indices] = np.array(results).T
        if verbose:
            diffs = dataset.T - D_c @ A_c
            if K < 10000:
                E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
            else:
                E = 0
                for k in range(K):
                    E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
            lossCurve = np.append(lossCurve, E)
        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat
        D_c = normalize(D_c)  # the new atoms are preshaped
        purge_j = np.where(
            (np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 / (5 * J))[0]
        purged_list = []
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                purged_list += [j]
                # replace the purged atom by a randomly picked datum
                D_c[:, j] = dataset[np.random.randint(K)]
        if len(purged_list) > 0:
            print('purged atoms ', purged_list, 'at iteration', t)
    print('using parallel ORMP to compute the final weights...')
    # parallel ORMP used for the final computation
    G = D_c.T.conj() @ Phi @ D_c
    pool = mp.Pool(mp.cpu_count())
    results = pool.starmap(
        OMRP_multiproc_helper,
        zip(range(K), repeat(D_c), repeat(dataset), repeat(G), repeat(N0)))
    pool.close()
    A_c = np.array(results).T
    elapsed = time.time() - start
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')
    diffs = dataset.T - D_c @ A_c
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
    print('FINAL RESULTS')
    display(D_c, A_c, dataset, save=save, directory=directory)
    if verbose:
        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save:
            plt.savefig(directory + '/losscurve.png', dpi=100)
        plt.show()
    drawMany(D_c.T, force=False, show=False)
    plt.title('KSD dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()
    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=False, show=False)
    plt.title('KSD rotated dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()
    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        text_file = open(directory + '/readme.txt', 'a')
        text_file.write('\nduration of the algorithm: {} s \n \n'.format(
            np.round(elapsed, 2)))
        text_file.write('Final loss: {}\n'.format(E))
        text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))
        text_file.close()
    return D_c, A_c, E
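# Hedged usage sketch (not in the original code): the multiprocessing variant
# only pays off on large datasets, as its docstring states. demo_KSD_multiproc
# is an illustrative name; call it under `if __name__ == "__main__"` so that
# multiprocessing can spawn workers safely on all platforms.
def demo_KSD_multiproc():
    return KSD_optimal_directions_multiproc_ORMP(shapes, N0=5, J=10,
                                                 Ntimes=100, batch_size=1024)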
def KSD_optimal_directions(dataset, N0, J, init=None, Ntimes=100,
                           verbose=False, save=False, directory=None):
    '''The 2D Kendall Shape Dictionary classically alternates between:
    - a sparse coding step: the weights A are updated using a Cholesky-based
      Order Recursive Matching Pursuit (ORMP), as a direct adaptation to the
      complex setting of Mairal's implementation for the real setting in the
      SPAMS toolbox.
    - a dictionary update: following the Method of Optimal Directions (MOD),
      we update D as
          D <- [z_1,...,z_K] @ A^H @ (A @ A^H)^{-1}
          D <- Pi_S(D)   (center and normalize all the non-null atoms d_j)
      and then replace under-utilized or null atoms by randomly picked data.
      An atom d_j is arbitrarily said to be under-utilized if
          (nb of data using d_j) / (K*N0) < 1 / (5*J)

    Parameters:
    - dataset in C^{(K,n)} is a complex array containing the horizontally
      stacked dataset [z_1,...,z_K]^T
    - N0 determines the L0 sparsity of the weights a_k
    - J fixes the number of atoms that we want to learn
    - init = None initializes the dictionary with randomly picked data shapes.
      If init is a given (n,J) complex array, then the initialization starts
      with init.
    - Ntimes is the number of iterations
    - if verbose == True, the algorithm keeps track of the loss function E to
      be minimized at each iteration. It saves time to set verbose = False.
    '''
    K = len(dataset)
    if type(init) == np.ndarray:
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
    lossCurve = np.array([])
    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()
    for t in range(Ntimes):
        if t % 5 == 0:
            print('t =', t)
        A_c = ORMP_cholesky(D_c, dataset, N0)
        if verbose:
            diffs = dataset.T - D_c @ A_c
            if K < 10000:
                E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
            else:
                E = 0
                for k in range(K):
                    E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
            lossCurve = np.append(lossCurve, E)
        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat
        D_c = normalize(D_c)  # the new atoms are preshaped
        purge_j = np.where(
            (np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 / (5 * J))[0]
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                print('purged ', j, 'at iteration', t)
                # replace the purged atom by a randomly picked datum
                D_c[:, j] = dataset[np.random.randint(K)]
    print('computing the final weights...')
    A_c = ORMP_cholesky(D_c, dataset, N0)
    elapsed = time.time() - start
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')
    diffs = dataset.T - D_c @ A_c
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
    print('FINAL RESULTS')
    if verbose:
        display(D_c, A_c, dataset, save=save, directory=directory)
        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save:
            plt.savefig(directory + '/losscurve.png', dpi=100)
        plt.show()
    drawMany(D_c.T, force=True, show=False)
    plt.title('KSD dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()
    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=True, show=False)
    plt.title('KSD rotated dictionary N0 = {} J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()
    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        text_file = open(directory + '/readme.txt', 'a')
        text_file.write('\nduration of the algorithm: {} s \n \n'.format(
            np.round(elapsed, 2)))
        text_file.write('Final loss: {}\n'.format(E))
        text_file.close()
    return D_c, A_c, E
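# Hedged usage sketch (not in the original code): learning the dictionaries
# and recovering the quantities compared in the RMSE figure above (PCA vs
# align-first vs KSD). The parameter values mirror the defaults of the
# __main__ block; demo_compare is an illustrative name.
def demo_compare(N0=5, J=10):
    pca_rmse = complex_PCA(shapes, J)                  # full PCA rate curve
    D_af, A_af, E_af = alignfirst_dico(shapes, N0, J)  # align-first baseline
    D_ksd, A_ksd, E_ksd = KSD_optimal_directions(shapes, N0, J, Ntimes=100)
    K = len(shapes)
    print('PCA RMSE (N0 terms):', pca_rmse[N0 - 1])
    print('align-first RMSE   :', np.sqrt(E_af / K))
    print('KSD RMSE           :', np.sqrt(E_ksd / K))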