Example #1
def showExpo(z, v, aux=1, T=4):
    '''Displays T + 1 points on the exponential curve exp_z(t*v), for t in [0, 1/aux].'''
    Time = np.arange(T + 1) / (aux * T)
    path = np.zeros((T + 1, N), dtype=complex)  # T + 1 rows, so the endpoint t = 1/aux is included

    for i in range(T + 1):
        t = Time[i]
        path[i] = expo(z, t * v)

    drawMany(path)
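
A hypothetical usage sketch, following the equivalence noted in showGeo() below (z and w are assumed to be preshapes from the dataset):

# Hypothetical usage: log(z, w) is the tangent vector at z pointing toward w,
# so this draws T + 1 points along the geodesic from z to w.
showExpo(z, log(z, w), T=8)
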
Example #2
def multi_tangLog(m, dataset):
    '''Same as log() but for several shapes.'''
    K = len(dataset)
    v_log = np.zeros((K, N), dtype=complex)
    for k in range(K):
        z = dataset[k]
        v_log[k] = log(m, z)
    print('\nThe shapes m + v_k on the tangent space are:')
    drawMany(m + v_log)
    return v_log
Example #3
def showGeo(z, w):  # preshapes
    '''Shows the geodesic curve returned by geo(z,w).
    The results are similar to showExpo(z,log(z,w)).'''
    path = geo(z, w)
    T = len(path) - 1  # same as in geo(z,w)

    for i in range(T + 1):
        draw(path[i], i=i)

    drawMany(path)
Example #4
def multi_tangProj(m, dataset):
    '''Same as tangProj() but for several shapes.'''
    K = len(dataset)
    v_proj = np.zeros((K, N), dtype=complex)
    for k in range(K):
        z = dataset[k]
        v_proj[k] = tangProj(m, z)
    global X
    X = v_proj.T  # expose the projected data (one column per shape) at module level
    print('\nThe shapes m + v_k on the tangent space are:')
    drawMany(m + v_proj)
    return v_proj
Example #5
def alignfirst_dico(dataset,
                    N0,
                    J,
                    init=None,
                    save=False,
                    directory=None,
                    verbose=False):
    '''Performs (real) dictionary learning on the dataset, after each shape is optimally rotated to align with the dataset's mean.
    Relies on the SPAMS toolbox of Mairal et al.'''
    K = len(dataset)
    dataset = align_rot(dataset)
    dataset_r = multi_complex2real(dataset)
    X = sqrtPsi @ dataset_r.T
    X = np.asfortranarray(X)  # necessary for using spams toolbox
    D = spams.trainDL(X,
                      K=J,
                      D=init,
                      mode=3,
                      modeD=0,
                      lambda1=N0,
                      verbose=verbose)
    A = spams.omp(X, D=D, L=N0)
    Ad = np.array(A.todense()).reshape(J, K)
    D_c = multi_real2complex((sqrtPsi_inv @ D).T).T

    drawMany(D_c.T, show=False)
    plt.title('Align-first dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_alignfirst.png', dpi=200)
    plt.show()

    if verbose:
        DA = D_c @ A
        for k in test_k_set:
            display_res(dataset, DA, k, save=save, directory=directory)

    diffs = dataset.T - D_c @ Ad
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
    print('final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        with open(directory + '/readme_alignfirst.txt', 'a') as text_file:
            text_file.write('Final loss: {}\n'.format(E))
            text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))
    return D_c, Ad, E
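
A hypothetical call, assuming shapes is the (K, n) complex dataset loaded by the main script. In the SPAMS toolbox, trainDL with mode=3 treats lambda1 as a hard L0 sparsity level, which is why N0 is passed as lambda1 above.

# Hypothetical usage: learn J = 10 atoms, N0 = 5 of which reconstruct each shape.
D_c, Ad, E = alignfirst_dico(shapes, N0=5, J=10)
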
Example #6
def complex_PCA(dataset, J):
    '''
    Computes the first J modes of complex PCA and the RMSE rate of the N0-term reconstruction,
    for 1 <= N0 <= J.
    '''
    K = len(dataset)

    average = np.mean(dataset, axis=0)
    dataset_a = dataset - average

    sum_norms = np.diag(dataset_a.conj() @ Phi @ dataset_a.T).sum().real

    Z = dataset_a.T  # dataset in columns
    Y = Z.T.conj() @ sqrtPhi  # deform through sqrtPhi to recast to a standard PCA problem
    Lambdas, Vmodes = np.linalg.eig(Y.T.conj() @ Y)  # Hermitian matrix: eigenvalues are real up to round-off
    Lambdas = Lambdas.real
    sorting = np.argsort(Lambdas)
    sorting = sorting[::-1]  # eigenvalues sorted in decreasing order
    Lambdas = Lambdas[sorting[:J]]
    Vmodes = Vmodes[:, sorting[:J]]
    Atoms = np.linalg.inv(sqrtPhi) @ Vmodes  # recasting to our form

    cumul = np.cumsum(Lambdas)
    E_rate = sum_norms - cumul

    RMSE_rate = np.sqrt(E_rate / K)
    plt.plot(np.arange(J), RMSE_rate)

    drawMany(Atoms.T, force=True, show=False)
    plt.title('PCA modes J = {}'.format(J))
    plt.show()

    Recs = Atoms @ (Atoms.T.conj() @ Phi @ Z)
    Recs = (Recs.T + average).T

    for k in test_k_set:
        display_res(dataset, Recs, k)

    return RMSE_rate
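
A self-contained sketch of the sqrtPhi trick used above (all names below are local to the sketch, not the module's globals): PCA for the Hermitian inner product <u, v> = u^H @ Phi @ v reduces to standard PCA after mapping z -> sqrtPhi @ z, and the recovered atoms come out Phi-orthonormal.

import numpy as np

rng = np.random.default_rng(0)
n, K = 8, 50
B = rng.normal(size=(n, n)) + 1j * rng.normal(size=(n, n))
Phi_ = B.conj().T @ B + n * np.eye(n)            # Hermitian positive definite
w, V = np.linalg.eigh(Phi_)
sqrtPhi_ = V @ np.diag(np.sqrt(w)) @ V.conj().T  # Hermitian square root of Phi_

Z = rng.normal(size=(n, K)) + 1j * rng.normal(size=(n, K))  # data in columns
Y = Z.conj().T @ sqrtPhi_                  # recast to the standard setting
_, modes = np.linalg.eigh(Y.conj().T @ Y)  # eigh: the Gram matrix is Hermitian
atoms = np.linalg.inv(sqrtPhi_) @ modes    # map the modes back to our form

# The recovered atoms are Phi-orthonormal: atoms^H @ Phi_ @ atoms ~ identity.
assert np.allclose(atoms.conj().T @ Phi_ @ atoms, np.eye(n), atol=1e-8)
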
Example #7
    ax.plot(xaxis, PCA_RMSE, marker='o')
    ax.plot(xaxis, AF_RMSE, marker='^')
    ax.plot(xaxis, KSD_RMSE, marker='*')
    ax.set_yscale('log')
    ax.set_xlabel('N0')
    ax.set_ylabel('RMSE')
    ax.legend(['PCA', 'A-F', 'KSD'])
    plt.savefig(filename, dpi=200)
    plt.show()


if __name__ == '__main__':
    print('Aligning the shapes...')
    aligned = align_rot(shapes)
    drawMany(aligned)

    learn_dico = True  # if True then launches dictionary learning
    N0, J = 5, 10
    # N0: nb of atoms to be picked to reconstruct (l0 penalty)
    # J: number of atoms to learn
    SAVE = True  # do we save the results into the directory?

    if learn_dico:

        if SAVE:
            directory = 'RESULTS/{}/N0_{}_J_{}'.format(database[choice], N0, J)

            if not os.path.exists(directory):
                print('CREATING THE FOLDER')
                os.makedirs(directory)
Example #8
def KSD_optimal_directions_multiproc_ORMP(dataset,
                                          N0,
                                          J,
                                          init=None,
                                          Ntimes=100,
                                          batch_size=1024,
                                          verbose=False,
                                          save=False,
                                          directory=None):
    '''See KSD_optimal_directions().

    THIS FUNCTION IS INTERESTING ONLY FOR LARGE DATASETS (K > 4000 for instance).

    Here the ORMP sparse coding step is computed in parallel and independently
    on the different z_k, over randomly chosen batches of size batch_size,
    using the multiprocessing library with the helper function OMRP_multiproc_helper().
    '''
    K = len(dataset)

    if isinstance(init, np.ndarray):
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
        lossCurve = np.array([])

    A_c = np.zeros((J, K), dtype=complex)

    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()

    for t in range(Ntimes):

        if t % 5 == 0:
            print('t =', t)

        indices = np.arange(K)
        random.shuffle(indices)
        indices = indices[:batch_size]

        # parallel ORMP: avoids distorted atoms but increases run-time
        G = D_c.T.conj() @ Phi @ D_c
        pool = mp.Pool(mp.cpu_count())
        results = pool.starmap(
            OMRP_multiproc_helper,
            zip(indices, repeat(D_c), repeat(dataset), repeat(G), repeat(N0)))
        pool.close()
        A_c[:, indices] = np.array(results).T

        if verbose:
            diffs = dataset.T - D_c @ A_c
            if K < 10000:
                E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
            else:
                E = 0
                for k in range(K):
                    E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
            lossCurve = np.append(lossCurve, E)

        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat

        D_c = normalize(D_c)  # the new atoms are preshaped

        purge_j = np.where((np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 /
                           (5 * J))[0]
        purged_list = []
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                purged_list += [j]
                #print('purged ',j,'at iteration',t)
                D_c[:, j] = dataset[np.random.randint(K)]  # replace by a randomly picked data shape
        if len(purged_list) > 0:
            print('purged atoms ', purged_list, 'at iteration', t)

    print('using parallel ORMP to compute the final weights...')

    # parallel ORMP used for the final computation
    G = D_c.T.conj() @ Phi @ D_c
    pool = mp.Pool(mp.cpu_count())
    results = pool.starmap(
        OMRP_multiproc_helper,
        zip(range(K), repeat(D_c), repeat(dataset), repeat(G), repeat(N0)))
    pool.close()
    A_c = np.array(results).T

    elapsed = (time.time() - start)
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')

    diffs = dataset.T - D_c @ A_c
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real

    print('FINAL RESULTS')

    display(D_c, A_c, dataset, save=save, directory=directory)

    if verbose:
        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save: plt.savefig(directory + '/losscurve.png', dpi=100)
    plt.show()

    drawMany(D_c.T, force=False, show=False)
    plt.title('KSD dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()

    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=False, show=False)
    plt.title('KSD rotated dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()

    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        with open(directory + '/readme.txt', 'a') as text_file:
            text_file.write('\nduration of the algorithm: {} s \n \n'.format(
                np.round(elapsed, 2)))
            text_file.write('Final loss: {}\n'.format(E))
            text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))

    return D_c, A_c, E
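
A self-contained sketch of the starmap/repeat pattern used above, with a toy placeholder standing in for OMRP_multiproc_helper(): each worker receives its own index k plus read-only copies of the shared arguments.

import multiprocessing as mp
from itertools import repeat

def _helper(k, D, G):
    # toy placeholder for the per-shape ORMP solve done by OMRP_multiproc_helper()
    return k * G + sum(D)

if __name__ == '__main__':
    with mp.Pool(mp.cpu_count()) as pool:
        out = pool.starmap(_helper, zip(range(4), repeat([1, 2, 3]), repeat(10)))
    print(out)  # [6, 16, 26, 36]
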
Example #9
def KSD_optimal_directions(dataset,
                           N0,
                           J,
                           init=None,
                           Ntimes=100,
                           verbose=False,
                           save=False,
                           directory=None):
    '''The 2D Kendall Shape Dictionary classically alternates between:

    - a sparse coding step: the weights A are updated using a Cholesky-based
    Order Recursive Matching Pursuit (ORMP), a direct adaptation to the
    complex setting of Mairal's real-setting implementation in the SPAMS toolbox;
    - a dictionary update: following the Method of Optimal Directions (MOD),
    we update D as

            D <- [z_1,...,z_K] @ A^H @ (A @ A^H)^{-1}
            D <- Pi_S(D) (center and normalize all the non-null atoms d_j)

    and then replace under-utilized or null atoms by randomly picked data.
    An atom d_j is arbitrarily said to be under-utilized if

            (nb of data using d_j) / (K*N0) < 1 / (5*J)

    (cf. the purge step below).

    Parameters:
        - dataset in C^{(K,n)} is a complex array containing the row-stacked dataset [z_1,...,z_K]^T
        - N0 determines the L0 sparsity of the weights a_k
        - J fixes the number of atoms to learn
        - init = None initializes the dictionary with randomly picked data shapes;
            if init is a given (n,J) complex array, the initialization starts from init
        - Ntimes is the number of iterations
        - if verbose == True, the algorithm keeps track of the loss function E to be minimized
            at each iteration; setting verbose = False saves time.
    '''
    K = len(dataset)
    if isinstance(init, np.ndarray):
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
        lossCurve = np.array([])

    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()

    for t in range(Ntimes):

        if t % 5 == 0:
            print('t =', t)

        A_c = ORMP_cholesky(D_c, dataset, N0)

        if verbose:
            diffs = dataset.T - D_c @ A_c
            if K < 10000:
                E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
            else:
                E = 0
                for k in range(K):
                    E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real
            lossCurve = np.append(lossCurve, E)

        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat

        D_c = normalize(D_c)  # the new atoms are preshaped

        purge_j = np.where((np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 /
                           (5 * J))[0]
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                print('purged ', j, 'at iteration', t)
                D_c[:, j] = dataset[np.random.randint(K)]  # replace by a randomly picked data shape

    print('computing the final weights...')
    A_c = ORMP_cholesky(D_c, dataset, N0)

    elapsed = (time.time() - start)
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')

    diffs = dataset.T - D_c @ A_c
    if K < 10000:
        E = np.diag(diffs.T.conj() @ Phi @ diffs).sum().real
    else:
        E = 0
        for k in range(K):
            E += (diffs[:, k].conj() @ Phi @ diffs[:, k]).real

    print('FINAL RESULTS')
    if verbose:
        display(D_c, A_c, dataset, save=save, directory=directory)

        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save: plt.savefig(directory + '/losscurve.png', dpi=100)
    plt.show()

    drawMany(D_c.T, force=True, show=False)
    plt.title('KSD dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()

    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=True, show=False)
    plt.title('KSD rotated dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()

    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))

    if save:
        with open(directory + '/readme.txt', 'a') as text_file:
            text_file.write('\nduration of the algorithm: {} s \n \n'.format(
                np.round(elapsed, 2)))
            text_file.write('Final loss: {}\n'.format(E))

    return D_c, A_c, E
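
A self-contained sketch of the MOD dictionary update described in the docstring above, on random data, with plain Euclidean column normalization standing in for the preshape projection Pi_S:

import numpy as np

rng = np.random.default_rng(1)
n, K, J = 6, 40, 4
Z = rng.normal(size=(n, K)) + 1j * rng.normal(size=(n, K))  # data columns z_k
A = rng.normal(size=(J, K)) + 1j * rng.normal(size=(J, K))  # dense stand-in for the sparse weights

D = Z @ A.conj().T @ np.linalg.inv(A @ A.conj().T)  # MOD: least-squares optimal D for fixed A
D /= np.linalg.norm(D, axis=0)                      # Euclidean stand-in for Pi_S
print(np.round(np.linalg.norm(D, axis=0), 6))       # all atoms now have unit norm
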