Exemplo n.º 1
0
def proj_S(D_c):  # columns of D_c are general configurations in C^N
    '''Convert, in place, the non-zero columns of D_c to preshapes.

    Every column d whose norm exceeds 1e-8 has its component along uu,
    her(uu, d) * uu, removed -- i.e. it is projected on the subspace
    orthogonal to u, {d | u.T.conj() Phi d = 0} -- and is then rescaled to
    unit norm. Columns whose norm is at most 1e-8 are left as they are.

    A column that is (numerically) collinear with uu has nothing left after
    the projection; it is stored as a null column instead of being divided by
    a vanishing norm, which would produce NaN or normalized round-off noise.

    Parameters:
        - D_c is a 2D array whose columns are the configurations. It is
          modified in place.

    Returns:
        - the same array object D_c.

    NOTE(review): norm() and her() are defined elsewhere in the project;
    presumably they are the norm and hermitian product induced by Phi --
    confirm against their definitions.
    '''
    from settings import uu
    J = D_c.shape[1]

    for j in range(J):
        d = D_c[:, j]
        if norm(d) > 1e-8:
            # remove the component of d along uu
            d = d - her(uu, d) * uu
            residual_norm = norm(d)
            if residual_norm > 1e-8:
                D_c[:, j] = d / residual_norm
            else:
                # d was collinear with uu: no preshape can be defined
                D_c[:, j] = 0
    return D_c
Exemplo n.º 2
0
def normalize(D_c):  # columns of D_c are centered configurations
    '''Rescale, in place, every column of D_c whose norm exceeds 1e-8 to unit norm.

    Same as proj_S(), but we know that all the columns are already centered,
    so no projection is performed. Columns whose norm does not exceed 1e-8
    are left untouched.

    Returns the same array object D_c.
    '''
    for col in range(D_c.shape[1]):
        length = norm(D_c[:, col])
        if not length > 1e-8:
            continue  # (near-)null column: keep it as it is
        D_c[:, col] = D_c[:, col] / length
    return D_c
Exemplo n.º 3
0
def display_res(dataset, DA, k, save=False, directory=None):
    '''Auxiliary function of display. display_res() shows the reconstruction for data z_k.

    The datum z = dataset[k] and its reconstruction w = DA[:, k] are both
    multiplied by exp(1j * theta(z, z0)) before being drawn on top of each
    other (z in gray, w with color index 0). The title reports
    100 * norm(z - w), rounded to 4 decimals.

    Parameters:
        - dataset : indexable collection of the data; dataset[k] is z_k
        - DA : 2D array whose k-th column is the reconstruction of z_k
          (not necessarily normalized)
        - k : index of the datum to display
        - save : if True, the figure is saved as directory + '/rec_<k>.png'
        - directory : folder used when save is True (must then be a string)

    NOTE(review): z0, theta() and draw() are defined elsewhere in the
    project; presumably theta(z, z0) is the angle rotating z onto the
    reference shape z0 -- confirm against their definitions.
    '''
    print('k = ', k)
    z = dataset[k]
    w = DA[:, k]  # not necessarily normalized
    ta = theta(z, z0)
    # apply the same rotation to the datum and to its reconstruction
    z = np.exp(1j * ta) * z
    w = np.exp(1j * ta) * w
    norm_error = np.round(norm(z - w).real, 4)
    draw(z, i=7, comparing=True)  # i = 7 for gray
    draw(w, i=0)
    plt.axis('equal')
    plt.axis('off')
    # raw string: '\%' is an invalid escape sequence in a regular literal
    title_text = r'$ |z_k - D \alpha_k|_\Phi : {} \% $'.format(
        np.round(100 * norm_error, 4))
    plt.title(title_text, fontsize=20, y=1)
    if save:
        plt.savefig(directory + '/rec_' + str(k) + '.png', dpi=200)
    plt.show()
Exemplo n.º 4
0
def expo(z, v):  # z preshape, v in C^n referring to a tangent vector
    '''Exponential of the tangent vector v at the preshape z.

    With t = norm(v), returns cos(t) * z + sin(t) / t * v, which corresponds
    to a preshape. When t < 1e-16 the point z itself is returned, which
    avoids the division by t.
    '''
    length = norm(v)
    if not length < 1e-16:
        scale = sin(length) / length
        return cos(length) * z + scale * v
    return z
Exemplo n.º 5
0
    if learn_dico:
        # Learn an align-first dictionary on `shapes`, then rescale its atoms
        # to unit norm while keeping the product D @ A unchanged.

        if SAVE:
            directory = 'RESULTS/' + database[choice] + '/N0_' + str(
                N0) + '_J_' + str(J)

            if not os.path.exists(directory):
                print('CREATING THE FOLDER')
                os.makedirs(directory)
            else:
                # results of a previous run are never overwritten
                if os.path.exists(directory + '/dico_alignfirst.png'):
                    SAVE = False
                    print('WILL NOT OVERWRITE PREVIOUS SAVED RESULTS')

        # NOTE(review): `directory` is only assigned above when SAVE is
        # truthy; presumably it is also defined earlier in the script --
        # otherwise the call below raises NameError when SAVE is False.
        D, A, E_AF = alignfirst_dico(shapes,
                                     N0,
                                     J,
                                     save=SAVE,
                                     directory=directory,
                                     verbose=True)

        # normalize to obtain preshaped atoms (the atoms are already centered)
        for j in range(J):
            no = norm(D[:, j])
            if no > 1e-3:
                D[:, j] /= no
                # Compensate on the weights of atom j only, so that D @ A is
                # unchanged (A assumed of shape (J, K), one row per atom).
                # The former `A[:, ] *= no` rescaled the whole matrix.
                A[j, :] *= no
            else:
                print('did not normalize because small norm for j = ', j)
Exemplo n.º 6
0
def KSD_optimal_directions_multiproc_ORMP(dataset,
                                          N0,
                                          J,
                                          init=None,
                                          Ntimes=100,
                                          batch_size=1024,
                                          verbose=False,
                                          save=False,
                                          directory=None):
    '''See KSD_optimal_directions().

    THIS FUNCTION IS INTERESTING ONLY FOR LARGE DATASETS (K > 4000 for instance).

    In this function, the ORMP sparse coding step is computed in parallel and
    independently on the different z_k, on a randomly chosen batch of at most
    batch_size data per iteration, thanks to the multiprocessing library run
    with the function OMRP_multiproc_helper(). The weights of the data that
    are not in the current batch keep the value of the previous iterations
    (zero initially). After the last iteration the weights of all the data
    are recomputed.

    Parameters:
        - dataset : the K data z_k, indexed along the first axis; must
          support dataset.T and dataset[k]
        - N0, J, init, Ntimes, verbose : as in KSD_optimal_directions()
        - batch_size : maximal number of data sparse-coded at each iteration
        - save : if True, the figures and a summary appended to
          directory + '/readme.txt' are written
        - directory : folder used when save is True

    Returns:
        - D_c : the learnt dictionary, one atom per column
        - A_c : the (J, K) weights computed on the whole dataset
        - E : the final loss, sum over k of the squared Phi-norm of
          z_k - D_c a_k
    '''
    K = len(dataset)

    if isinstance(init, np.ndarray):
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
        lossCurve = np.array([])

    A_c = np.zeros((J, K), dtype=complex)

    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()

    for t in range(Ntimes):

        if t % 5 == 0:
            print('t =', t)

        # random batch of (at most) batch_size data
        indices = np.arange(K)
        random.shuffle(indices)
        indices = indices[:batch_size]

        # parallel ORMP used, avoids distorted atoms but increases run-time
        G = D_c.T.conj() @ Phi @ D_c
        # the context manager releases the worker processes once the
        # (blocking) starmap call has returned
        with mp.Pool(mp.cpu_count()) as pool:
            results = pool.starmap(
                OMRP_multiproc_helper,
                zip(indices, repeat(D_c), repeat(dataset), repeat(G),
                    repeat(N0)))
        A_c[:, indices] = np.array(results).T

        if verbose:
            diffs = dataset.T - D_c @ A_c
            # sum over k of diffs_k^H Phi diffs_k, without forming the
            # (K, K) matrix diffs^H Phi diffs
            E = np.sum(diffs.conj() * (Phi @ diffs)).real
            lossCurve = np.append(lossCurve, E)

        # MOD update: D <- [z_1,...,z_K] @ A^H @ (A @ A^H)^{-1}
        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            # keep the faulty weights in a module-level variable for inspection
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat

        D_c = normalize(D_c)  # the new atoms are preshaped

        # atoms used (|weight| > 1e-3) by a fraction of the data smaller
        # than N0 / (5 * J) are considered under-utilized
        purge_j = np.where((np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 /
                           (5 * J))[0]
        purged_list = []
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                purged_list.append(j)
                # replace the atom by a randomly picked datum of `dataset`
                # (the argument, not the global `shapes`)
                D_c[:, j] = dataset[np.random.randint(K)]
        if purged_list:
            print('purged atoms ', purged_list, 'at iteration', t)

    print('using parallel ORMP to compute the final weights...')

    # parallel ORMP used for the final computation, on the whole dataset
    G = D_c.T.conj() @ Phi @ D_c
    with mp.Pool(mp.cpu_count()) as pool:
        results = pool.starmap(
            OMRP_multiproc_helper,
            zip(range(K), repeat(D_c), repeat(dataset), repeat(G),
                repeat(N0)))
    A_c = np.array(results).T

    elapsed = (time.time() - start)
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')

    diffs = dataset.T - D_c @ A_c
    E = np.sum(diffs.conj() * (Phi @ diffs)).real

    print('FINAL RESULTS')

    display(D_c, A_c, dataset, save=save, directory=directory)

    if verbose:
        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save:
            plt.savefig(directory + '/losscurve.png', dpi=100)
    plt.show()

    drawMany(D_c.T, force=False, show=False)
    plt.title('KSD dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()

    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=False, show=False)
    plt.title('KSD rotated dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()

    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))
    if save:
        # `with` closes the file even if a write fails
        with open(directory + '/readme.txt', 'a') as text_file:
            text_file.write('\nduration of the algorithm: {} s \n \n'.format(
                np.round(elapsed, 2)))
            text_file.write('Final loss: {}\n'.format(E))
            text_file.write('Final RMSE: {}\n'.format(np.sqrt(E / K)))

    return D_c, A_c, E
Exemplo n.º 7
0
def KSD_optimal_directions(dataset,
                           N0,
                           J,
                           init=None,
                           Ntimes=100,
                           verbose=False,
                           save=False,
                           directory=None):
    ''' The 2D Kendall Shape Dictionary classically alternates between:

    - a sparse coding step : the weights A are updated using a Cholesky-based
    Order Recursive Matching Pursuit (ORMP), as a direct adaptation to the
    complex setting of Mairal's implementation for the real setting in the SPAMS toolbox.
    - a dictionary update : following the Method of Optimal Directions (MOD),
    we update D as

            D <- [z_1,...,z_K] @ A^H @ (A @ A^H)^{-1}
            D <- normalize(D) (rescale all the non-null atoms d_j to unit norm)

    and then replace under-utilized or null atoms by randomly picked data.
    An atom d_j is arbitrarily said to be under-utilized if
            (nb of data using d_j) / (K*N0) < 1 / (5*J)
    where a datum z_k "uses" d_j when |A[j,k]| > 1e-3.


    Parameters:
        - dataset in C^{(K,n)} is a complex array containing the stacked dataset [z_1,...,z_K]^T
        - N0 determines the L0 sparsity of the weights a_k
        - J fixes the number of atoms that we want to learn
        - init = None initializes the dictionary with initializeD_c(J, dataset).
            if init is a given (n,J) complex array, then the initialization starts with init.
        - Ntimes is the number of iterations
        - if verbose == True, the algorithm keeps track of the loss function E to be minimized at each iteration,
            and displays the reconstructions and the loss curve at the end.
            It saves time to set verbose = False.
        - if save == True, the figures and a summary appended to
            directory + '/readme.txt' are written; directory must then be given.

    Returns:
        - D_c : the learnt dictionary, one atom per column
        - A_c : the weights computed with the final dictionary
        - E : the final loss, sum over k of the squared Phi-norm of z_k - D_c a_k

    '''
    K = len(dataset)
    if isinstance(init, np.ndarray):
        D_c = init
    else:
        D_c = initializeD_c(J, dataset)
    if verbose:
        print('Initializing the dictionary.')
        drawMany(D_c.T, force=True)
        lossCurve = np.array([])

    print("Let's wait for {} iterations...".format(Ntimes))
    start = time.time()

    for t in range(Ntimes):

        if t % 5 == 0:
            print('t =', t)

        # sparse coding step
        A_c = ORMP_cholesky(D_c, dataset, N0)

        if verbose:
            diffs = dataset.T - D_c @ A_c
            # sum over k of diffs_k^H Phi diffs_k, without forming the
            # (K, K) matrix diffs^H Phi diffs
            E = np.sum(diffs.conj() * (Phi @ diffs)).real
            lossCurve = np.append(lossCurve, E)

        # MOD update: D <- [z_1,...,z_K] @ A^H @ (A @ A^H)^{-1}
        try:
            Mat = np.linalg.inv(A_c @ A_c.T.conj())
        except np.linalg.LinAlgError:
            # keep the faulty weights in a module-level variable for inspection
            global A_error
            A_error = A_c
            print('A @ A^H not invertible, using SVD')
            U, sigmas, VH = np.linalg.svd(A_c)
            sigmas_rec = reciprocal(sigmas)
            Sigma_rec = fill_diagonal(sigmas_rec, J, K)
            D_c = dataset.T @ VH.T.conj() @ Sigma_rec @ U.T.conj()
        else:
            D_c = dataset.T @ A_c.T.conj() @ Mat

        D_c = normalize(D_c)  # the new atoms are preshaped

        # atoms used (|weight| > 1e-3) by a fraction of the data smaller
        # than N0 / (5 * J) are considered under-utilized
        purge_j = np.where((np.abs(A_c) > 1e-3).sum(axis=1) / K < N0 /
                           (5 * J))[0]
        for j in range(J):
            if norm(D_c[:, j]) < 1e-8 or j in purge_j:
                print('purged ', j, 'at iteration', t)
                # replace the atom by a randomly picked datum of `dataset`
                # (the argument, not the global `shapes`)
                D_c[:, j] = dataset[np.random.randint(K)]

    print('computing the final weights...')
    A_c = ORMP_cholesky(D_c, dataset, N0)

    elapsed = (time.time() - start)
    print('duration of the algorithm: ', np.round(elapsed, 2), 'seconds')

    diffs = dataset.T - D_c @ A_c
    E = np.sum(diffs.conj() * (Phi @ diffs)).real

    print('FINAL RESULTS')
    if verbose:
        display(D_c, A_c, dataset, save=save, directory=directory)

        lossCurve = np.append(lossCurve, E)
        plt.figure()
        plt.plot(np.arange(len(lossCurve)), lossCurve)
        plt.title('Loss curve for the KSD algorithm')
        if save:
            plt.savefig(directory + '/losscurve.png', dpi=100)
    plt.show()

    drawMany(D_c.T, force=True, show=False)
    plt.title('KSD dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD.png', dpi=200)
    plt.show()

    D_al = align_rot(D_c.T).T
    drawMany(D_al.T, force=True, show=False)
    plt.title('KSD rotated dictionary  N0 = {}  J = {}'.format(N0, J))
    if save:
        plt.savefig(directory + '/dico_KSD_rotated.png', dpi=200)
    plt.show()

    print('Final loss : ', E)
    print('RMSE :', np.sqrt(E / K))

    if save:
        # `with` closes the file even if a write fails
        with open(directory + '/readme.txt', 'a') as text_file:
            text_file.write('\nduration of the algorithm: {} s \n \n'.format(
                np.round(elapsed, 2)))
            text_file.write('Final loss: {}\n'.format(E))

    return D_c, A_c, E