Example #1
def starting_point(T, Tsize, S, U, R, ordering, options):
    """
    This function generates a starting point to begin the iterations. There are four options:
        - list: the user may give a list with the factor matrices to be used as starting point.
        - 'random': the entries of the factor matrices are drawn from the normal distribution with mean 0 and variance 1.
        - 'smart_random': generates a random starting point with a method based on the MLSVD which always guarantees a
           small relative error. Check the function 'smart_random' for more details about this method.
        - 'smart': works similarly to the 'smart_random' method, but is deterministic and generates the best rank-R
           approximation based on the MLSVD.
    
    Inputs
    ------
    T: float array
        Objective tensor in coordinates.
    Tsize: float
        Frobenius norm of T.
    S: float array
        The core tensor of the MLSVD of T.
    U: list of float 2D arrays
        Each element of U is an orthogonal matrix of the MLSVD of T.
    R: int
        The desired rank.
    ordering: list of ints
        Since the CPD may change the index ordering, this list is necessary when the user gives an initialization,
        which is based on the original ordering.
    options: class with the parameters previously defined.
    
    Outputs
    -------
    init_factors: list of float 2D arrays
    """

    # Extract all variables from the options class.
    initialization = options.initialization
    c = options.factors_norm
    symm = options.symm
    display = options.display
    dims = S.shape
    L = len(dims)

    if type(initialization) == list:
        init_factors = [
            dot(U[l].T, initialization[ordering[l]]) for l in range(L)
        ]

    elif initialization == 'random':
        init_factors = [randn(dims[l], R) for l in range(L)]

    elif initialization == 'smart_random':
        init_factors = smart_random(S, dims, R)

    elif initialization == 'smart':
        init_factors = smart(S, dims, R)

    else:
        sys.exit('Error with initialization parameter.')

    if type(initialization) != list:

        # Depending on the tensor, the factors may have zero entries. We want to avoid that, so we
        # introduce a very small random noise.
        init_factors = clean_zeros(init_factors, dims, R)

        # Make all factors balanced.
        init_factors = cnv.equalize(init_factors, R)

        # Apply additional transformations if requested.
        init_factors = cnv.transform(init_factors, symm, c)

    if display > 2 or display < -1:
        S_init = cnv.cpd2tens(init_factors)
        if type(T) == list:
            rel_error = mlinalg.compute_error(T, Tsize, S_init, U, dims)
        else:
            S1_init = cnv.unfold(S_init, 1)
            rel_error = mlinalg.compute_error(T, Tsize, S1_init, U, dims)
        return init_factors, rel_error

    return init_factors
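
A minimal usage sketch (not from the library): the SimpleNamespace below is a hypothetical stand-in for the options class, carrying only the attributes read above with assumed default values; whether aux.make_options fills in the remaining attributes that mlsvd needs is also an assumption here.

from types import SimpleNamespace
from numpy import random
from numpy.linalg import norm

# Hypothetical options object; attribute names come from the code above,
# the values are assumed defaults.
options = SimpleNamespace(initialization='random', factors_norm=0,
                          symm=False, display=0)

R = 5
T = random.randn(10, 10, 10)                    # dense test tensor
Tsize = norm(T)                                 # Frobenius norm of T
S, U, T1, sigmas = mlsvd(T, Tsize, R, options)  # compression stage (Example #2)
init_factors = starting_point(T, Tsize, S, U, R, [0, 1, 2], options)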
Example #2
def mlsvd(T, Tsize, R, options):
    """
    This function computes a truncated MLSVD of tensors of any order, so that T ≈ (U_1, ..., U_L)*S, where S is the
    truncated core tensor and U is the list of truncated orthogonal matrices.
    The parameter n_iter of the randomized SVD is set to 2. Increasing this value only pays off when the tensor is
    very noisy; since that issue is already handled by the low rank CPD approximation, n_iter=2 is enough.

    Inputs
    ------
    T: float array
        Objective tensor in coordinates.
    Tsize: float
        Frobenius norm of T.
    R: int
        An upper bound for the multilinear rank of T. Normally one will use the rank of T.
    options: class with the parameters previously defined.

    Outputs
    -------
    S: float array
        Core tensor of the MLSVD.
    U: list of float 2-D arrays
        List with truncated matrices of the original U.
    T1: float 2-D array
        First unfolding of T.
    sigmas: list of float 1-D arrays
        List with truncated arrays of the original sigmas.
    """

    # INITIALIZE RELEVANT VARIABLES.

    sigmas = []
    U = []

    # Check whether T is sparse, in which case it is given as a list [data, idxs, dims].
    if type(T) == list:
        data, idxs, dims = T
    else:
        dims = T.shape
    L = len(dims)

    # Set options.
    options = aux.make_options(options, L)
    trunc_dims = options.trunc_dims
    display = options.display
    mlsvd_method = options.mlsvd_method
    tol_mlsvd = options.tol_mlsvd
    if type(tol_mlsvd) == list:
        if L > 3:
            tol_mlsvd = tol_mlsvd[0]
        else:
            tol_mlsvd = tol_mlsvd[1]
    gpu = options.gpu
    if gpu:
        import pycuda.gpuarray as gpuarray
        import pycuda.autoinit
        from skcuda import linalg, rlinalg

    # tol_mlsvd = -1 means no truncation and no compression, that is, the original tensor.
    if tol_mlsvd == -1:
        T1 = cnv.unfold(T, 1)
        U = [identity(dims[l]) for l in range(L)]
        sigmas = [ones(dims[l]) for l in range(L)]
        if display > 2 or display < -1:
            return T, U, T1, sigmas, 0.0
        else:
            return T, U, T1, sigmas

    # T is sparse.
    elif type(T) == list:
        mlsvd_method = 'sparse'
        for l in range(L):
            Tl = cnv.sparse_unfold(data, idxs, dims, l + 1)
            if l == 0:
                T1 = Tl
            U, sigmas, Vlt, dim = compute_svd(Tl, U, sigmas, dims, R,
                                              mlsvd_method, tol_mlsvd, gpu, L,
                                              l)

        # Compute (U_1^T,...,U_L^T)*T = S.
        new_dims = [U[l].shape[1] for l in range(L)]
        UT = [U[l].T for l in range(L)]
        S = mlinalg.sparse_multilin_mult(UT, data, idxs, new_dims)

    # Compute MLSVD based on the sequentially truncated method.
    elif mlsvd_method == 'seq':
        S_dims = copy(dims)
        S = T
        for l in range(L):
            Sl = cnv.unfold(S, l + 1)
            if l == 0:
                T1 = cnv.unfold_C(S, l + 1)
            U, sigmas, Vlt, dim = compute_svd(Sl, U, sigmas, dims, R,
                                              mlsvd_method, tol_mlsvd, gpu, L,
                                              l)

            # Compute l-th unfolding of S truncated at the l-th mode.
            Sl = (Vlt.T * sigmas[-1]).T
            S_dims[l] = dim
            S = empty(S_dims, dtype=float64)
            S = cnv.foldback(S, Sl, l + 1)

    # Compute MLSVD based on the classic method.
    elif mlsvd_method == 'classic':
        for l in range(L):
            Tl = cnv.unfold(T, l + 1)
            if l == 0:
                T1 = cnv.unfold_C(T, l + 1)
            U, sigmas, Vlt, dim = compute_svd(Tl, U, sigmas, dims, R,
                                              mlsvd_method, tol_mlsvd, gpu, L,
                                              l)

        # Compute (U_1^T,...,U_L^T)*T = S.
        UT = [U[l].T for l in range(L)]
        S = mlinalg.multilin_mult(UT, T1, dims)

    # Apply the specific truncation given by the user, if any.
    if type(trunc_dims) == list:
        slices = []
        for l in range(L):
            slices.append(slice(0, trunc_dims[l]))
            if trunc_dims[l] > U[l].shape[1]:
                print('trunc_dims[', l, '] =', trunc_dims[l], 'and U[', l,
                      '].shape =', U[l].shape)
                sys.exit(
                    'Must have trunc_dims[l] <= min(dims[l], R) for all mode l=1...'
                    + str(L))
            U[l] = U[l][:, :trunc_dims[l]]
        S = S[tuple(slices)]

    # Compute error of compressed tensor.
    if display > 2 or display < -1:
        if type(T) == list:
            best_error = mlinalg.compute_error(T, Tsize, S, U, dims)
        else:
            S1 = cnv.unfold(S, 1)
            best_error = mlinalg.compute_error(T, Tsize, S1, U, S.shape)
        return S, U, T1, sigmas, best_error

    return S, U, T1, sigmas
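
A short sketch (not from the library) of how the outputs fit together, reusing the hypothetical options object from the sketch in Example #1: the reconstruction applies multilin_mult in the reverse direction of the classic branch above, which is essentially the quantity that compute_error reports.

from numpy import random
from numpy.linalg import norm

T = random.randn(20, 20, 20)
Tsize = norm(T)
S, U, T1, sigmas = mlsvd(T, Tsize, 5, options)

# Reconstruct the compressed approximation T_approx = (U_1, ..., U_L)*S by
# multiplying the core tensor back with the U matrices, then check the error.
S1 = cnv.unfold(S, 1)
T_approx = mlinalg.multilin_mult(U, S1, S.shape)
print(norm(T - T_approx) / Tsize)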
Example #3
def bicpd(T, R, fixed_factor, options):
    """
    Practically the same as tricpd, but this function keeps one factor fixed during all the computations. This
    function is meant to be used as part of the tensor train CPD.
    """

    # INITIALIZE RELEVANT VARIABLES

    # Extract all variables from the options class.
    initialization = options.initialization
    refine = options.refine
    symm = options.symm
    display = options.display
    tol_mlsvd = options.tol_mlsvd
    bi_method = options.bi_method_parameters[0]
    if type(tol_mlsvd) == list:
        tol_mlsvd = tol_mlsvd[1]

    # Set the other variables.
    m, n, p = T.shape
    Tsize = norm(T)
    ordering = [0, 1, 2]

    # Test consistency of dimensions and rank.
    aux.consistency(R, (m, n, p), options)

    # COMPRESSION STAGE

    if display > 0:
        print(
            '-----------------------------------------------------------------------------------------------'
        )
        print('Computing MLSVD of T')

    # Compute compressed version of T with the MLSVD. We have that T = (U1, U2, U3)*S.
    if display > 2 or display < -1:
        S, U, T1, sigmas, best_error = cmpr.mlsvd(T, Tsize, R, options)
    else:
        S, U, T1, sigmas = cmpr.mlsvd(T, Tsize, R, options)
    R1, R2, R3 = S.shape
    U1, U2, U3 = U

    # When the tensor is symmetric we want S to have equal dimensions.
    if symm:
        R_min = min(R1, R2, R3)
        R1, R2, R3 = R_min, R_min, R_min
        S = S[:R_min, :R_min, :R_min]
        U1, U2, U3 = U1[:, :R_min], U2[:, :R_min], U3[:, :R_min]

    if display > 0:
        if (R1, R2, R3) == (m, n, p):
            if tol_mlsvd == -1:
                print('    No compression and no truncation requested by user')
                print('    Working with dimensions', T.shape)
            else:
                print('    No compression detected')
                print('    Working with dimensions', T.shape)
        else:
            print('    Compression detected')
            print('    Compressing from', T.shape, 'to', S.shape)
        if display > 2:
            print('    Compression relative error = {:7e}'.format(best_error))

    # GENERATION OF STARTING POINT STAGE

    # Generate an initial point to start the dGN iterations.
    if display > 2 or display < -1:
        [X, Y, Z], init_error = init.starting_point(T, Tsize, S, U, R,
                                                    ordering, options)
    else:
        [X, Y, Z] = init.starting_point(T, Tsize, S, U, R, ordering, options)

    # Discard the factor computed in starting_point and use the given fixed factor instead. Then project it onto the compressed space.
    if fixed_factor[1] == 0:
        X = dot(U1.T, fixed_factor[0])
        X = [X, 0]
    elif fixed_factor[1] == 1:
        Y = dot(U2.T, fixed_factor[0])
        Y = [Y, 1]
    elif fixed_factor[1] == 2:
        Z = dot(U3.T, fixed_factor[0])
        Z = [Z, 2]

    if display > 0:
        print(
            '-----------------------------------------------------------------------------------------------'
        )
        if type(initialization) == list:
            print('Type of initialization: fixed + user')
        else:
            print('Type of initialization: fixed +', initialization)
        if display > 2:
            if fixed_factor[1] == 0:
                S_init = cnv.cpd2tens([X[0], Y, Z])
            elif fixed_factor[1] == 1:
                S_init = cnv.cpd2tens([X, Y[0], Z])
            elif fixed_factor[1] == 2:
                S_init = cnv.cpd2tens([X, Y, Z[0]])
            S1_init = cnv.unfold(S_init, 1)
            init_error = mlinalg.compute_error(T, Tsize, S1_init, [U1, U2, U3],
                                               (R1, R2, R3))
            print(
                '    Initial guess relative error = {:5e}'.format(init_error))

    # DAMPED GAUSS-NEWTON STAGE

    if display > 0:
        print(
            '-----------------------------------------------------------------------------------------------'
        )
        print('Computing CPD of T')

    # Compute the approximated tensor in coordinates with dGN or ALS.
    if bi_method == 'als':
        factors, step_sizes_main, errors_main, improv_main, gradients_main, stop_main = \
            als.als(S, [X, Y, Z], R, options)
    else:
        factors, step_sizes_main, errors_main, improv_main, gradients_main, stop_main = \
            gn.dGN(S, [X, Y, Z], R, options)
    X, Y, Z = factors

    # FINAL WORKS

    # Use the orthogonal transformations to obtain the CPD of T.
    if fixed_factor[1] == 0:
        Y = dot(U2, Y)
        Z = dot(U3, Z)
    elif fixed_factor[1] == 1:
        X = dot(U1, X)
        Z = dot(U3, Z)
    elif fixed_factor[1] == 2:
        X = dot(U1, X)
        Y = dot(U2, Y)

    # Compute error.
    T1_approx = empty(T1.shape)
    if fixed_factor[1] == 0:
        T1_approx = cnv.cpd2unfold1(T1_approx, [fixed_factor[0], Y, Z])
    elif fixed_factor[1] == 1:
        T1_approx = cnv.cpd2unfold1(T1_approx, [X, fixed_factor[0], Z])
    elif fixed_factor[1] == 2:
        T1_approx = cnv.cpd2unfold1(T1_approx, [X, Y, fixed_factor[0]])

    # Save and display final information.
    step_sizes_refine = array([0])
    errors_refine = array([0])
    improv_refine = array([0])
    gradients_refine = array([0])
    stop_refine = 5
    output = aux.output_info(T1, Tsize, T1_approx, step_sizes_main,
                             step_sizes_refine, errors_main, errors_refine,
                             improv_main, improv_refine, gradients_main,
                             gradients_refine, stop_main, stop_refine, options)

    if display > 0:
        print(
            '==============================================================================================='
        )
        print('Final results of bicpd')
        print('    Number of steps =', output.num_steps)
        print('    Relative error =', output.rel_error)
        acc = float('%.6e' % Decimal(output.accuracy))
        print('    Accuracy = ', acc, '%')

    return X, Y, Z, output
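
A hypothetical call (not from the library): fix the first factor at a known m x R matrix A and let bicpd fit the other two. The pair [A, 0] follows the fixed_factor convention used above, where the second entry is the fixed mode; options is the same hypothetical object as in the earlier sketches.

from numpy import random

m, n, p, R = 10, 10, 10, 5
T = random.randn(m, n, p)
A = random.randn(m, R)                          # factor to keep fixed in mode 0
X, Y, Z, output = bicpd(T, R, [A, 0], options)
print(output.rel_error)                         # error of the CPD built from A, Y, Z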
Example #4
def test_truncation(T,
                    trunc_list,
                    display=True,
                    n_iter=2,
                    power_iteration_normalizer='none'):
    """
    This function test one or several possible truncations for the MLSVD of T, showing the  error of the truncations. It
    is possible to accomplish the same results calling the function mlsvd with display=3 but this is not advisable since
    each call recomputes the same unfolding SVD's.
    The variable trunc_list must be a list of truncations. Even if it is only one truncation, it must be a list with one
    truncation only.
    """

    # Set the main variables about T.
    dims = T.shape
    L = len(dims)
    Tsize = norm(T)

    # Transform list into array and get the maximum for each dimension.
    max_trunc_dims = np.max(array(trunc_list), axis=0)

    # Compute truncated SVD of all unfoldings of T.
    sigmas = []
    U = []
    T1 = empty((dims[0], prod(dims) // dims[0]), dtype=float64)
    for l in range(L):
        Tl = cnv.unfold(T, l + 1)
        if l == 0:
            T1 = cnv.unfold_C(T, l + 1)
        low_rank = min(dims[l], max_trunc_dims[l])
        Ul, sigma_l, Vlt = rand_svd(
            Tl,
            low_rank,
            n_iter=n_iter,
            power_iteration_normalizer=power_iteration_normalizer)
        sigmas.append(sigma_l)
        U.append(Ul)

    # Save errors in a list.
    trunc_error = []

    # Truncated MLSVD.
    for trunc in trunc_list:
        # S, U and UT truncated.
        current_dims = trunc
        current_U = []
        current_sigmas = []
        for l in range(L):
            current_U.append(U[l][:, :current_dims[l]])
            current_sigmas.append(sigmas[l][:current_dims[l]])
        current_UT = [current_U[l].T for l in range(L)]
        S = mlinalg.multilin_mult(current_UT, T1, dims)

        # Error of truncation.
        S1 = cnv.unfold(S, 1)
        current_error = mlinalg.compute_error(T, Tsize, S1, current_U,
                                              current_dims)
        trunc_error.append(current_error)

        # Display results.
        if display:
            print('Truncation:', current_dims)
            print('Error:', current_error)
            print()

    return trunc_error
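
A hypothetical call (not from the library) comparing two candidate truncations of the same random tensor; each entry of trunc_list is one multilinear truncation, and the errors come back in the same order.

from numpy import random

T = random.randn(20, 20, 20)
trunc_list = [[5, 5, 5], [10, 10, 10]]          # candidate truncations
errors = test_truncation(T, trunc_list)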