示例#1
0
def non_negative_parafac(tensor,
                         rank,
                         n_iter_max=100,
                         init='svd',
                         svd='numpy_svd',
                         tol=10e-7,
                         random_state=None,
                         verbose=0,
                         normalize_factors=False,
                         return_errors=False,
                         mask=None,
                         orthogonalise=False,
                         cvg_criterion='abs_rec_error',
                         fixed_modes=[]):
    """
    Non-negative CP decomposition
    Uses multiplicative updates, see [2]_
    This is the same as parafac(non_negative=True).
    Parameters
    ----------
    tensor : ndarray
    rank   : int
            number of components
    n_iter_max : int
                 maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
          tolerance: the algorithm stops when the variation in
          the reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity
    fixed_modes : list, default is []
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    Returns
    -------
    factors : ndarray list
            list of positive factors of the CP decomposition
            element `i` is of shape ``(tensor.shape[i], rank)``
    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan,
       "Non-negative tensor factorization with applications to statistics and computer vision",
       In Proceedings of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = 10e-12
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if mask is not None and init == "svd":
        message = "Masking occurs after initialization. Therefore, random initialization is recommended."
        warnings.warn(message, Warning)

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    weights, factors = initialize_cp(tensor,
                                     rank,
                                     init=init,
                                     svd=svd,
                                     random_state=random_state,
                                     non_negative=True,
                                     normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    if tl.ndim(tensor) - 1 in fixed_modes:
        warnings.warn(
            'You asked for fixing the last mode, which is not supported while tol is fixed.\n The last mode will not be fixed. Consider using tl.moveaxis()'
        )
        fixed_modes.remove(tl.ndim(tensor) - 1)
    modes_list = [
        mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes
    ]

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            accum = 1
            # khatri_rao(factors).tl.dot(khatri_rao(factors))
            # simplifies to multiplications
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor(
                    (None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            factors[mode] = factor

        if normalize_factors:
            weights, factors = cp_normalize((weights, factors))

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = cp_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm**2 -
                       2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print(
                        "iteration {}, reconstraction error: {}, decrease = {}"
                        .format(iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(
                            iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
示例#2
0
def parafac(tensor,
            rank,
            n_iter_max=100,
            init='svd',
            svd='numpy_svd',
            normalize_factors=False,
            tol=1e-8,
            orthogonalise=False,
            random_state=None,
            verbose=0,
            return_errors=False,
            non_negative=False,
            mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)
    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    tol : float, optional
        (Default: 1e-6) Relative reconstruction error tolerance. The
        algorithm is considered to have found the global minimum when the
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note:  if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_


    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default), 
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition element `i` is of shape
            (tensor.shape[i], rank)

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
       
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values." 
            Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.


    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor,
                                 rank,
                                 init=init,
                                 svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [
                tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                for i, f in enumerate(factors)
            ]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor(
                    (None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(
                    tl.abs(weights) <= tl.eps(tensor.dtype),
                    tl.ones(tl.shape(weights), **tl.context(factors[0])),
                    weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm**2 -
                       2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
示例#3
0
def right_left_ttcross_step(input_tensor, k, rank, row_idx, col_idx):
    """ Compute the next (left) core's col indices by QR decomposition.

            For the current Tensor train core, we use the row indices and col indices to extract the entries from the input tensor
            and compute the next core's col indices by QR and max volume algorithm.

    Parameters
    ----------

    k: int
            the actual sweep iteration
    rank: list of int
            list of upper rank (tensor_order)
    row_idx: list of list of int
            list of (tensor_order-1) of lists of left indices
    col_idx: list of list of int
            list of (tensor_order-1) of lists of right indices

    Returns
    -------
    next_col_idx : list of int
            the list of new col indices,
    fibers_list : list of slice
            the used fibers,
    Q_skeleton : matrix
            approximation of Q as product of Q and inverse of its maximum volume submatrix
    """

    tensor_shape = tl.shape(input_tensor)
    tensor_order = tl.ndim(input_tensor)
    fibers_list = []

    # Extract fibers
    for i in range(rank[k - 1]):
        for j in range(rank[k]):
            fiber = row_idx[k - 1][i] + (slice(None, None, None),) + col_idx[k - 1][j]
            fibers_list.append(fiber)

    if k == tensor_order:  # Is[k] will be empty
        idx = tuple(zip(*row_idx[k - 1])) + (slice(None, None, None),)
    else:
        idx = [[] for i in range(tensor_order)]
        for lidx in row_idx[k - 1]:
            for ridx in col_idx[k - 1]:
                for j, jj in enumerate(lidx): idx[j].append(jj)
                for j, jj in enumerate(ridx): idx[len(lidx) + 1 + j].append(jj)
        idx[k - 1] = slice(None, None, None)
        idx = tuple(idx)

    core = input_tensor[idx]
    # shape the core as a 3-tensor_order cube
    core = tl.reshape(core, (rank[k - 1], rank[k], tensor_shape[k - 1]))
    core = tl.transpose(core, (0, 2, 1))
    # merge n_{k-1} and r_k, get a matrix
    core = tl.reshape(core, (rank[k - 1], tensor_shape[k - 1] * rank[k]))
    core = tl.transpose(core)

    # Compute QR decomposition
    (Q, R) = tl.qr(core)
    # Maxvol
    (J, Q_inv) = maxvol(Q)
    Q_inv = tl.tensor(Q_inv)
    Q_skeleton = tl.dot(Q, Q_inv)

    # Retrive indices in folded tensor
    new_idx = [np.unravel_index(idx, [tensor_shape[k - 1], rank[k]]) for idx in J]  # First retrive idx in folded core
    next_col_idx = [(jc[0],) + col_idx[k - 1][jc[1]] for jc in new_idx]  # Then reconstruct the idx in the tensor

    return (next_col_idx, fibers_list, Q_skeleton)
示例#4
0
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',\
            normalize_factors=False, orthogonalise=False,\
            tol=1e-8, random_state=None,\
            verbose=0, return_errors=False,\
            sparsity = None,\
            l2_reg = 0,  mask=None,\
            cvg_criterion = 'abs_rec_error',\
            fixed_modes = [],
            svd_mask_repeats=5,
            linesearch = False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)
    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |].

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    tol : float, optional
        (Default: 1e-6) Relative reconstruction error tolerance. The
        algorithm is considered to have found the global minimum when the
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note:  if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
       Stopping criterion for ALS, works if `tol` is not None. 
       If 'rec_error',  ALS stops at current iteration if ``(previous rec_error - current rec_error) < tol``.
       If 'abs_rec_error', ALS terminates when `|previous rec_error - current rec_error| < tol`.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of low_rank_component and sparse_component, where low_rank_component = cp_to_tensor((weights, factors)). `sparsity` denotes desired fraction or number of non-zero elements in the sparse_component of the `tensor`.
    fixed_modes : list, default is []
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    svd_mask_repeats: int
        If using a tensor with masked values, this initializes using SVD multiple times to
        remove the effect of these missing values on the initialization.
    linesearch : bool, default is False
        Whether to perform line search as proposed by Bro [3].

    Returns
    -------
    CPTensor : (weight, factors)
        * weights : 1D array of shape (rank, )

          * all ones if normalize_factors is False (default)
          * weights of the (normalized) factors otherwise

        * factors : List of factors of the CP decomposition element `i` is of shape ``(tensor.shape[i], rank)``
        * sparse_component : nD array of shape tensor.shape. Returns only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications", SIAM 
           REVIEW, vol. 51, n. 3, pp. 455-500, 2009.

    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values." 
           Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.

    .. [3] R. Bro, "Multi-Way Analysis in the Food Industry: Models, Algorithms, and 
           Applications", PhD., University of Amsterdam, 1998
    """
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)
    
    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    if linesearch:
        acc_pow = 2.0 # Extrapolate to the iteration^(1/acc_pow) ahead
        acc_fail = 0 # How many times acceleration have failed
        max_fail = 4 # Increase acc_pow with one after max_fail failure

    weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 normalize_factors=normalize_factors)

    if mask is not None and init == "svd":
        for _ in range(svd_mask_repeats):
            tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask)

            weights, factors = initialize_cp(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors)

    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    Id = tl.eye(rank, **tl.context(tensor))*l2_reg

    if tl.ndim(tensor)-1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported.\n The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor)-1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)]

        if linesearch and iteration % 2 == 0:
            factors_last = [tl.copy(f) for f in factors]
            weights_last = tl.copy(weights)

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if not iteration and weights is not None:
                # Take into account init weights
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                    tl.transpose(mttkrp)))

            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales==0, tl.ones(tl.shape(scales), **tl.context(factor)), scales)
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor

        # Will we be performing a line search iteration
        if linesearch and iteration % 2 == 0 and iteration > 5:
            line_iter = True
        else:
            line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            unnorml_rec_error, tensor, norm_tensor = error_calc(tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
        else:
            if mask is not None:
                tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask)

        # Start line search if requested.
        if line_iter is True:
            jump = iteration ** (1.0 / acc_pow)

            new_weights = weights_last + (weights - weights_last) * jump
            new_factors = [factors_last[ii] + (factors[ii] - factors_last[ii])*jump for ii in range(tl.ndim(tensor))]

            new_rec_error, new_tensor, new_norm_tensor = error_calc(tensor, norm_tensor, new_weights, new_factors, sparsity, mask)

            if (new_rec_error / new_norm_tensor) < rec_errors[-1]:
                factors, weights = new_factors, new_weights
                tensor, norm_tensor = new_tensor, new_norm_tensor
                unnorml_rec_error = new_rec_error
                acc_fail = 0

                if verbose:
                    print("Accepted line search jump of {}.".format(jump))
            else:
                unnorml_rec_error, tensor, norm_tensor = error_calc(tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
                acc_fail += 1

                if verbose:
                    print("Line search failed for jump of {}.".format(jump))

                if acc_fail == max_fail:
                    acc_pow += 1.0
                    acc_fail = 0

                    if verbose:
                        print("Reducing acceleration.")

        rec_error = unnorml_rec_error / norm_tensor
        rec_errors.append(rec_error)

        if tol:

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]
                
                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag =  rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")
                
                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
                    
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))
    
    if sparsity:
        sparse_component = sparsify_tensor(tensor -\
                                           cp_to_tensor((weights, factors)),\
                                           sparsity)
        cp_tensor = (cp_tensor, sparse_component)

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
def parafac(tensor,
            rank,
            n_iter_max=100,
            init='svd',
            svd='numpy_svd',
            tol=1e-8,
            orthogonalise=False,
            random_state=None,
            verbose=False,
            return_errors=False,
            non_negative=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        (Default: 1e-6) Relative reconstruction error tolerance. The
        algorithm is considered to have found the global minimum when the
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.

    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition element `i` is of shape
        (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] tl.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor,
                                 rank,
                                 init=init,
                                 svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factor = [tl.qr(factor)[0] for factor in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                accum = 1
                # khatri_rao(factors).tl.dot(khatri_rao(factors))
                # simplifies to multiplications
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)),
                                       **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            #factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode).conj())
            mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(
                tl.prod(
                    tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0),
                    0))
            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm -
                       2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if return_errors:
        return factors, rec_errors
    else:
        return factors
示例#6
0
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0, normalize_factors=False,
                         return_errors=False, mask=None, orthogonalise=False, cvg_criterion='abs_rec_error'):
    """
    Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    This is the same as parafac(non_negative=True).

    Parameters
    ----------
    tensor : ndarray
    rank   : int
            number of components
    n_iter_max : int
                 maximum number of iteration
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
          tolerance: the algorithm stops when the variation in
          the reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity

    Returns
    -------
    factors : ndarray list
            list of positive factors of the CP decomposition
            element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan,
       "Non-negative tensor factorization with applications to statistics and computer vision",
       In Proceedings of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=True,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            accum = 1
            # khatri_rao(factors).tl.dot(khatri_rao(factors))
            # simplifies to multiplications
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator
            
            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), 
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor/(tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) / norm_tensor
            rec_errors.append(rec_error)
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]
                
                if verbose:
                    print("iteration {}, reconstraction error: {}, decrease = {}".format(iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag =  rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")
                
                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break 
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
示例#7
0
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',\
            normalize_factors=False, orthogonalise=False,\
            tol=1e-8, random_state=None,\
            verbose=0, return_errors=False,\
            non_negative=False,\
            sparsity = None,\
            l2_reg = 0,  mask=None,\
            cvg_criterion = 'abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)
    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : if True, aggregate the weights of each factor in a 1D-tensor
        of shape (rank, ), which will contain the norms of the factors
    tol : float, optional
        (Default: 1e-6) Relative reconstruction error tolerance. The
        algorithm is considered to have found the global minimum when the
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor`` should be 0 where
        the values are missing and 1 everywhere else. Note:  if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
       Stopping criterion for ALS, works if `tol` is not None. 
       If 'rec_error',  ALS stops at current iteration if (previous rec_error - current rec_error) < tol.
       If 'abs_rec_error', ALS terminates when |previous rec_error - current rec_error| < tol.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of low_rank_component and sparse_component, where low_rank_component = kruskal_to_tensor((weights, factors)). `sparsity` denotes desired fraction or number of non-zero elements in the sparse_component of the `tensor`.

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
            all ones if normalize_factors is False (default), 
            weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition element `i` is of shape
            (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape. Returns only if `sparsity` is not None.

    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
       
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values." 
            Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.

    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))
    Id = tl.eye(rank, **tl.context(tensor))*l2_reg

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)
            
    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if mask is not None:
                tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                    tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), 
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor/(tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            if sparsity:
                low_rank_component = kruskal_to_tensor((weights, factors))
                sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity)
                
                unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2)
            else:
                # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
                factors_norm = kruskal_norm((weights, factors))

                # mttkrp and factor for the last mode. This is equivalent to the
                # inner product <tensor, factorization>
                iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights)
                unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod))
                
            rec_error = unnorml_rec_error / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]
                
                if verbose:
                    print("iteration {},  reconstraction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag =  rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")
                
                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
                    
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))
    
    if sparsity:
        sparse_component = sparsify_tensor(tensor -\
                                           kruskal_to_tensor((weights, factors)),\
                                           sparsity)
        kruskal_tensor = (kruskal_tensor, sparse_component)

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
示例#8
0
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd', tol=1e-8,
            orthogonalise=False, random_state=None, verbose=False, return_errors=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank  : int
        Number of components.
    n_iter_max : int
        Maximum number of iteration
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        (Default: 1e-6) Relative reconstruction error tolerance. The
        algorithm is considered to have found the global minimum when the
        reconstruction error is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors


    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition element `i` is of shape
        (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithms.

    References
    ----------
    .. [1] tl.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factor = [tl.qr(factor)[0] for factor in factors]

        for mode in range(tl.ndim(tensor)):
            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse*tl.dot(tl.transpose(factor), factor)
            factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode))
            factor = tl.transpose(tl.solve(tl.transpose(pseudo_inverse), tl.transpose(factor)))
            factors[mode] = factor

        if tol:
            rec_error = tl.norm(tensor - kruskal_to_tensor(factors), 2) / norm_tensor
            rec_errors.append(rec_error)

            if iteration > 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
                    
    if return_errors:
        return factors, rec_errors
    else:
        return factors