def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of
        shape (rank, ) and normalize the factors themselves.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm stops when
        the variation in reconstruction error between two consecutive
        iterations is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    non_negative : bool, optional
        Perform non-negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``. It should be
        0 where the values are missing and 1 everywhere else. Note: if
        `tensor` is sparse, then `mask` should also be sparse with a fill
        value of 1 (or True). Allows for missing values [2]_.

    Returns
    -------
    KruskalTensor : (weights, factors)
        * weights : 1D array of shape (rank, ).
          All ones if `normalize_factors` is False (default), weights of
          the (normalized) factors otherwise.
        * factors : list of factors of the CP decomposition.
          Element `i` is of shape (tensor.shape[i], rank).
    errors : list
        A list of reconstruction errors at each iteration of the
        algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005):
       163-180.
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, non_negative=non_negative, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [ tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors) ] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) if non_negative: accum = 1 # khatri_rao(factors).tl.dot(khatri_rao(factors)) # simplifies to multiplications sub_indices = [i for i in range(len(factors)) if i != mode] for i, e in enumerate(sub_indices): if i: accum *= tl.dot(tl.transpose(factors[e]), factors[e]) else: accum = tl.dot(tl.transpose(factors[e]), factors[e]) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse * tl.dot( tl.conj(tl.transpose(factor)), factor) if mask is not None: tensor = tensor * mask + tl.kruskal_to_tensor( (None, factors), mask=1 - mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) if non_negative: numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None) denominator = tl.dot(factors[mode], accum) denominator = tl.clip(denominator, a_min=epsilon, a_max=None) factor = factors[mode] * numerator / denominator else: factor = tl.transpose( tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where( tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor / (tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights) rec_error = tl.sqrt( tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor rec_errors.append(rec_error) if iteration >= 1: if verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def non_negative_tucker_hals(tensor, rank, n_iter_max=100, init="svd",
                             svd='numpy_svd', tol=1e-8,
                             sparsity_coefficients=None,
                             core_sparsity_coefficient=None,
                             fixed_modes=None, random_state=None,
                             verbose=False, normalize_factors=False,
                             return_errors=False, exact=False,
                             algorithm='fista'):
    """Non-negative Tucker decomposition.

    Uses HALS to update each factor column-wise and uses FISTA or an
    active-set algorithm to update the core, see [1]_.

    Parameters
    ----------
    tensor : ndarray
    rank : None, int or int list
        Size of the core tensor, ``(len(ranks) == tensor.ndim)``.
        If int, the same rank is used for all modes.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    tol : float, optional
        Tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance.
        Default: 1e-8
    sparsity_coefficients : array of float (one per mode)
        The sparsity coefficients used for each factor.
        If set to None, the algorithm runs without sparsity.
        Default: None
    core_sparsity_coefficient : float
        This coefficient imposes sparsity on the core when it is updated
        with FISTA.
        Default: None
    fixed_modes : array of integers (between 0 and the number of modes)
        Indices of the modes whose factors are not updated.
        Default: None
    verbose : boolean
        Indicates whether the algorithm prints the successive
        reconstruction errors or not.
        Default: False
    normalize_factors : bool, optional
        If True, the factors are normalized and the core absorbs their
        norms.
    return_errors : boolean
        Indicates whether the algorithm should return all reconstruction
        errors and the computation time of each iteration or not.
        Default: False
    exact : bool
        If True, the HALS NNLS subroutines return results with high
        precision, at a high computational cost. If False, the algorithm
        returns an approximate solution.
        Default: False
    algorithm : {'fista', 'active_set'}
        Non-negative least squares solver used to update the core.
        Default: 'fista'

    Returns
    -------
    tensor : TuckerTensor
        (core, factors): the non-negative core and the list of
        non-negative factors of the Tucker decomposition; factor `i` is
        of shape ``(tensor.shape[i], rank[i])``.
    errors : list
        A list of reconstruction errors at each iteration of the
        algorithm.

    Notes
    -----
    Tucker decomposes a tensor into a core tensor and a list of factors:

    .. math::

        tensor = [| core; factors[0], \\ldots, factors[-1] |]

    Writing :math:`M = tensor_{[i]}` for the mode-`i` unfolding, we solve
    the following problem for each factor:

    .. math::

        \\min_{factors[i] \\geq 0}
        \\left\\| M - factors[i] \\, core_{[i]}
        \\Big( \\bigotimes_{j \\neq i} factors[j] \\Big)^T \\right\\|^2

    If we define

    .. math::

        U = core_{[i]} \\Big( \\bigotimes_{j \\neq i} factors[j] \\Big)^T,

    the gradient of the problem becomes:

    .. math::

        \\delta = -M U^T + factors[i] \\, U U^T

    In order to compute :math:`U U^T` and :math:`M U^T` cheaply, we
    define two intermediate variables:

    .. math::

        core\\_cross = core \\times_{j \\neq i} (factors[j]^T factors[j]),
        \\qquad
        tensor\\_cross = tensor \\times_{j \\neq i} factors[j]^T,

    so that

    .. math::

        U U^T = core\\_cross_{[i]} \\, core_{[i]}^T,
        \\qquad
        M U^T = tensor\\_cross_{[i]} \\, core_{[i]}^T

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
""" rank = validate_tucker_rank(tl.shape(tensor), rank=rank) n_modes = tl.ndim(tensor) if sparsity_coefficients is None or not isinstance(sparsity_coefficients, Iterable): sparsity_coefficients = [sparsity_coefficients] * n_modes if fixed_modes is None: fixed_modes = [] # Avoiding errors for fixed_value in fixed_modes: sparsity_coefficients[fixed_value] = None # Generating the mode update sequence modes = [ mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes ] nn_core, nn_factors = initialize_tucker(tensor, rank, modes, init=init, svd=svd, random_state=random_state, non_negative=True) # initialisation - declare local variables norm_tensor = tl.norm(tensor, 2) rec_errors = [] # Iterate over one step of NTD for iteration in range(n_iter_max): # One pass of least squares on each updated mode for mode in modes: # Computing Hadamard of cross-products pseudo_inverse = nn_factors.copy() for i, factor in enumerate(nn_factors): if i != mode: pseudo_inverse[i] = tl.dot(tl.conj(tl.transpose(factor)), factor) # UtU core_cross = multi_mode_dot(nn_core, pseudo_inverse, skip=mode) UtU = tl.dot(unfold(core_cross, mode), tl.transpose(unfold(nn_core, mode))) # UtM tensor_cross = multi_mode_dot(tensor, nn_factors, skip=mode, transpose=True) MtU = tl.dot(unfold(tensor_cross, mode), tl.transpose(unfold(nn_core, mode))) UtM = tl.transpose(MtU) # Call the hals resolution with nnls, optimizing the current mode nn_factor, _, _, _ = hals_nnls( UtM, UtU, tl.transpose(nn_factors[mode]), n_iter_max=100, sparsity_coefficient=sparsity_coefficients[mode], exact=exact) nn_factors[mode] = tl.transpose(nn_factor) # updating core if algorithm == 'fista': pseudo_inverse[-1] = tl.dot(tl.transpose(nn_factors[-1]), nn_factors[-1]) core_estimation = multi_mode_dot(tensor, nn_factors, transpose=True) learning_rate = 1 for MtM in pseudo_inverse: learning_rate *= 1 / (tl.partial_svd(MtM)[1][0]) nn_core = fista( core_estimation, pseudo_inverse, x=nn_core, n_iter_max=n_iter_max, sparsity_coef=core_sparsity_coefficient, lr=learning_rate, ) if algorithm == 'active_set': pseudo_inverse[-1] = tl.dot(tl.transpose(nn_factors[-1]), nn_factors[-1]) core_estimation_vec = tl.base.tensor_to_vec( tl.tenalg.mode_dot(tensor_cross, tl.transpose(nn_factors[modes[-1]]), modes[-1])) pseudo_inverse_kr = tl.tenalg.kronecker(pseudo_inverse) vectorcore = active_set_nnls(core_estimation_vec, pseudo_inverse_kr, x=nn_core, n_iter_max=n_iter_max) nn_core = tl.reshape(vectorcore, tl.shape(nn_core)) # Adding the l1 norm value to the reconstruction error sparsity_error = 0 for index, sparse in enumerate(sparsity_coefficients): if sparse: sparsity_error += 2 * (sparse * tl.norm(nn_factors[index], order=1)) # error computation rec_error = tl.norm(tensor - tucker_to_tensor( (nn_core, nn_factors)), 2) / norm_tensor rec_errors.append(rec_error) if iteration > 1: if verbose: print('reconstruction error={}, variation={}.'.format( rec_errors[-1], rec_errors[-2] - rec_errors[-1])) if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol: if verbose: print('converged in {} iterations.'.format(iteration)) break if normalize_factors: nn_core, nn_factors = tucker_normalize((nn_core, nn_factors)) tensor = TuckerTensor((nn_core, nn_factors)) if return_errors: return tensor, rec_errors else: return tensor
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error',
            fixed_modes=None, svd_mask_repeats=5, linesearch=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |].

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of
        shape (rank, ) and normalize the factors themselves.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm stops when
        the variation in reconstruction error between two consecutive
        iterations is less than `tol` (see `cvg_criterion`).
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``. It should be
        0 where the values are missing and 1 everywhere else. Note: if
        `tensor` is sparse, then `mask` should also be sparse with a fill
        value of 1 (or True). Allows for missing values [2]_.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, used if `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate `tensor` as the sum of
        a low_rank_component and a sparse_component, where
        low_rank_component = cp_to_tensor((weights, factors)).
        `sparsity` denotes the desired fraction or number of non-zero
        elements in the sparse_component of `tensor`.
    fixed_modes : list, default is None
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    svd_mask_repeats : int
        If using a tensor with masked values, this initializes using SVD
        multiple times to remove the effect of the missing values on the
        initialization.
    linesearch : bool, default is False
        Whether to perform line search as proposed by Bro [3]_.

    Returns
    -------
    CPTensor : (weights, factors)
        * weights : 1D array of shape (rank, ).
          All ones if `normalize_factors` is False (default), weights of
          the (normalized) factors otherwise.
        * factors : list of factors of the CP decomposition.
          Element `i` is of shape ``(tensor.shape[i], rank)``.
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.
    errors : list
        A list of reconstruction errors at each iteration of the
        algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005):
       163-180.
    .. [3] R. Bro, "Multi-Way Analysis in the Food Industry: Models,
Bro, "Multi-Way Analysis in the Food Industry: Models, Algorithms, and Applications", PhD., University of Amsterdam, 1998 """ rank = validate_cp_rank(tl.shape(tensor), rank=rank) if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max if linesearch: acc_pow = 2.0 # Extrapolate to the iteration^(1/acc_pow) ahead acc_fail = 0 # How many times acceleration have failed max_fail = 4 # Increase acc_pow with one after max_fail failure weights, factors = initialize_cp(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) if mask is not None and init == "svd": for _ in range(svd_mask_repeats): tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask) weights, factors = initialize_cp(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) Id = tl.eye(rank, **tl.context(tensor))*l2_reg if tl.ndim(tensor)-1 in fixed_modes: warnings.warn('You asked for fixing the last mode, which is not supported.\n The last mode will not be fixed. Consider using tl.moveaxis()') fixed_modes.remove(tl.ndim(tensor)-1) modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes] if sparsity: sparse_component = tl.zeros_like(tensor) if isinstance(sparsity, float): sparsity = int(sparsity * np.prod(tensor.shape)) else: sparsity = int(sparsity) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)] if linesearch and iteration % 2 == 0: factors_last = [tl.copy(f) for f in factors] weights_last = tl.copy(weights) if verbose > 1: print("Starting iteration", iteration + 1) for mode in modes_list: if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor) pseudo_inverse += Id if not iteration and weights is not None: # Take into account init weights mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode) else: mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: scales = tl.norm(factor, 2, axis=0) weights = tl.where(scales==0, tl.ones(tl.shape(scales), **tl.context(factor)), scales) factor = factor / tl.reshape(weights, (1, -1)) factors[mode] = factor # Will we be performing a line search iteration if linesearch and iteration % 2 == 0 and iteration > 5: line_iter = True else: line_iter = False # Calculate the current unnormalized error if we need it if (tol or return_errors) and line_iter is False: unnorml_rec_error, tensor, norm_tensor = error_calc(tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp) else: if mask is not None: tensor = tensor*mask + tl.cp_to_tensor((weights, factors), mask=1-mask) # Start line search if requested. 
        if line_iter is True:
            jump = iteration ** (1.0 / acc_pow)

            new_weights = weights_last + (weights - weights_last) * jump
            new_factors = [factors_last[ii] + (factors[ii] - factors_last[ii])*jump
                           for ii in range(tl.ndim(tensor))]

            new_rec_error, new_tensor, new_norm_tensor = error_calc(
                tensor, norm_tensor, new_weights, new_factors, sparsity, mask)

            if (new_rec_error / new_norm_tensor) < rec_errors[-1]:
                factors, weights = new_factors, new_weights
                tensor, norm_tensor = new_tensor, new_norm_tensor
                unnorml_rec_error = new_rec_error
                acc_fail = 0

                if verbose:
                    print("Accepted line search jump of {}.".format(jump))
            else:
                unnorml_rec_error, tensor, norm_tensor = error_calc(
                    tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
                acc_fail += 1

                if verbose:
                    print("Line search failed for jump of {}.".format(jump))

                if acc_fail == max_fail:
                    acc_pow += 1.0
                    acc_fail = 0

                    if verbose:
                        print("Reducing acceleration.")

        rec_error = unnorml_rec_error / norm_tensor
        rec_errors.append(rec_error)

        if tol:
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, unnormalized = {}".format(
                        iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(
            tensor - cp_to_tensor((weights, factors)), sparsity)
        cp_tensor = (cp_tensor, sparse_component)

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
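# Hedged usage sketch (not part of the original source): the line-search
# variant accepts the same inputs as plain ALS; `linesearch=True` only
# changes how the iterates are extrapolated every other iteration.
# Assumes the module-level `tl` and `np` imports.
def _demo_parafac_linesearch():
    rng = np.random.RandomState(3)
    X = tl.tensor(rng.rand(12, 13, 14))
    cp_tensor, errors = parafac(X, rank=5, linesearch=True, return_errors=True)
    print('{} iterations, final error {}'.format(len(errors), errors[-1]))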
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            tol=1e-8, orthogonalise=False, random_state=None,
            verbose=False, return_errors=False, non_negative=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm stops when
        the variation in reconstruction error between two consecutive
        iterations is less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    non_negative : bool, optional
        Perform non-negative PARAFAC. See :func:`non_negative_parafac`.

    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition; element `i` is of shape
        (tensor.shape[i], rank).
    errors : list
        A list of reconstruction errors at each iteration of the
        algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(factor)[0] for factor in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                # khatri_rao(factors).T.dot(khatri_rao(factors))
                # simplifies to a Hadamard product of the Gram matrices
                accum = 1
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            # factor = tl.dot(unfold(tensor, mode),
            #                 khatri_rao(factors, skip_matrix=mode).conj())
            mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(tl.prod(
                tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0), 0))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(
                tl.abs(norm_tensor**2 + factors_norm - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if return_errors:
        return factors, rec_errors
    else:
        return factors
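# Numeric sanity-check sketch (plain numpy, not part of the original
# source) for the shortcut used in the error computation above:
# ||T - R||^2 = ||T||^2 + ||R||^2 - 2 <T, R>.
def _check_error_identity():
    rng = np.random.RandomState(7)
    T, R = rng.rand(4, 5, 6), rng.rand(4, 5, 6)
    lhs = np.linalg.norm(T - R) ** 2
    rhs = np.linalg.norm(T) ** 2 + np.linalg.norm(R) ** 2 - 2 * np.sum(T * R)
    assert np.allclose(lhs, rhs)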
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            non_negative=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations.
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        Function to use to compute the SVD; acceptable values are in
        tensorly.SVD_FUNS.
    normalize_factors : bool, optional
        If True, aggregate the norms of the factors in a 1D tensor of
        shape (rank, ) and normalize the factors themselves.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm stops when
        the variation in reconstruction error between two consecutive
        iterations is less than `tol` (see `cvg_criterion`).
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity.
    return_errors : bool, optional
        Activate return of iteration errors.
    mask : ndarray
        Array of booleans with the same shape as ``tensor``. It should be
        0 where the values are missing and 1 everywhere else. Note: if
        `tensor` is sparse, then `mask` should also be sparse with a fill
        value of 1 (or True). Allows for missing values [2]_.
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, used if `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate `tensor` as the sum of
        a low_rank_component and a sparse_component, where
        low_rank_component = kruskal_to_tensor((weights, factors)).
        `sparsity` denotes the desired fraction or number of non-zero
        elements in the sparse_component of `tensor`.

    Returns
    -------
    KruskalTensor : (weights, factors)
        * weights : 1D array of shape (rank, ).
          All ones if `normalize_factors` is False (default), weights of
          the (normalized) factors otherwise.
        * factors : list of factors of the CP decomposition.
          Element `i` is of shape (tensor.shape[i], rank).
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.
    errors : list
        A list of reconstruction errors at each iteration of the
        algorithm.

    References
    ----------
    .. [1] T.G. Kolda and B.W. Bader, "Tensor Decompositions and
       Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005):
       163-180.
""" epsilon = 10e-12 if orthogonalise and not isinstance(orthogonalise, int): orthogonalise = n_iter_max factors = initialize_factors(tensor, rank, init=init, svd=svd, random_state=random_state, normalize_factors=normalize_factors) rec_errors = [] norm_tensor = tl.norm(tensor, 2) weights = tl.ones(rank, **tl.context(tensor)) Id = tl.eye(rank, **tl.context(tensor))*l2_reg if sparsity: sparse_component = tl.zeros_like(tensor) if isinstance(sparsity, float): sparsity = int(sparsity * np.prod(tensor.shape)) else: sparsity = int(sparsity) for iteration in range(n_iter_max): if orthogonalise and iteration <= orthogonalise: factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f for i, f in enumerate(factors)] if verbose > 1: print("Starting iteration", iteration + 1) for mode in range(tl.ndim(tensor)): if verbose > 1: print("Mode", mode, "of", tl.ndim(tensor)) pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor)) for i, factor in enumerate(factors): if i != mode: pseudo_inverse = pseudo_inverse*tl.dot(tl.conj(tl.transpose(factor)), factor) pseudo_inverse += Id if mask is not None: tensor = tensor*mask + tl.kruskal_to_tensor((None, factors), mask=1-mask) mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode) factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)), tl.transpose(mttkrp))) if normalize_factors: weights = tl.norm(factor, order=2, axis=0) weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype), tl.ones(tl.shape(weights), **tl.context(factors[0])), weights) factor = factor/(tl.reshape(weights, (1, -1))) factors[mode] = factor if tol: if sparsity: low_rank_component = kruskal_to_tensor((weights, factors)) sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity) unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2) else: # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec> factors_norm = kruskal_norm((weights, factors)) # mttkrp and factor for the last mode. This is equivalent to the # inner product <tensor, factorization> iprod = tl.sum(tl.sum(mttkrp*factor, axis=0)*weights) unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2*iprod)) rec_error = unnorml_rec_error / norm_tensor rec_errors.append(rec_error) if iteration >= 1: rec_error_decrease = rec_errors[-2] - rec_errors[-1] if verbose: print("iteration {}, reconstraction error: {}, decrease = {}, unnormalized = {}".format(iteration, rec_error, rec_error_decrease, unnorml_rec_error)) if cvg_criterion == 'abs_rec_error': stop_flag = abs(rec_error_decrease) < tol elif cvg_criterion == 'rec_error': stop_flag = rec_error_decrease < tol else: raise TypeError("Unknown convergence criterion") if stop_flag: if verbose: print("PARAFAC converged after {} iterations".format(iteration)) break else: if verbose: print('reconstruction error={}'.format(rec_errors[-1])) kruskal_tensor = KruskalTensor((weights, factors)) if sparsity: sparse_component = sparsify_tensor(tensor -\ kruskal_to_tensor((weights, factors)),\ sparsity) kruskal_tensor = (kruskal_tensor, sparse_component) if return_errors: return kruskal_tensor, rec_errors else: return kruskal_tensor
def one_ntd_step(tensor, ranks, in_core, in_factors, norm_tensor,
                 sparsity_coefficients, fixed_modes, normalize,
                 mode_core_norm, alpha=0.5, delta=0.01):
    """One pass of Hierarchical Alternating Least Squares update along all
    modes, followed by a gradient update on the core, which decreases the
    reconstruction error of a Nonnegative Tucker Decomposition (NTD).

    Updates the factors by solving a least squares problem per mode, as
    described in [1]. Note that the unfolding order is the one described
    in [2], which differs from [1].

    This function is strictly superior to a least squares solver run on
    the matricized problems min_X ||Y - AX||_F^2, since A is structured
    as a Kronecker product of the other factors/core.

    Tensors are manipulated with the tensorly toolbox [3].

    Parameters
    ----------
    tensor : tensorly tensor
        The input nonnegative tensor (e.g. a spectrogram tensor).
    ranks : list of integers
        Ranks for each factor of the decomposition.
    in_core : tensorly tensor
        Current estimate of the core.
    in_factors : list of array
        Current estimates of the factors of this NTD. The value of
        factors[update_mode] will be updated using a least squares
        update. The values in in_factors are not modified.
    norm_tensor : float
        The Frobenius norm of the input tensor.
    sparsity_coefficients : list of float (number of modes + 1 for the core)
        The sparsity coefficients for each factor and for the core,
        respectively.
    fixed_modes : list of integers (between 0 and the number of modes + 1
        for the core)
        Indices of the factors not to update, taken in the order of the
        modes, with the core last.
    normalize : list of boolean (number of modes + 1 for the core)
        Whether each factor (and, lastly, the core) should be normalized.
        The normalization is an l_2 normalization of each of the rank
        components (for the factors, each column is normalized, i.e. each
        atom of the dimension of the current rank).
    mode_core_norm : integer or None
        The mode along which to normalize the core, or None if no
        normalization should be enforced. Only used if the last element
        of the `normalize` argument is set to True. Mode indices start
        at 0.
        Default: None
    alpha : positive float
        Ratio between outer computations and inner loops. Typically set
        to 0.5 or 1. Set to +inf for the deterministic mode, as it
        depends on runtime.
        Default: 0.5
    delta : float in [0,1]
        Early-stopping criterion for the inner loops, which run while
        err_k > delta * err_0. Set it small for an almost exact NNLS
        solution, or larger (e.g. 1e-2) for the inner loops of an NTD
        computation.
        Default: 0.01

    Returns
    -------
    core : tensorly tensor
        The core tensor linking the factors of the decomposition.
    factors : list of factors
        An array containing all the factors computed with the NTD.
    cost_fct_val :
        The value of the cost function at this step, normalized by the
        squared norm of the original tensor.

    References
    ----------
    [1] Tamara G. Kolda and Brett W. Bader. "Tensor decompositions and
    applications", SIAM Review 51.3 (2009), pp. 455-500.

    [2] Jeremy E. Cohen. "About notations in multiway array processing",
    arXiv preprint arXiv:1511.01306, (2015).

    [3] J. Kossaifi et al.
"TensorLy: Tensor Learning in Python", arxiv preprint (2018) """ # Avoiding errors for fixed_value in fixed_modes: sparsity_coefficients[fixed_value] = None # Copy core = in_core.copy() factors = in_factors.copy() # Generating the mode update sequence modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes] for mode in modes_list: #unfolded_core = tl.base.unfold(core, mode) tic = time.time() # UtU # First, element-wise products # some computations could be reused but the gain is small. elemprod = factors.copy() for i, factor in enumerate(factors): if i != mode: elemprod[i] = tl.dot(tl.conj(tl.transpose(factor)), factor) # Second, the multiway product with core G temp = tl.tenalg.multi_mode_dot(core, elemprod, skip=mode) # this line can be computed with tensor contractions con_modes = [i for i in range(tl.ndim(tensor)) if i != mode] UtU = tl.tenalg.contract(temp, con_modes, core, con_modes) #UtU = unfold(temp, mode)@tl.transpose(unfold(core, mode)) # UtM # First, the contraction of data with other factors temp = tl.tenalg.multi_mode_dot(tensor, factors, skip=mode, transpose = True) # again, computable by tensor contractions #MtU = unfold(temp, mode)@tl.transpose(unfold(core, mode)) MtU = tl.tenalg.contract(temp, con_modes, core, con_modes) UtM = tl.transpose(MtU) # Computing the Kronekcer product #kron = tl.tenalg.kronecker(factors, skip_matrix = mode, reverse = False) #kron_core = tl.dot(kron, tl.transpose(unfolded_core)) #rhs = tl.dot(unfolded_tensors[mode], kron_core) # Maybe suboptimal #cross = tl.dot(tl.transpose(kron_core), kron_core) timer = time.time() - tic # Call the hals resolution with nnls, optimizing the current mode factors[mode] = tl.transpose(nnls.hals_nnls_acc(UtM, UtU, tl.transpose(factors[mode]), maxiter=100, atime=timer, alpha=alpha, delta=delta, sparsity_coefficient = sparsity_coefficients[mode], normalize = normalize[mode])[0]) #refolded_tensor = tl.base.fold(unfolded_tensors[0], 0, tensor_shape) # Core update #all_MtX = tl.tenalg.multi_mode_dot(tensor, factors, transpose = True) # better implementation: reuse the computation of temp ! 
    # Also reuse elemprod from the last update
    all_MtX = tl.tenalg.mode_dot(temp, tl.transpose(factors[modes_list[-1]]),
                                 modes_list[-1])

    all_MtM = tl.copy(elemprod)
    all_MtM[modes_list[-1]] = factors[modes_list[-1]].T @ factors[modes_list[-1]]
    # all_MtM = np.array([fac.T @ fac for fac in factors])

    # Projected gradient
    gradient_step = 1
    for MtM in all_MtM:
        gradient_step *= 1 / (scipy.sparse.linalg.svds(MtM, k=1)[1][0])
    gradient_step = round(gradient_step, 6)  # Heuristic, to avoid consecutive imprecision

    cnt = 1
    upd_0 = 0
    upd = 1

    if sparsity_coefficients[-1] is None:
        sparse = 0
    else:
        sparse = sparsity_coefficients[-1]

    # TODO: dynamic stopping criterion
    # Maybe: try fast gradient instead of gradient
    while cnt <= 300 and upd >= delta * upd_0:
        gradient = (- all_MtX
                    + tl.tenalg.multi_mode_dot(core, all_MtM, transpose=False)
                    + sparse * tl.ones(core.shape))

        # Proposition of reformulation for error computations
        delta_core = np.minimum(gradient_step * gradient, core)
        core = core - delta_core
        upd = tl.norm(delta_core)
        if cnt == 1:
            upd_0 = upd
        cnt += 1

    if normalize[-1]:
        unfolded_core = tl.unfold(core, mode_core_norm)
        for idx_mat in range(unfolded_core.shape[0]):
            if tl.norm(unfolded_core[idx_mat]) != 0:
                unfolded_core[idx_mat] = unfolded_core[idx_mat] / tl.norm(unfolded_core[idx_mat], 2)
        core = tl.fold(unfolded_core, mode_core_norm, core.shape)

    # Adding the l1 norm value to the reconstruction error
    sparsity_error = 0
    for index, sparse in enumerate(sparsity_coefficients):
        if sparse:
            if index < len(factors):
                sparsity_error += 2 * (sparse * np.linalg.norm(factors[index], ord=1))
            elif index == len(factors):
                sparsity_error += 2 * (sparse * tl.norm(core, 1))
            else:
                raise NotImplementedError("TODEBUG: Too many sparsity coefficients, should have been raised before.")

    rec_error = (norm_tensor ** 2
                 - 2 * tl.tenalg.inner(all_MtX, core)
                 + tl.tenalg.inner(tl.tenalg.multi_mode_dot(core, all_MtM, transpose=False), core))
    cost_fct_val = (rec_error + sparsity_error) / (norm_tensor ** 2)

    # exhaustive_rec_error = (tl.norm(tensor - tl.tenalg.multi_mode_dot(core, factors, transpose=False), 2) + sparsity_error) / norm_tensor
    # print("diff: " + str(rec_error - exhaustive_rec_error))
    return core, factors, cost_fct_val  # exhaustive_rec_error
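# Small check (plain numpy, not part of the original source) of why the
# core update above is a projected gradient step: subtracting
# np.minimum(gradient_step * gradient, core) from `core` equals
# max(core - gradient_step * gradient, 0), so the core stays nonnegative.
def _check_projected_gradient_step():
    import numpy as np
    core = np.array([0.5, 0.1])
    gradient = np.array([2.0, -1.0])
    step = 0.3
    via_min = core - np.minimum(step * gradient, core)
    via_max = np.maximum(core - step * gradient, 0.0)
    assert np.allclose(via_min, via_max)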
    Id = tl.eye(rank, **tl.context(tensor)) * l2_reg
    # With the default l2_reg=0 this gives an all-zero matrix; as the
    # operations below show, it acts as a Tikhonov (l2) regularization
    # term added to the ALS normal equations.

    for iteration in range(n_iter_max):
        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:  # update each mode in turn
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            # all-ones matrix, accumulating the Hadamard product of Gram
            # matrices: the third term of the closed-form ALS solution
            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id  # Id plays the role of the regularization term

            if not iteration and weights is not None:
                # Take into account init weights;
                # the mttkrp gives the first two terms of the closed-form
                # ALS solution
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            # All these transposes are here because solve(a, b) solves
            # a x = b for x
            factor = tl.transpose(
                tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                         tl.transpose(mttkrp)))

            # If column-wise normalization is needed
            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor
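# Sketch (plain numpy, hypothetical matrices, not part of the original
# source) of the point made by the annotations above: adding `l2_reg * I`
# to the Hadamard product of Gram matrices turns each ALS subproblem into
# ridge regression, solving (V + lambda*I) x = b instead of V x = b and
# shrinking the solution.
def _demo_ridge_term():
    import numpy as np
    rng = np.random.RandomState(1)
    V = rng.rand(4, 4)
    V = V @ V.T  # a Gram-like symmetric positive semi-definite matrix
    b = rng.rand(4)
    for lam in (0.0, 0.1, 1.0):
        x = np.linalg.solve(V + lam * np.eye(4), b)
        print(lam, np.linalg.norm(x))  # larger lam gives a smaller-norm x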