# Imports for the functions below. The decomposition helpers referenced later
# (initialize_factors, initialize_cp, unfolding_dot_khatri_rao, kruskal_norm,
# KruskalTensor, CPTensor, cp_to_tensor, kruskal_to_tensor, validate_cp_rank,
# error_calc, soft_thresholding, monotonicity_prox, non_negative_parafac_hals
# and the parafac2 helpers) live in tensorly's internal modules and are
# assumed to be in scope.
import warnings
from warnings import warn

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

import tensorly as tl
from tensorly import backend as T


def simplex_prox(tensor, parameter):
    """Projects the input tensor on the simplex of radius `parameter`.

    Parameters
    ----------
    tensor : ndarray
    parameter : float

    Returns
    -------
    ndarray

    References
    ----------
    .. [1] Held, Michael, Philip Wolfe, and Harlan P. Crowder.
           "Validation of subgradient optimization."
           Mathematical Programming 6.1 (1974): 62-88.
    """
    _, col = tl.shape(tensor)
    tensor = tl.clip(tensor, 0, tl.max(tensor))
    tensor_sort = tl.sort(tensor, axis=0, descending=True)
    # Per column, count the sorted entries lying above the running threshold
    to_change = tl.sum(tl.where(tensor_sort > (tl.cumsum(tensor_sort, axis=0) - parameter),
                                1.0, 0.0), axis=0)
    difference = tl.zeros(col)
    for i in range(col):
        if to_change[i] > 0:
            difference = tl.index_update(
                difference, tl.index[i],
                tl.cumsum(tensor_sort, axis=0)[int(to_change[i] - 1), i])
    difference = (difference - parameter) / to_change
    return tl.clip(tensor - difference, a_min=0)
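# A minimal usage sketch for simplex_prox (assumes the NumPy backend): each
# column of a random non-negative matrix is projected onto the simplex of
# radius 1, so every column of the result is non-negative and sums to ~1.
_x = tl.tensor(np.random.rand(5, 3))
_x_proj = simplex_prox(_x, 1.0)
# tl.sum(_x_proj, axis=0) is approximately [1., 1., 1.]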
def sparsify_tensor(tensor, card):
    """Zeros out all elements in `tensor` except the `card` elements with
    maximum absolute values.

    Parameters
    ----------
    tensor : ndarray
    card : int
        Desired number of non-zero elements in `tensor`

    Returns
    -------
    ndarray of shape tensor.shape
    """
    if card >= np.prod(tensor.shape):
        return tensor
    bound = tl.sort(tl.abs(tensor), axis=None)[-card]
    return tl.where(tl.abs(tensor) < bound,
                    tl.zeros(tensor.shape, **tl.context(tensor)), tensor)
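# Usage sketch for sparsify_tensor: keep only the 4 largest-magnitude entries
# of a random matrix; everything below that threshold is zeroed out.
_t = tl.tensor(np.random.randn(6, 5))
_t_sparse = sparsify_tensor(_t, 4)
# at most 4 entries of _t_sparse are non-zero (ties at the threshold aside)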
def show_proj(factors, matlab_data, ex1_em0):
    color = ["r", "g", "b"]
    for i in range(3):
        mat1 = np.array(factors[1][1]).transpose()[i]
        w = tl.norm(factors[1][0][i])
        sq_mat = np.array(mat1).reshape(61, 201)  # emission x excitation grid
        m = np.max(sq_mat)
        # np.where: tl.where has no single-argument (index-returning) form
        ind = np.where(sq_mat == m)
        if ex1_em0 == 1:
            x = matlab_data["EmAx"][0]
            y = w * tl.transpose(sq_mat[ind[0]])
        else:
            x = matlab_data["ExAx"][0]
            y = w * np.transpose(tl.transpose(sq_mat)[ind[1]])
        plt.grid()
        plt.plot(x, y, color[i])
    plt.show()
def normalize_factors(factors):
    """Normalizes factors to unit length and returns factor magnitudes.

    Turns ``factors = [|U_1, ... U_n|]`` into ``[weights; |V_1, ... V_n|]``,
    where the columns of each `V_k` are normalized to unit Euclidean length
    from the columns of `U_k` with the normalizing constants absorbed into
    `weights`. In the special case of a symmetric tensor, `weights` holds the
    eigenvalues of the tensor.

    Parameters
    ----------
    factors : ndarray list
        list of matrices, all with the same number of columns, i.e.::

            for u in U:
                u[i].shape == (s_i, R)

        where `R` is fixed while `s_i` can vary with `i`

    Returns
    -------
    normalized_factors : list of ndarrays
        list of matrices with the same shape as `factors`
    weights : ndarray
        vector of length `R` holding normalizing constants
    """
    # allocate variables for weights, and normalized factors
    rank = factors[0].shape[1]
    weights = tl.ones(rank, **tl.context(factors[0]))
    normalized_factors = []

    # normalize columns of factor matrices
    for factor in factors:
        scales = tl.norm(factor, axis=0)
        weights *= scales
        scales_non_zero = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factors[0])),
                                   scales)
        normalized_factors.append(factor / scales_non_zero)
    return normalized_factors, weights
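# Usage sketch for normalize_factors: absorb the column norms of two random
# factor matrices into a weight vector of length rank.
_U = [tl.tensor(np.random.rand(6, 2)), tl.tensor(np.random.rand(7, 2))]
_V, _w = normalize_factors(_U)
# every column of _V[0] and _V[1] now has unit Euclidean norm, and _w holds
# the product of the absorbed norms, one entry per component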
def show_z(factors, ex, em):
    # ax = fig.add_subplot(111, projection='3d')
    all_ind = []
    for i in range(3):
        fig = plt.figure()
        mat1 = np.array(factors[1][1]).transpose()[i]
        w = tl.norm(factors[1][0][i])
        my_col = cm.jet(w * mat1 / np.amax(w * mat1))
        # ax.scatter(ex, em, tl.zeros_like(w * mat1), marker=".", color=my_col)
        sq_mat = np.array(mat1).reshape(61, 201)
        m = np.max(sq_mat)
        # np.where: tl.where has no single-argument (index-returning) form
        ind = np.where(sq_mat == m)
        plt.scatter(ex, em, marker=".", color=my_col)
        plt.scatter(ex[0] + ind[1], em[0] + ind[0], marker="*", color="w",
                    label="max")
        all_ind.append([ex[0] + ind[1], em[0] + ind[0]])
        plt.legend()
        plt.show()
    return all_ind
def hard_thresholding(tensor, number_of_non_zero):
    """Proximal operator of the l0 "norm".

    Keeps the `number_of_non_zero` largest-magnitude elements untouched and
    sets all other elements to zero.

    Parameters
    ----------
    tensor : ndarray
    number_of_non_zero : int

    Returns
    -------
    ndarray
        Thresholded tensor on which the operator has been applied
    """
    tensor_vec = tl.copy(tl.tensor_to_vec(tensor))
    # double argsort yields, for each entry, its rank by decreasing magnitude
    sorted_indices = tl.argsort(tl.argsort(tl.abs(tensor_vec), axis=0,
                                           descending=True), axis=0)
    return tl.reshape(
        tl.where(sorted_indices < number_of_non_zero, tensor_vec,
                 tl.tensor(0, **tl.context(tensor_vec))), tensor.shape)
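# Usage sketch for hard_thresholding: the l0 proximal operator keeps the
# 3 largest-magnitude entries of a tensor and zeroes the rest, shape preserved.
_t = tl.tensor(np.random.randn(2, 3, 2))
_t0 = hard_thresholding(_t, 3)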
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the variation in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non_negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T. G. Kolda and B. W. Bader, "Tensor Decompositions and
           Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
           Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                # khatri_rao(factors).T.dot(khatri_rao(factors))
                # simplifies to a Hadamard product of Gram matrices
                accum = 1
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors),
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(
                    tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                             tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2
                                       - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def make_svd_non_negative(tensor, U, S, V, nntype):
    """Use the NNDSVD method to transform SVD results into a non-negative form.

    This method leads to more efficient solving with NNMF [1]_.

    Parameters
    ----------
    tensor : tensor being decomposed
    U, S, V : SVD factorization results
    nntype : {'nndsvd', 'nndsvda'}
        Whether to fill small values with 0.0 ('nndsvd') or with the tensor
        mean ('nndsvda').

    References
    ----------
    .. [1] Boutsidis & Gallopoulos. Pattern Recognition, 41(4): 1350-1362, 2008.
    """
    # NNDSVD initialization
    W = tl.zeros_like(U)
    H = tl.zeros_like(V)

    # The leading singular triplet is non-negative,
    # so it can be used as-is for initialization.
    W = tl.index_update(W, tl.index[:, 0], tl.sqrt(S[0]) * tl.abs(U[:, 0]))
    H = tl.index_update(H, tl.index[0, :], tl.sqrt(S[0]) * tl.abs(V[0, :]))

    for j in range(1, tl.shape(U)[1]):
        x, y = U[:, j], V[j, :]

        # extract positive and negative parts of column vectors
        x_p, y_p = tl.clip(x, a_min=0.0), tl.clip(y, a_min=0.0)
        x_n, y_n = tl.abs(tl.clip(x, a_max=0.0)), tl.abs(tl.clip(y, a_max=0.0))

        # and their norms
        x_p_nrm, y_p_nrm = tl.norm(x_p), tl.norm(y_p)
        x_n_nrm, y_n_nrm = tl.norm(x_n), tl.norm(y_n)

        m_p, m_n = x_p_nrm * y_p_nrm, x_n_nrm * y_n_nrm

        # choose the update with the larger product of norms
        if m_p > m_n:
            u = x_p / x_p_nrm
            v = y_p / y_p_nrm
            sigma = m_p
        else:
            u = x_n / x_n_nrm
            v = y_n / y_n_nrm
            sigma = m_n

        lbd = tl.sqrt(S[j] * sigma)
        W = tl.index_update(W, tl.index[:, j], lbd * u)
        H = tl.index_update(H, tl.index[j, :], lbd * v)

    # After this point we no longer need H
    eps = tl.eps(tensor.dtype)

    if nntype == "nndsvd":
        W = soft_thresholding(W, eps)
    elif nntype == "nndsvda":
        avg = tl.mean(tensor)
        W = tl.where(W < eps, tl.ones(tl.shape(W), **tl.context(W)) * avg, W)
    else:
        raise ValueError('Invalid nntype parameter: got %r instead of one of %r'
                         % (nntype, ('nndsvd', 'nndsvda')))

    return W
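# Usage sketch for make_svd_non_negative: take a truncated SVD of a
# non-negative matrix and flip it into a non-negative factor; 'nndsvda'
# replaces near-zero entries with the matrix mean instead of zeros.
# Assumes tl.partial_svd returns V with one singular vector per row.
_m = tl.tensor(np.random.rand(10, 8))
_U, _S, _V = tl.partial_svd(_m, n_eigenvecs=3)
_W = make_svd_non_negative(_m, _U, _S, _V, "nndsvda")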
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error',
            fixed_modes=[], svd_mask_repeats=5, linesearch=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |].

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the variation in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where low_rank_component =
        cp_to_tensor((weights, factors)). `sparsity` denotes the desired
        fraction or number of non-zero elements in the sparse_component of
        the `tensor`.
    fixed_modes : list, default is []
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.
    svd_mask_repeats : int
        If using a tensor with masked values, this initializes using SVD
        multiple times to remove the effect of these missing values on the
        initialization.
    linesearch : bool, default is False
        Whether to perform line search as proposed by Bro [3]_.

    Returns
    -------
    CPTensor : (weight, factors)
        * weights : 1D array of shape (rank, )

          * all ones if normalize_factors is False (default)
          * weights of the (normalized) factors otherwise

        * factors : List of factors of the CP decomposition;
          element `i` is of shape ``(tensor.shape[i], rank)``
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T. G. Kolda and B. W. Bader, "Tensor Decompositions and
           Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
           Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    .. [3] R. Bro, "Multi-Way Analysis in the Food Industry: Models,
           Algorithms, and Applications", PhD thesis, University of Amsterdam, 1998.
    """
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    if linesearch:
        acc_pow = 2.0  # extrapolate to the iteration^(1/acc_pow) ahead
        acc_fail = 0   # how many times acceleration has failed
        max_fail = 4   # increase acc_pow by one after max_fail failures

    weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                     random_state=random_state,
                                     normalize_factors=normalize_factors)

    if mask is not None and init == "svd":
        for _ in range(svd_mask_repeats):
            tensor = tensor * mask + tl.cp_to_tensor((weights, factors),
                                                     mask=1 - mask)
            weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                             random_state=random_state,
                                             normalize_factors=normalize_factors)

    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    Id = tl.eye(rank, **tl.context(tensor)) * l2_reg

    if tl.ndim(tensor) - 1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported.\n'
                      'The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor) - 1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if linesearch and iteration % 2 == 0:
            factors_last = [tl.copy(f) for f in factors]
            weights_last = tl.copy(weights)

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if not iteration and weights is not None:
                # Take into account init weights
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor

        # Will we be performing a line search iteration?
        if linesearch and iteration % 2 == 0 and iteration > 5:
            line_iter = True
        else:
            line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            unnorml_rec_error, tensor, norm_tensor = error_calc(
                tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
        else:
            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor((weights, factors),
                                                         mask=1 - mask)

        # Start line search if requested.
        if line_iter is True:
            jump = iteration ** (1.0 / acc_pow)

            new_weights = weights_last + (weights - weights_last) * jump
            new_factors = [factors_last[ii] + (factors[ii] - factors_last[ii]) * jump
                           for ii in range(tl.ndim(tensor))]

            new_rec_error, new_tensor, new_norm_tensor = error_calc(
                tensor, norm_tensor, new_weights, new_factors, sparsity, mask)

            if (new_rec_error / new_norm_tensor) < rec_errors[-1]:
                factors, weights = new_factors, new_weights
                tensor, norm_tensor = new_tensor, new_norm_tensor
                unnorml_rec_error = new_rec_error
                acc_fail = 0

                if verbose:
                    print("Accepted line search jump of {}.".format(jump))
            else:
                unnorml_rec_error, tensor, norm_tensor = error_calc(
                    tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
                acc_fail += 1

                if verbose:
                    print("Line search failed for jump of {}.".format(jump))

                if acc_fail == max_fail:
                    acc_pow += 1.0
                    acc_fail = 0

                    if verbose:
                        print("Reducing acceleration.")

        rec_error = unnorml_rec_error / norm_tensor
        rec_errors.append(rec_error)

        if tol:
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, "
                          "unnormalized = {}".format(iteration, rec_error,
                                                     rec_error_decrease,
                                                     unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(tensor - cp_to_tensor((weights, factors)),
                                           sparsity)
        cp_tensor = (cp_tensor, sparse_component)

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
# --- Annotated excerpt: the per-mode ALS update inside parafac above ---
            if not iteration and weights is not None:
                # Take into account init weights:
                # the first two terms of the explicit ALS solution
                mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
            else:
                mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            # So many transposes here because solve(a, b) solves ax = b for x
            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            # If the columns should be normalized to unit length
            if normalize_factors:
                scales = tl.norm(factor, 2, axis=0)
                weights = tl.where(scales == 0,
                                   tl.ones(tl.shape(scales), **tl.context(factor)),
                                   scales)
                # Careful with this normalization: the division broadcasts
                # row-wise, which can surprise readers new to NumPy
                factor = factor / tl.reshape(weights, (1, -1))

            factors[mode] = factor

        # Will we be performing a line search iteration?
        line_iter = False

        # Calculate the current unnormalized error if we need it
        if (tol or return_errors) and line_iter is False:
            # the unnormalized reconstruction error
            unnorml_rec_error, tensor, norm_tensor = error_calc(
                tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp)
        else:
            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor((weights, factors),
                                                         mask=1 - mask)
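# Usage sketch for the CPTensor-era parafac above: fit a rank-3 model with
# Bro-style line search enabled, which can reduce the number of ALS
# iterations. Assumes initialize_cp, error_calc and validate_cp_rank are in scope.
_t = tl.tensor(np.random.rand(8, 9, 10))
_cp, _errs = parafac(_t, rank=3, linesearch=True, return_errors=True)
_rec = tl.cp_to_tensor(_cp)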
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False,
            tol=1e-8, random_state=None,
            verbose=0, return_errors=False,
            non_negative=False,
            sparsity=None,
            l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that
    ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the variation in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where low_rank_component =
        kruskal_to_tensor((weights, factors)). `sparsity` denotes the desired
        fraction or number of non-zero elements in the sparse_component of
        the `tensor`.

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape.
          Returned only if `sparsity` is not None.
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T. G. Kolda and B. W. Bader, "Tensor Decompositions and
           Applications", SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
           Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))
    Id = tl.eye(rank, **tl.context(tensor)) * l2_reg

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(
                        tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors),
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            if sparsity:
                low_rank_component = kruskal_to_tensor((weights, factors))
                sparse_component = sparsify_tensor(tensor - low_rank_component,
                                                   sparsity)
                unnorml_rec_error = tl.norm(tensor - low_rank_component
                                            - sparse_component, 2)
            else:
                # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
                factors_norm = kruskal_norm((weights, factors))

                # mttkrp and factor for the last mode. This is equivalent to
                # the inner product <tensor, factorization>
                iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
                unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2
                                                   + factors_norm**2 - 2 * iprod))

            rec_error = unnorml_rec_error / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, "
                          "unnormalized = {}".format(iteration, rec_error,
                                                     rec_error_decrease,
                                                     unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(
            tensor - kruskal_to_tensor((weights, factors)), sparsity)
        kruskal_tensor = (kruskal_tensor, sparse_component)

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def fista(UtM, UtU, x=None, n_iter_max=100, non_negative=True,
          sparsity_coef=0, lr=None, tol=10e-8):
    """Fast Iterative Shrinkage-Thresholding Algorithm (FISTA)

    Computes an approximate (non-negative) solution to the linear system
    Ux = M.

    Parameters
    ----------
    UtM : ndarray
        Pre-computed product of the transpose of U and M
    UtU : ndarray
        Pre-computed product of the transpose of U and U
    x : init
        Default: None
    n_iter_max : int
        Maximum number of iterations
        Default: 100
    non_negative : bool, default is True
        if True, the result will be non-negative
    lr : float
        learning rate
        Default: None
    sparsity_coef : float or None
    tol : float
        stopping criterion

    Returns
    -------
    x : approximate solution such that Ux = M

    Notes
    -----
    We solve the following problem

    .. math:: \\frac{1}{2} \\|m - Ux\\|_2^2 + \\lambda \\|x\\|_1

    References
    ----------
    .. [1] Beck, A., & Teboulle, M. (2009). A fast iterative
           shrinkage-thresholding algorithm for linear inverse problems.
           SIAM Journal on Imaging Sciences, 2(1), 183-202.
    """
    if sparsity_coef is None:
        sparsity_coef = 0
    if x is None:
        x = tl.zeros(tl.shape(UtM), **tl.context(UtM))
    if lr is None:
        # 1 / (largest singular value of UtU): a valid Lipschitz step size
        lr = 1 / (tl.partial_svd(UtU)[1][0])

    # Parameters
    momentum_old = tl.tensor(1.0)
    norm_0 = 0.0
    x_update = tl.copy(x)

    for iteration in range(n_iter_max):
        if isinstance(UtU, list):
            x_gradient = -UtM + tl.tenalg.multi_mode_dot(
                x_update, UtU, transpose=False) + sparsity_coef
        else:
            x_gradient = -UtM + tl.dot(UtU, x_update) + sparsity_coef

        if non_negative is True:
            # cap the step so the gradient update cannot cross below zero
            x_gradient = tl.where(lr * x_gradient < x_update, x_gradient,
                                  x_update / lr)

        x_new = x_update - lr * x_gradient
        momentum = (1 + tl.sqrt(1 + 4 * momentum_old**2)) / 2
        x_update = x_new + ((momentum_old - 1) / momentum) * (x_new - x)
        momentum_old = momentum
        x = tl.copy(x_new)
        norm = tl.norm(lr * x_gradient)
        if iteration == 1:
            norm_0 = norm
        if norm < tol * norm_0:
            break
    return x
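# Usage sketch for fista: solve min_{x >= 0} ||M - Ux||_F^2 from the
# precomputed Gram products UtU and UtM (assumes the NumPy backend).
_Umat = tl.tensor(np.random.rand(20, 5))
_Mmat = tl.tensor(np.random.rand(20, 8))
_UtU = tl.dot(tl.transpose(_Umat), _Umat)
_UtM = tl.dot(tl.transpose(_Umat), _Mmat)
_x = fista(_UtM, _UtU, n_iter_max=200, non_negative=True)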
def hals_nnls(UtM, UtU, V=None, n_iter_max=500, tol=10e-8,
              sparsity_coefficient=None, normalize=False, nonzero_rows=False,
              exact=False):
    """Non-Negative Least Squares (NNLS)

    Computes an approximate solution of a nonnegative least squares problem
    (NNLS) with an exact block-coordinate descent scheme. M is m-by-n, U is
    m-by-r, V is r-by-n. All matrices are nonnegative componentwise.

    This algorithm is defined in [1]_ as an accelerated version of the HALS
    algorithm. It features two accelerations: an early-stop criterion, and a
    complexity averaging between precomputations and loops, so as to use
    large precomputations several times.

    This function is made to be used repeatedly inside an outer-loop
    alternating algorithm, for instance for computing nonnegative matrix
    factorization or tensor factorization.

    Parameters
    ----------
    UtM : r-by-n array
        Pre-computed product of the transpose of U and M, used in the update rule
    UtU : r-by-r array
        Pre-computed product of the transpose of U and U, used in the update rule
    V : r-by-n initialization matrix (mutable)
        Initialized V array
        By default, is initialized with one non-zero entry per column,
        corresponding to the column of U closest to the corresponding
        column of M.
    n_iter_max : positive integer
        Upper bound on the number of iterations
        Default: 500
    tol : float in [0,1]
        Early-stop criterion: iterate while err_k > delta * err_0. Set small
        for an almost exact NNLS solution, or larger (e.g. 1e-2) for inner
        loops of a PARAFAC computation.
        Default: 10e-8
    sparsity_coefficient : float or None
        The coefficient controlling the sparsity level in the objective
        function. If set to None, the problem is solved unconstrained.
        Default: None
    nonzero_rows : boolean
        True if the rows of the V matrix can't be zero, False if they can
        Default: False
    exact : boolean
        If True, the algorithm returns a result with high precision at a high
        computational cost. If False, the algorithm returns an approximate
        solution.
        Default: False

    Returns
    -------
    V : array
        an r-by-n nonnegative matrix, approximately
        :math:`\\operatorname{argmin}_{V \\geq 0} \\|M - UV\\|_F^2`
    rec_error : float
        norm of the last update step, used in the error stop criterion
    iteration : integer
        final number of update iterations performed
    complexity_ratio : float
        number of loops authorized by the stop criterion

    Notes
    -----
    We solve the following problem

    .. math:: \\min_{V \\geq 0} \\|M - UV\\|_F^2

    The matrix V is updated row-wise. The update rule for this resolution is

    .. math:: V[k, :]^{(j+1)} = V[k, :]^{(j)} + \\frac{UtM[k, :] - UtU[k, :] \\cdot V^{(j)}}{UtU[k, k]}

    with j the update iteration. This problem can also be defined by adding a
    sparsity coefficient, enhancing sparsity in the solution [2]_. In this
    sparse version, the update rule becomes

    .. math:: V[k, :]^{(j+1)} = V[k, :]^{(j)} + \\frac{UtM[k, :] - UtU[k, :] \\cdot V^{(j)} - \\lambda}{UtU[k, k]}

    where :math:`\\lambda` is the sparsity coefficient.

    References
    ----------
    .. [1] N. Gillis and F. Glineur, Accelerated Multiplicative Updates and
           Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,
           Neural Computation 24 (4): 1085-1105, 2012.
    .. [2] J. Eggert and E. Korner. "Sparse coding and NMF." 2004 IEEE
           International Joint Conference on Neural Networks
           (IEEE Cat. No. 04CH37541). Vol. 4. IEEE, 2004.
    """
    rank, n_col_M = tl.shape(UtM)
    if V is None:  # checks if V is empty
        V = tl.solve(UtU, UtM)
        V = tl.clip(V, a_min=0, a_max=None)
        # Scaling
        scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V)))
        V = V * scale

    if exact:
        n_iter_max = 50000
        tol = 10e-16

    for iteration in range(n_iter_max):
        rec_error = 0
        for k in range(rank):
            if UtU[k, k]:
                if sparsity_coefficient is not None:
                    # Modifying the function for sparsification
                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient)
                        / UtU[k, k] > -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient)
                        / UtU[k, k],
                        -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)
                else:
                    # without sparsity
                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k] > -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k],
                        -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)

                rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV))

                # Safety procedure, if rows aren't allowed to be zero
                if nonzero_rows and tl.all(V[k, :] == 0):
                    V[k, :] = tl.eps(V.dtype) * tl.max(V)
            elif nonzero_rows:
                raise ValueError("Column " + str(k)
                                 + " of U is zero with nonzero condition")

            if normalize:
                norm = tl.norm(V[k, :])
                if norm != 0:
                    V[k, :] /= norm
                else:
                    sqrt_n = 1 / n_col_M ** (1 / 2)
                    V[k, :] = [sqrt_n for i in range(n_col_M)]

        if iteration == 0:
            rec_error0 = rec_error

        numerator = tl.shape(V)[0] * tl.shape(V)[1] + tl.shape(V)[1] * rank
        denominator = tl.shape(V)[0] * rank + tl.shape(V)[0]
        complexity_ratio = 1 + (numerator / denominator)
        if exact:
            if rec_error < tol * rec_error0:
                break
        else:
            if rec_error < tol * rec_error0 or iteration > 1 + 0.5 * complexity_ratio:
                break
    return V, rec_error, iteration, complexity_ratio
def unimodality_prox(tensor):
    """Projects each column of the input array onto the set of unimodal
    arrays, so that
    ``x[1] <= x[2] <= ... <= x[j] >= x[j+1] >= ... >= x[n]``
    is satisfied columnwise.

    Parameters
    ----------
    tensor : ndarray

    Returns
    -------
    ndarray
        A tensor whose columns are unimodal.

    References
    ----------
    .. [1] Bro, R., & Sidiropoulos, N. D. (1998). Least squares algorithms
           under unimodality and non-negativity constraints. Journal of
           Chemometrics, 12(4), 223-247.
    """
    if tl.ndim(tensor) == 1:
        tensor = tl.vec_to_tensor(tensor, [tl.shape(tensor)[0], 1])
    elif tl.ndim(tensor) > 2:
        raise ValueError(
            "Unimodality prox doesn't support an input which has more than 2 dimensions.")

    tensor_unimodal = tl.copy(tensor)
    monotone_increasing = tl.tensor(monotonicity_prox(tensor),
                                    **tl.context(tensor))
    monotone_decreasing = tl.tensor(monotonicity_prox(tensor, decreasing=True),
                                    **tl.context(tensor))

    # Next line finds mutual peak points
    values = tl.tensor(
        tl.to_numpy((tensor - monotone_decreasing >= 0))
        * tl.to_numpy((tensor - monotone_increasing >= 0)),
        **tl.context(tensor))

    sum_inc = tl.where(values == 1,
                       tl.cumsum(tl.abs(tensor - monotone_increasing), axis=0),
                       tl.tensor(0, **tl.context(tensor)))
    sum_inc = tl.where(values == 1,
                       sum_inc - tl.abs(tensor - monotone_increasing),
                       tl.tensor(0, **tl.context(tensor)))
    sum_dec = tl.where(
        tl.flip(values, axis=0) == 1,
        tl.cumsum(tl.abs(tl.flip(tensor, axis=0)
                         - tl.flip(monotone_decreasing, axis=0)), axis=0),
        tl.tensor(0, **tl.context(tensor)))
    sum_dec = tl.where(
        tl.flip(values, axis=0) == 1,
        sum_dec - tl.abs(tl.flip(tensor, axis=0)
                         - tl.flip(monotone_decreasing, axis=0)),
        tl.tensor(0, **tl.context(tensor)))

    difference = tl.where(values == 1, sum_inc + tl.flip(sum_dec, axis=0),
                          tl.max(sum_inc + tl.flip(sum_dec, axis=0)))
    min_indice = tl.argmin(tl.tensor(difference), axis=0)
    for i in range(len(min_indice)):
        # increasing part up to the peak, decreasing part after it
        tensor_unimodal = tl.index_update(
            tensor_unimodal, tl.index[:int(min_indice[i]), i],
            monotone_increasing[:int(min_indice[i]), i])
        tensor_unimodal = tl.index_update(
            tensor_unimodal, tl.index[int(min_indice[i] + 1):, i],
            monotone_decreasing[int(min_indice[i] + 1):, i])
    return tensor_unimodal
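# Usage sketch for unimodality_prox (assumes monotonicity_prox is in scope):
# project a noisy bell curve so that it rises to a single peak and then falls.
_sig = tl.tensor(np.exp(-0.5 * ((np.arange(21) - 10.0) / 3.0) ** 2)
                 + 0.05 * np.random.randn(21))
_uni = unimodality_prox(_sig)  # shape (21, 1): a 1D input is treated as one column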
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd',
                         svd='numpy_svd', tol=10e-7, random_state=None,
                         verbose=0, normalize_factors=False,
                         return_errors=False, mask=None, orthogonalise=False,
                         cvg_criterion='abs_rec_error'):
    """Non-negative CP decomposition

    Uses multiplicative updates, see [2]_. This is the same as
    ``parafac(non_negative=True)``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization
           with applications to statistics and computer vision", In
           Proceedings of the International Conference on Machine Learning
           (ICML), pp 792-799, ICML, 2005
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=True,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            # khatri_rao(factors).T.dot(khatri_rao(factors))
            # simplifies to a Hadamard product of Gram matrices
            accum = 1
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors),
                                                              mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2
                                       - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
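# Usage sketch for non_negative_parafac: multiplicative updates keep every
# factor entrywise non-negative when the input tensor is non-negative.
_t = tl.tensor(np.random.rand(6, 7, 8))
_kt = non_negative_parafac(_t, rank=3)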
def parafac2(tensor_slices, rank, n_iter_max=100, init='random',
             svd='numpy_svd', normalize_factors=False, tol=1e-8,
             random_state=None, verbose=False, return_errors=False,
             n_iter_parafac=5):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating
    least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor
    defined by `tensor_slices`. The decomposition is of the form
    :math:`(A [B_i] C)` such that the i-th frontal slice, :math:`X_i`, of
    :math:`X` is given by

    .. math::

        X_i = B_i diag(a_i) C^T,

    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are
    equal to the :math:`i`-th row of the :math:`I \times R` factor matrix
    :math:`A`, :math:`B_i` is a :math:`J_i \times R` factor matrix such that
    the cross product matrix :math:`B_{i_1}^T B_{i_1}` is constant for all
    :math:`i`, and :math:`C` is a :math:`K \times R` factor matrix. To
    compute this decomposition, we reformulate the expression for :math:`B_i`
    such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` orthogonal matrix and
    :math:`B` is a :math:`R \times R` matrix.

    An alternative formulation of the PARAFAC2 decomposition is that the
    tensor element :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},

    with the same constraints on :math:`B_i` as above.

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that
        may have different numbers of rows. Note that the second mode factor
        matrices are allowed to change over the first mode, not the third
        mode as some other implementations use (see note below).
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool (optional)
        If True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors. Note that
        there may be some inaccuracies in the component weights.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have converged when the variation in reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    n_iter_parafac : int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration

    Returns
    -------
    Parafac2Tensor : (weight, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * projection_matrices : List of projection matrices used to create
          evolving factors.
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999),
           PARAFAC2 - Part I. A direct fitting algorithm for the PARAFAC2
           model. J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from
    the one in [1]_. The difference lies in that, here, the second mode
    changes over the first mode, whereas in [1]_, the second mode changes
    over the third mode. We made this change so that the function accepts
    both lists of matrices and a single nd-array as input without any
    reordering of the modes.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state)

    rec_errors = []
    norm_tensor = tl.sqrt(
        sum(tl.norm(tensor_slice, 2)**2 for tensor_slice in tensor_slices))
    svd_fun = _get_svd(svd)

    projected_tensor = tl.zeros([factor.shape[0] for factor in factors],
                                **T.context(factors[0]))

    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(tensor_slices, factors, svd_fun,
                                           out=projections)
        projected_tensor = _project_tensor_slices(tensor_slices, projections,
                                                  out=projected_tensor)
        _, factors = parafac(projected_tensor, rank,
                             n_iter_max=n_iter_parafac,
                             init=(weights, factors), svd=svd,
                             orthogonalise=False, verbose=verbose,
                             return_errors=False, normalize_factors=False,
                             mask=None, random_state=random_state, tol=1e-100)

        if normalize_factors:
            new_factors = []
            for factor in factors:
                norms = T.norm(factor, axis=0)
                norms = tl.where(tl.abs(norms) <= tl.eps(factor.dtype),
                                 tl.ones(tl.shape(norms), **tl.context(factors[0])),
                                 norms)
                weights = weights * norms
                new_factors.append(factor / (tl.reshape(norms, (1, -1))))
            factors = new_factors

        if tol:
            rec_error = _parafac2_reconstruction_error(
                tensor_slices, (weights, factors, projections))
            rec_error /= norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('PARAFAC2 reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('PARAFAC2 reconstruction error={}'.format(rec_errors[-1]))

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))

    if return_errors:
        return parafac2_tensor, rec_errors
    else:
        return parafac2_tensor
def parafac2(tensor_slices, rank, n_iter_max=2000, init='random',
             svd='numpy_svd', normalize_factors=False, tol=1e-8,
             absolute_tol=1e-13, nn_modes=None, random_state=None,
             verbose=False, return_errors=False, n_iter_parafac=5):
    r"""PARAFAC2 decomposition [1]_ of a third order tensor via alternating
    least squares (ALS)

    Computes a rank-`rank` PARAFAC2 decomposition of the third-order tensor
    defined by `tensor_slices`. The decomposition is of the form
    :math:`(A [B_i] C)` such that the i-th frontal slice, :math:`X_i`, of
    :math:`X` is given by

    .. math::

        X_i = B_i diag(a_i) C^T,

    where :math:`diag(a_i)` is the diagonal matrix whose nonzero entries are
    equal to the :math:`i`-th row of the :math:`I \times R` factor matrix
    :math:`A`, :math:`B_i` is a :math:`J_i \times R` factor matrix such that
    the cross product matrix :math:`B_{i_1}^T B_{i_1}` is constant for all
    :math:`i`, and :math:`C` is a :math:`K \times R` factor matrix. To
    compute this decomposition, we reformulate the expression for :math:`B_i`
    such that

    .. math::

        B_i = P_i B,

    where :math:`P_i` is a :math:`J_i \times R` orthogonal matrix and
    :math:`B` is a :math:`R \times R` matrix.

    An alternative formulation of the PARAFAC2 decomposition is that the
    tensor element :math:`X_{ijk}` is given by

    .. math::

        X_{ijk} = \sum_{r=1}^R A_{ir} B_{ijr} C_{kr},

    with the same constraints on :math:`B_i` as above.

    Parameters
    ----------
    tensor_slices : ndarray or list of ndarrays
        Either a third order tensor or a list of second order tensors that
        may have different numbers of rows. Note that the second mode factor
        matrices are allowed to change over the first mode, not the third
        mode as some other implementations use (see note below).
    rank : int
        Number of components.
    n_iter_max : int, optional (Default: 2000)
        Maximum number of iterations

        .. versionchanged:: 0.6.1

            Previously, the default maximum number of iterations was 100.
    init : {'svd', 'random', CPTensor, Parafac2Tensor}
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool (optional)
        If True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors. Note that
        there may be some inaccuracies in the component weights.
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error decrease tolerance. The algorithm is
        considered to have converged when
        :math:`\left|\| X - \hat{X}_{n-1} \|^2 - \| X - \hat{X}_{n} \|^2\right| < \epsilon \| X - \hat{X}_{n-1} \|^2`.
        That is, when the relative change in sum of squared error is less
        than the tolerance.

        .. versionchanged:: 0.6.1

            Previously, the stopping condition was
            :math:`\left|\| X - \hat{X}_{n-1} \| - \| X - \hat{X}_{n} \|\right| < \epsilon`.
    absolute_tol : float, optional (Default: 1e-13)
        Absolute reconstruction error tolerance. The algorithm is considered
        to have converged when
        :math:`\left|\| X - \hat{X}_{n-1} \|^2 - \| X - \hat{X}_{n} \|^2\right| < \epsilon_\text{abs}`.
        That is, when the absolute sum of squared error is less than the
        specified tolerance. The absolute tolerance is necessary for stopping
        the algorithm when used on noise-free data that follows the PARAFAC2
        constraint. If None, then the machine precision multiplied by 1000
        will be used.
    nn_modes : None, 'all' or array of integers (Default: None)
        Used to specify which modes to impose non-negativity constraints on.
        We cannot impose non-negativity constraints on the B-mode (mode 1)
        with the ALS algorithm, so if this mode is among the constrained
        modes, then a warning will be shown (see notes for more info).
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    n_iter_parafac : int, optional
        Number of PARAFAC iterations to perform for each PARAFAC2 iteration

    Returns
    -------
    Parafac2Tensor : (weight, factors, projection_matrices)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * projection_matrices : List of projection matrices used to create
          evolving factors.
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] Kiers, H.A.L., ten Berge, J.M.F. and Bro, R. (1999),
           PARAFAC2 - Part I. A direct fitting algorithm for the PARAFAC2
           model. J. Chemometrics, 13: 275-294.

    Notes
    -----
    This formulation of the PARAFAC2 decomposition is slightly different from
    the one in [1]_. The difference lies in that, here, the second mode
    changes over the first mode, whereas in [1]_, the second mode changes
    over the third mode. We made this change so that the function accepts
    both lists of matrices and a single nd-array as input without any
    reordering of the modes.

    Because of the reformulation above, :math:`B_i = P_i B`, the :math:`B_i`
    matrices cannot be constrained to be non-negative with ALS. If this mode
    is constrained to be non-negative, then :math:`B` will be non-negative,
    but not the orthogonal :math:`P_i` matrices. Consequently, the
    :math:`B_i` matrices are unlikely to be non-negative.
    """
    weights, factors, projections = initialize_decomposition(
        tensor_slices, rank, init=init, svd=svd, random_state=random_state)

    rec_errors = []
    norm_tensor = tl.sqrt(
        sum(tl.norm(tensor_slice, 2)**2 for tensor_slice in tensor_slices))
    svd_fun = _get_svd(svd)

    if absolute_tol is None:
        absolute_tol = tl.eps(factors[0].dtype) * 1000

    # If nn_modes is set, we use HALS, otherwise the standard parafac implementation.
    if nn_modes is None:
        def parafac_updates(X, w, f):
            return parafac(X, rank, n_iter_max=n_iter_parafac, init=(w, f),
                           svd=svd, orthogonalise=False, verbose=verbose,
                           return_errors=False, normalize_factors=False,
                           mask=None, random_state=random_state,
                           tol=1e-100)[1]
    else:
        if nn_modes == 'all' or 1 in nn_modes:
            warn("Mode `1` of PARAFAC2 fitted with ALS cannot be constrained "
                 "to be truly non-negative. See the documentation for more info.")

        def parafac_updates(X, w, f):
            return non_negative_parafac_hals(X, rank,
                                             n_iter_max=n_iter_parafac,
                                             init=(w, f), svd=svd,
                                             nn_modes=nn_modes,
                                             verbose=verbose,
                                             return_errors=False,
                                             tol=1e-100)[1]

    projected_tensor = tl.zeros([factor.shape[0] for factor in factors],
                                **T.context(factors[0]))

    for iteration in range(n_iter_max):
        if verbose:
            print("Starting iteration", iteration)
        factors[1] = factors[1] * T.reshape(weights, (1, -1))
        weights = T.ones(weights.shape, **tl.context(tensor_slices[0]))

        projections = _compute_projections(tensor_slices, factors, svd_fun,
                                           out=projections)
        projected_tensor = _project_tensor_slices(tensor_slices, projections,
                                                  out=projected_tensor)
        factors = parafac_updates(projected_tensor, weights, factors)

        if normalize_factors:
            new_factors = []
            for factor in factors:
                norms = T.norm(factor, axis=0)
                norms = tl.where(tl.abs(norms) <= tl.eps(factor.dtype),
                                 tl.ones(tl.shape(norms), **tl.context(factors[0])),
                                 norms)
                weights = weights * norms
                new_factors.append(factor / (tl.reshape(norms, (1, -1))))
            factors = new_factors

        if tol:
            rec_error = _parafac2_reconstruction_error(
                tensor_slices, (weights, factors, projections))
            rec_error /= norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('PARAFAC2 reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if abs(rec_errors[-2]**2 - rec_errors[-1]**2) < (
                        tol * rec_errors[-2]**2) or rec_errors[-1]**2 < absolute_tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('PARAFAC2 reconstruction error={}'.format(rec_errors[-1]))

    parafac2_tensor = Parafac2Tensor((weights, factors, projections))

    if return_errors:
        return parafac2_tensor, rec_errors
    else:
        return parafac2_tensor
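# Usage sketch for parafac2 on ragged data: a list of matrices with different
# row counts but a shared third mode (assumes initialize_decomposition and the
# other parafac2 helpers are in scope). Passing nn_modes=[0, 2] would
# additionally constrain the A and C factors to be non-negative via the HALS
# inner updates.
_slices = [tl.tensor(np.random.rand(j, 12)) for j in (15, 18, 20)]
_p2, _errs = parafac2(_slices, rank=3, random_state=0, return_errors=True)
_weights, (_A, _B, _C), _projections = _p2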