def simplex_prox(tensor, parameter):
    """Projects the input tensor on the simplex of radius `parameter`.

    Parameters
    ----------
    tensor : ndarray
    parameter : float

    Returns
    -------
    ndarray

    References
    ----------
    .. [1] Held, Michael, Philip Wolfe, and Harlan P. Crowder.
       "Validation of subgradient optimization."
       Mathematical Programming 6.1 (1974): 62-88.
    """
    _, col = tl.shape(tensor)
    tensor = tl.clip(tensor, 0, tl.max(tensor))
    tensor_sort = tl.sort(tensor, axis=0, descending=True)
    # Count, per column, how many sorted entries lie above the running threshold
    to_change = tl.sum(tl.where(tensor_sort > (tl.cumsum(tensor_sort, axis=0) - parameter),
                                1.0, 0.0), axis=0)
    difference = tl.zeros(col)
    for i in range(col):
        if to_change[i] > 0:
            difference = tl.index_update(
                difference, tl.index[i],
                tl.cumsum(tensor_sort, axis=0)[int(to_change[i] - 1), i])
    difference = (difference - parameter) / to_change
    return tl.clip(tensor - difference, a_min=0)
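
# Minimal usage sketch (illustrative, not part of the original module): project
# the columns of a small random matrix onto the simplex of radius 1; for
# typical positive input, each projected column is non-negative and sums to 1.
def _example_simplex_prox():
    import numpy as np
    import tensorly as tl
    matrix = tl.tensor(np.random.rand(4, 3))
    projected = simplex_prox(matrix, 1.0)
    print(tl.sum(projected, axis=0))  # each column sum ~ 1.0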
def test_svd():
    """Test for the SVD functions"""
    tol = 0.1
    tol_orthogonality = 0.01

    for name, svd_fun in T.SVD_FUNS.items():
        sizes = [(100, 100), (100, 5), (10, 10), (10, 4), (5, 100)]
        n_eigenvecs = [90, 4, 5, 4, 5]

        for s, n in zip(sizes, n_eigenvecs):
            matrix = np.random.random(s)
            matrix_backend = T.tensor(matrix)
            fU, fS, fV = svd_fun(matrix_backend, n_eigenvecs=n)
            U, S, V = svd(matrix)
            U, S, V = U[:, :n], S[:n], V[:n, :]

            assert_array_almost_equal(
                np.abs(S), T.abs(fS), decimal=3,
                err_msg='eigenvals not correct for "{}" svd fun VS svd and backend="{}", '
                        'for {} eigenvecs, and size {}'.format(name, tl.get_backend(), n, s))

            # True reconstruction error (based on numpy SVD)
            true_rec_error = np.sum((matrix - np.dot(U, S.reshape((-1, 1)) * V))**2)
            # Reconstruction error with the backend's SVD
            rec_error = T.sum((matrix_backend - T.dot(fU, T.reshape(fS, (-1, 1)) * fV))**2)
            # Check that the two are similar
            assert_(true_rec_error - rec_error <= tol,
                    msg='Reconstruction not correct for "{}" svd fun VS svd and backend="{}", '
                        'for {} eigenvecs, and size {}'.format(name, tl.get_backend(), n, s))

            # Check for orthogonality when relevant
            if name != 'symeig_svd':
                left_orthogonality_error = T.norm(T.dot(T.transpose(fU), fU) - T.eye(n))
                assert_(left_orthogonality_error <= tol_orthogonality,
                        msg='Left eigenvecs not orthogonal for "{}" svd fun VS svd and backend="{}", '
                            'for {} eigenvecs, and size {}'.format(name, tl.get_backend(), n, s))
                # Check the right singular vectors (fV), not fU again
                right_orthogonality_error = T.norm(T.dot(fV, T.transpose(fV)) - T.eye(n))
                assert_(right_orthogonality_error <= tol_orthogonality,
                        msg='Right eigenvecs not orthogonal for "{}" svd fun VS svd and backend="{}", '
                            'for {} eigenvecs, and size {}'.format(name, tl.get_backend(), n, s))

        # Should fail on non-matrices
        with assert_raises(ValueError):
            tensor = T.tensor(np.random.random((3, 3, 3)))
            svd_fun(tensor)

    # Test for singular matrices (some eigenvals will be zero)
    # Rank at most 5
    matrix = T.tensor(np.dot(np.random.random((20, 5)), np.random.random((5, 20))))
    U, S, V = tl.partial_svd(matrix, n_eigenvecs=n)
    true_rec_error = tl.sum((matrix - tl.dot(U, tl.reshape(S, (-1, 1)) * V))**2)
    assert_(true_rec_error <= tol)

    # Test that partial_svd returns the same result for the same setting
    matrix = T.tensor(np.random.random((20, 5)))
    random_state = np.random.RandomState(0)
    U1, S1, V1 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=random_state)
    U2, S2, V2 = tl.partial_svd(matrix, n_eigenvecs=2, random_state=0)
    assert_array_equal(U1, U2)
    assert_array_equal(S1, S2)
    assert_array_equal(V1, V2)
def __getitem__(self, indices):
    if not isinstance(indices, Iterable):
        indices = [indices]

    output_shape = []
    indexed_factors = []
    factors = self.factors
    weights = self.weights

    for (index, shape) in zip(indices, self.tensorized_shape):
        if isinstance(shape, int):
            # We are indexing a "regular" mode
            factor, *factors = factors
            if isinstance(index, (np.integer, int)):
                weights = weights * factor[index, :]
            else:
                factor = factor[index, :]
                indexed_factors.append(factor)
                output_shape.append(factor.shape[0])
        else:
            # We are indexing a tensorized mode
            if index == slice(None) or index == ():
                # Keeping all indices (:)
                indexed_factors.extend(factors[:len(shape)])
                output_shape.append(shape)
            else:
                if isinstance(index, slice):
                    # Since we've already filtered out :, this is a partial slice
                    # Convert into a list
                    max_index = math.prod(shape)
                    index = list(range(*index.indices(max_index)))

                if isinstance(index, Iterable):
                    output_shape.append(len(index))

                index = np.unravel_index(index, shape)
                # Index the whole tensorized mode, resulting in a single factor
                factor = 1
                for idx, ff in zip(index, factors[:len(shape)]):
                    factor *= ff[idx, :]

                if tl.ndim(factor) == 2:
                    indexed_factors.append(factor)
                else:
                    weights = weights * factor

            factors = factors[len(shape):]

    indexed_factors.extend(factors)
    output_shape.extend(self.tensorized_shape[len(indices):])

    if indexed_factors:
        return self.__class__(weights, indexed_factors, tensorized_shape=output_shape)
    return tl.sum(weights)
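
# Self-contained sketch (hypothetical shapes) of the unravel trick used above:
# a tensorized mode of shape (4, 5) is stored as two factors with 4 and 5 rows,
# so a flat index into the 20 combined positions maps to one row of each factor.
def _example_unravel_tensorized_index():
    import numpy as np
    tensorized_shape = (4, 5)
    flat_index = [7, 13]  # positions in the flattened tensorized mode
    rows = np.unravel_index(flat_index, tensorized_shape)
    print(rows)  # (array([1, 2]), array([2, 3]))
    # factor_a[rows[0], :] * factor_b[rows[1], :] then gives the indexed rows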
def test_sum():
    rng = tl.check_random_state(0)
    tensor = tl.tensor(rng.random_sample((5, 6, 7)))
    all_kwargs = [
        {},
        {'axis': 1},
        {'axis': 1, 'keepdims': True},
        {'axis': 1, 'keepdims': False},
        {'keepdims': True},
        {'keepdims': False},
        {'axis': None, 'keepdims': True},
        {'axis': (0, 2), 'keepdims': True},
        {'axis': (0, 2), 'keepdims': False},
        {'axis': (0, 2)},
    ]
    for kwargs in all_kwargs:
        np.testing.assert_allclose(
            tl.to_numpy(tl.sum(tensor, **kwargs)),
            np.sum(tl.to_numpy(tensor), **kwargs),
            rtol=1e-5,  # Single precision
            err_msg=f"Sum not same as numpy with kwargs: {kwargs}")
def _parafac2_reconstruction_error(tensor_slices, decomposition):
    _validate_parafac2_tensor(decomposition)
    squared_error = 0
    for idx, tensor_slice in enumerate(tensor_slices):
        reconstruction = parafac2_to_slice(decomposition, idx, validate=False)
        squared_error += tl.sum((tensor_slice - reconstruction)**2)
    return tl.sqrt(squared_error)
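
# Illustrative sketch (assuming tensorly's `random_parafac2` and
# `parafac2_to_slice` helpers are importable as below): the reconstruction
# error of a decomposition measured against its own slices should be ~0.
def _example_parafac2_reconstruction_error():
    from tensorly.random import random_parafac2
    from tensorly.parafac2_tensor import parafac2_to_slice
    decomposition = random_parafac2([(10, 4)] * 3, rank=2, random_state=0)
    slices = [parafac2_to_slice(decomposition, i) for i in range(3)]
    print(_parafac2_reconstruction_error(slices, decomposition))  # ~0.0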
def excluded_value_fraction(self):
    '''Returns the fraction of missing/excluded values in the tensor,
    given the values that are masked in tensor.mask

    Returns
    -------
    excluded_fraction : float
        Fraction of missing/excluded values in the tensor.
    '''
    if self.mask is None:
        print("The interaction tensor does not have masked values")
        return 0.0
    else:
        fraction = tl.sum(self.mask) / tl.prod(tl.tensor(self.tensor.shape))
        excluded_fraction = 1.0 - fraction.item()
        return excluded_fraction
def test_tr_to_tensor():
    # Create ground truth TR factors
    factors = [tl.randn((2, 4, 3)), tl.randn((3, 5, 2)), tl.randn((2, 6, 2))]

    # Create tensor
    tensor = tl.zeros((4, 5, 6))
    for i in range(4):
        for j in range(5):
            for k in range(6):
                product = tl.dot(tl.dot(factors[0][:, i, :], factors[1][:, j, :]),
                                 factors[2][:, k, :])
                # TODO: add trace to backend instead of this
                tensor = tl.index_update(tensor, tl.index[i, j, k],
                                         tl.sum(product * tl.eye(product.shape[0])))

    # Check that TR factors re-assemble to the original tensor
    assert_array_almost_equal(tensor, tr_to_tensor(factors))
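
# Quick numeric check (NumPy sketch) of the trace trick used above: summing
# the elementwise product with the identity keeps only the diagonal, so
# sum(product * I) equals trace(product).
def _example_trace_trick():
    import numpy as np
    product = np.arange(9.0).reshape(3, 3)
    assert np.sum(product * np.eye(3)) == np.trace(product)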
def __getitem__(self, indices):
    if isinstance(indices, int):
        # Select one dimension of one mode
        mixing_factor, *factors = self.factors
        weights = self.weights * mixing_factor[indices, :]
        return self.__class__(weights, factors,
                              self.tensorized_row_shape, self.tensorized_column_shape,
                              n_matrices=self.n_matrices[1:])

    elif isinstance(indices, slice):
        # Index part of a factor
        mixing_factor, *factors = self.factors
        factors = [mixing_factor[indices], *factors]
        weights = self.weights
        return self.__class__(weights, factors,
                              self.tensorized_row_shape, self.tensorized_column_shape,
                              n_matrices=self.n_matrices[1:])

    else:
        # Index multiple dimensions
        factors = self.factors
        index_factors = []
        weights = self.weights
        for index in indices:
            if index is Ellipsis:
                raise ValueError(f'Ellipsis is not yet supported, yet got indices={indices} which contains one.')

            mixing_factor, *factors = factors
            if isinstance(index, int):
                if factors or index_factors:
                    weights = weights * mixing_factor[index, :]
                else:
                    # No factors left
                    return tl.sum(weights * mixing_factor[index, :])
            else:
                index_factors.append(mixing_factor[index])

        return self.__class__(weights, index_factors + factors,
                              self.tensorized_row_shape, self.tensorized_column_shape,
                              n_matrices=self.n_matrices[len(indices):])
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            tol=1e-8, orthogonalise=False, random_state=None, verbose=False,
            return_errors=False, non_negative=False):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [| factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have found the global minimum when the reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non-negative PARAFAC. See :func:`non_negative_parafac`.

    Returns
    -------
    factors : ndarray list
        List of factors of the CP decomposition;
        element `i` is of shape (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(factor)[0] for factor in factors]

        if verbose:
            print("Starting iteration", iteration)
        for mode in range(tl.ndim(tensor)):
            if verbose:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                # khatri_rao(factors).T.dot(khatri_rao(factors))
                # simplifies to multiplications
                accum = 1
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(tl.conj(tl.transpose(factor)), factor)

            # factor = tl.dot(unfold(tensor, mode), khatri_rao(factors, skip_matrix=mode).conj())
            mttkrp = tl.tenalg.unfolding_dot_khatri_rao(tensor, factors, mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                               tl.transpose(mttkrp)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            # This is ||kruskal_to_tensor(factors)||^2
            factors_norm = tl.sum(tl.prod(
                tl.stack([tl.dot(tl.transpose(f), f) for f in factors], 0), 0))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(mttkrp * factor)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if return_errors:
        return factors, rec_errors
    else:
        return factors
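
# Numeric sanity check (NumPy sketch) of the identity used for `rec_error`
# above: ||T - R||^2 = ||T||^2 + ||R||^2 - 2<T, R>, which lets the loop avoid
# materializing the full reconstruction at every iteration.
def _example_rec_error_identity():
    import numpy as np
    rng = np.random.RandomState(0)
    T_, R_ = rng.rand(4, 5), rng.rand(4, 5)
    lhs = np.linalg.norm(T_ - R_)**2
    rhs = np.linalg.norm(T_)**2 + np.linalg.norm(R_)**2 - 2 * np.sum(T_ * R_)
    assert np.isclose(lhs, rhs)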
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, tol=1e-8, orthogonalise=False,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, mask=None):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have found the global minimum when the reconstruction error is less
        than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool, optional
        Perform non-negative PARAFAC. See :func:`non_negative_parafac`.
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0 where
        the values are missing and 1 everywhere else. Note: if tensor is
        sparse, then mask should also be sparse with a fill value of 1 (or
        True). Allows for missing values [2]_

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 non_negative=non_negative,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))
            if non_negative:
                # khatri_rao(factors).T.dot(khatri_rao(factors))
                # simplifies to multiplications
                accum = 1
                sub_indices = [i for i in range(len(factors)) if i != mode]
                for i, e in enumerate(sub_indices):
                    if i:
                        accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                    else:
                        accum = tl.dot(tl.transpose(factors[e]), factors[e])

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(tl.conj(tl.transpose(factor)), factor)

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            if non_negative:
                numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
                denominator = tl.dot(factors[mode], accum)
                denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
                factor = factors[mode] * numerator / denominator
            else:
                factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                               tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                if verbose:
                    print('reconstruction error={}, variation={}.'.format(
                        rec_errors[-1], rec_errors[-2] - rec_errors[-1]))

                if tol and abs(rec_errors[-2] - rec_errors[-1]) < tol:
                    if verbose:
                        print('converged in {} iterations.'.format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def maxvol(A):
    """Find the r-by-r submatrix of maximal volume in A (n-by-r, n >= r)

    We want to decompose matrix A as ``A = A[:, J] * (A[I, J])^-1 * A[I, :]``.
    This algorithm helps us find the submatrix A[I, J] of A which has the
    largest determinant. We greedily find the row of max norm, and subtract
    its projection from the rest of the rows.

    Parameters
    ----------
    A : matrix
        The matrix in which to find the submatrix of maximal volume

    Returns
    -------
    row_idx : list of int
        The list of rows of A forming the submatrix with maximal volume
    A_inv : matrix
        The inverse of the submatrix with maximal volume

    References
    ----------
    .. [1] S. A. Goreinov, I. V. Oseledets, D. V. Savostyanov,
       E. E. Tyrtyshnikov, N. L. Zamarashkin. "How to find a good submatrix."
       Matrix Methods: Theory, Algorithms and Applications: Dedicated to the
       Memory of Gene Golub. 2010. 247-256.
    .. [2] Ali Çivril, Malik Magdon-Ismail. "On selecting a maximum volume
       sub-matrix of a matrix and related problems." Theoretical Computer
       Science, Volume 410, Issues 47-49, 6 November 2009, Pages 4801-4811.
    """
    (n, r) = tl.shape(A)

    # The indices of the rows of the submatrix
    row_idx = tl.zeros(r)

    # Rest of rows / unselected rows
    rest_of_rows = tl.tensor(list(range(n)), dtype=tl.int64)

    # Find r rows iteratively
    i = 0
    A_new = A
    while i < r:
        mask = list(range(tl.shape(A_new)[0]))
        # Compute the squared norm of each row
        rows_norms = tl.sum(A_new**2, axis=1)

        # If there is only one row of A left, let's just return it.
        # MxNet is not robust about this case.
        if tl.shape(rows_norms) == ():
            row_idx[i] = rest_of_rows
            break

        # If a row is 0, we delete it.
        if any(rows_norms == 0):
            zero_idx = tl.argmin(rows_norms, axis=0)
            mask.pop(zero_idx)
            rest_of_rows = rest_of_rows[mask]
            A_new = A_new[mask, :]
            continue

        # Find the row of max norm
        max_row_idx = tl.argmax(rows_norms, axis=0)
        max_row = A[rest_of_rows[max_row_idx], :]

        # Compute the projection of max_row onto the other rows;
        # the projection of a onto b is computed as: <a, b> / sqrt(|a|*|b|)
        projection = tl.dot(A_new, tl.transpose(max_row))
        normalization = tl.sqrt(rows_norms[max_row_idx] * rows_norms)
        # make sure the normalization vector has the same shape as the
        # projection (otherwise causing bugs for MxNet)
        normalization = tl.reshape(normalization, tl.shape(projection))
        projection = projection / normalization

        # Subtract the projection from A_new: b <- b - a * projection
        A_new = A_new - A_new * tl.reshape(projection, (tl.shape(A_new)[0], 1))

        # Delete the selected row
        mask.pop(max_row_idx)
        A_new = A_new[mask, :]

        # Update row_idx and rest_of_rows
        row_idx[i] = rest_of_rows[max_row_idx]
        rest_of_rows = rest_of_rows[mask]
        i = i + 1

    row_idx = tl.tensor(row_idx, dtype=tl.int64)
    inverse = tl.solve(A[row_idx, :],
                       tl.eye(tl.shape(A[row_idx, :])[0], **tl.context(A)))
    row_idx = tl.to_numpy(row_idx)

    return row_idx, inverse
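
# Usage sketch (illustrative): select the 3 rows of maximal volume from a
# random 10x3 matrix; `inverse` is the inverse of the selected 3x3 submatrix.
def _example_maxvol():
    import numpy as np
    import tensorly as tl
    A = tl.tensor(np.random.rand(10, 3))
    row_idx, inverse = maxvol(A)
    print(row_idx)  # indices of the 3 selected rows of A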
def error_calc(tensor, norm_tensor, weights, factors, sparsity, mask, mttkrp=None):
    r"""Perform the error calculation. Different forms are used here depending
    upon the available information. If `mttkrp=None` or masking is being
    performed, then the full tensor must be constructed. Otherwise, the mttkrp
    is used to reduce the calculation cost.

    Parameters
    ----------
    tensor : tensor
    norm_tensor : float
        The l2 norm of tensor.
    weights : tensor
        The current CP weights
    factors : list of tensors
        The current CP factors
    sparsity : float or int
        Whether we allow for a sparse component
    mask : tensor or None
        The mask, if masking is being performed; None otherwise.
    mttkrp : tensor or None
        The mttkrp product, if available.

    Returns
    -------
    unnorml_rec_error : float
        The unnormalized reconstruction error.
    tensor : tensor
        The tensor, in case it has been updated by masking.
    norm_tensor : float
        The tensor norm, in case it has been updated by masking.
    """
    # If we have to update the mask we already have to build the full tensor
    if (mask is not None) or (mttkrp is None):
        low_rank_component = cp_to_tensor((weights, factors))

        # Update the tensor based on the mask
        if mask is not None:
            tensor = tensor * mask + low_rank_component * (1 - mask)
            norm_tensor = tl.norm(tensor, 2)

        if sparsity:
            sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity)
        else:
            sparse_component = 0.0

        unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2)
    else:
        if sparsity:
            low_rank_component = cp_to_tensor((weights, factors))
            sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity)
            unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2)
        else:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = cp_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factors[-1], axis=0) * weights)
            unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod))

    return unnorml_rec_error, tensor, norm_tensor
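
# Sketch of the masking update used above (NumPy illustration): observed
# entries (mask == 1) keep their values, while missing entries are imputed
# from the current low-rank reconstruction.
def _example_mask_update():
    import numpy as np
    tensor = np.array([[1.0, 2.0], [3.0, 4.0]])
    mask = np.array([[1.0, 0.0], [1.0, 1.0]])
    low_rank_component = np.full((2, 2), 9.0)
    print(tensor * mask + low_rank_component * (1 - mask))  # [[1. 9.] [3. 4.]]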
def non_negative_parafac_hals(tensor, rank, n_iter_max=100, init="svd",
                              svd='numpy_svd', tol=10e-8, random_state=None,
                              sparsity_coefficients=None, fixed_modes=None,
                              nn_modes='all', exact=False, normalize_factors=False,
                              verbose=False, return_errors=False,
                              cvg_criterion='abs_rec_error'):
    """Non-negative CP decomposition via HALS

    Uses Hierarchical ALS (Alternating Least Squares) which updates each
    factor column-wise (one column at a time while keeping all other columns
    fixed), see [1]_

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
        Default: 10e-8
    random_state : {None, int, np.random.RandomState}
    sparsity_coefficients : array of float (of length the number of modes)
        The sparsity coefficients on each factor.
        If set to None, the algorithm is computed without sparsity.
        Default: None
    fixed_modes : array of integers (between 0 and the number of modes)
        The modes whose factors are kept fixed (not updated).
        Default: None
    nn_modes : None, 'all' or array of integers (between 0 and the number of modes)
        Used to specify which modes to impose non-negativity constraints on.
        If 'all', then non-negativity is imposed on all modes.
        Default: 'all'
    exact : bool
        If True, the algorithm gives a result with high precision but it
        needs high computational cost. If False, the algorithm gives an
        approximate solution.
        Default: False
    normalize_factors : bool
        If True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    verbose : boolean
        Indicates whether the algorithm prints the successive reconstruction
        errors or not
        Default: False
    return_errors : boolean
        Indicates whether the algorithm should return all reconstruction
        errors and computation time of each iteration or not
        Default: False
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] N. Gillis and F. Glineur, "Accelerated Multiplicative Updates and
       Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,"
       Neural Computation 24 (4): 1085-1105, 2012.
    """
    weights, factors = initialize_nn_cp(tensor, rank, init=init, svd=svd,
                                        random_state=random_state,
                                        normalize_factors=normalize_factors)

    norm_tensor = tl.norm(tensor, 2)

    n_modes = tl.ndim(tensor)
    if sparsity_coefficients is None or isinstance(sparsity_coefficients, float):
        sparsity_coefficients = [sparsity_coefficients] * n_modes
    if fixed_modes is None:
        fixed_modes = []
    if nn_modes == 'all':
        nn_modes = set(range(n_modes))
    elif nn_modes is None:
        nn_modes = set()

    # Avoiding errors
    for fixed_value in fixed_modes:
        sparsity_coefficients[fixed_value] = None

    for mode in range(n_modes):
        if mode not in nn_modes and sparsity_coefficients[mode] is not None:
            warnings.warn("Sparsity coefficient is ignored in unconstrained modes.")

    # Generating the mode update sequence
    modes = [mode for mode in range(n_modes) if mode not in fixed_modes]

    # initialisation - declare local variables
    rec_errors = []

    # Iteration
    for iteration in range(n_iter_max):
        # One pass of least squares on each updated mode
        for mode in modes:
            # Computing the Hadamard product of cross-products
            pseudo_inverse = tl.tensor(tl.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(tl.transpose(factor), factor)

            pseudo_inverse = tl.reshape(weights, (-1, 1)) * pseudo_inverse * tl.reshape(weights, (1, -1))
            mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)

            if mode in nn_modes:
                # Call the hals resolution with nnls, optimizing the current mode
                nn_factor, _, _, _ = hals_nnls(tl.transpose(mttkrp), pseudo_inverse,
                                               tl.transpose(factors[mode]),
                                               n_iter_max=100,
                                               sparsity_coefficient=sparsity_coefficients[mode],
                                               exact=exact)
                factors[mode] = tl.transpose(nn_factor)
            else:
                factor = tl.solve(tl.transpose(pseudo_inverse), tl.transpose(mttkrp))
                factors[mode] = tl.transpose(factor)

            if normalize_factors and mode != modes[-1]:
                weights, factors = cp_normalize((weights, factors))

        if tol:
            factors_norm = cp_norm((weights, factors))
            iprod = tl.sum(tl.sum(mttkrp * factors[-1], axis=0))
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    if normalize_factors:
        weights, factors = cp_normalize((weights, factors))

    cp_tensor = CPTensor((weights, factors))

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
def constrained_parafac(tensor, rank, n_iter_max=100, n_iter_max_inner=10,
                        init='svd', svd='numpy_svd', tol_outer=1e-8,
                        tol_inner=1e-6, random_state=None, verbose=0,
                        return_errors=False, cvg_criterion='abs_rec_error',
                        fixed_modes=None, non_negative=None, l1_reg=None,
                        l2_reg=None, l2_square_reg=None, unimodality=None,
                        normalize=None, simplex=None, normalized_sparsity=None,
                        soft_sparsity=None, smoothness=None, monotonicity=None,
                        hard_sparsity=None):
    """CANDECOMP/PARAFAC decomposition via alternating optimization with
    the alternating direction method of multipliers (AO-ADMM):

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that::

        tensor = [|weights; factors[0], ..., factors[-1] |],

    where factors are either penalized or constrained according to the
    user-defined constraint. In order to compute the factors efficiently, the
    ADMM algorithm introduces an auxiliary factor which is called factor_aux
    in the function.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations for the outer loop
    n_iter_max_inner : int
        Number of iterations for the inner loop
    init : {'svd', 'random', cptensor}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol_outer : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance for the outer loop. The
        algorithm is considered to have found a local minimum when the
        reconstruction error is less than `tol_outer`.
    tol_inner : float, optional (Default: 1e-6)
        Absolute reconstruction error tolerance for the factor update during
        the inner loop, i.e. ADMM optimization.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    non_negative : bool or dictionary
        This constraint clips negative values to '0'. If it is True, the
        non-negativity constraint is applied to all modes.
    l1_reg : float or list or dictionary, optional
    l2_reg : float or list or dictionary, optional
    l2_square_reg : float or list or dictionary, optional
    unimodality : bool or dictionary, optional
        If it is True, the unimodality constraint is applied to all modes.
    normalize : bool or dictionary, optional
        This constraint divides all the values by the maximum value of the
        input array. If it is True, the normalize constraint is applied to
        all modes.
    simplex : float or list or dictionary, optional
    normalized_sparsity : float or list or dictionary, optional
    soft_sparsity : float or list or dictionary, optional
    smoothness : float or list or dictionary, optional
    monotonicity : bool or dictionary, optional
    hard_sparsity : float or list or dictionary, optional
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion if `tol` is not None.
        If 'rec_error', the algorithm stops at the current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', the algorithm terminates when
        ``|previous rec_error - current rec_error| < tol``.
    fixed_modes : list, default is None
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.

    Returns
    -------
    CPTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
        * factors : List of factors of the CP decomposition;
          element `i` is of shape ``(tensor.shape[i], rank)``
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Huang, Kejun, Nicholas D. Sidiropoulos, and Athanasios P. Liavas.
       "A flexible and efficient algorithmic framework for constrained matrix
       and tensor factorization." IEEE Transactions on Signal Processing
       64.19 (2016): 5052-5065.
    """
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    _, _ = validate_constraints(non_negative=non_negative, l1_reg=l1_reg,
                                l2_reg=l2_reg, l2_square_reg=l2_square_reg,
                                unimodality=unimodality, normalize=normalize,
                                simplex=simplex,
                                normalized_sparsity=normalized_sparsity,
                                soft_sparsity=soft_sparsity,
                                smoothness=smoothness, monotonicity=monotonicity,
                                hard_sparsity=hard_sparsity,
                                n_const=tl.ndim(tensor))

    weights, factors = initialize_constrained_parafac(
        tensor, rank, init=init, svd=svd, random_state=random_state,
        non_negative=non_negative, l1_reg=l1_reg, l2_reg=l2_reg,
        l2_square_reg=l2_square_reg, unimodality=unimodality,
        normalize=normalize, simplex=simplex,
        normalized_sparsity=normalized_sparsity, soft_sparsity=soft_sparsity,
        smoothness=smoothness, monotonicity=monotonicity,
        hard_sparsity=hard_sparsity)

    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    if fixed_modes is None:
        fixed_modes = []

    if tl.ndim(tensor) - 1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported.\n'
                      'The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor) - 1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    # ADMM inits
    dual_variables = []
    factors_aux = []
    for i in range(len(factors)):
        dual_variables.append(tl.zeros(tl.shape(factors[i])))
        factors_aux.append(tl.transpose(tl.zeros(tl.shape(factors[i]))))

    for iteration in range(n_iter_max):
        if verbose > 1:
            print("Starting iteration", iteration + 1)

        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(tl.transpose(factor), factor)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            factors[mode], factors_aux[mode], dual_variables[mode] = admm(
                mttkrp, pseudo_inverse, factors[mode], dual_variables[mode],
                n_iter_max=n_iter_max_inner, n_const=tl.ndim(tensor), order=mode,
                non_negative=non_negative, l1_reg=l1_reg, l2_reg=l2_reg,
                l2_square_reg=l2_square_reg, unimodality=unimodality,
                normalize=normalize, simplex=simplex,
                normalized_sparsity=normalized_sparsity,
                soft_sparsity=soft_sparsity, smoothness=smoothness,
                monotonicity=monotonicity, hard_sparsity=hard_sparsity,
                tol=tol_inner)

        factors_norm = cp_norm((weights, factors))
        iprod = tl.sum(tl.sum(mttkrp * factors[-1], axis=0) * weights)
        rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
        rec_errors.append(rec_error)

        constraint_error = 0
        for mode in modes_list:
            constraint_error += tl.norm(factors[mode] - tl.transpose(factors_aux[mode])) / tl.norm(factors[mode])

        if tol_outer:
            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if constraint_error < tol_outer:
                    break
                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol_outer
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol_outer
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
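
# Minimal usage sketch (illustrative): AO-ADMM constrained CP with a
# non-negativity constraint imposed on all modes.
def _example_constrained_parafac():
    import numpy as np
    import tensorly as tl
    tensor = tl.tensor(np.random.rand(4, 5, 6))
    cp_tensor, errors = constrained_parafac(tensor, rank=2, non_negative=True,
                                            return_errors=True)
    print(errors[-1])  # final relative reconstruction error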
def test_sum_keepdims():
    rng = tl.check_random_state(0)
    random_matrix = tl.tensor(rng.random_sample((10, 20)))

    summed_matrix1 = tl.sum(random_matrix, axis=0)
    assert tl.shape(summed_matrix1) == (20,)
    summed_matrix2 = tl.sum(random_matrix, axis=0, keepdims=False)
    assert tl.shape(summed_matrix2) == (20,)
    summed_matrix3 = tl.sum(random_matrix, axis=0, keepdims=True)
    assert tl.shape(summed_matrix3) == (1, 20)

    summed_matrix4 = tl.sum(random_matrix, axis=1)
    assert tl.shape(summed_matrix4) == (10,)
    summed_matrix5 = tl.sum(random_matrix, axis=1, keepdims=False)
    assert tl.shape(summed_matrix5) == (10,)
    summed_matrix6 = tl.sum(random_matrix, axis=1, keepdims=True)
    assert tl.shape(summed_matrix6) == (10, 1)

    # Third order tensor
    random_tensor = tl.tensor(rng.random_sample((10, 20, 30)))

    summed_tensor1 = tl.sum(random_tensor, axis=0)
    assert tl.shape(summed_tensor1) == (20, 30)
    summed_tensor2 = tl.sum(random_tensor, axis=0, keepdims=False)
    assert tl.shape(summed_tensor2) == (20, 30)
    summed_tensor3 = tl.sum(random_tensor, axis=0, keepdims=True)
    assert tl.shape(summed_tensor3) == (1, 20, 30)

    summed_tensor4 = tl.sum(random_tensor, axis=1)
    assert tl.shape(summed_tensor4) == (10, 30)
    summed_tensor5 = tl.sum(random_tensor, axis=1, keepdims=False)
    assert tl.shape(summed_tensor5) == (10, 30)
    summed_tensor6 = tl.sum(random_tensor, axis=1, keepdims=True)
    assert tl.shape(summed_tensor6) == (10, 1, 30)

    summed_tensor7 = tl.sum(random_tensor, axis=2)
    assert tl.shape(summed_tensor7) == (10, 20)
    summed_tensor8 = tl.sum(random_tensor, axis=2, keepdims=False)
    assert tl.shape(summed_tensor8) == (10, 20)
    summed_tensor9 = tl.sum(random_tensor, axis=2, keepdims=True)
    assert tl.shape(summed_tensor9) == (10, 20, 1)
def hals_nnls(UtM, UtU, V=None, n_iter_max=500, tol=10e-8,
              sparsity_coefficient=None, normalize=False, nonzero_rows=False,
              exact=False):
    """Non-Negative Least Squares (NNLS)

    Computes an approximate solution of a nonnegative least squares problem
    (NNLS) with an exact block-coordinate descent scheme. M is m-by-n, U is
    m-by-r, V is r-by-n. All matrices are nonnegative componentwise.

    This algorithm is defined in [1]_, as an accelerated version of the HALS
    algorithm. It features two accelerations: an early stopping criterion,
    and a complexity averaging between precomputations and loops, so as to
    use large precomputations several times.

    This function is made to be used repeatedly inside an outer-loop
    alternating algorithm, for instance for computing nonnegative matrix
    factorization or tensor factorization.

    Parameters
    ----------
    UtM : r-by-n array
        Pre-computed product of the transpose of U and M, used in the update rule
    UtU : r-by-r array
        Pre-computed product of the transpose of U and U, used in the update rule
    V : r-by-n initialization matrix (mutable)
        Initialized V array
        By default, is initialized with one non-zero entry per column
        corresponding to the closest column of U of the corresponding column of M.
    n_iter_max : positive integer
        Upper bound on the number of iterations
        Default: 500
    tol : float in [0, 1]
        Early stopping criterion: iterate while err_k > delta * err_0. Set
        small for an almost exact NNLS solution, or larger (e.g. 1e-2) for
        inner loops of a PARAFAC computation.
        Default: 10e-8
    sparsity_coefficient : float or None
        The coefficient controlling the sparsity level in the objective function.
        If set to None, the problem is solved unconstrained.
        Default: None
    nonzero_rows : boolean
        True if the rows of the V matrix can't be zero,
        False if they can be zero
        Default: False
    exact : boolean
        If True, the algorithm gives a result with high precision but it
        needs high computational cost. If False, the algorithm gives an
        approximate solution.
        Default: False

    Returns
    -------
    V : r-by-n array
        A nonnegative matrix approximating
        :math:`\\operatorname{argmin}_{V \\geq 0} ||M-UV||_F^2`
    rec_error : float
        Sum of the squared row updates at the last iteration,
        used by the early stopping criterion
    iteration : integer
        Final number of update iterations performed
    complexity_ratio : float
        Number of inner loops authorized by the complexity stopping criterion

    Notes
    -----
    We solve the following problem

    .. math::

        \\min_{V \\geq 0} ||M - UV||_F^2

    The matrix V is updated linewise. The update rule for this resolution is

    .. math::

        V[k, :]^{(j+1)} = V[k, :]^{(j)} + \\frac{UtM[k, :] - UtU[k, :] \\cdot V^{(j)}}{UtU[k, k]}

    with :math:`j` the update iteration index. This problem can also be
    defined by adding a sparsity coefficient, enhancing sparsity in the
    solution [2]_. In this sparse version, the update rule becomes

    .. math::

        V[k, :]^{(j+1)} = V[k, :]^{(j)} + \\frac{UtM[k, :] - UtU[k, :] \\cdot V^{(j)} - sparsity\\_coefficient}{UtU[k, k]}

    References
    ----------
    .. [1] N. Gillis and F. Glineur, "Accelerated Multiplicative Updates and
       Hierarchical ALS Algorithms for Nonnegative Matrix Factorization,"
       Neural Computation 24 (4): 1085-1105, 2012.
    .. [2] J. Eggert and E. Korner, "Sparse coding and NMF," 2004 IEEE
       International Joint Conference on Neural Networks
       (IEEE Cat. No. 04CH37541), Vol. 4, IEEE, 2004.
    """
    rank, n_col_M = tl.shape(UtM)
    if V is None:  # checks if V is empty
        V = tl.solve(UtU, UtM)
        V = tl.clip(V, a_min=0, a_max=None)
        # Scaling
        scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V)))
        V = V * scale

    if exact:
        n_iter_max = 50000
        tol = 10e-16

    for iteration in range(n_iter_max):
        rec_error = 0
        for k in range(rank):
            if UtU[k, k]:
                if sparsity_coefficient is not None:
                    # Modifying the function for sparsification
                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k] > -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k],
                        -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)
                else:
                    # without sparsity
                    deltaV = tl.where(
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k] > -V[k, :],
                        (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k],
                        -V[k, :])
                    V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV)

                rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV))

                # Safety procedure, if rows aren't allowed to be zero
                if nonzero_rows and tl.all(V[k, :] == 0):
                    V[k, :] = tl.eps(V.dtype) * tl.max(V)

            elif nonzero_rows:
                raise ValueError("Column " + str(k) + " of U is zero with nonzero condition")

            if normalize:
                norm = tl.norm(V[k, :])
                if norm != 0:
                    V[k, :] /= norm
                else:
                    sqrt_n = 1 / n_col_M**(1 / 2)
                    V[k, :] = [sqrt_n for i in range(n_col_M)]

        if iteration == 0:
            rec_error0 = rec_error

        numerator = tl.shape(V)[0] * tl.shape(V)[1] + tl.shape(V)[1] * rank
        denominator = tl.shape(V)[0] * rank + tl.shape(V)[0]
        complexity_ratio = 1 + (numerator / denominator)
        if exact:
            if rec_error < tol * rec_error0:
                break
        else:
            if rec_error < tol * rec_error0 or iteration > 1 + 0.5 * complexity_ratio:
                break

    return V, rec_error, iteration, complexity_ratio
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0,
                         normalize_factors=False, return_errors=False, mask=None,
                         orthogonalise=False, cvg_criterion='abs_rec_error'):
    """Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    This is the same as parafac(non_negative=True).

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization
       with applications to statistics and computer vision," In Proceedings
       of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state, non_negative=True,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            # khatri_rao(factors).T.dot(khatri_rao(factors))
            # simplifies to multiplications
            accum = 1
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = kruskal_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
            normalize_factors=False, orthogonalise=False, tol=1e-8,
            random_state=None, verbose=0, return_errors=False,
            non_negative=False, sparsity=None, l2_reg=0, mask=None,
            cvg_criterion='abs_rec_error'):
    """CANDECOMP/PARAFAC decomposition via alternating least squares (ALS)

    Computes a rank-`rank` decomposition of `tensor` [1]_ such that,

        ``tensor = [|weights; factors[0], ..., factors[-1] |]``.

    Parameters
    ----------
    tensor : ndarray
    rank : int
        Number of components.
    n_iter_max : int
        Maximum number of iterations
    init : {'svd', 'random'}, optional
        Type of factor matrix initialization. See `initialize_factors`.
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    normalize_factors : bool
        if True, aggregate the weights of each factor in a 1D-tensor of shape
        (rank, ), which will contain the norms of the factors
    tol : float, optional (Default: 1e-8)
        Relative reconstruction error tolerance. The algorithm is considered
        to have found the global minimum when the reconstruction error is
        less than `tol`.
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        Level of verbosity
    return_errors : bool, optional
        Activate return of iteration errors
    mask : ndarray
        array of booleans with the same shape as ``tensor``; should be 0
        where the values are missing and 1 everywhere else. Note: if tensor
        is sparse, then mask should also be sparse with a fill value of 1
        (or True). Allows for missing values [2]_
    cvg_criterion : {'abs_rec_error', 'rec_error'}, optional
        Stopping criterion for ALS, works if `tol` is not None.
        If 'rec_error', ALS stops at current iteration if
        ``(previous rec_error - current rec_error) < tol``.
        If 'abs_rec_error', ALS terminates when
        ``|previous rec_error - current rec_error| < tol``.
    sparsity : float or int
        If `sparsity` is not None, we approximate tensor as a sum of
        low_rank_component and sparse_component, where low_rank_component =
        kruskal_to_tensor((weights, factors)). `sparsity` denotes desired
        fraction or number of non-zero elements in the sparse_component of
        the `tensor`.

    Returns
    -------
    KruskalTensor : (weight, factors)
        * weights : 1D array of shape (rank, )
          all ones if normalize_factors is False (default),
          weights of the (normalized) factors otherwise
        * factors : List of factors of the CP decomposition;
          element `i` is of shape (tensor.shape[i], rank)
        * sparse_component : nD array of shape tensor.shape
          Returned only if `sparsity` is not None.
    errors : list
        A list of reconstruction errors at each iteration of the algorithm.

    References
    ----------
    .. [1] T.G.Kolda and B.W.Bader, "Tensor Decompositions and Applications",
       SIAM REVIEW, vol. 51, n. 3, pp. 455-500, 2009.
    .. [2] Tomasi, Giorgio, and Rasmus Bro. "PARAFAC and missing values."
       Chemometrics and Intelligent Laboratory Systems 75.2 (2005): 163-180.
    """
    epsilon = 10e-12

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    factors = initialize_factors(tensor, rank, init=init, svd=svd,
                                 random_state=random_state,
                                 normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)
    weights = tl.ones(rank, **tl.context(tensor))
    Id = tl.eye(rank, **tl.context(tensor)) * l2_reg

    if sparsity:
        sparse_component = tl.zeros_like(tensor)
        if isinstance(sparsity, float):
            sparsity = int(sparsity * np.prod(tensor.shape))
        else:
            sparsity = int(sparsity)

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            factors = [tl.qr(f)[0] if min(tl.shape(f)) >= rank else f
                       for i, f in enumerate(factors)]

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in range(tl.ndim(tensor)):
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            pseudo_inverse = tl.tensor(np.ones((rank, rank)), **tl.context(tensor))
            for i, factor in enumerate(factors):
                if i != mode:
                    pseudo_inverse = pseudo_inverse * tl.dot(tl.conj(tl.transpose(factor)), factor)
            pseudo_inverse += Id

            if mask is not None:
                tensor = tensor * mask + tl.kruskal_to_tensor((None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)
            factor = tl.transpose(tl.solve(tl.conj(tl.transpose(pseudo_inverse)),
                                           tl.transpose(mttkrp)))

            if normalize_factors:
                weights = tl.norm(factor, order=2, axis=0)
                weights = tl.where(tl.abs(weights) <= tl.eps(tensor.dtype),
                                   tl.ones(tl.shape(weights), **tl.context(factors[0])),
                                   weights)
                factor = factor / (tl.reshape(weights, (1, -1)))

            factors[mode] = factor

        if tol:
            if sparsity:
                low_rank_component = kruskal_to_tensor((weights, factors))
                sparse_component = sparsify_tensor(tensor - low_rank_component, sparsity)
                unnorml_rec_error = tl.norm(tensor - low_rank_component - sparse_component, 2)
            else:
                # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
                factors_norm = kruskal_norm((weights, factors))

                # mttkrp and factor for the last mode. This is equivalent to the
                # inner product <tensor, factorization>
                iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
                unnorml_rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod))

            rec_error = unnorml_rec_error / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}, unnormalized = {}".format(
                        iteration, rec_error, rec_error_decrease, unnorml_rec_error))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    kruskal_tensor = KruskalTensor((weights, factors))

    if sparsity:
        sparse_component = sparsify_tensor(
            tensor - kruskal_to_tensor((weights, factors)), sparsity)
        kruskal_tensor = (kruskal_tensor, sparse_component)

    if return_errors:
        return kruskal_tensor, rec_errors
    else:
        return kruskal_tensor
def non_negative_parafac(tensor, rank, n_iter_max=100, init='svd', svd='numpy_svd',
                         tol=10e-7, random_state=None, verbose=0,
                         normalize_factors=False, return_errors=False, mask=None,
                         orthogonalise=False, cvg_criterion='abs_rec_error',
                         fixed_modes=None):
    """Non-negative CP decomposition

    Uses multiplicative updates, see [2]_

    This is the same as parafac(non_negative=True).

    Parameters
    ----------
    tensor : ndarray
    rank : int
        number of components
    n_iter_max : int
        maximum number of iterations
    init : {'svd', 'random'}, optional
    svd : str, default is 'numpy_svd'
        function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS
    tol : float, optional
        tolerance: the algorithm stops when the variation in the
        reconstruction error is less than the tolerance
    random_state : {None, int, np.random.RandomState}
    verbose : int, optional
        level of verbosity
    fixed_modes : list, default is None
        A list of modes for which the initial value is not modified.
        The last mode cannot be fixed due to error computation.

    Returns
    -------
    factors : ndarray list
        list of positive factors of the CP decomposition;
        element `i` is of shape ``(tensor.shape[i], rank)``

    References
    ----------
    .. [2] Amnon Shashua and Tamir Hazan, "Non-negative tensor factorization
       with applications to statistics and computer vision," In Proceedings
       of the International Conference on Machine Learning (ICML),
       pp 792-799, ICML, 2005
    """
    epsilon = 10e-12
    rank = validate_cp_rank(tl.shape(tensor), rank=rank)

    if mask is not None and init == "svd":
        message = "Masking occurs after initialization. Therefore, random initialization is recommended."
        warnings.warn(message, Warning)

    if orthogonalise and not isinstance(orthogonalise, int):
        orthogonalise = n_iter_max

    weights, factors = initialize_cp(tensor, rank, init=init, svd=svd,
                                     random_state=random_state, non_negative=True,
                                     normalize_factors=normalize_factors)
    rec_errors = []
    norm_tensor = tl.norm(tensor, 2)

    # Use None as default to avoid the mutable-default pitfall:
    # the list is modified in place below.
    if fixed_modes is None:
        fixed_modes = []
    if tl.ndim(tensor) - 1 in fixed_modes:
        warnings.warn('You asked for fixing the last mode, which is not supported while tol is fixed.\n'
                      'The last mode will not be fixed. Consider using tl.moveaxis()')
        fixed_modes.remove(tl.ndim(tensor) - 1)
    modes_list = [mode for mode in range(tl.ndim(tensor)) if mode not in fixed_modes]

    for iteration in range(n_iter_max):
        if orthogonalise and iteration <= orthogonalise:
            for i, f in enumerate(factors):
                if min(tl.shape(f)) >= rank:
                    factors[i] = tl.abs(tl.qr(f)[0])

        if verbose > 1:
            print("Starting iteration", iteration + 1)
        for mode in modes_list:
            if verbose > 1:
                print("Mode", mode, "of", tl.ndim(tensor))

            # khatri_rao(factors).T.dot(khatri_rao(factors))
            # simplifies to multiplications
            accum = 1
            sub_indices = [i for i in range(len(factors)) if i != mode]
            for i, e in enumerate(sub_indices):
                if i:
                    accum *= tl.dot(tl.transpose(factors[e]), factors[e])
                else:
                    accum = tl.dot(tl.transpose(factors[e]), factors[e])

            if mask is not None:
                tensor = tensor * mask + tl.cp_to_tensor((None, factors), mask=1 - mask)

            mttkrp = unfolding_dot_khatri_rao(tensor, (None, factors), mode)

            numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
            denominator = tl.dot(factors[mode], accum)
            denominator = tl.clip(denominator, a_min=epsilon, a_max=None)
            factor = factors[mode] * numerator / denominator
            factors[mode] = factor

        if normalize_factors:
            weights, factors = cp_normalize((weights, factors))

        if tol:
            # ||tensor - rec||^2 = ||tensor||^2 + ||rec||^2 - 2*<tensor, rec>
            factors_norm = cp_norm((weights, factors))

            # mttkrp and factor for the last mode. This is equivalent to the
            # inner product <tensor, factorization>
            iprod = tl.sum(tl.sum(mttkrp * factor, axis=0) * weights)
            rec_error = tl.sqrt(tl.abs(norm_tensor**2 + factors_norm**2 - 2 * iprod)) / norm_tensor
            rec_errors.append(rec_error)

            if iteration >= 1:
                rec_error_decrease = rec_errors[-2] - rec_errors[-1]

                if verbose:
                    print("iteration {}, reconstruction error: {}, decrease = {}".format(
                        iteration, rec_error, rec_error_decrease))

                if cvg_criterion == 'abs_rec_error':
                    stop_flag = abs(rec_error_decrease) < tol
                elif cvg_criterion == 'rec_error':
                    stop_flag = rec_error_decrease < tol
                else:
                    raise TypeError("Unknown convergence criterion")

                if stop_flag:
                    if verbose:
                        print("PARAFAC converged after {} iterations".format(iteration))
                    break
            else:
                if verbose:
                    print('reconstruction error={}'.format(rec_errors[-1]))

    cp_tensor = CPTensor((weights, factors))

    if return_errors:
        return cp_tensor, rec_errors
    else:
        return cp_tensor
def tl_gcp_fg_est(M, f, g, subs, xvals, weights, computeF=True, computeG=True,
                  vectorG=False, lambdaCheck=True, crng=None):
    """Estimate the GCP objective function and gradient via a subsample
    (Analogous to tt_gcp_fg_est.m)

    Parameters
    ----------
    M : CPTensor
    f : function handle
        Elementwise loss of the form f(x, m)
    g : function handle
        Elementwise intermediate gradient of the form g(x, m)
    subs : ndarray
    xvals : ndarray
    weights : ndarray
    computeF : boolean
        Include computation of the loss function. Default is True.
    computeG : boolean
        Include computation of the gradient.
    vectorG : boolean
        Reshape gradient matrices into a single vector.
    lambdaCheck : boolean
        Ensure lambda = [1, 1, ..., 1]
    crng : ndarray
        Range for correction/adjustment when nonzeros may be included in the
        "zero" sample. Used in semi-stratified sampling (**COMING SOON**)

    Returns
    -------
    F : scalar
        Loss function value
    G : ndarray(s)
        If vectorG = False, G is a list of matrices where G[k] is the same
        size as the k-th factor matrix. Otherwise, G is the gradient in
        vector form.
    """
    d = len(M[1])
    shp = tl.shape(M)
    sz = 1
    for i in tl.shape(M):
        sz *= i

    F = None
    G = []

    # lambda check
    if lambdaCheck:
        if not tl.all(M[0]):
            print("Lambda weights are not all '1's.")
            # TODO: overload the normalize function to distribute weights into factors ala MATLAB
            sys.exit(1)

    # Compute model values and exploded Zk matrices
    mvals, Zexp = gcp_fg_est_helper(M[1], subs)

    # Compute function values
    if computeF:
        fVec = f(xvals, mvals)
        if crng is not None:
            # TODO: Semi-stratified sampling adjustment to fVec
            print("Semi-stratified sampling not yet implemented.")
        F = tl.sum(weights * fVec)
        if not computeG:
            return [F, G]

    # Compute sample y values
    yvals = weights * g(xvals, mvals)
    if crng is not None:
        # TODO: Semi-stratified sampling adjustment to yvals
        print("Semi-stratified sampling not yet implemented.")

    # Compute function and gradient
    G = []
    nsamps = tl.shape(subs)[0]
    for k in range(d):
        # Construct Y matrices based on the sample. The row of an element is
        # the row index to accumulate in the gradient. Column indices are the
        # corresponding samples; they are in order because they match the
        # vector of samples to be multiplied on the right.
        s = sparse.COO((subs[:, k], np.arange(nsamps)), yvals[0], shape=(shp[k], nsamps))
        tempData = sparse.matmul(s, Zexp[k])
        G.append(tl.tensor(tempData))

    # If indicated, convert G to a single vector
    if vectorG:
        G = factors2vec(G)

    # If not computing F, set F to be the gradient
    if not computeF:
        F = G

    return [F, G]
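
# Sketch of the scatter/accumulate trick used above (toy data): a sparse COO
# matrix with (row = factor-row index of the sample, col = sample id) times
# the exploded Khatri-Rao rows sums each sample's contribution into the right
# row of the gradient.
def _example_gcp_scatter():
    import numpy as np
    import sparse
    rows = np.array([0, 2, 0])         # factor row touched by each sample
    yvals = np.array([1.0, 2.0, 3.0])  # weighted elementwise gradients
    Zexp = np.ones((3, 4))             # exploded rows: 3 samples, rank 4
    s = sparse.COO((rows, np.arange(3)), yvals, shape=(3, 3))
    grad = sparse.matmul(s, Zexp)      # row 0 accumulates samples 0 and 2
    print(grad.todense() if hasattr(grad, 'todense') else grad)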