def test_clip(): # Test that clip can work with single arguments X = T.tensor([0.0, -1.0, 1.0]) X_low = T.tensor([0.0, 0.0, 1.0]) X_high = T.tensor([0.0, -1.0, 0.0]) assert_array_equal(tl.clip(X, a_min=0.0), X_low) assert_array_equal(tl.clip(X, a_max=0.0), X_high) # More extensive test with a larger random tensor rng = tl.check_random_state(0) tensor = tl.tensor(rng.random_sample((10, 10, 10)).astype('float32')) val1 = np.float32(rng.random_sample()) val2 = np.float32(rng.random_sample()) limits = [(min(val1, val2), max(val1, val2)), (-1, 2), (tl.max(tensor) + 1, None), (None, tl.min(tensor) - 1), (tl.max(tensor), None), (tl.min(tensor), None), (None, tl.max(tensor)), (None, tl.min(tensor))] for min_val, max_val in limits: message = f"Tensor clipped incorrectly with min_val={min_val} and max_val={max_val}. Tensor bounds are ({tl.to_numpy(tl.min(tensor))}, {tl.to_numpy(tl.max(tensor))}" if min_val is not None: assert tl.all(tl.clip(tensor, min_val, None) >= min_val), message assert tl.all( tl.clip(tensor, min_val, max_val) >= min_val), message if max_val is not None: assert tl.all(tl.clip(tensor, None, max_val) <= max_val), message assert tl.all( tl.clip(tensor, min_val, max_val) <= max_val), message
def test_cp_flip_sign(): shape = (3, 4, 5) rank = 4 cp_tensor = random_cp(shape, rank) weights, factors = cp_flip_sign(cp_tensor) assert_(tl.all(tl.mean(factors[1], axis=0) > 0)) assert_(tl.all(tl.mean(factors[2], axis=0) > 0)) assert_equal(cp_tensor.rank, cp_tensor.rank) assert_array_equal(cp_tensor.weights, weights) assert_array_almost_equal(cp_to_tensor((weights, factors)), cp_to_tensor(cp_tensor))
def test_non_negative_tucker(monkeypatch): """Test for non-negative Tucker""" rng = tl.check_random_state(1234) tol_norm_2 = 10e-1 tol_max_abs = 10e-1 tensor = tl.tensor(rng.random_sample((3, 4, 3)) + 1) core, factors = tucker(tensor, rank=[3, 4, 3], n_iter_max=200, verbose=1) nn_core, nn_factors = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=100) # Make sure all components are positive for factor in nn_factors: assert_(tl.all(factor >= 0)) assert_(tl.all(nn_core >= 0)) reconstructed_tensor = tucker_to_tensor((core, factors)) nn_reconstructed_tensor = tucker_to_tensor((nn_core, nn_factors)) error = tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 2) error /= tl.norm(reconstructed_tensor, 2) assert_(error < tol_norm_2, 'norm 2 of reconstruction error higher than tol') # Test the max abs difference between the reconstruction and the tensor assert_(tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 'inf') < tol_max_abs, 'abs norm of reconstruction error higher than tol') core_svd, factors_svd = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=500, init='svd', verbose=1) core_random, factors_random = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=200, init='random', random_state=1234) rec_svd = tucker_to_tensor((core_svd, factors_svd)) rec_random = tucker_to_tensor((core_random, factors_random)) error = tl.norm(rec_svd - rec_random, 2) error /= tl.norm(rec_svd, 2) assert_(error < tol_norm_2, 'norm 2 of difference between svd and random init too high') assert_(tl.norm(rec_svd - rec_random, 'inf') < tol_max_abs, 'abs norm of difference between svd and random init too high') # Test for a single rank passed # (should be used for all modes) rank = 3 target_shape = (rank, )*tl.ndim(tensor) core, factors = non_negative_tucker(tensor, rank=rank) assert_(tl.shape(core) == target_shape, 'core has the wrong shape, got {}, but expected {}.'.format(tl.shape(core), target_shape)) for i, f in enumerate(factors): expected_shape = (tl.shape(tensor)[i], rank) assert_(tl.shape(f) == expected_shape, '{}-th factor has the wrong shape, got {}, but expected {}.'.format( i, tl.shape(f), expected_shape)) assert_class_wrapper_correctly_passes_arguments(monkeypatch, non_negative_tucker, Tucker_NN, ignore_args={'return_errors'}, rank=3)
def test_non_negative_tucker(): """Test for non-negative Tucker""" rng = check_random_state(1234) tol_norm_2 = 10e-1 tol_max_abs = 10e-1 tensor = tl.tensor(rng.random_sample((3, 4, 3)) + 1) core, factors = tucker(tensor, rank=[3, 4, 3], n_iter_max=200, verbose=1) nn_core, nn_factors = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=100) # Make sure all components are positive for factor in nn_factors: assert_(tl.all(factor >= 0)) assert_(tl.all(nn_core >= 0)) reconstructed_tensor = tucker_to_tensor(core, factors) nn_reconstructed_tensor = tucker_to_tensor(nn_core, nn_factors) error = tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 2) error /= tl.norm(reconstructed_tensor, 2) assert_(error < tol_norm_2, 'norm 2 of reconstruction error higher than tol') # Test the max abs difference between the reconstruction and the tensor assert_( tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 'inf') < tol_max_abs, 'abs norm of reconstruction error higher than tol') core_svd, factors_svd = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=500, init='svd', verbose=1) core_random, factors_random = non_negative_tucker(tensor, rank=[3, 4, 3], n_iter_max=200, init='random', random_state=1234) rec_svd = tucker_to_tensor(core_svd, factors_svd) rec_random = tucker_to_tensor(core_random, factors_random) error = tl.norm(rec_svd - rec_random, 2) error /= tl.norm(rec_svd, 2) assert_(error < tol_norm_2, 'norm 2 of difference between svd and random init too high') assert_( tl.norm(rec_svd - rec_random, 'inf') < tol_max_abs, 'abs norm of difference between svd and random init too high')
def test_non_negative_parafac_hals(): """Test for non-negative PARAFAC HALS TODO: more rigorous test """ tol_norm_2 = 10e-1 tol_max_abs = 1 rng = tl.check_random_state(1234) tensor = tl.tensor(rng.random_sample((3, 3, 3)) + 1) res = parafac(tensor, rank=3, n_iter_max=120) nn_res = non_negative_parafac_hals(tensor, rank=3, n_iter_max=100, tol=10e-4, init='svd', verbose=0) # Make sure all components are positive _, nn_factors = nn_res for factor in nn_factors: assert_(tl.all(factor >= 0)) reconstructed_tensor = tl.cp_to_tensor(res) nn_reconstructed_tensor = tl.cp_to_tensor(nn_res) error = tl.norm(reconstructed_tensor - nn_reconstructed_tensor, 2) error /= tl.norm(reconstructed_tensor, 2) assert_(error < tol_norm_2, 'norm 2 of reconstruction higher than tol') # Test the max abs difference between the reconstruction and the tensor assert_( tl.max(tl.abs(reconstructed_tensor - nn_reconstructed_tensor)) < tol_max_abs, 'abs norm of reconstruction error higher than tol') # Test fixing mode 0 or 1 with given init fixed_tensor = random_cp((3, 3, 3), rank=2) for factor in fixed_tensor[1]: factor = tl.abs(factor) rec_svd_fixed_mode_0 = non_negative_parafac_hals(tensor, rank=2, n_iter_max=2, init=fixed_tensor, fixed_modes=[0]) rec_svd_fixed_mode_1 = non_negative_parafac_hals(tensor, rank=2, n_iter_max=2, init=fixed_tensor, fixed_modes=[1]) # Check if modified after 2 iterations assert_array_equal( rec_svd_fixed_mode_0.factors[0], fixed_tensor.factors[0], err_msg='Fixed mode 0 was modified in candecomp_parafac') assert_array_equal( rec_svd_fixed_mode_1.factors[1], fixed_tensor.factors[1], err_msg='Fixed mode 1 was modified in candecomp_parafac') res_svd = non_negative_parafac_hals(tensor, rank=3, n_iter_max=100, tol=10e-4, init='svd') res_random = non_negative_parafac_hals(tensor, rank=3, n_iter_max=100, tol=10e-4, init='random', verbose=0) rec_svd = tl.cp_to_tensor(res_svd) rec_random = tl.cp_to_tensor(res_random) error = tl.norm(rec_svd - rec_random, 2) error /= tl.norm(rec_svd, 2) assert_(error < tol_norm_2, 'norm 2 of difference between svd and random init too high') assert_( tl.max(tl.abs(rec_svd - rec_random)) < tol_max_abs, 'abs norm of difference between svd and random init too high') # Regression test: used wrong variable for convergence checking # Used mttkrp*factor instead of mttkrp*factors[-1], which resulted in # error when mode 2 was not constrained and erroneous convergence checking # when mode 2 was constrained. tensor = tl.tensor(rng.random_sample((3, 3, 3)) + 1) nn_estimate, errs = non_negative_parafac_hals(tensor, rank=2, n_iter_max=2, tol=1e-10, init='svd', verbose=0, nn_modes={ 0, }, return_errors=True)
def initialize_constrained_parafac(tensor, rank, init='svd', svd='numpy_svd', random_state=None, non_negative=None, l1_reg=None, l2_reg=None, l2_square_reg=None, unimodality=None, normalize=None, simplex=None, normalized_sparsity=None, soft_sparsity=None, smoothness=None, monotonicity=None, hard_sparsity=None): r"""Initialize factors used in `constrained_parafac`. Parameters ---------- The type of initialization is set using `init`. If `init == 'random'` then initialize factor matrices with uniform distribution using `random_state`. If `init == 'svd'` then initialize the `m`th factor matrix using the `rank` left singular vectors of the `m`th unfolding of the input tensor. If init is a previously initialized `cp tensor`, all the weights are pulled in the last factor and then the weights are set to "1" for the output tensor. Lastly, factors are updated with proximal operator according to the selected constraint(s), so that they satisfy the imposed constraints (does not apply to cptensor initialization). Parameters ---------- tensor : ndarray rank : int random_state : {None, int, np.random.RandomState} init : {'svd', 'random', cptensor}, optional svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS non_negative : bool or dictionary This constraint is clipping negative values to '0'. If it is True non-negative constraint is applied to all modes. l1_reg : float or list or dictionary, optional l2_reg : float or list or dictionary, optional l2_square_reg : float or list or dictionary, optional unimodality : bool or dictionary, optional If it is True unimodality constraint is applied to all modes. normalize : bool or dictionary, optional This constraint divides all the values by maximum value of the input array. If it is True normalize constraint is applied to all modes. simplex : float or list or dictionary, optional normalized_sparsity : float or list or dictionary, optional soft_sparsity : float or list or dictionary, optional smoothness : float or list or dictionary, optional monotonicity : bool or dictionary, optional hard_sparsity : float or list or dictionary, optional Returns ------- factors : CPTensor An initial cp tensor. """ n_modes = tl.ndim(tensor) rng = tl.check_random_state(random_state) if init == 'random': weights, factors = random_cp(tl.shape(tensor), rank, normalise_factors=False, **tl.context(tensor)) elif init == 'svd': try: svd_fun = tl.SVD_FUNS[svd] except KeyError: message = 'Got svd={}. However, for the current backend ({}), the possible choices are {}'.format( svd, tl.get_backend(), tl.SVD_FUNS) raise ValueError(message) factors = [] for mode in range(tl.ndim(tensor)): U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank) # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False if mode == 0: idx = min(rank, tl.shape(S)[0]) U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx]) if tensor.shape[mode] < rank: random_part = tl.tensor(rng.random_sample((U.shape[0], rank - tl.shape(tensor)[mode])), **tl.context(tensor)) U = tl.concatenate([U, random_part], axis=1) factors.append(U[:, :rank]) elif isinstance(init, (tuple, list, CPTensor)): try: weights, factors = CPTensor(init) if tl.all(weights == 1): weights, factors = CPTensor((None, factors)) else: weights_avg = tl.prod(weights) ** (1.0 / tl.shape(weights)[0]) for i in range(len(factors)): factors[i] = factors[i] * weights_avg kt = CPTensor((None, factors)) return kt except ValueError: raise ValueError( 'If initialization method is a mapping, then it must ' 'be possible to convert it to a CPTensor instance' ) else: raise ValueError('Initialization method "{}" not recognized'.format(init)) for i in range(n_modes): factors[i] = proximal_operator(factors[i], non_negative=non_negative, l1_reg=l1_reg, l2_reg=l2_reg, l2_square_reg=l2_square_reg, unimodality=unimodality, normalize=normalize, simplex=simplex, normalized_sparsity=normalized_sparsity, soft_sparsity=soft_sparsity, smoothness=smoothness, monotonicity=monotonicity, hard_sparsity=hard_sparsity, n_const=n_modes, order=i) kt = CPTensor((None, factors)) return kt
def initialize_cp(tensor, rank, init='svd', svd='numpy_svd', random_state=None, normalize_factors=False): r"""Initialize factors used in `parafac`. The type of initialization is set using `init`. If `init == 'random'` then initialize factor matrices with uniform distribution using `random_state`. If `init == 'svd'` then initialize the `m`th factor matrix using the `rank` left singular vectors of the `m`th unfolding of the input tensor. If init is a previously initialized `cp tensor`, all the weights are pulled in the last factor and then the weights are set to "1" for the output tensor. Parameters ---------- tensor : ndarray rank : int init : {'svd', 'random', cptensor}, optional svd : str, default is 'numpy_svd' function to use to compute the SVD, acceptable values in tensorly.SVD_FUNS non_negative : bool, default is False if True, non-negative factors are returned Returns ------- factors : CPTensor An initial cp tensor. """ rng = tl.check_random_state(random_state) if init == 'random': kt = random_cp(tl.shape(tensor), rank, normalise_factors=False, random_state=rng, **tl.context(tensor)) elif init == 'svd': try: svd_fun = tl.SVD_FUNS[svd] except KeyError: message = 'Got svd={}. However, for the current backend ({}), the possible choices are {}'.format( svd, tl.get_backend(), tl.SVD_FUNS) raise ValueError(message) factors = [] for mode in range(tl.ndim(tensor)): U, S, _ = svd_fun(unfold(tensor, mode), n_eigenvecs=rank) # Put SVD initialization on the same scaling as the tensor in case normalize_factors=False if mode == 0: idx = min(rank, tl.shape(S)[0]) U = tl.index_update(U, tl.index[:, :idx], U[:, :idx] * S[:idx]) if tensor.shape[mode] < rank: # TODO: this is a hack but it seems to do the job for now random_part = tl.tensor( rng.random_sample( (U.shape[0], rank - tl.shape(tensor)[mode])), **tl.context(tensor)) U = tl.concatenate([U, random_part], axis=1) factors.append(U[:, :rank]) kt = CPTensor((None, factors)) elif isinstance(init, (tuple, list, CPTensor)): # TODO: Test this try: if normalize_factors is True: warnings.warn( 'It is not recommended to initialize a tensor with normalizing. Consider normalizing the tensor before using this function' ) kt = CPTensor(init) weights, factors = kt if tl.all(weights == 1): kt = CPTensor((None, factors)) else: weights_avg = tl.prod(weights)**(1.0 / tl.shape(weights)[0]) for i in range(len(factors)): factors[i] = factors[i] * weights_avg kt = CPTensor((None, factors)) except ValueError: raise ValueError( 'If initialization method is a mapping, then it must ' 'be possible to convert it to a CPTensor instance') else: raise ValueError( 'Initialization method "{}" not recognized'.format(init)) if normalize_factors: kt = cp_normalize(kt) return kt
def hals_nnls(UtM, UtU, V=None, n_iter_max=500, tol=10e-8, sparsity_coefficient=None, normalize=False, nonzero_rows=False, exact=False): """ Non Negative Least Squares (NNLS) Computes an approximate solution of a nonnegative least squares problem (NNLS) with an exact block-coordinate descent scheme. M is m by n, U is m by r, V is r by n. All matrices are nonnegative componentwise. This algorithm is defined in [1], as an accelerated version of the HALS algorithm. It features two accelerations: an early stop stopping criterion, and a complexity averaging between precomputations and loops, so as to use large precomputations several times. This function is made for being used repetively inside an outer-loop alternating algorithm, for instance for computing nonnegative matrix Factorization or tensor factorization. Parameters ---------- UtM: r-by-n array Pre-computed product of the transposed of U and M, used in the update rule UtU: r-by-r array Pre-computed product of the transposed of U and U, used in the update rule V: r-by-n initialization matrix (mutable) Initialized V array By default, is initialized with one non-zero entry per column corresponding to the closest column of U of the corresponding column of M. n_iter_max: Postivie integer Upper bound on the number of iterations Default: 500 tol : float in [0,1] early stop criterion, while err_k > delta*err_0. Set small for almost exact nnls solution, or larger (e.g. 1e-2) for inner loops of a PARAFAC computation. Default: 10e-8 sparsity_coefficient: float or None The coefficient controling the sparisty level in the objective function. If set to None, the problem is solved unconstrained. Default: None nonzero_rows: boolean True if the lines of the V matrix can't be zero, False if they can be zero Default: False exact: If it is True, the algorithm gives a results with high precision but it needs high computational cost. If it is False, the algorithm gives an approximate solution Default: False Returns ------- V: array a r-by-n nonnegative matrix \approx argmin_{V >= 0} ||M-UV||_F^2 rec_error: float number of loops authorized by the error stop criterion iteration: integer final number of update iteration performed complexity_ratio: float number of loops authorized by the stop criterion Notes ----- We solve the following problem :math:`\\min_{V >= 0} ||M-UV||_F^2` The matrix V is updated linewise. The update rule for this resolution is:: .. math:: \\begin{equation} V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j))/UtU[k,k] \\end{equation} with j the update iteration. This problem can also be defined by adding a sparsity coefficient, enhancing sparsity in the solution [2]. In this sparse version, the update rule becomes:: .. math:: \\begin{equation} V[k,:]_(j+1) = V[k,:]_(j) + (UtM[k,:] - UtU[k,:]\\times V_(j) - sparsity_coefficient)/UtU[k,k] \\end{equation} References ---------- .. [1]: N. Gillis and F. Glineur, Accelerated Multiplicative Updates and Hierarchical ALS Algorithms for Nonnegative Matrix Factorization, Neural Computation 24 (4): 1085-1105, 2012. .. [2] J. Eggert, and E. Korner. "Sparse coding and NMF." 2004 IEEE International Joint Conference on Neural Networks (IEEE Cat. No. 04CH37541). Vol. 4. IEEE, 2004. """ rank, n_col_M = tl.shape(UtM) if V is None: # checks if V is empty V = tl.solve(UtU, UtM) V = tl.clip(V, a_min=0, a_max=None) # Scaling scale = tl.sum(UtM * V) / tl.sum(UtU * tl.dot(V, tl.transpose(V))) V = V * scale if exact: n_iter_max = 50000 tol = 10e-16 for iteration in range(n_iter_max): rec_error = 0 for k in range(rank): if UtU[k, k]: if sparsity_coefficient is not None: # Modifying the function for sparsification deltaV = tl.where( (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k] > -V[k, :], (UtM[k, :] - tl.dot(UtU[k, :], V) - sparsity_coefficient) / UtU[k, k], -V[k, :]) V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV) else: # without sparsity deltaV = tl.where( (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k] > -V[k, :], (UtM[k, :] - tl.dot(UtU[k, :], V)) / UtU[k, k], -V[k, :]) V = tl.index_update(V, tl.index[k, :], V[k, :] + deltaV) rec_error = rec_error + tl.dot(deltaV, tl.transpose(deltaV)) # Safety procedure, if columns aren't allow to be zero if nonzero_rows and tl.all(V[k, :] == 0): V[k, :] = tl.eps(V.dtype) * tl.max(V) elif nonzero_rows: raise ValueError("Column " + str(k) + " of U is zero with nonzero condition") if normalize: norm = tl.norm(V[k, :]) if norm != 0: V[k, :] /= norm else: sqrt_n = 1 / n_col_M**(1 / 2) V[k, :] = [sqrt_n for i in range(n_col_M)] if iteration == 0: rec_error0 = rec_error numerator = tl.shape(V)[0] * tl.shape(V)[1] + tl.shape(V)[1] * rank denominator = tl.shape(V)[0] * rank + tl.shape(V)[0] complexity_ratio = 1 + (numerator / denominator) if exact: if rec_error < tol * rec_error0: break else: if rec_error < tol * rec_error0 or iteration > 1 + 0.5 * complexity_ratio: break return V, rec_error, iteration, complexity_ratio
def active_set_nnls(Utm, UtU, x=None, n_iter_max=100, tol=10e-8): """ Active set algorithm for non-negative least square solution. Computes an approximate non-negative solution for Ux=m linear system. Parameters ---------- Utm : vectorized ndarray Pre-computed product of the transposed of U and m UtU : ndarray Pre-computed Kronecker product of the transposed of U and U x : init Default: None n_iter_max : int Maximum number of iteration Default: 100 tol : float Early stopping criterion Returns ------- x : ndarray Notes ----- This function solves following problem: .. math:: \\begin{equation} \\min_{x} ||Ux - m||^2 \\end{equation} According to [1], non-negativity-constrained least square estimation problem becomes: .. math:: \\begin{equation} x' = (Utm) - (UTU)\\times x \\end{equation} Reference ---------- [1] : Bro, R., & De Jong, S. (1997). A fast non‐negativity‐constrained least squares algorithm. Journal of Chemometrics: A Journal of the Chemometrics Society, 11(5), 393-401. """ if tl.get_backend() == 'tensorflow': raise ValueError( "Active set is not supported with the tensorflow backend. Consider using fista method with tensorflow." ) if x is None: x_vec = tl.zeros(tl.shape(UtU)[1], **tl.context(UtU)) else: x_vec = tl.base.tensor_to_vec(x) x_gradient = Utm - tl.dot(UtU, x_vec) passive_set = x_vec > 0 active_set = x_vec <= 0 support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec)) for iteration in range(n_iter_max): if iteration > 0 or tl.all(x_vec == 0): indice = tl.argmax(x_gradient) passive_set = tl.index_update(passive_set, tl.index[indice], True) active_set = tl.index_update(active_set, tl.index[indice], False) # To avoid singularity error when initial x exists try: passive_solution = tl.solve(UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, tl.index[int(i)], 0) # Start from zeros if solve is not achieved except: x_vec = tl.zeros(tl.shape(UtU)[1]) support_vec = tl.zeros(tl.shape(x_vec), **tl.context(x_vec)) passive_set = x_vec > 0 active_set = x_vec <= 0 if tl.any(active_set): indice = tl.argmax(x_gradient) passive_set = tl.index_update(passive_set, tl.index[indice], True) active_set = tl.index_update(active_set, tl.index[indice], False) passive_solution = tl.solve(UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, tl.index[int(i)], 0) # update support vector if it is necessary if tl.min(support_vec[passive_set]) <= 0: for i in range(len(passive_set)): alpha = tl.min( x_vec[passive_set][support_vec[passive_set] <= 0] / (x_vec[passive_set][support_vec[passive_set] <= 0] - support_vec[passive_set][support_vec[passive_set] <= 0])) update = alpha * (support_vec - x_vec) x_vec = x_vec + update passive_set = x_vec > 0 active_set = x_vec <= 0 passive_solution = tl.solve( UtU[passive_set, :][:, passive_set], Utm[passive_set]) indice_list = [] for i in range(tl.shape(support_vec)[0]): if passive_set[i]: indice_list.append(i) support_vec = tl.index_update( support_vec, tl.index[int(i)], passive_solution[len(indice_list) - 1]) else: support_vec = tl.index_update(support_vec, tl.index[int(i)], 0) if tl.any(passive_set) != True or tl.min( support_vec[passive_set]) > 0: break # set x to s x_vec = tl.clip(support_vec, 0, tl.max(support_vec)) # gradient update x_gradient = Utm - tl.dot(UtU, x_vec) if tl.any(active_set) != True or tl.max(x_gradient[active_set]) <= tol: break return x_vec
def test_clips_all_negative_tensor_correctly(): # Regression test for bug found with the pytorch backend negative_valued_tensor = tl.zeros((10, 10)) - 0.1 clipped_tensor = tl.clip(negative_valued_tensor, 0) assert tl.all(clipped_tensor == 0)
def tl_gcp_fg_est(M, f, g, subs, xvals, weights, computeF=True, computeG=True, vectorG=False, lambdaCheck=True, crng=None): """Estimate the GCP objective function and gradient via a subsample (Analogous to tt_gcp_fg_est.m) Parameters ---------- M : CPTensor f : Function handle elementwise loss of the form f(x,m) g : Function handle Elementwise intermediate gradient of the form g(x,m) subs : ndarray xvals : ndarray weights : ndarray computeF : boolean Include computation of the loss function. Default is true. computeG : boolean Include computation of the gradient. vectorG : boolean Reshape gradient matrices into a single vector. lambdaCheck : boolean Ensure lambda = [1,1,...,1] crng : ndarray Range for correction/adjust when nonzeros may be included in the "zero" sample Used in semi-stratified sampling (**COMING SOON**) Returns ------- F : scalar Loss function value G : ndarray(s) If vectorG = False, G is a list of matrices where G[k] is the same size as the k-th factor matrix Otherwise, G is the gradient in vector form """ d = len(M[1]) shp = tl.shape(M) sz = 1 for i in tl.shape(M): sz *= i F = None G = [] # lambda check if lambdaCheck: if not tl.all(M[0]): print("Lambda weights are not all '1's.") # TODO: overload the normalize function to distribute weights into factors ala MATLAB sys.exit(1) # Compute model values and exploded Zk matrices mvals, Zexp = gcp_fg_est_helper(M[1], subs) # Compute function values if computeF: fVec = f(xvals, mvals) if crng is not None: # TODO: Semi-stratified sampling adjustment to fVec print("Semi-stratified sampling not yet implemented.") F = tl.sum(weights * fVec) if not computeG: return [F, G] # Compute sample y values yvals = weights * g(xvals, mvals) if crng is not None: # TODO: Semi-stratified sampling adjustment to fVec print("Semi-stratified sampling not yet implemented.") # Compute function and gradient G = [] nsamps = tl.shape(subs)[0] for k in range(d): # Construct Y matrices based on sample. Row of element is row index to accumulate in the gradient # Column indices are corresponding samples. They are in order b/c they match vector of samples # to be multiplied on the right s = sparse.COO((subs[:, k], np.arange(nsamps)), yvals[0], shape=(shp[k], nsamps)) tempData = sparse.matmul(s, Zexp[k]) G.append(tl.tensor(tempData)) # If indicated, convert G to a single vector if vectorG: G = factors2vec(G) # If not computing F, set F to be the gradient if not computeF: F = G return [F, G]
def gcp(X, R, type='normal', func=None, grad=None, lower=None,\ opt='lbfgsb', mask=None, maxiters=1000, \ init='random', printitn=10, state=None, factr=1e7, pgtol=1e-4, \ fsamp=None, gsamp=None, oversample=1.1, sampler='uniform', \ fsampler=None, rate=1e-3, decay=0.1, maxfails=1, epciters=1000, \ festtol=-math.inf, beta1=0.9, beta2=0.999, epsilon=1e-8): """Generalized CANDECOMP/PARAFAC (GCP) decomposition via all-at-once optimization (OPT) [1] Computes a rank-'R' decomposition of 'tensor' such that:: tensor = [|weights; factors[0], ..., factors[-1] |]. GCP-OPT allows the use of a variety of statistically motivated loss functions suited to the data held in a tensor (i.e. continuous, discrete, binary, etc) Parameters ---------- X : ndarray Tensor to factorize **COMING SOON** Sparse tensor support R : int Rank of decomposition (Number of components). type : str, Type of objective function used Options include: 'normal' or 'gaussian' - Gaussian for real-valued data (DEFAULT) 'binary' or 'bernoulli-odds' - Bernoulli w/ odds link for binary data 'bernoulli-logit' - Bernoulli w/ logit link for binary data 'count' or 'poisson' - Poisson for count data 'poisson-log' - Poisson w/ log link for count data 'rayleigh' - Rayleigh distribution for real-valued data 'gamma' - Gamma distribution for non-negative real-valued data **COMING SOON**: 'huber (DELTA) - Similar to Gaussian, for real-valued data 'negative-binomial (r)' - Negative binomial for count data 'beta (BETA)' - Beta divergence for non-negative real-valued data 'user-specified' - Customized objective function provided by user func: lambda function User specified custom objective function, eg. lambda x, m: (m-x)**2 grad: lambda function User specified custom gradient function, eg. lambda x, m: 2*(m-x) lower: 0 or -inf Lower bound for custom objective/gradient opt : str Optimization method Options include: 'lbfgsb' - Bound-constrained limited-memory BFGS 'sgd' - Stochastic gradient descent (SGD) **COMING SOON** 'adam' - Momentum-based SGD method 'adagrad' - Adaptive gradient algorithm, well suited for sparse data If 'tensor' is dense, all 4 options can be used, 'lbfgsb' by default. **COMING SOON** - Sparse format support If 'tensor' is sparse, only 'sgd', 'adam' and 'adagrad' can be used, 'adam' by default. Each method has specific parameters, see documentation mask : ndarray Specifies a mask, 0's for missing/incomplete entries, 1's elsewhere, with the same shape as 'tensor'. **COMING SOON** - Missing/incomplete data simulation. maxiters : int Maximum number of outer iterations, 1000 by default. init : {'random', 'svd', cptensor} Initialization for factor matrices, 'random' by default. Options include: 'random' - random initialization from a uniform distribution on [0,1) 'svd' - initialize the `m`th factor matrix using the `rank` left singular vectors of the `m`th unfolding of the input tensor. cptensor - initialization provided by user. NOTE: weights are pulled in the last factor and then the weights are set to "1" for the output tensor. Initializations all result in a cptensor where the weights are one. printitn : int Print every n iterations; 0 for no printing, 10 by default. state : {None, int, np.random.RandomState} Seed for reproducable random number generation factr : float (L-BFGS-B parameter) Tolerance on the change of objective values. Defaults to 1e7. pgtol : float (L-BFGS-B parameter) Projected gradient tolerance. Defaults to 1e-5 sampler : {uniform, stratified, semi-stratified} Type of sampling to use for stochastic gradient (SGD/ADAM/ADAGRAD). Defaults to 'uniform' for dense tensors. Defaults to 'stratified' for sparse tensors. Options include: 'uniform' - Uniform random sampling **COMING SOON** 'stratified' - Stratified sampling, targets sparse data. Zero and nonzero values sampled separately. 'semi-stratified' - Similar to stratified sampling, but is more computationally efficient (See papers referenced). gsamp : int Number of samples for stochastic gradient (SGD/ADAM/ADAGRAD parameter). Generally set to be O(sum(shape)*R). **COMING SOON** For stratified or semi-stratified, this may be two numbers: - the number of nnz samples - the number of zero samples. If only one number is specified, then this value is used for both nnzs and zeros (total number of samples is 2x specified value in this case). fsampler : {'uniform', 'stratified', custom} Type of sampling for estimating objective function (SGD/ADAM/ADAGRAD parameter). Options include: 'uniform' - Uniform random sampling **COMING SOON** 'stratified' - Stratified sampling, targets sparse data. Zero and nonzero values sampled separately. custom - User-defined sampler (lambda function). Custom option is primarily useful in reusing sampled elements across multiple tests. fsamp : int (SGD/ADAM/ADAGRAD parameter) Number of samples to estimate objective function. This should generally be somewhat large since we want this sample to generate a reliable estimate of the true function value. oversample : float (Stratified sampling parameter) Factor to oversample when implicitly sampling zeros in the sparse case. Defaults to 1.1. Only adjust for very small tensors. rate : float (SGD/ADAM parameter) Initial learning rate. Defaults to 1e-3. decay : float (SGD/ADAM parameter) Amount to decrease learning rate when progress stagnates, i.e. no change in objective function between epochs. Defaults to 0.1. maxfails : int (SGD/ADAM parameter) Number of times to decrease the learning rate. Defaults to 1, may be set to zero. epciters : int (SGD/ADAM parameter) Iterations per epoch. Defaults to 1000. festtol : float (SGD/ADAM parameter) Quit estimation of function if it goes below this level. Defaults to -inf. beta1 : float (ADAM parameter) - generally doesn't need to be changed Defaults to 0.9 beta2 : float (ADAM parameter) - generally doesn't need to be changed Defaults to 0.999 epsilon : float (ADAM parameter) - generally doesn't need to be changed Defaults to 1e-8 Returns ------- Mfin : CPTensor Canonical polyadic decomposition of input tensor X Reference --------- [1] D. Hong, T. G. Kolda, J. A. Duersch, Generalized Canonical Polyadic Tensor Decomposition, SIAM Review, 62:133-163, 2020, https://doi.org/10.1137/18M1203626 [2] T. G. Kolda, D. Hong, Stochastic Gradients for Large-Scale Tensor Decomposition. SIAM J. Mathematics of Data Science, 2:1066-1095, 2020, https://doi.org/10.1137/19m1266265 """ # Timer - Setup (outside optimization) start_setup0 = time.perf_counter() # Initial setup nd = tl.ndim(X) sz = tl.shape(X) tsz = X.size X_context = tl.context(X) vecsz = 0 for i in range(nd): # tsz *= sz[i] vecsz += sz[i] vecsz *= R W = mask # Random set-up if state is not None: state = tl.check_random_state(state) # capture stats(nnzs, zeros, missing) nnonnzeros = 0 X = tl.tensor_to_vec(X) for i in X: if i > 0: nnonnzeros += 1 X = tl.reshape(X, sz) nzeros = tsz - nnonnzeros nmissing = 0 if W is not None: W = tl.tensor_to_vec(W) for i in range(tl.shape(W)[0]): if W[i] > 0: nmissing += 1 # TODO: is this right?? W = tl.reshape(W, sz) # Dictionary for storing important information regarding the decomposition problem info = {} info['tsz'] = tsz info[ 'nmissing'] = 0 # TODO: revisit once missing value functionality incorporated info['nnonnzeros'] = nnonnzeros info[ 'nzeros'] = nzeros # TODO: revisit once missing value functionality incorporated # Set up function, gradient, and bounds fh, gh, lb = validate_type(type, X) info['type'] = type info['fh'] = fh info['gh'] = gh info['lb'] = lb # initialize CP-tensor and make a copy to work with so as to have the starting guess M0 = initialize_cp(X, R, init=init, random_state=state) wghts0 = tl.copy(M0[0]) fcts0 = [] for i in range(nd): f = tl.copy(M0[1][i]) fcts0.append(f) M = CPTensor((wghts0, fcts0)) # Lambda weights are assumed to be all ones throughout, check initial guess satisfies assumption if not tl.all(M[0]): print("Initialization of CP tensor has failed (lambda weight(s) != 1.") sys.exit(1) # check optimization method if validate_opt(opt): print("Choose optimization method from: {lbfgsb, sgd}") sys.exit(1) use_stoc = False if opt != 'lbfgsb': use_stoc = True info['opt'] = opt # set up for stochastic optimization (e.g. sgd, adam, adagrad) if use_stoc: # set up fsampler, gsampler ---> uniform sampling only for now # TODO : add stratified, semi-stratified and user-specified sampling options if not sampler == "uniform": print( "Only uniform sampling currently supported for stochastic optimization." ) sys.exit(1) fsampler_type = sampler gsampler_type = sampler # setup fsampler f_samp = fsamp if f_samp == None: upper = np.maximum(math.ceil(tsz / 10), 10 ^ 6) f_samp = np.minimum(upper, tsz) # set up lambda function/function handle for uniform sampling fsampler = lambda: tl_sample_uniform(X, f_samp) fsampler_str = "{} with {} samples".format(fsampler_type, f_samp) # setup gsampler g_samp = gsamp if g_samp == None: upper = np.maximum(1000, math.ceil(10 * tsz / maxiters)) g_samp = np.minimum(upper, tsz) # setup lambda function/function handle for uniform sampling gsampler = lambda: tl_sample_uniform(X, g_samp) gsampler_str = "{} with {} samples".format(gsampler_type, g_samp) # capture the info info['fsampler'] = fsampler_str info['gsampler'] = gsampler_str info['fsamp'] = f_samp info['gsamp'] = g_samp time_setup0 = time.perf_counter() - start_setup0 # Welcome message if printitn > 0: print("GCP-OPT-{} (Generalized CP Tensor Decomposition)".format(opt)) print("------------------------------------------------") print("Tensor size:\t\t\t\t{} ({} total entries)".format(sz, tsz)) if nmissing > 0: print("Missing entries: {} ({})".format(nmissing, 100 * nmissing / tsz)) print("Generalized function type:\t{}".format(type)) print("Objective function:\t\t\t{}".format( inspect.getsource(fh).strip())) print("Gradient function:\t\t\t{}".format( inspect.getsource(gh).strip())) print("Lower bound of factors:\t\t{}".format(lb)) print("Optimization method:\t\t{}".format(opt)) if use_stoc: print("Max iterations (epochs): {}".format(maxiters)) print("Iterations per epoch: {}".format(epciters)) print("Learning rate / decay / maxfails: {} {} {}".format( rate, decay, maxfails)) print("Function Sampler: {}".format(fsampler_str)) print("Gradient Sampler: {}".format(gsampler_str)) else: print("Max iterations:\t\t\t\t{}".format(maxiters)) print("Projected gradient tol:\t\t{}\n".format(pgtol)) # Make like a zombie and start decomposing Mfin = None # L-BFGS-B optimization if opt == 'lbfgsb': # Timer - Setup (inside optimization) start_setup1 = time.perf_counter() # set up bounds for l-bfgs-b if lb = 0 bounds = None if lb == 0: lb = tl.zeros(tsz) ub = math.inf * tl.ones(tsz) fcn = lambda x: tl_gcp_fg(vec2factors(x, sz, R, X_context), X, fh, gh) m = factors2vec(M[1]) # capture params for l-bfgs-b lbfgsb_params = {} lbfgsb_params['x0'] = factors2vec(M0.factors) lbfgsb_params['printEvery'] = printitn lbfgsb_params['maxIts'] = maxiters lbfgsb_params['maxTotalIts'] = maxiters * 10 lbfgsb_params['factr'] = factr lbfgsb_params['pgtol'] = pgtol time_setup1 = time.perf_counter() - start_setup1 if printitn > 0: print("Begin main loop") # Timer - Main operation start_main = time.perf_counter() x, f, info_dict = fmin_l_bfgs_b(fcn, m, approx_grad=False, bounds=None, \ pgtol=pgtol, factr=factr, maxiter=maxiters) time_main = time.perf_counter() - start_main # capture info info['fcn'] = fcn info['lbfgsbopts'] = lbfgsb_params info['lbfgsbout'] = info_dict info['finalf'] = f if printitn > 0: print("\nFinal objective: {}".format(f)) print("Setup time: {}".format(time_setup0 + time_setup1)) print("Main loop time: {}".format(time_main)) print("Outer iterations:" ) # TODO: access this value (see manpage for fmin_l_bfgs_b) print("Total iterations: {}".format(info_dict['nit'])) print("L-BFGS-B exit message: {} ({})".format( info_dict['task'], info_dict['warnflag'])) Mfin = vec2factors(x, sz, R, X_context) # Stochastic optimization else: # Timer - Setup (inside optimization) start_setup1 = time.perf_counter() if opt == "adam" or opt == "adagrad": print("{} not currently supported".format(opt)) sys.exit(1) # prepare for sgd # initialize moments m = [] v = [] # Extract samples for estimating function value (i.e. call fsampler), these never change fsubs, fvals, fwgts = fsampler() # Compute initial estimated function value fest = tl_gcp_fg_est(M, fh, gh, fsubs, fvals, fwgts, True, False, False, False) # Set up loop variables nfails = 0 titers = 0 M_weights = tl.copy(M[0]) M_factors = [] for k in range(nd): M_factors.append(tl.copy(M[1][k])) Msave = CPTensor( (M_weights, M_factors)) # save a copy of the initial model msave = m vsave = v fest_prev = fest[0] # Tracing the progress in function value by epoch fest_trace = tl.zeros(maxiters + 1) step_trace = tl.zeros(maxiters + 1) time_trace = tl.zeros(maxiters + 1) fest_trace[0] = fest[0] # Print status if printitn > 0: print("Begin main loop") print("Initial f-est: {}".format(fest[0])) time_setup1 = time.perf_counter() - start_setup1 start_main = time.perf_counter() time_trace[0] = time.perf_counter() - start_setup0 # Main loop - outer iteration for nepoch in range(maxiters): step = (decay**nfails) * rate # Main loop - inner iteration for iter in range(epciters): # Tracking iterations titers = titers + 1 # Select subset for stochastic gradient (i.e. call gsampler) gsubs, gvals, gwts = gsampler() # Compute gradients for each mode Gest = tl_gcp_fg_est(M, fh, gh, gsubs, gvals, gwts, False, True, False, False) # Check for inf gradient for g in Gest[0]: g_max = tl.max(g) g_min = tl.min(g) if math.isinf(g_max) or math.isinf(g_min): print( "Infinite gradient encountered! (epoch = {}, iter = {})" .format(nepoch, iter)) # TODO : add functionality for ADAM and ADAGRAD optimization # Take gradient step for k in range(nd): M.factors[k] = M.factors[k] - step * Gest[0][k] # Estimate objective (i.e. call tl_gcp_fg_est) fest = tl_gcp_fg_est(M, fh, gh, fsubs, fvals, fwgts, True, False, False, False) # Save trace (fest & step) fest_trace[nepoch + 1] = fest[0] step_trace[nepoch + 1] = step # Check convergence condition failed_epoch = False if fest[0] > fest_prev: failed_epoch = True if failed_epoch: nfails += 1 festtol_test = False if fest[0] < festtol: festtol_test = True # Reporting if printitn > 0 and (nepoch % printitn == 0 or failed_epoch or festtol_test): print("Epoch {}: f-est = {}, step = {}".format( nepoch, fest[0], step), end='') if failed_epoch: print( ", nfails = {} (resetting to solution from last epoch)" .format(nfails)) print("") # Rectify failed epoch or save current solution if failed_epoch: M = Msave m = msave v = vsave fest[0] = fest_prev titers = titers - epciters else: Msave = CPTensor((tl.copy(M.weights), tl.copy(M.factors))) msave = m vsave = v fest_prev = fest[0] time_trace[nepoch] = time.perf_counter() - start_setup0 if (nfails > maxfails) or festtol_test: break Mfin = M time_main = time.perf_counter() - start_main # capture info info['fest_trace'] = fest_trace info['step_trace'] = step_trace info['time_trace'] = time_trace info['nepoch'] = nepoch # Report end of main loop if printitn > 0: print("End Main Loop") print("") print("Final f-east: {}".format(fest[0])) print("Setup time: {0:0.6f}".format(time_setup0 + time_setup1)) print("Main loop time: {0:0.6f}".format(time_main)) print("Total iterations: {}".format(nepoch * epciters)) # Wrap up / capture remaining info info['mainTime'] = time_main info['setupTime0'] = time_setup0 info['setupTime1'] = time_setup1 info['setupTime'] = time_setup0 + time_setup1 return Mfin