import random
import numpy as np
import ktensor

def generateSolution(sz, R, AFill, LambdaHat):
    A = []
    for n in range(len(sz)):
        A.append(np.zeros((sz[n], R)))
        for r in range(R):
            # randomly select AFill[n] entries of this column to be nonzero
            nnz = random.sample(range(sz[n]), AFill[n])
            A[n][nnz, r] = np.random.random(size=AFill[n])
            # make a small percentage of the nonzero entries large
            bigSamp = int(0.1 * sz[n])
            if bigSamp > AFill[n]:
                bigSamp = 1
            big = random.sample(nnz, bigSamp)
            A[n][big, r] = 10 * A[n][big, r]
    # all component weights are one
    lmbda = np.ones(R)
    M = ktensor.ktensor(lmbda, A)
    M.normalize_sort(1)
    ## generate the noise bias as a rank-1 ktensor
    U = []
    for n in range(len(sz)):
        U.append(np.zeros((sz[n], 1)))
        U[n][:, 0] = np.random.random(size=sz[n])
    Mhat = ktensor.ktensor(np.array([1]), U)
    Mhat.normalize(1)
    Mhat.lmbda[0] = LambdaHat
    return M, Mhat
def generateOriginalTensor(L, A, U, tensorModes, alpha):
    MFull = []
    ## for each set of modes, we will construct both M and Mhat
    for k in range(len(tensorModes)):
        Alist = [A[n] for n in tensorModes[k]]
        Ulist = [U[n] for n in tensorModes[k]]
        M = ktensor.ktensor(L, Alist)
        Mhat = ktensor.ktensor(np.array([alpha]), Ulist)
        MFull.append(M.toTensor() + Mhat.toTensor())
    return MFull
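# A minimal usage sketch chaining the two generators above; the sizes, rank,
# fill counts, and noise weight are illustrative values, not from the
# original source.
if __name__ == "__main__":
    M, Mhat = generateSolution([50, 40, 30], 4, [10, 10, 10], 5)
    # observed tensor = signal plus the rank-1 noise bias over all three modes
    MFull = generateOriginalTensor(M.lmbda, M.U, Mhat.U, [[0, 1, 2]], 5)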
import numpy as np
import CP_APR
import ktensor
import pmdTools

def useHier(topX, regX, R, hierIters, hierInner, regIters, regInner, tensorInfo):
    topY1, top1stats, top1mstats = CP_APR.cp_apr(topX, R, maxiters=hierIters,
                                                 maxinner=hierInner)
    # reduce the factors to probabilities and then just sort them
    topY1.normalize_sort(1)
    topY1 = pmdTools.zeroSmallFactors(topY1, 1e-4)
    ### Use the top-level factors to populate the fine-grained factors
    Udiag = np.zeros((len(tensorInfo['diag']), R))
    Umed = np.zeros((len(tensorInfo['med']), R))
    ### Patient factors stay the same
    for idx, diag in enumerate(tensorInfo['diag']):
        topDiagIdx = tensorInfo['diagHier'][diag]
        diagCount = tensorInfo['diagHierCount'][topDiagIdx]
        Udiag[idx, :] = topY1.U[1][topDiagIdx, :] / diagCount
    for idx, med in enumerate(tensorInfo['med']):
        topMedIdx = tensorInfo['medHier'][med]
        medCount = tensorInfo['medHierCount'][topMedIdx]
        Umed[idx, :] = topY1.U[2][topMedIdx, :] / medCount
    Mtop = ktensor.ktensor(np.ones(R), [topY1.U[0].copy(), Udiag, Umed])
    Y1, ystats, mstats = CP_APR.cp_apr(regX, R, Minit=Mtop, maxiters=regIters,
                                       maxinner=regInner)
    return Y1, topY1, top1stats, top1mstats, ystats, mstats
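# Hedged sketch of the tensorInfo mapping that useHier expects, inferred from
# the lookups above; the keys are real, the values are illustrative only.
# 'diag'/'med' list the fine-grained codes, the '*Hier' dicts map each code to
# its row index in the coarse factors, and '*HierCount' gives the number of
# children under each coarse code.
exampleTensorInfo = {
    'diag': ['401.1', '250.0'],
    'diagHier': {'401.1': 0, '250.0': 1},
    'diagHierCount': {0: 12, 1: 8},
    'med': ['aspirin', 'metformin'],
    'medHier': {'aspirin': 0, 'metformin': 1},
    'medHierCount': {0: 5, 1: 3},
}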
def initialize(self, M=None):
    """ Initialize the tensor decomposition """
    if M is None:
        AU = tensorTools.randomInit(self.X.shape, 1)
        F = tensorTools.randomInit(self.X.shape, self.R)
        self.M[REG_LOCATION] = ktensor.ktensor(np.ones(self.R), F)
        self.M[AUG_LOCATION] = ktensor.ktensor(np.ones(1), AU)
    else:
        ## do a quick sanity check
        if len(M) != 2:
            raise ValueError("Initialization needs to be of size 2")
        if M[0].__class__ != ktensor.ktensor or M[1].__class__ != ktensor.ktensor:
            raise ValueError("Not ktensor type")
        self.M = M
def projectSlice(self, X, n, iters=100, epsilon=1e-10, convTol=1e-4):
    """
    Project a slice, solving for the factors of the nth mode

    Parameters
    ------------
    X : the tensor to project onto the basis
    n : the mode to project onto
    iters : the max number of inner iterations
    epsilon : parameter to avoid dividing by zero
    convTol : the convergence tolerance

    Output
    -----------
    the projection matrix
    """
    ## Setup the 'initial guess'
    F = []
    for m in range(X.ndims()):
        if m == n:
            F.append(np.random.rand(X.shape[m], self.R))
        else:
            ## double check the shape is the right dimensions
            if self.basis[m].shape[0] != X.shape[m]:
                raise ValueError("Shape of the tensor X is incorrect")
            F.append(self.basis[m])
    M = ktensor.ktensor(np.ones(self.R), F)
    ## Solve for the subproblem
    M, Phi, totIter, kktMV = CP_APR.solveForModeB(X, M, n, iters, epsilon, convTol)
    ## scale by summing across the rows
    totWeight = np.sum(M.U[n], axis=1)
    zeroIdx = np.where(totWeight < 1e-100)[0]
    if len(zeroIdx) > 0:
        # for the zero rows we evenly distribute the weight
        evenDist = np.repeat(1.0 / self.R, len(zeroIdx) * self.R)
        M.U[n][zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
        totWeight = np.sum(M.U[n], axis=1)
    twMat = np.repeat(totWeight, self.R).reshape(X.shape[n], self.R)
    M.U[n] = M.U[n] / twMat
    return M.U[n]
def projectSlice(self, X, n, iters=10, epsilon=1e-10, convTol=1e-4):
    """
    Project a slice, solving for the factors of the nth mode

    Parameters
    ------------
    X : the tensor to project onto the basis
    n : the mode to project onto
    iters : the max number of inner iterations
    epsilon : parameter to avoid dividing by zero
    convTol : the convergence tolerance

    Output
    -----------
    the projection matrix
    """
    ## Setup the 'initial guess'
    F = []
    for m in range(X.ndims()):
        if m == n:
            F.append(np.random.rand(X.shape[m], self.R))
        else:
            ## double check the shape is the right dimensions
            if self.basis[m].shape[0] != X.shape[m]:
                raise ValueError("Shape of the tensor X is incorrect")
            F.append(self.basis[m])
    M = ktensor.ktensor(np.ones(self.R), F)
    ## Solve for the subproblem
    M, Phi, totIter, kktMV = CP_APR.solveForModeB(X, M, n, iters, epsilon, convTol)
    ## scale by summing across the rows
    totWeight = np.sum(M.U[n], axis=1)
    zeroIdx = np.where(totWeight < 1e-100)[0]
    if len(zeroIdx) > 0:
        # for the zero rows we evenly distribute the weight
        evenDist = np.repeat(1.0 / self.R, len(zeroIdx) * self.R)
        M.U[n][zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
        totWeight = np.sum(M.U[n], axis=1)
    twMat = np.repeat(totWeight, self.R).reshape(X.shape[n], self.R)
    M.U[n] = M.U[n] / twMat
    return M.U[n]
import time
import numpy as np
import ktensor
import tensorTools

def cp_apr(X, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=10,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternating Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats : the statistics for each inner iteration
    modelStats : a dictionary with the final statistics for this tensor factorization
    """
    N = X.ndims()
    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)
    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for i in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)
    ## statistics
    cpStats = np.zeros(7)
    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            startMode = time.time()
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V > 0] = M.U[n][V > 0] + kappa
            M, Phi[n], inner, kktModeViolations[n], isConverged = \
                __solveSubproblem(X, M, n, maxinner, isConverged, epsilon, tol)
            nInnerIters[iteration] += inner
            elapsed = time.time() - startMode
            # only write the outer iterations for now
            #cpStats = np.vstack((cpStats, np.array([iteration, n, inner, tensorTools.lsqrFit(X, M), tensorTools.loglikelihood(X, [M]), kktModeViolations[n], elapsed])))
        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, "
              "nViolations={3}, and elapsed time={4}".format(
                  iteration, nInnerIters[iteration], kktViolations[iteration],
                  nViolations[iteration], elapsed))
        if isConverged:
            break
    cpStats = np.delete(cpStats, (0), axis=0)  # delete the first row which was superfluous
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))
    modelStats = {"Iters": iteration, "LS": fit, "LL": ll,
                  "KKT": kktViolations[iteration]}
    return M, cpStats, modelStats
def als(X, rank, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm.
        (default 500)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation of
              X and X.
            - None : Do not compute the fit of the factorization, but iterate
              until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode. ``P.lambda[i]`` corresponds to the weight
        of the i-th mode.
    fit : float
        Fit of the factorization compared to ``X``
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i, 3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with ALS

    >>> P, fit, itr, _ = als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W. Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A. Foundations of the PARAFAC procedure: models and
           conditions for an 'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D., Chang, J. J. Analysis of individual differences in
           multidimensional scaling via an N-way generalization of
           'Eckart-Young' decomposition. Psychometrika 35, 283–319 (1970).
    """
    # init options
    ainit = kwargs.pop('init', _DEF_INIT)
    maxiter = kwargs.pop('max_iter', _DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', _DEF_FIT_METHOD)
    conv = kwargs.pop('conv', _DEF_CONV)
    dtype = kwargs.pop('dtype', _DEF_TYPE)
    if not len(kwargs) == 0:
        raise ValueError('Unknown keywords (%s)' % (kwargs.keys()))

    N = X.ndim
    normX = norm(X)

    U = _init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    for itr in range(maxiter):
        tic = time.clock()
        fitold = fit

        for n in range(N):
            Unew = X.uttkrp(U, n)
            Y = ones((rank, rank), dtype=dtype)
            for i in (list(range(n)) + list(range(n + 1, N))):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize
            if itr == 0:
                lmbda = sqrt((Unew ** 2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX ** 2 + P.norm() ** 2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX ** 2)
        else:
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(time.clock() - tic)
        _log.debug(
            '[%3d] fit: %.5f | delta: %7.1e | secs: %.5f'
            % (itr, fit, fitchange, exectimes[-1])
        )
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
import time
import numpy as np
import ktensor
import tensorTools

def cp_apr(X, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=10,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternating Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats : the statistics for each inner iteration
    modelStats : a dictionary with the final statistics for this tensor factorization
    """
    N = X.ndims()
    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)
    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for i in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)
    ## statistics
    cpStats = np.zeros(7)
    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            startMode = time.time()
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V > 0] = M.U[n][V > 0] + kappa
            M, Phi[n], inner, kktModeViolations[n], isConverged = \
                __solveSubproblem(X, M, n, maxinner, isConverged, epsilon, tol)
            nInnerIters[iteration] += inner
            elapsed = time.time() - startMode
            # write the per-mode statistics for this outer iteration
            cpStats = np.vstack((cpStats,
                                 np.array([iteration, n, inner,
                                           tensorTools.lsqrFit(X, M),
                                           tensorTools.loglikelihood(X, [M]),
                                           kktModeViolations[n], elapsed])))
        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, "
              "nViolations={3}, and elapsed time={4}".format(
                  iteration, nInnerIters[iteration], kktViolations[iteration],
                  nViolations[iteration], elapsed))
        if isConverged:
            break
    cpStats = np.delete(cpStats, (0), axis=0)  # delete the first row which was superfluous
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))
    modelStats = {"Iters": iteration, "LS": fit, "LL": ll,
                  "KKT": kktViolations[iteration]}
    return M, cpStats, modelStats
def als(X, rank, dtype=np.float, **kwargs):
    """
    Alternating least-squares algorithm to compute the CP decomposition.

    Parameters
    ----------
    X : tensor_mixin
        The tensor to be decomposed.
    rank : int
        Tensor rank of the decomposition.
    init : {'random', 'nvecs'}, optional
        The initialization method to use.
            - random : Factor matrices are initialized randomly.
            - nvecs : Factor matrices are initialized via HOSVD.
        (default 'nvecs')
    max_iter : int, optional
        Maximum number of iterations of the ALS algorithm.
        (default 500)
    fit_method : {'full', None}
        The method to compute the fit of the factorization
            - 'full' : Compute least-squares fit of the dense approximation of
              X and X.
            - None : Do not compute the fit of the factorization, but iterate
              until ``max_iter`` (Useful for large-scale tensors).
        (default 'full')
    conv : float
        Convergence tolerance on difference of fit between iterations
        (default 1e-5)

    Returns
    -------
    P : ktensor
        Rank ``rank`` factorization of X. ``P.U[i]`` corresponds to the factor
        matrix for the i-th mode. ``P.lambda[i]`` corresponds to the weight
        of the i-th mode.
    fit : float
        Fit of the factorization compared to ``X``
    itr : int
        Number of iterations that were needed until convergence
    exectimes : ndarray of floats
        Time needed for each single iteration

    Examples
    --------
    Create random dense tensor

    >>> from sktensor import dtensor, ktensor
    >>> U = [np.random.rand(i, 3) for i in (20, 10, 14)]
    >>> T = dtensor(ktensor(U).toarray())

    Compute rank-3 CP decomposition of ``T`` with ALS

    >>> P, fit, itr, _ = als(T, 3)

    Result is a decomposed tensor stored as a Kruskal operator

    >>> type(P)
    <class 'sktensor.ktensor.ktensor'>

    Factorization should be close to original data

    >>> np.allclose(T, P.totensor())
    True

    References
    ----------
    .. [1] Kolda, T. G. & Bader, B. W. Tensor Decompositions and Applications.
           SIAM Rev. 51, 455–500 (2009).
    .. [2] Harshman, R. A. Foundations of the PARAFAC procedure: models and
           conditions for an 'explanatory' multimodal factor analysis.
           UCLA Working Papers in Phonetics 16, (1970).
    .. [3] Carroll, J. D., Chang, J. J. Analysis of individual differences in
           multidimensional scaling via an N-way generalization of
           'Eckart-Young' decomposition. Psychometrika 35, 283–319 (1970).
    """
    N = len(X.shape)
    normX = norm(X)

    # init options
    ainit = kwargs.pop('init', __DEF_INIT)
    maxiter = kwargs.pop('maxIter', __DEF_MAXITER)
    fit_method = kwargs.pop('fit_method', __DEF_FIT_METHOD)
    conv = kwargs.pop('conv', __DEF_CONV)
    if not len(kwargs) == 0:
        raise ValueError('Unknown keywords (%s)' % (kwargs.keys()))

    U = __init(ainit, X, N, rank, dtype)
    fit = 0
    exectimes = []
    for itr in xrange(maxiter):
        tic = time.clock()
        fitold = fit

        for n in range(N):
            Unew = X.uttkrp(U, n)
            Y = ones((rank, rank), dtype=dtype)
            for i in (range(n) + range(n + 1, N)):
                Y = Y * dot(U[i].T, U[i])
            Unew = Unew.dot(pinv(Y))
            # Normalize
            if itr == 0:
                lmbda = sqrt((Unew ** 2).sum(axis=0))
            else:
                lmbda = Unew.max(axis=0)
                lmbda[lmbda < 1] = 1
            U[n] = Unew / lmbda

        P = ktensor(U, lmbda)
        if fit_method == 'full':
            normresidual = normX ** 2 + P.norm() ** 2 - 2 * P.innerprod(X)
            fit = 1 - (normresidual / normX ** 2)
        else:
            fit = itr
        fitchange = abs(fitold - fit)
        exectimes.append(time.clock() - tic)
        _log.debug(
            '[%3d] fit: %.5f | delta: %7.1e | secs: %.5f'
            % (itr, fit, fitchange, exectimes[-1])
        )
        if itr > 0 and fitchange < conv:
            break

    return P, fit, itr, array(exectimes)
import numpy as np
import scipy.sparse as sparse
import ktensor

def cp_als(X, R, tol=1e-4, maxiters=50):
    """
    Compute an estimate of the best rank-R CP model of a tensor X using an
    alternating least-squares algorithm. The fit is defined as
    1 - norm(X - full(P))/norm(X) and is loosely the proportion of data
    described by the CP model.

    Parameters
    ----------
    X - input tensor of the class tensor or sptensor
    R - the rank of the CP

    Returns
    -------
    out : the CP model as a ktensor
    """
    N = X.ndims()     # number of dimensions
    normX = X.norm()  # norm
    Uinit = []
    # for initialization we ignore the first one
    Uinit.append(None)
    for idx in np.arange(1, N):
        Uinit.append(np.random.rand(X.shape[idx], R))
    ## Setup for iterations, initializing U and the fit
    U = Uinit
    fit = 0
    for iter in range(maxiters):
        fitold = fit
        # iterate over all the modes
        for n in np.arange(N):
            # Calculate Unew = X_(n) * khatrirao(all U except n, 'r').
            Unew = X.mttkrp(U, n)
            # Compute the linear system coefficients
            Y = np.ones((R, R))
            for i in np.concatenate((np.arange(0, n), np.arange(n + 1, N))):
                Y = np.multiply(Y, np.dot(U[i].transpose(), U[i]))
            Unew = np.linalg.solve(Y, Unew.transpose()).transpose()
            # Normalize each vector to prevent singularities
            if iter == 0:
                lmda = np.sqrt(np.sum(np.square(Unew), axis=0))
            else:
                lmda = Unew.max(axis=0)
            U[n] = Unew * sparse.spdiags(1 / lmda, 0, R, R, format='csr')
        P = ktensor.ktensor(lmda, U)
        normresidual = np.sqrt(np.square(normX) + np.square(P.norm())
                               - 2 * P.innerprod(X))
        fit = 1 - (normresidual / normX)  # fraction of residual explained by model
        fitchange = abs(fitold - fit)
        print("Iteration {0}: fit={1} with delta={2}".format(iter, fit, fitchange))
        if iter > 0 and fitchange < tol:
            break
    ## Clean up the final result by normalizing the tensor
    P.arrange()
    P.fixsigns()
    return P, normresidual
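# A brief usage sketch (illustrative, mirroring the dense test tensor used in
# the test file below; assumes the repo's local tensor module is importable).
if __name__ == "__main__":
    import tensor
    # rank-2 fit of a small 3 x 4 x 2 dense tensor
    P, normresidual = cp_als(tensor.tensor(range(1, 25), [3, 4, 2]), 2)
    print(normresidual)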
import time
import numpy as np
import ktensor
import tensorTools

def cp_apr(X, Y1, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=50,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternating Poisson regression.
    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    Y1 : outcome labels coupled with the first mode
    R : the rank of the CP
    lambda1 is the parameter of the decomposition of demographic information
    lambda4 is the parameter of the penalty term on demoU
    Minit : the initial guess (in the form of a ktensor), if None random guess
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats : the statistics for each inner iteration
    """
    N = X.ndims()
    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)
    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for i in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)
    lambda2 = 1
    lambda3 = 1
    # logistic regression weights for the first mode (R weights plus a bias)
    sita = np.random.rand(R + 1, 1)
    ## statistics
    cpStats = np.zeros(7)
    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            startMode = time.time()
            ## Make adjustments to M[n] entries that violate complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] = nViolations[iteration] + 1
                    M.U[n][V > 0] = M.U[n][V > 0] + kappa
            if n == 0:
                # couple the first-mode factors with the outcomes Y1
                sita = __solveLogis(M.U[n], Y1, 200, epsilon, lambda2, lambda3, sita)
                M, Phi[n], inner, kktModeViolations[n], isConverged = \
                    __solveSubproblem1(X, M, n, maxinner, isConverged,
                                       epsilon, tol, sita, Y1, lambda2)
            else:
                M, Phi[n], inner, kktModeViolations[n], isConverged = \
                    __solveSubproblem0(X, M, n, maxinner, isConverged, epsilon, tol)
            elapsed = time.time() - startMode
        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, "
              "nViolations={3}, and elapsed time={4}".format(
                  iteration, nInnerIters[iteration], kktViolations[iteration],
                  nViolations[iteration], elapsed))
        if isConverged:
            break
    cpStats = np.delete(cpStats, (0), axis=0)  # delete the first row which was superfluous
    ### Print the statistics
    #fit = tensorTools.lsqrFit(X, M)
    #ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    #print("Final least squares fit = {0}".format(fit))
    #print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))
    #modelStats = {"Iters": iteration, "LS": fit, "LL": ll, "KKT": kktViolations[iteration]}
    return M, cpStats
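# Hedged input sketch, inferred from the calls above and not from the original
# source: Y1 supplies one binary outcome per first-mode (patient) row, and
# sita holds the logistic weights. The shapes below are illustrative only.
if __name__ == "__main__":
    siz = np.array([5, 5, 2])                        # patients x diag x med
    Y1 = np.random.randint(0, 2, size=(siz[0], 1))   # one label per patient
    # M, cpStats = cp_apr(X, Y1, 4, maxiters=50)     # X: an sptensor with shape siz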
def __randomInitialization(shape, R):
    F = []
    for n in range(len(shape)):
        F.append(np.random.rand(shape[n], R))
    return ktensor.ktensor(np.ones(R), F)
import numpy as np
import sptensor
import tensor
import CP_APR
import ktensor
"""
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
fms = Minit.fms(Minit)
Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test factorization of regular matrix """
X = tensor.tensor(range(1, 25), [3, 4, 2])
print(CP_APR.cp_apr(X, 4))
import ktensor
import numpy as np

R = 4
A = ktensor.ktensor(np.ones(R), [np.random.rand(5, R), np.random.rand(5, R),
                                 np.random.rand(2, R)])
B = ktensor.ktensor(np.ones(R), [np.random.rand(5, R), np.random.rand(5, R),
                                 np.random.rand(2, R)])
rawFMS = A.fms(B)
topFMS = A.top_fms(B, 2)
greedFMS = A.greedy_fms(B)
print(rawFMS, topFMS, greedFMS)

np.random.seed(10)
A = ktensor.ktensor(np.ones(R), [np.random.randn(5, R), np.random.randn(5, R),
                                 np.random.randn(2, R)])
A.U = [np.multiply((A.U[n] > 0).astype(int), A.U[n]) for n in range(A.ndims())]
B = ktensor.ktensor(np.ones(R), [np.random.randn(5, R), np.random.randn(5, R),
                                 np.random.randn(2, R)])
B.U = [np.multiply((B.U[n] > 0).astype(int), B.U[n]) for n in range(B.ndims())]
rawFOS = A.fos(B)
topFOS = A.top_fos(B, 2)
greedFOS = A.greedy_fos(B)
print(rawFOS, topFOS, greedFOS)
import numpy as np
import sptensor
import CP_APR
import ktensor
import KLProjection
"""
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
fms = Minit.fms(Minit)
Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test projection of a new slice onto the factor basis """
subs2 = np.array([[0, 3, 1], [1, 2, 0]])
vals2 = np.array([[1], [1]])
siz2 = np.array([2, 5, 2])
Xhat = sptensor.sptensor(subs2, vals2, siz2)
klproj = KLProjection.KLProjection(Y.U, 4)
np.random.seed(10)
klproj.projectSlice(Xhat, 0)
# load the sparse tensor information
subs = np.load(infile)
vals = np.load(infile)
siz = np.load(infile)
infile.close()
# now factor it
X = sptensor.sptensor(subs, vals, siz)
# Create a random initialization
N = X.ndims()
np.random.seed(0)
F = []
for n in range(N):
    F.append(np.random.rand(X.shape[n], R))
Minit = ktensor.ktensor(np.ones(R), F)
Y, ystats, fmsStats, mstats = cp_apr(X, R, Minit=Minit, outputfile=outfile,
                                     maxiters=iter)
## automate the creation of the sql file
ystats = np.column_stack((np.repeat(modelID, ystats.shape[0]), ystats))
np.savetxt(statsFile, ystats, delimiter="|")
fmsStats = np.column_stack((np.repeat(modelID, fmsStats.shape[0]), fmsStats))
np.savetxt(fmsFile, fmsStats, delimiter="|")
sqlLoad = open(sqlLoadFile, "w")
for i in range(iter):
    dbFile = outfile.format(i)
    sqlLoad.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_iter_factors;\n".format(dbFile))
sqlLoad.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_iter_results;\n".format(statsFile))
print "Running Uniqueness Experiment with ID {0} and iterations {1}".format(exptID, maxIters) modelOut = file(sqlOutfile, "w") for i in range(totalIter): # initialize the seed for repeatability np.random.seed(seedArray[i]) print "Random Start with seed {0}".format(seedArray[i]) Y, ystats, mstats = decompTools.decomposeCountTensor(inputFile, R=R, outerIters=maxIters, innerIters=innerIters, zeroTol=1e-4) Y.writeRawFile(rawfilePattern.format(exptID,i)) dbYFile = outfilePattern.format(exptID, i) dbOut = decompTools.getDBOutput(Y, yaxis) dbOut = np.column_stack((np.repeat(exptID, dbOut.shape[0]), np.repeat(i, dbOut.shape[0]), dbOut)) dbOut = np.insert(dbOut, 4, np.repeat(-100, dbOut.shape[0]), axis=1) np.savetxt(dbYFile, dbOut, fmt="%s", delimiter="|") yFactor.append(ktensor.ktensor(Y.lmbda.copy(), [Y.U[n].copy() for n in range(Y.ndims())])) # write to the sequel file for ease modelOut.write("insert into joyceho.tensor_uniq_models values({0},{1},{2},\'{3}\',{4},{5},{6},{7},{8});\n".format(exptID, i, labelID, exptDesc, maxIters, innerIters, mstats['LS'], mstats['LL'], mstats['KKT'])) modelOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_uniq_results;\n".format(dbYFile)) ## Calculate all the scores def __generateInfo(n, exptID, type, method, i, k): info = np.tile(np.array([exptID, type, method, i, k], dtype="S20"), n) info = info.reshape((n, 5)) return info scoreResults = np.empty((1,9), dtype="S20") for i in range(totalIter): for k in range(i+1, totalIter): A = yFactor[i] B = yFactor[k]