def projectData(self, XHat, n, maxiters=10, maxinner=10):
    """
    Fit mode ``n`` of the current model to a different tensor ``XHat``.

    The model (``self.M``) and data (``self.X``) are swapped out
    temporarily: the mode-``n`` factors of both the regular and the
    augmented tensor are re-drawn at random, the weights are reset to
    ones, and the mode-``n`` solvers are run until the log-likelihood
    change drops below ``self.dlTol`` or ``maxiters`` is reached.  The
    original model and data are always restored before returning, even
    when one of the solvers raises.

    Parameters
    ----------
    XHat : tensor to project; must expose ``shape`` and ``subs``
    n : the mode whose factor matrix is re-estimated
    maxiters : maximum number of solver sweeps
    maxinner : unused; kept for backward compatibility of the signature

    Returns
    -------
    projMat : mode-``n`` factor of the regular tensor with every row
        rescaled to sum to one
    biasMat : mode-``n`` factor of the augmented tensor
    """
    ## store off the old ones
    origM = {
        REG_LOCATION: ktensor.copyTensor(self.M[REG_LOCATION]),
        AUG_LOCATION: ktensor.copyTensor(self.M[AUG_LOCATION]),
    }
    origX = self.X
    try:
        self.X = XHat
        nRows = self.X.shape[n]
        ## randomize the nth mode and reset the weights
        self.M[REG_LOCATION].U[n] = np.random.rand(nRows, self.R)
        self.M[REG_LOCATION].lmbda = np.ones(self.R)
        self.M[AUG_LOCATION].U[n] = np.random.rand(nRows, 1)
        self.M[AUG_LOCATION].lmbda = np.ones(1)
        ## renormalize
        self.M[REG_LOCATION].normalize(1)
        self.normalizeAugTensor()

        lastLL = tensorTools.loglikelihood(self.X, self.M)
        for iteration in range(maxiters):
            xsubs = self.X.subs[:, n]
            B, Pi, inI1, kktModeViolation1 = self.__solveSignalTensor(
                xsubs, self.M[AUG_LOCATION].U[n], n)
            inI2, kktModeViolation2 = self.__solveAugmentedTensor(
                xsubs, B, Pi, n)
            ll = tensorTools.loglikelihood(self.X, self.M)
            if np.abs(lastLL - ll) < self.dlTol:
                break
            lastLL = ll

        ## scale by summing across the rows
        factor = self.M[REG_LOCATION].U[n]
        rank = self.M[REG_LOCATION].R
        totWeight = np.sum(factor, axis=1)
        zeroIdx = np.where(totWeight < 1e-100)[0]
        if len(zeroIdx) > 0:
            # rows with (numerically) no mass get an even distribution so
            # the division below is well defined
            factor[zeroIdx, :] = 1.0 / rank
            totWeight = np.sum(factor, axis=1)
        # broadcasting divides every row by its own sum
        projMat = factor / totWeight[:, np.newaxis]
        biasMat = self.M[AUG_LOCATION].U[n]
        return projMat, biasMat
    finally:
        # put the original model/data back no matter how we leave
        self.M = origM
        self.X = origX
def computeDecomp(self, gamma=None, gradual=True):
    """
    Run the alternating decomposition and record per-mode diagnostics.

    For every outer iteration and every mode ``n`` the augmented tensor
    is redistributed, the signal tensor is solved, the mode-``n`` factor
    is optionally hard-thresholded, and the augmented tensor is solved.
    Iteration stops after ``self.maxIters`` sweeps or once the
    log-likelihood change is below ``self.dlTol`` while ``xi >= 0.99``.

    Parameters
    ----------
    gamma : per-mode thresholds, indexed as ``gamma[n]``; the effective
        threshold is ``xi * gamma[n]``.  ``None`` means all zeros, i.e.
        no hard-thresholding.
    gradual : if True the projection factor ``xi`` starts at 0 and is
        moved towards 1 as the relative log-likelihood change shrinks;
        if False ``xi`` is fixed at 1.

    Returns
    -------
    iterInfo : OrderedDict keyed by ``str((iteration, n))`` whose values
        hold "Time", "KKTViolation", "Iterations" and "LL" for that mode
        update.
    """
    ## random initialize if not existing
    # identity checks: "== None" is ambiguous for array-like objects
    if self.M[REG_LOCATION] is None and self.M[AUG_LOCATION] is None:
        self.initialize()
    ## keep track of the iteration information
    iterInfo = OrderedDict()
    lastLL = tensorTools.loglikelihood(self.X, self.M)
    ## projection factor starts at 0 (unless there's no gradual)
    xi = 0 if gradual else 1
    ## if nothing is set, we're just not going to do any hard-thresholding
    if gamma is None:
        gamma = [0] * self.N

    ## for outer iterations
    for iteration in range(self.maxIters):
        startIter = time.time()
        for n in range(self.N):
            startMode = time.time()
            ## first we calculate the "augmented" tensor matricization
            self.M[AUG_LOCATION].redistribute(n)
            xsubs = self.X.subs[:, n]
            B, Pi, inI1, kktModeViolation1 = self.__solveSignalTensor(
                xsubs, self.M[AUG_LOCATION].U[n], n)
            ## hard threshold based on the xi and gamma
            thr = xi * gamma[n]
            if thr > 0:
                self.M[REG_LOCATION].U[n] = tensorTools.hardThresholdMatrix(
                    self.M[REG_LOCATION].U[n], thr)
                # renormalize the mode
                self.M[REG_LOCATION].normalize_mode(n, 1)
                ## recalculate B using the new matrix
                B = np.dot(self.M[REG_LOCATION].U[n],
                           np.diag(self.M[REG_LOCATION].lmbda))
            elapsed1 = time.time() - startMode
            # now that we are done, we can calculate the new
            # 'unaugmented matricization'
            inI2, kktModeViolation2 = self.__solveAugmentedTensor(
                xsubs, B, Pi, n)
            elapsed2 = time.time() - startMode
            ll = tensorTools.loglikelihood(self.X, self.M)
            iterInfo[str((iteration, n))] = {
                "Time": [elapsed1, elapsed2],
                "KKTViolation": [kktModeViolation1, kktModeViolation2],
                "Iterations": [inI1, inI2],
                "LL": ll,
            }

        dll = np.abs(lastLL - ll)
        if gradual:
            # 1 - (relative LL change, capped at 1)
            xiTemp = 1 - np.min([1, dll / np.max(np.absolute([lastLL, ll]))])
            if xiTemp > xi:
                ## take the mean of the two
                xi = (xi + xiTemp) / 2
        print("Iteration {0}: Xi = {1}, dll = {2}, time = {3}".format(
            iteration, xi, dll, time.time() - startIter))
        if dll < self.dlTol and xi >= 0.99:
            break
        lastLL = ll
    return iterInfo
def cp_apr(X, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=10,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternative Poisson regression.

    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess.
        The model is iterated on through this same object (``M = Minit``),
        so a caller-supplied ``Minit`` may be modified.
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats : statistics array.  Per-mode collection is disabled in this
        implementation, so only the remainder of the zero placeholder
        (after dropping its first entry) is returned.
    modelStats : a dictionary item with the final statistics for this
        tensor factorization ("Iters", "LS", "LL", "KKT")
    """
    N = X.ndims()
    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)
    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for _ in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)
    ## statistics (placeholder only; per-mode rows are not recorded here)
    cpStats = np.zeros(7)

    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            ## Make adjustments to M[n] entries that violate
            ## complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] += 1
                    M.U[n][V] += kappa
            M, Phi[n], inner, kktModeViolations[n], isConverged = \
                __solveSubproblem(X, M, n, maxinner, isConverged,
                                  epsilon, tol)
            # accumulate so the "Inner Its" / "Total inner iterations"
            # reports below are meaningful
            nInnerIters[iteration] += inner
        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print(
            "Iteration {0}: Inner Its={1} with KKT violation={2}, nViolations={3}, and elapsed time={4}"
            .format(iteration, nInnerIters[iteration],
                    kktViolations[iteration], nViolations[iteration],
                    elapsed))
        if isConverged:
            break

    # delete the first row which was superfluous
    cpStats = np.delete(cpStats, (0), axis=0)
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))
    modelStats = {
        # index of the last outer iteration (previously the builtin `iter`)
        "Iters": iteration,
        "LS": fit,
        "LL": ll,
        "KKT": kktViolations[iteration],
    }
    return M, cpStats, modelStats
def cp_apr(X, R, Minit=None, tol=1e-4, maxiters=1000, maxinner=10,
           epsilon=1e-10, kappatol=1e-10, kappa=1e-2):
    """
    Compute nonnegative CP with alternative Poisson regression.

    Code is the python implementation of cp_apr in the MATLAB Tensor Toolbox

    Parameters
    ----------
    X : input tensor of the class tensor or sptensor
    R : the rank of the CP
    Minit : the initial guess (in the form of a ktensor), if None random guess.
        The model is iterated on through this same object (``M = Minit``),
        so a caller-supplied ``Minit`` may be modified.
    tol : tolerance on the inner KKT violation
    maxiters : maximum number of iterations
    maxinner : maximum number of inner iterations
    epsilon : parameter to avoid dividing by zero
    kappatol : tolerance on complementary slackness
    kappa : offset to fix complementary slackness

    Returns
    -------
    M : the CP model as a ktensor
    cpStats : one row per mode update with the columns
        [iteration, mode, inner iterations, least squares fit,
        log-likelihood, KKT violation of the mode, elapsed time]
    modelStats : a dictionary item with the final statistics for this
        tensor factorization ("Iters", "LS", "LL", "KKT")
    """
    N = X.ndims()
    ## Random initialization
    if Minit is None:
        F = tensorTools.randomInit(X.shape, R)
        Minit = ktensor.ktensor(np.ones(R), F)
    nInnerIters = np.zeros(maxiters)
    ## Initialize M and Phi for iterations
    M = Minit
    M.normalize(1)
    Phi = [[] for _ in range(N)]
    kktModeViolations = np.zeros(N)
    kktViolations = -np.ones(maxiters)
    nViolations = np.zeros(maxiters)
    ## statistics: gathered in a list and stacked once after the loop
    statRows = []

    for iteration in range(maxiters):
        startIter = time.time()
        isConverged = True
        for n in range(N):
            startMode = time.time()
            ## Make adjustments to M[n] entries that violate
            ## complementary slackness
            if iteration > 0:
                V = np.logical_and(Phi[n] > 1, M.U[n] < kappatol)
                if np.count_nonzero(V) > 0:
                    nViolations[iteration] += 1
                    M.U[n][V] += kappa
            M, Phi[n], inner, kktModeViolations[n], isConverged = \
                __solveSubproblem(X, M, n, maxinner, isConverged,
                                  epsilon, tol)
            # accumulate so the "Inner Its" / "Total inner iterations"
            # reports below are meaningful
            nInnerIters[iteration] += inner
            elapsed = time.time() - startMode
            # only write the outer iterations for now
            statRows.append(np.array([
                iteration, n, inner,
                tensorTools.lsqrFit(X, M),
                tensorTools.loglikelihood(X, [M]),
                kktModeViolations[n],
                elapsed,
            ]))
        kktViolations[iteration] = np.max(kktModeViolations)
        elapsed = time.time() - startIter
        print("Iteration {0}: Inner Its={1} with KKT violation={2}, nViolations={3}, and elapsed time={4}".format(
            iteration, nInnerIters[iteration], kktViolations[iteration],
            nViolations[iteration], elapsed))
        if isConverged:
            break

    cpStats = np.vstack(statRows) if statRows else np.empty((0, 7))
    ### Print the statistics
    fit = tensorTools.lsqrFit(X, M)
    ll = tensorTools.loglikelihood(X, [M])
    print("Number of iterations = {0}".format(iteration))
    print("Final least squares fit = {0}".format(fit))
    print("Final log-likelihood = {0}".format(ll))
    print("Final KKT Violation = {0}".format(kktViolations[iteration]))
    print("Total inner iterations = {0}".format(np.sum(nInnerIters)))
    modelStats = {
        # index of the last outer iteration (previously the builtin `iter`)
        "Iters": iteration,
        "LS": fit,
        "LL": ll,
        "KKT": kktViolations[iteration],
    }
    return M, cpStats, modelStats