def implicit_euler(A, x, stepSize, n):
    """
    Perform `n` implicit Euler steps, renormalising the iterate after each one.

    Every step solves ``(I - stepSize*A) * nextX = x`` with the ALS variant
    ``xe.ALS_SPD``, divides the result by its ``one_norm`` and uses it as the
    right-hand side of the following step.

    Parameters
    ----------
    A : operator with a ``dimensions`` attribute (used to build the identity)
    x : xe.TTTensor
        Initial state; it is stored unchanged as the first entry of the result.
    stepSize : scalar
        Step size multiplied onto `A`.
    n : int
        Number of steps to perform.

    Returns
    -------
    list
        ``[x_0, x_1, ..., x_n]`` with ``x_0`` the input `x`.
    """
    op = xe.TTOperator.identity(A.dimensions) - stepSize*A

    j, k = xe.indices(2)
    # NOTE(review): this binds the module-level xe.ALS_SPD object itself, so the
    # two settings below presumably stay in effect for every later user of
    # xe.ALS_SPD -- confirm that this is intended.
    ourALS = xe.ALS_SPD
    ourALS.convergenceEpsilon = 1e-4
    ourALS.numHalfSweeps = 100

    results = [x]
    nextX = xe.TTTensor(x)

    # `range` instead of the Python-2-only `xrange`: identical iteration on
    # both interpreters, and no NameError on Python 3.
    for i in range(n):
        # nextX serves as initial guess and receives the solution
        ourALS(op, nextX, x)

        # normalize
        norm = one_norm(nextX)
        nextX /= norm

        # residual of the linear system w.r.t. the *normalised* iterate
        print("done itr", i,
              "residual:", xe.frob_norm(op(j/2, k/2)*nextX(k&0) - x(j&0)),
              "one-norm:", norm)

        x = xe.TTTensor(nextX)  # ensure it is a copy
        results.append(x)

    return results
def costs_riemannian_gradient(_tt, _measures, _values):
    """
    Evaluate the Riemannian gradient of the cost functional at `_tt`.

    Note that the core position of `_tt` is moved while the gradient is
    assembled.

    Parameters
    ----------
    _tt : xe.TTTensor
        Point at which the gradient is evaluated.
    _measures, _values : np.ndarray
        Samples used for the Monte-Carlo estimate.

    Returns
    -------
    TangentVector
        The gradient, with its part in direction of the projected
        dirac tensor at [0, ..., 0] removed.
    """
    # TODO: this can be done faster by storing the Us, Vs and Xs and building the TangentSpace manually.
    total = xe.TTTensor(_tt.dimensions)
    _tt.move_core(0)
    for pos in range(_tt.order()):
        component_grad = costs_component_gradient(_tt, pos, _measures, _values)
        # snapshot of the tensor while its core still sits at `pos`
        summand = xe.TTTensor(_tt)
        if pos < _tt.order() - 1:
            _tt.move_core(pos + 1)
            left = _tt.get_component(pos).to_ndarray()
            # remove from the component gradient its part spanned by `left`
            component_grad -= np.einsum('lex, yzx, yzr -> ler', left, left, component_grad)
        summand.set_component(pos, xe.Tensor.from_buffer(component_grad))
        total = total + summand

    tangent_space = TangentSpace(_tt)
    projected = tangent_space.project(total)
    # the assembled sum is expected to lie in the tangent space already
    assert xe.frob_norm(projected.to_TTTensor() - total) <= 1e-12 * xe.frob_norm(total)

    # project out the part in direction [0, ..., 0]
    origin = xe.TTTensor.dirac(_tt.dimensions, [0] * _tt.order())
    origin_dir = tangent_space.project(origin)
    coefficient = (projected @ origin_dir) / origin_dir.norm()**2
    return projected - coefficient * origin_dir
ttA = xe.TTOperator(A) # and verify its rank print("ttA ranks:", ttA.ranks()) # the right hand side of the equation both as Tensor and in (Q)TT format b = xe.Tensor.ones([2,]*9) ttb = xe.TTTensor.ones(b.dimensions) # construct a random initial guess of rank 3 for the ALS algorithm ttx = xe.TTTensor.random([2,]*9, [3,]*8) # and solve the system with the default ALS algorithm for symmetric positive operators xe.ALS_SPD(ttA, ttx, ttb) # to perform arithmetic operations we need to define some indices i,j,k = xe.indices(3) # calculate the residual of the just solved system to evaluate its accuracy # here i^9 denotes a multiindex named i of dimension 9 (ie. spanning 9 indices of the respective tensors) residual = xe.frob_norm( ttA(i^9,j^9)*ttx(j^9) - ttb(i^9) ) print("residual:", residual) # as an comparison solve the system exactly using the Tensor / operator x = xe.Tensor() x(j^9) << b(i^9) / A(i^9, j^9) # and calculate the Frobenius norm of the difference print("error:", xe.frob_norm(x - xe.Tensor(ttx)))
def calc_residual_norm(self):
    """Return the Frobenius norm of ``A*x - b`` divided by ``self.solutionsNorm``."""
    row, col = xe.indices(2)
    # contract the operator with the current iterate and subtract the right-hand side
    residual = self.A(row/2, col/2)*self.x(col&0) - self.b(row&0)
    return xe.frob_norm(residual) / self.solutionsNorm
# right -> left, only move core and update stack self.x.move_core(0, True) for pos in reversed(xrange(1,self.d)) : self.push_right_stack(pos) self.leftAStack.pop() self.leftBStack.pop() def simpleALS(A, x, b) : solver = InternalSolver(A, x, b) solver.solve() if __name__ == "__main__": i,j,k = xe.indices(3) A = xe.TTOperator.random([4]*16, [2]*7) A(i/2,j/2) << A(i/2, k/2) * A(j/2, k/2) solution = xe.TTTensor.random([4]*8, [3]*7) b = xe.TTTensor() b(i&0) << A(i/2, j/2) * solution(j&0) x = xe.TTTensor.random([4]*8, [3]*7) simpleALS(A, x, b) print("Residual:", xe.frob_norm(A(i/2, j/2) * x(j&0) - b(i&0))/xe.frob_norm(b)) print("Error:", xe.frob_norm(solution-x)/xe.frob_norm(x))
def compute_and_cache_solution(params, maxIter=100):
    """
    Minimise the training costs by Riemannian nonlinear-CG descent.

    Starting from the tensor returned by ``compute_and_cache_initial_guess``,
    every iteration performs an Armijo line search along the retraction of the
    current descent direction. The validation costs of each iterate are
    recorded and the iterate with the lowest validation costs is returned.

    The iteration stops when
      * the gradient norm drops below ``1e-6`` times the norm of the iterate,
      * the validation costs decreased by less than 1% over the last 10
        iterations, or
      * `maxIter` iterations have been performed.

    Parameters
    ----------
    params : object
        Provides ``num_training_samples``, ``num_validation_samples``,
        ``chaos_rank`` and ``num_exercise_dates``; it is also forwarded to
        ``compute_and_cache_measures_and_values`` and
        ``compute_and_cache_initial_guess``.
    maxIter : int
        Maximum number of descent iterations (also forwarded to
        ``compute_and_cache_initial_guess``).

    Returns
    -------
    xe.TTTensor
        The iterate with the lowest validation costs. If no step was taken
        (initial guess already within the gradient tolerance, or
        ``maxIter == 0``) a copy of the initial guess is returned.

    Raises
    ------
    AssertionError
        If steps were taken but none of them produced validation costs below
        infinity (e.g. all costs were NaN).
    """
    # The first block of samples is used for training, the following one for validation.
    trainingSet = slice(0, params.num_training_samples, 1)
    validationSet = slice(
        params.num_training_samples,
        params.num_training_samples + params.num_validation_samples, 1)

    measures, values = compute_and_cache_measures_and_values(params).value
    tt = compute_and_cache_initial_guess(params, maxIter).value

    # Rounding ranks used by the retraction.
    ranks = [params.chaos_rank] * (params.num_exercise_dates - 2)

    # Define convenience functions.
    def training_costs(_tt):
        return costs(_tt, measures[:, trainingSet], values[trainingSet])[0]

    def training_costs_gradient(_tt):
        return costs_riemannian_gradient(_tt, measures[:, trainingSet], values[trainingSet])

    def validation_costs(_tt):
        return costs(_tt, measures[:, validationSet], values[validationSet], _alpha=1e3)[0]

    # Alternatives to the nonlinear CG update below:
    #   plain gradient descent: descentDir = curGrad
    #   momentum:               descentDir = 0.5*(curGrad + prevGrad)
    def compute_descentDir(curGrad, prevGrad):
        """
        Nonlinear CG update.

        The Hestenes-Stiefel (HS) update can be derived by demanding that
        consecutive search directions be conjugate with respect to the average
        Hessian over the line segment [x_k, x_{k+1}]. Even though it is a
        natural choice, it is not easy to implement on manifolds.
        The Polak-Ribiere (PR) update is similar to HS, both in terms of
        theoretical convergence properties and practical performance.
        For PR, however, the strong Wolfe conditions do not guarantee that the
        computed update direction is always a descent direction. To guarantee
        this we modify PR to PR+, which also provides an automatic direction
        reset [2].
        Finally, global convergence can be guaranteed for every parameter that
        is bounded in absolute value by the Fletcher-Reeves (FR) update. This
        leads to the final update rule min{PR+, FR}.

        To ensure that a descent direction is returned even with Armijo
        updates, we check that the computed update direction does not point in
        the opposite direction to the gradient and fall back to the gradient
        otherwise.

        Both arguments must live in the same tangent space; `prevGrad` is the
        previous gradient transported to the current point.

        References:
        -----------
          - [1] Numerical optimization (Jorge Nocedal and Stephen J. Wright)
          - [2] An Introduction to the Conjugate Gradient Method Without the
                Agonizing Pain (Jonathan Richard Shewchuk)
        """
        # NOTE(review): textbook CG combines the gradient with the previous
        # *search direction*; this routine is handed the previous gradient.
        # Left unchanged -- confirm that this is intended.
        gradDiff = curGrad - prevGrad
        prevGradSqNorm = prevGrad @ prevGrad
        # PR and FR share the same denominator: the squared norm of the
        # previous gradient ([1], eq. 5.44 and 5.41a).
        betaPR = (curGrad @ gradDiff) / prevGradSqNorm  # Polak-Ribiere update
        beta = max(betaPR, 0)  # PR+ update
        betaFR = (curGrad @ curGrad) / prevGradSqNorm  # Fletcher-Reeves update
        beta = min(beta, betaFR)  # min{PR+,FR} update
        descentDir = curGrad + beta * prevGrad
        if descentDir @ curGrad < 1e-3 * descentDir.norm() * curGrad.norm():
            print("WARNING: Computed descent direction opposite to gradient.")
            descentDir = curGrad
        return descentDir

    print("=" * 80)
    print(" Perform gradient descent")
    print("=" * 80)

    trnCosts = training_costs(tt)
    # Sliding window of the last 10 validation costs for the stagnation test.
    valCosts = deque(maxlen=10)
    grad = training_costs_gradient(tt)
    print(
        f"[0] Training costs: {trnCosts: .4e} | Validation costs: {validation_costs(tt): .4e} | Best validation costs: {np.nan: .4e} | Relative gradient norm: {grad.norm()/xe.frob_norm(tt):.2e} | Relative update norm: {np.nan:.2e} | Step size: {np.nan:.2e} | Relative retraction error: {np.nan:.2e} | Ranks: {tt.ranks()}"
    )

    ss = 1
    descentDir = grad
    descentDirGrad = descentDir @ grad
    bestValCosts = np.inf
    bestTT = None
    for iteration in range(maxIter):
        if grad.norm() < 1e-6 * xe.frob_norm(tt):
            print(
                "Termination: relative norm of gradient deceeds tolerance (local minimum reached)"
            )
            break

        prev_tt = tt
        # The step size found here is reused as initial step size of the next line search.
        tt, re, ss = armijo_step(retraction(tt, descentDir, _roundingParameter=ranks), training_costs, descentDirGrad, _initialStepSize=ss)

        trnCosts = training_costs(tt)
        valCosts.append(validation_costs(tt))
        if valCosts[-1] < bestValCosts:
            bestTT = xe.TTTensor(tt)  # copy, `tt` is rebound in later iterations
            bestValCosts = valCosts[-1]
        print(
            f"[{iteration+1}] Training costs: {trnCosts: .4e} | Validation costs: {valCosts[-1]: .4e} | Best validation costs: {bestValCosts: .4e} | Relative gradient norm: {grad.norm()/np.asarray(xe.frob_norm(prev_tt)):.2e} | Relative update norm: {xe.frob_norm(prev_tt-tt)/np.asarray(xe.frob_norm(prev_tt)):.2e} | Step size: {ss:.2e} | Relative retraction error: {re:.2e} | Ranks: {tt.ranks()}"
        )

        if len(valCosts) == 10 and (valCosts[0] - valCosts[-1]) < 1e-2 * valCosts[0]:
            print("Termination: decrease of costs deceeds tolerance")
            break

        if iteration < maxIter - 1:
            # Transport the old gradient to the tangent space of the new point
            # before combining it with the new gradient.
            prev_grad = TangentSpace(tt).project(grad)
            grad = training_costs_gradient(tt)
            descentDir = compute_descentDir(grad, prev_grad)
            descentDirGrad = descentDir @ grad
        else:
            print("Termination: maximum number of iterations reached")

    if bestTT is None:
        if valCosts:
            # Steps were taken but none was accepted as best (e.g. NaN costs).
            raise AssertionError("no iterate with finite validation costs was found")
        # No step was taken at all: the initial guess is the result.
        bestTT = xe.TTTensor(tt)
    return bestTT
def step(_stepSize):
    """
    Move from ``basePoint`` by ``-_stepSize * _tv`` and round the result.

    Returns the rounded tensor together with the Frobenius distance between
    the unrounded and the rounded tensor, divided by ``_stepSize * tv_norm``.
    """
    rounded = (basePoint - _stepSize * _tv).to_TTTensor()
    # keep an unrounded copy to measure what the rounding changes
    unrounded = xe.TTTensor(rounded)
    rounded.round(_roundingParameter)
    relative_error = xe.frob_norm(unrounded - rounded) / (_stepSize * tv_norm)
    return rounded, relative_error