def step(self, xk, data, trust):
    """ Computes the next step in the parameter space.  There are lots of tricks here that I will document later.

    @param[in] xk Current position in parameter space
    @param[in] data Dictionary containing the objective function value ('X'), gradient ('G') and Hessian ('H')
    @param[in] trust The trust radius
    """
    from scipy import optimize

    X, G, H = (data['X0'], data['G0'], data['H0']) if self.bhyp else (data['X'], data['G'], data['H'])
    H1 = H.copy()
    H1 = np.delete(H1, self.excision, axis=0)
    H1 = np.delete(H1, self.excision, axis=1)
    Eig = eig(H1)[0]            # Diagonalize Hessian
    Emin = min(Eig)
    if Emin < self.eps:         # Mix in SD step if Hessian minimum eigenvalue is negative
        # Experiment.
        Adj = max(self.eps, 0.01*abs(Emin)) - Emin
        print "Hessian has a small or negative eigenvalue (%.1e), mixing in some steepest descent (%.1e) to correct this." % (Emin, Adj)
        print "Eigenvalues are:"               ###
        pvec1d(Eig)                            ###
        H += Adj*np.eye(H.shape[0])

    if self.bhyp:
        G = np.delete(G, self.excision)
        H = np.delete(H, self.excision, axis=0)
        H = np.delete(H, self.excision, axis=1)
        xkd = np.delete(xk, self.excision)
        if self.Objective.Penalty.fmul != 0.0:
            warn_press_key("Using the multiplicative hyperbolic penalty is discouraged!")
        # This is the gradient and Hessian without the contributions from the hyperbolic constraint.
        Obj0 = {'X':X, 'G':G, 'H':H}
        class Hyper(object):
            def __init__(self, HL, Penalty):
                self.H = HL.copy()
                self.dx = 1e10 * np.ones(len(HL), dtype=float)
                self.Val = 0
                self.Grad = np.zeros(len(HL), dtype=float)
                self.Hess = np.zeros((len(HL), len(HL)), dtype=float)
                self.Penalty = Penalty
            def _compute(self, dx):
                self.dx = dx.copy()
                Tmp = np.mat(self.H)*col(dx)
                Reg_Term  = self.Penalty.compute(xkd+flat(dx), Obj0)
                self.Val  = (X + np.dot(dx, G) + 0.5*row(dx)*Tmp + Reg_Term[0] - data['X'])[0,0]
                self.Grad = flat(col(G) + Tmp) + Reg_Term[1]
            def compute_val(self, dx):
                if norm(dx - self.dx) > 1e-8:
                    self._compute(dx)
                return self.Val
            def compute_grad(self, dx):
                if norm(dx - self.dx) > 1e-8:
                    self._compute(dx)
                return self.Grad
            def compute_hess(self, dx):
                if norm(dx - self.dx) > 1e-8:
                    self._compute(dx)
                return self.Hess
        def hyper_solver(L):
            dx0 = np.zeros(len(xkd), dtype=float)
            #dx0 = np.delete(dx0, self.excision)
            # HL = H + (L-1)**2*np.diag(np.diag(H))
            # Attempt to use plain Levenberg
            HL = H + (L-1)**2*np.eye(len(H))
            HYP = Hyper(HL, self.Objective.Penalty)
            try:
                Opt1 = optimize.fmin_bfgs(HYP.compute_val, dx0, fprime=HYP.compute_grad, gtol=1e-5, full_output=True, disp=0)
            except:
                Opt1 = optimize.fmin(HYP.compute_val, dx0, full_output=True, disp=0)
            try:
                Opt2 = optimize.fmin_bfgs(HYP.compute_val, -xkd, fprime=HYP.compute_grad, gtol=1e-5, full_output=True, disp=0)
            except:
                Opt2 = optimize.fmin(HYP.compute_val, -xkd, full_output=True, disp=0)
            #Opt2 = optimize.fmin(HYP.compute_val,-xkd,full_output=True,disp=0)
            dx1, sol1 = Opt1[0], Opt1[1]
            dx2, sol2 = Opt2[0], Opt2[1]
            dxb, sol = (dx1, sol1) if sol1 <= sol2 else (dx2, sol2)
            for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                dxb = np.insert(dxb, i, 0)
            return dxb, sol
    else:
        # G0 and H0 are used for determining the expected function change.
        G0 = G.copy()
        H0 = H.copy()
        G = np.delete(G, self.excision)
        H = np.delete(H, self.excision, axis=0)
        H = np.delete(H, self.excision, axis=1)
        # print "Inverting Hessian:"                 ###
        # print " G:"                                ###
        # pvec1d(G,precision=5)                      ###
        # print " H:"                                ###
        # pmat2d(H,precision=5)                      ###
        Hi = invert_svd(np.mat(H))
        dx = flat(-1 * Hi * col(G))
        # print " dx:"                               ###
        # pvec1d(dx,precision=5)                     ###
        # dxa = -solve(H, G)          # Take Newton Raphson Step ; use -1*G if want steepest descent.
        # dxa = flat(dxa)
        # print " dxa:"                              ###
        # pvec1d(dxa,precision=5)                    ###
        print                                        ###
        for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
            dx = np.insert(dx, i, 0)
        def para_solver(L):
            # Levenberg-Marquardt
            # HT = H + (L-1)**2*np.diag(np.diag(H))
            # Attempt to use plain Levenberg
            HT = H + (L-1)**2*np.eye(len(H))
            # print "Inverting Scaled Hessian:"          ###
            # print " G:"                                ###
            # pvec1d(G,precision=5)                      ###
            # print " HT: (Scal = %.4f)" % (1+(L-1)**2)  ###
            # pmat2d(HT,precision=5)                     ###
            Hi = invert_svd(np.mat(HT))
            dx = flat(-1 * Hi * col(G))
            # print " dx:"                               ###
            # pvec1d(dx,precision=5)                     ###
            # dxa = -solve(HT, G)
            # dxa = flat(dxa)
            # print " dxa:"                              ###
            # pvec1d(dxa,precision=5)                    ###
            # print                                      ###
            sol = flat(0.5*row(dx)*np.mat(H)*col(dx))[0] + np.dot(dx, G)
            for i in self.excision:    # Reinsert deleted coordinates - don't take a step in those directions
                dx = np.insert(dx, i, 0)
            return dx, sol
    def solver(L):
        return hyper_solver(L) if self.bhyp else para_solver(L)
    def trust_fun(L):
        N = norm(solver(L)[0])
        #print "\rL = %.4e, Hessian diagonal addition = %.4e: found length %.4e, objective is %.4e" % (L, (L-1)**2, N, (N - trust)**2)
        return (N - trust)**2
    def search_fun(L):
        # Evaluate ONLY the objective function.  Most useful when
        # the objective is cheap, but the derivative is expensive.
        dx, sol = solver(L)
        # dx is how much the step changes from the previous step.
        # This is our trial step.
        xk_ = dx + xk
        Result = self.Objective.Full(xk_, 0, verbose=False)['X'] - data['X']
        print "Searching! Hessian diagonal addition = %.4e, L = % .4e, length %.4e, result %.4e" % ((L-1)**2, L, norm(dx), Result)
        return Result
    if self.trust0 > 0: # This is the trust region code.
        bump = False
        dx, expect = solver(1)
        dxnorm = norm(dx)
        if dxnorm > trust:
            bump = True
            # Tried a few optimizers here, seems like Brent works well.
            # Okay, the problem with Brent is that the tolerance is fractional.
            # If the optimized value is zero, then it takes a lot of meaningless steps.
            LOpt = optimize.brent(trust_fun, brack=(self.lmg, self.lmg*4), tol=1e-6)
            ### Result = optimize.fmin_powell(trust_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
            ### LOpt = Result[0]
            dx, expect = solver(LOpt)
            dxnorm = norm(dx)
            # print "\rLevenberg-Marquardt: %s step found (length %.3e), Hessian diagonal is scaled by % .8f" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
            print "\rLevenberg-Marquardt: %s step found (length %.3e), % .8f added to Hessian diagonal" % ('hyperbolic-regularized' if self.bhyp else 'Newton-Raphson', dxnorm, (LOpt-1)**2)
    else: # This is the nonlinear search code.
        # First obtain a step that is the same length as the provided trust radius.
        LOpt = optimize.brent(trust_fun, brack=(self.lmg, self.lmg*4), tol=1e-6)
        bump = False
        Result = optimize.brent(search_fun, brack=(LOpt, LOpt*4), tol=self.search_tol, full_output=1)
        ### optimize.fmin(search_fun,0,xtol=1e-8,ftol=data['X']*0.1,full_output=1,disp=0)
        ### Result = optimize.fmin_powell(search_fun,3,xtol=self.search_tol,ftol=self.search_tol,full_output=1,disp=0)
        dx, _ = solver(Result[0])
        expect = Result[1]
    ## Decide which parameters to redirect.
    ## Currently not used.
    if self.Objective.Penalty.ptyp in [3, 4, 5]:
        self.FF.make_redirect(dx+xk)
    return dx, expect, bump
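
# Illustrative sketch (not part of the optimizer above).  The para_solver/trust_fun
# machinery parametrizes Levenberg-Marquardt by a scalar L: the quantity (L-1)**2 is
# added to the Hessian diagonal, and Brent's method searches over L until the step
# length matches the trust radius.  The standalone function below reproduces that
# idea for a plain quadratic model using only numpy and scipy.  It is a minimal
# sketch: the name _lm_step_sketch, the bracket (1.0, 4.0), and the use of
# np.linalg.solve in place of invert_svd are assumptions for illustration only.
def _lm_step_sketch(G, H, trust):
    """Return a Levenberg-Marquardt step for gradient G and Hessian H whose
    length does not exceed the trust radius."""
    import numpy as np
    from scipy import optimize
    def lm_dx(L):
        # Add (L-1)**2 to the Hessian diagonal ("plain Levenberg" damping).
        HT = H + (L-1)**2*np.eye(len(H))
        # Newton-type step: solve HT * dx = -G.
        return -np.linalg.solve(HT, G)
    dx = lm_dx(1)                      # L = 1 recovers the undamped Newton-Raphson step.
    if np.linalg.norm(dx) > trust:
        # Search over L so that the damped step length matches the trust radius.
        LOpt = optimize.brent(lambda L: (np.linalg.norm(lm_dx(L)) - trust)**2,
                              brack=(1.0, 4.0), tol=1e-6)
        dx = lm_dx(LOpt)
    return dx

# Example usage with hypothetical values:
#   dx = _lm_step_sketch(np.array([1.0, -2.0]), np.array([[4.0, 0.0], [0.0, 1.0]]), trust=0.5)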