import numpy as np
from math import sqrt
from numpy.linalg import norm

import linesearch as ls   # repo's line-search routines: ls.exact / ls.inexact


def sd(fun, x0, search='inexact', eps=1e-8, maxiter=10000, **kwargs):
    """Steepest descent

    Parameters
    ----------
    fun: object
        objective function, with callable methods f and g
    x0: ndarray
        initial point
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = -g1
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        x = x + alpha * d
        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 2)
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval
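
# Usage sketch (illustrative, not part of the original module): a minimal
# objective object exposing the f/g/G protocol the solvers in this file
# expect. QuadFun and _demo_sd are hypothetical names, and running the demo
# assumes the repo's `linesearch` module (imported above as `ls`) is
# available on the path.
class QuadFun:
    """Convex quadratic f(x) = 0.5 * x'Ax - b'x with analytic derivatives."""

    def __init__(self, A, b):
        self.A, self.b = A, b

    def f(self, x):
        return 0.5 * x @ self.A @ x - self.b @ x

    def g(self, x):
        return self.A @ x - self.b

    def G(self, x):
        return self.A


def _demo_sd():
    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    b = np.array([1.0, 1.0])
    x, f, gnorm, niter, neval = sd(QuadFun(A, b), np.zeros(2))
    print(x, f, gnorm, niter, neval)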

def cg(fun, x0, method='prp+', search='inexact', eps=1e-8, maxiter=10000,
       nu=0.2, a_high=.3, debug=False, **kwargs):
    """Non-linear conjugate gradient methods

    Parameters
    ----------
    fun: object
        objective function, with callable methods f and g
    x0: ndarray
        initial point
    method: string, optional
        options: 'fr' for FR, 'prp' for PRP, 'prp+' for PRP+, 'hs' for HS,
        'cd' for conjugate descent, 'dy' for Dai-Yuan
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    nu: float, optional
        parameter for restart by orthogonality test
    a_high: float, optional
        upper bound on the infinity norm of each step
    debug: boolean, optional
        output information for every iteration if set to True
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    flist: list
        list of objective values along the iterations
    xlist: list
        list of points along the iterations
    """
    x = x0
    n = x.size
    f0 = -np.inf
    f1 = fun.f(x)
    g0 = np.zeros(n)
    g1 = fun.g(x)
    d = -g1
    niter = 0
    neval = 2
    flist = []
    xlist = []
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        # restart with steepest descent when successive gradients
        # fail the orthogonality test
        if abs(np.dot(g1, g0)) > nu * np.dot(g1, g1):
            d = -g1
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        d = alpha * d
        if norm(d, np.inf) > a_high:   # cap the step length
            d = d / norm(d, np.inf) * a_high
        x = x + d
        g0 = g1
        g1 = fun.g(x)
        y = g1 - g0
        f0 = f1
        f1 = fun.f(x)
        neval += (v + 2)
        flist.append(f1)
        xlist.append(x)
        if debug:
            print('iter:', niter, alpha)
        if method == 'fr':
            beta = np.dot(g1, g1) / np.dot(g0, g0)
        elif method == 'prp':
            beta = np.dot(g1, y) / np.dot(g0, g0)
        elif method == 'prp+':
            beta = max(np.dot(g1, y) / np.dot(g0, g0), 0)
        elif method == 'hs':
            beta = np.dot(g1, y) / np.dot(d, y)
        elif method == 'cd':
            beta = -np.dot(g1, g1) / np.dot(g0, d)
        elif method == 'dy':
            beta = np.dot(g1, g1) / np.dot(d, y)
        else:
            raise ValueError('Invalid method name')
        d = beta * d - g1
        niter += 1
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval, flist, xlist
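
# Illustrative comparison of the beta formulas (hypothetical _demo_cg,
# reusing the QuadFun sketch above): flist records the objective value at
# every iteration, so convergence curves of the variants can be compared.
def _demo_cg():
    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    b = np.array([1.0, 1.0])
    for m in ('fr', 'prp', 'prp+', 'hs', 'cd', 'dy'):
        x, f, gnorm, niter, neval, flist, xlist = cg(
            QuadFun(A, b), np.zeros(2), method=m)
        print('%5s  niter=%4d  f=%.3e  |g|=%.1e' % (m, niter, f, gnorm))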

def momentum(fun, x0, search='inexact', eps=1e-8, maxiter=10000, beta=.9,
             a_high=.3, **kwargs):
    """Momentum

    Parameters
    ----------
    fun: object
        objective function, with callable methods f and g
    x0: ndarray
        initial point
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    beta: float, optional
        momentum coefficient
    a_high: float, optional
        upper bound on the infinity norm of each step
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    flist: list
        list of objective values along the iterations
    xlist: list
        list of points along the iterations
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2
    d0 = np.zeros(x.size)
    flist = []
    xlist = []
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = beta * d0 - g1
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        d = alpha * d
        if norm(d, np.inf) > a_high:   # cap the step length
            d = d / norm(d, np.inf) * a_high
        x = x + d
        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        d0 = d   # the step d already includes alpha (and the cap)
        flist.append(f1)
        xlist.append(x)
        niter += 1
        neval += (v + 2)
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval, flist, xlist
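
# For intuition, a self-contained sketch of the heavy-ball recursion the
# routine above implements, on f(x) = 0.5 * x**2 with a fixed step (no line
# search, no step cap; the numbers below are purely illustrative):
def _demo_heavy_ball(alpha=0.01, beta=0.9, steps=200):
    x, d0 = 1.0, 0.0
    for _ in range(steps):
        d = beta * d0 - x   # gradient of 0.5 * x**2 is x
        x = x + alpha * d
        d0 = d
    return x

# _demo_heavy_ball() reaches ~3e-5, while _demo_heavy_ball(beta=0)
# (plain gradient descent) only reaches ~0.13 in the same number of steps.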

def DennisGayWelsch(fun, x0, B0=None, method='ls', search='inexact',
                    eps=1e-8, maxiter=1000, **kwargs):
    """Dennis-Gay-Welsch method: line search or trust region

    Parameters
    ----------
    fun: object
        objective function, with callable method eval returning
        (J, r, f, g); method f is also required
    x0: ndarray
        initial point
    B0: ndarray, optional
        initial approximation of matrix S, half the identity by default
    method: string, optional
        'ls' for DGW with line search, 'tr' for DGW with trust region
    search: string, optional
        used only when method == 'ls';
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations
    """
    def dogleg(G, g1):
        d_SD = -g1
        d_GN = np.linalg.solve(G, -g1)
        alpha = norm(d_SD) ** 2 / np.dot(d_SD, G @ d_SD)
        if norm(d_GN) < delta:
            d = d_GN
        elif alpha * norm(d_SD) > delta:
            d = delta * (d_SD / norm(d_SD))
        else:
            # solve ||alpha * d_SD + beta * (d_GN - alpha * d_SD)|| = delta
            D = norm(d_GN - alpha * d_SD) ** 2
            E = 2 * alpha * np.dot(d_SD, d_GN - alpha * d_SD)
            F = (alpha * norm(d_SD)) ** 2 - delta ** 2
            det = E ** 2 - 4 * D * F
            beta = (-E + sqrt(det)) / (2 * D)
            d = (1 - beta) * alpha * d_SD + beta * d_GN
        return d

    x = x0
    f0 = -np.inf
    J, r, f1, g1 = fun.eval(x)
    B = .5 * np.identity(x.size) if B0 is None else B0
    delta = 1   # radius of trust region
    niter = 0
    neval = 1
    uflag = True
    while abs(f1 - f0) >= eps * abs(f0):
        # DGW with line search
        if method == 'ls':
            d = np.linalg.solve(np.dot(J.T, J) + B, -g1)
            if np.dot(g1, d) > 0:   # fall back to the Gauss-Newton model
                d = np.linalg.solve(np.dot(J.T, J), -g1)
            if search == 'inexact':
                alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
            elif search == 'exact':
                alpha, v = ls.exact(fun, x, d, **kwargs)
            else:
                raise ValueError('Invalid search type')
            s = alpha * d
        # DGW with trust region (dogleg)
        elif method == 'tr':
            G = np.dot(J.T, J) + B
            d = dogleg(G, g1)
            delta_f = f1 - fun.f(x + d)
            if delta_f < 0:   # retry with the Gauss-Newton model
                G = np.dot(J.T, J)
                d = dogleg(G, g1)
                delta_f = f1 - fun.f(x + d)
            delta_q = -np.dot(d, g1) - np.dot(d, G @ d) / 2
            gamma = delta_f / delta_q
            uflag = False
            if gamma < .25:
                delta = delta / 4
            if gamma > .75 and abs(norm(d) - delta) < 1e-8 * delta:
                delta = delta * 2
            if gamma > 0:
                s = d
                uflag = True
        else:
            raise ValueError('Invalid method name')
        # update point x and matrix approximation B
        if uflag:
            x = x + s
            g0 = g1
            f0 = f1
            J0 = J
            J, r, f1, g1 = fun.eval(x)
            neval += 1
            y = g1 - g0
            y_hat = g1 - np.dot(J0.T, r)
            # scaling for faster convergence
            tau = min(1, abs(np.dot(s, y_hat) / np.dot(s, B @ s)))
            B = tau * B
            z = y_hat - B @ s
            t = np.dot(y, s)
            B = B + np.outer(z, y) / t + np.outer(y, z) / t \
                - np.outer(y * np.dot(z, s) / t, y / t)
        niter += 1
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval
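
# Self-contained check of the dogleg quadratic used above: D, E and F come
# from squaring the boundary condition
#     ||alpha * d_SD + beta * (d_GN - alpha * d_SD)|| = delta,
# so the positive root beta must put the step exactly on the trust-region
# boundary. The directions below are random and purely hypothetical.
def _check_dogleg_root():
    rng = np.random.default_rng(0)
    d_SD = rng.normal(size=3)
    d_SD /= norm(d_SD)                  # ensure alpha * ||d_SD|| < delta
    d_GN = 3.0 * rng.normal(size=3)
    alpha, delta = 0.5, 1.0
    D = norm(d_GN - alpha * d_SD) ** 2
    E = 2 * alpha * np.dot(d_SD, d_GN - alpha * d_SD)
    F = (alpha * norm(d_SD)) ** 2 - delta ** 2
    beta = (-E + sqrt(E ** 2 - 4 * D * F)) / (2 * D)
    d = (1 - beta) * alpha * d_SD + beta * d_GN
    print(norm(d))                      # ~= delta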

def GaussNewton(fun, x0, method=None, search='inexact', eps=1e-8,
                maxiter=1000, **kwargs):
    """Gauss-Newton (GN) method

    Parameters
    ----------
    fun: object
        objective function, with callable method eval returning (J, r, f, g)
    x0: ndarray
        initial point
    method: string, optional
        ignored (present for interface compatibility)
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for stopping criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations
    """
    x = x0
    f0 = -np.inf
    J, r, f1, g1 = fun.eval(x)
    niter = 0
    neval = 1
    while abs(f1 - f0) >= eps * abs(f0):
        # solve the linearized least-squares subproblem min ||J d + r||
        d = np.linalg.lstsq(J, -r, rcond=None)[0]
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        x = x + alpha * d
        f0 = f1
        J, r, f1, g1 = fun.eval(x)
        niter += 1
        neval += (v + 1)
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval
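
# Residual-protocol sketch for GaussNewton and DennisGayWelsch (the names
# ExpFit and _demo_gn are hypothetical): eval returns (J, r, f, g) with
# f = 0.5 * ||r||^2 and g = J'r; f and g are also exposed separately
# because the line search evaluates them.
class ExpFit:
    """Least squares for y ~ c[0] * exp(c[1] * t)."""

    def __init__(self, t, y):
        self.t, self.y = t, y

    def eval(self, c):
        m = c[0] * np.exp(c[1] * self.t)
        r = m - self.y                   # residuals
        J = np.column_stack([np.exp(c[1] * self.t), self.t * m])
        return J, r, 0.5 * r @ r, J.T @ r

    def f(self, c):
        return self.eval(c)[2]

    def g(self, c):
        return self.eval(c)[3]


def _demo_gn():
    rng = np.random.default_rng(2)
    t = np.linspace(0.0, 1.0, 20)
    y = 2.0 * np.exp(-1.5 * t) + 0.01 * rng.normal(size=20)
    x, f, gnorm, niter, neval = GaussNewton(ExpFit(t, y),
                                            np.array([1.0, 0.0]))
    print(x)                             # approx [2.0, -1.5]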

def quasiNewton(fun, x0, H0=None, method='bfgs', search='inexact', eps=1e-8,
                maxiter=1000, **kwargs):
    """Quasi-Newton methods: SR1 / DFP / BFGS

    Parameters
    ----------
    fun: object
        objective function, with callable methods f and g
    x0: ndarray
        initial point
    H0: ndarray, optional
        initial Hessian inverse, identity by default
    method: string, optional
        'sr1' for SR1, 'dfp' for DFP, 'bfgs' for BFGS
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f and g)
    """
    x = x0
    H = np.eye(x.size) if H0 is None else H0
    f0 = -np.inf
    f1 = fun.f(x)
    g0 = np.zeros(x.size)
    g1 = fun.g(x)
    niter = 0
    neval = 2
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        d = -(H @ g1)
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        s = alpha * d
        x = x + s
        g0 = g1
        g1 = fun.g(x)
        y = g1 - g0
        if niter == 0 and H0 is None:
            # initial scaling before the first update
            H = (np.dot(y, s) / np.dot(y, y)) * H
        f0 = f1
        f1 = fun.f(x)
        neval += (v + 2)
        if method == 'sr1':
            z = s - H @ y
            if abs(np.dot(z, y)) >= eps * norm(z) * norm(y):   # safeguard
                H = H + np.outer(z, z / np.dot(z, y))
        elif method == 'dfp':
            z = H @ y
            H = H + np.outer(s, s / np.dot(s, y)) - np.outer(
                z, z / np.dot(y, z))
        elif method == 'bfgs':
            r = 1 / np.dot(s, y)
            z = r * (H @ y)
            H = H + r * (1 + np.dot(y, z)) * np.outer(s, s) \
                - np.outer(s, z) - np.outer(z, s)
        else:
            raise ValueError('Invalid method name')
        niter += 1
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval
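
# Self-contained check that the BFGS update written above satisfies the
# secant equation H_new @ y == s (random s, y with the curvature condition
# s'y > 0; purely illustrative data):
def _check_bfgs_secant():
    rng = np.random.default_rng(1)
    H = np.eye(4)
    s = rng.normal(size=4)
    y = rng.normal(size=4)
    if np.dot(s, y) < 0:
        y = -y
    r = 1 / np.dot(s, y)
    z = r * (H @ y)
    H_new = H + r * (1 + np.dot(y, z)) * np.outer(s, s) \
        - np.outer(s, z) - np.outer(z, s)
    print(np.allclose(H_new @ y, s))    # True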

def Newton(fun, x0, method='damped', search='inexact', eps=1e-8,
           maxiter=1000, **kwargs):
    """Newton's method: normal or damped

    Parameters
    ----------
    fun: object
        objective function, with callable methods f, g and G
    x0: ndarray
        initial point
    method: string, optional
        'normal' for normal Newton, 'damped' for damped Newton
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f, g and G)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2
    errflag = 0
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        G = fun.G(x)
        try:   # test whether the Hessian is positive definite
            np.linalg.cholesky(G)
        except np.linalg.LinAlgError:
            errflag = 1
        d = np.linalg.solve(G, -g1)
        if method == 'normal':
            alpha, v = 1, 0
        elif method == 'damped':
            if search == 'inexact':
                alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
            elif search == 'exact':
                alpha, v = ls.exact(fun, x, d, **kwargs)
            else:
                raise ValueError('Invalid search type')
        else:
            raise ValueError('Invalid method name')
        x = x + alpha * d
        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 3)
        if niter == maxiter:
            break
    if errflag == 1:
        print('Warning: non-positive-definite Hessian encountered.')
    return x, f1, norm(g1), niter, neval
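
# Why 'damped' is the default: for f(x) = sqrt(1 + x**2) the pure Newton
# step is x - f'(x)/f''(x) = -x**3, which diverges whenever |x0| > 1. A
# self-contained 1-D sketch with naive step halving in place of the
# module's line search (illustrative only, no Armijo condition):
def _demo_damped_newton(x=1.1, iters=5):
    f = lambda t: sqrt(1 + t * t)
    for _ in range(iters):
        d = -x * (1 + x * x)            # Newton direction -f'(x)/f''(x)
        alpha = 1.0                     # alpha = 1 would give x = -x**3
        while f(x + alpha * d) >= f(x):
            alpha /= 2
        x = x + alpha * d
    return x                            # -> 0, while pure Newton blows up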

def modifiedNewton(fun, x0, method='mix', search='inexact', eps=1e-8,
                   maxiter=1000, **kwargs):
    """Modified Newton's method: mixed direction or LM method

    Parameters
    ----------
    fun: object
        objective function, with callable methods f, g and G
    x0: ndarray
        initial point
    method: string, optional
        'mix' for mixed direction method,
        'lm' for Levenberg-Marquardt method
    search: string, optional
        'exact' for exact line search, 'inexact' for inexact line search
    eps: float, optional
        tolerance, used for convergence criterion
    maxiter: int, optional
        maximum number of iterations
    kwargs: dict, optional
        other arguments to pass down

    Returns
    -------
    x: ndarray
        optimal point
    f: float
        optimal function value
    gnorm: float
        norm of gradient at optimal point
    niter: int
        number of iterations
    neval: int
        number of function evaluations (f, g and G)
    """
    x = x0
    f0 = -np.inf
    f1 = fun.f(x0)
    g1 = fun.g(x0)
    niter = 0
    neval = 2
    while (abs(f1 - f0) > eps) or (norm(g1) > eps):
        if method == 'mix':
            try:   # test if singular
                d = np.linalg.solve(fun.G(x), -g1)
                if abs(np.dot(g1, d)) < eps * norm(g1) * norm(d):
                    # nearly orthogonal to the gradient
                    d = -g1
                if np.dot(g1, d) > eps * norm(g1) * norm(d):
                    # non-descent direction
                    d = -d
            except np.linalg.LinAlgError:
                d = -g1
        elif method == 'lm':
            G = fun.G(x)
            mu = 0   # Levenberg-Marquardt shift
            while True:
                try:   # test if positive definite
                    L = np.linalg.cholesky(G + mu * np.eye(x.size))
                    break
                except np.linalg.LinAlgError:
                    if mu == 0:
                        mu = norm(G) / 2   # half the Frobenius norm
                    else:
                        mu *= 2
            # solve (G + mu * I) d = -g1 with the Cholesky factor
            y = np.linalg.solve(L, -g1)
            d = np.linalg.solve(L.T, y)
        else:
            raise ValueError('Invalid method name')
        if search == 'inexact':
            alpha, v = ls.inexact(fun, x, d, fx=f1, gx=g1, **kwargs)
        elif search == 'exact':
            alpha, v = ls.exact(fun, x, d, **kwargs)
        else:
            raise ValueError('Invalid search type')
        x = x + alpha * d
        f0 = f1
        f1 = fun.f(x)
        g1 = fun.g(x)
        niter += 1
        neval += (v + 3)
        if niter == maxiter:
            break
    return x, f1, norm(g1), niter, neval
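
# Self-contained sketch of the Levenberg-Marquardt shift used above:
# double mu until G + mu * I admits a Cholesky factorization (i.e. becomes
# positive definite), starting from half the Frobenius norm of G.
def _demo_lm_shift():
    G = np.array([[1.0, 0.0], [0.0, -4.0]])   # indefinite
    mu = 0.0
    while True:
        try:
            np.linalg.cholesky(G + mu * np.eye(2))
            break
        except np.linalg.LinAlgError:
            mu = norm(G) / 2 if mu == 0 else 2 * mu
    print(mu)   # ~= 4.12: first shift making G + mu*I positive definite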