def trsbox_linear(g, a, b, Delta, use_fortran=USE_FORTRAN):
    """
    Solve the convex program

        min_x  g' * x   s.t.  a <= x <= b,  ||x||^2 <= Delta^2

    with an active-set style sweep starting from x = 0.

    :param g: gradient vector of the linear objective, np.ndarray
    :param a: lower bounds on x, np.ndarray
    :param b: upper bounds on x, np.ndarray
    :param Delta: trust-region radius, float
    :param use_fortran: if true, delegate to trustregion.solve instead
    :return: the computed step x, np.ndarray
    """
    if use_fortran:
        # The compiled solver is handed bounds that are pushed at least
        # ZERO_THRESH away from zero on either side.
        lower = np.minimum(a, -ZERO_THRESH)
        upper = np.maximum(b, ZERO_THRESH)
        return trustregion.solve(g, None, Delta, sl=lower, su=upper,
                                 verbose_output=False)

    dim = g.size
    point = np.zeros((dim, ))
    search = -g

    # Coordinates whose gradient entry is (numerically) zero never move.
    frozen = np.abs(search) < ZERO_THRESH
    search[frozen] = 0.0

    for _ in range(dim):
        if np.linalg.norm(search) < ZERO_THRESH:
            return point

        # Tentative step along 'search' as far as ball_step allows.
        trial = point + ball_step(point, search, Delta) * search

        # Lowest-index free coordinate sitting on or beyond a box bound.
        violated = np.where(~frozen & ((trial <= a) | (trial >= b)))[0]
        if violated.size == 0:
            return trial  # unconstrained solution

        k = violated[0]
        # The lower bound takes precedence when both tests succeed.
        bound = a[k] if trial[k] <= a[k] else b[k]

        # Go as far as possible until the box is hit, then stop searching
        # along coordinate k.
        to_bound = (bound - point[k]) / search[k]
        point = point + to_bound * search
        point[k] = bound  # force boundary exactly
        search[k] = 0.0
        frozen[k] = True

    return point
def runTest(self):
    """Linear model without bounds: expect the step -delta * g / ||g||."""
    grad = np.array([1.0, -1.0])
    radius = 5.0
    dim = len(grad)
    # Both ways of saying "no curvature" are exercised.
    for hess in (None, np.zeros((dim, dim))):
        step = trustregion.solve(grad, hess, radius)
        expected = -radius * grad / np.linalg.norm(grad)
        worst_error = np.max(np.abs(step - expected))
        self.assertTrue(worst_error < 1e-10, msg='Wrong step')
        step_length = np.linalg.norm(step)
        self.assertTrue(step_length <= radius + BALL_EPS,
                        msg='Ball constraint violated')
def runTest(self):
    """With a zero trust-region radius the returned step must be zero."""
    dim = 5
    grad = np.ones((dim,))
    lower = -np.ones((dim,))
    upper = np.ones((dim,))
    radius = 0.0
    # Run once with H omitted and once with an explicit zero Hessian.
    for hess in (None, np.zeros((len(grad), len(grad)))):
        step = trustregion.solve(grad, hess, radius, sl=lower, su=upper)
        self.assertAlmostEqual(np.linalg.norm(step), 0.0,
                               msg='Nonzero step')
def runTest(self):
    """
    Box-constrained linear model with an essentially zero gradient.

    Any point is acceptable as long as it is feasible, so only the ball
    and box constraints are checked.
    """
    g = np.array([1e-15, 0.0])
    a = np.array([-2.0, -2.0])
    b = np.array([1.0, 2.0])
    delta = 5.0
    for H in [None, np.zeros((len(g), len(g)))]:
        x = trustregion.solve(g, H, delta, sl=a, su=b)
        # Since objective is essentially zero, will accept any x within the
        # defined region
        self.assertTrue(np.linalg.norm(x) <= delta + BALL_EPS,
                        msg='Ball constraint violated')
        # Every component must respect its lower bound, so test the
        # smallest slack (the largest slack would hide a violation).
        self.assertTrue(np.min(x - a) >= 0.0, msg='Lower bound violated')
        self.assertTrue(np.max(x - b) <= 0.0, msg='Upper bound violated')
def runTest(self):
    """
    Box-constrained linear model where one gradient entry is negligible.

    The expected step leaves the first coordinate at zero and pushes the
    second coordinate to its upper bound.
    """
    g = np.array([1e-15, -1.0])
    a = np.array([-2.0, -2.0])
    b = np.array([1.0, 2.0])
    delta = 5.0
    x = trustregion.solve(g, None, delta, sl=a, su=b)
    xtrue = np.array([0.0, 2.0])
    self.assertTrue(np.max(np.abs(x - xtrue)) < 1e-10, msg='Wrong step')
    self.assertTrue(np.linalg.norm(x) <= delta + BALL_EPS,
                    msg='Ball constraint violated')
    # Every component must respect its lower bound, so test the smallest
    # slack (the largest slack would hide a violation).
    self.assertTrue(np.min(x - a) >= 0.0, msg='Lower bound violated')
    self.assertTrue(np.max(x - b) <= 0.0, msg='Upper bound violated')
def runTest(self):
    """
    Box-constrained linear model where the ball constraint is the active
    one: the expected step is (-sqrt(2), sqrt(2)) for delta = 2.
    """
    g = np.array([1.0, -1.0])
    a = np.array([-2.0, -2.0])
    b = np.array([1.0, 2.0])
    delta = 2.0
    for H in [None, np.zeros((len(g), len(g)))]:
        x = trustregion.solve(g, H, delta, sl=a, su=b)
        xtrue = np.array([-sqrt(2.0), sqrt(2.0)])
        self.assertTrue(np.max(np.abs(x - xtrue)) < 1e-10, msg='Wrong step')
        self.assertTrue(np.linalg.norm(x) <= delta + BALL_EPS,
                        msg='Ball constraint violated')
        # Every component must respect its lower bound, so test the
        # smallest slack (the largest slack would hide a violation).
        self.assertTrue(np.min(x - a) >= 0.0, msg='Lower bound violated')
        self.assertTrue(np.max(x - b) <= 0.0, msg='Upper bound violated')
def runTest(self):
    """
    Unconstrained quadratic model with a negative definite diagonal H.

    The solver is required to do at least as well as the Cauchy point,
    to report gnew = g + H d, to report crvmin = 0 and to stay inside the
    trust region.
    """
    g = np.array([1.0, 0.0, 1.0])
    H = np.array([[-2.0, 0.0, 0.0], [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]])
    Delta = 5.0 / 12.0
    d, gnew, crvmin = trustregion.solve(g, H, Delta, verbose_output=True)
    true_d = np.array([-1.0 / 3.0, 0.0, -0.25])
    est_min = model_value(g, H, d)
    true_min = model_value(g, H, true_d)
    # Hope to get actual correct answer
    # self.assertTrue(np.all(d == true_d), msg='Wrong answer')
    # self.assertAlmostEqual(est_min, true_min, msg='Wrong min value')
    s_cauchy, red_cauchy, crvmin_cauchy = cauchy_pt(g, H, Delta)
    self.assertTrue(est_min <= red_cauchy,
                    msg='Cauchy reduction not achieved')
    # Compare with a tolerance: gnew comes out of the solver while the
    # reference is recomputed here, so bitwise equality is not guaranteed.
    self.assertTrue(np.max(np.abs(gnew - g - H.dot(d))) < 1e-10,
                    msg='Wrong gnew')
    self.assertAlmostEqual(crvmin, 0.0, msg='Wrong crvmin')
    self.assertTrue(np.linalg.norm(d) <= Delta + BALL_EPS,
                    msg='Ball constraint violated')
def runTest(self):
    """
    Box-constrained quadratic model with a positive definite diagonal H.

    The solver is required to do at least as well as the box-constrained
    Cauchy point, to report gnew = g + H d, to report crvmin = -1 and to
    return a step that satisfies the ball and box constraints.
    """
    g = np.array([1.0, 0.0, 1.0])
    H = np.array([[1.0, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 2.0]])
    Delta = 5.0 / 12.0
    sl = np.array([-0.3, -0.01, -0.1])
    su = np.array([10.0, 1.0, 10.0])
    d, gnew, crvmin = trustregion.solve(g, H, Delta, sl=sl, su=su,
                                        verbose_output=True)
    est_min = model_value(g, H, d)
    s_cauchy, red_cauchy, crvmin_cauchy = cauchy_pt_box(g, H, Delta, sl, su)
    self.assertTrue(est_min <= red_cauchy,
                    msg='Cauchy reduction not achieved')
    self.assertTrue(np.max(np.abs(gnew - g - H.dot(d))) < 1e-10,
                    msg='Wrong gnew')
    self.assertAlmostEqual(crvmin, -1.0, msg='Wrong crvmin')
    self.assertTrue(np.linalg.norm(d) <= Delta + BALL_EPS,
                    msg='Ball constraint violated')
    # Every component must respect its lower bound, so test the smallest
    # slack (the largest slack would hide a violation).
    self.assertTrue(np.min(d - sl) >= 0.0, msg='Lower bound violated')
    self.assertTrue(np.max(d - su) <= 0.0, msg='Upper bound violated')
def trsbox(xopt, g, H, sl, su, delta, use_fortran=USE_FORTRAN):
    """
    Approximately minimise the quadratic model Q(d) = g'd + 0.5 d'Hd subject
    to sl <= xopt + d <= su and ||d|| <= delta, using a conjugate gradient
    iteration that is restarted whenever a new bound becomes active.

    :param xopt: current point, np.ndarray of shape (n,)
    :param g: model gradient at xopt, np.ndarray of shape (n,)
    :param H: symmetric model Hessian, np.ndarray of shape (n, n)
    :param sl: lower bounds on xopt + d, np.ndarray of shape (n,)
    :param su: upper bounds on xopt + d, np.ndarray of shape (n,)
    :param delta: trust-region radius, must be strictly positive
    :param use_fortran: if true, delegate to trustregion.solve instead
    :return: tuple (d, gnew, crvmin): the step, the model gradient g + H d
        accumulated along the way, and a curvature indicator. crvmin is
        -1.0 if it was never updated, 0.0 if the alternative trust-region
        step was taken, otherwise the smallest positive curvature seen
        along a CG direction whose step did not hit a bound.
    :raises AssertionError: if the inputs have inconsistent shapes, H is not
        symmetric, xopt is outside [sl, su] or delta <= 0 (Python path only)
    """
    if use_fortran:
        # The compiled solver works with bounds relative to xopt, pushed at
        # least ZERO_THRESH away from zero on either side.
        return trustregion.solve(g, H, delta,
                                 sl=np.minimum(sl - xopt, -ZERO_THRESH),
                                 su=np.maximum(su - xopt, ZERO_THRESH),
                                 verbose_output=True)

    n = xopt.size
    assert xopt.shape == (n, ), "xopt has wrong shape (should be vector)"
    assert g.shape == (n, ), "g and xopt have incompatible sizes"
    assert len(H.shape) == 2, "H must be a matrix"
    assert H.shape == (n, n), "H and xopt have incompatible sizes"
    assert np.allclose(H, H.T), "H must be symmetric"
    assert sl.shape == (n, ), "sl and xopt have incompatible sizes"
    assert su.shape == (n, ), "su and xopt have incompatible sizes"
    assert np.all(sl <= xopt), "xopt violates lower bound sl"
    assert np.all(xopt <= su), "xopt violates upper bound su"
    assert delta > 0.0, "delta must be strictly positive"

    # Assume g and H have full quadratic model for objective
    # i.e. skip straight to label 8 in DFBOLS version

    # The sign of G(I) gives the sign of the change to the I-th variable
    # that will reduce Q from its value at XOPT. Thus XBDI(I) shows whether
    # or not to fix the I-th variable at one of its bounds initially, with
    # NACT being set to the number of fixed variables. D and GNEW are also
    # set for the first iteration. DELSQ is the upper bound on the sum of
    # squares of the free variables. QRED is the reduction in Q so far.
    iterc = 0

    xbdi = np.zeros((n, ), dtype=int)  # fix x_i at bounds? [values -1, 0, 1]
    xbdi[(xopt <= sl) & (g >= 0.0)] = -1
    xbdi[(xopt >= su) & (g <= 0.0)] = 1
    # Number of fixed variables. Variables fixed by the initial assignment
    # above must be counted too, otherwise itermax and the value handed to
    # alt_trust_step overstate the number of free variables.
    nact = np.count_nonzero(xbdi)

    d = np.zeros((n, ))
    s = np.zeros((n, ))
    gnew = g.copy()
    qred = 0.0
    delsq = delta**2
    crvmin = -1.0
    beta = 0.0

    # label 20
    # will either quit main CG loop to finish, or do alternative step
    need_alt_trust_step = False
    MAX_LOOP_ITERS = 100 * n**2  # avoid infinite loops

    # main CG loop [label 30]; bounded rather than 'while True'. If the cap
    # is reached without a break, need_alt_trust_step keeps its last value.
    for _ in range(MAX_LOOP_ITERS):
        # Search direction: steepest descent after a (re)start (beta == 0),
        # otherwise the usual CG update; fixed variables do not move.
        s[xbdi != 0] = 0.0
        if beta == 0.0:
            s[xbdi == 0] = -gnew[xbdi == 0]
        else:
            s[xbdi == 0] = beta * s[xbdi == 0] - gnew[xbdi == 0]
        stepsq = sumsq(s)

        if stepsq == 0.0:
            need_alt_trust_step = False
            break  # break and quit

        if beta == 0.0:
            gredsq = stepsq
            itermax = iterc + n - nact

        if iterc == 0:
            gredsq0 = gredsq

        # Exit conditions
        if gredsq <= min(1.0e-6 * gredsq0, 1.0e-18) or gredsq * delsq <= min(
                1.0e-6 * qred**2, 1.0e-18):  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        # Multiply the search direction by the second derivative matrix of Q
        # and calculate some scalars for the choice of steplength. Then set
        # BLEN to the length of the the step to the trust region boundary and
        # STPLEN to the steplength, ignoring the simple bounds.
        hs = H.dot(s)

        # label 50
        ds = np.dot(s[xbdi == 0], d[xbdi == 0])
        shs = np.dot(s[xbdi == 0], hs[xbdi == 0])
        resid = delsq - sumsq(d[xbdi == 0])

        if resid <= 0.0:
            need_alt_trust_step = True
            break  # break and calculate alt step instead

        temp = sqrt(stepsq * resid + ds**2)
        blen = (resid / (temp + ds) if ds >= 0.0 else (temp - ds) / stepsq)
        stplen = (blen if shs <= 0.0 else min(blen, gredsq / shs))

        # Exit condition
        if stplen <= 1.0e-30:  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        # Reduce STPLEN if necessary in order to preserve the simple bounds,
        # letting IACT be the index of the new constrained variable.
        iact = None
        for i in range(n):
            if s[i] != 0.0:
                temp = (su[i] - xopt[i] - d[i]
                        if s[i] > 0.0 else sl[i] - xopt[i] - d[i]) / s[i]
                if temp < stplen:
                    stplen = temp
                    iact = i

        # Update CRVMIN, GNEW and D. Set SDEC to the decrease that occurs
        # in Q.
        sdec = 0.0
        if stplen > 0.0:
            iterc += 1
            temp = shs / stepsq
            if iact is None and temp > 0.0:
                crvmin = min(crvmin, temp) if crvmin != -1.0 else temp
            ggsav = gredsq
            gnew += stplen * hs
            d += stplen * s
            gredsq = sumsq(gnew[xbdi == 0])
            sdec = max(stplen * (ggsav - 0.5 * stplen * shs), 0.0)
            qred += sdec

        # Restart the conjugate gradient method if it has hit a new bound.
        if iact is not None:
            nact += 1
            xbdi[iact] = (1 if s[iact] >= 0.0 else -1)
            delsq = delsq - d[iact]**2
            if delsq <= 0.0:
                need_alt_trust_step = True
                break  # break and calculate alt step instead
            beta = 0.0
            # label 20
            continue  # restart loop (new CG iteration)

        # If STPLEN is less than BLEN, then either apply another conjugate
        # gradient iteration or RETURN.
        if stplen >= blen:
            need_alt_trust_step = True
            break  # break and calculate alt step instead

        # Exit condition
        if iterc == itermax or sdec <= 1.0e-6 * qred:  # DFBOLS
            need_alt_trust_step = False
            break  # break and quit

        beta = gredsq / ggsav
        continue  # new CG iteration
    # end of CG loop

    # either done or need to take an alternative step
    if need_alt_trust_step:
        crvmin = 0.0
        d, gnew = alt_trust_step(n, xopt, H, sl, su, d, xbdi, nact, gnew, qred)
        return d, gnew, crvmin
    else:
        return d_within_bounds(d, xopt, sl, su, xbdi), gnew, crvmin
def minimize(f, gradf, hessf, x0, delta0=1.0, deltamin=1e-6, gtol=1e-6, maxiter=100, verbose=False):
    """
    Solve the nonconvex optimization problem
        min_{x} f(x)
    using Newton's method, made globally convergent with trustregion.

    Simple implementation of trust-region methods, based on Algorithm 4.1 from
        Nocedal & Wright, Numerical Optimization, 2nd edn (2006)

    :param f: objective function, f : np.ndarray -> float
    :param gradf: gradient of objective, gradf : np.ndarray -> np.ndarray
    :param hessf: Hessian of objective, hessf : np.ndarray -> np.ndarray
    :param x0: starting point of solver, np.ndarray
    :param delta0: initial trust-region radius, float
    :param deltamin: final trust-region radius, float
    :param gtol: terminate when ||gradf(x)|| <= gtol, float
    :param maxiter: terminate after maxiter iterations
    :param verbose: whether to print information at each iteration, bool
        (when true, NumPy's global print options are changed)
    :return: solution x, number of iterations
    """
    xk = x0.copy()  # current iterate
    deltak = delta0

    if verbose:
        print("{0:^10}{1:^10}{2:^15}{3:^15}".format("k", "f(xk)", "||gradf(xk)||", "xk"))
        np.set_printoptions(precision=4, suppress=True)

    k = 0  # number of trust-region steps computed so far
    while True:
        # Evaluate objective
        fk = f(xk)
        gk = gradf(xk)
        Hk = hessf(xk)

        if verbose:
            print("{0:^10}{1:^10.4f}{2:^15.2e}{3:^15}".format(
                k, fk, np.linalg.norm(gk), str(xk)))

        # Check termination. The iteration budget is tested here, before a
        # step is computed, so that at most maxiter steps are ever taken and
        # the returned count matches the returned iterate.
        if np.linalg.norm(gk) <= gtol or deltak <= deltamin or k >= maxiter:
            break  # quit loop

        # Step calculation
        sk = trustregion.solve(gk, Hk, deltak)

        mk = fk + np.dot(gk, sk) + 0.5 * np.dot(sk, Hk.dot(sk))  # mk(sk)
        predicted = fk - mk
        actual = fk - f(xk + sk)
        if predicted > 0.0:
            rhok = actual / predicted
        else:
            # No predicted decrease (zero, negative or NaN): the ratio is
            # meaningless, so treat the step as a failure. This rejects it
            # and shrinks the radius instead of stalling on NaN or accepting
            # an ascent step because two negative numbers were divided.
            rhok = float("-inf")

        # Update trust-region radius (left unchanged for a middling ratio)
        if rhok < 0.25:
            deltak = 0.25 * deltak
        elif rhok > 0.75 and abs(np.linalg.norm(sk) - deltak) < 1e-10:
            deltak = min(2 * deltak, 1e10)

        # Update iterate (kept where it is when the step is rejected)
        if rhok > 0.01:
            xk = xk + sk

        k += 1

    return xk, k
def pred(s):
    """Return -(g's + 0.5 s'Hs), the model decrease predicted for step s."""
    return -np.dot(g, s) - 0.5 * np.dot(s, H.dot(s))


def _print_banner(title):
    """Print a solver name framed by two rules."""
    print('==============================')
    print(title)
    print('==============================')


def _print_result(step, grad, curv):
    """Print the step, new gradient, curvature, predicted decrease and new point."""
    print('')
    print('s =', step)
    print('gnew =', grad)
    print('crvmin = ', curv)
    print('pred =', pred(step))
    print('xnew =', xopt + step)
    print('')


# Pure Python solver first (banner is printed before the solve runs).
_print_banner('Python')
s1, gnew1, crvmin1 = trsbox(np.zeros((2, )), g, H, sl, su, delta)
_print_result(s1, gnew1, crvmin1)

# Then the compiled solver on the same data.
_print_banner('Fortran')
s2, gnew2, crvmin2 = trustregion.solve(g, H, delta, sl=sl, su=su,
                                       verbose_output=True)
_print_result(s2, gnew2, crvmin2)

print('Done')
def minimize(f, gradf, hessf, x0, xl, xu, delta0=1.0, deltamin=1e-6, gtol=1e-6, maxiter=100, verbose=False):
    """
    Solve the nonconvex optimization problem
        min_{x} f(x)  subject to  xl <= x <= xu
    using Newton's method, made globally convergent with trustregion.

    Simple implementation of trust-region methods, based on Algorithm 4.1 from
        Nocedal & Wright, Numerical Optimization, 2nd edn (2006)

    :param f: objective function, f : np.ndarray -> float
    :param gradf: gradient of objective, gradf : np.ndarray -> np.ndarray
    :param hessf: Hessian of objective, hessf : np.ndarray -> np.ndarray
    :param x0: starting point of solver (projected into the box), np.ndarray
    :param xl: lower bounds on x, np.ndarray
    :param xu: upper bounds on x, np.ndarray
    :param delta0: initial trust-region radius, float
    :param deltamin: final trust-region radius, float
    :param gtol: terminate when the criticality measure is <= gtol, float
    :param maxiter: terminate after maxiter iterations
    :param verbose: whether to print information at each iteration, bool
        (when true, NumPy's global print options are changed)
    :return: solution x, number of iterations
    """
    xk = np.maximum(np.minimum(xu, x0), xl)  # current iterate, project x0 into box
    deltak = delta0

    if verbose:
        print("{0:^10}{1:^10}{2:^15}{3:^15}".format("k", "f(xk)", "||gradf(xk)||", "xk"))
        np.set_printoptions(precision=4, suppress=False)

    k = 0  # number of trust-region steps computed so far
    while True:
        # Evaluate objective
        fk = f(xk)
        gk = gradf(xk)
        Hk = hessf(xk)

        # With box constraints, we have a new criticality measure
        # See Theorem 12.1.6 of Conn, Gould & Toint, Trust-Region Methods (2000)
        # Note: if unbounded, this reduces to np.linalg.norm(gk)
        crit_measure = abs(
            np.dot(gk, trustregion.solve(gk, None, 1.0, sl=xl - xk, su=xu - xk)))

        if verbose:
            print("{0:^10}{1:^10.4e}{2:^15.4e}{3:^15}".format(
                k, fk, crit_measure, str(xk)))

        # Check termination. The iteration budget is tested here, before a
        # step is computed, so that at most maxiter steps are ever taken and
        # the returned count matches the returned iterate.
        if crit_measure <= gtol or deltak <= deltamin or k >= maxiter:
            break  # quit loop

        # Step calculation (bounds are passed relative to the current iterate)
        sk = trustregion.solve(gk, Hk, deltak, sl=xl - xk, su=xu - xk)

        mk = fk + np.dot(gk, sk) + 0.5 * np.dot(sk, Hk.dot(sk))  # mk(sk)
        predicted = fk - mk
        actual = fk - f(xk + sk)
        if predicted > 0.0:
            rhok = actual / predicted
        else:
            # No predicted decrease (zero, negative or NaN): the ratio is
            # meaningless, so treat the step as a failure. This rejects it
            # and shrinks the radius instead of stalling on NaN or accepting
            # an ascent step because two negative numbers were divided.
            rhok = float("-inf")

        # Update trust-region radius (left unchanged for a middling ratio)
        if rhok < 0.25:
            deltak = 0.25 * deltak
        elif rhok > 0.75 and abs(np.linalg.norm(sk) - deltak) < 1e-10:
            deltak = min(2 * deltak, 1e10)

        # Update iterate (kept where it is when the step is rejected)
        if rhok > 0.01:
            xk = xk + sk

        k += 1

    return xk, k
""" Examples of usage of trustregion.solve """ # Ensure compatibility with Python 2 from __future__ import absolute_import, division, print_function, unicode_literals import numpy as np import trustregion # Example 1: Linear objective, no box constraints # min_{s} g*s subject to ||s||_2 <= delta g = np.array([1.0, 2.0, 3.0]) delta = 1.0 s = trustregion.solve(g, None, delta) # H=None or H=np.zeros(...) both valid print("Example 1, s =", s) # Example 2: Linear objective, with box constraints # min_{s} g*s subject to ||s||_2 <= delta and sl <= s <= su g = np.array([-1.0, 2.0, 3.0]) delta = 0.5 sl = np.array([-10.0, -0.5, -0.3]) # lower bounds, need sl <= 0 su = np.array([0.5, 10.0, 1.0]) # upper bounds, need su >= 0 s = trustregion.solve(g, None, delta, sl=sl, su=su) # H=None or H=np.zeros(...) both valid print("Example 2, s =", s) # Example 3: Quadratic objective, no box constraints # min_{s} g*s + 0.5*s*H*s subject to ||s||_2 <= delta g = np.array([1.0, 2.0, 3.0]) H = np.eye(3) # must be real, symmetric delta = 1.0