def linesearch_wrapper(func, jac, x_k, p_k, gfk, old_fval, old_old_fval,
                       alpha_max, c1=0.0001, c2=0.1, maxIter=100):
    # alpha_max, c1, c2 and maxIter are accepted for interface compatibility
    # but are not forwarded to the underlying line search.
    alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
        _line_search_wolfe12(func, jac, x_k, p_k, gfk,
                             old_fval, old_old_fval,
                             amin=1e-100, amax=1e100)
    return alpha_k, fc, gc, old_fval, old_old_fval, gfkp1
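# The wrapper above forwards to SciPy's private _line_search_wolfe12, which
# tries the Wolfe1 search and falls back to a Wolfe2 search. As an
# illustrative, self-contained sketch (not part of the original module), the
# public scipy.optimize.line_search exposes the same strong-Wolfe machinery;
# the toy quadratic below is an assumption made just for this example.
import numpy as np
from scipy.optimize import line_search

def _demo_quadratic(x, A=np.diag([1.0, 10.0])):
    return 0.5 * x @ A @ x

def _demo_quadratic_grad(x, A=np.diag([1.0, 10.0])):
    return A @ x

xk = np.array([1.0, 1.0])
gfk = _demo_quadratic_grad(xk)
pk = -gfk  # steepest-descent direction for the demo
alpha, fc, gc, new_fval, old_fval, new_slope = line_search(
    _demo_quadratic, _demo_quadratic_grad, xk, pk, gfk,
    old_fval=_demo_quadratic(xk), c1=1e-4, c2=0.1)
print(alpha, new_fval)  # a step length satisfying the strong Wolfe conditions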
def gd_ls(self, f, x0, fprime):
    self.theta_t = x0
    self.old_fval = f(self.theta_t)
    self.gf_t = fprime(x0)
    self.rho_t = -self.gf_t
    try:
        self.eps_t, fc, gc, self.old_fval, self.old_old_fval, gf_next = \
            _line_search_wolfe12(f, fprime, self.theta_t, self.rho_t,
                                 self.gf_t, self.old_fval, self.old_old_fval,
                                 amin=1e-100, amax=1e100)
    except _LineSearchError:
        print('Line search failed to find a better solution.\n')
        self.stop = True
        theta_next = self.theta_t + self.gf_t * .00001
        return theta_next

    theta_next = self.theta_t + self.eps_t * self.rho_t
    return theta_next
def bfgs_min(self, f, x0, fprime):
    self.theta_t = x0
    self.old_fval = f(self.theta_t)
    self.gf_t = fprime(x0)
    self.rho_t = -numpy.dot(self.H_t, self.gf_t)
    try:
        self.eps_t, fc, gc, self.old_fval, self.old_old_fval, gf_next = \
            _line_search_wolfe12(f, fprime, self.theta_t, self.rho_t,
                                 self.gf_t, self.old_fval, self.old_old_fval,
                                 amin=1e-100, amax=1e100)
    except _LineSearchError:
        print('Line search failed to find a better solution.\n')
        theta_next = self.theta_t + self.gf_t * .0001
        return theta_next

    theta_next = self.theta_t + self.eps_t * self.rho_t
    delta_t = theta_next - self.theta_t
    self.theta_t = theta_next
    self.phi_t = gf_next - self.gf_t
    self.gf_t = gf_next
    denom = 1.0 / (numpy.dot(self.phi_t, delta_t))

    ## Memory-intensive computation based on Wright and Nocedal,
    ## 'Numerical Optimization', 1999, p. 198.
    # I = numpy.eye(len(x0), dtype=int)
    # A = I - self.phi_t[:, numpy.newaxis] * delta_t[numpy.newaxis, :] * denom
    ## Estimating H.
    # self.H_t[...] = numpy.dot(self.H_t, A)
    # A[...] = I - delta_t[:, numpy.newaxis] * self.phi_t[numpy.newaxis, :] * denom
    # self.H_t[...] = numpy.dot(A, self.H_t) + (denom * delta_t[:, numpy.newaxis] *
    #                                           delta_t[numpy.newaxis, :])
    # A = None

    # Fast, memory-friendly calculation after simplification of the above.
    Z = numpy.dot(self.H_t, self.phi_t)
    self.H_t -= denom * Z[:, numpy.newaxis] * delta_t[numpy.newaxis, :]
    self.H_t -= denom * delta_t[:, numpy.newaxis] * Z[numpy.newaxis, :]
    self.H_t += denom * denom * numpy.dot(self.phi_t, Z) * \
        delta_t[:, numpy.newaxis] * delta_t[numpy.newaxis, :]
    # The denom * delta * delta^T term of the reference update above.
    self.H_t += denom * delta_t[:, numpy.newaxis] * delta_t[numpy.newaxis, :]
    return theta_next
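# The compact update in bfgs_min above is meant to reproduce the commented-out
# textbook inverse-Hessian update from Nocedal & Wright. A minimal,
# self-contained check (illustrative names, random data; not part of the
# original class) that the two forms agree:
import numpy

rng = numpy.random.default_rng(0)
n = 5
H = numpy.eye(n)                      # current inverse-Hessian estimate (H_t)
s = rng.standard_normal(n)            # step (delta_t)
y = rng.standard_normal(n)            # gradient difference (phi_t)
rho = 1.0 / numpy.dot(y, s)           # denom

# Textbook form: H+ = (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T.
I = numpy.eye(n)
A1 = I - rho * s[:, None] * y[None, :]
A2 = I - rho * y[:, None] * s[None, :]
H_ref = A1 @ H @ A2 + rho * s[:, None] * s[None, :]

# Compact form: one matrix-vector product, no n-by-n temporaries.
Z = H @ y
H_fast = H.copy()
H_fast -= rho * Z[:, None] * s[None, :]
H_fast -= rho * s[:, None] * Z[None, :]
H_fast += rho * rho * numpy.dot(y, Z) * s[:, None] * s[None, :]
H_fast += rho * s[:, None] * s[None, :]

print(numpy.allclose(H_ref, H_fast))  # expected: True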
def minimize_bfgs(func: Callable,
                  init_param_vec: Sequence,
                  grad: Callable = None,
                  grad_tol: float = 1e-5,
                  return_all: bool = True,
                  last_record: 'OptimizeRecord' = None,
                  notes: dict = None):
    if notes is None:
        notes = {}
    notes["grad_tol"] = grad_tol
    notes["method"] = "BFGS"

    f = func
    fprime = grad
    epsilon = np.finfo(float).eps**0.5
    gtol = grad_tol
    norm = np.Inf
    x0 = init_param_vec
    if x0.ndim == 0:
        x0.shape = (1, )
    maxiter = len(x0) * 200

    func_calls, f = wrap_function(f, ())
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
        notes["grad_approx"] = True
    else:
        grad_calls, myfprime = wrap_function(fprime, ())
        notes["grad_approx"] = False

    k = 0
    N = len(x0)
    I = np.eye(N, dtype=int)

    if last_record:
        old_fval = last_record.final_func
        gfk = last_record.final_grad
        old_old_fval = last_record.last_vars["old_old_fval"]
        Hk = last_record.last_vars["Hk"]
    else:
        # Sets the initial step guess to dx ~ 1
        old_fval = f(x0)
        gfk = myfprime(x0)
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2
        Hk = I

    all_param_vec = [x0]
    all_func = [old_fval]
    all_grad = [gfk]

    xk = x0
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)

    while (gnorm > gtol) and (k < maxiter):
        pk = -np.dot(Hk, gfk)
        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, myfprime, xk, pk, gfk,
                                     old_fval, old_old_fval,
                                     amin=1e-100, amax=1e100)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        k += 1
        if return_all:
            all_param_vec.append(xk)
            all_func.append(old_fval)
            all_grad.append(gfk)
        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break

        if not np.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

        try:
            # This was handled in numeric; keep it for extra safety.
            rhok = 1.0 / (np.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if np.isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
        A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
        Hk = np.dot(A1, np.dot(Hk, A2)) + \
            (rhok * sk[:, np.newaxis] * sk[np.newaxis, :])

    fval = old_fval
    if np.isnan(fval):
        # This can happen if the first call to f returned NaN;
        # the loop is then never entered.
        warnflag = 2

    if warnflag == 2:
        msg = _status_message['pr_loss']
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
    else:
        msg = _status_message['success']

    history = {"func": all_func, "grad": all_grad, "param_vec": all_param_vec}
    final_status = {
        "msg": msg,
        "warnflag": warnflag,
        "num_func_call": func_calls[0],
        "num_grad_call": grad_calls[0],
        "num_iter": k
    }
    last_vars = {"Hk": Hk, "old_old_fval": old_old_fval}
    record = OptimizeRecord(history, final_status, last_vars, notes)

    return record
def bfgs(fun, grad, x0, iterations, tol):
    """
    Minimization of scalar function of one or more variables using the
    BFGS algorithm.

    Parameters
    ----------
    fun : function
        Objective function.
    grad : function
        Gradient function of objective function.
    x0 : numpy.array, size=9
        Initial value of the parameters to be estimated.
    iterations : int
        Maximum iterations of optimization algorithms.
    tol : float
        Tolerance of optimization algorithms.

    Returns
    -------
    xk : numpy.array, size=9
        Parameters estimated by optimization algorithms.
    fval : float
        Objective function value at xk.
    grad_val : float
        Gradient value of objective function at xk.
    grad_log : numpy.array
        The record of gradient of objective function of each iteration.
    """
    fval = None
    grad_val = None
    x_log = []
    y_log = []
    grad_log = []

    x0 = asarray(x0).flatten()
    # iterations = len(x0) * 200
    old_fval = fun(x0)
    gfk = grad(x0)
    k = 0
    N = len(x0)
    I = np.eye(N, dtype=int)
    Hk = I
    old_old_fval = old_fval + np.linalg.norm(gfk) / 2

    xk = x0
    x_log = np.append(x_log, xk.T)
    y_log = np.append(y_log, fun(xk))
    grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
    gnorm = np.amax(np.abs(gfk))

    while (gnorm > tol) and (k < iterations):
        pk = -np.dot(Hk, gfk)

        try:
            alpha, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
                fun, grad, xk, pk, gfk, old_fval, old_old_fval,
                amin=1e-100, amax=1e100)
        except _LineSearchError:
            break

        x1 = xk + alpha * pk
        sk = x1 - xk
        xk = x1
        if gfkp1 is None:
            gfkp1 = grad(x1)
        yk = gfkp1 - gfk
        gfk = gfkp1
        k += 1

        gnorm = np.amax(np.abs(gfk))
        grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
        x_log = np.append(x_log, xk.T)
        y_log = np.append(y_log, fun(xk))
        if (gnorm <= tol):
            break

        if not np.isfinite(old_fval):
            break

        try:
            rhok = 1.0 / (np.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
        if isinf(rhok):
            rhok = 1000.0
        A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
        A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
        Hk = np.dot(A1, np.dot(Hk, A2)) + \
            (rhok * sk[:, np.newaxis] * sk[np.newaxis, :])

    fval = old_fval
    grad_val = grad_log[-1]

    return xk, fval, grad_val, x_log, y_log, grad_log
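# A hedged usage sketch for bfgs above (it assumes this module's imports
# resolve: np, asarray, isinf, and SciPy's private
# _line_search_wolfe12/_LineSearchError helpers, whose exact location varies
# across SciPy versions). The Rosenbrock function and the length-9 starting
# point are illustrative choices only.
import numpy as np
from scipy.optimize import rosen, rosen_der

x0 = np.full(9, 1.3)                  # size=9, matching the docstring
xk, fval, grad_val, x_log, y_log, grad_log = bfgs(
    rosen, rosen_der, x0, iterations=2000, tol=1e-6)

print("minimizer:", xk)               # should approach the all-ones vector
print("final objective:", fval)
print("function values logged:", len(y_log))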
def _minimize(fun, x0, args=(), jac=None, callback=None,
              gtol=1e-5, fxtol=1e-09, xtol=1e-09, norm=Inf,
              eps=_epsilon, maxiter=None, disp=False,
              return_all=False, **unknown_options):
    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200

    func_calls, f = wrap_function(f, args)
    grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))

    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = None
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    xnorm = np.Inf
    fx = np.Inf
    print_lst = []

    while (gnorm > gtol) and (xnorm > xtol) and (fx > fxtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, myfprime, xk, pk, gfk,
                                     old_fval, old_old_fval)
        except _LineSearchError:
            # Line search failed or could not find a better solution.
            print_lst.append("Line search failed or could not reach a better solution.")
            warnflag = 2
            break

        xkp1 = xk + alpha_k * pk
        fx = np.absolute(old_old_fval - old_fval)
        xnorm = vecnorm(xkp1 - xk)

        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1

        if disp:
            print_ = ('Iter: ' + str(k) + '\n')
            print_ += ('x: ' + str(xk) + '\n')
            print_ += ('f(x): ' + str(f(xk)) + '\n')  # change to fx
            print_ += ('gtol: ' + str(gnorm) + '\n')
            print_ += ('xtol: ' + str(xnorm) + '\n')
            print_ += ('fxtol: ' + str(fx) + '\n')
            print_lst.append(print_)

        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break

        if not numpy.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            print_lst.append("Found +-Inf as the optimal value... or something went wrong.")
            warnflag = 2
            break

        try:
            # This was handled in numeric; keep it for extra safety.
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            if disp:
                print_lst.append("Division by zero!")
        if isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            if disp:
                print_lst.append("Division by zero!")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + \
            (rhok * sk[:, numpy.newaxis] * sk[numpy.newaxis, :])

    fval = old_fval
    if np.isnan(fval):
        # This can happen if the first call to f returned NaN;
        # the loop is then never entered.
        print_lst.append("NaN encountered on the first call to the algorithm.")
        warnflag = 2

    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print_ = ("Warning: " + msg)
            print_ += (" Current function value: %f" % fval)
            print_ += (" Iterations: %d" % k)
            print_ += (" Function evaluations: %d" % func_calls[0])
            print_ += (" Gradient evaluations: %d" % grad_calls[0])
            print_lst.append(print_)
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print_ = ("Warning: " + msg)
            print_ += (" Current function value: %f" % fval)
            print_ += (" Iterations: %d" % k)
            print_ += (" Function evaluations: %d" % func_calls[0])
            print_ += (" Gradient evaluations: %d" % grad_calls[0])
            print_lst.append(print_)
    else:
        msg = _status_message['success']
        if disp:
            print_ = (msg + '\n')
            print_ += (" Current function value: %f" % fval)
            print_ += (" Iterations: %d" % k)
            print_ += (" Function evaluations: %d" % func_calls[0])
            print_ += (" Gradient evaluations: %d" % grad_calls[0])
            print_lst.append(print_)

    [print(line) for line in print_lst]

    result = OptimizeResult(fun=fval, lst=print_lst, jac=gfk, hess_inv=Hk,
                            nfev=func_calls[0], njev=grad_calls[0],
                            status=warnflag, success=(warnflag == 0),
                            message=msg, x=xk, nit=k)
    if retall:
        result['allvecs'] = allvecs
    return result
def conjugate_gradient(fun, grad, x0, iterations, tol):
    """
    Minimization of scalar function of one or more variables using the
    conjugate gradient algorithm.

    Parameters
    ----------
    fun : function
        Objective function.
    grad : function
        Gradient function of objective function.
    x0 : numpy.array, size=9
        Initial value of the parameters to be estimated.
    iterations : int
        Maximum iterations of optimization algorithms.
    tol : float
        Tolerance of optimization algorithms.

    Returns
    -------
    xk : numpy.array, size=9
        Parameters estimated by optimization algorithms.
    fval : float
        Objective function value at xk.
    grad_val : float
        Gradient value of objective function at xk.
    grad_log : numpy.array
        The record of gradient of objective function of each iteration.
    """
    fval = None
    grad_val = None
    x_log = []
    y_log = []
    grad_log = []

    x0 = asarray(x0).flatten()
    # iterations = len(x0) * 200
    old_fval = fun(x0)
    gfk = grad(x0)
    k = 0
    xk = x0
    # Sets the initial step guess to dx ~ 1
    old_old_fval = old_fval + np.linalg.norm(gfk) / 2
    pk = -gfk

    x_log = np.append(x_log, xk.T)
    y_log = np.append(y_log, fun(xk))
    grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
    gnorm = np.amax(np.abs(gfk))
    sigma_3 = 0.01

    while (gnorm > tol) and (k < iterations):
        deltak = np.dot(gfk, gfk)

        cached_step = [None]

        def polak_ribiere_powell_step(alpha, gfkp1=None):
            xkp1 = xk + alpha * pk
            if gfkp1 is None:
                gfkp1 = grad(xkp1)
            yk = gfkp1 - gfk
            beta_k = max(0, np.dot(yk, gfkp1) / deltak)
            pkp1 = -gfkp1 + beta_k * pk
            gnorm = np.amax(np.abs(gfkp1))
            return (alpha, xkp1, pkp1, gfkp1, gnorm)

        def descent_condition(alpha, xkp1, fp1, gfkp1):
            # Polak-Ribiere+ needs an explicit check of a sufficient
            # descent condition, which is not guaranteed by strong Wolfe.
            #
            # See Gilbert & Nocedal, "Global convergence properties of
            # conjugate gradient methods for optimization",
            # SIAM J. Optimization 2, 21 (1992).
            cached_step[:] = polak_ribiere_powell_step(alpha, gfkp1)
            alpha, xk, pk, gfk, gnorm = cached_step

            # Accept step if it leads to convergence.
            if gnorm <= tol:
                return True

            # Accept step if sufficient descent condition applies.
            return np.dot(pk, gfk) <= -sigma_3 * np.dot(gfk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(fun, grad, xk, pk, gfk, old_fval,
                                     old_old_fval, c2=0.4,
                                     amin=1e-100, amax=1e100,
                                     extra_condition=descent_condition)
        except _LineSearchError:
            break

        # Reuse already computed results if possible
        if alpha_k == cached_step[0]:
            alpha_k, xk, pk, gfk, gnorm = cached_step
        else:
            alpha_k, xk, pk, gfk, gnorm = polak_ribiere_powell_step(
                alpha_k, gfkp1)

        k += 1
        grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
        x_log = np.append(x_log, xk.T)
        y_log = np.append(y_log, fun(xk))

    fval = old_fval
    grad_val = grad_log[-1]

    return xk, fval, grad_val, x_log, y_log, grad_log
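# The inner polak_ribiere_powell_step above computes the PR+ coefficient
# beta_k = max(0, y_k . g_{k+1} / (g_k . g_k)). A small self-contained sketch
# of just that rule (illustrative helper and data, not part of the original
# function):
import numpy as np

def pr_plus_direction(gfk, gfkp1, pk):
    """Next Polak-Ribiere+ search direction from consecutive gradients."""
    yk = gfkp1 - gfk
    beta_k = max(0.0, np.dot(yk, gfkp1) / np.dot(gfk, gfk))  # clip at zero
    return -gfkp1 + beta_k * pk

# When the raw PR coefficient is negative, the clip restarts the method with
# a pure steepest-descent direction.
g0 = np.array([2.0, 0.0])
g1 = np.array([1.0, 0.0])
print(pr_plus_direction(g0, g1, -g0))  # [-1. -0.]: just -g1, beta clipped to 0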
def steepest_descent(fun, grad, x0, iterations, tol):
    """
    Minimization of scalar function of one or more variables using the
    steepest descent algorithm.

    Parameters
    ----------
    fun : function
        Objective function.
    grad : function
        Gradient function of objective function.
    x0 : numpy.array, size=9
        Initial value of the parameters to be estimated.
    iterations : int
        Maximum iterations of optimization algorithms.
    tol : float
        Tolerance of optimization algorithms.

    Returns
    -------
    xk : numpy.array, size=9
        Parameters estimated by optimization algorithms.
    fval : float
        Objective function value at xk.
    grad_val : float
        Gradient value of objective function at xk.
    grad_log : numpy.array
        The record of gradient of objective function of each iteration.
    """
    fval = None
    grad_val = None
    x_log = []
    y_log = []
    grad_log = []

    x0 = asarray(x0).flatten()
    # iterations = len(x0) * 200
    old_fval = fun(x0)
    gfk = grad(x0)
    k = 0
    old_old_fval = old_fval + np.linalg.norm(gfk) / 2

    xk = x0
    x_log = np.append(x_log, xk.T)
    y_log = np.append(y_log, fun(xk))
    grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
    gnorm = np.amax(np.abs(gfk))

    while (gnorm > tol) and (k < iterations):
        pk = -gfk

        try:
            alpha, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
                fun, grad, xk, pk, gfk, old_fval, old_old_fval,
                amin=1e-100, amax=1e100)
        except _LineSearchError:
            break

        xk = xk + alpha * pk
        # Refresh the gradient and its norm at the new iterate.
        if gfkp1 is None:
            gfkp1 = grad(xk)
        gfk = gfkp1
        gnorm = np.amax(np.abs(gfk))
        k += 1

        grad_log = np.append(grad_log, np.linalg.norm(xk - x_log[-1:]))
        x_log = np.append(x_log, xk.T)
        y_log = np.append(y_log, fun(xk))

        if (gnorm <= tol):
            break

    fval = old_fval
    grad_val = grad_log[-1]

    return xk, fval, grad_val, x_log, y_log, grad_log
def _minimize_cg(fun, x0, args=(), jac=None, callback=None,
                 gtol=1e-5, norm=Inf, eps=_epsilon, maxiter=None,
                 disp=False, return_all=False, xtol=1e-6,
                 **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    conjugate gradient algorithm.

    Options for the conjugate gradient algorithm are:
        disp : bool
            Set to True to print convergence messages.
        maxiter : int
            Maximum number of iterations to perform.
        gtol : float
            Gradient norm must be less than `gtol` before successful
            termination.
        norm : float
            Order of norm (Inf is max, -Inf is min).
        eps : float or ndarray
            If `jac` is approximated, use this value for the step size.

    This function is called by the `minimize` function with `method=CG`.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if maxiter is None:
        maxiter = len(x0) * 200

    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)

    gfk = myfprime(x0)
    k = 0
    xk = x0
    old_fval = f(xk)
    old_old_fval = None

    if retall:
        allvecs = [xk]
    warnflag = 0
    pk = -gfk
    gnorm = vecnorm(gfk, ord=norm)

    while (gnorm > gtol) and (k < maxiter):
        deltak = numpy.dot(gfk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, myfprime, xk, pk, gfk,
                                     old_fval, old_old_fval,
                                     c2=0.4, xtol=xtol)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xk = xk + alpha_k * pk
        if retall:
            allvecs.append(xk)
        if gfkp1 is None:
            gfkp1 = myfprime(xk)
        yk = gfkp1 - gfk
        beta_k = max(0, numpy.dot(yk, gfkp1) / deltak)
        pk = -gfkp1 + beta_k * pk
        gfk = gfkp1
        gnorm = vecnorm(gfk, ord=norm)
        if callback is not None:
            callback(xk)
        k += 1

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning: " + msg)
            print(" Current function value: %f" % fval)
            print(" Iterations: %d" % k)
            print(" Function evaluations: %d" % func_calls[0])
            print(" Gradient evaluations: %d" % grad_calls[0])
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning: " + msg)
            print(" Current function value: %f" % fval)
            print(" Iterations: %d" % k)
            print(" Function evaluations: %d" % func_calls[0])
            print(" Gradient evaluations: %d" % grad_calls[0])
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print(" Current function value: %f" % fval)
            print(" Iterations: %d" % k)
            print(" Function evaluations: %d" % func_calls[0])
            print(" Gradient evaluations: %d" % grad_calls[0])

    result = OptimizeResult(fun=fval, jac=gfk, nfev=func_calls[0],
                            njev=grad_calls[0], status=warnflag,
                            success=(warnflag == 0), message=msg, x=xk)
    if retall:
        result['allvecs'] = allvecs
    return result
def _minimize_lbfgsb(
    fun,
    x0,
    bounds=None,
    args=(),
    kwargs={},
    jac=None,
    callback=None,
    tol={"abs": 1e-05, "rel": 1e-08},
    norm=np.Inf,
    maxiter=None,
    disp=False,
    return_all=False,
    **unknown_options
):
    """
    Minimization of scalar function of one or more variables using a
    bound-constrained limited-memory BFGS (L-BFGS-B type) algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    maxiter : int
        Maximum number of iterations to perform.
    tol : dict
        Absolute and relative tolerance values.
    norm : float
        Order of norm (Inf is max, -Inf is min).
    """
    _check_unknown_options(unknown_options)

    def f(x0):
        return fun(x0, *args, **kwargs)

    fprime = jac
    # epsilon = eps  # TODO: add functionality
    retall = return_all
    k = 0
    ns = 0
    nsmax = 5
    N = len(x0)

    x0 = np.asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)

    if bounds is None:
        bounds = np.array([np.inf] * N * 2).reshape((2, N))
        bounds[0, :] = -bounds[0, :]
    if bounds.shape[1] != N:
        raise ValueError("length of x0 != length of bounds")

    low = bounds[0, :]
    up = bounds[1, :]
    x0 = np.clip(x0, low, up)

    if maxiter is None:
        maxiter = len(x0) * 200

    if not callable(fprime):

        def myfprime(x0):
            return approx_derivative(f, x0, args=args, kwargs=kwargs)

    else:
        myfprime = fprime

    # Setup for iteration
    old_fval = f(x0)

    gf0 = myfprime(x0)
    gfk = gf0
    norm_pg0 = vecnorm(x0 - np.clip(x0 - gf0, low, up), ord=norm)

    xk = x0
    norm_pgk = norm_pg0

    sstore = np.zeros((maxiter, N))
    ystore = sstore.copy()

    if retall:
        allvecs = [x0]
    warnflag = 0

    # Calculate indices of the active and inactive sets using the
    # projected gradient.
    epsilon = min(np.min(up - low) / 2, norm_pgk)
    activeset = np.logical_or(xk - low <= epsilon, up - xk <= epsilon)
    inactiveset = np.logical_not(activeset)

    for _ in range(maxiter):  # for loop instead.

        # Check tolerance of gradient norm
        if norm_pgk <= tol["abs"] + tol["rel"] * norm_pg0:
            break

        pk = -gfk
        pk = bfgsrecb(ns, sstore, ystore, pk, activeset)
        gfk_active = gfk.copy()
        gfk_active[inactiveset] = 0
        pk = -gfk_active + pk

        # Sets the initial step guess to dx ~ 1
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
                f,
                myfprime,
                xk,
                pk,
                gfk,
                old_fval,
                old_old_fval,
                amin=1e-100,
                amax=1e100,
            )
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = np.clip(xk + alpha_k * pk, low, up)
        if retall:
            allvecs.append(xkp1)
        yk = myfprime(xkp1) - gfk
        sk = xkp1 - xk
        xk = xkp1
        gfk = myfprime(xkp1)

        norm_pgk = vecnorm(xk - np.clip(xk - gfk, low, up), ord=norm)

        # Calculate indices of the active and inactive sets using the
        # projected gradient.
        epsilon = min(np.min(up - low) / 2, norm_pgk)
        activeset = np.logical_or(xk - low <= epsilon, up - xk <= epsilon)
        inactiveset = np.logical_not(activeset)
        yk[activeset] = 0
        sk[activeset] = 0

        # reset storage
        ytsk = yk.dot(sk)
        if ytsk <= 0:
            ns = 0
        if ns == nsmax:
            print("ns reached maximum size")
            ns = 0
        elif ytsk > 0:
            ns += 1
            alpha0 = ytsk ** 0.5
            sstore[ns - 1, :] = sk / alpha0
            ystore[ns - 1, :] = yk / alpha0

        k += 1

        if callback is not None:
            callback(xk)

        if np.isinf(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break
        if np.isnan(xk).any():
            warnflag = 3
            break

    fval = old_fval

    if warnflag == 2:
        msg = _status_message["pr_loss"]
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message["maxiter"]
    elif np.isnan(fval) or np.isnan(xk).any():
        warnflag = 3
        msg = _status_message["nan"]
    else:
        msg = _status_message["success"]

    if disp:
        print("{}{}".format("Warning: " if warnflag != 0 else "", msg))
        print(" Current function value: %f" % fval)
        print(" Iterations: %d" % k)

    result = OptimizeResult(
        fun=fval,
        jac=gfk,
        status=warnflag,
        success=(warnflag == 0),
        message=msg,
        x=xk,
        nit=k,
    )
    if retall:
        result["allvecs"] = allvecs

    return result
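# The active/inactive split in _minimize_lbfgsb treats every variable within
# an epsilon-neighbourhood of a bound as active, with epsilon tied to the
# projected gradient norm. A self-contained sketch of that bookkeeping on toy
# values (all numbers below are illustrative):
import numpy as np

low = np.array([0.0, 0.0, -1.0])
up = np.array([1.0, 2.0, 1.0])
xk = np.array([0.001, 1.0, 0.999])
gfk = np.array([0.5, -0.2, 0.1])

# Projected gradient and its infinity norm.
proj_grad = xk - np.clip(xk - gfk, low, up)
norm_pgk = np.max(np.abs(proj_grad))

# Variables within epsilon of a bound are frozen (active set); the
# quasi-Newton update acts on the rest (inactive set).
epsilon = min(np.min(up - low) / 2, norm_pgk)
activeset = np.logical_or(xk - low <= epsilon, up - xk <= epsilon)
inactiveset = np.logical_not(activeset)

print(epsilon)     # 0.2 = min(min(up - low)/2 = 0.5, norm_pgk = 0.2)
print(activeset)   # [ True False  True]: first and last variables sit near bounds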
def _minimize_bhhh(fun, x0, bounds=None, args=(), jac=None, callback=None,
                   tol={"abs": 1e-05, "rel": 1e-08}, norm=np.Inf,
                   maxiter=None, disp=False, return_all=False,
                   **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    BHHH algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    maxiter : int
        Maximum number of iterations to perform.
    tol : dict
        Absolute and relative tolerance values.
    norm : float
        Order of norm (Inf is max, -Inf is min).
    """
    _check_unknown_options(unknown_options)

    f = fun
    fprime = jac
    retall = return_all
    k = 0
    N = len(x0)

    x0 = np.asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1, )

    if bounds is None:
        bounds = np.array([np.inf] * N * 2).reshape((2, N))
        bounds[0, :] = -bounds[0, :]
    if bounds.shape[1] != N:
        raise ValueError("length of x0 != length of bounds")

    low = bounds[0, :]
    up = bounds[1, :]
    x0 = np.clip(x0, low, up)

    if maxiter is None:
        maxiter = len(x0) * 200

    # Need the aggregate functions to take only x0 as an argument
    func_calls, agg_fun = wrap_function_agg(f, args)
    if not callable(fprime):
        grad_calls, myfprime = wrap_function_num_dev(f, args)
    else:
        grad_calls, myfprime = wrap_function(fprime, args)

    def agg_fprime(x0):
        return myfprime(x0).sum(axis=0)

    # Setup for iteration
    old_fval = agg_fun(x0)

    gf0 = agg_fprime(x0)
    norm_pg0 = vecnorm(x0 - np.clip(x0 - gf0, low, up), ord=norm)

    xk = x0
    norm_pgk = norm_pg0

    if retall:
        allvecs = [x0]
    warnflag = 0

    for _ in range(maxiter):  # for loop instead.

        # Individual
        gfk_obs = myfprime(xk)

        # Aggregate fprime. Might replace by simply summing up gfk_obs
        gfk = gfk_obs.sum(axis=0)
        norm_pgk = vecnorm(xk - np.clip(xk - gfk, low, up), ord=norm)

        # Check tolerance of gradient norm
        if norm_pgk <= tol["abs"] + tol["rel"] * norm_pg0:
            break

        # Sets the initial step guess to dx ~ 1
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2

        # Calculate BHHH hessian and step
        Hk = np.dot(gfk_obs.T, gfk_obs)
        Bk = np.linalg.inv(Hk)
        pk = np.empty(N)
        pk = -np.dot(Bk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
                agg_fun,
                agg_fprime,
                xk,
                pk,
                gfk,
                old_fval,
                old_old_fval,
                amin=1e-100,
                amax=1e100,
            )
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = np.clip(xk + alpha_k * pk, low, up)
        if retall:
            allvecs.append(xkp1)
        xk = xkp1
        if callback is not None:
            callback(xk)
        k += 1

        if np.isinf(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

    fval = old_fval

    if warnflag == 2:
        msg = _status_message["pr_loss"]
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message["maxiter"]
    elif np.isnan(fval) or np.isnan(xk).any():
        warnflag = 3
        msg = _status_message["nan"]
    else:
        msg = _status_message["success"]

    if disp:
        print("{}{}".format("Warning: " if warnflag != 0 else "", msg))
        print(" Current function value: %f" % fval)
        print(" Iterations: %d" % k)

    result = OptimizeResult(
        fun=fval,
        jac=gfk,
        hess_inv=Bk,
        nfev=func_calls[0],
        njev=grad_calls[0],
        status=warnflag,
        success=(warnflag == 0),
        message=msg,
        x=xk,
        nit=k,
    )
    if retall:
        result["allvecs"] = allvecs

    return result
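# The step in _minimize_bhhh comes from the outer-product (BHHH) approximation
# to the Hessian, Hk = sum_i g_i g_i^T, built from per-observation gradients.
# A self-contained toy sketch of that construction (random data, illustrative
# names, not part of the original routine):
import numpy as np

rng = np.random.default_rng(1)
n_obs, n_params = 50, 3
gfk_obs = rng.standard_normal((n_obs, n_params))  # one gradient row per observation

gfk = gfk_obs.sum(axis=0)          # aggregate gradient
Hk = gfk_obs.T @ gfk_obs           # BHHH outer-product Hessian approximation
Bk = np.linalg.inv(Hk)             # its inverse
pk = -Bk @ gfk                     # Newton-like search direction, as in _minimize_bhhh

print(Hk.shape)                    # (3, 3)
print(np.dot(pk, gfk) < 0)         # True here: descent direction, Hk is positive definite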