def nonlinear_conjugate_gradients(oracle, x_0, tolerance=1e-4, max_iter=500,
                                  line_search_options={'method': 'Wolfe', 'c1': 1e-4, 'c2': 0.2},
                                  display=False, trace=False):
    """
    Nonlinear conjugate gradient method for optimization (Polak--Ribiere version).

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # Completion of the TODO: a Polak--Ribiere iteration mirroring the fully
    # worked variant of this function further down in this file.
    t_0 = datetime.now()
    grad_k = oracle.grad(x_k)
    norm_grad_0 = norm_grad_k = np.linalg.norm(grad_k)
    d_k = -grad_k
    for it in range(max_iter + 1):
        if display:
            print(it)
        if trace:
            history['time'].append((datetime.now() - t_0).total_seconds())
            history['grad_norm'].append(norm_grad_k)
            history['func'].append(oracle.func(x_k))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        # Stopping criterion
        if norm_grad_k ** 2 <= tolerance * norm_grad_0 ** 2:
            return x_k, 'success', history
        if it >= max_iter:
            break
        # Line search along the current conjugate direction
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k
        grad_prev, grad_k = grad_k, oracle.grad(x_k)
        norm_grad_k = np.linalg.norm(grad_k)
        # Polak--Ribiere update: beta_k = <g_k, g_k - g_{k-1}> / ||g_{k-1}||^2
        beta = np.dot(grad_k, grad_k - grad_prev) / np.dot(grad_prev, grad_prev)
        d_k = -grad_k + beta * d_k
    return x_k, 'iterations_exceeded', history
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    def fill_history():
        if not trace:
            return
        history['func'].append(oracle.func(x_k))
        history['time'].append((datetime.now() - t_0).total_seconds())
        history['grad_norm'].append(grad_k_norm)
        if x_size <= 2:
            history['x'].append(np.copy(x_k))

    def do_display():
        if not display:
            return
        if len(x_k) <= 4:
            print('x = {}, '.format(np.round(x_k, 4)), end='')
        print('func = {}, grad_norm = {}'.format(np.round(oracle.func(x_k), 4),
                                                 np.round(grad_k_norm, 4)))

    t_0 = datetime.now()
    x_size = len(x_k)
    message = None
    grad_k = oracle.grad(x_k)
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)

    def bfgs_multiply(v, H, gamma_0):
        # Recursive product of the L-BFGS inverse-Hessian estimate with v.
        if len(H) == 0:
            return gamma_0 * v
        s, y = H[-1]
        H = H[:-1]
        v_new = v - (s @ v) / (y @ s) * y
        z = bfgs_multiply(v_new, H, gamma_0)
        return z + (s @ v - y @ z) / (y @ s) * s

    def bfgs_direction():
        if len(H) == 0:
            return -grad_k
        s, y = H[-1]
        gamma_0 = (y @ s) / (y @ y)
        return bfgs_multiply(-grad_k, H, gamma_0)

    H = []
    for k in range(max_iter):
        do_display()
        fill_history()
        d = bfgs_direction()
        alpha = line_search_tool.line_search(oracle, x_k, d)
        x_new = x_k + alpha * d
        grad_new = oracle.grad(x_new)
        H.append((x_new - x_k, grad_new - grad_k))
        if len(H) > memory_size:
            H = H[1:]
        x_k, grad_k = x_new, grad_new
        grad_k_norm = np.linalg.norm(grad_k)
        if grad_k_norm ** 2 < tolerance * grad_0_norm ** 2:
            message = 'success'
            break
    do_display()
    fill_history()
    if not grad_k_norm ** 2 < tolerance * grad_0_norm ** 2:
        message = 'iterations_exceeded'
    return x_k, message, history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    if display:
        print("x_0: {}".format(x_k))

    g_norm_0 = np.linalg.norm(oracle.grad(x_0))
    start = datetime.now()
    for it in range(max_iter):
        g_k = oracle.grad(x_k)
        g_norm = np.linalg.norm(g_k)
        t = (datetime.now() - start).total_seconds()
        if trace:
            history['time'].append(t)
            if x_0.size <= 2:
                history['x'].append(x_k)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(g_norm)

        if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k)) and np.isfinite(g_k).all()):
            return x_k, 'computational_error', history
        if g_norm ** 2 <= tolerance * g_norm_0 ** 2:
            return x_k, 'success', history

        # Search of direction: solve the Newton system inexactly with CG.
        eta_k = min(0.5, np.linalg.norm(g_k) ** 0.5)
        d_k = conjugate_gradients(partial(oracle.hess_vec, x_k), -g_k, -g_k, eta_k)[0]
        while np.dot(d_k, g_k) >= 0:
            # Not a descent direction: tighten the CG tolerance and re-solve
            # the same system, warm-starting from the current d_k.
            eta_k *= 0.1
            d_k = conjugate_gradients(partial(oracle.hess_vec, x_k), -g_k, d_k, eta_k)[0]

        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k
        if display:
            print(f"x_k: {x_k}, d_k: {d_k}, alpha: {alpha}, eta: {eta_k}")

    if display:
        print(f"x_star: {x_k}")

    g = oracle.grad(x_k)
    g_norm = np.linalg.norm(g)
    t = (datetime.now() - start).total_seconds()
    if trace:
        history['time'].append(t)
        if x_0.size <= 2:
            history['x'].append(x_k)
        history['func'].append(oracle.func(x_k))
        history['grad_norm'].append(g_norm)
    if g_norm ** 2 <= tolerance * g_norm_0 ** 2:
        return x_k, 'success', history
    return x_k, 'iterations_exceeded', history
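# The hessian_free_newton variants in this file call a conjugate_gradients
# helper that is defined elsewhere in this project. The sketch below is a
# minimal linear CG solver matching the call signature used here
# (matvec, b, x_0, tolerance, max_iter, trace, display) and returning
# (x, message, history); the stopping rule ||Ax - b|| <= tolerance * ||b||
# is an assumption, and the name is prefixed to avoid claiming the real helper.
def _conjugate_gradients_sketch(matvec, b, x_0, tolerance=1e-4, max_iter=None,
                                trace=False, display=False):
    x_k = np.copy(x_0)
    g_k = matvec(x_k) - b  # residual of the linear system A x = b
    d_k = -g_k
    b_norm = np.linalg.norm(b)
    num_iter = 0
    while np.linalg.norm(g_k) > tolerance * b_norm:
        if max_iter is not None and num_iter >= max_iter:
            return x_k, 'iterations_exceeded', None
        Ad_k = matvec(d_k)
        alpha = g_k.dot(g_k) / d_k.dot(Ad_k)
        x_k = x_k + alpha * d_k
        g_next = g_k + alpha * Ad_k
        # Fletcher--Reeves-style update, exact for linear CG.
        beta = g_next.dot(g_next) / g_k.dot(g_k)
        d_k = -g_next + beta * d_k
        g_k = g_next
        num_iter += 1
    return x_k, 'success', None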
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    def bfgs_multiply(v, H, gamma_0):
        # Recursive product of the L-BFGS inverse-Hessian estimate with v.
        if not H:
            return gamma_0 * v
        s, y = H.pop()
        v_ = v - np.dot(s, v) / np.dot(y, s) * y
        z = bfgs_multiply(v_, H, gamma_0)
        return z + (np.dot(s, v) - np.dot(y, z)) / np.dot(y, s) * s

    def lbfgs_direction(g_k, H_k):
        if H_k:
            s, y = H_k[-1]
            gamma_0 = np.dot(y, s) / np.dot(y, y)
        else:
            gamma_0 = 1.0
        return bfgs_multiply(-g_k, copy.copy(H_k), gamma_0)

    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    if display:
        print("x_0: {}".format(x_k))

    g_norm_0 = np.linalg.norm(oracle.grad(x_0))
    H_k = deque(maxlen=memory_size)
    start = datetime.now()
    for it in range(max_iter):
        g_k = oracle.grad(x_k)
        g_norm = np.linalg.norm(g_k)
        t = (datetime.now() - start).total_seconds()
        if trace:
            history['time'].append(t)
            if x_0.size <= 2:
                history['x'].append(x_k)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(g_norm)

        if g_norm ** 2 <= tolerance * g_norm_0 ** 2:
            return x_k, 'success', history

        # Search of direction
        d_k = lbfgs_direction(g_k, H_k)
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_new = x_k + alpha * d_k
        # Reuse the already computed gradient g_k instead of re-querying it.
        H_k.append([alpha * d_k, oracle.grad(x_new) - g_k])
        x_k = x_new
        if display:
            print(f"x_k: {x_k}, d_k: {d_k}, alpha: {alpha}")

    if display:
        print(f"x_star: {x_k}")

    g = oracle.grad(x_k)
    g_norm = np.linalg.norm(g)
    t = (datetime.now() - start).total_seconds()
    if trace:
        history['time'].append(t)
        if x_0.size <= 2:
            history['x'].append(x_k)
        history['func'].append(oracle.func(x_k))
        history['grad_norm'].append(g_norm)
    if g_norm ** 2 <= tolerance * g_norm_0 ** 2:
        return x_k, 'success', history
    return x_k, 'iterations_exceeded', history
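# A usage sketch for the lbfgs variants in this file, reusing the
# QuadraticOracle example from the newton docstring just below; the expected
# output assumes the oracle behaves exactly as in that example.
# >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
# >> x_opt, message, history = lbfgs(oracle, np.zeros(5), memory_size=10)
# >> print('Found optimal point: {}'.format(x_opt))
# Found optimal point: [ 0.  1.  2.  3.  4.]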
def newton(oracle, x_0, tolerance=1e-5, max_iter=100,
           line_search_options=None, trace=False, display=False):
    """
    Newton's optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively. If the Hessian
        returned by the oracle is not positive-definite method stops with
        message="newton_direction_error"
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
            - 'newton_direction_error': in case of failure of solving linear
                system with Hessian matrix (e.g. non-invertible matrix).
            - 'computational_error': in case of getting Infinity or None value
                during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = newton(oracle, np.zeros(5), line_search_options={'method': 'Constant', 'c': 1.0})
    >> print('Found optimal point: {}'.format(x_opt))
    Found optimal point: [ 0.  1.  2.  3.  4.]
""" history = defaultdict(list) if trace else None line_search_tool = get_line_search_tool(line_search_options) x_k = np.copy(x_0) if display: print("x_0: {}".format(x_k)) grad_norm_0 = np.linalg.norm(oracle.grad(x_0)) start = datetime.now() for iter in range(max_iter): grad = oracle.grad(x_k) grad_norm = np.linalg.norm(grad) t = (datetime.now() - start).total_seconds() if trace: history['time'].append(t) if x_0.size <= 2: history['x'].append(x_k) history['func'].append(oracle.func(x_k)) history['grad_norm'].append(grad_norm) if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k)) and np.isfinite(oracle.grad(x_k)).all()): return x_k, 'computational_error', history if grad_norm**2 <= tolerance * grad_norm_0**2: return x_k, 'success', history try: L = scipy.linalg.cho_factor(oracle.hess(x_k)) except: return x_k, 'newton_direction_error', history d_k = -scipy.linalg.cho_solve(L, grad) n = x_k.size // 2 x, u = x_k[:n], x_k[n:] d_x, d_u = d_k[:n], d_k[n:] max_alpha = line_search_tool.alpha_0 if np.sum(d_x > d_u): max_alpha = min( ((u - x)[d_x > d_u] / (d_x - d_u)[d_x > d_u]).min() * 0.99, max_alpha) # u - x > 0 if np.sum(-d_x > d_u): max_alpha = min( ((u + x)[-d_x > d_u] / -(d_x + d_u)[-d_x > d_u]).min() * 0.99, max_alpha) # u + x > 0 alpha = line_search_tool.line_search(oracle, x_k, d_k, previous_alpha=max_alpha) x_k = x_k + alpha * d_k print(f"Newton iteration = {iter}, d_k = {d_k}, alpha_k = {alpha}") if display: print("x_k: {}, d_k: {}, alpha: {}".format(x_k, d_k, alpha)) if display: print("x_star: {}".format(x_k)) grad = oracle.grad(x_k) grad_norm = np.linalg.norm(grad) t = (datetime.now() - start).total_seconds() if trace: history['time'].append(t) if x_0.size <= 2: history['x'].append(x_k) history['func'].append(oracle.func(x_k)) history['grad_norm'].append(grad_norm) if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k)) and np.isfinite(oracle.grad(x_k)).all()): return x_k, 'computational_error', history if grad_norm**2 <= tolerance * grad_norm_0**2: return x_k, 'success', history return x_k, 'iterations_exceeded', history
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    labels = ['func', 'time', 'grad_norm', 'x']
    line_search_tool = get_line_search_tool(line_search_options)
    H = deque()
    x_k = np.copy(x_0)
    i = 0

    def bfgs_multiply(v, H, gamma):
        # Recursive product of the L-BFGS inverse-Hessian estimate with v.
        if H:
            H1 = H.copy()
            s, y = H1.pop()
            v1 = v - s.dot(v) / y.dot(s) * y
            z = bfgs_multiply(v1, H1, gamma)
            return z + (s.dot(v) - y.dot(z)) / y.dot(s) * s
        else:
            return gamma * v

    def lbfgs_direction(grad, H):
        if H:
            s, y = H[-1]
            gamma = y.dot(s) / y.dot(y)
            return bfgs_multiply(-grad, H, gamma)
        else:
            return -grad

    start = time.time()
    grad_0 = oracle.grad(x_k)
    grad_k = np.copy(grad_0)
    grad_k_prev = None
    x_k_prev = None
    msg = 'success'
    while grad_k.dot(grad_k) > tolerance * grad_0.dot(grad_0):
        if i >= max_iter:
            msg = 'iterations_exceeded'
            if display:
                print(msg)
            return x_k, msg, history
        hist_values = [oracle.func(x_k), time.time() - start,
                       np.linalg.norm(grad_k), x_k]
        make_history(history, labels, hist_values)

        if x_k_prev is not None:
            H.append((x_k - x_k_prev, grad_k - grad_k_prev))
            if len(H) > memory_size:
                H.popleft()
        d_k = lbfgs_direction(grad_k, H)
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k_prev = np.copy(x_k)
        grad_k_prev = np.copy(grad_k)
        x_k = x_k + alpha_k * d_k
        grad_k = oracle.grad(x_k)
        i += 1

    hist_values = [oracle.func(x_k), time.time() - start,
                   np.linalg.norm(grad_k), x_k]
    make_history(history, labels, hist_values)
    if display:
        print(msg)
    return x_k, msg, history
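# The variant above (and a hessian-free variant below) call a make_history
# helper that is defined elsewhere in this project. A minimal sketch
# consistent with its call sites -- labels ['func', 'time', 'grad_norm', 'x'],
# with 'x' stored only for small problems per the docstrings -- could be:
def _make_history_sketch(history, labels, values):
    if history is None:  # trace=False: nothing to record
        return
    for label, value in zip(labels, values):
        if label == 'x':
            if value.size <= 2:
                history['x'].append(np.copy(value))
        else:
            history[label].append(value)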
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # lbfgs_direction was referenced below but never defined in this variant;
    # the recursive helpers here mirror the other L-BFGS implementations in
    # this file.
    def bfgs_multiply(v, H, gamma_0):
        if not H:
            return gamma_0 * v
        s, y = H[-1]
        v_new = v - np.dot(s, v) / np.dot(y, s) * y
        z = bfgs_multiply(v_new, H[:-1], gamma_0)
        return z + (np.dot(s, v) - np.dot(y, z)) / np.dot(y, s) * s

    def lbfgs_direction(grad, H):
        if not H:
            return -grad
        s, y = H[-1]
        gamma_0 = np.dot(y, s) / np.dot(y, y)
        return bfgs_multiply(-grad, H, gamma_0)

    History = []
    x_m1 = None
    dfxk_m1 = None
    eps_due_to_float = 1e-8
    start_time = None
    dfx0_norm = np.linalg.norm(oracle.grad(x_0))
    for iteration in range(0, max_iter + 1):
        # Initialisation
        dfxk = oracle.grad(x_k)
        dfxk_norm = np.linalg.norm(dfxk)
        if display:
            print('Debug information:) Iteration number: ', iteration)
        # History update
        if trace:
            if start_time is None:
                start_time = datetime.now()
            if x_k.shape[0] <= 2:
                history['x'].append(x_k)
            history['grad_norm'].append(dfxk_norm)
            history['func'].append(oracle.func(x_k))
            history['time'].append((datetime.now() - start_time).total_seconds())
        # Stopping criterion check
        if dfxk_norm < (tolerance ** 0.5) * dfx0_norm + eps_due_to_float:
            return x_k, 'success', history
        # L-BFGS memory update
        if x_m1 is not None and dfxk_m1 is not None:
            History.append((x_k - x_m1, dfxk - dfxk_m1))
            if len(History) > memory_size:
                History = History[1:]
        # Step
        d_k = lbfgs_direction(dfxk, History)
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_m1 = np.copy(x_k)
        dfxk_m1 = np.copy(dfxk)
        x_k = x_k + alpha_k * d_k
    return x_k, 'iterations_exceeded', history
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm.
              ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(np.float64)
    timer = Timer()
    converge = False
    alpha_k = None

    s_trace, y_trace = deque(), deque()
    grad_k = oracle.grad(x_k)

    def lbfgs_direction(grad, s_trace, y_trace):
        # Standard two-loop recursion, applied directly to d = -grad.
        d = -grad
        if not s_trace:
            return d
        mus = []
        for s, y in zip(reversed(s_trace), reversed(y_trace)):
            mu = np.dot(s, d) / np.dot(s, y)
            mus.append(mu)
            d -= mu * y
        d *= np.dot(s_trace[-1], y_trace[-1]) / np.dot(y_trace[-1], y_trace[-1])
        for s, y, mu in zip(s_trace, y_trace, reversed(mus)):
            beta = np.dot(y, d) / np.dot(s, y)
            d += (mu - beta) * s
        return d

    for num_iter in range(max_iter + 1):
        f_k = oracle.func(x_k)
        grad_norm_k = scipy.linalg.norm(grad_k)
        if trace:
            history['time'].append(timer.seconds())
            history['func'].append(np.copy(f_k))
            history['grad_norm'].append(np.copy(grad_norm_k))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print('step', history['time'][-1] if history else '')

        if num_iter == 0:
            eps_grad_norm_0 = np.sqrt(tolerance) * grad_norm_k
        if grad_norm_k <= eps_grad_norm_0:
            converge = True
            break
        if num_iter == max_iter:
            break

        d_k = lbfgs_direction(grad_k, s_trace, y_trace)
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k,
                                               2.0 * alpha_k if alpha_k is not None else None)
        x_k += alpha_k * d_k
        last_grad_k = np.copy(grad_k)
        grad_k = oracle.grad(x_k)
        if memory_size > 0:
            if len(s_trace) == memory_size:
                s_trace.popleft()
                y_trace.popleft()
            s_trace.append(alpha_k * d_k)
            y_trace.append(grad_k - last_grad_k)

    return x_k, 'success' if converge else 'iterations_exceeded', history
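# The deque-based variants above and below use a Timer helper with a
# .seconds() method; it is defined elsewhere in this project. A minimal
# sketch under that assumption:
class _TimerSketch:
    def __init__(self):
        self._start = datetime.now()

    def seconds(self):
        # Elapsed wall-clock time since construction, in seconds.
        return (datetime.now() - self._start).total_seconds()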
def nonlinear_conjugate_gradients(oracle, x_0, tolerance=1e-4, max_iter=500,
                                  line_search_options={'method': 'Wolfe', 'c1': 1e-4, 'c2': 0.2},
                                  display=False, trace=False):
    """
    Nonlinear conjugate gradient method for optimization (Polak--Ribiere version).

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    t0 = datetime.now()
    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)

    for it in range(max_iter + 1):
        # Oracle
        if it > 0:
            grad_x_k_prev = grad_x_k.copy()
            norm_grad_x_k_prev = norm_grad_x_k
        f_x_k = oracle.func(x_k)
        grad_x_k = oracle.grad(x_k)
        norm_grad_x_k = np.linalg.norm(grad_x_k)

        # Debug info
        if display:
            print(it)

        # Fill trace data
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size < 3:
                history['x'].append(x_k)

        # Criterion
        if norm_grad_x_k * norm_grad_x_k <= tolerance * norm_grad_x_0 * norm_grad_x_0:
            break
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history

        # Direction (Polak--Ribiere beta)
        if it > 0:
            beta = np.dot(grad_x_k, (grad_x_k - grad_x_k_prev)) / (norm_grad_x_k_prev * norm_grad_x_k_prev)
            # Fletcher--Reeves alternative:
            # beta = (norm_grad_x_k * norm_grad_x_k) / (norm_grad_x_k_prev * norm_grad_x_k_prev)
            d_k = -grad_x_k + beta * d_k
        else:
            d_k = -grad_x_k

        # Line search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
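# Usage sketch for nonlinear_conjugate_gradients, mirroring the QuadraticOracle
# example from the newton docstring above. On a quadratic with a well-tuned
# line search, conjugate gradients is expected to converge in at most n steps;
# the printed message assumes the oracle behaves as in that example.
# >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
# >> x_opt, message, history = nonlinear_conjugate_gradients(oracle, np.zeros(5))
# >> print(message)
# success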
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = x_0.astype(float)
    t_0 = time()
    grad = oracle.grad(x_k)
    grad_norm = norm(grad)
    grad_norm_x0 = grad_norm
    if display:
        sys.stdout.write("Start of newton method\n")
    if trace:
        update_history_newton(history, oracle.func(x_k), t_0, x_k, grad_norm)

    i = 0
    while grad_norm > np.sqrt(tolerance) * grad_norm_x0:
        i += 1
        if i > max_iter:
            return x_k, 'iterations_exceeded', history
        matvec = lambda v: oracle.hess_vec(x_k, v)
        nu_k = min(0.5, np.sqrt(grad_norm))
        d_k = -grad
        d_k, _, _ = conjugate_gradients(matvec, -grad, d_k, tolerance=nu_k)
        while np.dot(grad, d_k) >= 0:
            # Not a descent direction: tighten the CG tolerance and re-solve,
            # warm-starting from the current d_k.
            nu_k /= 10.0
            d_k, _, _ = conjugate_gradients(matvec, -grad, d_k, tolerance=nu_k)
        alpha = line_search_tool.line_search(oracle, x_k, d_k, previous_alpha=1.0)
        if np.isinf(alpha) or np.isnan(alpha):
            return x_k, 'computational_error', history
        x_k += alpha * d_k
        grad = oracle.grad(x_k)
        grad_norm = norm(grad)
        if display:
            sys.stdout.write(str(nu_k) + '\n')
        if trace:
            update_history_newton(history, oracle.func(x_k), t_0, x_k, grad_norm)

    return x_k, 'success', history
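# update_history_newton above is assumed to be a project-level helper; a
# minimal sketch consistent with how it is called here
# (history, func_value, t_0, x_k, grad_norm), where t_0 is a time.time()
# timestamp, could be:
def _update_history_newton_sketch(history, func_value, t_0, x_k, grad_norm):
    history['func'].append(func_value)
    history['time'].append(time() - t_0)  # elapsed seconds since t_0
    history['grad_norm'].append(grad_norm)
    if x_k.size <= 2:
        history['x'].append(np.copy(x_k))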
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0) * 1.0
    x_k_old = None
    df_k_old = None
    H = deque()
    m = memory_size
    start = time.time()

    def pushHistory(x_k, oracle, df_k_n):
        # df_k_n is the squared gradient norm, hence the square root below.
        if trace:
            history['time'].append(time.time() - start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1 / 2))
            if len(x_k) <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)

    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)
    for k in range(max_iter):
        if x_k_old is not None:
            df_k_old = np.copy(df_k)
        df_k = oracle.grad(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history
        if x_k_old is not None:
            H.append((np.copy(x_k - x_k_old), np.copy(df_k - df_k_old)))
            if len(H) > m:
                H.popleft()

        # Two-loop recursion applied directly to d_k = -grad.
        d_k = -df_k
        mus = list()
        for s, y in reversed(H):
            mu = s.dot(d_k) / s.dot(y)
            mus.append(mu)
            d_k = d_k - y * mu
        if len(H) > 0:
            s_k, y_k = H[-1]
            d_k = d_k * s_k.dot(y_k) / y_k.dot(y_k)
        for (s, y), mu in zip(H, reversed(mus)):
            beta = y.dot(d_k) / s.dot(y)
            d_k = d_k + s * (mu - beta)

        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k_old = np.copy(x_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
def gradient_descent(oracle, x_0, tolerance=1e-5, max_iter=10000,
                     line_search_options=None, trace=False, display=False):
    """
    Gradient descent optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively.
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        "success" or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = gradient_descent(oracle, np.zeros(5), line_search_options={'method': 'Armijo', 'c1': 1e-4})
    >> print('Found optimal point: {}'.format(x_opt))
    Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0) * 1.0
    start = time.time()

    def pushHistory(x_k, oracle, df_k_n):
        # df_k_n is the squared gradient norm, hence the square root below.
        if trace:
            history['time'].append(time.time() - start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1 / 2))
            if len(x_k) <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)

    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)
    for k in range(max_iter):
        df_k = oracle.grad(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history
        d_k = -df_k
        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0) * 1.0
    start = time.time()

    def pushHistory(x_k, oracle, df_k_n):
        # df_k_n is the squared gradient norm, hence the square root below.
        if trace:
            history['time'].append(time.time() - start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1 / 2))
            if len(x_k) <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)

    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)
    for k in range(max_iter):
        df_k = oracle.grad(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history

        # df_k_norm is the squared gradient norm, so ** (1/4) gives the square
        # root of the gradient norm, matching the other variants in this file.
        eta = min(0.5, df_k_norm ** (1 / 4))
        matvec = lambda v: oracle.hess_vec(x_k, v)
        d_k, _, _ = conjugate_gradients(matvec, -df_k, -df_k, tolerance=eta)
        while d_k.dot(df_k) >= 0:
            # Not a descent direction: tighten the CG tolerance and re-solve.
            eta /= 10
            d_k, _, _ = conjugate_gradients(matvec, -df_k, -df_k, tolerance=eta)

        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    labels = ['func', 'time', 'grad_norm', 'x']
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    i = 0

    start = time.time()
    grad_0 = oracle.grad(x_k)
    grad_k = np.copy(grad_0)
    msg = 'success'
    while grad_k.dot(grad_k) > tolerance * grad_0.dot(grad_0):
        if i >= max_iter:
            msg = 'iterations_exceeded'
            if display:
                print(msg)
            return x_k, msg, history
        hist_values = [oracle.func(x_k), time.time() - start,
                       np.linalg.norm(grad_k), x_k]
        make_history(history, labels, hist_values)

        matvec = lambda v: oracle.hess_vec(x_k, v)
        mu_k = min(0.5, np.sqrt(np.linalg.norm(grad_k)))
        while True:
            d_k, _, _ = conjugate_gradients(matvec, -grad_k, -grad_k, mu_k)
            if grad_k.dot(d_k) < 0:
                break
            # Not a descent direction: tighten the CG tolerance and re-solve.
            mu_k = mu_k / 10.
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha_k * d_k
        grad_k = oracle.grad(x_k)
        i += 1

    hist_values = [oracle.func(x_k), time.time() - start,
                   np.linalg.norm(grad_k), x_k]
    make_history(history, labels, hist_values)
    if display:
        print(msg)
    return x_k, msg, history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    def fill_history():
        if not trace:
            return
        history['func'].append(oracle.func(x_k))
        history['time'].append((datetime.now() - t_0).total_seconds())
        history['grad_norm'].append(grad_k_norm)
        if x_size <= 2:
            history['x'].append(np.copy(x_k))

    def do_display():
        if not display:
            return
        if len(x_k) <= 4:
            print('x = {}, '.format(np.round(x_k, 4)), end='')
        print('func = {}, grad_norm = {}'.format(np.round(oracle.func(x_k), 4),
                                                 np.round(grad_k_norm, 4)))

    t_0 = datetime.now()
    x_size = len(x_k)
    message = None
    grad_k = oracle.grad(x_k)
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)

    for _ in range(max_iter):
        do_display()
        fill_history()
        eps = min(0.5, grad_k_norm ** 0.5)
        hess_vec = lambda v: oracle.hess_vec(x_k, v)
        while True:
            d, _, _ = conjugate_gradients(hess_vec, -grad_k, -grad_k, eps)
            if grad_k @ d < 0:
                break
            # Not a descent direction: make the inner CG solve more accurate.
            eps /= 10
        alpha = line_search_tool.line_search(oracle, x_k, d, previous_alpha=1)
        x_k = x_k + alpha * d
        grad_k = oracle.grad(x_k)
        grad_k_norm = np.linalg.norm(grad_k)
        if grad_k_norm ** 2 < tolerance * grad_0_norm ** 2:
            message = 'success'
            break
    do_display()
    fill_history()
    if not grad_k_norm ** 2 < tolerance * grad_0_norm ** 2:
        message = 'iterations_exceeded'
    return x_k, message, history
def gradient_descent(oracle, x_0, tolerance=1e-5, max_iter=10000,
                     line_search_options=None, trace=False, display=False):
    """
    Gradient descent optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively.
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        "success" or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
            - 'computational_error': in case of getting Infinity or None value
                during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = gradient_descent(oracle, np.zeros(5), line_search_options={'method': 'Armijo', 'c1': 1e-4})
    >> print('Found optimal point: {}'.format(x_opt))
    Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    def fill_history():
        if not trace:
            return
        history['time'].append((datetime.now() - t_0).total_seconds())
        history['func'].append(func_k)
        history['grad_norm'].append(grad_k_norm)
        if len(x_k) <= 2:
            history['x'].append(np.copy(x_k))

    t_0 = datetime.now()
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    a_k = None
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)
    fill_history()

    if display:
        print('Begin new GD')
    for i in range(max_iter):
        if display:
            print('i = {} grad_norm = {} func = {} x = {} grad = {}'.format(
                i, grad_k_norm, func_k, x_k, grad_k), end=' ')
        if grad_k_norm ** 2 <= tolerance * grad_0_norm ** 2:
            break
        d_k = -grad_k
        # Warm-start the line search with twice the previous step size.
        a_k = line_search_tool.line_search(oracle, x_k, d_k, 2 * a_k if a_k else None)
        if display:
            print('alpha = {}'.format(a_k))
        x_k = x_k + a_k * d_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_k_norm = np.linalg.norm(grad_k)
        fill_history()
    if display:
        print()

    if grad_k_norm ** 2 <= tolerance * grad_0_norm ** 2:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10,
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    t0 = datetime.now()
    x_k = np.copy(x_0)
    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)
    memory = []

    for it in range(max_iter + 1):
        f_x_k = oracle.func(x_k)
        if it > 0:
            grad_x_k_prev = grad_x_k.copy()
            grad_x_k = oracle.grad(x_k)
            s_k_prev = x_k - x_k_prev
            y_k_prev = grad_x_k - grad_x_k_prev
            memory.append((s_k_prev, y_k_prev))
            # Keep at most memory_size correction pairs.
            while len(memory) > memory_size:
                memory.pop(0)
        else:
            grad_x_k = oracle.grad(x_k)
        norm_grad_x_k = np.linalg.norm(grad_x_k)

        # Fill trace data
        if display:
            print(it)
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size < 3:
                history['x'].append(x_k)

        if norm_grad_x_k * norm_grad_x_k <= tolerance * norm_grad_x_0 * norm_grad_x_0:
            break
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history

        # Two-loop recursion applied directly to d_k = -grad.
        d_k = -grad_x_k
        if it > 0:
            mu = np.zeros(len(memory))
            for index in range(len(memory)):
                i = len(memory) - index - 1
                s_i, y_i = memory[i]
                mu[i] = np.dot(s_i, d_k) / np.dot(s_i, y_i)
                d_k = d_k - mu[i] * y_i
            s_k_prev, y_k_prev = memory[len(memory) - 1]
            d_k = (np.dot(s_k_prev, y_k_prev) / np.dot(y_k_prev, y_k_prev)) * d_k
            for i in range(len(memory)):
                s_i, y_i = memory[i]
                beta = np.dot(y_i, d_k) / np.dot(s_i, y_i)
                d_k = d_k + (mu[i] - beta) * s_i

        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k_prev = x_k.copy()
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
                x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(np.float64)
    timer = Timer()
    converge = False
    alpha_k = None

    for num_iter in range(max_iter + 1):
        f_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_norm_k = scipy.linalg.norm(grad_k)
        if trace:
            history['time'].append(timer.seconds())
            history['func'].append(np.copy(f_k))
            history['grad_norm'].append(np.copy(grad_norm_k))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print('step', history['time'][-1] if history else '')

        if num_iter == 0:
            eps_grad_norm_0 = np.sqrt(tolerance) * grad_norm_k
        if grad_norm_k <= eps_grad_norm_0:
            converge = True
            break
        if num_iter == max_iter:
            break

        # Solve the Newton system inexactly; keep tightening the CG tolerance
        # until the result is a descent direction.
        eta = min(0.5, np.sqrt(grad_norm_k))
        conjugate_gradient_converge = False
        while not conjugate_gradient_converge:
            d_k, _, _ = conjugate_gradients(lambda d: oracle.hess_vec(x_k, d),
                                            -grad_k, -grad_k, tolerance=eta)
            eta /= 10
            conjugate_gradient_converge = np.dot(d_k, grad_k) < 0

        alpha_k = line_search_tool.line_search(oracle, x_k, d_k, 1.0)
        x_k += alpha_k * d_k

    return x_k, 'success' if converge else 'iterations_exceeded', history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian-free Newton method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
              x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    t0 = datetime.now()

    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)

    for it in range(max_iter + 1):
        # Oracle calls
        f_x_k = oracle.func(x_k)
        grad_x_k = oracle.grad(x_k)
        norm_grad_x_k = np.linalg.norm(grad_x_k)

        # Fill trace data
        if display:
            print(it)
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))

        # Stopping criterion
        if norm_grad_x_k ** 2 <= tolerance * norm_grad_x_0 ** 2:
            break
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history

        # Direction: solve hess(x_k) d = -grad(x_k) inexactly with CG,
        # restarting from the previous d_k with a 10x smaller forcing
        # parameter until d_k is a descent direction.
        eta_k = min(0.5, np.sqrt(norm_grad_x_k))
        matvec = lambda v: oracle.hess_vec(x_k, v)
        d_k = -grad_x_k
        while True:
            d_k, msg_cg, history_cg = conjugate_gradients(
                matvec, -grad_x_k, d_k, tolerance=eta_k)
            if np.dot(d_k, grad_x_k) < 0:
                break
            eta_k /= 10.0

        # Line search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500,
                        line_search_options=None, display=False, trace=False):
    """
    Hessian-free Newton method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented
        for computing function value, its gradient and matrix product of
        the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
              x_k still doesn't satisfy the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # Small additive slack to make the squared-norm comparison robust
    # to floating-point round-off.
    eps_due_to_float = 1e-8
    start_time = datetime.now()
    dfx0 = oracle.grad(x_0)
    dfx0_norm2 = dfx0.dot(dfx0)

    for iteration in range(max_iter + 1):
        dfxk = oracle.grad(x_k)
        dfxk_norm2 = dfxk.dot(dfxk)

        # History update
        if display:
            print('Iteration number:', iteration)
        if trace:
            if x_k.shape[0] <= 2:
                history['x'].append(np.copy(x_k))
            history['grad_norm'].append(np.sqrt(dfxk_norm2))
            history['func'].append(oracle.func(x_k))
            history['time'].append(
                (datetime.now() - start_time).total_seconds())

        # Stopping criterion check
        if dfxk_norm2 < tolerance * dfx0_norm2 + eps_due_to_float:
            return x_k, 'success', history

        # Inexact Newton direction via CG: note that n_k = sqrt(||grad||),
        # since dfxk_norm2 is the squared norm. Shrink n_k and restart CG
        # from the previous d_k until d_k is a descent direction.
        n_k = min(0.5, dfxk_norm2 ** 0.25)
        d_start = -dfxk
        matvec = lambda v: oracle.hess_vec(x_k, v)
        while True:
            d_k, message, history_cg = conjugate_gradients(
                matvec, -dfxk, d_start, tolerance=n_k,
                max_iter=None, trace=False, display=False)
            if dfxk.dot(d_k) < 0:
                break
            n_k /= 10
            d_start = d_k

        # Step size via line search, then update x_k
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha_k * d_k

    return x_k, 'iterations_exceeded', history
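# A minimal usage sketch for hessian_free_newton() above (hypothetical,
# not checked by the tests). It assumes the QuadraticOracle from the
# newton() docstring example below also implements .hess_vec(x, v) = A @ v,
# which the hessian-free method needs instead of the full Hessian.
def _hessian_free_newton_usage_example():
    oracle = QuadraticOracle(np.eye(5), np.arange(5))
    x_star, message, _ = hessian_free_newton(oracle, np.zeros(5))
    print(message)               # expected: 'success'
    print(np.round(x_star, 2))   # expected: [0. 1. 2. 3. 4.]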
def newton(oracle, x_0, tolerance=1e-5, max_iter=100,
           line_search_options=None, trace=False, display=False):
    """
    Newton's optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively. If the Hessian
        returned by the oracle is not positive-definite method stops with
        message="newton_direction_error"
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary
        during training. Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method
              x_k still doesn't satisfy the stopping criterion.
            - 'newton_direction_error': in case of failure of solving linear
              system with Hessian matrix (e.g. non-invertible matrix).
            - 'computational_error': in case of getting Infinity or None value
              during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = newton(oracle, np.zeros(5), line_search_options={'method': 'Constant', 'c': 1.0})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    t0 = datetime.now()

    norm_grad_0 = np.linalg.norm(oracle.grad(x_0))

    for iteration in range(max_iter + 1):
        # Oracle calls
        grad_k = oracle.grad(x_k)
        norm_grad_k = np.linalg.norm(grad_k)
        hess_k = oracle.hess(x_k)

        # Fill trace data
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(norm_grad_k)
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print('iteration:', iteration, 'grad_norm:', norm_grad_k)

        # Stopping criterion
        if norm_grad_k ** 2 <= tolerance * norm_grad_0 ** 2:
            break
        if iteration == max_iter:
            return x_k, 'iterations_exceeded', history

        # Newton direction: solve hess(x_k) d = -grad(x_k) via a Cholesky
        # factorization; cho_factor raises LinAlgError when the Hessian is
        # not positive-definite.
        try:
            c_and_lower = scipy.linalg.cho_factor(hess_k, lower=True)
            d_k = scipy.linalg.cho_solve(c_and_lower, -grad_k)
        except scipy.linalg.LinAlgError:
            return x_k, 'newton_direction_error', history

        # Line search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        if alpha is None:
            return x_k, 'computational_error', history

        # Update x_k
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
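# A minimal standalone sketch of the Cholesky-based direction solve used
# in newton() above, on hand-made data (the SPD matrix H and gradient g
# are hypothetical). cho_factor/cho_solve exploit symmetric positive
# definiteness and fail exactly when the matrix is not positive-definite,
# which is what newton() relies on to detect a bad Hessian.
def _newton_direction_example():
    H = np.array([[4.0, 1.0],
                  [1.0, 3.0]])                   # symmetric positive-definite
    g = np.array([1.0, 2.0])                     # gradient at the current point
    c_and_lower = scipy.linalg.cho_factor(H, lower=True)
    d = scipy.linalg.cho_solve(c_and_lower, -g)  # Newton direction: H d = -g
    assert np.allclose(H @ d, -g)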