Example #1
def nonlinear_conjugate_gradients(oracle,
                                  x_0,
                                  tolerance=1e-4,
                                  max_iter=500,
                                  line_search_options={
                                      'method': 'Wolfe',
                                      'c1': 1e-4,
                                      'c2': 0.2
                                  },
                                  display=False,
                                  trace=False):
    """
    Nonlinear conjugate gradient method for optimization (Polak--Ribiere version).

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # TODO: Implement the Nonlinear conjugate gradient method.
    # Use line_search_tool.line_search() for adaptive step size.
    return x_k, 'success', history
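# The body above is left as a TODO. Below is a minimal sketch of the Polak--Ribiere loop the docstring
# describes, assuming the same oracle / get_line_search_tool interfaces as the other examples in this
# collection (trace/history handling omitted for brevity); it is a sketch, not the reference solution.

def nonlinear_conjugate_gradients_sketch(oracle, x_0, tolerance=1e-4, max_iter=500,
                                         line_search_options=None):
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(float)
    g_k = oracle.grad(x_k)
    g_0_sq = g_k @ g_k
    d_k = -g_k
    for _ in range(max_iter):
        if g_k @ g_k <= tolerance * g_0_sq:
            return x_k, 'success', None
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k
        g_new = oracle.grad(x_k)
        beta = g_new @ (g_new - g_k) / (g_k @ g_k)  # Polak--Ribiere coefficient
        d_k = -g_new + beta * d_k
        g_k = g_new
    message = 'success' if g_k @ g_k <= tolerance * g_0_sq else 'iterations_exceeded'
    return x_k, message, None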
Example #2
def lbfgs(oracle,
          x_0,
          tolerance=1e-4,
          max_iter=500,
          memory_size=10,
          line_search_options=None,
          display=False,
          trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # TODO: Implement L-BFGS method.
    # Use line_search_tool.line_search() for adaptive step size.
    def fill_history():
        if not trace:
            return
        history['func'].append(oracle.func(x_k))
        history['time'].append((datetime.now() - t_0).total_seconds())
        history['grad_norm'].append(grad_k_norm)
        if x_size <= 2:
            history['x'].append(np.copy(x_k))

    def do_display():
        if not display:
            return
        if len(x_k) <= 4:
            print('x = {}, '.format(np.round(x_k, 4)), end='')
        print('func= {}, grad_norm = {}'.format(np.round(oracle.func(x_k), 4),
                                                np.round(grad_k_norm, 4)))

    t_0 = datetime.now()
    x_size = len(x_k)
    message = None

    grad_k = oracle.grad(x_k)
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)

    def bfgs_multiply(v, H, gamma_0):
        if len(H) == 0:
            return gamma_0 * v
        s, y = H[-1]
        H = H[:-1]
        v_new = v - (s @ v) / (y @ s) * y
        z = bfgs_multiply(v_new, H, gamma_0)
        result = z + (s @ v - y @ z) / (y @ s) * s
        return result

    def bfgs_direction():
        if len(H) == 0:
            return -grad_k
        s, y = H[-1]
        gamma_0 = (y @ s) / (y @ y)
        return bfgs_multiply(-grad_k, H, gamma_0)

    H = []
    for k in range(max_iter):
        do_display()
        fill_history()

        d = bfgs_direction()
        alpha = line_search_tool.line_search(oracle, x_k, d)
        x_new = x_k + alpha * d
        grad_new = oracle.grad(x_new)
        H.append((x_new - x_k, grad_new - grad_k))
        if len(H) > memory_size:
            H = H[1:]
        x_k, grad_k = x_new, grad_new
        grad_k_norm = np.linalg.norm(grad_k)
        if grad_k_norm**2 < tolerance * grad_0_norm**2:
            message = 'success'
            break

    do_display()
    fill_history()

    if not grad_k_norm**2 < tolerance * grad_0_norm**2:
        message = 'iterations_exceeded'

    return x_k, message, history
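# Possible usage of the lbfgs implementation above (not part of the original snippet), assuming the
# QuadraticOracle helper referenced in the newton/gradient_descent docstring examples further below:
# oracle = QuadraticOracle(np.eye(5), np.arange(5))
# x_opt, message, history = lbfgs(oracle, np.zeros(5), trace=True)
# print(message, x_opt)  # expected: 'success' and a point close to [0, 1, 2, 3, 4]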
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    if display:
        print("x_0: {}".format(x_k))

    g_norm_0 = np.linalg.norm(oracle.grad(x_0))
    start = datetime.now()
    for iter in range(max_iter):

        g_k = oracle.grad(x_k)
        g_norm = np.linalg.norm(g_k)
        t = (datetime.now() - start).total_seconds()

        if trace:
            history['time'].append(t)
            if x_0.size <= 2:
                history['x'].append(x_k)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(g_norm)

        if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k))
                and np.isfinite(oracle.grad(x_k)).all()):
            return x_k, 'computational_error', history

        if g_norm**2 <= tolerance * g_norm_0**2:
            return x_k, 'success', history

        # search of direction
        eta_k = min(0.5, np.linalg.norm(g_k)**0.5)
        d_k = conjugate_gradients(partial(oracle.hess_vec, x_k), -g_k, -g_k,
                                  eta_k)[0]
        while np.dot(d_k, g_k) >= 0:
            eta_k *= 0.1
            # re-solve H d = -g_k with a tighter tolerance, warm-started from the previous d_k
            d_k = conjugate_gradients(partial(oracle.hess_vec, x_k), -g_k, d_k,
                                      eta_k)[0]

        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha * d_k

        if display:
            print(f"fx_k: {x_k}, d_k: {d_k}, alpha: {alpha}, eta: {eta_k}")

    if display:
        print(f"x_star: {x_k}")

    g = oracle.grad(x_k)
    g_norm = np.linalg.norm(g)
    t = (datetime.now() - start).total_seconds()

    if trace:
        history['time'].append(t)
        if x_0.size <= 2:
            history['x'].append(x_k)
        history['func'].append(oracle.func(x_k))
        history['grad_norm'].append(g_norm)

    if g_norm**2 <= tolerance * g_norm_0**2:
        return x_k, 'success', history

    return x_k, 'iterations_exceeded', history
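# The hessian_free_newton implementations in this collection call a conjugate_gradients(matvec, b, x_0,
# tolerance) helper that is not shown here. A minimal sketch of such a linear CG solver, assuming it
# returns (x, message, history) and stops on the relative residual ||Ax - b|| <= tolerance * ||b||:

def conjugate_gradients_sketch(matvec, b, x_0, tolerance=1e-4, max_iter=None):
    x = np.copy(x_0).astype(float)
    r = b - matvec(x)            # residual
    d = np.copy(r)               # search direction
    b_norm = np.linalg.norm(b)
    max_iter = 2 * b.size if max_iter is None else max_iter
    for _ in range(max_iter):
        if np.linalg.norm(r) <= tolerance * b_norm:
            return x, 'success', None
        Ad = matvec(d)
        alpha = (r @ r) / (d @ Ad)
        x = x + alpha * d
        r_new = r - alpha * Ad
        beta = (r_new @ r_new) / (r @ r)
        d = r_new + beta * d
        r = r_new
    return x, 'iterations_exceeded', None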
def lbfgs(oracle,
          x_0,
          tolerance=1e-4,
          max_iter=500,
          memory_size=10,
          line_search_options=None,
          display=False,
          trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    def bfgs_multiply(v, H, gamma_0):
        if not H:
            return gamma_0 * v
        s, y = H.pop()
        v_ = v - np.dot(s, v) / np.dot(y, s) * y
        z = bfgs_multiply(v_, H, gamma_0)
        return z + (np.dot(s, v) - np.dot(y, z)) / np.dot(y, s) * s

    def lbfgs_direction(g_k, H_k):
        if H_k:
            s, y = H_k[-1]
            gamma_0 = np.dot(y, s) / np.dot(y, y)
        else:
            gamma_0 = 1.0
        return bfgs_multiply(-g_k, copy.copy(H_k), gamma_0)

    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    if display:
        print("x_0: {}".format(x_k))

    g_norm_0 = np.linalg.norm(oracle.grad(x_0))
    H_k = deque(maxlen=memory_size)
    start = datetime.now()
    for iter in range(max_iter):

        g_k = oracle.grad(x_k)
        g_norm = np.linalg.norm(g_k)
        t = (datetime.now() - start).total_seconds()

        if trace:
            history['time'].append(t)
            if x_0.size <= 2:
                history['x'].append(x_k)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(g_norm)

        if g_norm**2 <= tolerance * g_norm_0**2:
            return x_k, 'success', history

        # search of direction
        d_k = lbfgs_direction(g_k, H_k)
        alpha = line_search_tool.line_search(oracle, x_k, d_k)

        x_new = x_k + alpha * d_k
        # reuse the already computed g_k instead of recomputing oracle.grad(x_k)
        H_k.append([x_new - x_k, oracle.grad(x_new) - g_k])
        x_k = x_new

        if display:
            print(f"fx_k: {x_k}, d_k: {d_k}, alpha: {alpha}")

    if display:
        print(f"x_star: {x_k}")

    g = oracle.grad(x_k)
    g_norm = np.linalg.norm(g)
    t = (datetime.now() - start).total_seconds()

    if trace:
        history['time'].append(t)
        if x_0.size <= 2:
            history['x'].append(x_k)
        history['func'].append(oracle.func(x_k))
        history['grad_norm'].append(g_norm)

    if g_norm**2 <= tolerance * g_norm_0**2:
        return x_k, 'success', history
    return x_k, 'iterations_exceeded', history
Example #5
def newton(oracle,
           x_0,
           tolerance=1e-5,
           max_iter=100,
           line_search_options=None,
           trace=False,
           display=False):
    """
    Newton's optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively. If the Hessian
        returned by the oracle is not positive-definite method stops with message="newton_direction_error"
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
                the stopping criterion.
            - 'newton_direction_error': in case of failure of solving linear system with Hessian matrix (e.g. non-invertible matrix).
            - 'computational_error': in case of getting Infinity or None value during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = newton(oracle, np.zeros(5), line_search_options={'method': 'Constant', 'c': 1.0})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    if display:
        print("x_0: {}".format(x_k))

    grad_norm_0 = np.linalg.norm(oracle.grad(x_0))
    start = datetime.now()
    for iter in range(max_iter):

        grad = oracle.grad(x_k)
        grad_norm = np.linalg.norm(grad)
        t = (datetime.now() - start).total_seconds()

        if trace:
            history['time'].append(t)
            if x_0.size <= 2:
                history['x'].append(x_k)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(grad_norm)

        if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k))
                and np.isfinite(oracle.grad(x_k)).all()):
            return x_k, 'computational_error', history

        if grad_norm**2 <= tolerance * grad_norm_0**2:
            return x_k, 'success', history

        try:
            L = scipy.linalg.cho_factor(oracle.hess(x_k))
        except (scipy.linalg.LinAlgError, ValueError):
            return x_k, 'newton_direction_error', history

        d_k = -scipy.linalg.cho_solve(L, grad)
        # This variant assumes the variable is split as x_k = (x, u) with constraints |x| <= u
        # (e.g. a barrier reformulation); cap the step so the next iterate stays strictly feasible,
        # i.e. u - x > 0 and u + x > 0.
        n = x_k.size // 2
        x, u = x_k[:n], x_k[n:]
        d_x, d_u = d_k[:n], d_k[n:]

        max_alpha = line_search_tool.alpha_0
        if np.any(d_x > d_u):
            max_alpha = min(
                ((u - x)[d_x > d_u] / (d_x - d_u)[d_x > d_u]).min() * 0.99,
                max_alpha)  # keep u - x > 0
        if np.any(-d_x > d_u):
            max_alpha = min(
                ((u + x)[-d_x > d_u] / -(d_x + d_u)[-d_x > d_u]).min() * 0.99,
                max_alpha)  # keep u + x > 0
        alpha = line_search_tool.line_search(oracle,
                                             x_k,
                                             d_k,
                                             previous_alpha=max_alpha)
        x_k = x_k + alpha * d_k

        if display:
            print("x_k: {}, d_k: {}, alpha: {}".format(x_k, d_k, alpha))

    if display:
        print("x_star: {}".format(x_k))

    grad = oracle.grad(x_k)
    grad_norm = np.linalg.norm(grad)
    t = (datetime.now() - start).total_seconds()

    if trace:
        history['time'].append(t)
        if x_0.size <= 2:
            history['x'].append(x_k)
        history['func'].append(oracle.func(x_k))
        history['grad_norm'].append(grad_norm)

    if not (np.isfinite(x_k).all() and np.isfinite(oracle.func(x_k))
            and np.isfinite(oracle.grad(x_k)).all()):
        return x_k, 'computational_error', history

    if grad_norm**2 <= tolerance * grad_norm_0**2:
        return x_k, 'success', history

    return x_k, 'iterations_exceeded', history
Example #6
def lbfgs(oracle,
          x_0,
          tolerance=1e-4,
          max_iter=500,
          memory_size=10,
          line_search_options=None,
          display=False,
          trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    labels = ['func', 'time', 'grad_norm', 'x']
    line_search_tool = get_line_search_tool(line_search_options)
    H = deque()
    x_k = np.copy(x_0)
    i = 0

    # TODO: Implement L-BFGS method.
    # Use line_search_tool.line_search() for adaptive step size.

    def bfgs_multiply(v, H, gamma):
        if H:
            H1 = H.copy()
            s, y = H1.pop()
            v1 = v - s.dot(v) / y.dot(s) * y
            z = bfgs_multiply(v1, H1, gamma)
            return z + (s.dot(v) - y.dot(z)) / y.dot(s) * s
        else:
            return gamma * v

    def lbfgs_direction(grad, H):
        if H:
            s, y = H[-1]
            gamma = y.dot(s) / y.dot(y)
            return bfgs_multiply(-grad, H, gamma)
        else:
            return -grad

    start = time.time()
    grad_0 = oracle.grad(x_k)
    grad_k = np.copy(grad_0)
    grad_k_prev = None
    x_k_prev = None
    msg = 'success'
    while grad_k.dot(grad_k) > tolerance * grad_0.dot(grad_0):

        if i > max_iter:
            msg = 'iterations_exceeded'
            if display:
                print(msg)
            return x_k, msg, history

        hist_values = [
            oracle.func(x_k),
            time.time() - start,
            np.linalg.norm(grad_k), x_k
        ]
        make_history(history, labels, hist_values)

        if x_k_prev is not None:
            H.append((x_k - x_k_prev, grad_k - grad_k_prev))
            if len(H) > memory_size:
                H.popleft()

        d_k = lbfgs_direction(grad_k, H)

        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k_prev = np.copy(x_k)
        grad_k_prev = np.copy(grad_k)
        x_k = x_k + alpha_k * d_k
        grad_k = oracle.grad(x_k)

        i += 1

    hist_values = [
        oracle.func(x_k),
        time.time() - start,
        np.linalg.norm(grad_k), x_k
    ]
    make_history(history, labels, hist_values)

    if display:
        print(msg)

    return x_k, msg, history
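# Example #6 above (and example #14 below) call a make_history(history, labels, values) helper that is
# not included in this collection. A plausible sketch, assuming it appends each value under its label
# and stores the trajectory only for small problems (x.size <= 2), as the docstrings require:

def make_history_sketch(history, labels, values):
    if history is None:
        return
    for label, value in zip(labels, values):
        if label == 'x':
            if value.size <= 2:
                history['x'].append(np.copy(value))
        else:
            history[label].append(value)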
Example #7
def lbfgs(oracle,
          x_0,
          tolerance=1e-4,
          max_iter=500,
          memory_size=10,
          line_search_options=None,
          display=False,
          trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # TODO: Implement L-BFGS method.
    # Use line_search_tool.line_search() for adaptive step size.

    History = []
    x_m1 = None
    dfxk_m1 = None
    eps_due_to_float = 1e-8
    start_time = None
    dfx0_norm = np.linalg.norm(oracle.grad(x_0))

    for iteration in range(0, max_iter + 1):

        #initialising
        dfxk = oracle.grad(x_k)
        dfxk_norm = np.linalg.norm(dfxk)

        if display: print('Debug information: Iteration number: ', iteration)
        # History update
        if trace:
            if start_time is None: start_time = datetime.now()
            if x_k.shape[0] <= 2:
                history['x'].append(x_k)
            history['grad_norm'].append(dfxk_norm)
            history['func'].append(oracle.func(x_k))
            history['time'].append(
                (datetime.now() - start_time).total_seconds())

        #Stop criteria check
        if dfxk_norm < (tolerance**0.5) * dfx0_norm + eps_due_to_float:
            return x_k, 'success', history

        #FancyHistory update
        if x_m1 is not None and dfxk_m1 is not None:
            History.append((x_k - x_m1, dfxk - dfxk_m1))
            if len(History) > memory_size:
                History = History[1:]

        # Step: direction via the two-loop L-BFGS recursion
        # (lbfgs_direction is assumed to be defined elsewhere; see the sketch after this example)

        d_k = lbfgs_direction(dfxk, History)

        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)

        x_m1 = np.copy(x_k)
        dfxk_m1 = np.copy(dfxk)
        x_k = x_k + alpha_k * d_k

    return x_k, 'iterations_exceeded', history
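# Example #7 above calls lbfgs_direction(grad, History) without defining it; presumably it is the same
# recursive L-BFGS direction used in examples #2, #4 and #6. A sketch under that assumption:

def lbfgs_direction_sketch(grad, History):
    def bfgs_multiply(v, H, gamma_0):
        if not H:
            return gamma_0 * v
        s, y = H[-1]
        v_new = v - (s @ v) / (y @ s) * y
        z = bfgs_multiply(v_new, H[:-1], gamma_0)
        return z + ((s @ v) - (y @ z)) / (y @ s) * s

    if not History:
        return -grad
    s, y = History[-1]
    gamma_0 = (y @ s) / (y @ y)
    return bfgs_multiply(-grad, History, gamma_0)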
def lbfgs(oracle,
          x_0,
          tolerance=1e-4,
          max_iter=500,
          memory_size=10,
          line_search_options=None,
          display=False,
          trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """

    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(np.float64)

    timer = Timer()
    converge = False
    alpha_k = None

    s_trace, y_trace = deque(), deque()
    grad_k = oracle.grad(x_k)

    for num_iter in range(max_iter + 1):
        # if np.isinf(x_k).any() or np.isnan(x_k).any():
        #     return x_k, 'computational_error', history

        f_k = oracle.func(x_k)

        # if np.isinf(grad_k).any() or np.isnan(grad_k).any():
        #     return x_k, 'computational_error', history

        grad_norm_k = scipy.linalg.norm(grad_k)

        if trace:
            history['time'].append(timer.seconds())
            history['func'].append(np.copy(f_k))
            history['grad_norm'].append(np.copy(grad_norm_k))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))

        if display: print('step', history['time'][-1] if history else '')

        if num_iter == 0:
            eps_grad_norm_0 = np.sqrt(tolerance) * grad_norm_k
        if grad_norm_k <= eps_grad_norm_0:
            converge = True
            break

        if num_iter == max_iter: break

        def lbfgs_direction(grad, s_trace, y_trace):
            d = -grad

            if not s_trace:
                return d

            mus = []
            for s, y in zip(reversed(s_trace), reversed(y_trace)):
                mu = np.dot(s, d) / np.dot(s, y)
                mus.append(mu)
                d -= mu * y

            d *= np.dot(s_trace[-1], y_trace[-1]) / np.dot(
                y_trace[-1], y_trace[-1])

            for s, y, mu in zip(s_trace, y_trace, reversed(mus)):
                beta = np.dot(y, d) / np.dot(s, y)
                d += (mu - beta) * s

            return d

        d_k = lbfgs_direction(grad_k, s_trace, y_trace)
        alpha_k = line_search_tool.line_search(
            oracle, x_k, d_k, 2.0 * alpha_k if alpha_k is not None else None)
        x_k += alpha_k * d_k
        last_grad_k = np.copy(grad_k)
        grad_k = oracle.grad(x_k)

        if memory_size > 0:
            if len(s_trace) == memory_size:
                s_trace.popleft()
                y_trace.popleft()
            s_trace.append(alpha_k * d_k)
            y_trace.append(grad_k - last_grad_k)

    return x_k, 'success' if converge else 'iterations_exceeded', history
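# Example #8 above relies on a Timer helper with a .seconds() method that is not shown here.
# A minimal sketch under that assumption (the time module is imported at module level, as the
# other examples assume):

class TimerSketch:
    def __init__(self):
        self._start = time.time()

    def seconds(self):
        return time.time() - self._start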
Example #9
def nonlinear_conjugate_gradients(oracle, x_0, tolerance=1e-4, max_iter=500,
          line_search_options={'method': 'Wolfe', 'c1': 1e-4, 'c2': 0.2},
          display=False, trace=False):
    """
    Nonlinear conjugate gradient method for optimization (Polak--Ribiere version).

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    
    t0 = datetime.now()
    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)
    
    for it in range(max_iter + 1):
        
        # Oracle
        if it > 0:
            grad_x_k_prev = grad_x_k.copy()
            norm_grad_x_k_prev = norm_grad_x_k            
        
        f_x_k = oracle.func(x_k)
        grad_x_k = oracle.grad(x_k)        
        norm_grad_x_k = np.linalg.norm(grad_x_k)
            
        # Debug info
        if display:
            print(it)
        
        # Fill trace data
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size < 3:
                history['x'].append(x_k)
        
        # Stopping criterion
        if norm_grad_x_k * norm_grad_x_k <= tolerance * norm_grad_x_0 * norm_grad_x_0:
            break
        
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history
        
        # Direction
        if it > 0:
            # Polak--Ribiere coefficient
            beta = np.dot(grad_x_k, grad_x_k - grad_x_k_prev) / (norm_grad_x_k_prev * norm_grad_x_k_prev)
            # Fletcher--Reeves alternative:
            # beta = (norm_grad_x_k * norm_grad_x_k) / (norm_grad_x_k_prev * norm_grad_x_k_prev)
            d_k = -grad_x_k + beta * d_k
        else:
            d_k = -grad_x_k

        # Line search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)

        x_k = x_k + alpha * d_k
    
    
    return x_k, 'success', history
Example #10
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    # TODO: Implement hessian-free Newton's method.
    # Use line_search_tool.line_search() for adaptive step size.

    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = x_0.astype(float)
    t_0 = time()

    grad = oracle.grad(x_k)
    grad_norm = norm(grad)
    grad_norm_x0 = grad_norm

    if display:
        sys.stdout.write(u"Start of newton method\n")
    if trace:
        update_history_newton(history, oracle.func(x_k), t_0, x_k, grad_norm)

    i = 0

    while grad_norm > np.sqrt(tolerance) * grad_norm_x0:
        i += 1
        if (i > max_iter):
            return x_k, 'iterations_exceeded', history
        matvec = lambda v: oracle.hess_vec(x_k, v)

        nu_k = min(0.5, np.sqrt(grad_norm))
        d_k = -grad
        d_k, _, _ = conjugate_gradients(matvec, -grad, d_k, tolerance=nu_k)
        while not np.dot(grad, d_k) < 0:
            nu_k /= 10.0
            d_k, _, _ = conjugate_gradients(matvec, -grad, d_k, tolerance=nu_k)
        alpha = line_search_tool.line_search(oracle,
                                             x_k,
                                             d_k,
                                             previous_alpha=1.0)
        if np.isinf(alpha) or np.isnan(alpha):
            return x_k, 'computational_error', history
        x_k += alpha * d_k
        grad = oracle.grad(x_k)
        grad_norm = norm(grad)
        if display:
            sys.stdout.write(str(nu_k) + '\n')
        if trace:
            update_history_newton(history, oracle.func(x_k), t_0, x_k,
                                  norm(grad))

    return x_k, 'success', history
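# The update_history_newton(history, f_k, t_0, x_k, grad_norm) helper used above is not shown in this
# collection. A plausible sketch, assuming t_0 comes from time() as in the snippet above and that the
# trajectory is stored only when x has at most two components, as the docstring requires:

def update_history_newton_sketch(history, f_k, t_0, x_k, grad_norm):
    if history is None:
        return
    history['func'].append(f_k)
    history['time'].append(time() - t_0)
    history['grad_norm'].append(grad_norm)
    if x_k.size <= 2:
        history['x'].append(np.copy(x_k))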
Example #11
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10, 
          line_search_options=None, display=False, trace=False):
    """
    Limited-memory Broyden–Fletcher–Goldfarb–Shanno's method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0) * 1.0
    x_k_old = None
    df_k_old = None
    H = deque()
    l = memory_size

    start = time.time()

    def pushHistory(x_k, oracle, df_k_n):
        if trace:
            history['time'].append(time.time() - start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1 / 2))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)

    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)
    for k in range(max_iter):
        if x_k_old is not None:
            df_k_old = np.copy(df_k)

        df_k = oracle.grad(x_k)
        # hf_k = oracle.hess(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history

        if x_k_old is not None:
            H.append((np.copy(x_k-x_k_old), np.copy(df_k-df_k_old)))
            if len(H) > l:
                H.popleft()

        d_k = df_k * (-1.0)
        mus = list()
        for s, y in reversed(H):
            mu = s.dot(d_k) / s.dot(y)
            mus.append(mu)
            d_k = d_k - y * mu
        if len(H) > 0:
            s_k, y_k = H[-1]
            d_k = d_k * s_k.dot(y_k) / y_k.dot(y_k)
        for s_y, mu in zip(H, reversed(mus)):
            s, y = s_y
            beta = y.dot(d_k) / s.dot(y)
            d_k = d_k + s * (mu - beta)

        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k_old = np.copy(x_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
Example #12
def gradient_descent(oracle, x_0, tolerance=1e-5, max_iter=10000,
                     line_search_options=None, trace=False, display=False):
    """
    Gradient descent optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively.
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        "success" or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
                the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = gradient_descent(oracle, np.zeros(5), line_search_options={'method': 'Armijo', 'c1': 1e-4})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(float)
    start = time.time()

    def pushHistory(x_k, oracle, df_k_n):
        if trace:
            history['time'].append(time.time()-start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1/2))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)


    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)

    for k in range(max_iter):
        df_k = oracle.grad(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history
        d_k = df_k*(-1)
        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
Example #13
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500, 
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0) * 1.0

    start = time.time()

    teta_k = lambda n_k: min(0.5, n_k**(1/4))  # n_k is the squared gradient norm, so this equals min(0.5, sqrt(||grad||))

    def pushHistory(x_k, oracle, df_k_n):
        if trace:
            history['time'].append(time.time() - start)
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(df_k_n ** (1 / 2))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))
        if display:
            print(x_k)

    df0 = oracle.grad(x_0)
    df0_norm = df0.dot(df0)
    for k in range(max_iter):
        df_k = oracle.grad(x_k)
        df_k_norm = df_k.dot(df_k)
        pushHistory(x_k, oracle, df_k_norm)
        if df_k_norm <= df0_norm * tolerance:
            return x_k, 'success', history

        teta = teta_k(df_k_norm)
        matvec = lambda v: oracle.hess_vec(x_k, v)
        d_k, msg, hist = conjugate_gradients(matvec, df_k*(-1), df_k*(-1), tolerance=teta)
        while d_k.dot(df_k) >= 0:
            teta /= 10
            d_k, msg, hist = conjugate_gradients(matvec, df_k * (-1), df_k*(-1), tolerance=teta)


        a_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k += d_k * a_k

    df_last = oracle.grad(x_k)
    df_last_norm = df_last.dot(df_last)
    pushHistory(x_k, oracle, df_last_norm)
    if df_last_norm <= df0_norm * tolerance:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
Example #14
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to a student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    labels = ['func', 'time', 'grad_norm', 'x']
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    i = 0

    # TODO: Implement hessian-free Newton's method.
    # Use line_search_tool.line_search() for adaptive step size.
    start = time.time()
    grad_0 = oracle.grad(x_k)
    grad_k = np.copy(grad_0)
    msg = 'success'
    while grad_k.dot(grad_k) > tolerance * grad_0.dot(grad_0):

        if i > max_iter:
            msg = 'iterations_exceeded'
            if display:
                print(msg)
            return x_k, msg, history

        hist_values = [
            oracle.func(x_k),
            time.time() - start,
            np.linalg.norm(grad_k), x_k
        ]
        make_history(history, labels, hist_values)

        matvec = lambda v: oracle.hess_vec(x_k, v)
        mu_k = np.min([0.5, np.sqrt(np.linalg.norm(grad_k))])
        while True:
            d_k, _, _ = conjugate_gradients(matvec, -grad_k, -grad_k, mu_k)
            if grad_k.dot(d_k) <= 0:
                break
            mu_k = mu_k / 10.

        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)
        x_k = x_k + alpha_k * d_k
        grad_k = oracle.grad(x_k)

        i += 1

    hist_values = [
        oracle.func(x_k),
        time.time() - start,
        np.linalg.norm(grad_k), x_k
    ]
    make_history(history, labels, hist_values)

    if display:
        print(msg)
    return x_k, msg, history
Example #15
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # Hessian-free Newton's method: conjugate gradients for the inexact Newton system,
    # line_search_tool.line_search() for the adaptive step size.

    def fill_history():
        if not trace:
            return
        history['func'].append(oracle.func(x_k))
        history['time'].append((datetime.now() - t_0).seconds)
        history['grad_norm'].append(grad_k_norm)
        if x_size <= 2:
            history['x'].append(np.copy(x_k))

    def do_display():
        if not display:
            return
        if len(x_k) <= 4:
            print('x = {}, '.format(np.round(x_k, 4)), end='')
        print('func= {}, grad_norm = {}'.format(np.round(oracle.func(x_k), 4),
                                                np.round(grad_k_norm, 4)))

    t_0 = datetime.now()
    x_size = len(x_k)
    message = None

    grad_k = oracle.grad(x_k)
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)

    for _ in range(max_iter):
        do_display()
        fill_history()

        eps = min(0.5, grad_k_norm**0.5)
        hess_vec = lambda v: oracle.hess_vec(x_k, v)
        while True:
            d, _, _ = conjugate_gradients(hess_vec, -grad_k, -grad_k, eps)
            if grad_k @ d < 0:
                break
            # Tighten the CG tolerance until d is a descent direction.
            eps /= 10
        alpha = line_search_tool.line_search(oracle, x_k, d, previous_alpha=1)
        x_k = x_k + alpha * d
        grad_k = oracle.grad(x_k)
        grad_k_norm = np.linalg.norm(grad_k)
        if grad_k_norm**2 < tolerance * grad_0_norm**2:
            message = 'success'
            break

    do_display()
    fill_history()
    if grad_k_norm**2 >= tolerance * grad_0_norm**2:
        message = 'iterations_exceeded'

    return x_k, message, history
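A minimal usage sketch for the hessian_free_newton above, assuming the surrounding module's conjugate_gradients and line-search helpers are importable. MinimalQuadraticOracle is a hypothetical stand-in for the documented oracle interface (.func(), .grad(), .hess_vec()):

import numpy as np

class MinimalQuadraticOracle:
    # Hypothetical stand-in oracle for f(x) = 0.5 * x^T A x - b^T x.
    def __init__(self, A, b):
        self.A, self.b = A, b

    def func(self, x):
        return 0.5 * x.dot(self.A.dot(x)) - self.b.dot(x)

    def grad(self, x):
        return self.A.dot(x) - self.b

    def hess_vec(self, x, v):
        # Hessian-vector product without ever forming the full Hessian.
        return self.A.dot(v)

oracle = MinimalQuadraticOracle(np.diag([1.0, 2.0, 3.0]), np.ones(3))
x_star, message, history = hessian_free_newton(oracle, np.zeros(3), trace=True)
print(message, np.round(x_star, 3))  # expected: success [1. 0.5 0.333], i.e. A^{-1} b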
예제 #16
0
def gradient_descent(oracle,
                     x_0,
                     tolerance=1e-5,
                     max_iter=10000,
                     line_search_options=None,
                     trace=False,
                     display=False):
    """
    Gradient descent optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively.
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        "success" or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
                the stopping criterion.
            - 'computational_error': in case of getting Infinity or None value during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = gradient_descent(oracle, np.zeros(5), line_search_options={'method': 'Armijo', 'c1': 1e-4})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # Gradient descent: step along -grad(x_k) with the step size chosen
    # adaptively by line_search_tool.line_search().
    def fill_history():
        if not trace:
            return
        history['time'].append((datetime.now() - t_0).seconds)
        history['func'].append(func_k)
        history['grad_norm'].append(grad_k_norm)
        if len(x_k) <= 2:
            history['x'].append(np.copy(x_k))

    t_0 = datetime.now()
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    a_k = None
    grad_0_norm = grad_k_norm = np.linalg.norm(grad_k)
    fill_history()
    if display:
        print('Begin new GD')

    for i in range(max_iter):
        if display:
            print('i = {} grad_norm = {} func = {} x = {} grad = {}'.format(
                i, grad_k_norm, func_k, x_k, grad_k),
                  end=' ')
        if grad_k_norm**2 <= tolerance * grad_0_norm**2:
            break

        d_k = -grad_k
        a_k = line_search_tool.line_search(oracle, x_k, d_k,
                                           2 * a_k if a_k else None)
        if display:
            print('alpha = {}'.format(a_k))
        x_k += a_k * d_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_k_norm = np.linalg.norm(grad_k)
        fill_history()
    if display:
        print()

    if grad_k_norm**2 <= tolerance * grad_0_norm**2:
        return x_k, 'success', history
    else:
        return x_k, 'iterations_exceeded', history
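A short usage sketch mirroring the docstring example above; it assumes the course's QuadraticOracle is available in the current namespace (as the docstring example itself does) and only adds an inspection of the returned history:

import numpy as np

oracle = QuadraticOracle(np.eye(5), np.arange(5))  # assumed available, as in the docstring example
x_opt, message, history = gradient_descent(oracle, np.zeros(5), trace=True,
                                           line_search_options={'method': 'Armijo', 'c1': 1e-4})
print(message)               # expected: success
print(np.round(x_opt))       # expected: [0. 1. 2. 3. 4.]
print(history['grad_norm'])  # Euclidean gradient norms recorded along the run
# history['x'] stays empty here because x.size == 5 > 2.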
예제 #17
0
def lbfgs(oracle, x_0, tolerance=1e-4, max_iter=500, memory_size=10, 
          line_search_options=None, display=False, trace=False):
    """
    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func() and .grad() methods implemented for computing
        function value and its gradient respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    memory_size : int
        The length of directions history in L-BFGS method.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    t0 = datetime.now()
    
    x_k = np.copy(x_0)
    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)
    
    memory = []
    
    for it in range(max_iter + 1):
        f_x_k = oracle.func(x_k)
        
        if it > 0:
            grad_x_k_prev = grad_x_k.copy()
            grad_x_k = oracle.grad(x_k)
            
            s_k_prev = x_k - x_k_prev
            y_k_prev = grad_x_k - grad_x_k_prev
            
            # Keep only the most recent memory_size - 1 pairs before appending the new one.
            chop_size = min(memory_size - 1, len(memory))
            if chop_size < len(memory):
                memory = memory[len(memory) - chop_size:]
            
            memory.append((s_k_prev, y_k_prev))
            
        else:
            grad_x_k = oracle.grad(x_k)
    
        norm_grad_x_k = np.linalg.norm(grad_x_k)

        #Fill trace data
        if display:
            print(it)
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size < 3:
                history['x'].append(x_k)
        
        if norm_grad_x_k * norm_grad_x_k <= tolerance * norm_grad_x_0 * norm_grad_x_0:
            break
        
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history
                
        d_k = -grad_x_k
        if it > 0:
            mu = np.zeros(len(memory))
            for index in range(len(memory)):
                i = len(memory) - index - 1
                s_i, y_i = memory[i]
                mu[i] = np.dot(s_i, d_k) / np.dot(s_i, y_i)
                d_k = d_k - mu[i] * y_i
            s_k_prev, y_k_prev = memory[len(memory) - 1]
            d_k = (np.dot(s_k_prev, y_k_prev) / np.dot(y_k_prev, y_k_prev)) * d_k
            for i in range(len(memory)):
                s_i, y_i = memory[i]
                beta = np.dot(y_i, d_k) / np.dot(s_i, y_i)
                d_k = d_k + (mu[i] - beta) * s_i
        
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        
        x_k_prev = x_k.copy()
        x_k = x_k + alpha * d_k

    
    return x_k, 'success', history
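A sanity check on the two-loop recursion used above: with a single stored pair (s, y), applying the recursion to a vector q should coincide with the explicit one-pair BFGS inverse-Hessian approximation H = (I - rho*s*y^T) H0 (I - rho*y*s^T) + rho*s*s^T, where rho = 1 / (y^T s) and H0 = (s^T y / y^T y) * I. The helper name two_loop_single_pair is hypothetical and exists only for this check:

import numpy as np

def two_loop_single_pair(q, s, y):
    # The recursion from the L-BFGS code above, specialized to one stored pair.
    mu = s.dot(q) / s.dot(y)
    q = q - mu * y
    r = (s.dot(y) / y.dot(y)) * q  # initial scaling H0 = (s^T y / y^T y) * I
    beta = y.dot(r) / s.dot(y)
    return r + (mu - beta) * s

rng = np.random.default_rng(0)
n = 4
s, y, q = rng.normal(size=n), rng.normal(size=n), rng.normal(size=n)
if s.dot(y) <= 0:  # enforce the curvature condition for this illustration
    y = -y
rho = 1.0 / y.dot(s)
H0 = (s.dot(y) / y.dot(y)) * np.eye(n)
H = (np.eye(n) - rho * np.outer(s, y)) @ H0 @ (np.eye(n) - rho * np.outer(y, s)) + rho * np.outer(s, s)
print(np.allclose(two_loop_single_pair(q, s, y), H @ q))  # True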
예제 #18
0
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """

    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0).astype(np.float64)

    timer = Timer()
    converge = False
    alpha_k = None

    for num_iter in range(max_iter + 1):
        # if np.isinf(x_k).any() or np.isnan(x_k).any():
        #     return x_k, 'computational_error', history

        f_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)

        # if np.isinf(grad_k).any() or np.isnan(grad_k).any():
        #     return x_k, 'computational_error', history

        grad_norm_k = scipy.linalg.norm(grad_k)

        if trace:
            history['time'].append(timer.seconds())
            history['func'].append(np.copy(f_k))
            history['grad_norm'].append(np.copy(grad_norm_k))
            if x_k.size <= 2:
                history['x'].append(np.copy(x_k))

        if display: print('step', history['time'][-1] if history else '')

        if num_iter == 0:
            eps_grad_norm_0 = np.sqrt(tolerance) * grad_norm_k
        if grad_norm_k <= eps_grad_norm_0:
            converge = True
            break

        if num_iter == max_iter: break

        eta = min(0.5, np.sqrt(grad_norm_k))
        conjugate_gradient_converge = False

        while not conjugate_gradient_converge:
            d_k, _, _ = conjugate_gradients(lambda d: oracle.hess_vec(x_k, d),
                                            -grad_k,
                                            -grad_k,
                                            tolerance=eta)
            eta /= 10
            conjugate_gradient_converge = np.dot(d_k, grad_k) < 0

        alpha_k = line_search_tool.line_search(oracle, x_k, d_k, 1.0)
        x_k += alpha_k * d_k

    return x_k, 'success' if converge else 'iterations_exceeded', history
예제 #19
0
def hessian_free_newton(oracle, x_0, tolerance=1e-4, max_iter=500, 
                        line_search_options=None, display=False, trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
    
    t0 = datetime.now()
    
    grad_x_0 = oracle.grad(x_0)
    norm_grad_x_0 = np.linalg.norm(grad_x_0)

    for it in range(max_iter + 1):
        # Oracle
        f_x_k = oracle.func(x_k)
        grad_x_k = oracle.grad(x_k)
        norm_grad_x_k = np.linalg.norm(grad_x_k)
        
        # Fill trace data
        if display:
            print(it)
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['grad_norm'].append(norm_grad_x_k)
            history['func'].append(f_x_k)
            if x_k.size < 3:
                history['x'].append(x_k)
        
        # Stopping criterion
        if norm_grad_x_k * norm_grad_x_k <= tolerance * norm_grad_x_0 * norm_grad_x_0:
            break
        
        if it >= max_iter:
            return x_k, 'iterations_exceeded', history
        
        # Direction
        eta_k = min(0.5, np.sqrt(norm_grad_x_k))
        d_k = -grad_x_k
        matvec = lambda v: oracle.hess_vec(x_k, v)
        
        while True:
            d_k, _, _ = conjugate_gradients(matvec, -grad_x_k, d_k, tolerance=eta_k)
            if np.dot(d_k, grad_x_k) < 0:
                break
            eta_k = eta_k / 10.
                
        # Line Search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)
        
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
예제 #20
0
def hessian_free_newton(oracle,
                        x_0,
                        tolerance=1e-4,
                        max_iter=500,
                        line_search_options=None,
                        display=False,
                        trace=False):
    """
    Hessian Free method for optimization.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess_vec() methods implemented for computing
        function value, its gradient and matrix product of the Hessian times vector respectively.
    x_0 : 1-dimensional np.array
        Starting point of the algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    display : bool
        If True, debug information is displayed during optimization.
        Printing format is up to the student and is not checked in any way.
    trace:  bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
              the stopping criterion.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['time'] : list of floats, containing time in seconds passed from the start of the method
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)

    # Hessian-free Newton's method: the Newton system is solved inexactly with
    # conjugate gradients; line_search_tool.line_search() gives the step size.

    eps_due_to_float = 1e-8
    start_time = None
    dfx0 = oracle.grad(x_0)
    dfx0_norm2 = dfx0.dot(dfx0.T)

    for iteration in range(0, max_iter + 1):

        dfxk = oracle.grad(x_k)
        dfxk_norm2 = dfxk.dot(dfxk.T)
        # History update

        if display: print('Debug information: iteration number', iteration)

        if trace:
            if start_time is None: start_time = datetime.now()
            if x_k.shape[0] <= 2:
                history['x'].append(x_k)
            history['grad_norm'].append(math.sqrt(dfxk_norm2))
            history['func'].append(oracle.func(x_k))
            history['time'].append(
                (datetime.now() - start_time).total_seconds())

        # Stopping criterion check
        if (dfxk_norm2 < tolerance * dfx0_norm2 + eps_due_to_float):
            return x_k, 'success', history

        n_k = min(0.5, (dfxk_norm2)**0.25)
        d_start = -dfxk
        matvec = lambda x: oracle.hess_vec(x_k, x.T)
        while True:
            # find d_k through cg

            d_k, message, history_sg = conjugate_gradients(matvec,
                                                           -dfxk,
                                                           d_start,
                                                           tolerance=n_k,
                                                           max_iter=None,
                                                           trace=False,
                                                           display=False)

            #Check if cg made d_k descent direction
            if dfxk.dot(d_k.T) < 0: break
            n_k = n_k / 10
            d_start = d_k

        # alpha_k search by Line_search_tool
        alpha_k = line_search_tool.line_search(oracle, x_k, d_k)

        # Updating x_k
        x_k = x_k + alpha_k * d_k

    return x_k, 'iterations_exceeded', history
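The CG tolerance in the loop above, n_k = min(0.5, (||grad||^2)**0.25), is the same forcing term eta_k = min(0.5, sqrt(||grad f(x_k)||)) used in the other Hessian-free examples: loose inner CG solves far from the optimum, increasingly accurate Newton steps as the gradient vanishes. A small numeric illustration (values are approximate):

import numpy as np

for grad_norm in [10.0, 1.0, 1e-2, 1e-6]:
    grad_norm2 = grad_norm ** 2
    eta_a = min(0.5, grad_norm2 ** 0.25)  # as written in the loop above
    eta_b = min(0.5, np.sqrt(grad_norm))  # equivalent form used in the other examples
    print(grad_norm, eta_a, eta_b)
# approximately 0.5, 0.5, 0.1 and 0.001 -- capped far from the optimum, shrinking near it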
예제 #21
0
def newton(oracle, x_0, tolerance=1e-5, max_iter=100,
           line_search_options=None, trace=False, display=False):
    """
    Newton's optimization method.

    Parameters
    ----------
    oracle : BaseSmoothOracle-descendant object
        Oracle with .func(), .grad() and .hess() methods implemented for computing
        function value, its gradient and Hessian respectively. If the Hessian
        returned by the oracle is not positive-definite method stops with message="newton_direction_error"
    x_0 : np.array
        Starting point for optimization algorithm
    tolerance : float
        Epsilon value for stopping criterion.
    max_iter : int
        Maximum number of iterations.
    line_search_options : dict, LineSearchTool or None
        Dictionary with line search options. See LineSearchTool class for details.
    trace : bool
        If True, the progress information is appended into history dictionary during training.
        Otherwise None is returned instead of history.
    display : bool
        If True, debug information is displayed during optimization.

    Returns
    -------
    x_star : np.array
        The point found by the optimization procedure
    message : string
        'success' or the description of error:
            - 'iterations_exceeded': if after max_iter iterations of the method x_k still doesn't satisfy
                the stopping criterion.
            - 'newton_direction_error': in case of failure of solving linear system with Hessian matrix (e.g. non-invertible matrix).
            - 'computational_error': in case of getting Infinity or None value during the computations.
    history : dictionary of lists or None
        Dictionary containing the progress information or None if trace=False.
        Dictionary has to be organized as follows:
            - history['time'] : list of floats, containing time passed from the start of the method
            - history['func'] : list of function values f(x_k) on every step of the algorithm
            - history['grad_norm'] : list of values Euclidean norms ||g(x_k)|| of the gradient on every step of the algorithm
            - history['x'] : list of np.arrays, containing the trajectory of the algorithm. ONLY STORE IF x.size <= 2

    Example:
    --------
    >> oracle = QuadraticOracle(np.eye(5), np.arange(5))
    >> x_opt, message, history = newton(oracle, np.zeros(5), line_search_options={'method': 'Constant', 'c': 1.0})
    >> print('Found optimal point: {}'.format(x_opt))
       Found optimal point: [ 0.  1.  2.  3.  4.]
    """
    history = defaultdict(list) if trace else None
    line_search_tool = get_line_search_tool(line_search_options)
    x_k = np.copy(x_0)
                
    t0 = datetime.now()
    norm_grad0 = np.linalg.norm(oracle.grad(x_0))
    
    for iteration in range(max_iter + 1):
        #Oracle
        grad_k = oracle.grad(x_k)
        norm_grad_k = np.linalg.norm(grad_k)
        hess_k = oracle.hess(x_k)
        
        #Fill trace data
        if trace:
            history['time'].append((datetime.now() - t0).total_seconds())
            history['func'].append(oracle.func(x_k))
            history['grad_norm'].append(norm_grad_k)
            if x_k.size < 3:
                history['x'].append(x_k)
        
        if display:
            print("debug info")
        
        # Stopping criterion
        if norm_grad_k * norm_grad_k <= tolerance * norm_grad0 * norm_grad0:
            break
        if iteration == max_iter:
            return x_k, 'iterations_exceeded', history
        
        #Compute direction
        try:
            L = scipy.linalg.cho_factor(hess_k, lower=True)
            d_k = scipy.linalg.cho_solve(L, -grad_k)
        except scipy.linalg.LinAlgError:
            # Cholesky failed: the Hessian is not positive definite.
            return x_k, 'newton_direction_error', history
        
        #Line search
        alpha = line_search_tool.line_search(oracle, x_k, d_k)

        if alpha is None:
            return x_k, 'computational_error', history
        
        #Update x_k
        x_k = x_k + alpha * d_k

    return x_k, 'success', history
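The Newton direction above comes from a Cholesky factorization of the Hessian; scipy.linalg.cho_factor raises LinAlgError precisely when the matrix is not positive definite, which is the situation the docstring labels 'newton_direction_error'. A self-contained sketch of that pattern on a small matrix (the variable names are illustrative):

import numpy as np
import scipy.linalg

hess = np.array([[4.0, 1.0],
                 [1.0, 3.0]])  # symmetric positive definite
grad = np.array([1.0, 2.0])

c_and_lower = scipy.linalg.cho_factor(hess, lower=True)
d = scipy.linalg.cho_solve(c_and_lower, -grad)
print(np.allclose(hess @ d, -grad))  # True: d solves the Newton system H d = -g

try:
    scipy.linalg.cho_factor(np.array([[1.0, 2.0], [2.0, 1.0]]))  # indefinite matrix
except scipy.linalg.LinAlgError:
    print('not positive definite')  # the failure mode reported as newton_direction_error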