Example #1
def CGM(x, f, g, eps, kmax, iCG, iRC, nu=None, precision=6):
    gradient_norm = round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # ============== #
    d = -g(x)
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        if k > 0:
            alpha, *_ = line_search(f,
                                    g,
                                    x,
                                    d,
                                    old_old_fval=f(x_prev),
                                    c1=0.01,
                                    c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            break
        x, x_prev = x + alpha * d, x
        # =========== #
        # CGM variants
        if iCG == "FR":
            beta = (g(x).T @ g(x)) / (g(x_prev).T @ g(x_prev))
        elif iCG == "PR":
            beta = max(0,
                       g(x).T @ (g(x) - g(x_prev)) / (g(x_prev).T @ g(x_prev)))
        else:
            raise TypeError(
                "iCG should be FR (Fletcher-Reeves) or PR (Polak-Ribière)")
        # Restart conditions
        if iRC > 0 and nu is None:
            raise TypeError(
                f"nu is a necessary parameter with iRC equal to {iRC}")
        if (iRC == 1 and k % nu == 0 or iRC == 2
                and g(x).T @ g(x_prev) / np.linalg.norm(g(x))**2 > nu
                or k == 0):
            d = -g(x)
        else:
            d = -g(x) + beta * d
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm((g(x_prev))))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm((g(x_prev)))**2))
        # =========== #
    data = pd.DataFrame(Xk,
                        columns=["alpha", "f(x)", "||g(x)||"],
                        dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
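A minimal usage sketch for CGM above, assuming the imports the snippet relies on (numpy as np, pandas as pd, scipy.optimize.line_search); the quadratic objective and starting point are illustrative, not from the source.

import numpy as np
import pandas as pd
from scipy.optimize import line_search

A = np.array([[3.0, 0.5],
              [0.5, 1.0]])
f = lambda x: 0.5 * x @ A @ x      # convex quadratic objective
g = lambda x: A @ x                # its gradient

x_opt, history = CGM(np.array([2.0, -1.5]), f, g,
                     eps=1e-6, kmax=100, iCG="FR", iRC=0)
print(x_opt)
print(history.tail())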
Example #2
    def otimizar(self, p_inicial):
        # Definindo valores iniciais
        self.ponto_inicial = np.array(p_inicial)
        self.tolerancia = 1000000
        self.num_iteracoes = 0
        self.chamadas_func_obj = 0
        self.chamadas_gradiente = 0
        ponto_anterior = None
        ponto = np.array(p_inicial)

        self.iniciar_tempo()

        while self.tolerancia >= 1e-6:
            self.num_iteracoes += 1
            direcao = -self.gradiente_himmelblau(ponto)

            resp = line_search(f=self.func_himmelblau,
                               myfprime=self.gradiente_himmelblau,
                               xk=ponto,
                               pk=direcao)
            ponto_anterior = ponto
            ponto = ponto + resp[0] * direcao

            self.tolerancia = np.linalg.norm(
                ponto - ponto_anterior) / np.linalg.norm(ponto_anterior)

            self.chamadas_func_obj += resp[1]
            self.chamadas_gradiente += resp[2]

        self.finalizar_tempo()

        self.ponto_final = ponto
        self.valor_final = self.func_himmelblau(self.ponto_final)
Example #3
    def optimize(self, start_point, verbose=False):
        xk = start_point
        iter = 0
        self.obj_f.reset_count()

        while True:
            dk = np.linalg.solve(self.G(xk), -self.g(xk))

            alpha, fc, gc, new_fval, old_fval, new_slope = line_search(
                self.f, self.g, xk, dk)
            if alpha is None:
                alpha = ALPHA_BK

            xk_plus_1 = xk + alpha * dk
            iter += 1

            if verbose:
                print('----------')
                print("alpha", alpha)
                print("dk", dk)
                print("x_k+1", xk_plus_1)
                print("f_k+1", self.f(xk_plus_1))

            if should_break(xk, xk_plus_1):
                break
            xk = xk_plus_1

        print("     final point", xk_plus_1)
        print("     final_fval", self.f(xk_plus_1))
        print("     iter times", iter)
        print("     function calls", self.obj_f.get_count()[0])
        print("     derivate calls", self.obj_f.get_count()[1])
        print("     hessian calls", self.obj_f.get_count()[2])
        return xk_plus_1, self.f(xk_plus_1)
def gradient_Wolfe(f, f_grad, x0, PREC, ITE_MAX):
    x = np.copy(x0)
    stop = PREC * np.linalg.norm(f_grad(x0))

    x_tab = np.copy(x)
    print(
        "------------------------------------\n Gradient with Wolfe line search\n------------------------------------\nSTART"
    )
    t_s = timeit.default_timer()
    for k in range(ITE_MAX):
        g = f_grad(x)

        res = line_search(f,
                          f_grad,
                          x,
                          -g,
                          gfk=None,
                          old_fval=None,
                          old_old_fval=None,
                          args=(),
                          c1=0.0001,
                          c2=0.9,
                          amax=50)

        if res[0] is None:  # line search failed; stop instead of stepping with None
            break
        x = x - res[0] * g

        x_tab = np.vstack((x_tab, x))

        if np.linalg.norm(g) < stop:
            break
    t_e = timeit.default_timer()
    print("FINISHED -- {:d} iterations / {:.6f}s -- final value: {:f}\n\n".
          format(k, t_e - t_s, f(x)))
    return x, x_tab
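A hedged usage sketch for gradient_Wolfe, assuming the imports the snippet expects (numpy as np, timeit, scipy.optimize.line_search); rosen and rosen_der are SciPy's Rosenbrock test function and its gradient.

import numpy as np
import timeit
from scipy.optimize import line_search, rosen, rosen_der

x_final, x_history = gradient_Wolfe(rosen, rosen_der,
                                    x0=np.array([-1.2, 1.0]),
                                    PREC=1e-3, ITE_MAX=300)
print(x_final, rosen(x_final))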
def gaussNewton(f, Df, Jac, r, x, niter=10, backtrack=True):
    '''
    Solve a nonlinear least squares problem with Gauss-Newton method.
    Inputs:
        f -- the objective function
        Df -- gradient of f
        Jac -- jacobian of residual vector
        r -- the residual vector
        x -- initial point
        niter -- integer giving the number of iterations
    Returns:
        the minimizer
    '''
    a=0
    for i in xrange(niter):
        #print i
        J = Jac(x)
        g = J.T.dot(r(x))
        #print J.T.dot(J)
        p = la.solve(J.T.dot(J), -g)
        slope = (g*p).sum()
        if backtrack:
            a = backtracking(f, slope, x, p)
        else:
            a = opt.line_search(f, Df, x, p)[0]  
        x += a*p
        print x, f(x), a
    return x
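The Gauss-Newton snippet above calls a backtracking helper that is not shown; a minimal Armijo backtracking sketch with the same call signature could look like this (the step-shrinking constants are illustrative assumptions).

def backtracking(f, slope, x, p, alpha0=1.0, rho=0.5, c=1e-4):
    """Shrink the step until the Armijo sufficient-decrease condition holds."""
    a = alpha0
    fx = f(x)
    while f(x + a * p) > fx + c * a * slope:   # slope = g.T @ p, negative for a descent direction
        a *= rho
        if a < 1e-12:                          # give up on pathologically small steps
            break
    return a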
Example #6
    def conjugate_gradient(self, w, J=10, gtol=1e-5):
        d, g = [], []

        gnorm = gtol + 1
        j = 0
        while (gnorm > gtol) and (j < J):
            if j == 0:
                g.append(self.g_cols(w))
                d.append(-g[j])

            res = optimize.line_search(self.f_cols, self.g_cols, w, d[j], g[j],
                                       self.f_cols(w))
            if res[0] is None:
                return w, j
            else:
                alpha = res[0]
                w = w + alpha * d[j]

                g.append(self.g_cols(w))
                gnorm = vecnorm(g[j + 1], ord=np.inf)

                beta_j = max(0,
                             np.dot(g[j + 1].T, g[j + 1] - g[j]) /
                             np.dot(g[j], g[j]))  # eq. 7.74 Polak-Ribiere
                d.append(-g[j + 1] + beta_j * d[j])  # eq.7.67

                j += 1

        return w, j
Example #7
def conj_grad(function, gradient, starting_point, iterations, error, results):
    i = 0
    k = 0
    r = np.asarray(-gradient(starting_point))
    d = r
    x = starting_point
    sigma_new = np.dot(r.transpose(), r)
    sigma_0 = sigma_new

    while (i < iterations and sigma_new > error ** 2 * sigma_0):
        j = 0
        sigma_d = np.dot(d.transpose(), d)
        alfa = optimize.line_search(function, gradient, x, d)[0]  # step length along the current CG direction
        x = x + alfa * d
        r = -gradient(x)
        sigma_old = sigma_new
        sigma_new = np.dot(r.transpose(), r)
        beta = sigma_new / sigma_old
        d = r + np.dot(beta, d)
        k += 1

        results.append(x)
        if k == iterations or np.dot(r.transpose(), d) <= 0:
            d = r
            k = 0
        i = i + 1
Example #8
def gradient_descent(x0, f, f_prime, hessian=None, adaptative=False):
    x_i, y_i = x0
    all_x_i = list()
    all_y_i = list()
    all_f_i = list()

    for i in range(1, 100):
        all_x_i.append(x_i)
        all_y_i.append(y_i)
        all_f_i.append(f([x_i, y_i]))
        dx_i, dy_i = f_prime(np.asarray([x_i, y_i]))
        if adaptative:
            # Compute a step size using a line_search to satisfy the Wolfe
            # conditions
            step = optimize.line_search(f, f_prime,
                                np.r_[x_i, y_i], -np.r_[dx_i, dy_i],
                                np.r_[dx_i, dy_i], c2=.05)
            step = step[0]
            if step is None:
                step = 0
        else:
            step = 1
        x_i += - step*dx_i
        y_i += - step*dy_i
        if np.abs(all_f_i[-1]) < 1e-16:
            break
    return all_x_i, all_y_i, all_f_i
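A minimal usage sketch for the adaptive branch, assuming numpy as np and scipy.optimize are imported as the snippet expects; quad and quad_grad below are illustrative helpers, not part of the source.

import numpy as np
from scipy import optimize

def quad(x):                        # simple ill-conditioned quadratic
    return 0.5 * (4.0 * x[0] ** 2 + x[1] ** 2)

def quad_grad(x):
    return np.array([4.0 * x[0], x[1]])

xs, ys, fs = gradient_descent((1.5, 2.0), quad, quad_grad, adaptative=True)
print(len(fs), fs[-1])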
Example #9
def gaussNewton(f, Df, Jac, r, x, niter=10, backtrack=True):
    '''
    Solve a nonlinear least squares problem with Gauss-Newton method.
    Inputs:
        f -- the objective function
        Df -- gradient of f
        Jac -- jacobian of residual vector
        r -- the residual vector
        x -- initial point
        niter -- integer giving the number of iterations
    Returns:
        the minimizer
    '''
    a = 0
    for i in xrange(niter):
        #print i
        J = Jac(x)
        g = J.T.dot(r(x))
        #print J.T.dot(J)
        p = la.solve(J.T.dot(J), -g)
        slope = (g * p).sum()
        if backtrack:
            a = backtracking(f, slope, x, p)
        else:
            a = opt.line_search(f, Df, x, p)[0]
        x += a * p
        print x, f(x), a
    return x
Example #10
def BFGS_algorithm(obj_fun, theta0, max_iter=20000, epsilon=0):
    print("Starting BFGS algorithm.")
    #Initialization of object: bfgs
    bfgs = BFGS()
    bfgs.initialize(6, "inv_hess")
    #Lists to store results for theta (th) and cost(c)
    th,c = [],[]
    th.append(theta0)
    c.append(obj_fun(theta0))
    niter = max_iter
    success = (False, "max_iter reached.")
    #Iteration
    for n in range(max_iter):
        th_0 = th[n]
        g0 = gradient(th_0)
        #If loss<epsilon, converged
        #If epsilon=0, no check for convergence
        if (epsilon > 0) and (obj_fun(th_0) < epsilon):
            niter = n
            success = (True, "Loss = {}".format(obj_fun(th_0)))
            break
        #Compute search direction
        d = bfgs.dot(g0)
        #Compute step size through line search
        alpha = line_search(obj_fun,gradient,th_0,-g0)[0]
        #Update theta and gradient
        th_1 = th_0 - alpha*d
        g1 = gradient(th_1)
        #Update theta history and cost history
        th.append(th_1)
        c.append(obj_fun(th_1))
        #Update inverse hessian
        bfgs.update(th_1-th_0, g1-g0)
    print("Exiting.")
    return th,c,niter,success
Example #11
def damped_newton(s):
    sol = la.solve(H(s), -df(s))
    a = opt.line_search(f, df, s, sol)[0]
    if a is None:
        a = 1
    s_n = s + a * sol
    return s_n
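A hedged usage sketch, assuming f, df and H are the Rosenbrock function, gradient and Hessian from SciPy, la is numpy.linalg and opt is scipy.optimize (matching the names the snippet uses).

import numpy as np
import numpy.linalg as la
import scipy.optimize as opt
from scipy.optimize import rosen as f, rosen_der as df, rosen_hess as H

s = np.array([1.3, 0.7])
for _ in range(5):                  # a few damped Newton steps
    s = damped_newton(s)
print(s, f(s))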
Example #12
def quasi_newtonian(f, f1, x0=np.array([1, 1]), maxiter=0, epsi=0.001):
    if not maxiter: maxiter = len(x0) * 200

    k = 0
    gfk = f1(x0)
    N = len(x0)
    I = np.eye(N, dtype=int)
    Hk = I
    xk = x0

    while ln.norm(gfk) > epsi and k < maxiter:
        pk = -np.dot(Hk, gfk)

        alpha = line_search(f, f1, xk, pk)[0]

        xkp1 = xk + alpha * pk
        sk = xkp1 - xk
        xk = xkp1

        gfkp1 = f1(xkp1)
        yk = gfkp1 - gfk
        gfk = gfkp1

        k += 1

        ro = 1.0 / (np.dot(yk, sk))
        A1 = I - ro * sk[:, np.newaxis] * yk[np.newaxis, :]
        A2 = I - ro * yk[:, np.newaxis] * sk[np.newaxis, :]
        Hk = np.dot(A1, np.dot(
            Hk, A2)) + (ro * sk[:, np.newaxis] * sk[np.newaxis, :])

    return tuple(round(i, 2) for i in xk)
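A hedged usage sketch on SciPy's Rosenbrock test function, assuming the imports the snippet expects (numpy as np, numpy.linalg as ln, scipy.optimize.line_search).

import numpy as np
import numpy.linalg as ln
from scipy.optimize import line_search, rosen, rosen_der

# should land close to the minimizer (1.0, 1.0), rounded to two decimals
print(quasi_newtonian(rosen, rosen_der, x0=np.array([1.3, 0.7])))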
def gradient_descent(x0, f, f_prime, hessian=None, adaptative=False):
    x_i, y_i = x0
    all_x_i = list()
    all_y_i = list()
    all_f_i = list()

    for i in range(1, 100):
        all_x_i.append(x_i)
        all_y_i.append(y_i)
        all_f_i.append(f([x_i, y_i]))
        dx_i, dy_i = f_prime(np.asarray([x_i, y_i]))
        if adaptative:
            # Compute a step size using a line_search to satisfy the Wolfe
            # conditions
            step = optimize.line_search(f, f_prime,
                                np.r_[x_i, y_i], -np.r_[dx_i, dy_i],
                                np.r_[dx_i, dy_i], c2=.05)
            step = step[0]
        else:
            step = 1
        x_i += - step*dx_i
        y_i += - step*dy_i
        if np.abs(all_f_i[-1]) < 1e-16:
            break
    return all_x_i, all_y_i, all_f_i
Example #14
    def _line_search_update(self):
        """ Proceed line search with x & d """
        with warnings.catch_warnings():
            warnings.filterwarnings("error")
            try:
                if self._search is not None:
                    alpha, feva, success_flag, self._loss_cache, self._grad_cache = self._search.step(
                        self._x, self.get_d())
                    self.success += success_flag
                elif not scipy_flag:
                    feva, alpha = 0, 1
                else:
                    def f(x):
                        self._func.refresh_cache(x, dtype="loss")
                        return self._func.loss(x)

                    def g(x):
                        self._func.refresh_cache(x)
                        return self._func.grad(x)

                    alpha, feva, _, self._loss_cache, old_f, self._grad_cache = optimize.line_search(
                        f, g, self._x, self.get_d()
                    )
            except RuntimeWarning:
                feva = 0
                if self._search is not None:
                    alpha = self._search._params["floor"]
                else:
                    alpha = 0.01
        self._x += alpha * self._d
        self.feva += feva
        self._d = None
def gradient_descent(x0, f, f_prime, hessian, stepsize=None, nsteps=50):
    """
                    Steepest-Descent algorithm with option for line search
    """
    x_i, y_i = x0
    all_x_i = list()
    all_y_i = list()
    all_f_i = list()

    for i in range(1, nsteps):
        all_x_i.append(x_i)
        all_y_i.append(y_i)
        x = np.array([x_i, y_i])
        all_f_i.append(f(x))
        dx_i, dy_i = f_prime(x)
        if stepsize is None:
            # Compute a step size using a line_search to satisfy the Wolfe
            # conditions
            step = line_search(f,
                               f_prime,
                               np.r_[x_i, y_i],
                               -np.r_[dx_i, dy_i],
                               np.r_[dx_i, dy_i],
                               c2=.05)
            step = step[0]
            if step is None:
                step = 0
        else:
            step = stepsize
        x_i += -step * dx_i
        y_i += -step * dy_i
        if np.abs(all_f_i[-1]) < 1e-5:
            break
    return all_x_i, all_y_i, all_f_i
Example #16
    def _line_search_update(self):
        """ Proceed line search with x & d """
        with warnings.catch_warnings():
            warnings.filterwarnings("error")
            try:
                if self._search is not None:
                    alpha, feva, success_flag, self._loss_cache, self._grad_cache = self._search.step(
                        self._x, self.get_d())
                    self.success += success_flag
                elif not scipy_flag:
                    feva, alpha = 0, 1
                else:

                    def f(x):
                        self._func.refresh_cache(x, dtype="loss")
                        return self._func.loss(x)

                    def g(x):
                        self._func.refresh_cache(x)
                        return self._func.grad(x)

                    alpha, feva, _, self._loss_cache, old_f, self._grad_cache = optimize.line_search(
                        f, g, self._x, self.get_d())
            except RuntimeWarning:
                feva = 0
                if self._search is not None:
                    alpha = self._search._params["floor"]
                else:
                    alpha = 0.01
        self._x += alpha * self._d
        self.feva += feva
        self._d = None
Example #17
def conjugate_gradient(x0, obj_func, grd_func, args=()):
    f0 = obj_func(x0, *args)
    g0 = grd_func(x0, *args)
    p = -g0
    x = x0
    g = g0
    epoque = 0
    while np.linalg.norm(g) >= 0.000005:
        alpha, fc, gc, new_loss, old_loss, new_slope = line_search(
            f=obj_func,
            myfprime=grd_func,
            xk=x,
            pk=p,
            gfk=g,
            old_fval=f0,
            args=args)
        x = x + alpha * p
        h = grd_func(x, *args)
        dgg = np.linalg.norm(g)
        ngg = np.linalg.norm(h)

        # Fletcher-Reeves's beta (Eq 2.53)
        #         beta = ngg / dgg

        # Ribière-Polak beta
        delta = np.dot(h, (h - g))
        beta = max(0, delta / dgg)

        g = h
        p = -g + beta * p
        print(f"Epoque {epoque} and loss is: {new_loss}")
        epoque += 1
    return x
Example #18
 def line_search_init_param(self, func):
     x = self.renderer.get_param()
     jac = _get_jac(func=func, delta=0.005, x0=x)
     search_direction = -func(x) / jac(x)
     res = optimize.line_search(f=func, myfprime=jac, xk=x, pk=search_direction)
     alpha = res[0]
     x_new = x + alpha * search_direction
     self.renderer.set_param(x_new)
Example #19
def get_alpha(x):
    # line_search expects the callables f and gradient, not their values at x
    alpha, _, _, _, _, _ = opt.line_search(
        f,
        gradient,
        x,
        -gradient(x),
    )
    return alpha
def get_alpha(fun, current_point):
    def grad(x):
        return nd.Gradient(fun)([x[0], x[1]])

    x = np.ravel(current_point)
    p = -grad(x)  # current search direction
    a = line_search(fun, grad, x, p)[0]

    print(a)
Example #21
def line_search_rank0_scipy_scalar_search_wolfe1(
        F, Fp, c1, c2, old_F_val=None, old_Fp_val=None, **kwargs):
    from scipy.optimize.linesearch import scalar_search_wolfe1 as line_search
    alpha, phi, phi0 = line_search(
        F, Fp,
        phi0=old_F_val, derphi0=old_Fp_val, c1=c1, c2=c2,
        **kwargs)
    if alpha is None:
        phi = None
    return alpha, phi
Example #22
def get_alpha(fun, current_point):
    def grad(x):
        return nd.Gradient(fun)([x[0], x[1]])

    x = np.ravel(current_point)
    p = -grad(x)  # current search direction
    a = line_search(fun, grad, x, p)[0]
    return a


# line_search(fun,np.array([1.],[3.]),)
Example #23
def BFGS(x, f, g, eps, kmax, precision=6):
    gradient_norm = np.round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # =========== #
    H = I = np.identity(len(g(x)))
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        d = -H @ g(x)
        if k > 0:
            alpha, *_ = line_search(f,
                                    g,
                                    x,
                                    d,
                                    old_old_fval=f(x_prev),
                                    c1=0.01,
                                    c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            break
        x, x_prev = x + alpha * d, x
        s = x - x_prev
        y = g(x) - g(x_prev)
        s_col, y_col = s[:, None], y[:, None]   # column vectors for the outer products
        rho = 1.0 / float(y @ s)
        H = (I - rho * s_col @ y_col.T) @ H @ (I - rho * y_col @ s_col.T) \
            + rho * s_col @ s_col.T
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm((g(x_prev))))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm((g(x_prev)))**2))
        # =========== #
    data = pd.DataFrame(Xk,
                        columns=["alpha", "f(x)", "||g(x)||"],
                        dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
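As with the CGM example, a minimal usage sketch for BFGS above (same assumed imports: numpy as np, pandas as pd, scipy.optimize.line_search; the quadratic is illustrative).

import numpy as np
import pandas as pd
from scipy.optimize import line_search

A = np.array([[2.0, 0.3],
              [0.3, 1.5]])
f = lambda x: 0.5 * x @ A @ x
g = lambda x: A @ x

x_opt, history = BFGS(np.array([1.0, -2.0]), f, g, eps=1e-6, kmax=50)
print(x_opt)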
Example #24
def line_search_rank0_scipy_scalar_search_wolfe2(
        F, Fp, c1, c2, old_F_val=None, old_Fp_val=None, **kwargs):
    from scipy.optimize.linesearch import scalar_search_wolfe2 as line_search
    alpha_star, phi_star, phi0, derphi_star = line_search(
        F, Fp,
        phi0=old_F_val, derphi0=old_Fp_val, c1=c1, c2=c2,
        **kwargs)
    if derphi_star is None:
        alpha_star = None
    if alpha_star is None:
        phi_star = None
    return alpha_star, phi_star
Example #25
    def find_alpha(self, y_true, curr_pred, tree_pred):

        def alpha_obj(x):
            return self.logistic_loss(y_true, x)

        def alpha_grad(x):
            return self.logistic_grad(y_true, x)

        alpha = line_search(alpha_obj, alpha_grad, xk=curr_pred, pk=tree_pred)
        if not alpha[0]:
            return 1.0
        return alpha[0]
    def quasi_newton_bfgs(self, init_x, eps=1e-6, store=False):
        self.check_dimension(init_x)
        size = init_x.size

        init_f = self.f(init_x)
        current_x, current_f, current_g, current_H = np.copy(init_x), \
            init_f, self.grad_f(init_x), np.eye(size)

        hist_x = [init_x]
        hist_f = [init_f]
        m = 10
        previous_s = [None] * m
        previous_y = [None] * m

        iteration = 0
        lag = 100

        while current_f / init_f > eps:
            current_p = -np.dot(current_H, current_g)

            alpha = line_search(self.f, self.grad_f, current_x, current_p)[0]
            # alpha = self.line_search_wolfe(current_x, current_p)
            next_x = current_x + alpha * current_p
            next_f = self.f(next_x)
            next_g = self.grad_f(next_x)
            s = alpha * current_p
            y = next_g - current_g

            rho = np.dot(s, y)
            Hy = current_H.dot(y)

            next_H = current_H \
                    + (rho + Hy.dot(y)) * np.outer(s, s) / rho**2 \
                    - (np.outer(Hy, s) + np.outer(s, Hy)) / rho

            if iteration % lag == 0:
                if store:
                    hist_x.append(current_x)
                    hist_f.append(current_f)
                # print "iteration {}: {}".format(iteration, current_f)

            if iteration < m:
                previous_s[iteration] = s
                previous_y[iteration] = y

            iteration += 1

            current_x = next_x
            current_f = next_f
            current_g = next_g
            current_H = next_H

        return (current_x, current_f, iteration, previous_s, previous_y)
def steepest_descent(x, f: fl.Fluxion, tol: float = 1e-8):
    w = FluxionWrapper(f)
    xs = np.zeros([2001, 2])
    xs[0] = x
    for i in range(0, 2000):
        x = xs[i]
        grad = -w.diff(x)
        alpha = op.line_search(w.val, w.diff, x, grad)
        xs[i + 1] = x + alpha[0] * grad
        stepsize = np.linalg.norm(xs[i + 1] - xs[i])
        if stepsize < tol:
            break
    return (xs[0:i + 1, :], i + 1, stepsize, xs[i])
Example #28
 def ls_subopt(x, g, i):
     lam_mag = np.sqrt(np.mean(g[:-1]**2))
     rho_mag = np.sqrt(g[-1]**2)
     if rho_mag > 10 * lam_mag:
         print " ----- VBoost doing rho Line Search ------ "
         ff = lambda x: mixture_obj(x, i)
         gg = lambda x: mixture_obj_grad(x, i)
         alpha0, fc, gc, _, _, _ = \
             optimize.line_search(ff, gg, xk=x, pk=ls_dir)
         if alpha0 is not None:
             print "new rho = ", (x + alpha0 * ls_dir)[-1]
             x = x + alpha0 * ls_dir
     return x
Example #29
def lbfgs(f,fgrad,x0,maxiter=100,max_corr=25,grad_norm_tol=1e-9, ihp=None,ls_criteria="armijo"):
    """
    LBFGS algorithm as described by Nocedal & Wright
    In fact it gives numerically identical answers to L-BFGS-B on some test problems.
    """
    x = x0.copy()
    yield x
    if ihp is None: ihp = InverseHessianPairs(max_corr)
    oldg = fgrad(x)
    if ls_criteria=="armijo": fval = f(x)
    p = -oldg/np.linalg.norm(oldg)

    log = logging.getLogger("lbfgs")
    iter_count = 0
    while True:
        # TODO compare line searches
        g=None
        if ls_criteria == "strong_wolfe":
            alpha_star, _, _, fval, _, g = opt.line_search(f,fgrad,x,p,oldg)        
        elif ls_criteria == "armijo":
            import scipy.optimize.linesearch
            alpha_star,_,fval=scipy.optimize.linesearch.line_search_armijo(f,x,p,oldg,fval)
        else:
            raise NotImplementedError

        if alpha_star is None:
            log.error("lbfgs line search failed!")
            break
        s = alpha_star * p
        x += s
        yield x

        iter_count += 1
        
        if iter_count  >= maxiter:
            break

        if g is None: 
            log.debug("line search didn't give us a gradient. calculating")
            g = fgrad(x)

        if np.linalg.norm(g) < grad_norm_tol:
            break


        y = g - oldg
        ihp.add( s,y )
        p = ihp.mvp(-g)
        oldg = g

        log.info("lbfgs iter %i %8.3e",iter_count, fval)
    def quasi_newton_l_bfgs(self,
                            init_x,
                            previous_s,
                            previous_y,
                            eps=1e-6,
                            m=10,
                            store=False):
        self.check_dimension(init_x)
        size = init_x.size

        init_f = self.f(init_x)
        current_x, current_f, current_g = np.copy(init_x), init_f, self.grad_f(
            init_x)

        hist_x = [init_x]
        hist_f = [init_f]

        iteration = m
        lag = 100

        while current_f / init_f > eps:
            current_p = -self.l_bfgs_two_loop(current_g, previous_s,
                                              previous_y)
            alpha = line_search(self.f, self.grad_f, current_x, current_p)[0]

            next_x = current_x + alpha * current_p
            next_f = self.f(next_x)
            next_g = self.grad_f(next_x)
            s = alpha * current_p
            y = next_g - current_g

            del previous_s[0]
            previous_s.append(s)

            del previous_y[0]
            previous_y.append(y)

            current_x = next_x
            current_f = next_f
            current_g = next_g

            if iteration % lag == 0:
                if store:
                    hist_x.append(current_x)
                    hist_f.append(current_f)
                # print "iteration {}: {}".format(iteration, current_f)

            iteration += 1

        return (current_x, current_f, iteration, hist_x, hist_f, lag)
Example #31
def conjugate_gradient_step(x, z, w, gradf_old, p, version):
    '''
    One step of Conjugate gradient method with strong Wolfe conditions
    '''
    my_tuple = line_search(f, gradf, x, p, c1=c1, c2=c2, args=(z, w, version))
    alpha = my_tuple[0]
    if alpha is None:
        alpha = step_length(x, z, w, p, version)
    x += alpha * p
    gradf_new = gradf(x, z, w, version)
    beta = (gradf_new.T @ gradf_new) / (gradf_old.T @ gradf_old)
    p = -gradf_new + beta * p
    gradf_old = gradf_new
    return x, gradf_old, p
Example #32
    def _get_s(self, H, grad, estimate, t):

        p = np.dot(-H, grad)
        oofv = None if t == 0 else self.objectives[-2]
        results = line_search(self.get_objective,
                              lambda x: self.get_gradient(x)[:, 0],
                              estimate,
                              p,
                              gfk=grad[:, 0],
                              old_fval=self.objectives[-1],
                              old_old_fval=oofv)
        eta = results[0]

        return eta * p
Example #33
def optimize(H, x, y, maxiter, index, xRef, lambdaL2=0.5):
    print 'Doing super-resolution optimization'
    t = time()
    miny = np.min(y[0])
    maxy = np.max(y[0])
    print 'bounds of y : ' + str(miny) + ', ' + str(maxy)

    iteration = 0
    maxdiff = np.ones(len(y)) * (maxy - miny)
    threshold = 0.01 * (maxy - miny)

    while iteration < maxiter and np.max(maxdiff) > threshold:

        gradL2 = lossL2prime(x, H, y)

        #Find alpha that satisfies strong Wolfe conditions.
        #http://scipy.github.io/devdocs/generated/scipy.optimize.line_search.html#scipy.optimize.line_search
        res = line_search(lossL2, lossL2prime, x, -gradL2, args=(H, y))
        alphaL2 = res[0]
        if alphaL2 is None:
            alphaL2 = computeAlpha(x, gradL2)

        update = alphaL2 * gradL2

        if xRef is not None:
            gradDenoising = 2.0 * (x - xRef)
            alphaDenoising = computeAlpha(x, gradDenoising)
            update = (
                1 - lambdaL2
            ) * alphaDenoising * gradDenoising + lambdaL2 * alphaL2 * gradL2

        #Update high resolution image
        x = x - update

        #Threshold on Maxdiff or update magnitude ?
        for i in range(len(y)):
            maxdiff[i] = np.max(H[i].dot(x) - y[i])

        #Use bounds to limit intensity range of x
        x[x < miny] = miny
        x[x > maxy] = maxy

        iteration += 1
        if iteration == maxiter:
            print 'Maximum number of iterations is reached'

    print 'Optimization done in ' + str(time() - t) + ' s, in ' + str(
        iteration) + ' iterations'

    return x
Example #34
    def optimize(self, start_point, verbose=False):
        xk = start_point
        gk = self.g(xk)
        Hk = np.eye(gk.shape[0], gk.shape[0])
        iter = 0
        self.obj_f.reset_count()

        while True:
            dk = -Hk.dot(gk)

            alpha, fc, gc, new_fval, old_fval, new_slope = line_search(
                self.f, self.g, xk, dk)
            if alpha is None:
                alpha = ALPHA_BK

            sk = alpha * dk
            xk_plus_1 = xk + sk
            iter += 1
            gk_plus_1 = self.g(xk_plus_1)
            yk = gk_plus_1 - gk

            # change to matrix, as column vector
            sk = np.array([sk]).T
            yk = np.array([yk]).T

            Hk_puls_1 = Hk + (1+yk.T.dot(Hk).dot(yk)/yk.T.dot(sk))*(sk.dot(sk.T)/yk.T.dot(sk))-\
                        (sk.dot(yk.T).dot(Hk)+Hk.dot(yk).dot(sk.T))/yk.T.dot(sk)

            if verbose:
                print('----------')
                print("alpha", alpha)
                print("dk", dk)
                print("x_k+1", xk_plus_1)
                print("f_k+1", self.f(xk_plus_1))
                # print("Hk", Hk_puls_1)

            if should_break(xk, xk_plus_1):
                break
            xk = xk_plus_1
            Hk = Hk_puls_1
            gk = gk_plus_1

        print("     final point", xk_plus_1)
        print("     final_fval", self.f(xk_plus_1))
        print("     iter_times", iter)
        print("     function calls", self.obj_f.get_count()[0])
        print("     derivate calls", self.obj_f.get_count()[1])
        print("     hessian calls", self.obj_f.get_count()[2])
        return xk_plus_1, self.f(xk_plus_1)
Example #35
    def update_overdispersion(self):
        node = self.nodes['overdispersion']
        mu = node.expected_x()
        var = node.expected_var_x()
        tau = node.prior_prec.expected_x()
        mm = node.prior_mean.expected_x()
        nn = self.Nframe['count']

        # make an adjusted F that does not include our pars of interest
        F_adj = self.F() / node.expected_exp_x()

        def objfun(x):
            mu = x[:self.M]
            kap = x[self.M:]
            var = np.exp(kap)
            bar_exp_eta = np.exp(mu + 0.5 * var) * F_adj

            elbo = -0.5 * np.sum(tau * (var + (mu - mm)**2))
            elbo += 0.5 * np.sum(np.log(var))
            elbo += np.sum(nn * mu)
            elbo += -np.sum(bar_exp_eta)

            return -elbo

        def gradfun(x):
            jac = np.empty_like(x)
            mu = x[:self.M]
            kap = x[self.M:]
            var = np.exp(kap)
            bar_exp_eta = np.exp(mu + 0.5 * var) * F_adj

            jac[:self.M] = -tau * (mu - mm)
            jac[:self.M] += (nn - bar_exp_eta)
            jac[self.M:] = -0.5 * tau * var + 0.5
            jac[self.M:] += -0.5 * var * bar_exp_eta

            return -jac

        # parameter of vectors to optimize over
        starts = np.concatenate((mu, np.log(var)))
        start_g = gradfun(starts)

        alpha = line_search(objfun, gradfun, starts, -start_g, gfk=start_g)
        if alpha[0] is not None:
            xnew = starts - alpha[0] * start_g

            node.post_mean = xnew[:self.M]
            node.post_prec = np.exp(-xnew[self.M:])
            self.F(update=True)
Example #36
def GM(x, f, g, eps, kmax, precision=6):
    gradient_norm = np.round(np.linalg.norm(g(x)), precision)
    Xk = [[np.nan, f(x), gradient_norm]]
    rk = [np.nan]
    Mk = [np.nan]
    # ============== #
    k = 0
    while np.linalg.norm(g(x)) > eps and k < kmax:
        d = -g(x)
        if k > 0:
            alpha, *_ = line_search(f,
                                    g,
                                    x,
                                    d,
                                    old_old_fval=f(x_prev),
                                    c1=0.01,
                                    c2=0.45)
        else:
            alpha, *_ = line_search(f, g, x, d, c1=0.01, c2=0.45)
        if alpha is None:
            print("alpha not found (!)")
            break
        x, x_prev = x + alpha * d, x
        k += 1
        # =========== #
        gradient_norm = np.round(np.linalg.norm(g(x)), precision)
        Xk.append([alpha, f(x), gradient_norm])
        rk.append(np.linalg.norm(g(x)) / np.linalg.norm((g(x_prev))))
        Mk.append(np.linalg.norm(g(x)) / (np.linalg.norm((g(x_prev)))**2))
        # =========== #
    data = pd.DataFrame(Xk,
                        columns=["alpha", "f(x)", "||g(x)||"],
                        dtype=float)
    data['r'] = rk
    data['M'] = Mk
    return x, data
Example #37
def gradient_descent(fn, fn_grad, x0, gtol=1e-5, maxiter=100):
  i = 0
  x = x0.copy()
  while i < maxiter:
    i += 1
    dx = -fn_grad(x)
    if abs(dx).max() <= gtol:
      print 'Terminated since |g| <= %f' % gtol
      break
    t = sio.line_search(fn, fn_grad, x, dx, -dx)[0]
    x += t*dx
    print 'Step %d: y=%f, |g|=%f, t=%f' % (i, fn(x), np.linalg.norm(dx), t)
  if i >= maxiter:
    print 'Terminated due to iteration limit'
  return x
Example #38
def optimize(H,x,y,maxiter,index,xRef,lambdaL2=0.5):
  print('Doing super-resolution optimization')
  t = time()
  miny = np.min(y[0])
  maxy = np.max(y[0])
  print('bounds of y : '+str(miny)+', '+str(maxy))
  
  iteration = 0
  maxdiff = np.ones(len(y)) * (maxy-miny)
  threshold = 0.01 * (maxy-miny)
  
  while iteration<maxiter and np.max(maxdiff) > threshold:

    gradL2 = lossL2prime(x,H,y)
      
    #Find alpha that satisfies strong Wolfe conditions.
    #http://scipy.github.io/devdocs/generated/scipy.optimize.line_search.html#scipy.optimize.line_search
    res = line_search(lossL2, lossL2prime, x, -gradL2, args=(H,y))
    alphaL2 = res[0]
    if alphaL2 is None:
      alphaL2 = computeAlpha(x,gradL2) 
    
    update = alphaL2*gradL2    
          
    if xRef is not None:
      gradDenoising =2.0* (x-xRef)
      alphaDenoising = computeAlpha(x,gradDenoising)
      update = (1-lambdaL2)*alphaDenoising*gradDenoising + lambdaL2*alphaL2*gradL2   
       
    #Update high resolution image  
    x = x - update

    #Threshold on Maxdiff or update magnitude ?    
    for i in range(len(y)):
      maxdiff[i] = np.max(H[i].dot(x) - y[i])
    
    #Use bounds to limit intensity range of x
    x[x<miny] = miny
    x[x>maxy] = maxy
    
    iteration+=1
    if iteration==maxiter:
      print('Maximum number of iterations is reached')
  
  print('Optimization done in '+str(time()-t)+' s, in '+str(iteration)+' iterations')
  
  return x
Example #39
def gaussNewton(f, df, jac, r, x, niter=10):
    """Solve a nonlinear least squares problem with Gauss-Newton method.

    Parameters:
        f (function): The objective function.
        df (function): The gradient of f.
        jac (function): The jacobian of the residual vector.
        r (function): The residual vector.
        x (ndarray of shape (n,)): The initial point.
        niter (int): The number of iterations.
    
    Returns:
        (ndarray of shape (n,)) The minimizer.
    """
    for _ in range(niter):
        # Gauss-Newton direction: solve (J^T J) p = -J^T r
        J = jac(x)
        p = la.solve(np.dot(J.T, J), -np.dot(J.T, r(x)))
        alpha = line_search(f, df, x, p)[0]   # line_search returns a tuple; take the step length
        if alpha is None:                     # fall back to a full step if the search fails
            alpha = 1.0
        x = x + alpha * p
    return x
def steepest_descent(x0):
    x=[]
    x.append(x0)
    k=0
    
    tol=1e-5
    tol1=1
    while(tol1>tol):
        alpha=spo.line_search(f,df,x[k],-df(x[k]))
        c = alpha[0]
        if alpha[0] is None:
            c = 1
        
        xnew=x[k]-c*df(x[k])
        x.append(xnew)
        tol1=la.norm(x[k+1]-x[k])
        a=x[k+1]
        k+=1
        #print x
    return a,x
def damped_newton(x0):
    x=[]
    x.append(x0)
    k=0
    
    tol=1e-12
    tol1=1
    while(tol1>tol):
        s=la.solve(hessian(x[k]),-df(x[k]))
        alpha = spo.line_search(f, df, x[k], s)  # search along the Newton direction used in the update below
        c = alpha[0]
        if alpha[0] is None:
            c = 1
        
        xnew=x[k]+s*c
        x.append(xnew)
        tol1=la.norm(x[k+1]-x[k])
        a=x[k+1]
        k+=1
    return a,x
Example #42
    def update_weights(self, weight_deltas):
        real_weights = [layer.weight for layer in self.train_layers]

        weights_vector = matrix_list_in_one_vector(real_weights)
        gradients_vetor = matrix_list_in_one_vector(self.gradients)

        res = line_search(self.check_updates,
                          self.get_gradient_by_weights,
                          xk=weights_vector,
                          pk=matrix_list_in_one_vector(weight_deltas),
                          gfk=gradients_vetor,
                          amax=self.maxstep,
                          c1=self.c1,
                          c2=self.c2)

        step = (res[0] if res[0] is not None else self.step)
        # SciPy some times ignore `amax` argument and return
        # bigger result
        self.step = min(self.maxstep, step)
        self.set_weights(real_weights)

        return super(WolfeSearch, self).update_weights(weight_deltas)
Example #43
    def _FindCenter(self, u):
        """
        linesearch algorithm to find optimal alpha
        """
        qr,pr = self.shape
    
        u = np.asarray(u)[:pr]  # assumption: the first pr entries of u hold the weights
        U = sparse.dia_matrix( (u,0),shape=(pr,pr) )
        U2 = U.dot(U)
        U_inv = sparse.dia_matrix( (1./np.array(u),0), shape=(pr,pr))

        z = np.linalg.inv(self.X.dot(U2.dot(self.X.T))).dot(self.a)
        d = u - U2.dot(self.X.T).dot(z)
    
        f = lambda x: -np.sum(np.log(x) )
        gradf = lambda x: - 1./x
    
        if np.linalg.norm(U_inv.dot(d) )<.25:
            alpha=[1.]
        else:
            alpha = optimize.line_search(f,gradf,u, d)
    
        return u+ alpha[0]*d
Example #44
    def __call__(self, x0, conf=None, obj_fun=None, obj_fun_grad=None, status=None, obj_args=None):
        #    def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ):

        conf = get_default(conf, self.conf)
        obj_fun = get_default(obj_fun, self.obj_fun)
        obj_fun_grad = get_default(obj_fun_grad, self.obj_fun_grad)
        status = get_default(status, self.status)
        obj_args = get_default(obj_args, self.obj_args)

        if conf.output:
            globals()["output"] = conf.output

        output("entering optimization loop...")

        nc_of, tt_of, fn_of = wrap_function(obj_fun, obj_args)
        nc_ofg, tt_ofg, fn_ofg = wrap_function(obj_fun_grad, obj_args)

        time_stats = {"of": tt_of, "ofg": tt_ofg, "check": []}

        if conf.log:
            log = Log.from_conf(conf, ([r"of"], [r"$||$ofg$||$"], [r"alpha"]))
        else:
            log = None

        ofg = None

        it = 0
        xit = x0.copy()
        while 1:

            of = fn_of(xit)

            if it == 0:
                of0 = ofit0 = of_prev = of
                of_prev_prev = of + 5000.0

            if ofg is None:
                #            ofg = 1
                ofg = fn_ofg(xit)

            if conf.check:
                tt = time.clock()
                check_gradient(xit, ofg, fn_of, conf.delta, conf.check)
                time_stats["check"].append(time.clock() - tt)

            ofg_norm = nla.norm(ofg, conf.norm)

            ret = conv_test(conf, it, of, ofit0, ofg_norm)
            if ret >= 0:
                break
            ofit0 = of

            ##
            # Backtrack (on errors).
            alpha = conf.ls0
            can_ls = True
            while 1:
                xit2 = xit - alpha * ofg
                aux = fn_of(xit2)
                if aux is None:
                    alpha *= conf.ls_red_warp
                    can_ls = False
                    output("warp: reducing step (%f)" % alpha)
                elif conf.ls and conf.ls_method == "backtracking":
                    if aux < of * conf.ls_on:
                        break
                    alpha *= conf.ls_red
                    output("backtracking: reducing step (%f)" % alpha)
                else:
                    of_prev_prev = of_prev
                    of_prev = aux
                    break

                if alpha < conf.ls_min:
                    if aux is None:
                        raise RuntimeError, "giving up..."
                    output("linesearch failed, continuing anyway")
                    break

            # These values are modified by the line search, even if it fails
            of_prev_bak = of_prev
            of_prev_prev_bak = of_prev_prev

            if conf.ls and can_ls and conf.ls_method == "full":
                output("full linesearch...")
                alpha, fc, gc, of_prev, of_prev_prev, ofg1 = linesearch.line_search(
                    fn_of, fn_ofg, xit, -ofg, ofg, of_prev, of_prev_prev, c2=0.4
                )
                if alpha is None:  # line search failed -- use different one.
                    alpha, fc, gc, of_prev, of_prev_prev, ofg1 = sopt.line_search(
                        fn_of, fn_ofg, xit, -ofg, ofg, of_prev_bak, of_prev_prev_bak
                    )
                    if alpha is None or alpha == 0:
                        # This line search also failed to find a better solution.
                        ret = 3
                        break
                output(" -> alpha: %.8e" % alpha)
            else:
                if conf.ls_method == "full":
                    output("full linesearch off (%s and %s)" % (conf.ls, can_ls))
                ofg1 = None

            if conf.log:
                log(of, ofg_norm, alpha)

            xit = xit - alpha * ofg
            if ofg1 is None:
                ofg = None
            else:
                ofg = ofg1.copy()

            for key, val in time_stats.iteritems():
                if len(val):
                    output("%10s: %7.2f [s]" % (key, val[-1]))

            it = it + 1

        output("status:               %d" % ret)
        output("initial value:        %.8e" % of0)
        output("current value:        %.8e" % of)
        output("iterations:           %d" % it)
        output("function evaluations: %d in %.2f [s]" % (nc_of[0], nm.sum(time_stats["of"])))
        output("gradient evaluations: %d in %.2f [s]" % (nc_ofg[0], nm.sum(time_stats["ofg"])))

        if conf.log:
            log(of, ofg_norm, alpha, finished=True)

        if status is not None:
            status["log"] = log
            status["status"] = status
            status["of0"] = of0
            status["of"] = of
            status["it"] = it
            status["nc_of"] = nc_of[0]
            status["nc_ofg"] = nc_ofg[0]
            status["time_stats"] = time_stats

        return xit
Example #45
def my_fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
                 epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
                 retall=0, callback=None):
    """Minimize a function using the BFGS algorithm.

    :Parameters:

      f : callable f(x,*args)
          Objective function to be minimized.
      x0 : ndarray
          Initial guess.
      fprime : callable f'(x,*args)
          Gradient of f.
      args : tuple
          Extra arguments passed to f and fprime.
      gtol : float
          Gradient norm must be less than gtol before succesful termination.
      norm : float
          Order of norm (Inf is max, -Inf is min)
      epsilon : int or ndarray
          If fprime is approximated, use this value for the step size.
      callback : callable
          An optional user-supplied function to call after each
          iteration.  Called as callback(xk), where xk is the
          current parameter vector.

    :Returns: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, <allvecs>)

        xopt : ndarray
            Parameters which minimize f, i.e. f(xopt) == fopt.
        fopt : float
            Minimum value.
        gopt : ndarray
            Value of gradient at minimum, f'(xopt), which should be near 0.
        Bopt : ndarray
            Value of 1/f''(xopt), i.e. the inverse hessian matrix.
        func_calls : int
            Number of function_calls made.
        grad_calls : int
            Number of gradient calls made.
        warnflag : integer
            1 : Maximum number of iterations exceeded.
            2 : Gradient and/or function calls not changing.
        allvecs  :  list
            Results at each iteration.  Only returned if retall is True.

    *Other Parameters*:
        maxiter : int
            Maximum number of iterations to perform.
        full_output : bool
            If True,return fopt, func_calls, grad_calls, and warnflag
            in addition to xopt.
        disp : bool
            Print convergence message if True.
        retall : bool
            Return a list of results at each iteration if True.

    :Notes:

        Optimize the function, f, whose gradient is given by fprime
        using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
        and Shanno (BFGS) See Wright, and Nocedal 'Numerical
        Optimization', 1999, pg. 198.

    *See Also*:

      scikits.openopt : SciKit which offers a unified syntax to call
                        this and other solvers.

    """
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)

    print "Evaluating initial gradient ..."
    gfk = myfprime(x0)

    k = 0
    N = len(x0)
    I = numpy.eye(N,dtype=int)
    Hk = I

    print "Evaluating initial function value ..."
    fval = f(x0)

    old_fval = fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2*gtol]
    warnflag = 0
    gnorm = vecnorm(gfk,ord=norm)

    print "gtol  = %g" % gtol
    print "gnorm = %g" % gnorm

    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk,gfk)

        print "xk =", xk
        print "pk =", pk
        print "Begin iteration %d line search..." % (k + 1)
#        print "  gfk =", gfk
#        print "  Hk = \n", Hk

        # do line search for alpha_k
        old_old_fval = old_fval
        old_fval = fval
        alpha_k, fc, gc, fval, old_fval, gfkp1 = \
           linesearch.line_search(f,myfprime,xk,pk,gfk,
                                  old_fval,old_old_fval)
        if alpha_k is None:  # line search failed try different one.
            print "Begin line search (method 2) ..."
            alpha_k, fc, gc, fval, old_fval, gfkp1 = \
                     line_search(f,myfprime,xk,pk,gfk,
                                 old_fval,old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                print "Line search failed!"
                warnflag = 2
                break
        print "End line search, alpha = %g ..." % alpha_k

        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk,ord=norm)
        print "gnorm = %g" % gnorm
        if (k >= maxiter or gnorm <= gtol):
            break

        # Reset the initial quasi-Newton matrix to a scaled identity aimed
        # at reflecting the size of the inverse true Hessian
        deltaXDeltaGrad = numpy.dot(sk, yk);
        updateOk = deltaXDeltaGrad >= _epsilon * max(_epsilonSq, \
                       vecnorm(sk,ord=2) * vecnorm(yk, ord=2))
        if k == 1 and updateOk:
            Hk = deltaXDeltaGrad / numpy.dot(yk,yk) * numpy.eye(N);
            print "Hscaled =\n", Hk

        try: # this was handled in numeric, let it remain for more safety
            rhok = 1.0 / (numpy.dot(yk,sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        if isinf(rhok): # this is patch for numpy
            rhok = 1000.0
            print "Divide-by-zero encountered: rhok assumed large"
        A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok
        A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok
        Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \
                 * sk[numpy.newaxis,:]

    if gnorm > gtol:
        warnflag = 1

    if disp:
        if warnflag == 1:
            print "Warning: Maximum number of iterations has been exceeded"
        elif warnflag == 2:
            print "Warning: Desired error not necessarily achieved" \
                  "due to precision loss"
        else:
            print "Optimization terminated successfully."

        print "         Current function value: %g" % fval
        print "         Current gradient norm : %g" % gnorm
        print "         Gradient tolerance    : %g" % gtol
        print "         Iterations: %d" % k
        print "         Function evaluations: %d" % func_calls[0]
        print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist
Example #46
def steepest_desc(s):
    sol = -df(s)
    a = opt.line_search(f, df, s, sol)[0]
    s_n = s + a * sol
    return s_n
Example #47
def stepSize(x):
    res = optimize.line_search(lambda z: F(z[0], z[1]), lambda t: gradient(t[0], t[1]), np.array(x), np.array(sd), gr)
    alpha = res[0]
    return alpha
Example #48
    def __call__( self, x0, conf = None, obj_fun = None, obj_fun_grad = None,
                  status = None, obj_args = None ):
#    def fmin_sd( conf, x0, fn_of, fn_ofg, args = () ):

        conf = get_default( conf, self.conf )
        obj_fun = get_default( obj_fun, self.obj_fun )
        obj_fun_grad = get_default( obj_fun_grad, self.obj_fun_grad )
        status = get_default( status, self.status )
        obj_args = get_default( obj_args, self.obj_args )

        if conf.output:
            globals()['output'] = conf.output

        output( 'entering optimization loop...' )

        nc_of, tt_of, fn_of = wrap_function( obj_fun, obj_args )
        nc_ofg, tt_ofg, fn_ofg = wrap_function( obj_fun_grad, obj_args )

        time_stats = {'of' : tt_of, 'ofg': tt_ofg, 'check' : []}

        ofg = None

        it = 0
        xit = x0.copy()
        while 1:

            of = fn_of( xit )

            if it == 0:
                of0 = ofit0 = of_prev = of
                of_prev_prev = of + 5000.0

            if ofg is None:
                ofg = fn_ofg( xit )

            if conf.check:
                tt = time.clock()
                check_gradient( xit, ofg, fn_of, conf.delta, conf.check )
                time_stats['check'].append( time.clock() - tt )

            ofg_norm = nla.norm( ofg, conf.norm )

            ret = conv_test( conf, it, of, ofit0, ofg_norm )
            if ret >= 0:
                break
            ofit0 = of

            ##
            # Backtrack (on errors).
            alpha = conf.ls0
            can_ls = True
            while 1:
                xit2 = xit - alpha * ofg
                aux = fn_of( xit2 )

                if self.log is not None:
                    self.log(of, ofg_norm, alpha, it)

                if aux is None:
                    alpha *= conf.ls_red_warp
                    can_ls = False
                    output( 'warp: reducing step (%f)' % alpha )
                elif conf.ls and conf.ls_method == 'backtracking':
                    if aux < of * conf.ls_on: break
                    alpha *= conf.ls_red
                    output( 'backtracking: reducing step (%f)' % alpha )
                else:
                    of_prev_prev = of_prev
                    of_prev = aux
                    break

                if alpha < conf.ls_min:
                    if aux is None:
                        raise RuntimeError, 'giving up...'
                    output( 'linesearch failed, continuing anyway' )
                    break

            # These values are modified by the line search, even if it fails
            of_prev_bak = of_prev
            of_prev_prev_bak = of_prev_prev

            if conf.ls and can_ls and conf.ls_method == 'full':
                output( 'full linesearch...' )
                alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                       linesearch.line_search(fn_of,fn_ofg,xit,
                                              -ofg,ofg,of_prev,of_prev_prev,
                                              c2=0.4)
                if alpha is None:  # line search failed -- use different one.
                    alpha, fc, gc, of_prev, of_prev_prev, ofg1 = \
                           sopt.line_search(fn_of,fn_ofg,xit,
                                            -ofg,ofg,of_prev_bak,
                                            of_prev_prev_bak)
                    if alpha is None or alpha == 0:
                        # This line search also failed to find a better solution.
                        ret = 3
                        break
                output( ' -> alpha: %.8e' % alpha )
            else:
                if conf.ls_method == 'full':
                    output( 'full linesearch off (%s and %s)' % (conf.ls,
                                                                 can_ls) )
                ofg1 = None

            if self.log is not None:
                self.log.plot_vlines(color='g', linewidth=0.5)

            xit = xit - alpha * ofg
            if ofg1 is None:
                ofg = None
            else:
                ofg = ofg1.copy()

            for key, val in time_stats.iteritems():
                if len( val ):
                    output( '%10s: %7.2f [s]' % (key, val[-1]) )

            it = it + 1

        output( 'status:               %d' % ret )
        output( 'initial value:        %.8e' % of0 )
        output( 'current value:        %.8e' % of )
        output( 'iterations:           %d' % it )
        output( 'function evaluations: %d in %.2f [s]' \
              % (nc_of[0], nm.sum( time_stats['of'] ) ) )
        output( 'gradient evaluations: %d in %.2f [s]' \
              % (nc_ofg[0], nm.sum( time_stats['ofg'] ) ) )

        if self.log is not None:
            self.log(of, ofg_norm, alpha, it)

            if conf.log.plot is not None:
                self.log(save_figure=conf.log.plot,
                         finished=True)
            else:
                self.log(finished=True)
                
        if status is not None:
            status['log'] = self.log
            status['status'] = status
            status['of0'] = of0
            status['of'] = of
            status['it'] = it
            status['nc_of'] = nc_of[0]
            status['nc_ofg'] = nc_ofg[0]
            status['time_stats'] = time_stats

        return xit
Example #49
#optimisation in theta, psi
xpath = [0.001*np.random.randn(2)]
f_spherical = lambda x: objective(*spherical_to_cart(x[0],x[1],1))
def g_spherical(x):
    g_cart = grad(*spherical_to_cart(x[0],x[1],1))
    theta, psi = x
    J = np.array([[np.cos(psi)*np.cos(theta),np.sin(psi)*np.cos(theta),-np.sin(theta)],
        [-np.sin(psi)*np.sin(theta),np.cos(psi)*np.sin(theta),0.]])
    return np.dot(J,g_cart)
iteration=0
while True:
    search_dir = -g_spherical(xpath[-1])
    grad_norm = np.sum(np.square(search_dir))
    if grad_norm<1e-6:
        break
    alpha, fc,gc,foo,bar,baz = optimize.line_search(f_spherical,g_spherical,xpath[-1],search_dir,-search_dir)
    xnew = xpath[-1] + 0.01*alpha*search_dir
    xpath.append(xnew)
    iteration += 1
    print iteration,grad_norm,'\r',
    sys.stdout.flush()
print ''

xx,yy,zz = np.vstack(map(lambda x: spherical_to_cart(x[0],x[1],1),xpath)).T
ax.plot(xx,yy,zz,'mo',linewidth=2,mew=0)

#natural optimisation in theta, psi
xpath = [0.001*np.random.randn(2)]
iteration=0
while True:
    g = g_spherical(xpath[-1])