Example #1
def fminLooped(f,
               x0,
               fprime=None,
               args=(),
               gtol=1e-5,
               norm=Inf,
               epsilon=numpy.sqrt(numpy.finfo(float).eps),
               maxiter=None,
               full_output=0,
               disp=1,
               retall=0,
               callback=None):

    testVar = 0

    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1, )
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)

    newInputParams = locals()

    pickleBles, NonPickles = ParamsManager.filterUnpickles(newInputParams)

    # import dill
    # pickle.dump(pickleBles, open('inputParams.dat', 'w'))
    # pickle.dump(loopThing, open('loopFunc.dat', 'w'))

    for loopI in range(300):

        newInputParams = loopThing(newInputParams)
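Note: every example on this page relies on wrap_function from scipy.optimize to count objective and gradient evaluations. A minimal sketch of its behavior, modeled on older scipy sources (the exact shipped implementation may differ), looks like this:

def wrap_function(function, args):
    # Return a one-element call counter plus a wrapper that appends
    # the extra args to every call while incrementing the counter.
    ncalls = [0]

    def function_wrapper(*wrapper_args):
        ncalls[0] += 1
        return function(*(wrapper_args + args))

    return ncalls, function_wrapper

After wrapping, func_calls[0] holds the running number of objective evaluations.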
Example #2
def fminLooped(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
               epsilon=numpy.sqrt(numpy.finfo(float).eps), maxiter=None,
               full_output=0, disp=1, retall=0, callback=None):

    testVar = 0

    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N,dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2*gtol]
    warnflag = 0
    gnorm = vecnorm(gfk,ord=norm)

    newInputParams = locals()

    pickleBles, NonPickles = ParamsManager.filterUnpickles(newInputParams)

    import dill

    # Pickle streams must be written in binary mode; dill (rather than the
    # plain pickle module) is used so that the loop function itself can be
    # serialized alongside the picklable parameters.
    with open('inputParams.dat', 'wb') as fh:
        dill.dump(pickleBles, fh)
    with open('loopFunc.dat', 'wb') as fh:
        dill.dump(loopThing, fh)

    for loopI in range(10):

        newInputParams = loopThing(newInputParams)
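Since loopThing is a function object, dill is imported here: plain pickle can only serialize functions importable by name, and pickle streams must be opened in binary mode (see the corrected dump above). A hypothetical restart script that reloads the dumped state, with file names taken from the snippet and everything else assumed, could look like:

import dill

# Restore the serialized parameters and loop function.
with open('inputParams.dat', 'rb') as fh:
    inputParams = dill.load(fh)
with open('loopFunc.dat', 'rb') as fh:
    loopFunc = dill.load(fh)

# Resume the loop from the restored state.
for _ in range(10):
    inputParams = loopFunc(inputParams)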
Example #3
def _minimize_neldermead(func,
                         x0,
                         args=(),
                         callback=None,
                         xtol=1e-4,
                         ftol=1e-4,
                         maxiter=None,
                         maxfev=None,
                         disp=False,
                         return_all=False,
                         **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    Nelder-Mead algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    xtol : float
        Relative error in solution `xopt` acceptable for convergence.
    ftol : float
        Relative error in ``fun(xopt)`` acceptable for convergence.
    maxiter : int
        Maximum number of iterations to perform.
    maxfev : int
        Maximum number of function evaluations to make.

    """
    # _check_unknown_options(unknown_options)
    maxfun = maxfev
    retall = return_all

    fcalls, func = wrap_function(func, args)
    x0 = asfarray(x0).flatten()
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    rho = 1
    chi = 2
    psi = 0.5
    sigma = 0.5
    one2np1 = list(range(1, N + 1))

    sim = numpy.zeros((N + 1, N), dtype=x0.dtype)
    fsim = numpy.zeros((N + 1, ), float)
    sim[0] = x0
    if retall:
        allvecs = [sim[0]]
    fsim[0] = func(x0)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0, N):
        y = numpy.array(x0, copy=True)
        if y[k] != 0:
            y[k] = (1 + nonzdelt) * y[k]
        else:
            y[k] = zdelt

        sim[k + 1] = y
        f = func(y)
        fsim[k + 1] = f

    ind = numpy.argsort(fsim)
    fsim = numpy.take(fsim, ind, 0)
    # sort so sim[0,:] has the lowest function value
    sim = numpy.take(sim, ind, 0)

    iterations = 1

    while (fcalls[0] < maxfun and iterations < maxiter):
        if (numpy.max(numpy.ravel(numpy.abs(sim[1:] - sim[0]))) <= xtol
                and numpy.max(numpy.abs(fsim[0] - fsim[1:])) <= ftol):
            break

        xbar = numpy.add.reduce(sim[:-1], 0) / N
        xr = (1 + rho) * xbar - rho * sim[-1]
        fxr = func(xr)
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1 + rho * chi) * xbar - rho * chi * sim[-1]
            fxe = func(xe)

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else:  # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else:  # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1 + psi * rho) * xbar - psi * rho * sim[-1]
                    fxc = func(xc)

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink = 1
                else:
                    # Perform an inside contraction
                    xcc = (1 - psi) * xbar + psi * sim[-1]
                    fxcc = func(xcc)

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

                if doshrink:
                    for j in one2np1:
                        sim[j] = sim[0] + sigma * (sim[j] - sim[0])
                        fsim[j] = func(sim[j])

        ind = numpy.argsort(fsim)
        sim = numpy.take(sim, ind, 0)
        fsim = numpy.take(fsim, ind, 0)
        if callback is not None:
            callback(sim[0])
        iterations += 1
        if retall:
            allvecs.append(sim[0])

    x = sim[0]
    fval = numpy.min(fsim)
    warnflag = 0

    if fcalls[0] >= maxfun:
        warnflag = 1
        msg = _status_message['maxfev']
        if disp:
            print('Warning: ' + msg)
    elif iterations >= maxiter:
        warnflag = 2
        msg = _status_message['maxiter']
        if disp:
            print('Warning: ' + msg)
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % iterations)
            print("         Function evaluations: %d" % fcalls[0])

    result = OptimizeResult(fun=fval,
                            nit=iterations,
                            nfev=fcalls[0],
                            status=warnflag,
                            success=(warnflag == 0),
                            message=msg,
                            x=x,
                            final_simplex=(sim, fsim))
    if retall:
        result['allvecs'] = allvecs
    return result
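A minimal usage sketch for the _minimize_neldermead variant above, assuming wrap_function, OptimizeResult and _status_message from scipy.optimize are in scope as in the original module; the objective is a toy quadratic chosen only for illustration:

import numpy

def quadratic(x):
    # Convex bowl with its minimum at (3, 3).
    return float(numpy.sum((numpy.asarray(x) - 3.0) ** 2))

res = _minimize_neldermead(quadratic, x0=[0.0, 0.0],
                           xtol=1e-8, ftol=1e-8, disp=True)
print(res.x, res.fun, res.nit)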
Example #4
def minimize_bfgs(func: Callable,
                  init_param_vec: Sequence,
                  grad: Callable = None,
                  grad_tol: float = 1e-5,
                  return_all: bool = True,
                  last_record: 'OptimizeRecord' = None,
                  notes: dict = None):
    if notes is None:
        notes = {}
    notes["grad_tol"] = grad_tol
    notes["method"] = "BFGS"

    f = func
    fprime = grad
    epsilon = np.finfo(float).eps**0.5
    gtol = grad_tol
    norm = np.Inf

    x0 = init_param_vec
    if x0.ndim == 0:
        x0.shape = (1, )
    maxiter = len(x0) * 200

    func_calls, f = wrap_function(f, ())
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
        notes["grad_approx"] = True
    else:
        grad_calls, myfprime = wrap_function(fprime, ())
        notes["grad_approx"] = False

    k = 0
    N = len(x0)
    I = np.eye(N, dtype=int)

    if last_record:
        old_fval = last_record.final_func
        gfk = last_record.final_grad
        old_old_fval = last_record.last_vars["old_old_fval"]
        Hk = last_record.last_vars["Hk"]

    else:
        # Sets the initial step guess to dx ~ 1
        old_fval = f(x0)
        gfk = myfprime(x0)
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2
        Hk = I

    all_param_vec = [x0]
    all_func = [old_fval]
    all_grad = [gfk]

    xk = x0
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        pk = -np.dot(Hk, gfk)
        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                _line_search_wolfe12(f, myfprime, xk, pk, gfk,
                                     old_fval, old_old_fval, amin=1e-100, amax=1e100)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = xk + alpha_k * pk
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1

        k += 1

        if return_all:
            all_param_vec.append(xk)
            all_func.append(old_fval)
            all_grad.append(gfk)

        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break

        if not np.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

        try:  # this was handled in numeric; keep it for extra safety
            rhok = 1.0 / (np.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if np.isinf(rhok):  # this is patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
        A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
        Hk = np.dot(A1, np.dot(
            Hk, A2)) + (rhok * sk[:, np.newaxis] * sk[np.newaxis, :])

    fval = old_fval
    if np.isnan(fval):
        # This can happen if the first call to f returned NaN;
        # the loop is then never entered.
        warnflag = 2

    if warnflag == 2:
        msg = _status_message['pr_loss']
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
    else:
        msg = _status_message['success']

    history = {"func": all_func, "grad": all_grad, "param_vec": all_param_vec}
    final_status = {
        "msg": msg,
        "warnflag": warnflag,
        "num_func_call": func_calls[0],
        "num_grad_call": grad_calls[0],
        "num_iter": k
    }
    last_vars = {"Hk": Hk, "old_old_fval": old_old_fval}

    record = OptimizeRecord(history, final_status, last_vars, notes)

    return record
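A hedged usage sketch for minimize_bfgs above; it assumes the scipy.optimize internals it calls (wrap_function, approx_fprime, vecnorm, _line_search_wolfe12, _status_message) and the OptimizeRecord class are importable from the surrounding project:

import numpy as np

def f(x):
    return float(np.dot(x, x))

def grad(x):
    return 2.0 * np.asarray(x)

x0 = np.array([1.0, -2.0, 0.5])
record = minimize_bfgs(f, x0, grad=grad, grad_tol=1e-8)
# A follow-up run could warm-start by passing last_record=record,
# reusing the stored Hessian approximation and step-size history.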
Example #5
def model_policy_gradient(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        learning_rate: float = 1e-2,
        decay_rate: float = 0.96,
        decay_steps: int = 5,
        log_sigma_init: float = -5.0,
        max_iterations: int = 1000,
        batch_size: int = 10,
        radius_coeff: float = 3.0,
        warmup_steps: int = 10,
        batch_size_model: int = 65536,
        save_func_vals: bool = False,
        random_state: "cirq.RANDOM_STATE_OR_SEED_LIKE" = None,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_evaluations: Optional[int] = None
) -> scipy.optimize.OptimizeResult:
    """Model policy gradient algorithm for black-box optimization.

    The idea of this algorithm is to perform policy gradient, but estimate
    the function values using a surrogate model. 
    The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        learning_rate: The learning rate for the policy gradient.
        decay_rate: The learning decay rate for the Adam optimizer.
        decay_steps: The learning decay steps for the Adam optimizer.
        log_sigma_init: The initial value, in log scale, for the sigma of the
            policy.
        max_iterations: The maximum number of iterations to allow before
            termination.
        batch_size: The number of points to sample in each iteration. The cost
            of evaluating these samples is computed through the quantum
            computer cost model.
        radius_coeff: The ratio determining the size of the radius around the
            current iterate from which points are sampled to build the
            quadratic model. The ratio is taken with respect to the maximal
            radius of the samples drawn from the current policy.
        warmup_steps: The number of steps before the model policy gradient is
            used. Before these steps, the policy gradient is used without the
            model.
        batch_size_model: The model sample batch size. After the quadratic
            model is fit, it is evaluated on a sufficiently large batch of
            samples.
        save_func_vals: Whether to compute and save the function value at the
            current parameter values.
        random_state: A seed (int) or `np.random.RandomState` class to use when
            generating random values. If not set, defaults to using the module
            methods in `np.random`.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult
    """
    random_state = value.parse_random_state(random_state)

    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    log_sigma = np.ones(n) * log_sigma_init
    sigma = np.exp(log_sigma)

    # set up the first and second moment estimate
    m_mean = np.zeros(n)
    v_mean = np.zeros(n)
    m_log_sigma = np.zeros(n)
    v_log_sigma = np.zeros(n)

    # set up lr schedule and optimizer
    lr_schedule1 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)
    lr_schedule2 = _ExponentialSchedule(learning_rate,
                                        decay_steps=decay_steps,
                                        decay_rate=decay_rate,
                                        staircase=True)

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.fun = 0
    total_evals = 0
    num_iter = 0
    message = None

    # stats
    history_max = -np.inf

    while num_iter < max_iterations:
        # get samples from the current policy to evaluate
        z = random_state.randn(batch_size, n)
        new_xs = sigma * z + current_x

        if total_evals + batch_size > max_evaluations:
            message = "Reached maximum number of evaluations."
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += batch_size
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        if save_func_vals:
            res.func_vals.append(f(current_x))
            res.x_iters.append(np.copy(current_x))
            res.fun = res.func_vals[-1]

        # current sampling radius (maximal)
        max_radius = 0
        for x in new_xs:
            if np.linalg.norm(x - current_x) > max_radius:
                max_radius = np.linalg.norm(x - current_x)

        reward = [-y for y in new_ys]

        # warmup steps control whether to use the model to estimate the f
        if num_iter >= warmup_steps:
            # Determine points to use to build model
            model_xs = []
            model_ys = []
            for x, y in zip(known_xs, known_ys):
                if np.linalg.norm(x - current_x) < radius_coeff * max_radius:
                    model_xs.append(x)
                    model_ys.append(y)
            # safer way without the `SVD` not converging
            try:
                model = _get_quadratic_model(model_xs, model_ys, x)
                use_model = True
            except ValueError:
                use_model = False

            if use_model:
                # get samples (from model)
                z = random_state.randn(batch_size_model, n)
                new_xs = sigma * z + current_x

                # use the model for prediction
                new_ys = model.predict(new_xs - current_x)
                reward = [-y for y in new_ys]

        reward = np.array(reward)

        # stats
        reward_mean = np.mean(reward)
        reward_max = np.max(reward)

        if reward_max > history_max:
            history_max = reward_max

        # subtract baseline
        reward = reward - reward_mean

        # analytic derivatives (natural gradient policy gradient)
        delta_mean = np.dot(z.T, reward) * sigma
        delta_log_sigma = np.dot(z.T**2, reward) / np.sqrt(2)

        delta_mean_norm = np.linalg.norm(np.dot(z.T, reward))
        delta_log_sigma_norm = np.linalg.norm(np.dot(z.T**2, reward))

        delta_mean = delta_mean / delta_mean_norm
        delta_log_sigma = delta_log_sigma / delta_log_sigma_norm

        # gradient ascend to update the parameters
        current_x, m_mean, v_mean = _adam_update(delta_mean,
                                                 current_x,
                                                 num_iter,
                                                 m_mean,
                                                 v_mean,
                                                 lr_schedule=lr_schedule1)
        log_sigma, m_log_sigma, v_log_sigma = _adam_update(
            delta_log_sigma,
            log_sigma,
            num_iter,
            m_log_sigma,
            v_log_sigma,
            lr_schedule=lr_schedule2,
        )

        log_sigma = np.clip(log_sigma, -20.0, 2.0)
        sigma = np.exp(log_sigma)

        num_iter += 1

    final_val = f(current_x)
    res.func_vals.append(final_val)

    if message is None:
        message = "Reached maximum number of iterations."

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
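A usage sketch for model_policy_gradient, assuming the cirq helpers it depends on (value.parse_random_state, wrap_function, _ExponentialSchedule, _adam_update, _get_quadratic_model) are importable; the noisy toy objective below merely stands in for a real circuit cost:

import numpy as np

def noisy_objective(x):
    # Toy stochastic objective for illustration only.
    return float(np.sum(np.asarray(x) ** 2) + 0.01 * np.random.randn())

result = model_policy_gradient(
    noisy_objective,
    x0=np.zeros(4),
    learning_rate=1e-2,
    batch_size=10,
    warmup_steps=5,
    max_iterations=50,
    save_func_vals=True,
)
print(result.x, result.fun, result.nfev)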
Example #6
def _minimize_slsqp(func,
                    x0,
                    args=(),
                    jac=None,
                    bounds=None,
                    constraints=(),
                    maxiter=100,
                    ftol=1.0E-6,
                    iprint=1,
                    disp=False,
                    eps=_epsilon,
                    callback=None,
                    **unknown_options):
    """
    Minimize a scalar function of one or more variables using Sequential
    Least SQuares Programming (SLSQP).
    Options
    -------
    ftol : float
        Precision goal for the value of f in the stopping criterion.
    eps : float
        Step size used for numerical approximation of the jacobian.
    disp : bool
        Set to True to print convergence messages. If False,
        `verbosity` is ignored and set to 0.
    maxiter : int
        Maximum number of iterations.
    """
    _check_unknown_options(unknown_options)
    fprime = jac
    iter = maxiter
    acc = ftol
    epsilon = eps

    if not disp:
        iprint = 0

    # Constraints are triaged per type into a dictionary of tuples
    if isinstance(constraints, dict):
        constraints = (constraints, )

    cons = {'eq': (), 'ineq': ()}
    for ic, con in enumerate(constraints):
        # check type
        try:
            ctype = con['type'].lower()
        except KeyError:
            raise KeyError('Constraint %d has no type defined.' % ic)
        except TypeError:
            raise TypeError('Constraints must be defined using a '
                            'dictionary.')
        except AttributeError:
            raise TypeError("Constraint's type must be a string.")
        else:
            if ctype not in ['eq', 'ineq']:
                raise ValueError("Unknown constraint type '%s'." % con['type'])

        # check function
        if 'fun' not in con:
            raise ValueError('Constraint %d has no function defined.' % ic)

        # check jacobian
        cjac = con.get('jac')
        if cjac is None:
            # approximate jacobian function.  The factory function is needed
            # to keep a reference to `fun`, see gh-4240.
            def cjac_factory(fun):
                def cjac(x, *args):
                    return approx_jacobian(x, fun, epsilon, *args)

                return cjac

            cjac = cjac_factory(con['fun'])

        # update constraints' dictionary
        cons[ctype] += ({
            'fun': con['fun'],
            'jac': cjac,
            'args': con.get('args', ())
        }, )

    exit_modes = {
        -1: "Gradient evaluation required (g & a)",
        0: "Optimization terminated successfully.",
        1: "Function evaluation required (f & c)",
        2: "More equality constraints than independent variables",
        3: "More than 3*n iterations in LSQ subproblem",
        4: "Inequality constraints incompatible",
        5: "Singular matrix E in LSQ subproblem",
        6: "Singular matrix C in LSQ subproblem",
        7: "Rank-deficient equality constraint subproblem HFTI",
        8: "Positive directional derivative for linesearch",
        9: "Iteration limit exceeded"
    }

    # Wrap func
    feval, func = wrap_function(func, args)

    # Wrap fprime, if provided, or approx_jacobian if not
    if fprime:
        geval, fprime = wrap_function(fprime, args)
    else:
        geval, fprime = wrap_function(approx_jacobian, (func, epsilon))

    # Transform x0 into an array.
    x = asfarray(x0).flatten()

    # Set the parameters that SLSQP will need
    # meq, mieq: number of equality and inequality constraints
    meq = sum(
        map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['eq']]))
    mieq = sum(
        map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['ineq']]))
    # m = The total number of constraints
    m = meq + mieq
    # la = The number of constraints, or 1 if there are no constraints
    la = array([1, m]).max()
    # n = The number of independent variables
    n = len(x)

    # Define the workspaces for SLSQP
    n1 = n + 1
    mineq = m - meq + n1 + n1
    len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) \
            + 2*meq + n1 + ((n+1)*n)//2 + 2*m + 3*n + 3*n1 + 1
    len_jw = mineq
    w = zeros(len_w)
    jw = zeros(len_jw)

    # Decompose bounds into xl and xu
    if bounds is None or len(bounds) == 0:
        xl = np.empty(n, dtype=float)
        xu = np.empty(n, dtype=float)
        xl.fill(np.nan)
        xu.fill(np.nan)
    else:
        bnds = array(bounds, float)
        if bnds.shape[0] != n:
            raise IndexError('SLSQP Error: the length of bounds is not '
                             'compatible with that of x0.')

        with np.errstate(invalid='ignore'):
            bnderr = bnds[:, 0] > bnds[:, 1]

        if bnderr.any():
            raise ValueError('SLSQP Error: lb > ub in bounds %s.' %
                             ', '.join(str(b) for b in bnderr))
        xl, xu = bnds[:, 0], bnds[:, 1]

        # Mark infinite bounds with nans; the Fortran code understands this
        infbnd = ~isfinite(bnds)
        xl[infbnd[:, 0]] = np.nan
        xu[infbnd[:, 1]] = np.nan

    # Clip initial guess to bounds (SLSQP may fail with bounds-infeasible initial point)
    have_bound = np.isfinite(xl)
    x[have_bound] = np.clip(x[have_bound], xl[have_bound], np.inf)
    have_bound = np.isfinite(xu)
    x[have_bound] = np.clip(x[have_bound], -np.inf, xu[have_bound])

    # Initialize the iteration counter and the mode value
    mode = array(0, int)
    acc = array(acc, float)
    majiter = array(iter, int)
    majiter_prev = 0

    # Print the header if iprint >= 2
    if iprint >= 2:
        print("%5s %5s %16s %16s" % ("NIT", "FC", "OBJFUN", "GNORM"))

    while 1:

        if mode == 0 or mode == 1:  # objective and constraint evaluation required

            # Compute objective function
            fx = func(x)
            try:
                fx = float(np.asarray(fx))
            except (TypeError, ValueError):
                raise ValueError("Objective function must return a scalar")
            # Compute the constraints
            if cons['eq']:
                c_eq = concatenate([
                    atleast_1d(con['fun'](x, *con['args']))
                    for con in cons['eq']
                ])
            else:
                c_eq = zeros(0)
            if cons['ineq']:
                c_ieq = concatenate([
                    atleast_1d(con['fun'](x, *con['args']))
                    for con in cons['ineq']
                ])
            else:
                c_ieq = zeros(0)

            # Now combine c_eq and c_ieq into a single matrix
            c = concatenate((c_eq, c_ieq))

        if mode == 0 or mode == -1:  # gradient evaluation required

            # Compute the derivatives of the objective function
            # For some reason SLSQP wants g dimensioned to n+1
            g = append(fprime(x), 0.0)

            # Compute the normals of the constraints
            if cons['eq']:
                a_eq = vstack(
                    [con['jac'](x, *con['args']) for con in cons['eq']])
            else:  # no equality constraint
                a_eq = zeros((meq, n))

            if cons['ineq']:
                a_ieq = vstack(
                    [con['jac'](x, *con['args']) for con in cons['ineq']])
            else:  # no inequality constraint
                a_ieq = zeros((mieq, n))

            # Now combine a_eq and a_ieq into a single a matrix
            if m == 0:  # no constraints
                a = zeros((la, n))
            else:
                a = vstack((a_eq, a_ieq))
            a = concatenate((a, zeros([la, 1])), 1)

        # Call SLSQP
        slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw)

        # call callback if major iteration has incremented
        if callback is not None and majiter > majiter_prev:
            callback(x)

        # Print the status of the current iterate if iprint > 2 and the
        # major iteration has incremented
        if iprint >= 2 and majiter > majiter_prev:
            print("%5i %5i % 16.6E % 16.6E" %
                  (majiter, feval[0], fx, linalg.norm(g)))

        # If exit mode is not -1 or 1, slsqp has completed
        if abs(mode) != 1:
            break

        majiter_prev = int(majiter)

    # Optimization loop complete.  Print status if requested
    if iprint >= 1:
        print(exit_modes[int(mode)] + "    (Exit mode " + str(mode) + ')')
        print("            Current function value:", fx)
        print("            Iterations:", majiter)
        print("            Function evaluations:", feval[0])
        print("            Gradient evaluations:", geval[0])

    return OptimizeResult(x=x,
                          fun=fx,
                          jac=g[:-1],
                          nit=int(majiter),
                          nfev=feval[0],
                          njev=geval[0],
                          status=int(mode),
                          message=exit_modes[int(mode)],
                          success=(mode == 0))
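A usage sketch for _minimize_slsqp, assuming the Fortran slsqp routine and the other scipy.optimize helpers used above are importable; the bound-constrained toy problem is illustrative only:

import numpy as np

def objective(x):
    return (x[0] - 1.0) ** 2 + (x[1] - 2.5) ** 2

constraints = ({'type': 'ineq', 'fun': lambda x: x[0] - 2.0 * x[1] + 2.0},)
bounds = ((0.0, 10.0), (0.0, 10.0))

res = _minimize_slsqp(objective, np.array([2.0, 0.0]),
                      bounds=bounds, constraints=constraints,
                      ftol=1e-9, maxiter=100, disp=True)
print(res.x, res.success, res.message)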
Example #7
def _minimize_neldermead(func, x0, args=(), callback=None,
                         xtol=1e-4, ftol=1e-4, maxiter=None, maxfev=None,
                         disp=False, return_all=False, return_simplex=False,
                         **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    Nelder-Mead algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    xtol : float
        Relative error in solution `xopt` acceptable for convergence.
    ftol : float
        Relative error in ``fun(xopt)`` acceptable for convergence.
    maxiter : int
        Maximum number of iterations to perform.
    maxfev : int
        Maximum number of function evaluations to make.
    return_simplex : bool
        Set to True to return all nodes of final simplex and their function
        values.

    """
    _check_unknown_options(unknown_options)
    maxfun = maxfev
    retall = return_all

    fcalls, func = wrap_function(func, args)
    x0 = asfarray(x0).flatten()
    N = len(x0)
    if maxiter is None:
        maxiter = N * 200
    if maxfun is None:
        maxfun = N * 200

    rho = 1
    chi = 2
    psi = 0.5
    sigma = 0.5
    one2np1 = list(range(1, N + 1))

    sim = numpy.zeros((N + 1, N), dtype=x0.dtype)
    fsim = numpy.zeros((N + 1,), float)
    sim[0] = x0
    if retall:
        allvecs = [sim[0]]
    fsim[0] = func(x0)
    nonzdelt = 0.05
    zdelt = 0.00025
    for k in range(0, N):
        y = numpy.array(x0, copy=True)
        if y[k] != 0:
            y[k] = (1 + nonzdelt) * y[k]
        else:
            y[k] = zdelt

        sim[k + 1] = y
        f = func(y)
        fsim[k + 1] = f

    ind = numpy.argsort(fsim)
    fsim = numpy.take(fsim, ind, 0)
    # sort so sim[0,:] has the lowest function value
    sim = numpy.take(sim, ind, 0)

    iterations = 1

    while (fcalls[0] < maxfun and iterations < maxiter):
        if (numpy.max(numpy.ravel(numpy.abs(sim[1:] - sim[0]))) <= xtol and
                numpy.max(numpy.abs(fsim[0] - fsim[1:])) <= ftol):
            break

        xbar = numpy.add.reduce(sim[:-1], 0) / N
        xr = (1 + rho) * xbar - rho * sim[-1]
        fxr = func(xr)
        doshrink = 0

        if fxr < fsim[0]:
            xe = (1 + rho * chi) * xbar - rho * chi * sim[-1]
            fxe = func(xe)

            if fxe < fxr:
                sim[-1] = xe
                fsim[-1] = fxe
            else:
                sim[-1] = xr
                fsim[-1] = fxr
        else:  # fsim[0] <= fxr
            if fxr < fsim[-2]:
                sim[-1] = xr
                fsim[-1] = fxr
            else:  # fxr >= fsim[-2]
                # Perform contraction
                if fxr < fsim[-1]:
                    xc = (1 + psi * rho) * xbar - psi * rho * sim[-1]
                    fxc = func(xc)

                    if fxc <= fxr:
                        sim[-1] = xc
                        fsim[-1] = fxc
                    else:
                        doshrink = 1
                else:
                    # Perform an inside contraction
                    xcc = (1 - psi) * xbar + psi * sim[-1]
                    fxcc = func(xcc)

                    if fxcc < fsim[-1]:
                        sim[-1] = xcc
                        fsim[-1] = fxcc
                    else:
                        doshrink = 1

                if doshrink:
                    for j in one2np1:
                        sim[j] = sim[0] + sigma * (sim[j] - sim[0])
                        fsim[j] = func(sim[j])

        ind = numpy.argsort(fsim)
        sim = numpy.take(sim, ind, 0)
        fsim = numpy.take(fsim, ind, 0)
        if callback is not None:
            callback(sim[0])
        iterations += 1
        if retall:
            allvecs.append(sim[0])

    x = sim[0]
    fval = numpy.min(fsim)
    warnflag = 0

    if fcalls[0] >= maxfun:
        warnflag = 1
        msg = _status_message['maxfev']
        if disp:
            print('Warning: ' + msg)
    elif iterations >= maxiter:
        warnflag = 2
        msg = _status_message['maxiter']
        if disp:
            print('Warning: ' + msg)
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % iterations)
            print("         Function evaluations: %d" % fcalls[0])

    result = OptimizeResult(fun=fval, nit=iterations, nfev=fcalls[0],
                            status=warnflag, success=(warnflag == 0),
                            message=msg, x=x)
    if retall:
        result['allvecs'] = allvecs
    if return_simplex:
        result['sim'] = sim
        result['fsim'] = fsim
    return result
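The return_simplex flag added in this variant exposes the final simplex; a brief, illustrative call (again assuming the scipy.optimize helpers referenced above are in scope):

def bowl(x):
    return (x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2

res = _minimize_neldermead(bowl, x0=[0.0, 0.0], return_simplex=True)
final_sim, final_fsim = res['sim'], res['fsim']  # vertices of the final simplex and their values
print(res.x, final_fsim)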
Example #8
def _minimize_lbfgsb_timeup(
                     fun, x0, args=(), jac=None, bounds=None,
                     disp=None, maxcor=10, ftol=2.2204460492503131e-09,
                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
                     iprint=-1, callback=None, maxls=20,
                     t0=None, timeup=float("inf"),
                     **unknown_options): # JFF: added time-up check
    """
    Minimize a scalar function of one or more variables using the L-BFGS-B
    algorithm.

    Options
    -------
    disp : bool
       Set to True to print convergence messages.
    maxcor : int
        The maximum number of variable metric corrections used to
        define the limited memory matrix. (The limited memory BFGS
        method does not store the full hessian but uses this many terms
        in an approximation to it.)
    factr : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``, where ``eps``
        is the machine precision, which is automatically generated by
        the code. Typical values for `factr` are: 1e12 for low
        accuracy; 1e7 for moderate accuracy; 10.0 for extremely high
        accuracy.
    ftol : float
        The iteration stops when ``(f^k -
        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
    gtol : float
        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
        <= gtol`` where ``pg_i`` is the i-th component of the
        projected gradient.
    eps : float
        Step size used for numerical approximation of the jacobian.
    disp : int
        Set to True to print convergence messages.
    maxfun : int
        Maximum number of function evaluations.
    maxiter : int
        Maximum number of iterations.
    maxls : int, optional
        Maximum number of line search steps (per iteration). Default is 20.

    """
    _check_unknown_options(unknown_options)
    m = maxcor
    epsilon = eps
    pgtol = gtol
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape

    if bounds is None:
        bounds = [(None, None)] * n
    if len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    # unbounded variables must use None, not +-inf, for optimizer to work properly
    bounds = [(None if l == -np.inf else l, None if u == np.inf else u) for l, u in bounds]

    if disp is not None:
        if disp == 0:
            iprint = -1
        else:
            iprint = disp

    n_function_evals, fun = wrap_function(fun, ())
    if jac is None:
        def func_and_grad(x):
            f = fun(x, *args)
            g = _approx_fprime_helper(x, fun, epsilon, args=args, f0=f)
            return f, g
    else:
        def func_and_grad(x):
            f = fun(x, *args)
            g = jac(x, *args)
            return f, g

    nbd = zeros(n, int32)
    low_bnd = zeros(n, float64)
    upper_bnd = zeros(n, float64)
    bounds_map = {(None, None): 0,
                  (1, None): 1,
                  (1, 1): 2,
                  (None, 1): 3}
    for i in range(0, n):
        l, u = bounds[i]
        if l is not None:
            low_bnd[i] = l
            l = 1
        if u is not None:
            upper_bnd[i] = u
            u = 1
        nbd[i] = bounds_map[l, u]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, float64)
    f = array(0.0, float64)
    g = zeros((n,), float64)
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, int32)
    task = zeros(1, 'S60')
    csave = zeros(1, 'S60')
    lsave = zeros(4, int32)
    isave = zeros(44, int32)
    dsave = zeros(29, float64)

    task[:] = 'START'

    n_iterations = 0
    if t0 is None:
        t0 = time.time()

    time_profile.predicted_inner_loop_func2_duration = 0.0
    while 1:
        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr,
                       pgtol, wa, iwa, task, iprint, csave, lsave,
                       isave, dsave, maxls)
        task_str = task.tostring()

        # begin EB
        curr_time = time.time()
        predicted_inner_loop_func2_duration = (curr_time + 
            time_profile.maximize_inner_time_profile.mean +
            time_profile.func2_time_profile.mean - t0)
        if predicted_inner_loop_func2_duration > timeup: # JFF: added time-up check
            task[:] = ('STOP: PREDICTED COMPUTATION TIME EXCEEDS LIMIT')
            break
        # end EB
        if task_str.startswith(b'FG'):
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task_str.startswith(b'NEW_X'):
            # new iteration
            if n_iterations > maxiter:
                task[:] = 'STOP: TOTAL NO. of ITERATIONS EXCEEDS LIMIT'
            elif n_function_evals[0] > maxfun:
                task[:] = ('STOP: TOTAL NO. of f AND g EVALUATIONS '
                           'EXCEEDS LIMIT')
            else:
                n_iterations += 1
                if callback is not None:
                    callback(x)
        else:
            break

        time_profile.predicted_inner_loop_func2_duration = predicted_inner_loop_func2_duration

    task_str = task.tostring().strip(b'\x00').strip()
    if task_str.startswith(b'CONV'):
        warnflag = 0
    elif n_function_evals[0] > maxfun:
        warnflag = 1
    elif n_iterations > maxiter:
        warnflag = 1
    else:
        warnflag = 2

    # These two portions of the workspace are described in the mainlb
    # subroutine in lbfgsb.f. See line 363.
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # See lbfgsb.f line 160 for this portion of the workspace.
    # isave(31) = the total number of BFGS updates prior the current iteration;
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    return OptimizeResult(fun=f, jac=g, nfev=n_function_evals[0],
                          nit=n_iterations, status=warnflag, message=task_str,
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)
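A hedged usage sketch for _minimize_lbfgsb_timeup; note that it depends on a module-level time_profile object (with maximize_inner_time_profile.mean and func2_time_profile.mean already populated) and on scipy's private _lbfgsb bindings, none of which are shown in the snippet:

import time
import numpy as np

def objective(x):
    return float(np.sum((np.asarray(x) - 1.0) ** 2))

start = time.time()
res = _minimize_lbfgsb_timeup(objective, np.zeros(5),
                              bounds=[(-2.0, 2.0)] * 5,
                              t0=start, timeup=0.5)  # stop once ~0.5 s is predicted to be exceeded
print(res.x, res.status, res.message)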
Example #9
def model_gradient_descent(
        f: Callable[..., float],
        x0: np.ndarray,
        *,
        args=(),
        rate: float = 1e-1,
        sample_radius: float = 1e-1,
        n_sample_points: int = 100,
        n_sample_points_ratio: Optional[float] = None,
        rate_decay_exponent: float = 0.0,
        stability_constant: float = 0.0,
        sample_radius_decay_exponent: float = 0.0,
        tol: float = 1e-8,
        known_values: Optional[Tuple[List[np.ndarray], List[float]]] = None,
        max_iterations: Optional[int] = None,
        max_evaluations: Optional[int] = None) -> scipy.optimize.OptimizeResult:
    """Model gradient descent algorithm for black-box optimization.

    The idea of this algorithm is to perform gradient descent, but estimate
    the gradient using a surrogate model instead of, say, by
    finite-differencing. The surrogate model is a least-squared quadratic
    fit to points sampled from the vicinity of the current iterate.
    This algorithm works well when you have an initial guess which is in the
    convex neighborhood of a local optimum and you want to converge to that
    local optimum. It's meant to be used when the function is stochastic.

    Args:
        f: The function to minimize.
        x0: An initial guess.
        args: Additional arguments to pass to the function.
        rate: The learning rate for the gradient descent.
        sample_radius: The radius around the current iterate to sample
            points from to build the quadratic model.
        n_sample_points: The number of points to sample in each iteration.
        n_sample_points_ratio: This specifies the number of points to sample
            in each iteration as a coefficient of the number of points
            required to exactly determine a quadratic model. The number
            of sample points will be this coefficient times (n+1)(n+2)/2,
            rounded up, where n is the number of parameters.
            Setting this overrides n_sample_points.
        rate_decay_exponent: Controls decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant and a is the rate decay exponent
            (this parameter).
        stability_constant: Affects decay of learning rate.
            In each iteration, the learning rate is changed to the
            base learning rate divided by (i + 1 + S)**a, where S
            is the stability constant (this parameter) and a is the rate decay
            exponent.
        sample_radius_decay_exponent: Controls decay of sample radius.
        tol: The algorithm terminates when the difference between the current
            iterate and the next suggested iterate is smaller than this value.
        known_values: Any prior known values of the objective function.
            This is given as a tuple where the first element is a list
            of points and the second element is a list of the function values
            at those points.
        max_iterations: The maximum number of iterations to allow before
            termination.
        max_evaluations: The maximum number of function evaluations to allow
            before termination.

    Returns:
        Scipy OptimizeResult
    """

    if known_values is not None:
        known_xs, known_ys = known_values
        known_xs = [np.copy(x) for x in known_xs]
        known_ys = [np.copy(y) for y in known_ys]
    else:
        known_xs, known_ys = [], []

    if max_iterations is None:
        max_iterations = np.inf
    if max_evaluations is None:
        max_evaluations = np.inf

    n = len(x0)
    if n_sample_points_ratio is not None:
        n_sample_points = int(
            np.ceil(n_sample_points_ratio * (n + 1) * (n + 2) / 2))

    _, f = wrap_function(f, args)
    res = OptimizeResult()
    current_x = np.copy(x0)
    res.x_iters = []  # initializes as lists
    res.xs_iters = []
    res.ys_iters = []
    res.func_vals = []
    res.model_vals = [None]
    res.fun = 0
    total_evals = 0
    num_iter = 0
    converged = False
    message = None

    while num_iter < max_iterations:
        current_sample_radius = (sample_radius /
                                 (num_iter + 1)**sample_radius_decay_exponent)

        # Determine points to evaluate
        # in ball around current point
        new_xs = [np.copy(current_x)] + [
            current_x + _random_point_in_ball(n, current_sample_radius)
            for _ in range(n_sample_points)
        ]

        if total_evals + len(new_xs) > max_evaluations:
            message = 'Reached maximum number of evaluations.'
            break

        # Evaluate points
        res.xs_iters.append(new_xs)
        new_ys = [f(x) for x in new_xs]
        res.ys_iters.append(new_ys)
        total_evals += len(new_ys)
        known_xs.extend(new_xs)
        known_ys.extend(new_ys)

        # Save function value
        res.func_vals.append(new_ys[0])
        res.x_iters.append(np.copy(current_x))
        res.fun = res.func_vals[-1]

        # Determine points to use to build model
        model_xs = []
        model_ys = []
        for x, y in zip(known_xs, known_ys):
            if np.linalg.norm(x - current_x) < current_sample_radius:
                model_xs.append(x)
                model_ys.append(y)
        # Build and solve model
        model_gradient, model = _get_least_squares_model_gradient(
            model_xs, model_ys, current_x)

        # calculate the gradient and update the current point
        gradient_norm = np.linalg.norm(model_gradient)
        decayed_rate = (
            rate / (num_iter + 1 + stability_constant)**rate_decay_exponent)
        # Convergence criteria
        if decayed_rate * gradient_norm < tol:
            converged = True
            message = 'Optimization converged successfully.'
            break
        # Update
        current_x -= decayed_rate * model_gradient
        res.model_vals.append(
            model.predict([-decayed_rate * model_gradient])[0])

        num_iter += 1

    if converged:
        final_val = res.func_vals[-1]
    else:
        final_val = f(current_x)
        res.func_vals.append(final_val)

    if message is None:
        message = 'Reached maximum number of iterations.'

    res.x_iters.append(current_x)
    total_evals += 1
    res.x = current_x
    res.fun = final_val
    res.nit = num_iter
    res.nfev = total_evals
    res.message = message
    return res
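A usage sketch for model_gradient_descent, assuming its private helpers (_random_point_in_ball, _get_least_squares_model_gradient, wrap_function) are importable; the stochastic toy objective is for illustration only:

import numpy as np

def noisy_objective(x):
    return float(np.sum(np.asarray(x) ** 2) + 0.05 * np.random.randn())

result = model_gradient_descent(
    noisy_objective,
    x0=np.ones(3),
    rate=0.1,
    sample_radius=0.2,
    n_sample_points=30,
    max_iterations=100,
)
print(result.x, result.fun, result.message)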
Example #10
def fmin_slsqp( func, x0 , eqcons=[], f_eqcons=None, ieqcons=[], f_ieqcons=None,
                bounds = [], fprime = None, fprime_eqcons=None,
                fprime_ieqcons=None, args = (), iter = 100, acc = 1.0E-6,
                iprint = 1, full_output = 0, epsilon = _epsilon ):
    """
    Minimize a function using Sequential Least SQuares Programming

    Python interface function for the SLSQP Optimization subroutine
    originally implemented by Dieter Kraft.

    *Parameters*:
        func : callable f(x,*args)
            Objective function.
        x0 : ndarray of float
            Initial guess for the independent variable(s).
        eqcons : list
            A list of functions of length n such that
            eqcons[j](x0,*args) == 0.0 in a successfully optimized
            problem.
        f_eqcons : callable f(x,*args)
            Returns an array in which each element must equal 0.0 in a
            successfully optimized problem.  If f_eqcons is specified,
            eqcons is ignored.
        ieqcons : list
            A list of functions of length n such that
            ieqcons[j](x0,*args) >= 0.0 in a successfully optimized
            problem.
        f_ieqcons : callable f(x0,*args)
            Returns an array in which each element must be greater or
            equal to 0.0 in a successfully optimized problem.  If
            f_ieqcons is specified, ieqcons is ignored.
        bounds : list
            A list of tuples specifying the lower and upper bound
            for each independent variable [(xl0, xu0),(xl1, xu1),...]
        fprime : callable f(x,*args)
            A function that evaluates the partial derivatives of func.
        fprime_eqcons : callable f(x,*args)
            A function of the form f(x, *args) that returns the m by n
            array of equality constraint normals.  If not provided,
            the normals will be approximated. The array returned by
            fprime_eqcons should be sized as ( len(eqcons), len(x0) ).
        fprime_ieqcons : callable f(x,*args)
            A function of the form f(x, *args) that returns the m by n
            array of inequality constraint normals.  If not provided,
            the normals will be approximated. The array returned by
            fprime_ieqcons should be sized as ( len(ieqcons), len(x0) ).
        args : sequence
            Additional arguments passed to func and fprime.
        iter : int
            The maximum number of iterations.
        acc : float
            Requested accuracy.
        iprint : int
            The verbosity of fmin_slsqp:
              iprint <= 0 : Silent operation
              iprint == 1 : Print summary upon completion (default)
              iprint >= 2 : Print status of each iterate and summary
        full_output : bool
            If False, return only the minimizer of func (default).
            Otherwise, output final objective function and summary
            information.
        epsilon : float
            The step size for finite-difference derivative estimates.

    *Returns*: ( x, { fx, its, imode, smode })
        x : ndarray of float
            The final minimizer of func.
        fx : ndarray of float
            The final value of the objective function.
        its : int
            The number of iterations.
        imode : int
            The exit mode from the optimizer (see below).
        smode : string
            Message describing the exit mode from the optimizer.

    *Notes*

        Exit modes are defined as follows:
            -1 : Gradient evaluation required (g & a)
             0 : Optimization terminated successfully.
             1 : Function evaluation required (f & c)
             2 : More equality constraints than independent variables
             3 : More than 3*n iterations in LSQ subproblem
             4 : Inequality constraints incompatible
             5 : Singular matrix E in LSQ subproblem
             6 : Singular matrix C in LSQ subproblem
             7 : Rank-deficient equality constraint subproblem HFTI
             8 : Positive directional derivative for linesearch
             9 : Iteration limit exceeded

    """

    exit_modes = { -1 : "Gradient evaluation required (g & a)",
                    0 : "Optimization terminated successfully.",
                    1 : "Function evaluation required (f & c)",
                    2 : "More equality constraints than independent variables",
                    3 : "More than 3*n iterations in LSQ subproblem",
                    4 : "Inequality constraints incompatible",
                    5 : "Singular matrix E in LSQ subproblem",
                    6 : "Singular matrix C in LSQ subproblem",
                    7 : "Rank-deficient equality constraint subproblem HFTI",
                    8 : "Positive directional derivative for linesearch",
                    9 : "Iteration limit exceeded" }

    # Now do a lot of function wrapping

    # Wrap func
    feval, func = wrap_function(func, args)
    # Wrap fprime, if provided, or approx_fprime if not
    if fprime:
        geval, fprime = wrap_function(fprime,args)
    else:
        geval, fprime = wrap_function(approx_fprime,(func,epsilon))

    if f_eqcons:
        # Equality constraints provided via f_eqcons
        ceval, f_eqcons = wrap_function(f_eqcons,args)
        if fprime_eqcons:
            # Wrap fprime_eqcons
            geval, fprime_eqcons = wrap_function(fprime_eqcons,args)
        else:
            # Wrap approx_jacobian
            geval, fprime_eqcons = wrap_function(approx_jacobian,
                                                 (f_eqcons,epsilon))
    else:
        # Equality constraints provided via eqcons[]
        eqcons_prime = []
        for i in range(len(eqcons)):
            eqcons_prime.append(None)
            if eqcons[i]:
                # Wrap eqcons and eqcons_prime
                ceval, eqcons[i] = wrap_function(eqcons[i],args)
                geval, eqcons_prime[i] = wrap_function(approx_fprime,
                                                       (eqcons[i],epsilon))

    if f_ieqcons:
        # Inequality constraints provided via f_ieqcons
        ceval, f_ieqcons = wrap_function(f_ieqcons,args)
        if fprime_ieqcons:
            # Wrap fprime_ieqcons
            geval, fprime_ieqcons = wrap_function(fprime_ieqcons,args)
        else:
            # Wrap approx_jacobian
            geval, fprime_ieqcons = wrap_function(approx_jacobian,
                                                  (f_ieqcons,epsilon))
    else:
        # Inequality constraints provided via ieqcons[]
        ieqcons_prime = []
        for i in range(len(ieqcons)):
            ieqcons_prime.append(None)
            if ieqcons[i]:
                # Wrap ieqcons and ieqcons_prime
                ceval, ieqcons[i] = wrap_function(ieqcons[i],args)
                geval, ieqcons_prime[i] = wrap_function(approx_fprime,
                                                        (ieqcons[i],epsilon))


    # Transform x0 into an array.
    x = asfarray(x0).flatten()

    # Set the parameters that SLSQP will need
    # meq = The number of equality constraints
    if f_eqcons:
        meq = len(f_eqcons(x))
    else:
        meq = len(eqcons)
    if f_ieqcons:
        mieq = len(f_ieqcons(x))
    else:
        mieq = len(ieqcons)
    # m = The total number of constraints
    m = meq + mieq #+ len(bounds)
    # la = The number of constraints, or 1 if there are no constraints
    la = array([1,m]).max()
    # n = The number of independent variables
    n = len(x)

    # Define the workspaces for SLSQP
    n1 = n+1
    mineq = m + len(bounds) - meq + n1 + n1
#    mineq = m - meq + n1 + n1
    len_w = (3*n1+m)*(n1+1)+(n1-meq+1)*(mineq+2) + 2*mineq+(n1+mineq)*(n1-meq) \
            + 2*meq + n1 + (n+1)*n//2 + 2*m + 3*n + 3*n1 + 1
    len_jw = mineq
    w = zeros(len_w)
    jw = zeros(len_jw)

    # Decompose bounds into xl and xu
    if len(bounds) == 0:
        bounds = [(-1.0E12, 1.0E12) for i in range(n)]
    elif len(bounds) != n:
        raise IndexError(
            'SLSQP Error: if bounds is specified, len(bounds) == len(x0)')
    else:
        for i in range(len(bounds)):
            if bounds[i][0] > bounds[i][1]:
                raise ValueError(
                    'SLSQP Error: lb > ub in bounds[' + str(i) + ']  ' +
                    str(bounds[i]))

    xl = array( [ b[0] for b in bounds ] )
    xu = array( [ b[1] for b in bounds ] )



    # Initialize the iteration counter and the mode value
    mode = array(0,int)
    acc = array(acc,float)
    majiter = array(iter,int)
    majiter_prev = 0

    # Print the header if iprint >= 2
    if iprint >= 2:
        print "%5s %5s %16s %16s" % ("NIT","FC","OBJFUN","GNORM")

    while 1:

        if mode == 0 or mode == 1:  # objective and constraint evaluation required

            # Compute objective function
            fx = func(x)
            # Compute the constraints
            if f_eqcons:
                c_eq = f_eqcons(x)
            else:
                c_eq = array([ eqcons[i](x) for i in range(meq) ])
            if f_ieqcons:
                c_ieq = f_ieqcons(x)
            else:
                c_ieq = array([ ieqcons[i](x) for i in range(len(ieqcons)) ])

            # Now combine c_eq and c_ieq into a single matrix
            if m == 0:
                # no constraints
                c = zeros([la])
            else:
                # constraints exist
                if meq > 0 and mieq == 0:
                    # only equality constraints
                    c = c_eq
                if meq == 0 and mieq > 0:
                    # only inequality constraints
                    c = c_ieq
                if meq > 0 and mieq > 0:
                    # both equality and inequality constraints exist
                    c = append(c_eq, c_ieq)

        if mode == 0 or mode == -1: # gradient evaluation required

            # Compute the derivatives of the objective function
            # For some reason SLSQP wants g dimensioned to n+1
            g = append(fprime(x),0.0)

            # Compute the normals of the constraints
            if fprime_eqcons:
                a_eq = fprime_eqcons(x)
            else:
                a_eq = zeros([meq,n])
                for i in range(meq):
                    a_eq[i] = eqcons_prime[i](x)

            if fprime_ieqcons:
                a_ieq = fprime_ieqcons(x)
            else:
                a_ieq = zeros([mieq,n])
                for i in range(mieq):
                    a_ieq[i] = ieqcons_prime[i](x)

            # Now combine a_eq and a_ieq into a single a matrix
            if m == 0:
                # no constraints
                a = zeros([la,n])
            elif meq > 0 and mieq == 0:
                # only equality constraints
                a = a_eq
            elif meq == 0 and mieq > 0:
                # only inequality constraints
                a = a_ieq
            elif meq > 0 and mieq > 0:
                # both equality and inequality constraints exist
                a = vstack((a_eq,a_ieq))
            a = concatenate((a,zeros([la,1])),1)

        # Call SLSQP
        slsqp(m, meq, x, xl, xu, fx, c, g, a, acc, majiter, mode, w, jw)

        # Print the status of the current iterate if iprint > 2 and the
        # major iteration has incremented
        if iprint >= 2 and majiter > majiter_prev:
            print "%5i %5i % 16.6E % 16.6E" % (majiter,feval[0],
                                               fx,linalg.norm(g))

        # If exit mode is not -1 or 1, slsqp has completed
        if abs(mode) != 1:
            break

        majiter_prev = int(majiter)

    # Optimization loop complete.  Print status if requested
    if iprint >= 1:
        print exit_modes[int(mode)] + "    (Exit mode " + str(mode) + ')'
        print "            Current function value:", fx
        print "            Iterations:", majiter
        print "            Function evaluations:", feval[0]
        print "            Gradient evaluations:", geval[0]

    if not full_output:
        return x
    else:
        return [list(x),
                float(fx),
                int(majiter),
                int(mode),
                exit_modes[int(mode)] ]
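
A minimal usage sketch for an fmin_slsqp-style wrapper like the one above. Since only the tail of the wrapper is shown, scipy.optimize.fmin_slsqp is used as a stand-in, and the objective and inequality constraint are illustrative:

# Hedged sketch: assumes the wrapper keeps the scipy.optimize.fmin_slsqp
# call convention (func, x0, ieqcons=[...], bounds=[...], iprint=...).
from scipy.optimize import fmin_slsqp

def objective(x):
    # Simple quadratic bowl centred at (1, 2).
    return (x[0] - 1.0) ** 2 + (x[1] - 2.0) ** 2

def ineq(x):
    # Feasible region: x[0] + x[1] - 1 >= 0.
    return x[0] + x[1] - 1.0

xopt = fmin_slsqp(objective, [0.0, 0.0],
                  ieqcons=[ineq],
                  bounds=[(-5.0, 5.0), (-5.0, 5.0)],
                  iprint=0)
print(xopt)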
Exemplo n.º 11
0
def Customfmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
              epsilon= numpy.sqrt(numpy.finfo(float).eps), maxiter=None, full_output=0, disp=1,
              retall=0, callback=None):


        testVar = 0
        x0 = asarray(x0).squeeze()
        if x0.ndim == 0:
            x0.shape = (1,)
        if maxiter is None:
            maxiter = len(x0)*200
        func_calls, f = wrap_function(f, args)
        if fprime is None:
            grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
        else:
            grad_calls, myfprime = wrap_function(fprime, args)
        gfk = myfprime(x0)
        k = 0
        N = len(x0)
        I = numpy.eye(N,dtype=int)
        Hk = I
        old_fval = f(x0)
        old_old_fval = old_fval + 5000
        xk = x0
        if retall:
            allvecs = [x0]
        sk = [2*gtol]
        warnflag = 0
        gnorm = vecnorm(gfk,ord=norm)
        while (gnorm > gtol) and (k < maxiter):
            pk = -numpy.dot(Hk,gfk)
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
               linesearch.line_search(f,myfprime,xk,pk,gfk,
                                      old_fval,old_old_fval)
            if alpha_k is None:  # line search failed; try a different one.
                alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                         line_search(f,myfprime,xk,pk,gfk,
                                     old_fval,old_old_fval)
                if alpha_k is None:
                    # This line search also failed to find a better solution.
                    warnflag = 2
                    break
            xkp1 = xk + alpha_k * pk
            if retall:
                allvecs.append(xkp1)
            sk = xkp1 - xk
            xk = xkp1
            if gfkp1 is None:
                gfkp1 = myfprime(xkp1)

            yk = gfkp1 - gfk
            gfk = gfkp1
            if callback is not None:
                callback(xk)
            k += 1
            gnorm = vecnorm(gfk,ord=norm)
            if (gnorm <= gtol):
                break

            try: # this was handled in numeric, let it remain for extra safety
                rhok = 1.0 / (numpy.dot(yk,sk))
            except ZeroDivisionError:
                rhok = 1000.0
                print "Divide-by-zero encountered: rhok assumed large"
            if numpy.isinf(rhok): # this is patch for numpy
                rhok = 1000.0
                print "Divide-by-zero encountered: rhok assumed large"
            A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok
            A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok
            Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \
                     * sk[numpy.newaxis,:]

        if disp or full_output:
            fval = old_fval
        if warnflag == 2:
            if disp:
                print "Warning: Desired error not necessarily achieved" \
                      "due to precision loss"
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % func_calls[0]
                print "         Gradient evaluations: %d" % grad_calls[0]

        elif k >= maxiter:
            warnflag = 1
            if disp:
                print "Warning: Maximum number of iterations has been exceeded"
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % func_calls[0]
                print "         Gradient evaluations: %d" % grad_calls[0]
        else:
            if disp:
                print "Optimization terminated successfully."
                print "         Current function value: %f" % fval
                print "         Iterations: %d" % k
                print "         Function evaluations: %d" % func_calls[0]
                print "         Gradient evaluations: %d" % grad_calls[0]

        if full_output:
            retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
            if retall:
                retlist += (allvecs,)
        else:
            retlist = xk
            if retall:
                retlist = (xk, allvecs)

        return retlist
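
A minimal call sketch for this BFGS re-implementation, assuming it keeps the scipy.optimize.fmin_bfgs signature it mirrors; scipy's fmin_bfgs and the rosen test functions stand in here so the snippet runs on its own:

# Hedged sketch: Customfmin_bfgs mirrors fmin_bfgs, so the call pattern below
# should carry over; rosen/rosen_der are just convenient test functions.
import numpy as np
from scipy.optimize import fmin_bfgs, rosen, rosen_der

x0 = np.array([1.3, 0.7, 0.8, 1.9, 1.2])
xopt, fval, gopt, Hinv, nfev, njev, flag = fmin_bfgs(
    rosen, x0, fprime=rosen_der, gtol=1e-5, full_output=True, disp=False)
print(xopt, flag)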
Exemplo n.º 12
0
def _minimize(fun, x0, args=(), jac=None, callback=None,
                   gtol=1e-5, fxtol=1e-09, xtol=1e-09, norm=Inf,
                   eps=_epsilon, maxiter=None, disp=False,
                   return_all=False, **unknown_options):

    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)

    grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))


    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = None
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    xnorm = np.Inf
    fx = np.Inf
    print_lst = []
    while (gnorm > gtol) and (xnorm > xtol) and (fx > fxtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     _line_search_wolfe12(f, myfprime, xk, pk, gfk,
                                          old_fval, old_old_fval)
        except _LineSearchError:
            # search failed to find a better solution.
            print_lst.append("Przeszukiwanie liniowe zawiodlo lub nie moze osiagnac lepszego rozwiazania")
            warnflag = 2
            break

        xkp1 = xk + alpha_k * pk

        fx = np.absolute(old_old_fval - old_fval)
        xnorm = vecnorm(xkp1 - xk)
        if retall:
            allvecs.append(xkp1)

        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1

        if disp:
            print_ = ('Iter: ' + str(k) + '\n')
            print_ += ('x: ' + str(xk) + '\n')
            print_ += ('f(x): ' + str(f(xk)) + '\n')  # change to fx
            print_ += ('gtol: ' + str(gnorm) + '\n')
            print_ += ('xtol: ' + str(xnorm) + '\n')
            print_ += ('fxtol: ' + str(fx) + '\n')
            print_lst.append(print_)

        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break

        if not numpy.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            print_lst.append("Zlaneziono +-Inf za optymalna wartosc... lub cos poszlo zle.")
            warnflag = 2
            break

        try:  # this was handled in numeric, let it remain for extra safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            if disp:
                print_lst.append("Dzielenie przez zero!!")
        if isinf(rhok):  # this is patch for numpy
            rhok = 1000.0
            if disp:
                print_lst.append("Division by zero!!")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + (rhok * sk[:, numpy.newaxis] *
                                                 sk[numpy.newaxis, :])

    fval = old_fval
    if np.isnan(fval):
        # This can happen if the first call to f returned NaN;
        # the loop is then never entered.
        print_lst.append("Osiagnieto Nan w pierwszym wywolaniem algorytmu.")
        warnflag = 2

    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print_ = ("Ostrzezenie: " + msg)
            print_ += ("         Wartosc funkcji celu: %f" % fval)
            print_ += ("         Iteracje:  %d" % k)
            print_ += ("         Wywolania funkcji: %d" % func_calls[0])
            print_ += ("         Wywolania gradientu: %d" % grad_calls[0])

    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print_ = ("Ostrzerzenie: " + msg)
            print_ += ("         Wartosc funkcji celu: %f" % fval)
            print_ += ("         Iteracje:  %d" % k)
            print_ += ("         Wywolania funkcji: %d" % func_calls[0])
            print_ += ("         Wywolania gradientu: %d" % grad_calls[0])
            print_lst.append(print_)
    else:
        msg = _status_message['success']
        if disp:
            print_ = (msg + '\n')
            print_ += ("         Wartosc funkcji celu: %f" % fval)
            print_ += ("         Iteracje:  %d" % k)
            print_ += ("         Wywolania funkcji: %d" % func_calls[0])
            print_ += ("         Wywolania gradientu: %d" % grad_calls[0])
            print_lst.append(print_)

    for line in print_lst:
        print(line)

    result = OptimizeResult(fun=fval,lst=print_lst, jac=gfk, hess_inv=Hk, nfev=func_calls[0],
                            njev=grad_calls[0], status=warnflag,
                            success=(warnflag == 0), message=msg, x=xk,
                            nit=k)
    if retall:
        result['allvecs'] = allvecs
    return result
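
The loop above differs from plain BFGS by stopping on whichever of three tolerances is met first: gradient norm (gtol), step norm (xtol), or absolute change in the objective (fxtol). A stripped-down sketch of that combined test (names are illustrative, not part of the code above):

def should_stop(gnorm, xnorm, df, k, gtol=1e-5, xtol=1e-9, fxtol=1e-9, maxiter=200):
    # Stop when the gradient norm, the step norm, or the change in f falls
    # below its tolerance, or when the iteration budget is exhausted.
    return (gnorm <= gtol) or (xnorm <= xtol) or (df <= fxtol) or (k >= maxiter)

print(should_stop(gnorm=2e-6, xnorm=0.1, df=0.5, k=10))  # True: gradient converged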
Exemplo n.º 13
0
def _minimize_cg(fun, x0, args=(), jac=None, callback=None,
                 gtol=1e-5, norm=Inf, eps=_epsilon, maxiter=None,
                 disp=False, return_all=False,
                 xtol= 1e-6,
                 **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    conjugate gradient algorithm.

    Options for the conjugate gradient algorithm are:
        disp : bool
            Set to True to print convergence messages.
        maxiter : int
            Maximum number of iterations to perform.
        gtol : float
            Gradient norm must be less than `gtol` before successful
            termination.
        norm : float
            Order of norm (Inf is max, -Inf is min).
        eps : float or ndarray
            If `jac` is approximated, use this value for the step size.

    This function is called by the `minimize` function with `method=CG`. It
    is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    xk = x0
    old_fval = f(xk)
    old_old_fval = None

    if retall:
        allvecs = [xk]
    warnflag = 0
    pk = -gfk
    gnorm = vecnorm(gfk, ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        deltak = numpy.dot(gfk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     _line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval,
                                          old_old_fval, c2=0.4, xtol=xtol)
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xk = xk + alpha_k * pk
        if retall:
            allvecs.append(xk)
        if gfkp1 is None:
            gfkp1 = myfprime(xk)
        yk = gfkp1 - gfk
        beta_k = max(0, numpy.dot(yk, gfkp1) / deltak)
        pk = -gfkp1 + beta_k * pk
        gfk = gfkp1
        gnorm = vecnorm(gfk, ord=norm)
        if callback is not None:
            callback(xk)
        k += 1

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])

    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning: " + msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])

    result = OptimizeResult(fun=fval, jac=gfk, nfev=func_calls[0],
                            njev=grad_calls[0], status=warnflag,
                            success=(warnflag == 0), message=msg, x=xk)
    if retall:
        result['allvecs'] = allvecs
    return result
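
As the docstring notes, this helper is reached through the public minimize entry point rather than called directly. A minimal sketch of that route (rosen/rosen_der are just test functions; only standard CG options are passed):

from scipy.optimize import minimize, rosen, rosen_der

res = minimize(rosen, [1.3, 0.7, 0.8], jac=rosen_der, method='CG',
               options={'gtol': 1e-5, 'disp': False})
print(res.x, res.success)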
Exemplo n.º 14
0
def Customfmin_bfgs(f,
                    x0,
                    fprime=None,
                    args=(),
                    gtol=1e-5,
                    norm=Inf,
                    epsilon=numpy.sqrt(numpy.finfo(float).eps),
                    maxiter=None,
                    full_output=0,
                    disp=1,
                    retall=0,
                    callback=None):

    testVar = 0
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1, )
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
           linesearch.line_search(f,myfprime,xk,pk,gfk,
                                  old_fval,old_old_fval)
        if alpha_k is None:  # line search failed; try a different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     line_search(f,myfprime,xk,pk,gfk,
                                 old_fval,old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                warnflag = 2
                break
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        if (gnorm <= gtol):
            break

        try:  # this was handled in numeric, let it remain for extra safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if numpy.isinf(rhok):  # this is patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \
                 * sk[numpy.newaxis,:]

    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print("Warning: Desired error not necessarily achieved" \
                  "due to precision loss")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])

    elif k >= maxiter:
        warnflag = 1
        if disp:
            print("Warning: Maximum number of iterations has been exceeded")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])
    else:
        if disp:
            print("Optimization terminated successfully.")
            print("         Current function value: %f" % fval)
            print("         Iterations: %d" % k)
            print("         Function evaluations: %d" % func_calls[0])
            print("         Gradient evaluations: %d" % grad_calls[0])

    if full_output:
        retlist = xk, fval, gfk, Hk, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs, )
    else:
        retlist = xk
        if retall:
            retlist = (xk, allvecs)

    return retlist
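
For reference, the inverse-Hessian update applied at the bottom of the loop above, isolated as a small self-contained helper. This is a sketch of the standard BFGS formula H_{k+1} = (I - rho*s*y^T) H_k (I - rho*y*s^T) + rho*s*s^T with rho = 1/(y.s); the function name and test vectors are illustrative:

import numpy as np

def bfgs_update(Hk, sk, yk):
    # sk: step taken, yk: change in gradient, Hk: current inverse-Hessian estimate.
    rhok = 1.0 / np.dot(yk, sk)
    I = np.eye(len(sk))
    A1 = I - rhok * np.outer(sk, yk)
    A2 = I - rhok * np.outer(yk, sk)
    return np.dot(A1, np.dot(Hk, A2)) + rhok * np.outer(sk, sk)

print(bfgs_update(np.eye(2), np.array([0.1, 0.2]), np.array([0.05, 0.1])))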
Exemplo n.º 15
0
Arquivo: ncg.py Projeto: delallea/ncg
def leon_ncg_python(make_f, w_0, make_fprime=None, gtol=1e-5, norm=numpy.Inf,
              maxiter=None, full_output=0, disp=1, retall=0, callback=None,
              direction='hestenes-stiefel',
              minibatch_size=None,
              minibatch_offset=None,
              restart_every=0,
              normalize=False,
              constrain_lambda=True,
              ):
    """Minimize a function using a nonlinear conjugate gradient algorithm.

    Parameters
    ----------
    make_f : callable make_f(k0, k1)
        When called with (k0, k1) as arguments, return a function f such that
        f(w) is the objective to be minimized at parameter w, on minibatch
        x_k0 to x_k1. If k1 is None then the minibatch should contain all the
        remaining data.
    w_0 : ndarray
        Initial guess.
    make_fprime : callable make_fprime(k0, k1)
        Same as `make_f`, but to compute the derivative of f on a minibatch.
    gtol : float
        Stop when the norm of the gradient is less than gtol.
    norm : float
        Order of vector norm to use. -Inf is min, Inf is max.
    callback : callable
        An optional user-supplied function, called after each iteration as
        callback(w_t, lambda_t), where w_t is the current parameter vector
        and lambda_t the coefficient for the new direction.
    direction : string
        Formula used to compute the new direction, among:
            - polak-ribiere
            - hestenes-stiefel
    minibatch_size : int
        Size of each minibatch. Use None for batch learning.
    minibatch_offset : int
        Shift of the minibatch. Use None to use the minibatch size (i.e. no
        overlap at all).
    restart_every : int
        Force a restart every this many iterations. If <= 0, never force a
        restart.
    normalize : bool
        If True, use the normalized gradient instead of the gradient itself
        to find the next search direction, and always normalize the search
        direction.
    constrain_lambda : bool
        If True, the `lambda_t` factor used to compute conjugate directions
        is constrained to be non-negative (it is set to zero if the formula
        given by `direction` computes a negative value).

    Returns
    -------
    xopt : ndarray
        Parameters which minimize f, i.e. f(xopt) == fopt.
    fopt : float
        Minimum value found, f(xopt).
    func_calls : int
        The number of function calls made.
    grad_calls : int
        The number of gradient calls made.
    warnflag : int
        1 : Maximum number of iterations exceeded.
        2 : Gradient and/or function calls not changing.
    allvecs : ndarray
        If retall is True (see other parameters below), this list containing
        the result at each iteration is returned.

    Other Parameters
    ----------------
    maxiter : int
        Maximum number of iterations to perform.
    full_output : bool
        If True then return fopt, func_calls, grad_calls, and warnflag in
        addition to xopt.
    disp : bool
        Print convergence message if True.
    retall : bool
        Return a list of results at each iteration if True.

    Notes
    -----
    Optimize the function, f, whose gradient is given by fprime
    using the nonlinear conjugate gradient algorithm of Polak and
    Ribiere. See Wright & Nocedal, 'Numerical Optimization',
    1999, pg. 120-122.
    """
    if minibatch_offset is None:
        if minibatch_size is None:
            # Batch learning: no offset is needed.
            minibatch_offset = 0
        else:
            # Use the same offset as the minibatch size.
            minibatch_offset = minibatch_size
    w_0 = numpy.asarray(w_0).flatten()
    if maxiter is None:
        maxiter = len(w_0)*200
    k0 = 0
    k1 = minibatch_size
    assert make_fprime is not None
    f = make_f(k0, k1)
    fprime = make_fprime(k0, k1)
    func_calls = [0]
    grad_calls = [0]
    tmp_func_calls, f = wrap_function(f, ())
    tmp_grad_calls, myfprime = wrap_function(fprime, ())
    g_t = myfprime(w_0)
    t = 0
    N = len(w_0)
    w_t = w_0

    if retall:
        allvecs = [w_t]
    warnflag = 0
    if normalize:
        d_t = -g_t / numpy.linalg.norm(g_t)
    else:
        d_t = -g_t
    gnorm = vecnorm(g_t, ord=norm)
    w_t_previous = None

    while (gnorm > gtol) and (t < maxiter):
        #print '||g_t|| = %s' % numpy.linalg.norm(g_t)
        # Since the function changes at each iteration, we cannot re-use
        # previous function values.
        old_fval = f(w_t)
        if w_t_previous is None:
            old_old_fval = old_fval + 5000
        else:
            old_old_fval = f(w_t_previous)
        # These values are modified by the line search, even if it fails.
        old_fval_backup = old_fval
        old_old_fval_backup = old_old_fval

        alpha_t, fc, gc, old_fval, old_old_fval, h_t = \
                 line_search_wolfe1(f, myfprime, w_t, d_t, g_t, old_fval,
                                  old_old_fval, c2=0.4)
        if alpha_t is None: # line search failed -- use a different one.
            print '*********************************** LINE SEARCH FAILURE *********************************'
            alpha_t, fc, gc, old_fval, old_old_fval, h_t = \
                     line_search_wolfe2(f, myfprime, w_t, d_t, g_t,
                                        old_fval_backup, old_old_fval_backup)
            print '*********************************** %s *********************************' % alpha_t
            if alpha_t is None or alpha_t == 0:
                # This line search also failed to find a better solution.
                raise AssertionError()
                warnflag = 2
                break
        print 'alpha_t = %s' % alpha_t
        # Update weights.
        w_tp1 = w_t + alpha_t * d_t

        # Compute derivative after the weight update, if not done already.
        if h_t is None:
            h_t = myfprime(w_tp1)
        else:
            assert (h_t == myfprime(w_tp1)).all() # Sanity check.

        # Switch to next minibatch.
        func_calls[0] += tmp_func_calls[0]
        grad_calls[0] += tmp_grad_calls[0]
        k0 += minibatch_offset
        if minibatch_size is None:
            k1 = None
        else:
            k1 = k0 + minibatch_size
        tmp_func_calls, f = wrap_function(make_f(k0, k1), ())
        tmp_grad_calls, myfprime = wrap_function(make_fprime(k0, k1), ())

        # Compute derivative on new minibatch.
        g_tp1 = myfprime(w_tp1)
        if normalize:
            g_tp1_for_dt = g_tp1 / numpy.linalg.norm(g_tp1)
        else:
            g_tp1_for_dt = g_tp1

        if retall:
            allvecs.append(w_tp1)
        h_t_minus_g_t = h_t - g_t
        if direction == 'polak-ribiere':
            # Polak-Ribiere.
            delta_t = numpy.dot(g_t, g_t)
            lambda_t = numpy.dot(h_t_minus_g_t, g_tp1_for_dt) / delta_t
        elif direction == 'hestenes-stiefel':
            # Hestenes-Stiefel.
            lambda_t = numpy.dot(h_t_minus_g_t, g_tp1_for_dt) / numpy.dot(h_t_minus_g_t, d_t)
        else:
            raise NotImplementedError(direction)
        if constrain_lambda and lambda_t < 0:
            lambda_t = 0
        if restart_every > 0 and (t + 1) % restart_every == 0:
            lambda_t = 0
        if lambda_t == 0:
            print '*** RESTART ***'
        else:
            print 'lambda_t = %s' % lambda_t
        d_t = -g_tp1_for_dt + lambda_t * d_t
        if normalize:
            d_t /= numpy.linalg.norm(d_t)
        g_t = g_tp1
        w_t_previous = w_t
        w_t = w_tp1
        gnorm = vecnorm(g_t, ord=norm)
        if callback is not None:
            callback(w_t, lambda_t)
        t += 1


    if disp or full_output:
        fval = old_fval
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved due to precision loss"
            print " Current function value: %f" % fval
            print " Iterations: %d" % t
            print " Function evaluations: %d" % func_calls[0]
            print " Gradient evaluations: %d" % grad_calls[0]

    elif t >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print " Current function value: %f" % fval
            print " Iterations: %d" % t
            print " Function evaluations: %d" % func_calls[0]
            print " Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print " Current function value: %f" % fval
            print " Iterations: %d" % t
            print " Function evaluations: %d" % func_calls[0]
            print " Gradient evaluations: %d" % grad_calls[0]


    if full_output:
        retlist = w_t, fval, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = w_t
        if retall:
            retlist = (w_t, allvecs)

    return retlist
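
A sketch of the make_f / make_fprime factories the docstring asks for, using a least-squares objective over an in-memory dataset (the data, shapes, and closure names are illustrative assumptions, not part of ncg.py):

import numpy as np

X = np.random.randn(1000, 5)                    # illustrative design matrix
y = np.dot(X, np.ones(5)) + 0.1 * np.random.randn(1000)

def make_f(k0, k1):
    # Objective on minibatch rows k0..k1 (k1=None means "to the end").
    Xb, yb = X[k0:k1], y[k0:k1]
    return lambda w: 0.5 * np.sum((np.dot(Xb, w) - yb) ** 2)

def make_fprime(k0, k1):
    # Gradient of the same minibatch objective.
    Xb, yb = X[k0:k1], y[k0:k1]
    return lambda w: np.dot(Xb.T, np.dot(Xb, w) - yb)

# Hypothetical call into the optimizer defined above:
# w_opt = leon_ncg_python(make_f, np.zeros(5), make_fprime,
#                         minibatch_size=100, direction='hestenes-stiefel')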
Exemplo n.º 16
0
def _minimize_bhhh(fun,
                   x0,
                   bounds=None,
                   args=(),
                   jac=None,
                   callback=None,
                   tol={
                       "abs": 1e-05,
                       "rel": 1e-08
                   },
                   norm=np.Inf,
                   maxiter=None,
                   disp=False,
                   return_all=False,
                   **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    BHHH algorithm.

    Options
    -------
    disp : bool
        Set to True to print convergence messages.
    maxiter : int
        Maximum number of iterations to perform.
    tol : dict
        Absolute and relative tolerance values.
    norm : float
        Order of norm (Inf is max, -Inf is min).

    """

    _check_unknown_options(unknown_options)

    f = fun
    fprime = jac
    retall = return_all
    k = 0
    N = len(x0)

    x0 = np.asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1, )

    if bounds is None:
        bounds = np.array([np.inf] * N * 2).reshape((2, N))
        bounds[0, :] = -bounds[0, :]
    if bounds.shape[1] != N:
        raise ValueError("length of x0 != length of bounds")

    low = bounds[0, :]
    up = bounds[1, :]
    x0 = np.clip(x0, low, up)

    if maxiter is None:
        maxiter = len(x0) * 200

    # Need the aggregate functions to take only x0 as an argument
    func_calls, agg_fun = wrap_function_agg(f, args)

    if not callable(fprime):
        grad_calls, myfprime = wrap_function_num_dev(f, args)
    else:
        grad_calls, myfprime = wrap_function(fprime, args)

    def agg_fprime(x0):
        return myfprime(x0).sum(axis=0)

    # Setup for iteration
    old_fval = agg_fun(x0)

    gf0 = agg_fprime(x0)
    norm_pg0 = vecnorm(x0 - np.clip(x0 - gf0, low, up), ord=norm)

    xk = x0
    norm_pgk = norm_pg0

    if retall:
        allvecs = [x0]
    warnflag = 0

    for _ in range(maxiter):  # for loop instead of a while loop

        # Individual
        gfk_obs = myfprime(xk)

        # Aggregate fprime. Could be replaced by simply summing up gfk_obs.
        gfk = gfk_obs.sum(axis=0)
        norm_pgk = vecnorm(xk - np.clip(xk - gfk, low, up), ord=norm)

        # Check tolerance of gradient norm
        if norm_pgk <= tol["abs"] + tol["rel"] * norm_pg0:
            break

        # Sets the initial step guess to dx ~ 1
        old_old_fval = old_fval + np.linalg.norm(gfk) / 2

        # Calculate BHHH hessian and step
        Hk = np.dot(gfk_obs.T, gfk_obs)
        Bk = np.linalg.inv(Hk)
        pk = -np.dot(Bk, gfk)

        try:
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(
                agg_fun,
                agg_fprime,
                xk,
                pk,
                gfk,
                old_fval,
                old_old_fval,
                amin=1e-100,
                amax=1e100,
            )
        except _LineSearchError:
            # Line search failed to find a better solution.
            warnflag = 2
            break

        xkp1 = np.clip(xk + alpha_k * pk, low, up)
        if retall:
            allvecs.append(xkp1)
        xk = xkp1
        if callback is not None:
            callback(xk)
        k += 1

        if np.isinf(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

    fval = old_fval

    if warnflag == 2:
        msg = _status_message["pr_loss"]
    elif k >= maxiter:
        warnflag = 1
        msg = _status_message["maxiter"]
    elif np.isnan(fval) or np.isnan(xk).any():
        warnflag = 3
        msg = _status_message["nan"]
    else:
        msg = _status_message["success"]

    if disp:
        print("{}{}".format("Warning: " if warnflag != 0 else "", msg))
        print("         Current function value: %f" % fval)
        print("         Iterations: %d" % k)

    result = OptimizeResult(
        fun=fval,
        jac=gfk,
        hess_inv=Bk,
        nfev=func_calls[0],
        njev=grad_calls[0],
        status=warnflag,
        success=(warnflag == 0),
        message=msg,
        x=xk,
        nit=k,
    )
    if retall:
        result["allvecs"] = allvecs
    return result
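
The step that distinguishes BHHH from BFGS above is building the Hessian approximation from the outer product of per-observation gradients (gfk_obs). A small numerical sketch of that step; the gradient rows below are made-up values for illustration:

import numpy as np

# One gradient row per observation (illustrative numbers).
gfk_obs = np.array([[ 0.2, -0.1],
                    [-0.3,  0.4],
                    [ 0.1,  0.0]])

gfk = gfk_obs.sum(axis=0)          # aggregate gradient, as in agg_fprime
Hk = np.dot(gfk_obs.T, gfk_obs)    # BHHH outer-product Hessian approximation
pk = -np.linalg.solve(Hk, gfk)     # search direction used by the line search
print(pk)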