import time

import numpy as np
from scipy import linalg

# helper modules from the same package (assumed layout)
from setx0 import setx0
from postprocess import postprocess
from bfgs1run import bfgs1run
from gradsamp1run import gradsamp1run


def hanso(func, x0=None, grad=None, nvar=None, nstart=None, sampgrad=False,
          funcrtol=1e-20, gradnormtol=1e-6, verbose=2, fvalquit=-np.inf,
          cpumax=np.inf, maxit=100, callback=None, **kwargs):
    """HANSO: Hybrid Algorithm for Nonsmooth Optimization.

    The algorithm has two phases:

    BFGS phase: BFGS is run from multiple starting points, taken from the
    columns of the x0 parameter if provided, and otherwise from 10 points
    generated randomly. If the termination test is satisfied at the best
    point found by BFGS, or if nvar > 100, HANSO terminates; otherwise, it
    continues to:

    Gradient sampling phases: 3 gradient sampling phases are run from the
    lowest point found, using sampling radii 10 * evaldist, evaldist, and
    evaldist / 10.

    Termination takes place immediately during any phase if cpumax CPU
    time is exceeded.

    References
    ----------
    A.S. Lewis and M.L. Overton, Nonsmooth Optimization via Quasi-Newton
    Methods, Math. Programming, 2012

    J.V. Burke, A.S. Lewis and M.L. Overton, A Robust Gradient Sampling
    Algorithm for Nonsmooth, Nonconvex Optimization, SIAM J. Optimization
    15 (2005), pp. 751-779

    Parameters
    ----------
    func : callable taking 1D arrays of length nvar
        function being minimized
    x0 : 2D array of shape (nvar, nstart), optional (default None)
        initial points, one per column
    grad : callable, optional (default None)
        gradient of func
    nvar : int, optional (default None)
        number of dimensions of the problem (exclusive with x0)
    nstart : int, optional (default None)
        number of starting points for the BFGS phase (exclusive with x0)
    sampgrad : boolean, optional (default False)
        if set, gradient sampling is used to continue the algorithm in
        case BFGS fails to certify optimality
    funcrtol : float, optional (default 1e-20)
        relative termination tolerance on function values, passed to the
        bfgs backend
    gradnormtol : float, optional (default 1e-6)
        termination tolerance on the norm of the smallest vector in the
        convex hull of saved gradients
    verbose : int, optional (default 2)
        verbosity level, passed to the bfgs backend
    fvalquit : float, optional (default -inf)
        quit if a function value below this is reached
    cpumax : float, optional (default inf)
        quit if cpu time in secs exceeds this (applies to total running
        time)
    maxit : int, optional (default 100)
        maximum number of iterations, passed to the bfgs backend
    callback : callable, optional (default None)
        called on the final iterate before returning
    **kwargs : param-value dict
        optional parameters passed on to the bfgs backend, for example
        wolfe1 (default 0) and wolfe2 (default .5)

    Returns
    -------
    x : 1D array of length nvar
        best point found
    f : float
        function value at x
    loc : dict with keys 'dnorm' and 'evaldist'
        local optimality certificate at x; see postprocess for details
    X : 2D array
        points at which the saved gradients were evaluated
    G : 2D array
        gradients evaluated at the columns of X
    w : 1D array
        weights defining the convex combination d = G * w
    H : 2D array of shape (nvar, nvar)
        final inverse Hessian approximation of the best BFGS run
    pobj : list of tuples (duration of iteration, function value)
        trajectory for the best starting point, i.e. the starting point
        that led to the greatest overall decrease in the cost function.
        The time consumed by the gradient-sampling stage is not counted.

    Raises
    ------
    RuntimeError
    """

    def _log(msg, level=0):
        if verbose > level:
            print(msg)

    # sanitize x0
    if x0 is None:
        assert nvar is not None, (
            "No value specified for x0, expecting a value for nvar")
        assert nstart is not None, (
            "No value specified for x0, expecting a value for nstart")
        x0 = setx0(nvar, nstart)
    else:
        assert nvar is None, (
            "Value specified for x0, expecting no value for nvar")
        assert nstart is None, (
            "Value specified for x0, expecting no value for nstart")
        x0 = np.array(x0)
        if x0.ndim == 1:
            x0 = x0.reshape((-1, 1))
        nvar, nstart = x0.shape

    cpufinish = time.time() + cpumax

    # run BFGS phase
    kwargs['output_records'] = 1
    x, f, d, H, _, info, X, G, w, pobj = bfgs(
        func, x0=x0, grad=grad, fvalquit=fvalquit, funcrtol=funcrtol,
        gradnormtol=gradnormtol, cpumax=cpumax, maxit=maxit,
        verbose=verbose, callback=callback, **kwargs)

    # throw away all but the best result
    assert len(f) == np.array(x).shape[1], np.array(x).shape
    indx = np.argmin(f)
    f = f[indx]
    x = x[..., indx]
    d = d[..., indx]
    H = H[indx]  # bug if done when only one start point: H already a matrix
    X = X[indx]
    G = G[indx]
    w = w[indx]
    pobj = pobj[indx]

    dnorm = linalg.norm(d, 2)
    # the 2nd argument will not be used since x == X[:, 0] after bfgs
    loc, X, G, w = postprocess(x, np.nan, dnorm, X, G, w, verbose=verbose)

    if np.isnan(f) or np.isinf(f):
        _log('hanso: f is infinite or nan at all starting points')
        return x, f, loc, X, G, w, H, pobj
    if time.time() > cpufinish:
        _log('hanso: cpu time limit exceeded')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' % (
                 f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj
    if f < fvalquit:
        _log('hanso: reached target objective')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' % (
                 f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj
    if dnorm < gradnormtol:
        _log('hanso: verified optimality within tolerance in bfgs phase')
        _log('hanso: best point found has f = %g with local optimality '
             'measure: dnorm = %5.1e, evaldist = %5.1e' % (
                 f, loc['dnorm'], loc['evaldist']))
        return x, f, loc, X, G, w, H, pobj

    if sampgrad:
        # launch gradient sampling
        f_BFGS = f
        # save the optimality certificate provided by BFGS in case
        # gradient sampling cannot improve on it
        dnorm_BFGS = dnorm
        loc_BFGS = loc
        d_BFGS = d
        X_BFGS = X
        G_BFGS = G
        w_BFGS = w
        x0 = x.reshape((-1, 1))
        # otherwise gradient sampling is too expensive
        if maxit > 100:
            maxit = 100
        cpumax = cpufinish - time.time()  # time left

        # run gradsamp proper
        x, f, g, dnorm, X, G, w = gradsamp(func, x0, grad=grad,
                                           maxit=maxit, cpumax=cpumax)
        if f == f_BFGS:
            # gradient sampling did not reduce f
            _log('hanso: gradient sampling did not reduce f below best '
                 'point found by BFGS\n')
            # use the better optimality certificate
            if dnorm > dnorm_BFGS:
                loc = loc_BFGS
                d = d_BFGS
                X = X_BFGS
                G = G_BFGS
                w = w_BFGS
        elif f < f_BFGS:
            loc, X, G, w = postprocess(x, g, dnorm, X, G, w,
                                       verbose=verbose)
            _log('hanso: gradient sampling reduced f below best point '
                 'found by BFGS\n')
        else:
            # this should never happen
            raise RuntimeError(
                'hanso: f > f_BFGS: this should never happen')
        x = x[0]
        f = f[0]

    if callback:
        callback(x)
    return x, f, loc, X, G, w, H, pobj
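# Usage sketch (an illustration, not part of the original demos): minimize
# the nonsmooth function f(x) = ||x||_1 from 5 random starting points.
# The l1_demo / grad_l1_demo names below are hypothetical stand-ins defined
# inline; the sketch assumes the helper modules imported above exist.
if __name__ == '__main__':

    def l1_demo(x):
        return np.abs(x).sum()

    def grad_l1_demo(x):
        return np.sign(x)  # a valid subgradient almost everywhere

    x_opt, f_opt, loc, X, G, w, H, pobj = hanso(
        l1_demo, grad=grad_l1_demo, nvar=10, nstart=5, maxit=50, verbose=0)
    print("f_opt = %g, dnorm = %5.1e" % (f_opt, loc['dnorm']))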
if __name__ == '__main__':
    import os
    import scipy.io
    from example_functions import (l1, grad_l1)

    nvar = 300
    nstart = 20
    # the demo minimizes the l1 example function (the original label said
    # Rosenbrock, which did not match the code)
    func_name = 'l1-norm function in %i dimensions' % nvar
    if os.path.isfile("/tmp/x0.mat"):
        x0 = scipy.io.loadmat("/tmp/x0.mat", squeeze_me=True,
                              struct_as_record=False)['x0']
    else:
        x0 = setx0(nvar, nstart)
    if x0.ndim == 1:
        x0 = x0.reshape((-1, 1))

    # keep track of the best iterate across all starting points
    _x = None
    _f = np.inf
    for j in range(x0.shape[1]):
        print(">" * 100 + " (j = %i)" % j)
        x, f = bfgs1run(l1, x0[..., j], grad=grad_l1, maxit=100,
                        verbose=2, nvec=10)[:2]
        if f < _f:
            _f = f
            _x = x
def bfgs(func, x0=None, grad=None, nvar=None, nstart=None, maxit=100,
         nvec=0, verbose=1, funcrtol=1e-20, gradnormtol=1e-6,
         fvalquit=-np.inf, xnormquit=np.inf, cpumax=np.inf,
         strongwolfe=False, wolfe1=0, wolfe2=.5, quitLSfail=1, ngrad=None,
         evaldist=1e-6, H0=None, scale=1, output_records=2, callback=None):
    """Run BFGS from one or several starting points, delegating each run
    to bfgs1run.

    Parameters
    ----------
    func : callable taking 1D arrays of length nvar
        function being minimized
    grad : callable, optional (default None)
        gradient of func
    x0 : 2D array of shape (nvar, nstart), optional (default None)
        initial points, one per column
    nvar : int, optional (default None)
        number of dimensions of the problem (exclusive with x0)
    nstart : int, optional (default None)
        number of starting points for the BFGS algorithm (exclusive
        with x0)
    maxit : int, optional (default 100)
        param passed to bfgs1run function
    nvec : int, optional (default 0)
        0 for full BFGS; otherwise the number of vectors saved for
        limited-memory BFGS
    wolfe1 : float, optional (default 0)
        param passed to bfgs1run function
    wolfe2 : float, optional (default .5)
        param passed to bfgs1run function
    funcrtol : float, optional (default 1e-20)
        relative termination tolerance on function values, passed to
        bfgs1run
    gradnormtol : float, optional (default 1e-6)
        termination tolerance on d: smallest vector in convex hull of up
        to ngrad gradients
    xnormquit : float, optional (default inf)
        quit if norm(x) exceeds this value
    evaldist : float, optional (default 1e-6)
        the gradients used in the termination test qualify only if they
        are evaluated at points approximately within distance evaldist
        of x
    H0 : 2D array of shape (nvar, nvar), optional (default identity)
        for full BFGS: initial inverse Hessian approximation (must be
        positive definite, but this is not checked); it could, for
        example, be drawn from a Wishart distribution.
        For limited-memory BFGS: the same, but applied every iteration
        (must be sparse in this case)
    scale : int, optional (default 1)
        for full BFGS: 1 to scale H0 at the first iteration, 0 otherwise;
        for limited-memory BFGS: 1 to scale H0 every time, 0 otherwise
    cpumax : float, optional (default inf)
        quit if cpu time in secs exceeds this (applies to total running
        time)
    fvalquit : float, optional (default -inf)
        param passed to bfgs1run function
    quitLSfail : int, optional (default 1)
        1 to quit when the line search fails, 0 to continue (the latter
        is potentially useful if func is not numerically continuous)
    ngrad : int, optional (default max(100, 2 * nvar))
        number of gradients willing to save and use in solving QP to
        check optimality tolerance on smallest vector in their convex
        hull; see also the two previous options
    verbose : int, optional (default 1)
        param passed to bfgs1run function
    output_records : int, optional (default 2)
        which low-level execution records to return from the bfgs1run
        calls. Possible values are:
        0: don't return execution records from bfgs1run calls
        1: return the X, G and w records from bfgs1run calls
        2: return all execution records from bfgs1run calls

    Returns
    -------
    x : 2D array of shape (nvar, nstart)
        final iterates, one column per starting point
    f : list of nstart floats
        final function values, one per run of bfgs1run
    d : 2D array of shape (nvar, nstart)
        final smallest vectors in convex hull of saved gradients, one
        column per run of bfgs1run
    H : list of nstart 2D arrays, each of shape (nvar, nvar)
        final inverse Hessian approximations, one per run of bfgs1run
    itrecs : list of nstart ints
        numbers of iterations, one per run of bfgs1run; see bfgs1run for
        details
    inforecs : list of ints
        reason for termination, one per run; see bfgs1run for details
    pobj : list of lists of tuples (duration of iteration, function value)
        for each starting point, the energy trajectory over the
        iterations of the run started from it

    Optional outputs (when output_records > 0):
    Xrecs : list of nstart 2D arrays, each of shape (iter, nvar)
        iterates where saved gradients were evaluated, one array per run
        of bfgs1run; see bfgs1run for details
    Grecs : list of nstart 2D arrays, each of shape (nvar, nvar)
        gradients evaluated at these points, one per run of bfgs1run;
        see bfgs1run for details
    wrecs : list of nstart 1D arrays, each of length iter
        weights defining the convex combinations d = G * w, one array
        per run of bfgs1run; see bfgs1run for details
    fevalrecs : list of nstart 1D arrays, each of length iter
        records of all function evaluations in the line searches, one
        array per run of bfgs1run; see bfgs1run for details
    xrecs : list of nstart 2D arrays, each of shape (iter, nvar)
        record of x iterates
    Hrecs : list of nstart 2D arrays, each of shape (iter, nvar)
        record of H (inverse Hessian) iterates, one array per run of
        bfgs1run; see bfgs1run for details

    Notes
    -----
    If there is more than one starting vector, then:
    f, itrecs, inforecs are lists of length nstart;
    x, d are matrices of size nvar by nstart;
    H, Xrecs, Grecs, wrecs, xrecs, Hrecs are lists of length nstart, and
    fevalrecs is a list of lists.
    Thus, for example, d[:, i] = Grecs[i] * wrecs[i],
    for i = 0, ..., nstart - 1.

    BFGS is normally used for optimizing smooth, not necessarily convex,
    functions, for which the convergence rate is generically superlinear.
    But it also works very well for functions that are nonsmooth at their
    minimizers, typically with a linear convergence rate and a final
    inverse Hessian approximation that is very ill-conditioned, as long
    as a weak Wolfe line search is used. This version of BFGS works well
    both for smooth and nonsmooth functions and has a stopping criterion
    that applies to both cases, described above.

    Reference: A.S. Lewis and M.L. Overton, Nonsmooth Optimization via
    Quasi-Newton Methods, Math. Programming, 2012

    See Also
    --------
    `gradsamp`
    """

    def _fg(x):
        # evaluate func alone, or (func, grad) jointly when grad is given;
        # the original had a precedence bug that always called grad
        return func(x) if grad is None else (func(x), grad(x))

    def _log(msg, level=0):
        if verbose > level:
            print(msg)

    # sanitize x0
    if x0 is None:
        assert nvar is not None, (
            "No value specified for x0, expecting a value for nvar")
        assert nstart is not None, (
            "No value specified for x0, expecting a value for nstart")
        x0 = setx0(nvar, nstart)
    else:
        assert nvar is None, (
            "Value specified for x0, expecting no value for nvar")
        assert nstart is None, (
            "Value specified for x0, expecting no value for nstart")
        if x0.ndim == 1:
            x0 = x0[:, np.newaxis]
        nvar, nstart = x0.shape

    cpufinish = time.time() + cpumax
    pobj = []
    _f = []
    itrecs = []
    inforecs = []
    _d = []
    _x = []
    _H = []
    if output_records:
        xrecs = []
        fevalrecs = []
        Hrecs = []
        Xrecs = []
        Grecs = []
        wrecs = []

    for run in range(nstart):
        _log("Starting bfgs1run %i/%i..." % (run + 1, nstart))
        if verbose > 0 and nstart > 1:
            _log('bfgs: starting point %d' % (run + 1))
        cpumax = cpufinish - time.time()
        if output_records > 1:
            x, f, d, HH, it, info, X, G, w, fevalrec, xrec, Hrec, times = \
                bfgs1run(func, x0[..., run], grad=grad, maxit=maxit,
                         wolfe1=wolfe1, wolfe2=wolfe2, funcrtol=funcrtol,
                         gradnormtol=gradnormtol, fvalquit=fvalquit,
                         xnormquit=xnormquit, cpumax=cpumax,
                         strongwolfe=strongwolfe, nvec=nvec,
                         verbose=verbose, quitLSfail=quitLSfail,
                         ngrad=ngrad, evaldist=evaldist, H0=H0,
                         scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)  # was _f.append(x) in the original: a bug
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)
            Xrecs.append(X)
            Grecs.append(G)
            wrecs.append(w)
            fevalrecs.append(fevalrec)
            xrecs.append(xrec)
            Hrecs.append(Hrec)
        elif output_records > 0:
            x, f, d, HH, it, info, X, G, w, _, _, _, times = bfgs1run(
                func, x0[..., run], grad=grad, maxit=maxit, wolfe1=wolfe1,
                wolfe2=wolfe2, funcrtol=funcrtol, gradnormtol=gradnormtol,
                fvalquit=fvalquit, xnormquit=xnormquit, cpumax=cpumax,
                strongwolfe=strongwolfe, nvec=nvec, verbose=verbose,
                quitLSfail=quitLSfail, ngrad=ngrad, evaldist=evaldist,
                H0=H0, scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)
            Xrecs.append(X)
            Grecs.append(G)
            wrecs.append(w)
        else:  # avoid computing unnecessary arrays
            x, f, d, HH, it, info, _, _, _, _, _, _, times = bfgs1run(
                func, x0[..., run], grad=grad, maxit=maxit, wolfe1=wolfe1,
                wolfe2=wolfe2, funcrtol=funcrtol, gradnormtol=gradnormtol,
                fvalquit=fvalquit, xnormquit=xnormquit, cpumax=cpumax,
                strongwolfe=strongwolfe, nvec=nvec, verbose=verbose,
                quitLSfail=quitLSfail, ngrad=ngrad, evaldist=evaldist,
                H0=H0, scale=scale, callback=callback)
            _x.append(x)
            _f.append(f)
            _d.append(d)
            itrecs.append(it)
            inforecs.append(info)
        _log('... done (bfgs1run %i/%i).' % (run + 1, nstart))
        _log("\r\n")

        # HH should be exactly symmetric as of version 2.02,
        # but symmetrizing does no harm
        _H.append((HH + HH.T) / 2.)

        # commit the (duration, function value) trajectory of this run;
        # the loop variable is named fval so that it does not shadow the
        # final function value f used in the termination test below
        run_pobj = []
        for duration, fval in times:
            run_pobj.append((duration, fval))
        pobj.append(run_pobj)

        # stop early if the time budget is exhausted, the target function
        # value is reached, or the iterate has blown up
        if (time.time() > cpufinish or f < fvalquit
                or linalg.norm(x, 2) > xnormquit):
            break

    # we're done: now collect and return outputs to caller
    _x = np.array(_x).T
    _f = np.array(_f)
    _d = np.array(_d).T
    if output_records > 1:
        return (_x, _f, _d, _H, itrecs, inforecs, Xrecs, Grecs, wrecs,
                fevalrecs, xrecs, Hrecs, pobj)
    elif output_records > 0:
        return _x, _f, _d, _H, itrecs, inforecs, Xrecs, Grecs, wrecs, pobj
    else:
        return _x, _f, _d, _H, itrecs, inforecs, pobj
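# Usage sketch (an illustration, not part of the original demos): run bfgs
# from 3 random starts on a smooth quadratic defined inline, then check the
# optimality certificate d = G w for the best run, where G holds the saved
# gradients as columns and w the convex-combination weights (see Notes).
if __name__ == '__main__':

    def quad(x):
        return 0.5 * (x ** 2).sum()

    def grad_quad(x):
        return x

    x, f, d, H, its, infos, Xr, Gr, wr, pobj = bfgs(
        quad, grad=grad_quad, nvar=5, nstart=3, output_records=1, verbose=0)
    best = np.argmin(f)
    # d[:, best] should (approximately) equal the convex combination
    # Gr[best].dot(wr[best]) of the saved gradients
    print(np.allclose(d[:, best], np.dot(Gr[best], wr[best]), atol=1e-6))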
if __name__ == '__main__':
    import os
    import scipy.io

    # demo setup: the head of this block was truncated in the source, so
    # the values of nvar, nstart, func_name and wolfe_kinds below are
    # illustrative reconstructions
    nvar = 10
    nstart = 20
    func_name = "l2"
    wolfe_kinds = [False, True]  # weak and strong Wolfe line searches

    if "l2" in func_name:
        from example_functions import (l2 as func, gradl2 as grad)
    elif "banana" in func_name:
        nvar = 2
        from example_functions import (rosenbrock_banana as func,
                                       grad_rosenbrock_banana as grad)
    elif "esterov" in func_name:
        from example_functions import (nesterov as func,
                                       grad_nesterov as grad)

    if os.path.exists("/tmp/x0.mat"):
        x0 = scipy.io.loadmat("/tmp/x0.mat", squeeze_me=True,
                              struct_as_record=False)['x0']
        if x0.ndim == 1:
            x0 = x0.reshape((-1, 1), order='F')
    else:
        x0 = setx0(nvar, nstart)
    if "banana" in func_name:
        x0 = x0[:nvar, ...]
    nvar, nstart = x0.shape
    func_name = func_name + " in %i dimensions" % nvar

    print("Running HANSO for %s ..." % func_name)
    for strongwolfe in wolfe_kinds:
        # run BFGS (with gradient-sampling fallback)
        results = hanso(func, x0=x0, grad=grad, sampgrad=True,
                        strongwolfe=strongwolfe)
def gradsamp(func, x0, grad=None, cpumax=np.inf, **kwargs):
    # NOTE: the head of this function was lost upstream; the signature and
    # the setup below are a minimal reconstruction inferred from the call
    # sites in this file (hanso and the demo at the bottom). Extra options
    # such as maxit are forwarded to gradsamp1run via kwargs.
    nvar, nstart = x0.shape
    cpufinish = time.time() + cpumax
    x, f, g, dnorm, X, G, w = [], [], [], [], [], [], []
    for run in range(nstart):
        f0, g0 = func(x0[..., run]), grad(x0[..., run])
        if np.isinf(f0) or np.isnan(f0):
            # bad starting point: record it as-is and move on
            x.append(x0[..., run])
            f.append(f0)
            g.append(g0)
            dnorm.append(linalg.norm(g0, 2))
            X.append(x0[..., run])  # was x[..., run] in the source; x is a list here
            G.append(g0)
            w.append(1)
        else:
            kwargs['cpumax'] = cpufinish - time.time()  # time left
            xtmp, ftmp, gtmp, dnormtmp, Xtmp, Gtmp, wtmp = gradsamp1run(
                func, x0[..., run], grad=grad, f0=f0, g0=g0, **kwargs)
            x.append(xtmp)
            f.append(ftmp)
            g.append(gtmp)
            dnorm.append(dnormtmp)
            X.append(Xtmp)
            G.append(Gtmp)
            w.append(wtmp)
        if time.time() > cpufinish:
            break
    return x, f, np.array(g).T, dnorm, np.array(X)[0], np.array(G)[0], w


if __name__ == '__main__':
    from example_functions import (l1 as func, grad_l1 as grad)

    x0 = setx0(20, 10)
    x, f, g, dnorm, X, G, w = gradsamp(func, x0, grad=grad)
    print("fmin: %s" % f)
    print("xopt: %s" % x)
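# Illustrative sketch (not the library's gradsamp internals): a single
# gradient-sampling certificate step for f(x) = ||x||_1. Gradients are
# sampled within one of the radii HANSO uses (10*evaldist, evaldist,
# evaldist/10), and the smallest vector in their convex hull is found with
# scipy's SLSQP solver; a small norm certifies approximate stationarity.
if __name__ == '__main__':
    from scipy.optimize import minimize

    rng = np.random.RandomState(0)
    x_cur = np.array([0.1, -0.05])   # current iterate
    radius = 0.1                     # sampling radius (e.g. 10 * evaldist)
    m = 6                            # number of sampled gradients

    # sample (sub)gradients of f(x) = ||x||_1 at points near x_cur
    pts = x_cur + radius * rng.uniform(-1, 1, size=(m, x_cur.size))
    G_s = np.sign(pts)               # one sampled gradient per row

    # smallest vector in conv{g_1, ..., g_m}:
    # minimize ||G_s^T w||^2 subject to w >= 0, sum(w) = 1
    def sqnorm(w):
        dvec = G_s.T.dot(w)
        return dvec.dot(dvec)

    res = minimize(sqnorm, np.ones(m) / m, method='SLSQP',
                   bounds=[(0., 1.)] * m,
                   constraints={'type': 'eq',
                                'fun': lambda w: w.sum() - 1.})
    print("dnorm = %g" % np.linalg.norm(G_s.T.dot(res.x)))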