def ipPD(func, grad, hessian=None, x0=None,
         lb=None, ub=None,
         G=None, h=None,
         A=None, b=None,
         maxiter=100,
         disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        # the string only selects the quasi-Newton update; clear it so the
        # loop below takes the approximation branch instead of calling a str
        hessian = None

    if grad is None:
        def finiteForward(func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(func, p)

    g = numpy.zeros((p, 1))
    gOrig = g.copy()

    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p, p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 5.0
    step = 1.0
    t = 1.0
    # because we determine the size of the back tracking step on the fly,
    # we don't give it a maximum.  At the same time, because we are only
    # evaluating the residuals of the KKT system, there are times where we
    # may want to give the descent a nudge
    #step0 = 1.0  # back tracking search step maximum value

    if G is not None:
        s = h - G.dot(x)
        z = 1.0 / s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter > i:
        gOrig[:] = grad(x).reshape(p, 1)
        g[:] = gOrig.copy()

        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()

        x, y, z, fx, step, oldFx, oldGrad, deltaX = _solveKKTAndUpdatePD(x, func, grad, fx,
                                                                         g, gOrig, Haug,
                                                                         z, G, h, y, A, b, t)

        # NOTE: the large commented-out block that used to follow here
        # (the condensed KKT system built from the standard log barrier
        # \nabla f(x) / -f(x), the sparse/dense solve, the exact and
        # back-tracking line searches, and the z, y updates) is now
        # performed inside _solveKKTAndUpdatePD.

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            if G is not None:
                r = _rDualFunc(x, grad, z, G, y, A)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            t = mu * m / eta
        else:
            if abs(fx - oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()

        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
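# A minimal usage sketch for ipPD on a box-constrained toy quadratic.  The
# objective f, gradient df and the bounds below are hypothetical
# illustrations, not part of the library:
#
#     import numpy
#
#     def f(x):
#         x = numpy.asarray(x).ravel()
#         return (x[0] - 1.0)**2 + (x[1] + 0.5)**2
#
#     def df(x):
#         x = numpy.asarray(x).ravel()
#         return numpy.array([2.0*(x[0] - 1.0), 2.0*(x[1] + 0.5)])
#
#     xhat, info = ipPD(f, df, x0=[0.0, 0.0],
#                       lb=numpy.array([-2.0, -2.0]),
#                       ub=numpy.array([2.0, 2.0]),
#                       full_output=True)
#     # info['dgap'] holds the surrogate duality gap at termination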
def sqp(func, grad=None, hessian=None, x0=None,
        lb=None, ub=None,
        G=None, h=None,
        A=None, b=None,
        maxiter=100,
        method='trust',
        disp=0, full_output=False):

    if method.lower() == 'trust' or method.lower() == 'line':
        pass
    else:
        raise Exception("Input method not recognized")

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS

    if grad is None:
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    g = numpy.zeros((p, 1))
    H = numpy.zeros((p, p))

    oldFx = numpy.inf
    oldOldFx = numpy.inf
    oldGrad = None
    update = True
    deltaX = numpy.zeros((p, 1))

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    innerI = 0
    step = 1.0
    radius = 1.0

    if hessian is None:
        H = numpy.eye(len(x))

    while maxiter >= i:
        g[:] = grad(x.ravel()).reshape(p, 1)

        if hessian is None:
            if oldGrad is not None:
                if update:
                    # update is always true for line search
                    diffG = (g - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x.ravel())

        if method == 'trust':
            if hessian is None:
                # we assume that the hessian is always a PSD
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegion(
                    x, fx, oldFx, deltaX, p, radius,
                    g, oldGrad, H,
                    func, grad, z, G, h, y, A, b)
            else:
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegionSOCP(
                    x, fx, oldFx, deltaX, p, radius,
                    g, oldGrad, H,
                    func, grad, z, G, h, y, A, b)
        else:
            x, deltaX, z, y, fx, oldFx, oldOldFx, oldGrad, step, innerIter = _updateLineSearch(
                x, fx, oldFx, oldOldFx, deltaX,
                g, H,
                func, grad, z, G, h, y, A, b)

        innerI += innerIter

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), radius)

        if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
            break

        if abs(fx - oldFx) <= EPSILON:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()

        output['H'] = H
        output['g'] = g.flatten()
        output['fx'] = fx
        output['iter'] = i
        output['innerIter'] = innerI

        if G is not None:
            output['z'] = z.flatten()
            output['s'] = (h - G.dot(x)).flatten()
        if A is not None:
            output['y'] = y.flatten()

        return x, output
    else:
        return x
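# A usage sketch for sqp; the objective below is hypothetical.  Passing
# grad=None falls back to forward finite differences, and method='line'
# swaps the trust-region subproblem for the line-search update:
#
#     import numpy
#
#     def f(x):
#         x = numpy.asarray(x).ravel()
#         return x.dot(x) + numpy.sin(x[0])
#
#     xhat = sqp(f, grad=None, x0=[1.0, -1.0],
#                lb=numpy.array([-5.0, -5.0]),
#                ub=numpy.array([5.0, 5.0]),
#                method='trust')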
def ipPDandPDC(func, grad, hessian=None, x0=None,
               lb=None, ub=None,
               G=None, h=None,
               A=None, b=None,
               maxiter=100,
               method="pd",
               disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:
        def finiteForward(func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(func, p)

    if method.lower() == 'pd' or method.lower() == 'pdc':
        updateFunc = _solveKKTAndUpdatePD
    else:
        raise Exception("interior point update method not recognized")

    g = numpy.zeros((p, 1))
    gOrig = g.copy()

    oldOldFx = numpy.inf
    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p, p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 1.0
    step = 1.0
    t = 1.0
    # because we determine the size of the back tracking step on the fly,
    # we don't give it a maximum.  At the same time, because we are only
    # evaluating the residuals of the KKT system, there are times where we
    # may want to give the descent a nudge
    #step0 = 1.0  # back tracking search step maximum value

    if G is not None:
        s = h - G.dot(x)
        z = 1.0 / s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter > i:
        gOrig[:] = grad(x).reshape(p, 1)
        g[:] = gOrig.copy()

        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()

        oldOldFxTemp = oldFx

        x, y, z, fx, step, oldFx, oldGrad, deltaX = updateFunc(
            x, func, grad, fx, oldFx, oldOldFx,
            g, gOrig, Haug,
            z, G, h, y, A, b, t, method)

        oldOldFx = oldOldFxTemp

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            if G is not None:
                r = _rDualFunc(x, grad, z, G, y, A)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            t = mu * m / eta
        else:
            if abs(fx - oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()

        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
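# Sketch for ipPDandPDC: same call shape as ipPD, with the update rule
# selected by the method flag, which is forwarded to the KKT update helper.
# Hypothetical data; f and df as in the ipPD sketch above:
#
#     xhat, info = ipPDandPDC(f, df, x0=[0.0, 0.0],
#                             G=numpy.array([[1.0, 1.0]]),
#                             h=numpy.array([[1.0]]),
#                             method='pdc', full_output=True)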
def ipBar(func, grad, hessian=None, x0=None,
          lb=None, ub=None,
          G=None, h=None,
          A=None, b=None,
          maxiter=100,
          disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    if G is not None:
        m = G.shape[0]
    else:
        m = 1

    fx = None
    oldFx = None
    oldOldFx = None
    oldGrad = None
    deltaX = numpy.zeros((p, 1))
    g = numpy.zeros((p, 1))
    H = numpy.zeros((p, p))
    Haug = numpy.zeros((p, p))

    dispObj = Disp(disp)
    i = 0
    t = 0.01
    mu = 20.0
    step0 = 1.0  # back tracking search step maximum value
    step = 0.0

    j = 0
    while maxiter >= j:
        oldFx = numpy.inf
        # define the barrier function given t.  Note that
        # t is adjusted at each outer iteration
        barrierFunc = _logBarrier(func, t, G, h)
        if j == 0:
            fx = barrierFunc(x)

        update = True
        while update:
            gOrig = grad(x.ravel()).reshape(p, 1)

            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(gOrig - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
            else:
                H = hessian(x.ravel())

            ## standard log barrier
            if G is not None:
                s = h - G.dot(x)
                Gs = G / s
                s2 = s**2
                Dphi = Gs.sum(axis=0).reshape(p, 1)
                if j == 0:
                    t = _findInitialBarrier(gOrig, Dphi, A)

                Haug = t*H + numpy.einsum('ji,ik->jk', G.T, G/s2)
                g = t*gOrig + Dphi
            else:
                Haug = t*H
                g = t*gOrig

            ## solving the QP to get the descent direction
            if A is not None:
                # re-adjust the bounds
                bTemp = b - A.dot(x)
                LHS = scipy.sparse.bmat([
                    [Haug, A.T],
                    [A, None]
                ], 'csc')
                RHS = numpy.append(g, -bTemp, axis=0)
                # if the sparse matrix is more than half full, treat it as dense
                if LHS.size >= (LHS.shape[0] * LHS.shape[1])/2:
                    deltaTemp = scipy.linalg.solve(LHS.todense(), -RHS).reshape(len(RHS), 1)
                else:
                    deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(len(RHS), 1)
                deltaX = deltaTemp[:p]
                y = deltaTemp[p::]
            else:
                deltaX = scipy.linalg.solve(Haug, -g)

            oldOldFxTemp = oldFx
            oldFx = fx
            oldGrad = gOrig

            lineFunc = lineSearch(x, deltaX, barrierFunc)
            barrierGrad = _logBarrierGrad(func, gOrig, t, G, h)
            step, fc, gc, fx, oldFx, new_slope = scipy.optimize.line_search(barrierFunc,
                                                                            barrierGrad,
                                                                            x.ravel(),
                                                                            deltaX.ravel(),
                                                                            g.ravel(),
                                                                            oldFx,
                                                                            oldOldFx)

            # fall back to a back tracking search when the Wolfe line
            # search fails, and stop the inner iterations at that point
            if step is None:
                step, fx = backTrackingLineSearch(step0, lineFunc,
                                                  deltaX.ravel().dot(g.ravel()),
                                                  alpha=0.0001, beta=0.8)
                update = False

            oldOldFx = oldOldFxTemp
            x += step * deltaX
            j += 1
            dispObj.d(j, x.ravel(), func(x.ravel()), deltaX.ravel(), g.ravel(), step)
        # end of inner iteration

        i += 1
        # obtain the missing Lagrangian multiplier
        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)

        if m/t < atol:
            if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
                break
        else:
            t *= mu

        if scipy.linalg.norm(_rDualFunc(x, grad, z, G, y, A)) <= EPSILON:
            break
    # end of outer iteration

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t
        output['outerIter'] = i
        output['innerIter'] = j

        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)
            output['s'] = s.ravel()
            output['z'] = z.ravel()

        if A is not None:
            y = y / t
            output['y'] = y.ravel()

        gap = _dualityGap(func, x, z, G, h, y, A, b)

        output['subopt'] = m/t
        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()
        output['rDual'] = _rDualFunc(x, grad, z, G, y, A)

        return x.ravel(), output
    else:
        return x.ravel()
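# Sketch for ipBar on an inequality-constrained problem, with hypothetical
# f and df as in the ipPD sketch.  The barrier parameter t is initialised
# via _findInitialBarrier and inflated by mu = 20 at each outer iteration:
#
#     xhat, info = ipBar(f, df, x0=[0.0, 0.0],
#                        G=numpy.array([[1.0, 0.0], [0.0, 1.0]]),
#                        h=numpy.array([[2.0], [2.0]]),
#                        full_output=True)
#     # info['subopt'] = m/t bounds the suboptimality of xhat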
def ipPDC(func, grad, hessian=None, x0=None,
          lb=None, ub=None,
          G=None, h=None,
          A=None, b=None,
          maxiter=100,
          disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        # clear the string so the loop below takes the approximation branch
        hessian = None

    if grad is None:
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    g = numpy.zeros((p, 1))
    gOrig = g.copy()

    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p, p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 5.0
    step = 1.0
    t = 1.0
    # because we determine the size of the back tracking step on the fly,
    # we don't give it a maximum.  At the same time, because we are only
    # evaluating the residuals of the KKT system, there are times where we
    # may want to give the descent a nudge
    #step0 = 1.0  # back tracking search step maximum value

    if G is not None:
        s = h - G.dot(x)
        z = 1.0 / s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter >= i:
        gOrig[:] = grad(x).reshape(p, 1)
        g[:] = gOrig.copy()

        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()

        x, y, z, fx, step, oldFx, oldGrad, deltaX = _solveKKTAndUpdatePDC(
            x, func, grad, fx,
            g, gOrig, Haug,
            z, G, h, y, A, b, t)

        # NOTE: the large commented-out block that used to follow here
        # (assembly of the full primal-dual KKT system from the residuals
        # rDual, rCent, rPri, the sparse/dense solve for deltaX, deltaZ,
        # deltaY, the residual line search and the z, y updates) is now
        # performed inside _solveKKTAndUpdatePDC.

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            if G is not None:
                r = _rDualFunc(x, grad, z, G, y, A)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False
            t = mu * m / eta
        else:
            if abs(fx - oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()

        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
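# Sketch for ipPDC, mirroring the ipPD call above; the difference is that
# the update solves the full primal-dual KKT system (with the centrality
# rows kept) instead of the condensed one.  Hypothetical f, df as before:
#
#     xhat, info = ipPDC(f, df, x0=[0.0, 0.0],
#                        lb=numpy.array([-2.0, -2.0]),
#                        ub=numpy.array([2.0, 2.0]),
#                        full_output=True)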
def trustRegion(func, grad, hessian=None, x0=None,
                maxiter=100,
                method='exact',
                disp=0, full_output=False):

    x = checkArrayType(x0)
    p = len(x)

    if grad is None:
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if method is None:
        trustMethod = trustExact
    elif type(method) is str:
        if method.lower() == 'exact':
            trustMethod = trustExact
        else:
            raise Exception("Input name of method is not recognizable")

    fx = None
    oldGrad = None
    deltaX = None
    oldFx = numpy.inf

    i = 0
    oldi = -1
    j = 0
    tau = 1.0
    radius = 1.0
    maxRadius = 1.0

    dispObj = Disp(disp)
    while maxiter > i:
        # if we have successfully moved on, then
        # we would need to recompute some of the quantities
        if i != oldi:
            g = grad(x)
            fx = func(x)
            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(g - oldGrad)
                    H = approxH(H, diffG, deltaX)
            else:
                H = hessian(x)

        deltaX, tau = trustMethod(x, g, H, radius)
        deltaX = deltaX.real
        M = diffM(deltaX, g, H)

        newFx = func(x + deltaX)
        predRatio = (fx - newFx) / M(deltaX)

        if predRatio >= 0.75:
            if tau > 0.0:
                radius = min(2.0*radius, maxRadius)
        elif predRatio <= 0.25:
            radius *= 0.25

        if predRatio >= 0.25:
            oldGrad = g
            x += deltaX
            oldFx = fx
            fx = newFx
            i += 1
            oldi = i - 1
            # we only allow termination if we make a move
            if (abs(fx - oldFx)/fx) <= reltol:
                break
            if abs(deltaX.dot(g)) <= atol:
                break
        else:
            oldi = i

        dispObj.d(j, x, fx, deltaX, g, i)
        j += 1

    if full_output:
        output = dict()
        output['totalIter'] = i
        output['outerIter'] = j

        output['fx'] = func(x)
        output['H'] = H
        output['g'] = g

        return x, output
    else:
        return x
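# Sketch for trustRegion on an unconstrained toy problem (Rosenbrock);
# the data here are hypothetical.  hessian='sr1' selects the SR1
# quasi-Newton update and method='exact' solves each subproblem exactly:
#
#     import numpy
#
#     def f(x):
#         x = numpy.asarray(x).ravel()
#         return (1.0 - x[0])**2 + 100.0*(x[1] - x[0]**2)**2
#
#     def df(x):
#         x = numpy.asarray(x).ravel()
#         return numpy.array([-2.0*(1.0 - x[0]) - 400.0*x[0]*(x[1] - x[0]**2),
#                             200.0*(x[1] - x[0]**2)])
#
#     xhat, info = trustRegion(f, df, hessian='sr1', x0=[-1.0, 1.0],
#                              method='exact', full_output=True)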