Example #1
def ipPD(func, grad, hessian=None, x0=None,
        lb=None, ub=None,
        G=None, h=None,
        A=None, b=None,
        maxiter=100,
        disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower()=='bfgs':
            approxH = BFGS
        elif hessian.lower()=='sr1':
            approxH = SR1
        elif hessian.lower()=='dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:
        # fall back to a forward finite-difference gradient
        def finiteForward(func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(func, p)

    g = numpy.zeros((p,1))
    gOrig = g.copy()

    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p,p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 5.0
    step = 1.0
    t = 1.0
    # because we determine the size of the backtracking step on the
    # fly, we do not give it a maximum.  At the same time, because we
    # only evaluate the residuals of the KKT system, there are times
    # when we may want to give the descent a nudge
    
    if G is not None:
        s = h - G.dot(x)
        z = 1.0/s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter>i:

        gOrig[:] = grad(x).reshape(p,1)
        g[:] = gOrig.copy()
        
        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()

        x, y, z, fx, step, oldFx, oldGrad, deltaX = _solveKKTAndUpdatePD(x, func, grad,
                                                                         fx,
                                                                         g, gOrig,
                                                                         Haug,
                                                                         z, G, h,
                                                                         y, A, b, t)

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            r = _rDualFunc(x, grad, z, G, y, A)
            if scipy.linalg.norm(r) >= EPSILON:
                feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False

            t = mu * m / eta
        else:
            if abs(fx-oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()
        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
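
A minimal usage sketch (an illustration, not part of the source): it assumes ipPD and the private helpers it calls (_setup, _surrogateGap, BFGS, ...) are importable from the surrounding module, and uses a made-up quadratic objective.

import numpy

# hypothetical objective: f(x) = ||x - c||^2, minimized at c
c = numpy.array([1.0, 2.0])
func = lambda x: ((numpy.asarray(x).ravel() - c)**2).sum()
grad = lambda x: 2.0 * (numpy.asarray(x).ravel() - c)

# the upper bound clips the second coordinate, so if the solver
# converges we expect a solution near [1.0, 1.5]
xhat, info = ipPD(func, grad, x0=numpy.zeros(2),
                  lb=numpy.zeros(2), ub=1.5 * numpy.ones(2),
                  full_output=True)
print(xhat, info['dgap'])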
Example #2
def sqp(func, grad=None, hessian=None, x0=None,
        lb=None, ub=None,
        G=None, h=None,
        A=None, b=None,
        maxiter=100,
        method='trust',
        disp=0, full_output=False):

    method = method.lower()
    if method not in ('trust', 'line'):
        raise Exception("Input method not recognized")
    
    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    if grad is None:
        # fall back to a forward finite-difference gradient
        def finiteForward(x,func,p):
            def finiteForward1(x):
                return forward(func,x.ravel())
            return finiteForward1
        grad = finiteForward(x,func,p)
        
    g = numpy.zeros((p,1))
    H = numpy.zeros((p,p))

    oldFx = numpy.inf
    oldOldFx = numpy.inf
    oldGrad = None
    update = True
    deltaX = numpy.zeros((p,1))
    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    innerI = 0
    step = 1.0
    radius = 1.0

    if hessian is None:
        H = numpy.eye(len(x))

    while maxiter>=i:

        g[:] = grad(x.ravel()).reshape(p,1)

        if hessian is None:
            if oldGrad is not None:
                if update: # update is always true for line search
                    diffG = (g - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x.ravel())

        if method == 'trust':
            if hessian is None:
                # we assume the approximate Hessian is always
                # positive semidefinite
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegion(x, fx, oldFx, deltaX, p, radius, g, oldGrad, H, func, grad, z, G, h, y, A, b)
            else:
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegionSOCP(x, fx, oldFx, deltaX, p, radius, g, oldGrad, H, func, grad, z, G, h, y, A, b)
        else:
            x, deltaX, z, y, fx, oldFx, oldOldFx, oldGrad, step, innerIter = _updateLineSearch(x, fx, oldFx, oldOldFx, deltaX, g, H, func, grad, z, G, h, y, A, b)

        innerI += innerIter

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), radius)

        # print "s"
        # print h - G.dot(x)
        # print "z"
        # print numpy.array(qpOut['z']).ravel()
        
        if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
            break

        if abs(fx - oldFx) <= EPSILON:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        
        output['H'] = H
        output['g'] = g.flatten()

        output['fx'] = fx
        output['iter'] = i
        output['innerIter'] = innerI
        if G is not None:
            output['z'] = z.flatten()
            output['s'] = (h - G.dot(x)).flatten()
        if A is not None:
            output['y'] = y.flatten()

        return x, output
    else:
        return x
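
A usage sketch (illustrative only, assuming sqp and its helpers are importable from the surrounding module; the quadratic objective and its known minimum are invented for the example):

import numpy

# hypothetical quadratic objective with its minimum at [3, -1]
func = lambda x: (x.ravel()[0] - 3.0)**2 + 2.0 * (x.ravel()[1] + 1.0)**2
grad = lambda x: numpy.array([2.0 * (x.ravel()[0] - 3.0),
                              4.0 * (x.ravel()[1] + 1.0)])

# trust-region update with box constraints; we expect a solution
# near [3, -1] if the solver converges
xhat, info = sqp(func, grad, x0=numpy.zeros(2),
                 lb=-5.0 * numpy.ones(2), ub=5.0 * numpy.ones(2),
                 method='trust', full_output=True)
print(xhat, info['iter'])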
Example #3
def sqp(func,
        grad=None,
        hessian=None,
        x0=None,
        lb=None,
        ub=None,
        G=None,
        h=None,
        A=None,
        b=None,
        maxiter=100,
        method='trust',
        disp=0,
        full_output=False):

    method = method.lower()
    if method not in ('trust', 'line'):
        raise Exception("Input method not recognized")

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    if grad is None:

        # fall back to a forward finite-difference gradient
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())

            return finiteForward1

        grad = finiteForward(x, func, p)

    g = numpy.zeros((p, 1))
    H = numpy.zeros((p, p))

    oldFx = numpy.inf
    oldOldFx = numpy.inf
    oldGrad = None
    update = True
    deltaX = numpy.zeros((p, 1))
    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    innerI = 0
    step = 1.0
    radius = 1.0

    if hessian is None:
        H = numpy.eye(len(x))

    while maxiter >= i:

        g[:] = grad(x.ravel()).reshape(p, 1)

        if hessian is None:
            if oldGrad is not None:
                if update:  # update is always true for line search
                    diffG = (g - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x.ravel())

        if method == 'trust':
            if hessian is None:
                # we assume the approximate Hessian is always
                # positive semidefinite
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegion(
                    x, fx, oldFx, deltaX, p, radius, g, oldGrad, H, func, grad,
                    z, G, h, y, A, b)
            else:
                x, update, radius, deltaX, z, y, fx, oldFx, oldGrad, innerIter = _updateTrustRegionSOCP(
                    x, fx, oldFx, deltaX, p, radius, g, oldGrad, H, func, grad,
                    z, G, h, y, A, b)
        else:
            x, deltaX, z, y, fx, oldFx, oldOldFx, oldGrad, step, innerIter = _updateLineSearch(
                x, fx, oldFx, oldOldFx, deltaX, g, H, func, grad, z, G, h, y,
                A, b)

        innerI += innerIter

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), radius)

        # print "s"
        # print h - G.dot(x)
        # print "z"
        # print numpy.array(qpOut['z']).ravel()

        if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
            break

        if abs(fx - oldFx) <= EPSILON:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()

        output['H'] = H
        output['g'] = g.flatten()

        output['fx'] = fx
        output['iter'] = i
        output['innerIter'] = innerI
        if G is not None:
            output['z'] = z.flatten()
            output['s'] = (h - G.dot(x)).flatten()
        if A is not None:
            output['y'] = y.flatten()

        return x, output
    else:
        return x
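
The same solver can also be driven with the line-search update; a sketch under the same assumptions as the previous example (the objective is again invented):

import numpy

# same style of made-up quadratic, now solved with the
# line-search variant of the SQP update
target = numpy.array([3.0, -1.0])
func = lambda x: ((x.ravel() - target)**2).sum()
grad = lambda x: 2.0 * (x.ravel() - target)

xhat = sqp(func, grad, x0=numpy.zeros(2), method='line')
print(xhat)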
Example #4
def ipPDandPDC(func,
               grad,
               hessian=None,
               x0=None,
               lb=None,
               ub=None,
               G=None,
               h=None,
               A=None,
               b=None,
               maxiter=100,
               method="pd",
               disp=0,
               full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:

        # fall back to a forward finite-difference gradient
        def finiteForward(func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())

            return finiteForward1

        grad = finiteForward(func, p)

    if method.lower() in ('pd', 'pdc'):
        updateFunc = _solveKKTAndUpdatePD
    else:
        raise Exception("interior point update method not recognized")

    g = numpy.zeros((p, 1))
    gOrig = g.copy()

    oldOldFx = numpy.inf
    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p, p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 1.0
    step = 1.0
    t = 1.0
    # because we determine the size of the backtracking step on the
    # fly, we do not give it a maximum.  At the same time, because we
    # only evaluate the residuals of the KKT system, there are times
    # when we may want to give the descent a nudge

    if G is not None:
        s = h - G.dot(x)
        z = 1.0 / s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter > i:

        gOrig[:] = grad(x).reshape(p, 1)
        g[:] = gOrig.copy()

        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()
        oldOldFxTemp = oldFx

        x, y, z, fx, step, oldFx, oldGrad, deltaX = updateFunc(
            x, func, grad, fx, oldFx, oldOldFx, g, gOrig, Haug, z, G, h, y, A,
            b, t, method)

        oldOldFx = oldOldFxTemp

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            r = _rDualFunc(x, grad, z, G, y, A)
            if scipy.linalg.norm(r) >= EPSILON:
                feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False

            t = mu * m / eta
        else:
            if abs(fx - oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()
        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
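
A sketch of this primal-dual driver (illustrative, under the same importability assumption; the constraint data below are invented): minimizing a strictly convex quadratic subject to linear inequalities.

import numpy

# hypothetical objective ||x||^2 with inequality constraints
# G x <= h encoding x >= 0.5 componentwise
func = lambda x: (x.ravel()**2).sum()
grad = lambda x: 2.0 * x.ravel()

G = -numpy.eye(2)
h = -0.5 * numpy.ones((2, 1))

# both constraints should be active, so we expect a solution
# near [0.5, 0.5] if the method converges
xhat, info = ipPDandPDC(func, grad, x0=numpy.ones(2),
                        G=G, h=h, method='pd', full_output=True)
print(xhat, info['dgap'])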
Example #5
def ipBar(func,
          grad,
          hessian=None,
          x0=None,
          lb=None,
          ub=None,
          G=None,
          h=None,
          A=None,
          b=None,
          maxiter=100,
          disp=0,
          full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:

        # fall back to a forward finite-difference gradient
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())

            return finiteForward1

        grad = finiteForward(x, func, p)

    if G is not None:
        m = G.shape[0]
    else:
        m = 1

    fx = None
    oldFx = None
    oldOldFx = None
    oldGrad = None
    deltaX = numpy.zeros((p, 1))
    g = numpy.zeros((p, 1))
    H = numpy.zeros((p, p))
    Haug = numpy.zeros((p, p))

    dispObj = Disp(disp)
    i = 0
    t = 0.01
    mu = 20.0
    step0 = 1.0  # back tracking search step maximum value
    step = 0.0

    j = 0
    while maxiter >= j:
        oldFx = numpy.inf
        # define the barrier function given t.  Note that
        # t is adjusted at each outer iteration
        barrierFunc = _logBarrier(func, t, G, h)
        if j == 0:
            fx = barrierFunc(x)
            #print "barrier = " +str(fx)
        update = True
        #while (abs(fx-oldFx)/fx)>=rtol and abs(fx-oldFx)>=atol:
        # for i in range(1):
        while update:
            # print abs(fx-oldFx)
            # print abs(fx-oldFx)/fx
            # print fx
            # print oldFx
            gOrig = grad(x.ravel()).reshape(p, 1)

            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(gOrig - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
            else:
                H = hessian(x.ravel())

            ## standard log barrier
            if G is not None:
                s = h - G.dot(x)
                Gs = G / s
                s2 = s**2
                Dphi = Gs.sum(axis=0).reshape(p, 1)
                if j == 0:
                    t = _findInitialBarrier(gOrig, Dphi, A)
                    # print "initial barrier = " +str(t)
                    # print "fake barrier = "+str(_findInitialBarrier(gOrig,Dphi,A))

                Haug = t * H + numpy.einsum('ji,ik->jk', G.T, G / s2)
                g = t * gOrig + Dphi

            else:
                Haug = t * H
                g = t * gOrig

            ## solving the QP to get the descent direction
            if A is not None:
                # re-adjust the bounds
                bTemp = b - A.dot(x)
                LHS = scipy.sparse.bmat([[Haug, A.T], [A, None]], 'csc')
                RHS = numpy.append(g, -bTemp, axis=0)
                if LHS.size >= (LHS.shape[0] * LHS.shape[1]) / 2:
                    deltaTemp = scipy.linalg.solve(LHS.todense(),
                                                   -RHS).reshape(len(RHS), 1)
                else:
                    deltaTemp = scipy.sparse.linalg.spsolve(LHS, -RHS).reshape(
                        len(RHS), 1)

                deltaX = deltaTemp[:p]
                y = deltaTemp[p::]
            else:
                deltaX = scipy.linalg.solve(Haug, -g)

            oldOldFxTemp = oldFx
            oldFx = fx
            oldGrad = gOrig

            lineFunc = lineSearch(x, deltaX, barrierFunc)

            barrierGrad = _logBarrierGrad(func, gOrig, t, G, h)
            step, fc, gc, fx, oldFx, new_slope = scipy.optimize.line_search(
                barrierFunc, barrierGrad, x.ravel(), deltaX.ravel(), g.ravel(),
                oldFx, oldOldFx)

            # print "fx = " +str(fx)
            # print "step= " +str(step)
            # if step is not None:
            # print "step = "+str(step)+ " with fx" +str(fx)+ " and barrier = " +str(barrierFunc(x + step * deltaX))
            # print "s"
            # print h - G.dot(x + step * deltaX)
            if step is None:
                # step, fx = exactLineSearch2(step0, lineFunc, deltaX.ravel().dot(g.ravel()), oldFx)
                step, fx = backTrackingLineSearch(step0,
                                                  lineFunc,
                                                  deltaX.ravel().dot(
                                                      g.ravel()),
                                                  alpha=0.0001,
                                                  beta=0.8)
                # print "fail wolfe = " +str(step)+ " maxStep = " +str(step0)
                # print "fx = " +str(fx)
                # print "step= " +str(step)
                update = False

            oldOldFx = oldOldFxTemp
            x += step * deltaX
            # print "stepped func = "+str(func(x))
            j += 1
            # dispObj.d(j, x.ravel() , fx, deltaX.ravel(), g.ravel(), step)
            dispObj.d(j, x.ravel(), func(x.ravel()), deltaX.ravel(), g.ravel(),
                      step)
            # end of inner iteration
        i += 1
        # obtain the missing Lagrangian multiplier
        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)

        if m / t < atol:
            if sufficientNewtonDecrement(deltaX.ravel(), g.ravel()):
                break
        else:
            t *= mu

        if scipy.linalg.norm(_rDualFunc(x, grad, z, G, y, A)) <= EPSILON:
            break

        # end of outer iteration

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t
        output['outerIter'] = i
        output['innerIter'] = j

        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)
            output['s'] = s.ravel()
            output['z'] = z.ravel()

        if A is not None:
            y = y / t
            output['y'] = y.ravel()

        gap = _dualityGap(func, x, z, G, h, y, A, b)

        output['subopt'] = m / t
        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()
        output['rDual'] = _rDualFunc(x, grad, z, G, y, A)

        return x.ravel(), output
    else:
        return x.ravel()
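
A sketch of the log-barrier driver (illustrative, same importability assumption; the one-dimensional problem is made up): the unconstrained minimum at x = 2 is cut off by the constraint x <= 1, so the barrier iterates should approach 1.

import numpy

func = lambda x: ((x.ravel() - 2.0)**2).sum()
grad = lambda x: 2.0 * (x.ravel() - 2.0)

# G x <= h encodes x <= 1
G = numpy.ones((1, 1))
h = numpy.ones((1, 1))

xhat, info = ipBar(func, grad, x0=numpy.zeros(1),
                   G=G, h=h, full_output=True)
print(xhat, info['subopt'], info['dgap'])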
Example #6
def ipPDC(func,
          grad,
          hessian=None,
          x0=None,
          lb=None,
          ub=None,
          G=None,
          h=None,
          A=None,
          b=None,
          maxiter=100,
          disp=0,
          full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower() == 'bfgs':
            approxH = BFGS
        elif hessian.lower() == 'sr1':
            approxH = SR1
        elif hessian.lower() == 'dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:

        # fall back to a forward finite-difference gradient
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())

            return finiteForward1

        grad = finiteForward(x, func, p)

    g = numpy.zeros((p, 1))
    gOrig = g.copy()

    oldFx = numpy.inf
    oldGrad = None
    deltaX = None
    deltaY = 0
    deltaZ = 0
    H = numpy.zeros((p, p))
    Haug = H.copy()

    fx = func(x)

    dispObj = Disp(disp)
    i = 0
    mu = 5.0
    step = 1.0
    t = 1.0
    # because we determine the size of the backtracking step on the
    # fly, we do not give it a maximum.  At the same time, because we
    # only evaluate the residuals of the KKT system, there are times
    # when we may want to give the descent a nudge

    if G is not None:
        s = h - G.dot(x)
        z = 1.0 / s
        m = G.shape[0]
        eta = _surrogateGap(x, z, G, h, y, A, b)
        t = mu * m / eta

    while maxiter >= i:

        gOrig[:] = grad(x).reshape(p, 1)
        g[:] = gOrig.copy()

        if hessian is None:
            if oldGrad is None:
                H = numpy.eye(len(x))
            else:
                diffG = (gOrig - oldGrad).ravel()
                H = approxH(H, diffG, step * deltaX.ravel())
        else:
            H = hessian(x)

        Haug[:] = H.copy()

        x, y, z, fx, step, oldFx, oldGrad, deltaX = _solveKKTAndUpdatePDC(
            x, func, grad, fx, g, gOrig, Haug, z, G, h, y, A, b, t)

        i += 1
        dispObj.d(i, x, fx, deltaX.ravel(), g.ravel(), step)

        feasible = False
        if G is not None:
            feasible = True
            eta = _surrogateGap(x, z, G, h, y, A, b)
            if eta >= EPSILON:
                feasible = False
            r = _rDualFunc(x, grad, z, G, y, A)
            if scipy.linalg.norm(r) >= EPSILON:
                feasible = False
            if A is not None:
                r = _rPriFunc(x, A, b)
                if scipy.linalg.norm(r) >= EPSILON:
                    feasible = False

            t = mu * m / eta
        else:
            if abs(fx - oldFx) <= EPSILON:
                break

        if feasible:
            break

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t

        if G is not None:
            gap = _surrogateGap(x, z, G, h, y, A, b)
        else:
            gap = 0

        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()

        if G is not None:
            output['s'] = s.ravel()
            output['z'] = z.ravel()
            output['rDual'] = _rDualFunc(x, grad, z, G, y, A).ravel()
        if A is not None:
            output['rPri'] = _rPriFunc(x, A, b).ravel()
            output['y'] = y.ravel()

        return x.ravel(), output
    else:
        return x.ravel()
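
A sketch of the primal-dual central-path variant with an equality constraint (illustrative; A, b, and the objective are invented, and the helpers are assumed importable and to support equality-only problems):

import numpy

# minimize ||x||^2 subject to x1 + x2 = 1; the analytic
# solution is [0.5, 0.5]
func = lambda x: (x.ravel()**2).sum()
grad = lambda x: 2.0 * x.ravel()

A = numpy.ones((1, 2))
b = numpy.ones((1, 1))

xhat, info = ipPDC(func, grad, x0=numpy.array([0.3, 0.7]),
                   A=A, b=b, full_output=True)
print(xhat, info['rPri'])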
Example #7
def trustRegion(func, grad, hessian=None, x0=None,
                maxiter=100,
                method='exact',
                disp=0, full_output=False):

    x = checkArrayType(x0)
    p = len(x)

    if grad is None:
        # fall back to a forward finite-difference gradient
        def finiteForward(x, func, p):
            def finiteForward1(x):
                return forward(func, x.ravel())
            return finiteForward1
        grad = finiteForward(x, func, p)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower()=='bfgs':
            approxH = BFGS
        elif hessian.lower()=='sr1':
            approxH = SR1
        elif hessian.lower()=='dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if method is None:
        trustMethod = trustExact
    elif type(method) is str:
        if method.lower() == 'exact':
            trustMethod = trustExact
        else:
            raise Exception("Input name of method is not recognizable")

    fx = None
    oldGrad = None
    deltaX = None
    oldFx = numpy.inf
    i = 0
    oldi = -1
    j = 0
    tau = 1.0
    radius = 1.0
    maxRadius = 1.0
    
    dispObj = Disp(disp)

    while maxiter > i:

        # if we moved successfully in the previous iteration, we
        # need to recompute the quantities that depend on x
        if i != oldi:
            g = grad(x)
            fx = func(x)
        
            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(g - oldGrad)
                    H = approxH(H, diffG, deltaX)
            else:
                H = hessian(x)

        deltaX, tau = trustMethod(x, g, H, radius)
        deltaX = deltaX.real
        M = diffM(deltaX, g, H)
    
        newFx = func(x + deltaX)
        predRatio = (fx - newFx) / M(deltaX)
        
        if predRatio >= 0.75:
            if tau > 0.0:
                radius = min(2.0 * radius, maxRadius)
        elif predRatio <= 0.25:
            radius *= 0.25

        if predRatio >= 0.25:
            oldGrad = g
            x += deltaX
            oldFx = fx
            fx = newFx
            i += 1
            oldi = i - 1
            # we only allow termination after an accepted step
            if (abs(fx - oldFx) / fx) <= reltol:
                break
            if abs(deltaX.dot(g)) <= atol:
                break
        else:
            oldi = i

        dispObj.d(j, x, fx, deltaX, g, i)
        j += 1
        

    if full_output:
        output = dict()
        output['totalIter'] = i
        output['outerIter'] = j

        output['fx'] = func(x)
        output['H'] = H
        output['g'] = g

        return x, output
    else:
        return x
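
A sketch for the trust-region driver (illustrative; assumes trustRegion and its helpers are importable from the surrounding module). The Rosenbrock function is a standard unconstrained test problem with its minimum at [1, 1].

import numpy

def rosen(x):
    x = numpy.asarray(x).ravel()
    return (1.0 - x[0])**2 + 100.0 * (x[1] - x[0]**2)**2

def rosen_grad(x):
    x = numpy.asarray(x).ravel()
    return numpy.array([-2.0 * (1.0 - x[0]) - 400.0 * x[0] * (x[1] - x[0]**2),
                        200.0 * (x[1] - x[0]**2)])

xhat, info = trustRegion(rosen, rosen_grad,
                         x0=numpy.array([-1.2, 1.0]),
                         method='exact', full_output=True)
print(xhat, info['totalIter'])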
Example #8
def ipBar(func, grad, hessian=None, x0=None,
        lb=None, ub=None,
        G=None, h=None,
        A=None, b=None,
        maxiter=100,
        disp=0, full_output=False):

    z, G, h, y, A, b = _setup(lb, ub, G, h, A, b)
    x = _checkInitialValue(x0, G, h, A, b)
    p = len(x)

    if hessian is None:
        approxH = BFGS
    elif type(hessian) is str:
        if hessian.lower()=='bfgs':
            approxH = BFGS
        elif hessian.lower()=='sr1':
            approxH = SR1
        elif hessian.lower()=='dfp':
            approxH = DFP
        else:
            raise Exception("Input name of hessian is not recognizable")
        hessian = None

    if grad is None:
        # fall back to a forward finite-difference gradient
        def finiteForward(x,func,p):
            def finiteForward1(x):
                return forward(func,x.ravel())
            return finiteForward1
        grad = finiteForward(x,func,p)

    if G is not None:
        m = G.shape[0]
    else:
        m = 1

    fx = None
    oldFx = None
    oldOldFx = None 
    oldGrad = None
    deltaX = numpy.zeros((p,1))
    g = numpy.zeros((p,1))
    H = numpy.zeros((p,p))
    Haug = numpy.zeros((p,p))

    dispObj = Disp(disp)
    i = 0
    t = 0.01
    mu = 20.0
    step0 = 1.0  # back tracking search step maximum value
    step = 0.0

    j = 0
    while maxiter>=j:
        oldFx = numpy.inf
        # define the barrier function given t.  Note that
        # t is adjusted at each outer iteration
        barrierFunc = _logBarrier(func, t, G, h)
        if j==0:
            fx = barrierFunc(x)
        update = True
        while update:
            gOrig = grad(x.ravel()).reshape(p,1)

            if hessian is None:
                if oldGrad is None:
                    H = numpy.eye(len(x))
                else:
                    diffG = numpy.array(gOrig - oldGrad).ravel()
                    H = approxH(H, diffG, step * deltaX.ravel())
            else:
                H = hessian(x.ravel())

            ## standard log barrier
            if G is not None:
                s = h - G.dot(x)
                Gs = G/s
                s2 = s**2
                Dphi = Gs.sum(axis=0).reshape(p,1)
                if j==0:
                    t = _findInitialBarrier(gOrig,Dphi,A)
                
                Haug = t*H + numpy.einsum('ji,ik->jk',G.T, G/s2)
                g = t*gOrig + Dphi

            else:
                Haug = t*H
                g = t*gOrig

            ## solving the QP to get the descent direction
            if A is not None:
                # re-adjust the bounds
                bTemp = b - A.dot(x)
                LHS = scipy.sparse.bmat([
                                         [Haug,A.T],
                                         [A,None]
                                         ], 'csc')
                RHS = numpy.append(g,-bTemp,axis=0)
                if LHS.size>= (LHS.shape[0] * LHS.shape[1])/2:
                    deltaTemp = scipy.linalg.solve(LHS.todense(),-RHS).reshape(len(RHS),1)
                else:    
                    deltaTemp = scipy.sparse.linalg.spsolve(LHS,-RHS).reshape(len(RHS),1)

                deltaX = deltaTemp[:p]
                y = deltaTemp[p::]
            else:
                deltaX = scipy.linalg.solve(Haug,-g)

            oldOldFxTemp = oldFx
            oldFx = fx
            oldGrad = gOrig

            lineFunc = lineSearch(x, deltaX, barrierFunc)
            
            barrierGrad = _logBarrierGrad(func, gOrig, t, G, h)
            step, fc, gc, fx, oldFx, new_slope = scipy.optimize.line_search(barrierFunc,
                                                                            barrierGrad,
                                                                            x.ravel(),
                                                                            deltaX.ravel(),
                                                                            g.ravel(),
                                                                            oldFx,
                                                                            oldOldFx
                                                                            )

            # print "fx = " +str(fx)
            # print "step= " +str(step)
            # if step is not None:
                # print "step = "+str(step)+ " with fx" +str(fx)+ " and barrier = " +str(barrierFunc(x + step * deltaX))
                # print "s"
                # print h - G.dot(x + step * deltaX)
            if step is None:
                # step, fx = exactLineSearch2(step0, lineFunc, deltaX.ravel().dot(g.ravel()), oldFx)
                step, fx =  backTrackingLineSearch(step0, lineFunc, deltaX.ravel().dot(g.ravel()), alpha=0.0001,beta=0.8)
                # print "fail wolfe = " +str(step)+ " maxStep = " +str(step0)                
                # print "fx = " +str(fx)
                # print "step= " +str(step)
                update = False
                
            oldOldFx = oldOldFxTemp
            x += step * deltaX
            # print "stepped func = "+str(func(x))
            j += 1
            # dispObj.d(j, x.ravel() , fx, deltaX.ravel(), g.ravel(), step)
            dispObj.d(j, x.ravel(), func(x.ravel()), deltaX.ravel(), g.ravel(), step)
            # end of inner iteration
        i += 1
        # obtain the missing Lagrangian multiplier
        if G is not None: 
            s = h - G.dot(x)
            z = 1.0 / (t * s)
        
        if m/t < atol:
            if sufficientNewtonDecrement(deltaX.ravel(),g.ravel()):
                break
        else:
            t *= mu
        
        if scipy.linalg.norm(_rDualFunc(x, grad, z, G, y, A))<=EPSILON:
            break

        # end of outer iteration

    # TODO: full_output- dual variables
    if full_output:
        output = dict()
        output['t'] = t
        output['outerIter'] = i
        output['innerIter'] = j

        if G is not None:
            s = h - G.dot(x)
            z = 1.0 / (t * s)
            output['s'] = s.ravel()
            output['z'] = z.ravel()

        if A is not None:
            y = y/t
            output['y'] = y.ravel()

        gap = _dualityGap(func, x,
                          z, G, h,
                          y, A, b)
        
        output['subopt'] = m/t
        output['dgap'] = gap
        output['fx'] = func(x)
        output['H'] = H
        output['g'] = gOrig.ravel()
        output['rDual'] = _rDualFunc(x, grad, z, G, y, A)

        return x.ravel(), output
    else:
        return x.ravel()
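
The barrier driver also accepts linear equality constraints; a sketch combining both constraint types under the same assumptions as before (all data invented):

import numpy

# minimize ||x||^2 subject to x1 + x2 = 1 and x >= 0
# (written as -x <= 0); the solution is [0.5, 0.5]
func = lambda x: (x.ravel()**2).sum()
grad = lambda x: 2.0 * x.ravel()

G = -numpy.eye(2)
h = numpy.zeros((2, 1))
A = numpy.ones((1, 2))
b = numpy.ones((1, 1))

xhat, info = ipBar(func, grad, x0=numpy.array([0.3, 0.7]),
                   G=G, h=h, A=A, b=b, full_output=True)
print(xhat, info['dgap'])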