# annealIPOPT.py

import numpy as np
import pyipopt, adolc, time
from scipy.integrate import odeint
import scipy.sparse as sps
import sys

# Different ADOL-C tapes will write different files. If running
# multiple programs in the same directory, each one needs to have
# different tape IDs or they will overwrite each other. Pass an integer
# argument when running the script which is different for each
# execution and the program will assign appropriate IDs to each tape so
# they don't interfere.
# Example:
#   python annealIPOPT.py 1
#   python annealIPOPT.py 2


N = 100 # Number of time steps in the path
D = 3 # Number of state variables (model dimension)
dt = 0.01 # Time step size
NBETA = 30 # Number of anneal steps; also the number of rows allocated for results

measIdx = [0]  # Indices of the measured state variables (columns of the data file)
taus = [] # Time delays, in steps, used by the time-delay term of the action

# Lower / upper bound for each state variable in IPOPT.
# NOTE(review): these hold 5 entries while D = 3; set_x_bounds only reads
# the first D of them.
lowbnd = np.array([-15.,-15.,-15.,-15.,-15.])
upbnd = np.array([15.,15.,15.,15.,15.])

# Bounds for the constraint functions eval_g(x). Empty because eval_g
# returns no constraints.
g_L = np.array([])
g_U = np.array([])

modelname = 'colpitts' # Name of the differential model function to use
mapname = 'rk2' # Name of the discretization function to use

# File of initial paths. If initfile == 'random', the script generates a
# random NxD path and saves it to initpaths.txt.
initfile = 'random' 
savefile = 'test_anneal.txt' # Filename to save output to

# Optimization options (passed to IPOPT in run())
epsf = 1e-6 # Function tolerance (IPOPT 'tol')
maxits = 100000 # Max iterations (IPOPT 'max_iter')
epsg = 1e-6 # Constraint tolerance (IPOPT 'constr_viol_tol')
linear_solver = 'ma97' # IPOPT 'linear_solver' option
#epsx = 1e-8 # Step Size Tolerance

# NOTE(review): the meaning of log level 1 is defined by pyipopt -- confirm.
pyipopt.set_loglevel(1)

# Returns the differential model xdot = f(x,t). Should return numpy
# array.
def lorenz96(x, t):
    """Lorenz-96 vector field with constant forcing 8.17.

    x is the state vector; its length sets the dimension and neighbour
    indices wrap around cyclically.  t is accepted for the f(x, t)
    calling convention but is not used.  Returns a numpy array holding
    one time derivative per component of x.
    """
    dim = len(x)
    rates = [
        x[np.mod(k - 1, dim)] * (x[np.mod(k + 1, dim)] - x[np.mod(k - 2, dim)])
        - x[k] + 8.17
        for k in range(dim)
    ]
    return np.array(rates)

def colpitts(x,t):
    dx = x[1]
    dy =-5.0*(x[0]+x[2])-2.0*x[1]
    dz = 2.8*(x[1]+1-np.exp(-x[0]))
    return np.array([dx,dy,dz])
 

# low, high should be D-dim vectors, with each entry corresponding to
# the lower/upper bounds of a state variable over the entire path.
def set_x_bounds(low, high, n_steps=None, n_dims=None):
    """Expand per-variable bounds to bounds on the flattened path.

    The optimizer works on a path of shape (N, D) flattened in row-major
    order, so entry k of the flat vector belongs to state variable
    k % D.  The bounds are therefore repeated once per time step rather
    than laid out as D blocks of N entries.

    low, high -- sequences with at least n_dims entries; entry i is the
                 lower/upper bound of state variable i over the whole path.
    n_steps   -- number of time steps (defaults to the module-level N).
    n_dims    -- number of state variables (defaults to the module-level D).

    Returns (x_L, x_U), two float arrays of length n_steps*n_dims.
    Raises ValueError if low or high has fewer than n_dims entries.
    """
    if n_steps is None:
        n_steps = N
    if n_dims is None:
        n_dims = D

    low = np.asarray(low, dtype=float)
    high = np.asarray(high, dtype=float)
    if len(low) < n_dims or len(high) < n_dims:
        raise ValueError("low/high need at least one bound per state variable")

    # Only the first n_dims bounds are used; extras are ignored.
    x_L = np.tile(low[:n_dims], n_steps)
    x_U = np.tile(high[:n_dims], n_steps)
    return x_L, x_U

# Actual Cost function to be minimized. 
def action(x, beta):
    """Cost function (action) of a candidate path, divided by nvar.

    x    -- flat array of N*D path values (plain floats or ADOL-C
            adoubles); it is reshaped to (N, D), and reshape raises
            ValueError if x does not hold exactly N*D values.
    beta -- anneal exponent; the model-error weight is Rf = 0.01 * 2**beta.

    The cost is the sum of
      * a measurement term weighted by Rm = 4.0 comparing the measured
        columns of x with the data y,
      * a model term weighted by Rf comparing each state with the
        discretized map of the previous one,
      * one time-delay term per entry of taus, weighted by
        Rtd = 1/(1/Rf + 1/Rm), comparing y shifted by tau steps with the
        map applied tau times to x.

    Reads the module-level N, D, y, measIdx, taus, Ntd and nvar.
    """
    x = x.reshape((N, D))
    Rf = 0.01*(2**beta)
    Rm = 4.0
    Rtd = 1.0/(1.0/Rf + 1.0/Rm)

    # Measurement (Rm) term
    dy = x[:, measIdx] - y
    cost = np.sum(0.5*Rm*(dy)**2)

    # Model (Rf) term: f1 holds the map applied once to every time step
    f1 = array_map(x)
    cost = cost + np.sum(0.5*Rf*(x[1:, :] - f1[:-1, :])**2)

    # Time-delay (Rtd) terms.  f1 is advanced incrementally so that it
    # holds the map applied tau times when the term for tau is evaluated.
    n_applied = 1
    for count in range(Ntd):
        tau = taus[count]
        for _ in range(n_applied, tau):
            f1 = array_map(f1)
        n_applied = tau
        dytd = y[tau:, :] - f1[:-tau, measIdx]
        cost = cost + np.sum(0.5*Rtd*(dytd)**2)

    return cost/nvar

def actTDtest(x,beta):
    """Time-delay part of the action only, divided by nvar (debug helper).

    x    -- flat array of N*D path values; reshaped to (N, D).
    beta -- anneal exponent; Rf = 0.01 * 2**beta and
            Rtd = 1/(1/Rf + 1/Rm) with Rm = 4.0.

    For every delay tau in taus the discretized map is applied tau times
    to the path and compared with the data y shifted by tau steps.
    Reads the module-level N, D, y, measIdx, taus, Ntd and nvar.
    """
    x = x.reshape((N, D))
    Rf = 0.01*(2**beta)
    Rm = 4.0
    Rtd = 1.0/(1.0/Rf + 1.0/Rm)

    # f1 starts as the map applied once and is advanced incrementally so
    # that it holds the map applied tau times for each delay in turn.
    f1 = array_map(x)
    cost = 0
    n_applied = 1
    for count in range(Ntd):
        tau = taus[count]
        for _ in range(n_applied, tau):
            f1 = array_map(f1)
        n_applied = tau
        dytd = y[tau:, :] - f1[:-tau, measIdx]
        cost = cost + np.sum(0.5*Rtd*(dytd)**2)
    return cost/nvar

# IPOPT constraint function. Each term in the array should == 0. Commented
# example, because I am not using constraints. Could be modified to
# make this "makecode.py" equivalent
def eval_g(x, user_data= None):
    """IPOPT constraint function; this problem defines no constraints.

    x and user_data are accepted for the callback signature and ignored.
    Returns an empty float array.  A version with two constraints would
    look like:

        return np.array([
            x[0] * x[1] * x[2] * x[3],
            x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3]
        ])
    """
    no_constraints = np.zeros(0)
    return no_constraints

def eval_lagrangian(x,lagrange,obj_factor, beta, user_data = None):
    """Lagrangian: obj_factor * action(x, beta) + lagrange . eval_g(x).

    x          -- flat path vector.
    lagrange   -- multipliers, one per constraint returned by eval_g.
    obj_factor -- scalar weight applied to the objective.
    beta       -- anneal exponent forwarded to action.
    user_data  -- unused.
    """
    objective_part = obj_factor * action(x, beta)
    constraint_part = np.dot(lagrange, eval_g(x))
    return objective_part + constraint_part

# Evaluate the Action, via adolc
def eval_f_adolc(x, user_data = None):
    """Evaluate the function recorded on ADOL-C tape fID at x."""
    objective_value = adolc.function(fID, x)
    return objective_value
    
def eval_grad_f(x, user_data=None):
    """Evaluate the gradient of the function on ADOL-C tape fID at x."""
    objective_gradient = adolc.gradient(fID, x)
    return objective_gradient

def eval_g_adolc(x, user_data=None):
    """Evaluate the function recorded on ADOL-C tape gID at x."""
    constraint_values = adolc.function(gID, x)
    return constraint_values

# Object which will evaluate the jacobian of the constraints. 
class Eval_jac_g:
    """Callable that evaluates the sparse Jacobian of tape gID.

    The constructor runs adolc.colpack.sparse_jac_no_repeat once at x
    and stores the number of nonzeros (nnz), the row/column indices
    (rind, cind) and the values.  Calling the object with a true flag
    returns the stored (rind, cind); otherwise it returns the values
    recomputed at x.
    """

    def __init__(self,x):
        # Option values are copied from an example; their meaning has not
        # been checked here.
        colpack_options = np.array([1,1,0,0], dtype=int)
        pattern = adolc.colpack.sparse_jac_no_repeat(gID, x, colpack_options)
        print('jac initialized')
        self.nnz = pattern[0]
        rows, cols, vals = pattern[1], pattern[2], pattern[3]
        self.rind = np.asarray(rows, dtype=int)
        self.cind = np.asarray(cols, dtype=int)
        self.values = np.asarray(vals, dtype=float)

    def __call__(self,x,flag,user_data=None):
        if not flag:
            refreshed = adolc.colpack.sparse_jac_repeat(
                gID, x, self.nnz, self.rind, self.cind, self.values)
            return refreshed[3]
        return (self.rind, self.cind)

# Evaluates the Hessian of the Lagrangian (the Lagrangian combines the
# objective function and the constraints into one function). Should be
# very sparse.
class Eval_h_dense:
    """Hessian-of-the-Lagrangian callback built from a dense ADOL-C Hessian.

    The constructor evaluates the dense Hessian of tape lID at
    [x0, lagrange, obj_factor], keeps only the block with respect to the
    first nvar variables, and records the upper-triangular nonzero
    pattern (rind, cind), the values and nnz.

    Calling the object with a true flag returns the stored (rind, cind).
    Otherwise it returns the Hessian values at exactly those positions,
    so the result always has length nnz and matches the reported
    structure, even where an entry happens to be zero at the new point.

    NOTE(review): the pattern is taken from the numerical nonzeros at the
    initial point, so an entry that is exactly zero there is left out of
    the structure for good.
    """

    def __init__(self, x0, lagrange, obj_factor):
        hess = self._dense_hessian(x0, lagrange, obj_factor)
        print('hess initialized')

        upper = sps.triu(hess, format='coo')
        self.rind = upper.row
        self.cind = upper.col
        self.values = upper.data
        self.nnz = upper.nnz

    def _dense_hessian(self, x0, lagrange, obj_factor):
        # Only keep Hessian entries with respect to x, not lagrange/obj_factor.
        x = np.hstack([x0, lagrange, obj_factor])
        return adolc.hessian(lID, x)[:nvar, :nvar]

    def __call__(self, x0, lagrange, obj_factor, flag, user_data = None):
        if flag:
            return (self.rind, self.cind)
        hess = self._dense_hessian(x0, lagrange, obj_factor)
        # Read values at the fixed pattern instead of re-deriving the
        # pattern, which could drop entries that are zero at this point.
        return hess[self.rind, self.cind]


class Eval_h:
    """Hessian-of-the-Lagrangian callback using ADOL-C's sparse driver.

    The constructor runs adolc.colpack.sparse_hess_no_repeat on tape lID
    at [x0, lagrange, obj_factor] and stores the row/column indices and
    values.  A mask selects the entries whose column index is below
    nvar, i.e. derivatives with respect to x rather than the
    multipliers or obj_factor; nnz is the number of masked entries.

    Calling the object with a true flag returns the masked
    (rind, cind); otherwise it returns the masked values recomputed at
    the given point.
    """

    def __init__(self, x0, lagrange, obj_factor):
        stacked = np.hstack([x0,lagrange,obj_factor])
        colpack_options = np.array([1,0],dtype=int)
        pattern = adolc.colpack.sparse_hess_no_repeat(lID, stacked, colpack_options)
        print('hess initialized')

        rows, cols, vals = pattern[1], pattern[2], pattern[3]
        self.rind = np.asarray(rows, dtype=int)
        self.cind = np.asarray(cols, dtype=int)
        self.values = np.asarray(vals, dtype=float)

        # Only keep hess values w/ respect to x, not lagrange/obj_factor
        self.mask = np.where(self.cind < nvar)
        self.nnz = len(self.mask[0])

    def __call__(self, x0, lagrange, obj_factor, flag, user_data = None):
        if not flag:
            stacked = np.hstack([x0,lagrange,obj_factor])
            refreshed = adolc.colpack.sparse_hess_repeat(
                lID, stacked, self.rind, self.cind, self.values)
            return refreshed[3][self.mask]
        return (self.rind[self.mask], self.cind[self.mask])


# Helper function to apply map to entire NxD path array x
def array_map(x):
    """Apply the discretized map to every row of the path array x.

    Row i is advanced with the module-level map(row, model, dt, i*dt).
    Returns an array shaped like x holding the mapped rows.
    """
    mapped = np.zeros_like(x)
    for step, state in enumerate(x):
        mapped[step, :] = map(state, model, dt, step*dt)
    return mapped

   
# Simple discretization scheme
def rk2(x,f,dt,t):
    """Advance x by one step of size dt with the midpoint (RK2) rule.

    f is called as f(x, t): once at the start of the step and once at
    the half-step point.  Returns the state after the step.
    """
    start_increment = dt*f(x, t)
    midpoint_state = x + 0.5*start_increment
    midpoint_increment = dt*f(midpoint_state, t + 0.5*dt)
    return x + midpoint_increment

# More accurate, more complicated discretization scheme
def rk4(x,f,dt,t):
    """Advance x by one step of size dt with the classical RK4 rule.

    f is called as f(x, t) at the start, twice at the half step and once
    at the end of the step.  Returns the state after the step.
    """
    half_dt = 0.5*dt
    inc_a = dt*f(x, t)
    inc_b = dt*f(x + 0.5*inc_a, t + half_dt)
    inc_c = dt*f(x + 0.5*inc_b, t + half_dt)
    inc_d = dt*f(x + inc_c, t + dt)
    weighted_sum = inc_a + 2.0*inc_b + 2.0*inc_c + inc_d
    return x + 1.0/6.0*weighted_sum


def run(x0, eval_jac_g, eval_h):
    """Solve one IPOPT problem starting from the flat path x0.

    x0         -- initial path, a flat array of shape (N*D,).
    eval_jac_g -- constraint-Jacobian callback exposing an nnz attribute.
    eval_h     -- Hessian callback exposing an nnz attribute.

    Bounds come from the module-level lowbnd/upbnd, and the solver
    options from maxits, epsg, epsf and linear_solver.

    Returns (objective value, optimized path), i.e. entries 4 and 0 of
    the tuple returned by nlp.solve.
    Raises ValueError if x0 does not have shape (N*D,).
    """
    if x0.shape != (N*D,):
        raise ValueError("x is wrong dims!")

    x_L, x_U = set_x_bounds(lowbnd, upbnd)

    start = time.time()

    print(eval_h.nnz)
    nlp = pyipopt.create(nvar, x_L, x_U, ncon, g_L, g_U, eval_jac_g.nnz,
                         eval_h.nnz, eval_f_adolc, eval_grad_f, eval_g_adolc,
                         eval_jac_g, eval_h)
    try:
        nlp.int_option('max_iter', maxits)
        nlp.num_option('constr_viol_tol', epsg)
        nlp.num_option('tol', epsf)
        nlp.num_option('acceptable_tol', 1e-3)
        nlp.str_option('linear_solver', linear_solver)
        nlp.str_option('mu_strategy', 'adaptive')
        nlp.num_option('bound_relax_factor', 0)
        nlp.str_option('adaptive_mu_globalization','never-monotone-mode')

        results = nlp.solve(x0)
    finally:
        # Release the IPOPT problem; run() is called once per anneal step.
        nlp.close()

    # Single pre-formatted strings print identically under the Python 2
    # print statement and the print function.
    print("optimized:  %s s" % (time.time()-start,))
    print("Exit flag =  %s" % (results[5],))
    print("Action =  %s" % (results[4],))
    return results[4], results[0]

def tape(fID, gID, lID, x0, beta):
    """Record the ADOL-C tapes for one anneal step and build the callbacks.

    fID, gID, lID -- tape IDs for the objective, the constraint function
                     and the Lagrangian.
    x0            -- flat path used as the taping point.
    beta          -- anneal exponent passed to action / eval_lagrangian.

    Returns (Eval_jac_g instance, Eval_h_dense instance).

    NOTE(review): Eval_jac_g and Eval_h_dense read the module-level gID
    and lID rather than these arguments, so the arguments are expected to
    equal those globals.
    """

    t0 = time.time()

    # trace objective function
    adolc.trace_on(fID)
    ax = adolc.adouble(x0)
    adolc.independent(ax)
    ay = action(ax, beta)
    #ay = actTDtest(ax, beta)
    adolc.dependent(ay)
    adolc.trace_off()

    # trace constraint function
    adolc.trace_on(gID)
    ax = adolc.adouble(x0)
    adolc.independent(ax)
    ay = eval_g(ax)
    adolc.dependent(ay)
    adolc.trace_off()

    # trace lagrangian function
    
    adolc.trace_on(lID)
    # Independent variables are declared in the order
    # [x0, lambdas, obj_factor]; the Hessian callbacks stack their
    # arguments in the same order with np.hstack.
#    xtmp = np.hstack(x0,np.ones(ncon),1.)
    ax = adolc.adouble(x0)
    alagrange = adolc.adouble(np.ones(ncon))
    aobj_factor = adolc.adouble(1.)
    adolc.independent(ax)
    adolc.independent(alagrange)
    adolc.independent(aobj_factor)
    ay = eval_lagrangian(ax, alagrange, aobj_factor, beta)
    adolc.dependent(ay)
    adolc.trace_off()

    t1 = time.time()
    print "tape time = ", t1-t0
    
    # Build the derivative callbacks at the taping point.
    eval_jac_g_adolc = Eval_jac_g(x0)

    # Dense Hessian evaluator is used; the sparse one is kept for reference.
    #eval_h_adolc = Eval_h(x0, np.ones(ncon), 1.)
    eval_h_adolc = Eval_h_dense(x0, np.ones(ncon), 1.)
    t2 = time.time()
    # Time spent constructing the Jacobian and Hessian evaluators.
    print "Hess time = ", t2-t1


    return eval_jac_g_adolc, eval_h_adolc
    
if __name__ == "__main__" :

    # Load the noisy data and keep the first N rows of the measured columns.
    ytmp = np.loadtxt("dataN_D{0}_dt{1}_noP.txt".format(D,dt))
    y = ytmp[:N,measIdx]
    Ntd = len(taus)
    nvar = D*N

    # Initial path: either random (saved to initpaths.txt) or read from
    # initfile and cut down to N x D.
    if initfile == 'random':
        tmp = np.random.rand(N,D)
        np.savetxt('initpaths.txt', tmp)
        x0 = tmp
    else:
        x0 = np.loadtxt(initfile)
        x0 = x0[:N,:D]

    x0 = x0.flatten()

    ncon = len(eval_g(x0))

    # Different Adolc tapes will write different files. If running
    # multiple programs in same directory, each one needs to have a
    # different ID or they will overwrite each other.
    # sys.argv[0] is the script name, so an ID was passed only if there
    # is a second entry.
    if len(sys.argv) > 1:
        adolcBaseID = 3*int(sys.argv[1])
    else:
        adolcBaseID = 0
    fID = adolcBaseID
    gID = adolcBaseID+1
    lID = adolcBaseID+2

    # Look the configured functions up by name instead of eval()-ing the
    # configuration strings.  array_map reads the globals `model` and
    # `map`, so these names must be kept (map shadows the builtin).
    model = globals()[modelname]
    map = globals()[mapname]

    # One row per anneal step: [beta, action, optimized path...]
    store = np.zeros((NBETA,N*D+2))
    for beta in [20.]: #range(NBETA):
        row = int(beta)  # array indices must be integers
        store[row,0] = beta
        eval_jac_g, eval_h = tape(fID, gID, lID,x0,beta)
        t1 = time.time()
        store[row,1], store[row,2:]= run(x0, eval_jac_g, eval_h)
        t2 = time.time()
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the print function.
        print("opt time =  %s" % (t2-t1,))
        # Warm-start the next anneal step from this solution.
        x0 = store[row,2:]

    np.savetxt(savefile, store)
    
    
# Just a convenience to generate data. 
def gen_data(dt, skip=0, data_initial=0, model=lorenz96):
    """Integrate `model` and write clean and noisy data files.

    dt           -- sampling interval of the saved trajectory.
    skip         -- number of leading samples to discard.
    data_initial -- initial state (sequence or array).  The default 0, or
                    None, draws a random initial state of D values
                    uniformly from [0, 10).
    model        -- right-hand side f(x, t) passed to odeint.

    Four files are written, where n is the length of the initial state:
      data_D{n}_dt{dt}_noP.txt   clean trajectory
      dataN_D{n}_dt{dt}_noP.txt  trajectory plus Gaussian noise (std 0.5)
      data_D{n}_dt{dt}.txt       clean trajectory with an extra column
                                 holding the constant 8.17
      dataN_D{n}_dt{dt}.txt      the same with Gaussian noise (std 0.5)
                                 added to every column
    """
    T_final = (skip+110000)*dt
    T_total = np.arange(0,T_final,dt)

    # Compare against the scalar sentinel only; `array == 0` would be an
    # elementwise comparison with no single truth value.
    if data_initial is None or (np.isscalar(data_initial) and data_initial == 0):
        data_initial = 10.0*np.random.rand(D)
    n_dims = len(data_initial)

    Y = odeint(model,data_initial,T_total)
    Y = Y[skip:skip+100001,:]
    param = 8.17*np.ones(len(Y))

    np.savetxt("data_D{0}_dt{1}_noP.txt".format(n_dims,dt),Y)
    noise = 0.5*np.random.standard_normal(Y.shape)
    Ynoise = Y + noise
    np.savetxt("dataN_D{0}_dt{1}_noP.txt".format(n_dims,dt),Ynoise)

    # Same data with the parameter appended as an extra column.
    Y = np.column_stack((Y,param))
    np.savetxt("data_D{0}_dt{1}.txt".format(n_dims,dt),Y)
    noise = 0.5*np.random.standard_normal(Y.shape)
    Ynoise = Y + noise
    np.savetxt("dataN_D{0}_dt{1}.txt".format(n_dims,dt),Ynoise)