Exemplo n.º 1
0
    B[0, 1] = 1;

    #Q = make_symmetric_random_psd(state_dim)
    #R = make_symmetric_random_psd(control_dim)
    Q = np.identity(state_dim)
    R = np.identity(control_dim)


    dyn_func = linear_dynamics(A, B);
    cost_func = quadtratic_cost(Q, R);

    T = 4
    x0 = np.asarray((3, 2, 1))
    #x0 = np.random.random(state_dim)

    lqr = LQR(A, B, Q, R, T)
    lqr.solve()
    lqr_states, lqr_controls, lqr_costs = lqr.forward_pass(x0)

    #Us2 = np.random.random((T, control_dim))
    Us2 = np.ones((T, control_dim))
    #Us2 = np.asarray(lqr_controls) 
    xt = x0.copy(); Xs2 = xt;
    for t in range(0, T-1):
        xt1 = dyn_func(xt, Us2[t])
        Xs2 = np.vstack((Xs2, xt1))
        xt = xt1 
    #Xs = lqr_states; Us = lqr_controls;
    Xs = Xs2; Us = Us2;
    original_ilqr_cost = np.sum([cost_func(x, u) for (x,u) in zip(Xs, Us)])
Exemplo n.º 2
0
    def integrate(self,
                  use_jac=False,
                  linearize=True,
                  interpolate=True,
                  **kwargs):
        keys = kwargs.keys()
        xinit = kwargs['xinit'] if 'xinit' in keys else self.xinit
        lin = linearize
        interp = interpolate

        if self.ufun is not None:
            func = lambda t, x: self.f(t, x, self.ufun(t, x))
            dfdx = lambda t, x: self.dfdx(t, x, self.ufun(t, x))
            dfdu = lambda t, x: self.dfdu(t, x, self.ufun(t, x))
        else:
            func = self.f
            dfdx = self.dfdx
            dfdu = self.dfdu

        jac = dfdx if use_jac else None

        #Tracer()()
        if self.delf is not None:
            delfunc = lambda t, x: self.delf(t, x, self.ufun(t, x))
            (t, x, jumps) = sysIntegrate(func,
                                         self.xinit,
                                         tlims=self.tlims,
                                         phi=self.phi,
                                         jac=jac,
                                         delfunc=delfunc)
        else:
            (t, x, jumps) = sysIntegrate(func,
                                         self.xinit,
                                         tlims=self.tlims,
                                         phi=self.phi,
                                         jac=jac)

        #Tracer()()
        components = ['x']
        if 'ufun' in self.__dict__.keys():
            components.append('u')
        if lin:
            components.append('A')
            components.append('B')

        traj = Trajectory(*components)
        for (tt, xx) in zip(t, x):
            names = {'x': xx}
            if 'u' in components:
                names['u'] = self.ufun(tt, xx)
            if 'A' in components:
                names['A'] = dfdx(tt, xx)
            if 'B' in components:
                names['B'] = dfdu(tt, xx)

            traj.addpoint(tt, **names)

        # interpolate, unless requested;
        # saves a few manual calls
        if interp:
            traj.interpolate()

        if lin:
            print("linearizing...")
            self.lintraj = traj
            self.regulator = LQR(self.tlims, traj.A, traj.B)  #, jumps=jumps)
            self.regulator.solve()

        traj.feasible = True
        traj.tlims = self.tlims
        traj.jumps = jumps
        return traj
Exemplo n.º 3
0
# -------- Settings ------------
NUM_OF_TIMESTAMP = 50

# -------- Main Code ----------
pos_setpoint = 5
vel_setpoint = 0
Q = np.array([[1, 0],
              [0, 1]]) # controls how much state difference cost
R = 0.01 # controls how much control cost.

state_setpoint = np.array([[pos_setpoint], [vel_setpoint]])

initial_covariance = np.eye(2)

my_controller = LQR(env.A, env.B, Q, R)
gain = my_controller.get_K(10) # note that the minus sign is omitted here because of the way we calculate error.
print('gain: {}'.format(gain))

true_state = [env.state]
accel_cmd = [0]

for i in range(NUM_OF_TIMESTAMP):
    accel = np.dot(gain, (state_setpoint - env.state))[0, 0]
    meas = env.control_and_sense(accel)

    print("True state: {}".format(env.state))
    print("accel : {}".format(accel))

    true_state.append(env.state)
    accel_cmd.append(accel)
    # State vector = [x, y, vx, vy]
    x_init = np.array([10, 30, 10, -5.0], dtype='float64').transpose()

    A = np.eye(4)
    A[2, 2] = (1 - dt * friction) / mass
    A[3, 3] = (1 - dt * friction) / mass
    A[0, 2] = dt
    A[1, 3] = dt

    B = np.array([[0, 0], [0, 0], [dt / mass, 0], [0, dt / mass]],
                 dtype='float64')

    Q = 0.01 * np.eye(4)
    R = np.eye(2)

    lqr = LQR(A, B, Q, R)
    K = lqr.compute_policy_gains(T, dt)

    x = x_init
    X = np.zeros((T, 4), dtype='float64')
    U = np.zeros((T, 2), dtype='float64')

    for i in range(T):
        u = np.dot(K[i], x)

        # This is essentially the simulator of the vehicle
        x = np.dot(A, x) + np.dot(B, u)

        X[i, :] = x.transpose()
        U[i, :] = u.transpose()