Пример #1
0
    def setup(self):

        J_hist = []

        def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
            J_hist.append(J_opt)
            info = "converged" if converged else ("accepted" if accepted else "failed")
            print("iteration", iteration_count, info, J_opt)

        def f(x, us, i):
            max_bounds = 8.0
            min_bounds = -8.0
            diff = (max_bounds - min_bounds) / 2.0
            mean = (max_bounds + min_bounds) / 2.0
            us = diff * np.tanh(us) + mean
            y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, us)
            return np.array(y)

        dynamics = FiniteDiffDynamics(f, 12, 6)

        x_path = []
        u_path = []
        count = 0
        N = self.runner.get_length()
        while count < self.runner.get_length():
            count += 1
            self.runner.step()
            u_path.append(self.runner.ddx.flatten().tolist())
            self.x.append(self.runner.x.flatten())
            x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
            x_path.append(x)

        u_path = u_path[:-1]
        expSigma = self.runner.get_expSigma()
        size = expSigma[0].shape[0]
        Q = [np.zeros((size * 2, size * 2))] * len(expSigma)
        for ii in range(len(expSigma) - 2, -1, -1):
            Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

        x0 = x_path[0]
        x_path = np.array(x_path)
        us_path = np.array(u_path)

        us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))
        R = 0.1 * np.eye(dynamics.action_size)

        cost = PathQsRCost(Q, R, x_path=x_path, u_path=us_path)
        ilqr = iLQR(dynamics, cost, N - 1)

        xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

        R = 0.5 * np.eye(dynamics.action_size)

        cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=self.us)

        ilqr2 = iLQR(dynamics, cost2, N - 1)

        self.cntrl = RecedingHorizonController(x0, ilqr2)
Пример #2
0
def main(T, dt):
    T = T
    dt = dt

    x_init = np.array([1.5, 1.5, -0.05, -0.05])
    U_init = np.zeros((T, 2))

    Q_f = np.eye(4)  # terminal cost
    Q = np.zeros((4, 4))  # cost matrix for states

    ilqr_solver = iLQR(f,
                       T,
                       dt,
                       x_init,
                       U_init,
                       compute_quadratic_approx_cost_torque,
                       Q=Q,
                       Q_f=Q_f)
    threshold = pow(10, -3)
    U = ilqr_solver.run_algorithm(
        threshold)  # column i of U should be control input at time i

    # Execute the control sequence
    traj = execute_trajectory(x_init, U, dt)

    plot_trajectory(traj)
Пример #3
0
    def setup(self):

        J_hist = []

        def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
            J_hist.append(J_opt)
            info = "converged" if converged else ("accepted" if accepted else "failed")
            print("iteration", iteration_count, info, J_opt)

        def f(x, u, i):
            y = Model.runge_integrator(self._model.get_rbdl_model(), x, 0.01, u)

            return np.array(y)

        dynamics = FiniteDiffDynamics(f, 12, 6)


        x_path = []
        u_path = []
        count = 0
        N = self.runner.get_length()
        while count < self.runner.get_length():
            count += 1
            self.runner.step()
            u_path.append(self.runner.ddx.flatten().tolist())
            self.x.append(self.runner.x.flatten())
            x = self.runner.x.flatten().tolist() + self.runner.dx.flatten().tolist()
            x_path.append(x)

        u_path = u_path[:-1]
        expSigma = self.runner.get_expSigma()
        size = expSigma[0].shape[0]
        Q = [np.zeros((size * 2, size * 2))] * len(expSigma)
        for ii in range(len(expSigma) - 2, -1, -1):
            Q[ii][:size, :size] = np.linalg.pinv(expSigma[ii])

        x0 = x_path[0]
        x_path = np.array(x_path)
        u_path = np.array(u_path)

        #####change################################!!!!! changed
        R = 5.0e-4 * np.eye(dynamics.action_size)
        R[3,3] = 3.0e-3
        R[4,4] = 3.0e-3
        R[5,5] = 3.0e-3
        #R = 0.1 * np.eye(dynamics.action_size)
        #
        cost2 = PathQsRCost(Q, R, x_path=x_path, u_path=u_path)
        #
        # # Random initial action path.
        us_init = np.random.uniform(-1, 1, (N-1, dynamics.action_size))
        #
        J_hist = []
        ilqr = iLQR(dynamics, cost2, N-1)
        self.xs, self.us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
Пример #4
0
def main():
    # (TODO)Consider a particular example (maybe an example from the paper)?
    T = 50
    nX = 10

    # Get a sequence of control input u_1,..., u_T
    solver = iLQR()
    U = solver.run_algorithm() # column i of U should be control input at time i

    X = np.zeros((nX, T))
    # (TODO)Execute the control sequence
    X[:,0] = starting_state
    for t in range(T - 1):
        X[:,t+1] = f(X[:,t], U[:,t])
Пример #5
0
def test_lqr_mujoco(dynamics_cls):
    """Smoke test for MujcooFiniteDiff{Dynamics,Cost}.
    Jupyter notebook experiments/mujoco_control.ipynb has quantitative results
    attained; for efficiency, we only run for a few iterations here."""
    env = gym.make("Reacher-v2").unwrapped
    env.seed(42)
    env.reset()
    dynamics = dynamics_cls(env)
    cost = MujocoFiniteDiffCost(env)
    N = 10
    ilqr = iLQR(dynamics, cost, N)
    x0 = dynamics.get_state()
    us_init = np.array([env.action_space.sample() for _ in range(N)])
    xs, us = ilqr.fit(x0, us_init, n_iterations=3)
    assert x0.shape == xs[0].shape
    assert xs.shape[0] == N + 1
    assert us.shape == (N, 2)
    assert env.action_space.contains(us[0])
def main(T, dt):
    T = T
    dt = dt

    x_init = np.array([1.5, 1.5, -0.05, -0.05])
    U_init = np.zeros((T, 2))

    Q_f = np.eye(4) # terminal cost
    Q = np.zeros((4, 4)) # cost matrix for states

    ilqr_solver = iLQR(f, T, dt, x_init, U_init, compute_quadratic_approx_cost_torque, Q = Q, Q_f = Q_f)
    threshold = pow(10, -3)
    U = ilqr_solver.run_algorithm(threshold) # column i of U should be control input at time i

    # Execute the control sequence
    traj = execute_trajectory(x_init, U, dt)

    plot_trajectory(traj)
Пример #7
0
def control_ilqr(N=1000):
    pend = Pendulum()
    dynamics = FiniteDiffDynamics(pend.f, pend.state_size, pend.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 100 * np.eye(pend.state_size)
    R = 50 * np.eye(pend.action_size)

    Q_terminal = np.diag((1000, 1000, 1000))
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=pend.x_goal)

    x0 = pend.initial_state  # Initial state.
    us_init = np.random.uniform(-2, 2, (N, 1))  # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    pend.close()

    return us
Пример #8
0
def control_ilqr():
    cart = MountainCar()
    dynamics = FiniteDiffDynamics(cart.f, cart.state_size, cart.action_size)

    # The coefficients weigh how much your state error is worth to you vs
    # the size of your controls. You can favor a solution that uses smaller
    # controls by increasing R's coefficient.
    Q = 500 * np.eye(cart.state_size)
    R = 100 * np.eye(cart.action_size)

    Q_terminal = np.eye(cart.state_size)*1000
    cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=cart.x_goal)
    # cost = BatchAutoDiffCost(cart.cost_function, cart.state_size, cart.action_size)

    N = 1000  # Number of time-steps in trajectory.
    x0 = cart.initial_state  # Initial state.
    us_init = np.random.uniform(-1, 1, (N, 1)) # Random initial action path.

    ilqr = iLQR(dynamics, cost, N)
    xs, us = ilqr.fit(x0, us_init)
    cart.close()

    return us
Пример #9
0
 def run_IterLinQuadReg_matrix(self,
                               A,
                               B,
                               C,
                               dist_info_sharing='AM',
                               us_init=None):
     x_input, u_input = self.state_inputs()
     if np.ndim(A) != 2:
         if dist_info_sharing == 'GM':
             A = gmean(A, axis=0)
             B = gmean(B, axis=0)
             C = gmean(C, axis=0)
             print(A.shape, 'A', B.shape, 'B', C.shape, 'C')
         elif dist_info_sharing == 'AM':
             A = np.sum(A, axis=0) / A.shape[0]
             B = np.sum(B, axis=0, keepdims=True) / B.shape[0]
             B = B.T
             C = np.sum(C, axis=0) / C.shape[0]
     else:
         pass
     f = self.next_states_matrix(x_input, u_input, A, B, C)
     dynamics = AutoDiffDynamics(f, x_input, u_input)
     x_goal = self.augment_state(self.x_goal)
     if self.Q_terminal.all() == None:
         cost = QRCost(self.Q, self.R)
     else:
         cost = QRCost(self.Q,
                       self.R,
                       Q_terminal=self.Q_terminal,
                       x_goal=x_goal)
     x0 = self.augment_state(self.x0)
     if us_init == None:
         us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
     ilqr = iLQR(dynamics, cost, self.N)
     xs, us = ilqr.fit(x0, us_init, on_iteration=self.on_iteration)
     return xs, us
Пример #10
0
 def run_IterLinQuadReg(self, us_init=None):
     x_input, u_input = self.state_inputs()
     x_dot_dot, theta_dot_dot, theta_prime = self.accel(
         x_input, u_input, self.xd)
     f = self.next_states(x_input, x_dot_dot, theta_dot_dot, theta_prime)
     dynamics = AutoDiffDynamics(f, x_input, u_input, hessians=False)
     x_goal = self.augment_state(self.x_goal)
     if self.Q_terminal.all() == None:
         cost = QRCost(self.Q, self.R)
     else:
         cost = QRCost(self.Q,
                       self.R,
                       Q_terminal=self.Q_terminal,
                       x_goal=x_goal)
     x0 = self.augment_state(self.x0)
     if us_init == None:
         us_init = np.random.uniform(-1, 1, (self.N, dynamics.action_size))
     ilqr = iLQR(dynamics, cost, self.N, hessians=False)
     xs, us = ilqr.fit(x0,
                       us_init,
                       on_iteration=self.on_iteration,
                       n_iterations=1000)
     # print(ilqr._K,'this is capital K')
     return xs, us, ilqr._k, ilqr._K
Пример #11
0
def main_fuc(R0):
    # print(R0)
    R1 = R0[0]
    R2 = R0[1]
    R[0, 0] = R1
    R[1, 1] = R1
    R[2, 2] = R1
    R[3, 3] = R2
    R[4, 4] = R2
    R[5, 5] = R2
    us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))
    cost = PathQsRCost(Q, R, x_path=x_path, u_path=u_path)
    #print(cost)
    J_hist = []
    ilqr = iLQR(dynamics, cost, N - 1)
    xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
    # len_us = len(us)
    # print(f"len(us)={len_us}")
    # print(us)
    # print(xs)
    # print(us)
    cost2 = PathQsRCost(Q, R, x_path, us)
    ilqr2 = iLQR(dynamics, cost2, N - 1)
    cntrl = RecedingHorizonControllerPath(x0, ilqr2)
    # print(J_hist)
    ####### if want to smooth J_hist
    # plt.figure()
    # plt.plot(J_hist)
    # plt.xlabel('Iteration')
    # plt.ylabel('Cost')
    # plt.grid()
    # plt.show(

    plt.ion()
    count = 0
    fig = plt.figure()
    ax1 = fig.add_subplot(321)
    ax1.set_xlim([0, 177])
    ax1.set_ylim([-0.4, 0.6])
    ax2 = fig.add_subplot(323)
    ax2.set_xlim([0, 177])
    ax2.set_ylim([-0.2, 2.0])
    ax3 = fig.add_subplot(325)
    ax3.set_xlim([0, 177])
    ax3.set_ylim([-0.3, 0.3])
    ax4 = fig.add_subplot(322)
    ax4.set_xlim([0, 177])
    ax4.set_ylim([-0.8, 0.6])
    ax5 = fig.add_subplot(324)
    ax5.set_xlim([0, 177])
    ax5.set_ylim([0.0, 1.7])
    ax6 = fig.add_subplot(326)
    ax6.set_xlim([0, 177])
    ax6.set_ylim([-0.3, 0.3])
    x1 = []
    y1 = []
    x1_follow = []
    y1_follow = []
    x2 = []
    y2 = []
    x2_follow = []
    y2_follow = []
    x3 = []
    y3 = []
    x3_follow = []
    y3_follow = []
    x4 = []
    y4 = []
    x4_follow = []
    y4_follow = []
    x5 = []
    y5 = []
    x5_follow = []
    y5_follow = []
    x6 = []
    y6 = []
    x6_follow = []
    y6_follow = []
    us3 = []
    line1, = ax1.plot(x1, y1, 'r-')
    line2, = ax1.plot(x1_follow, y1_follow, 'b-')
    line3, = ax2.plot(x2, y2, 'r-')
    line4, = ax2.plot(x2_follow, y2_follow, 'b-')
    line5, = ax3.plot(x3, y3, 'r-')
    line6, = ax3.plot(x3_follow, y3_follow, 'b-')
    line7, = ax4.plot(x4, y4, 'r-')
    line8, = ax4.plot(x4_follow, y4_follow, 'b-')
    line9, = ax5.plot(x5, y5, 'r-')
    line10, = ax5.plot(x5_follow, y5_follow, 'b-')
    line11, = ax6.plot(x6, y6, 'r-')
    line12, = ax6.plot(x6_follow, y6_follow, 'b-')

    #print(sys.getsizeof(cntrl.control(us)))
    #
    # with open('test.npy', 'wb') as f:
    #     np.save(f, us)
    # print(len(us))
    # print('step1')

    for xs2, us2 in tqdm(cntrl.control(us)):
        # print(us2[0])
        # print(type(us2[0]))
        us3.append(us2[0])
        x1.append(count)
        y1.append(xs2[0][0])
        x1_follow.append(count)
        y1_follow.append(x_path[count][0])
        x2.append(count)
        y2.append(xs2[0][1])
        x2_follow.append(count)
        y2_follow.append(x_path[count][1])
        x3.append(count)
        y3.append(xs2[0][2])
        x3_follow.append(count)
        y3_follow.append(x_path[count][2])
        x4.append(count)
        y4.append(xs2[0][3])
        x4_follow.append(count)
        y4_follow.append(x_path[count][3])
        x5.append(count)
        y5.append(xs2[0][4])
        x5_follow.append(count)
        y5_follow.append(x_path[count][4])
        x6.append(count)
        y6.append(xs2[0][5])
        x6_follow.append(count)
        y6_follow.append(x_path[count][5])
        # print(xs2[0][0:6])
        # print(x_path[count][0:6])
        count += 1
        cntrl.set_state(xs2[1])
        line1.set_ydata(y1)
        line1.set_xdata(x1)
        line2.set_ydata(y1_follow)
        line2.set_xdata(x1_follow)
        line3.set_ydata(y2)
        line3.set_xdata(x2)
        line4.set_ydata(y2_follow)
        line4.set_xdata(x2_follow)
        line5.set_ydata(y3)
        line5.set_xdata(x3)
        line6.set_ydata(y3_follow)
        line6.set_xdata(x3_follow)
        line7.set_ydata(y4)
        line7.set_xdata(x4)
        line8.set_ydata(y4_follow)
        line8.set_xdata(x4_follow)
        line9.set_ydata(y5)
        line9.set_xdata(x5)
        line10.set_ydata(y5_follow)
        line10.set_xdata(x5_follow)
        line11.set_ydata(y6)
        line11.set_xdata(x6)
        line12.set_ydata(y6_follow)
        line12.set_xdata(x6_follow)
        fig.canvas.draw()
        fig.canvas.flush_events()
        us = us[1:]
        if count == 177:
            break
    plt.ioff()
    # plt.show()

    KL1 = measure_d(y1_follow, y1)
    KL2 = measure_d(y2_follow, y2)
    KL3 = measure_d(y3_follow, y3)
    KL4 = measure_d(y4_follow, y4)
    KL5 = measure_d(y5_follow, y5)
    KL6 = measure_d(y6_follow, y6)

    KL = KL1 + KL2 + KL3 + KL4 + KL5 + KL6

    fname = f'R1_{R1}_R2_{R2}_KL_{KL}.png'
    s_path = f'./figures_new/{fname}'
    plt.savefig(s_path)

    plt.close('all')

    print(KL)

    # if KL > KL0:
    #     KL = KL0

    gc.collect()
    return KL
Q[2, 2] = Q[3, 3] = 1.0**2
R = 0.1 * np.eye(dynamics.action_size)

# Terminal state cost.
Q_terminal = 100 * np.eye(dynamics.state_size)

# Instantaneous control cost.
R = np.array([[0.1]])
cost = PathQRCost(Q, R, x_path=x_path, u_path=u_path)
#cost = PathQsRCost(Q,R,x_path=x_path,u_path=u_path)
cost = QRCost(Q, R, Q_terminal=Q[0], x_goal=x_goal)
# Random initial action path.
us_init = np.random.uniform(-1, 1, (N - 1, dynamics.action_size))

J_hist = []
ilqr = iLQR(dynamics, cost, N - 1)
#xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)

controller = RecedingHorizonController(x0, ilqr)
count = 0
plt.ion()

fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_xlim([4, 12])
ax.set_ylim([5, 12])
x = []
y = []

line1, = ax.plot(x, y, 'r-')
for xs2, us2 in controller.control(us_init):
Пример #13
0
    lqr.solve()
    lqr_states, lqr_controls, lqr_costs = lqr.forward_pass(x0)

    #Us2 = np.random.random((T, control_dim))
    Us2 = np.ones((T, control_dim))
    #Us2 = np.asarray(lqr_controls) 
    xt = x0.copy(); Xs2 = xt;
    for t in range(0, T-1):
        xt1 = dyn_func(xt, Us2[t])
        Xs2 = np.vstack((Xs2, xt1))
        xt = xt1 
    #Xs = lqr_states; Us = lqr_controls;
    Xs = Xs2; Us = Us2;
    original_ilqr_cost = np.sum([cost_func(x, u) for (x,u) in zip(Xs, Us)])

    ilqr = iLQR(dyn_func, cost_func, Xs, Us);

    for _ in range(1):
        ilqr.backwards_pass()
        ilqr_states, ilqr_controls, ilqr_costs = ilqr.forward_pass(x0, update=True)

    for t in range(T):
        lqr_x = lqr_states[t]
        lqr_u = lqr_controls[t]
        ilqr_x = ilqr_states[t]
        ilqr_u = ilqr_controls[t]
        print("t={}, xlqr:  {}".format(t, lqr_x))
        print("  {}, xilqr: {} ".format(t, ilqr_x));
        print("  {}, xilqr_orig: {} ".format(t, Xs[t]));
        print("  {}, ulqr:  {}".format(t, lqr_u))
        print("  {}, uilqr: {} ".format(t, ilqr_u));
Пример #14
0
print(R)

# print(u_path)

us_init = np.random.uniform(-1, 1, (N-1, dynamics.action_size))
#
#
cost = PathQRCost(Q[0], R, x_path=x_path, u_path=u_path)

#print(cost)
#
# # Random initial action path.

#
J_hist = []
ilqr = iLQR(dynamics, cost, N-1)
xs, us = ilqr.fit(x0, us_init, on_iteration=on_iteration)
# len_us = len(us)
# print(f"len(us)={len_us}")
# print(us)
# print(xs)

#R = 0.01 * np.eye(dynamics.action_size)

#
# R[1,1] = 40.0
# R[4,4] = 40.0

# us = np.where(us>max_bounds,max_bounds,us)
# us = np.where(us<min_bounds,min_bounds,us)
Пример #15
0
l_og = X0[:, 0]
theta_og = X0[:, 1]
x_og = (l_og + a) * np.cos(theta_og)
y_og = (l_og + a) * np.sin(theta_og)
Fl_og = np.append(U0[:, 0], 0)
Ft_og = np.append(U0[:, 1], 0)

# %% Generate Costs
Q = np.diag([275, 275, 275, 275])
R = np.diag([.1, .1])
Q_terminal = np.diag([1000, 1000, 1000, 1000])
cost = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=xe)

#%% Optimizing Trajectory
J_hist = []
ilqr1 = iLQR(dynamics, cost, N)
xs, us = ilqr1.fit(x0, U0, on_iteration=on_iteration)
us = constrain(us, -max_torque, max_torque)

# %% Post Processing
l = xs[:, 0]
theta = xs[:, 1]
l_dot = xs[:, 2]
theta_dot = xs[:, 3]
Fl = np.append(us[:, 0], 0)
Ft = np.append(us[:, 1], 0)
x = (l + a + d) * np.cos(theta)
y = (l + a + d) * np.sin(theta)
ax = plt.figure(1)
plt.clf()
plt.plot(x, y, label="iLQR")