Example #1
def execute_iLQR(init_theta, use_predicted, limited_force=False):
    curr_theta_values = []
    env = CartPoleContEnv(initial_theta=init_theta)


    # start a new episode
    actual_state = env.reset()
    env.render()
    # use LQR to plan controls
    xs, us, Ks = find_ilqr_control_input(env, limited_force)
    # run the episode until termination, and print the difference between planned and actual
    is_done = False
    iteration = 0
    while not is_done:
        # print the differences between planning and execution time
        predicted_theta = xs[iteration].item(2)
        actual_theta = actual_state[2]
        predicted_action = us[iteration].item(0)
        actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
        if use_predicted:
            actual_action = predicted_action
        # print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
        # apply action according to actual state visited
        # make action in range
        actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
        actual_action = np.array([actual_action])
        actual_state, reward, is_done, _ = env.step(actual_action)
        env.render()
        iteration += 1
        # record theta wrapped to [-pi, pi) for plotting
        curr_theta_values.append(np.mod(actual_theta + np.pi, 2 * np.pi) - np.pi)
    times = np.arange(len(curr_theta_values)) * env.tau
    env.close()
    return curr_theta_values, times
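
# A possible way to exercise the helper above: compare closed-loop execution
# (state feedback via Ks) against blindly replaying the planned controls.
# The initial angle and the plotting are illustrative only; matplotlib and a
# module-level `import numpy as np` are assumed, as in the rest of the file.
import matplotlib.pyplot as plt

thetas_cl, t_cl = execute_iLQR(init_theta=np.pi, use_predicted=False)
thetas_ol, t_ol = execute_iLQR(init_theta=np.pi, use_predicted=True)

plt.plot(t_cl, thetas_cl, label='closed loop (state feedback)')
plt.plot(t_ol, thetas_ol, label='open loop (planned controls)')
plt.xlabel('t [sec]')
plt.ylabel('theta [rad]')
plt.legend()
plt.show()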
Example #2
def ilqr():
    env = CartPoleContEnv(initial_theta=np.pi)
    actual_state = env.reset()
    u_ref = [0] * env.planning_steps
    x_ref = [actual_state]
    """run forward simulation to obtain x_ref"""
    for actual_action in u_ref:
        actual_state, reward, is_done, _ = env.step(np.array([actual_action]))
        x_ref.append(actual_state)

    """linearilize f around the ref"""
    A_ref = get_A(env, angle, delta_angle, force)
    B_ref = get_B(env)
    """quadritize c"""

    """LQR"""

    """simulate u*"""

    """get new reference"""
Example #3
def plotting_theta(a, b, c, d):
    theta = {str(a): [], str(b): [], str(c): [], str(d): []}
    for i in [a,b,c,d]:
        env = CartPoleContEnv(initial_theta=np.pi * i)
        # the following is an example to start at a different theta
        # env = CartPoleContEnv(initial_theta=np.pi * 0.25)
        tau = env.tau
        # print the matrices used in LQR
        print('A: {}'.format(get_A(env)))
        print('B: {}'.format(get_B(env)))

        # start a new episode
        actual_state = env.reset()
        # env.render()
        # use LQR to plan controls
        xs, us, Ks = find_lqr_control_input(env)
        # run the episode until termination, and print the difference between planned and actual
        is_done = False
        iteration = 0
        is_stable_all = []
        while not is_done:
            # print the differences between planning and execution time
            predicted_theta = xs[iteration].item(2)
            actual_theta = actual_state[2]
            theta[str(i)].append(actual_theta)
            predicted_action = us[iteration].item(0)
            actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
            #print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
            # apply action according to actual state visited
            # make action in range
            actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
            actual_action = np.array([actual_action])
            actual_state, reward, is_done, _ = env.step(actual_action)
            is_stable = reward == 1.0
            is_stable_all.append(is_stable)
            # env.render()
            # time.sleep(0.2)
            iteration += 1
        env.close()

        # we assume a valid episode is an episode where the agent managed to stabilize the pole for the last 100 time-steps
        valid_episode = np.all(is_stable_all[-100:])
        # print if LQR succeeded
        print('initial theta {}*pi, valid episode: {}'.format(np.round(i, 4), valid_episode))
    t = np.linspace(1, 600, num=600) * tau
    plt.figure(0, figsize=(8, 8))
    plt.plot(t, theta[str(a)], 'k', markersize=1.2, label='theta_unstable=' + str(np.round(a, 4)) + 'pi')
    plt.plot(t, theta[str(d)], 'g', markersize=1.2, label=str(np.round(d, 4)) + 'pi')
    plt.plot(t, theta[str(b)], 'r', markersize=1.2, label=str(np.round(b, 4)) + 'pi')
    plt.plot(t, theta[str(c)], 'b', markersize=1.2, label=str(np.round(c, 4)) + 'pi')

    plt.xlabel("t[sec]", fontsize=18)
    plt.ylabel("theta [rad]", fontsize=18)

    #plt.xlim(0,300)
    plt.legend(loc='upper left')
    plt.show()
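
# A possible way to call the plotting helper above; the four initial angles
# (given as fractions of pi) are purely illustrative, with `a` meant to be the
# marginally unstable one and b, c, d stable ones.
plotting_theta(0.22, 0.1, 0.5, 0.21)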
Example #4
def find_unstable_theta():
    # the following is an example to start at a different theta
    # env = CartPoleContEnv(initial_theta=np.pi * 0.25)

    # print the matrices used in LQR
    #print('A: {}'.format(get_A(env)))
    #print('B: {}'.format(get_B(env)))

    valid_episode = True
    for i in range(1, 1000):
        env = CartPoleContEnv(initial_theta=np.pi * 0.001 * i)
        # start a new episode
        actual_state = env.reset()
        #env.render()
        # use LQR to plan controls
        xs, us, Ks = find_lqr_control_input(env)
        # run the episode until termination, and print the difference between planned and actual
        is_done = False
        iteration = 0
        is_stable_all = []
        while not is_done:
            # print the differences between planning and execution time
            predicted_theta = xs[iteration].item(2)
            actual_theta = actual_state[2]
            predicted_action = us[iteration].item(0)
            actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
            #print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
            # apply action according to actual state visited
            # make action in range
            actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
            actual_action = np.array([actual_action])
            actual_state, reward, is_done, _ = env.step(actual_action)
            is_stable = reward == 1.0
            is_stable_all.append(is_stable)
            #env.render()
            #time.sleep(0.2)
            iteration += 1
        env.close()
        # we assume a valid episode is an episode where the agent managed to stabilize the pole for the last 100 time-steps
        valid_episode = np.all(is_stable_all[-100:])
        if not valid_episode:
            break

    print("the unstable angle is {}".format(np.round(180 *i* 0.001,2)))
Example #5
def main():
    env = CartPoleContEnv(initial_theta=np.pi)
    # start a new episode so actual_state is defined before the control loop
    actual_state = env.reset()
    # use iLQR to plan controls
    xs, us, Ks = find_ilqr_control_input(env)
    is_done = False
    iteration = 0
    is_stable_all = []
    while not is_done:
        predicted_theta = xs[iteration].item(2)
        actual_theta = actual_state[2]
        predicted_action = us[iteration].item(0)
        actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
        print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
        actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
        actual_action = np.array([actual_action])
        actual_state, reward, is_done, _ = env.step(actual_action)
        is_stable = reward == 1.0
        is_stable_all.append(is_stable)
        env.render()
        iteration += 1
    env.close()
    valid_episode = np.all(is_stable_all[-100:])
    print('valid episode: {}'.format(valid_episode))
Example #6
        assert u.shape == (
            1, 1), "make sure the action dimension is correct: should be (1,1)"
    return xs, us, Ks


def print_diff(iteration, planned_theta, actual_theta, planned_action,
               actual_action):
    print('iteration {}'.format(iteration))
    print('planned theta: {}, actual theta: {}, difference: {}'.format(
        planned_theta, actual_theta, np.abs(planned_theta - actual_theta)))
    print('planned action: {}, actual action: {}, difference: {}'.format(
        planned_action, actual_action, np.abs(planned_action - actual_action)))


if __name__ == '__main__':
    env = CartPoleContEnv(initial_theta=np.pi * 0.1)
    # the following is an example to start at a different theta
    # env = CartPoleContEnv(initial_theta=np.pi * 0.4)

    # print the matrices used in LQR
    print('A: {}'.format(get_A(env)))
    print('B: {}'.format(get_B(env)))

    # start a new episode
    actual_state = env.reset()
    env.render()
    # use LQR to plan controls
    xs, us, Ks = find_lqr_control_input(env)
    # run the episode until termination, and print the difference between planned and actual
    is_done = False
    iteration = 0
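    # A sketch of how the loop presumably continues, mirroring the identical
    # control loops in the examples above:
    is_stable_all = []
    while not is_done:
        # print the differences between planning and execution time
        predicted_theta = xs[iteration].item(2)
        actual_theta = actual_state[2]
        predicted_action = us[iteration].item(0)
        actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
        print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
        # clip the action into the valid range and step the environment
        actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
        actual_state, reward, is_done, _ = env.step(np.array([actual_action]))
        is_stable_all.append(reward == 1.0)
        env.render()
        iteration += 1
    env.close()
    # an episode is valid if the pole was stable for the last 100 time-steps
    valid_episode = np.all(is_stable_all[-100:])
    print('valid episode: {}'.format(valid_episode))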