def execute_ilqr(init_theta, use_predicted, limited_force=False):
    curr_theta_values = []
    env = CartPoleContEnv(initial_theta=init_theta)
    # start a new episode
    actual_state = env.reset()
    env.render()
    # use iLQR to plan controls
    xs, us, Ks = find_ilqr_control_input(env, limited_force)
    # run the episode until termination, and compare planned and actual trajectories
    is_done = False
    iteration = 0
    while not is_done:
        # the differences between planning and execution time
        predicted_theta = xs[iteration].item(2)
        actual_theta = actual_state[2]
        predicted_action = us[iteration].item(0)
        actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
        if use_predicted:
            actual_action = predicted_action
        # print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
        # apply action according to actual state visited; clip it to the valid force range
        actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
        actual_action = np.array([actual_action])
        actual_state, reward, is_done, _ = env.step(actual_action)
        env.render()
        iteration += 1
        # wrap the recorded angle into [-pi, pi]
        curr_theta_values.append(np.mod(actual_theta + np.pi, 2 * np.pi) - np.pi)
    times = np.arange(0.0, env.tau * len(curr_theta_values), env.tau)
    env.close()
    return curr_theta_values, times
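# Example invocation (the angles below are illustrative placeholders, not values
# fixed anywhere in this file): compare executing the planned open-loop controls
# against closed-loop feedback, with and without the force limit.
# theta_open, t_open = execute_ilqr(np.pi, use_predicted=True)
# theta_closed, t_closed = execute_ilqr(np.pi, use_predicted=False, limited_force=True)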
def ilqr():
    env = CartPoleContEnv(initial_theta=np.pi)
    actual_state = env.reset()
    # initial guess: zero force at every planning step
    u_ref = [np.array([0.0])] * env.planning_steps
    x_ref = [actual_state]
    """run forward simulation to obtain x_ref"""
    for actual_action in u_ref:
        actual_state, reward, is_done, _ = env.step(actual_action)
        x_ref.append(actual_state)
    """linearize f around the reference trajectory"""
    # one (A, B) pair per planning step, evaluated at the reference state and action
    # (the state layout [x, x_dot, theta, theta_dot] is assumed here)
    A_ref = [get_A(env, x[2], x[3], u.item(0)) for x, u in zip(x_ref, u_ref)]
    B_ref = [get_B(env) for _ in u_ref]
    """quadratize c"""
    """LQR"""
    """simulate u*"""
    """get new reference"""
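# The """LQR""" placeholder above is the backward pass of iLQR. Below is a minimal
# sketch, assuming a fixed quadratic cost x^T Q x + u^T R u and the per-step
# linearizations (A_ref, B_ref) built in ilqr(); Q, R, and the horizon T are
# assumptions, not names defined elsewhere in this file, and the feedforward term
# of full iLQR is omitted for brevity.
def ilqr_backward_pass_sketch(A_ref, B_ref, Q, R, T):
    # backward Riccati recursion on the time-varying linear system:
    # P_t = Q + A_t^T P_{t+1} A_t - A_t^T P_{t+1} B_t (R + B_t^T P_{t+1} B_t)^{-1} B_t^T P_{t+1} A_t
    Ps = [None] * (T + 1)
    Ks = [None] * T
    Ps[T] = Q  # terminal cost-to-go
    for t in range(T - 1, -1, -1):
        A, B = A_ref[t], B_ref[t]
        # feedback gain K_t = -(R + B^T P_{t+1} B)^{-1} B^T P_{t+1} A
        Ks[t] = -np.linalg.inv(R + B.T @ Ps[t + 1] @ B) @ B.T @ Ps[t + 1] @ A
        # cost-to-go update, written in an algebraically equivalent compact form
        Ps[t] = Q + A.T @ Ps[t + 1] @ (A + B @ Ks[t])
    return Ks
# The """simulate u*""" step would then roll out u_t = u_ref[t] + Ks[t] @ (x_t - x_ref[t]),
# and the resulting trajectory becomes the new reference for the next iteration.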
def plotting_theta(a, b, c, d):
    theta = {str(a): [], str(b): [], str(c): [], str(d): []}
    for i in [a, b, c, d]:
        env = CartPoleContEnv(initial_theta=np.pi * i)
        tau = env.tau
        # print the matrices used in LQR
        print('A: {}'.format(get_A(env)))
        print('B: {}'.format(get_B(env)))
        # start a new episode
        actual_state = env.reset()
        # env.render()
        # use LQR to plan controls
        xs, us, Ks = find_lqr_control_input(env)
        # run the episode until termination, recording the actual theta at every step
        is_done = False
        iteration = 0
        is_stable_all = []
        while not is_done:
            predicted_theta = xs[iteration].item(2)
            actual_theta = actual_state[2]
            theta[str(i)].append(actual_theta)
            predicted_action = us[iteration].item(0)
            actual_action = (Ks[iteration] * np.expand_dims(actual_state, 1)).item(0)
            # print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
            # apply action according to actual state visited; clip it to the valid force range
            actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
            actual_action = np.array([actual_action])
            actual_state, reward, is_done, _ = env.step(actual_action)
            is_stable = reward == 1.0
            is_stable_all.append(is_stable)
            # env.render()
            # time.sleep(0.2)
            iteration += 1
        env.close()
        # we assume a valid episode is one where the pole stays stable for the last 100 time-steps
        valid_episode = np.all(is_stable_all[-100:])
    # plot the recorded theta trajectories for the four initial angles
    num_steps = len(theta[str(a)])
    t = np.linspace(1, num_steps, num=num_steps) * tau
    plt.figure(0, figsize=(8, 8))
    plt.plot(t, theta[str(a)], 'k', markersize=1.2, label='theta_unstable=' + str(np.round(a, 4)) + 'pi')
    plt.plot(t, theta[str(d)], 'g', markersize=1.2, label=str(np.round(d, 4)) + 'pi')
    plt.plot(t, theta[str(b)], 'r', markersize=1.2, label=str(np.round(b, 4)) + 'pi')
    plt.plot(t, theta[str(c)], 'b', markersize=1.2, label=str(np.round(c, 4)) + 'pi')
    plt.xlabel('t [sec]', fontsize=18)
    plt.ylabel('theta [rad]', fontsize=18)
    plt.legend(loc='upper left')
    plt.show()
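# Example invocation (the values are placeholders): a is meant to be the first
# unstable initial angle found by the search below, b and c are reference angles,
# and d the largest stable angle, all given as fractions of pi.
# plotting_theta(0.37, 0.1, 0.5, 0.369)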
def finding_unstable():
    # scan initial angles in increments of 0.001*pi and stop at the first one
    # for which LQR fails to stabilize the pole
    valid_episode = True
    for i in range(1, 1000):
        env = CartPoleContEnv(initial_theta=np.pi * 0.001 * i)
        # start a new episode
        actual_state = env.reset()
        # env.render()
        # use LQR to plan controls
        xs, us, Ks = find_lqr_control_input(env)
        # run the episode until termination
        is_done = False
        iteration = 0
        is_stable_all = []
        while not is_done:
            predicted_theta = xs[iteration].item(2)
            actual_theta = actual_state[2]
            predicted_action = us[iteration].item(0)
            actual_action = (Ks[iteration] * np.expand_dims(actual_state, 1)).item(0)
            # print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
            # apply action according to actual state visited; clip it to the valid force range
            actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
            actual_action = np.array([actual_action])
            actual_state, reward, is_done, _ = env.step(actual_action)
            is_stable = reward == 1.0
            is_stable_all.append(is_stable)
            # env.render()
            # time.sleep(0.2)
            iteration += 1
        env.close()
        # a valid episode is one where the pole stays stable for the last 100 time-steps
        valid_episode = np.all(is_stable_all[-100:])
        if not valid_episode:
            break
    print('the unstable angle is {} degrees'.format(np.round(180 * i * 0.001, 2)))
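# The scan above runs one full episode per candidate angle. If stability is
# monotone in the initial angle (an assumption this file does not verify), a
# bisection finds the same threshold in O(log n) episodes. A minimal sketch,
# using a hypothetical helper is_episode_valid(i) that runs one episode at
# initial_theta = np.pi * 0.001 * i and returns its valid_episode flag:
def bisect_unstable_angle(is_episode_valid, lo=1, hi=999):
    # precondition: the angle at lo is stable and the angle at hi is unstable
    while hi - lo > 1:
        mid = (lo + hi) // 2
        if is_episode_valid(mid):
            lo = mid
        else:
            hi = mid
    return hi  # smallest multiplier i whose initial angle LQR fails to stabilize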
def main():
    env = CartPoleContEnv(initial_theta=np.pi)
    # start a new episode
    actual_state = env.reset()
    # use iLQR to plan controls
    xs, us, Ks = find_ilqr_control_input(env)
    is_done = False
    iteration = 0
    is_stable_all = []
    while not is_done:
        predicted_theta = xs[iteration].item(2)
        actual_theta = actual_state[2]
        predicted_action = us[iteration].item(0)
        actual_action = (Ks[iteration] @ np.expand_dims(actual_state, 1)).item(0)
        print_diff(iteration, predicted_theta, actual_theta, predicted_action, actual_action)
        # clip the action to the valid force range
        actual_action = max(env.action_space.low.item(0), min(env.action_space.high.item(0), actual_action))
        actual_action = np.array([actual_action])
        actual_state, reward, is_done, _ = env.step(actual_action)
        is_stable = reward == 1.0
        is_stable_all.append(is_stable)
        env.render()
        iteration += 1
    env.close()
    # a valid episode is one where the pole stays stable for the last 100 time-steps
    valid_episode = np.all(is_stable_all[-100:])
    print('valid episode: {}'.format(valid_episode))
        assert u.shape == (1, 1), 'make sure the action dimension is correct: should be (1,1)'
    return xs, us, Ks


def print_diff(iteration, planned_theta, actual_theta, planned_action, actual_action):
    print('iteration {}'.format(iteration))
    print('planned theta: {}, actual theta: {}, difference: {}'.format(
        planned_theta, actual_theta, np.abs(planned_theta - actual_theta)))
    print('planned action: {}, actual action: {}, difference: {}'.format(
        planned_action, actual_action, np.abs(planned_action - actual_action)))


if __name__ == '__main__':
    env = CartPoleContEnv(initial_theta=np.pi * 0.1)
    # the following is an example to start at a different theta
    # env = CartPoleContEnv(initial_theta=np.pi * 0.4)
    # print the matrices used in LQR
    print('A: {}'.format(get_A(env)))
    print('B: {}'.format(get_B(env)))
    # start a new episode
    actual_state = env.reset()
    env.render()
    # use LQR to plan controls
    xs, us, Ks = find_lqr_control_input(env)
    # run the episode until termination, and print the difference between planned and actual
    is_done = False
    iteration = 0