ax.legend(('1', '2', '3', '4'))
ax.set_ylabel('motor speeds, rad/s')
ax.grid(True, which='major')
ax.set_title('Commands')

M = control['cmd_moment']
ax = axes[1]
ax.plot(time, M[:, 0], 'r.', time, M[:, 1], 'g.', time, M[:, 2], 'b.')
ax.legend(('x', 'y', 'z'))
ax.set_ylabel('moment, N*m')
ax.grid(True, which='major')

T = control['cmd_thrust']
ax = axes[2]
ax.plot(time, T, 'k.')
ax.set_ylabel('thrust, N')
ax.set_xlabel('time, s')
ax.grid(True, which='major')

# 3D Paths
fig = plt.figure('3D Path')
ax = Axes3Ds(fig)
world.draw(ax)
ax.plot3D(state['x'][:, 0], state['x'][:, 1], state['x'][:, 2], 'b.')
ax.plot3D(flat['x'][:, 0], flat['x'][:, 1], flat['x'][:, 2], 'k')

# Animation (Slow)
# Instead of viewing the animation live, you may provide a .mp4 filename to save.
R = Rotation.from_quat(state['q']).as_matrix()  # as_dcm() was renamed as_matrix() in scipy >= 1.4
animate(time, state['x'], R, world=world, filename=None)
plt.show()
markeredgewidth=3, markerfacecolor='none')  # continuation of a plot call begun earlier in the script
world.draw_line(ax, flat['x'], color='black', linewidth=2)
world.draw_points(ax, state['x'], color='blue', markersize=4)
if collision_pts.size > 0:
    ax.plot(collision_pts[0, [0]], collision_pts[0, [1]], collision_pts[0, [2]],
            'rx', markersize=36, markeredgewidth=4)
ax.legend(handles=[
    Line2D([], [], color='black', linewidth=2, label='Trajectory'),
    Line2D([], [], color='blue', linestyle='', marker='.', markersize=4,
           label='Flight')],
    loc='upper right')

# Animation (Slow)
#
# Instead of viewing the animation live, you may provide a .mp4 filename to save.
R = Rotation.from_quat(state['q']).as_matrix()  # as_dcm() was renamed as_matrix() in scipy >= 1.4
animate(sim_time, state['x'], R, world=world, filename=None, show_axes=True)
plt.show()
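# Usage note: to save the animation instead of viewing it live, pass an .mp4
# path as `filename`; the name below is an illustrative placeholder only.
#
#   animate(sim_time, state['x'], R, world=world,
#           filename='flight.mp4', show_axes=True)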
def Qlearning(args):
    """
    The main Q-learning function, utilizing the functions implemented above.
    Needs to be modified to choose actions from a discretized action space.
    """
    reward_list = []
    position_list = []
    success_list = []
    success = 0          # count of the number of successes reached
    success_array_5 = 0
    args.log_permit = False

    for i in tqdm(range(args.max_episodes), position=0):
        # Initialize parameters
        done = False      # indicates whether the episode is done
        terminal = False  # indicates whether the episode is done AND the car
                          # has reached the flag (>= 0.5 position)
        tot_reward = 0    # sum of total reward over a single episode
        state = args.start
        num_steps = 0
        path_length = 0
        path_list = []
        flag = 0
        delete_action_list = []
        print(f'\n Searching Likelihood: {args.epsilon}')

        while not done and num_steps <= args.max_steps:
            # Determine next action
            path_list.append((args.occ_map.index_to_metric_center(state)).tolist())
            action, _ = choose_action(args, state, args.epsilon)
            next_state, reward, done = step(args, state, action)

            # Update terminal
            terminal = (done and (np.linalg.norm(next_state - args.goal) <= args.tol))

            # Update Q
            Q = get_target_Q(args, state, next_state, action, reward, terminal, done)

            # Update tot_reward, state, and success (if applicable)
            state_action_pair = get_pair(args, state, action)
            add_replace_element(args, state_action_pair, Q)
            tot_reward += reward
            path_length += np.linalg.norm(next_state - state)
            state = next_state
            if terminal:
                success += 1
            num_steps += 1

        # Build a uniform time vector spanning args.max_time for the animation
        time = np.zeros((len(path_list),))
        for j in range(1, len(time)):
            time[j] = time[j - 1] + args.max_time / len(time)

        if args.animate_permit:
            position = np.asarray(path_list)
            rotation = np.full((len(time), 3, 3), np.identity(3))
            animate(args.st, args.go, time, position, rotation, args.world,
                    filename='episode_' + str(i) + '.mp4', show_axes=True)

        if terminal and path_length < args.best_path_length:
            args.best_path_length = path_length
            args.final_path = path_list

        args.dataloader = load_dataset(args.train_set, args.train_labels,
                                       batch_size=20)
        train(args)

        # Update level of epsilon using update_epsilon()
        args.epsilon = update_epsilon(args.epsilon, args.decay_rate)

        # Track rewards
        reward_list.append(tot_reward)
        position_list.append(next_state.tolist())
        success_array_5 += success
        if i == 0 or i % 5 == 4:
            success_list.append(success_array_5 / 5)
            success_array_5 = 0
            success = 0

    return reward_list, position_list, success_list
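# For reference, the helpers used by Qlearning (choose_action, update_epsilon,
# etc.) are implemented elsewhere in this project. The functions below are a
# minimal, hypothetical sketch of epsilon-greedy action selection and
# exponential epsilon decay, illustrating the pattern only; the real
# signatures in this repo differ.
def _choose_action_sketch(q_values, epsilon):
    # Explore a random action with probability epsilon, else exploit greedily.
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))


def _update_epsilon_sketch(epsilon, decay_rate):
    # Exponentially decay the exploration rate toward pure exploitation.
    return epsilon * decay_rate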