def policy_rollout(agent, path, t_interval=1, timesteps=200):
    """Roll out *agent*'s policy on a fresh SwimmingRobot for *timesteps* steps.

    Logs state, chosen action, and x-displacement at every step, then writes
    trajectory plots and a CSV of robot parameters under *path*.

    :param agent: trained RL agent exposing ``choose_action(state)``
    :param path: output directory for the plots and ``policy_rollout.csv``
    :param t_interval: integration interval forwarded to SwimmingRobot
    :param timesteps: number of policy steps to execute
    """
    # NOTE(review): the original body was wrapped in `for j in range(1):`, a
    # single-iteration loop; removed as dead code. `j + 1` only appeared in
    # log messages, which are reproduced verbatim with the constant 1.
    robot = SwimmingRobot(a1=0, a2=0, t_interval=t_interval)
    xs = [robot.x]
    ys = [robot.y]
    thetas = [robot.theta]
    a1s = [robot.a1]
    a2s = [robot.a2]
    steps = [0]

    def current_param():
        # One row of the output CSV: pose, joint angles, joint velocities.
        return [robot.x, robot.y, robot.theta, float(robot.a1),
                float(robot.a2), robot.a1dot, robot.a2dot]

    robot_params = [current_param()]
    print('Beginning', 1, 'th Policy Rollout')
    try:
        for i in range(timesteps):
            # rollout: observe, act, log displacement
            state = robot.state
            print('In', i + 1, 'th iteration the initial state is: ', state)
            old_x = robot.x
            action = agent.choose_action(state)
            print('In', i + 1, 'th iteration the chosen action is: ', action)
            robot.move(action=action)
            print('In', i + 1, 'th iteration, the robot moved ',
                  robot.x - old_x, ' in x direction')
            # record trajectory
            xs.append(robot.x)
            ys.append(robot.y)
            thetas.append(robot.theta)
            a1s.append(robot.a1)
            a2s.append(robot.a2)
            steps.append(i + 1)
            robot_params.append(current_param())
    except ZeroDivisionError as e:
        # a singularity in the dynamics aborts the rollout; keep partial data
        print(str(e), 'occured at ', 1, 'th policy rollout')
    # plotting + CSV export of whatever was collected
    make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps, path=path)
    generate_csv(robot_params, path + "/policy_rollout.csv")
def policy_rollout(self, timesteps=200, random_start=False):
    """Roll out the learned policy on ``self.robot_in_action``.

    Runs 5 repetitions (with randomized joint angles) when *random_start* is
    True, otherwise a single rollout from the original state. Each repetition
    writes trajectory plots and a CSV into its own subdirectory.

    :param timesteps: number of policy steps per rollout
    :param random_start: randomize the start state before each rollout
    """
    rollout_path = self.file_path + "/policy_rollout_results"
    if not os.path.exists(rollout_path):
        os.mkdir(rollout_path)
        os.chmod(rollout_path, 0o0777)

    def current_param():
        # One CSV row; the physical robot reports an encoder value instead
        # of an (x, y, theta) pose.
        if self.is_physical_robot:
            return [float(self.robot_in_action.encoder_val),
                    float(self.robot_in_action.a1),
                    float(self.robot_in_action.a2),
                    self.robot_in_action.a1dot,
                    self.robot_in_action.a2dot]
        return [self.robot_in_action.x, self.robot_in_action.y,
                self.robot_in_action.theta, float(self.robot_in_action.a1),
                float(self.robot_in_action.a2), self.robot_in_action.a1dot,
                self.robot_in_action.a2dot]

    reps = 5 if random_start else 1
    # BUG FIX: the per-rep directory was built from str(reps) (the rep COUNT),
    # so all repetitions overwrote the same folder; use the rep index. The
    # rep loop variable is also renamed so the inner timestep loop no longer
    # shadows it.
    for rep in range(reps):
        rep_path = rollout_path + "/" + str(rep)
        if not os.path.exists(rep_path):
            os.mkdir(rep_path)
            os.chmod(rep_path, 0o0777)
        # reset the robot to its original pose at the origin
        self.robot_in_action.set_state(*self._robot_original_state)
        self.robot_in_action.x = 0
        self.robot_in_action.y = 0
        assert self.robot_in_action.state == self._robot_original_state, \
            'there is a problem with reset'
        # a1s/a2s/steps are collected for BOTH robot kinds — the physical
        # plotting call below consumes them too.
        if self.is_physical_robot:
            encoders = [self.robot_in_action.encoder_val]
        else:
            xs = [self.robot_in_action.x]
            ys = [self.robot_in_action.y]
            thetas = [self.robot_in_action.theta]
        a1s = [self.robot_in_action.a1]
        a2s = [self.robot_in_action.a2]
        steps = [0]
        if random_start:
            self.robot_in_action.randomize_state(
                enforce_opposite_angle_signs=True)
        robot_params = [current_param()]
        print('Beginning Policy Rollout')
        try:
            for i in range(timesteps):
                # rollout: observe, act, log displacement
                state = self.robot_in_action.state
                print('In', i + 1, 'th iteration the initial state is: ',
                      state)
                if not self.is_physical_robot:
                    old_x = self.robot_in_action.x
                action = self.choose_action(state)
                print('In', i + 1, 'th iteration the chosen action is: ',
                      action)
                self.robot_in_action.move(action=action)
                if self.is_physical_robot:
                    displacement = self.robot_in_action.encoder_val
                else:
                    displacement = self.robot_in_action.x - old_x
                print('In', i + 1, 'th iteration, the robot moved ',
                      displacement, ' in x direction')
                # record trajectory
                if self.is_physical_robot:
                    encoders.append(self.robot_in_action.encoder_val)
                else:
                    xs.append(self.robot_in_action.x)
                    ys.append(self.robot_in_action.y)
                    thetas.append(self.robot_in_action.theta)
                a1s.append(self.robot_in_action.a1)
                a2s.append(self.robot_in_action.a2)
                steps.append(i + 1)
                robot_params.append(current_param())
        except ZeroDivisionError as e:
            # a singularity aborts this rollout; keep the partial data
            print(str(e), 'occured during policy rollout')
        # plotting + CSV export for this repetition
        if self.is_physical_robot:
            make_rollout_graphs_physical(encoders, a1s, a2s, steps,
                                         path=rep_path)
        else:
            make_rollout_graphs(xs, ys, thetas, a1s, a2s, steps,
                                path=rep_path)
        generate_csv(robot_params, rep_path + "/policy_rollout.csv")
def perform_DQN(self):
    """Run the DQN training loop for ``self.episodes`` episodes.

    Checkpoints the model roughly every 10% of episodes, logs each episode's
    robot trajectory to CSV, and records per-episode learning statistics
    (reward, loss, Q), which are saved even if training aborts.

    :return: tuple ``(num_episodes, avg_rewards, std_rewards, avg_losses,
        std_losses, avg_Qs, std_Qs)``
    """
    models_path = self.file_path + "/models"
    robot_data_path = self.file_path + "/robot_data"
    for directory in (models_path, robot_data_path):
        if not os.path.exists(directory):
            os.mkdir(directory)
            os.chmod(directory, 0o0777)
    avg_losses, std_losses = [], []
    avg_rewards, std_rewards = [], []
    avg_Qs, std_Qs = [], []
    num_episodes = []
    # BUG FIX: the checkpoint test was `e % (self.episodes / 10) == 0` —
    # float modulo, which silently never fires when episodes isn't a
    # multiple of 10. Use an integer cadence of at least 1.
    save_every = max(1, self.episodes // 10)

    def current_param():
        # One CSV row; the physical robot reports an encoder value instead
        # of an (x, y, theta) pose.
        if self.is_physical_robot:
            return [float(self.robot_in_action.encoder_val),
                    float(self.robot_in_action.a1),
                    float(self.robot_in_action.a2),
                    self.robot_in_action.a1dot,
                    self.robot_in_action.a2dot]
        return [self.robot_in_action.x, self.robot_in_action.y,
                self.robot_in_action.theta, float(self.robot_in_action.a1),
                float(self.robot_in_action.a2), self.robot_in_action.a1dot,
                self.robot_in_action.a2dot]

    try:
        # loop through each episode
        for e in range(1, self.episodes + 1):
            if e % save_every == 0:
                self.save_model(models_path, e)
            # reset the robot to its original pose at the origin
            self.robot_in_action.set_state(*self._robot_original_state)
            self.robot_in_action.x = 0
            self.robot_in_action.y = 0
            assert self.robot_in_action.state == self._robot_original_state, \
                'there is a problem with reset'
            state = self.robot_in_action.state
            rewards, losses, Qs = [], [], []
            robot_params = [current_param()]
            # loop through each iteration
            for i in range(1, self.iterations + 1):
                action = self.choose_action(state, epsilon_greedy=True)
                print(
                    'In {}th epsiode {}th iteration, the chosen action is: {}'
                    .format(e, i, action))
                reward, next_state = self.act(action=action)
                robot_params.append(current_param())
                print('The reward is: {}'.format(reward))
                rewards.append(reward)
                self.remember(state, action, reward, next_state)
                state = next_state
                # only start replay once the buffer has warmed up
                if len(self.memory
                       ) > self.memory_size / self.memory_buffer_coef:
                    loss, Q = self.replay()
                    losses.append(loss)
                    Qs.append(Q)
                    print('The average loss is: {}'.format(loss))
                    print('The average Q is: {}'.format(Q))
                if i % self.network_update_freq == 0:
                    self.update_model()
            num_episodes.append(e)
            avg_rewards.append(np.mean(rewards))
            std_rewards.append(np.std(rewards))
            avg_losses.append(np.mean(losses))
            std_losses.append(np.std(losses))
            avg_Qs.append(np.mean(Qs))
            std_Qs.append(np.std(Qs))
            generate_csv(robot_params,
                         robot_data_path + "/episode {}.csv".format(e))
    except Exception:
        # top-level training boundary: log and fall through so partial
        # learning data is still saved (was `as e`, shadowing the episode
        # counter)
        traceback.print_exc()
    finally:
        # save learning data collected so far, even on failure
        learning_path = self.file_path + "/learning_results"
        if not os.path.exists(learning_path):
            os.mkdir(learning_path)
            os.chmod(learning_path, 0o0777)
        save_learning_data(learning_path, num_episodes, avg_rewards,
                           std_rewards, avg_losses, std_losses, avg_Qs,
                           std_Qs)
    return num_episodes, avg_rewards, std_rewards, avg_losses, std_losses, \
        avg_Qs, std_Qs
# rollout state = robot.state print('In', i + 1, 'th iteration the initial state is: ', state) old_x = robot.x action = agent.choose_action(state) print('In', i + 1, 'th iteration the chosen action is: ', action) robot.move(action=action) new_x = robot.x print('In', i + 1, 'th iteration, the robot moved ', new_x - old_x, ' in x direction') # add values to lists xs.append(robot.x) a1s.append(robot.a1) a2s.append(robot.a2) steps.append(i + 1) robot_param = [ robot.x, robot.y, robot.theta, float(robot.a1), float(robot.a2), robot.a1dot, robot.a2dot ] robot_params.append(robot_param) except ZeroDivisionError as e: print(str(e), 'occured at ', j + 1, 'th policy rollout') # plotting make_graphs(xs, a1s, a2s, steps, j + 1, trial_name=TRIAL_NAME) generate_csv(robot_params, PATH + "/" + str(j + 1) + " th rollout.csv")
# NOTE(review): fragment — `robot`, `agent`, `j`, `xs`, `TIMESTEPS`,
# `TRIAL_NAME`, and the enclosing rollout loop are defined outside this chunk.
# This variant logs Euclidean (x, y) displacement rather than x only.
try:
    for i in range(TIMESTEPS):
        # rollout: observe, act, log planar displacement
        state = robot.state
        print('In', i+1, 'th iteration the initial state is: ', state)
        old_x, old_y = robot.x, robot.y
        action = agent.choose_action(state)
        print('In', i+1, 'th iteration the chosen action is: ', action)
        robot.move(action=action)
        new_x, new_y = robot.x, robot.y
        print('In', i+1, 'th iteration, the robot moved ',
              sqrt((new_x-old_x) ** 2 + (new_y-old_y) ** 2))
        # add values to lists
        xs.append(robot.x)
        ys.append(robot.y)
        a1s.append(robot.a1)
        a2s.append(robot.a2)
        steps.append(i+1)
        # one CSV row: pose, joint angles, joint velocities
        robot_param = [robot.x, robot.y, robot.theta,
                       float(robot.a1), float(robot.a2),
                       robot.a1dot, robot.a2dot]
        robot_params.append(robot_param)
except ZeroDivisionError as e:
    # a singularity in the dynamics aborts the rollout
    print(str(e), 'occured at ', j+1, 'th policy rollout')
# plotting + timestamped CSV export (colons stripped for a legal filename)
make_graphs(xs, ys, a1s, a2s, steps, j+1, trial_name=TRIAL_NAME)
generate_csv(robot_params,
             "Trial" + TRIAL_NAME + str(j+1) + " th rollout "
             + str(datetime.datetime.now()).replace(' ', '').replace(':','')
             + ".csv")
# NOTE(review): fragment — the appends reference a loop variable `t`, so the
# first group presumably sits inside a simulation loop whose header is outside
# this chunk; reconstructed flat here. TODO confirm nesting against the full
# source.
x_pos.append(robot.x)
y_pos.append(robot.y)
thetas.append(robot.theta)
times.append(t + 1)
a1.append(robot.a1)
a2.append(robot.a2)
# one CSV row: pose, joint angles, joint velocities
robot_param = [robot.x, robot.y, robot.theta,
               float(robot.a1), float(robot.a2),
               robot.a1dot, robot.a2dot]
robot_params.append(robot_param)
generate_csv(robot_params,
             "results/RobotTestResults/SwimmerIdealFluid/" + "result.csv")
# view results
# print('x positions are: ' + str(x_pos))
# print('y positions are: ' + str(y_pos))
# print('thetas are: ' + str(thetas))
plt.plot(x_pos, y_pos)
plt.xlabel('x')
plt.ylabel('y')
plt.title('y vs x')
plt.show()
plt.plot(times, a1)
plt.ylabel('a1 displacements')
plt.show()
# Save one PNG per recorded control/parameter series, then dump the raw
# rollout parameters to CSV. (`times`, the series lists, `plot_style`,
# `marker_size`, `plots_dir`, and `robot_params` come from the surrounding
# scope.)
for series, label in ((a2dots, 'a2dot'), (ks, 'k'),
                      (cs, 'c'), (a1ddots, 'a1ddot')):
    plt.plot(times, series, plot_style, markersize=marker_size)
    plt.ylabel(label)
    plt.xlabel('time')
    plt.savefig(os.path.join(plots_dir, label + '.png'))
    plt.close()
csv_path = os.path.join(plots_dir, 'rollout.csv')
generate_csv(robot_params, csv_path)