def get_action(self, state):
    """Grade the student's move for this step and return the reference move.

    The student agent is asked for an action on ``state``. That action and
    the number of entries returned by ``GameState.get_and_reset_explored()``
    after the student's call are compared against the reference data
    recorded for the current step (``self.step_count``).

    Bookkeeping updated on ``self``:
      * ``actions_consistent_with_optimal[i]`` is cleared when the student's
        action is not among the actions of the i-th optimal entry.
      * ``wrong_states_explored`` is set to 1 (only while it is still
        negative) when the student's explored count matches none of the
        optimal entries.
      * ``actions_consistent_with_alternative_depth[i]`` and
        ``actions_consistent_with_partial_ply_bug[i]`` are cleared when the
        student's action is not in the corresponding i-th action list.
      * ``suboptimal_moves`` gains ``(state, student_action, reference)``
        when the student's action appears in no optimal entry.
      * ``step_count`` is incremented and the global ``random`` generator is
        reseeded with ``self.seed + self.step_count``.

    Args:
        state: Game state, passed unchanged to the student agent.

    Returns:
        The first action of the first optimal entry for this step, no matter
        what the student chose.
    """
    # Drop whatever was logged before this call so that the count taken
    # below reflects only the student's own call.
    GameState.get_and_reset_explored()
    student_move = self.student_agent.get_action(state)
    student_explored = len(GameState.get_and_reset_explored())

    # Fetch all reference data for this step up front, before any flag is
    # modified.
    step = self.step_count
    optimal_actions = self.optimal_actions[step]
    alt_depth_actions = self.alt_depth_actions[step]
    partial_ply_bug_actions = self.partial_ply_bug_actions[step]

    chose_optimal = False
    explored_matches = False
    # Each optimal entry is indexed as entry[0] -> actions, entry[1] -> count.
    for i, entry in enumerate(optimal_actions):
        if student_move in entry[0]:
            chose_optimal = True
        else:
            self.actions_consistent_with_optimal[i] = False
        if student_explored == int(entry[1]):
            explored_matches = True

    # Only flip the marker while it still holds its negative value.
    if not explored_matches and self.wrong_states_explored < 0:
        self.wrong_states_explored = 1

    for i, moves in enumerate(alt_depth_actions):
        if student_move not in moves:
            self.actions_consistent_with_alternative_depth[i] = False

    for i, moves in enumerate(partial_ply_bug_actions):
        if student_move not in moves:
            self.actions_consistent_with_partial_ply_bug[i] = False

    reference_move = optimal_actions[0][0][0]
    if not chose_optimal:
        self.suboptimal_moves.append((state, student_move, reference_move))

    self.step_count += 1
    # Reseed per step so randomness after this point depends only on the
    # seed and the step index.
    random.seed(self.seed + self.step_count)
    return reference_move
def get_action(self, state):
    """Poll every reference agent on ``state`` and record their answers.

    For each agent in ``self.solution_agents`` the first element of
    ``get_best_pacman_actions(state)`` is stored together with the number of
    entries returned by ``GameState.get_and_reset_explored()`` right after
    that agent's call. Agents in ``self.alternative_depth_agents`` and
    ``self.partial_ply_bug_agents`` contribute only the first element of
    their ``get_best_pacman_actions(state)`` result.

    The three per-step result lists are appended to
    ``self.optimal_action_lists``, ``self.alternative_depth_lists`` and
    ``self.partial_ply_bug_lists``. ``self.step_count`` is then incremented
    and the global ``random`` generator is reseeded with
    ``self.seed + self.step_count``.

    Args:
        state: Game state handed to every surveyed agent.

    Returns:
        The first action reported by the first solution agent.
    """
    # Survey agents. Reset first so the first agent's count starts clean.
    GameState.get_and_reset_explored()

    solution_answers = [
        (
            agent.get_best_pacman_actions(state)[0],
            len(GameState.get_and_reset_explored()),
        )
        for agent in self.solution_agents
    ]

    alt_depth_answers = []
    for agent in self.alternative_depth_agents:
        alt_depth_answers.append(agent.get_best_pacman_actions(state)[0])

    ply_bug_answers = []
    for agent in self.partial_ply_bug_agents:
        ply_bug_answers.append(agent.get_best_pacman_actions(state)[0])

    # Record responses: one entry per step in each history list.
    self.optimal_action_lists.append(solution_answers)
    self.alternative_depth_lists.append(alt_depth_answers)
    self.partial_ply_bug_lists.append(ply_bug_answers)

    self.step_count += 1
    random.seed(self.seed + self.step_count)

    first_agent_actions = solution_answers[0][0]
    return first_agent_actions[0]