def get_closest_fruits(self, grid, safe_moves, pacman_location):
    """
    Return the distance to the closest fruit for each direction.

    For every move in ``safe_moves`` a breadth-first search is started
    from the tile Pac-Man would reach with that move.  The result is a
    list of 4 values indexed by ``letter_to_act[move]`` (documented
    upstream as up, left, down, right).  An entry is the number of steps
    from Pac-Man to the nearest fruit when leaving in that direction, or
    -1 when the direction is not in ``safe_moves`` or no fruit can be
    reached from it.

    A tile is treated as holding a fruit when the lowest bit of
    ``grid.grid[tile]`` is set.
    """
    # Local import: the top of the file is not guaranteed to import deque.
    from collections import deque

    distances = [-1] * 4
    for move in safe_moves:
        start = grid.check_position(
            utils.index_sum(pacman_location, grid.action_map[move]))
        visited = np.zeros(grid.grid.shape)
        # Mark the start tile so that it is never enqueued a second time.
        visited[start] = 1
        queue = deque([(start, 0)])
        while queue:
            position, distance = queue.popleft()
            if grid.grid[position] & 1:
                # +1 accounts for the step from Pac-Man to the start tile.
                distances[letter_to_act[move]] = distance + 1
                break
            for next_move in grid.get_valid_moves(position):
                neighbour = grid.check_position(
                    utils.index_sum(position, grid.action_map[next_move]))
                if not visited[neighbour]:
                    visited[neighbour] = 1
                    # Every neighbour of `position` is exactly one step
                    # further; the depth must not accumulate between
                    # siblings.
                    queue.append((neighbour, distance + 1))
    return distances
def compute_distances(self):
    """
    Compute the distance between every pair of free tiles.

    For each tile whose grid value is not 64 (presumably a wall - confirm
    against the grid encoding), a breadth-first search is run over the
    moves returned by ``get_valid_moves``.  The resulting array, of the
    same shape as ``self.grid``, is stored in ``self.distances[start]``:
    entry ``[tile]`` is the number of steps from ``start`` to ``tile``.
    Tiles that are never reached keep the initial value 0.
    """
    # Local import: the top of the file is not guaranteed to import deque.
    from collections import deque

    rows, cols = self.grid.shape[0], self.grid.shape[1]
    for x in range(rows):
        for y in range(cols):
            start = (x, y)
            if self.grid[start] == 64:
                continue
            visited = np.zeros(self.grid.shape)
            distances = np.zeros(self.grid.shape)
            visited[start] = 1
            # FIFO queue: tiles are expanded in order of increasing depth.
            queue = deque([start])
            while queue:
                position = queue.popleft()
                for move in self.get_valid_moves(position):
                    neighbour = self.check_position(
                        index_sum(position, self.action_map[move]))
                    if not visited[neighbour]:
                        visited[neighbour] = 1
                        distances[neighbour] = distances[position] + 1
                        queue.append(neighbour)
            self.distances[start] = distances
def get_valid_moves(self, position):
    """
    List the moves that can be played from ``position``.

    A move is kept when ``check_position`` accepts the tile it leads to,
    i.e. does not raise ``InvalidIndex``.  Moves are returned in the
    iteration order of ``self.action_map``.
    """
    allowed = []
    for move in self.action_map:
        target = index_sum(position, self.action_map[move])
        try:
            self.check_position(target)
        except InvalidIndex:
            # The target tile is rejected: this move is not playable.
            continue
        allowed.append(move)
    return allowed
def flee_move(self, observation):
    """
    Pick the move that puts the ghost as far as possible from Pac-Man.

    A random move (``self.random_move``) is used as the baseline; every
    valid move from the ghost's current tile is then compared against it
    and replaces it only when it yields a strictly larger distance, so
    ties keep the earlier candidate.
    """
    pacman_x, pacman_y = observation.positions[0]  # Pac-Man is agent 0

    def distance_after(candidate):
        # Distance to Pac-Man from the tile the ghost reaches with
        # `candidate`.
        ghost_x, ghost_y = observation.check_position(
            index_sum(observation.positions[self.id],
                      observation.action_map[candidate]))
        return observation.distances[ghost_x, ghost_y][pacman_x, pacman_y]

    best_move = self.random_move(observation)
    best_distance = distance_after(best_move)
    candidates = observation.get_valid_moves(observation.positions[self.id])
    for candidate in candidates:
        candidate_distance = distance_after(candidate)
        if candidate_distance > best_distance:
            best_move, best_distance = candidate, candidate_distance
    return best_move
def update(self, actions):
    """
    Apply one action per agent to the grid and report the outcome.

    ``actions`` is iterated in order; the i-th entry is looked up in
    ``self.action_map`` and moves agent ``i`` (documented upstream as a
    list of 5 chars among 'U', 'D', 'R', 'L').  Each agent is encoded in
    the grid by the value ``2 ** (i + 1)``, which is subtracted from the
    tile it leaves and added to the tile it enters.

    Returns the tuple ``(reward, ended)`` obtained from
    ``compute_reward()`` and ``check_ending()``.
    """
    self.last_point_taken += 1
    # Keep the positions from before the move.
    self.old_positions = copy(self.positions)

    for agent, action in enumerate(actions):
        agent_value = 2 ** (agent + 1)

        # Remove the agent from the tile it currently occupies.
        source = self.positions[agent]
        self.grid[source] = self.grid[source] - agent_value

        # Move it, then mark the tile it now occupies.
        self.positions[agent] = self.check_position(
            index_sum(self.positions[agent], self.action_map[action]))
        target = self.positions[agent]
        self.grid[target] = self.grid[target] + agent_value

    reward = self.compute_reward()
    ended = self.check_ending()
    return (reward, ended)
def grid_to_state(self, grid):
    """
    Compute the state (vector of size 11) given a Grid object that
    represents the current observation.  The goal is to simplify the
    observation space.

    Layout: [s1, s2, s3, s4, s5 & 2, s5 & 1, s6, s7, s8, s9, s10]

    * s1..s4 (indices 0-3): 1 when the direction cannot be played.
    * s5 (indices 4-5): suggested direction, stored as ``d & 2`` (so 0 or
      2) and ``d & 1``.  With at most one non-dangerous direction it is
      the direction maximising the distance to the nearest ghost;
      otherwise it is the non-dangerous direction with the closest fruit.
    * s6..s9 (indices 6-9): 1 when a ghost is closer than the danger
      distance to the tile reached in that direction.
    * s10 (index 10): 1 when every playable move lands on a ghost, 0
      otherwise (including when no move is playable).

    Directions are indexed through ``letter_to_act``.
    """
    # A direction is dangerous when a ghost is closer than this.
    danger_distance = 8

    state = np.zeros(11)
    pacman_location = grid.positions[0]
    ghosts_location = grid.positions[1:]
    pacman_possible_moves = grid.get_valid_moves(pacman_location)

    # s1 to s4
    for move in letter_to_act:
        if move not in pacman_possible_moves:
            state[letter_to_act[move]] = 1

    # Tile reached by each playable move, computed once and shared by
    # s5, s6..s9 and s10.
    next_positions = {
        move: grid.check_position(
            utils.index_sum(pacman_location, grid.action_map[move]))
        for move in pacman_possible_moves
    }

    # Distance to the nearest ghost per direction (-1 = not playable).
    min_distance_to_ghost_tab = -1 * np.ones(4)
    safe_moves = []
    for move, next_position in next_positions.items():
        min_distance_to_ghost = min(
            grid.distances[next_position][ghost_location]
            for ghost_location in ghosts_location)
        direction = letter_to_act[move]
        min_distance_to_ghost_tab[direction] = min_distance_to_ghost
        if min_distance_to_ghost < danger_distance:
            # s6 to s9
            state[6 + direction] = 1
        else:
            safe_moves.append(move)

    # s5
    if len(safe_moves) <= 1:
        best_direction = min_distance_to_ghost_tab.argmax()
    else:
        distances_to_fruits = self.get_closest_fruits(
            grid, safe_moves, pacman_location)
        for i, distance in enumerate(distances_to_fruits):
            if act_to_letter[i] not in safe_moves or distance == -1:
                # np.inf rather than np.infty: the alias was removed in
                # NumPy 2.0.
                distances_to_fruits[i] = np.inf
        best_direction = np.array(distances_to_fruits).argmin()
    state[4] = best_direction & 2
    state[5] = best_direction & 1

    # s10
    # since the ghosts can cut back, we only consider pacman as trapped
    # when he will reach a ghost position whatever move he makes
    is_trapped = bool(next_positions) and all(
        any(next_position == ghost_location
            for ghost_location in ghosts_location)
        for next_position in next_positions.values())
    state[10] = int(is_trapped)

    return state