Python index_sum示例，utils.index_sum Python示例

示例#1

0

显示文件

文件： Q_learning.py 项目： barnabemas/Pac-Man-Reinforcement-Learning-with-Python

 def get_closest_fruits(self, grid, safe_moves, pacman_location):
     '''
     Return the list of the distances to the closest fruit, one per direction.

     To be more specific:
         distances[i] is the distance to the closest fruit if the Pac-Man
         respectively goes up, left, down, right (the index of a move is
         given by letter_to_act). The value is -1 when the move is not in
         safe_moves or when no fruit is reachable in that direction.

     A breadth-first search is run from the tile reached by each safe move;
     a tile counts as holding a fruit when the lowest bit of its grid value
     is set.
     '''
     # Function-scope import: only part of the file is visible here.
     from collections import deque

     distances = [-1] * 4
     for move in safe_moves:
         start = grid.check_position(
             utils.index_sum(pacman_location, grid.action_map[move]))
         visited = np.zeros(grid.grid.shape)
         # The start tile is examined first, so never enqueue it again.
         visited[start] = 1
         queue = deque([(start, 0)])
         while queue:
             position, distance = queue.popleft()
             if grid.grid[position] & 1:
                 # +1 accounts for the first step from pacman_location to start.
                 distances[letter_to_act[move]] = distance + 1
                 break
             for next_move in grid.get_valid_moves(position):
                 new_position = grid.check_position(
                     utils.index_sum(position, grid.action_map[next_move]))
                 if not visited[new_position]:
                     visited[new_position] = 1
                     # Every neighbour of `position` is exactly one step
                     # further; do not accumulate across siblings.
                     queue.append((new_position, distance + 1))
     return distances

示例#2

0

显示文件

 def compute_distances(self):
     """
     Compute the distance between every pair of free tiles.

     A breadth-first search is run from each tile whose grid value is not 64
     (presumably the wall marker -- confirm against the grid encoding). The
     resulting distance map, an array shaped like the grid, is stored in
     self.distances[start]. Tiles that are never reached from start keep a
     distance of 0.
     """
     # Function-scope import: only part of the file is visible here.
     from collections import deque

     for x in range(self.grid.shape[0]):
         for y in range(self.grid.shape[1]):
             start = (x, y)
             if self.grid[start] == 64:
                 continue
             flags = np.zeros(self.grid.shape)
             distances = np.zeros(self.grid.shape)
             queue = deque([start])
             flags[start] = 1
             while queue:
                 position = queue.popleft()
                 for move in self.get_valid_moves(position):
                     new_position = self.check_position(
                         index_sum(position, self.action_map[move]))
                     if not flags[new_position]:
                         flags[new_position] = 1
                         distances[new_position] = distances[position] + 1
                         queue.append(new_position)
             # Store the finished map once per start tile, so it is also
             # recorded for a tile that has no neighbour to discover.
             self.distances[start] = distances

示例#3

0

显示文件

 def get_valid_moves(self, position):
     """
     List the moves that can be played from position.

     Each entry of self.action_map is tried in turn; a move is kept when
     check_position accepts the tile it leads to, and dropped when
     InvalidIndex is raised.
     """
     allowed = []
     for name, offset in self.action_map.items():
         try:
             self.check_position(index_sum(position, offset))
         except InvalidIndex:
             continue
         allowed.append(name)
     return allowed

示例#4

0

显示文件

文件： ghost.py 项目： barnabemas/Pac-Man-Reinforcement-Learning-with-Python

    def flee_move(self, observation):
        """
        Pick a move that takes the ghost as far as possible from the pacman.

        A random move is drawn first and used as the baseline; any valid move
        that leads to a strictly larger distance replaces it, so ties keep the
        earlier candidate.
        """
        pacman_x, pacman_y = observation.positions[0]  # pacman's position

        def distance_after(candidate):
            # Distance from the tile reached by `candidate` to the pacman.
            tile_x, tile_y = observation.check_position(
                index_sum(observation.positions[self.id],
                          observation.action_map[candidate]))
            return observation.distances[tile_x, tile_y][pacman_x, pacman_y]

        best_move = self.random_move(observation)
        best_distance = distance_after(best_move)

        candidates = observation.get_valid_moves(observation.positions[self.id])
        for candidate in candidates:
            candidate_distance = distance_after(candidate)
            if candidate_distance > best_distance:
                best_move, best_distance = candidate, candidate_distance
        return best_move

示例#5

0

显示文件

 def update(self, actions):
     """
     Apply one action per agent (pacman and ghosts) to the grid.

     actions is a char list of size 5 containing the action ('U', 'D', 'R', 'L')
     of each agent. Agent i is represented in the grid by the value
     2**(i + 1), which is removed from its current tile and added to the tile
     it moves to. The previous positions are kept in self.old_positions.

     Return the (reward, ended) tuple.
     """
     self.last_point_taken += 1
     self.old_positions = copy(self.positions)
     for agent, action in enumerate(actions):
         marker = 2 ** (agent + 1)
         origin = self.positions[agent]
         # Clear the agent from the tile it is leaving.
         self.grid[origin] = self.grid[origin] - marker
         target = self.check_position(
             index_sum(origin, self.action_map[action]))
         self.positions[agent] = target
         # Stamp the agent on the tile it has reached.
         self.grid[target] = self.grid[target] + marker
     reward = self.compute_reward()
     ended = self.check_ending()
     return (reward, ended)

示例#6

0

显示文件

文件： Q_learning.py 项目： barnabemas/Pac-Man-Reinforcement-Learning-with-Python

    def grid_to_state(self, grid):
        """
        Compute the state (vector of size 11) given a Grid object that represents the current observation.
        The goal is to simplify the observation space.

        Layout of the returned vector:
            state[0:4]  (s1 to s4)  1 when the move with that index is not a valid move.
            state[4:6]  (s5)        index of the suggested direction, stored as
                                    (index & 2, index & 1).
            state[6:10] (s6 to s9)  1 when the tile reached by the move is at a
                                    distance < 8 from at least one ghost.
            state[10]   (s10)       1 when every valid move lands on a ghost.

        Move indices are given by letter_to_act.
        """
        # A tile closer than this to a ghost is considered dangerous.
        danger_distance = 8

        state = np.zeros(11) # [s1, s2, s3, s4, s5 & 2, s5 & 1, s6, s7, s8, s9, s10]
        pacman_location = grid.positions[0]
        ghosts_location = grid.positions[1:]

        pacman_possible_moves = grid.get_valid_moves(pacman_location)

        # Tile reached by each valid move, computed once and reused below.
        landing = {
            move: grid.check_position(
                utils.index_sum(pacman_location, grid.action_map[move]))
            for move in pacman_possible_moves
        }

        # s1 to s4
        for move, index in letter_to_act.items():
            if move not in pacman_possible_moves:
                state[index] = 1

        # s5
        min_distance_to_ghost_tab = -1 * np.ones(4)
        non_dangerous_path_counter = 0
        for move in pacman_possible_moves:
            min_distance_to_ghost = min(
                grid.distances[landing[move]][ghost_location]
                for ghost_location in ghosts_location)
            min_distance_to_ghost_tab[letter_to_act[move]] = min_distance_to_ghost
            if min_distance_to_ghost >= danger_distance:
                non_dangerous_path_counter += 1

        if non_dangerous_path_counter <= 1:
            # At most one safe path: head where the nearest ghost is farthest.
            best_direction = min_distance_to_ghost_tab.argmax()
        else:
            # Several safe paths: head toward the closest fruit among them.
            safe_moves = [
                move for move in pacman_possible_moves
                if min_distance_to_ghost_tab[letter_to_act[move]] >= danger_distance
            ]
            distances_to_fruits = self.get_closest_fruits(grid, safe_moves, pacman_location)
            for i, fruit_distance in enumerate(distances_to_fruits):
                if act_to_letter[i] not in safe_moves or fruit_distance == -1:
                    # np.inf rather than np.infty: the alias was removed in NumPy 2.0.
                    distances_to_fruits[i] = np.inf
            best_direction = np.array(distances_to_fruits).argmin()
        state[4] = best_direction & 2
        state[5] = best_direction & 1

        # s6 to s9
        for move in pacman_possible_moves:
            if any(grid.distances[landing[move]][ghost_location] < danger_distance
                   for ghost_location in ghosts_location):
                state[6 + letter_to_act[move]] = 1

        # s10
        # since the ghosts can cut back, we only consider pacman as trapped when he will reach a ghost position whatever move he makes
        # NOTE(review): with no valid move the original division failed; s10 is
        # left at 0 in that case -- confirm this is the intended meaning.
        if pacman_possible_moves:
            state[10] = int(all(
                any(landing[move] == ghost_location
                    for ghost_location in ghosts_location)
                for move in pacman_possible_moves))

        return state