Пример #1
0
 def test_reorganize(self):
     """Check the shape of the structure returned by ``reorganize``.

     A single raw trajectory (``[1, 2]``, ``1``, ``[4, 5]``) with payoffs
     ``[1]`` is passed in; the result, converted to a NumPy array, is
     expected to have shape ``(1, 1, 5)``.
     """
     raw_trajectories = [[[1, 2], 1, [4, 5]]]
     payoffs = [1]
     reorganized = reorganize(raw_trajectories, payoffs)
     observed_shape = np.array(reorganized).shape
     self.assertEqual(observed_shape, (1, 1, 5))
Пример #2
0
    def run(self,
            is_training: bool = False,
            seed: int = None) -> (List[List[dict]], dict):
        """
        Play one complete game and collect what every player saw and did.

        :param is_training: (boolean): When True each agent acts through its
              ``step`` method; otherwise ``eval_step`` is used.
        :param seed: (int): Seed applied to both ``np.random`` and ``random``
              before the game starts. Leave it as None for a single-process
              program; assign it in a multi-process program for
              reproducibility.
        :return: (tuple) Tuple containing:

                (list): The trajectories after being passed through
                        ``reorganize``.
                The payoffs returned by ``self.get_payoffs()``; per the
                original documentation, each entry corresponds to one player.
        :raises ValueError: If the environment is in single agent mode or
              human mode.

        Note: The trajectories are a 3-dimension list. The first dimension is
              for different players. The second dimension is for different
              transitions. The third dimension is for the contents of each
              transition.
        """
        mode_forbidden = self.single_agent_mode or self.human_mode
        if mode_forbidden:
            raise ValueError(
                'Run in single agent mode or human mode is not allowed.')

        # Seed both random sources so a seeded run is reproducible.
        if seed is not None:
            np.random.seed(seed)
            random.seed(seed)

        # One raw record per player: states and actions in the order seen.
        records = []
        for _ in range(self.player_num):
            records.append([])

        state, current_player = self.init_game()
        records[current_player].append(state)

        while not self.is_over():
            # Pick the training or the evaluation policy of the acting agent.
            agent = self.agents[current_player]
            choose_action = agent.step if is_training else agent.eval_step
            action = choose_action(state)

            # Advance the environment, then credit the action to the player
            # who acted (not to the player who moves next).
            acting_player = current_player
            state, current_player = self.step(action)
            records[acting_player].append(action)

            # Terminal states are appended for everybody after the loop.
            if self.game.is_over():
                continue
            records[current_player].append(state)

        # Every player receives their own view of the final state.
        for pid in range(self.player_num):
            records[pid].append(self.get_state(pid))

        payoffs = self.get_payoffs()

        return reorganize(records, payoffs), payoffs