Example #1
def act(self, player_index: int, information_state: InformationState,
         available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `ReinforceClassicAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     available_actions = list(available_actions)
     vectorized_states = np.array([information_state.vectorize()] *
                                  len(available_actions))
     actions_vectorized = np.array([
         to_categorical(action, self.action_size)
         for action in available_actions
     ])
     policy_scores = self.brain.predict_policies(vectorized_states,
                                                 actions_vectorized)
     scores_sum = np.sum(policy_scores)
     probabilities = np.reshape(policy_scores / scores_sum,
                                (len(available_actions),))
     chosen_action = np.random.choice(available_actions, p=probabilities)
     transition = {
         's': information_state.vectorize(),
         'a': to_categorical(chosen_action, self.action_size),
         'r': 0.0,
         't': False
     }
     self.episode_buffer.append(transition)
     return chosen_action
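A note on the sampling step above: the policy scores returned by `predict_policies` are renormalized over only the legal actions before sampling. A minimal standalone sketch of that step, assuming the network emits one non-negative score per legal action (`sample_legal_action` is a hypothetical helper, not part of the agent):

import numpy as np

def sample_legal_action(policy_scores, available_actions, rng=np.random):
    # Normalize the per-action scores into a probability distribution
    # over the legal actions, then sample one of them.
    scores = np.asarray(policy_scores, dtype=np.float64)
    probabilities = scores / np.sum(scores)
    return rng.choice(list(available_actions), p=probabilities)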
Example #2
 def act(self, player_index: int, information_state: InformationState,
         available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `MOISMCTSWithValueNetworkAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     if self.current_transition:
         self.current_transition['terminal'] = False
         self.current_trajectory.append(self.current_transition)
         self.current_transition = None
     for i in range(self.iteration_count):
         self.current_iteration_selected_nodes = {}
         gs = information_state.create_game_state_from_information_state()
         # SELECT
         gs, info_state, current_player, terminal = self.select(gs)
         if not terminal:
             # EXPAND
             node = self.current_trees[current_player][info_state]
             expansion_actions = gs.get_available_actions_id_for_player(
                 current_player)
             node['a'] = [{
                 'n': 0,
                 'r': 0,
                 'action_id': action_id
             } for action_id in expansion_actions]
             child_action = random.choice(node['a'])
             action_to_execute = child_action['action_id']
             self.add_visited_node(node, child_action, current_player)
             gs, reward, terminal = gs.step(current_player,
                                            action_to_execute)
         # EVALUATE
         scores = self.brain.predict_state(info_state.vectorize())
         # BACKPROPAGATE SCORE
         for player_id in self.current_iteration_selected_nodes.keys():
             visited_nodes = self.current_iteration_selected_nodes[
                 player_id]
             for node, child_action in reversed(visited_nodes):
                 node['nprime'] += 1
                 child_action['n'] += 1
                 child_action['r'] += scores[player_id]
     child_action = max(
         self.current_iteration_selected_nodes[player_index][0][0]['a'],
         key=lambda child: child['n'])
     self.current_transition = {
         's': information_state.vectorize(),
         'r': 0,
         'player_index': player_index,
         'terminal': False
     }
     return child_action['action_id']
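The snippet above delegates tree descent to `self.select`, whose rule is not shown in this excerpt. A hedged sketch of the UCB1-style choice such a selection phase typically applies to these node dictionaries (each child carries 'n' visits and 'r' total reward, and `node['nprime']` counts how often the node was reached); the agent's actual rule may differ:

import math

def ucb_child(node, c=math.sqrt(2)):
    # Pick the child maximizing mean reward plus a UCB1 exploration
    # bonus; unvisited children are tried first.
    def score(child):
        if child['n'] == 0:
            return float('inf')
        return child['r'] / child['n'] + c * math.sqrt(
            math.log(node['nprime']) / child['n'])
    return max(node['a'], key=score)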
Example #3
 def act(self, player_index: int, information_state: InformationState, available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     available_actions = list(available_actions)
     if self.evaluation_episodes + self.training_episodes < self.current_episode:
         self.X = []
         self.Y = []
         self.current_episode = 0
     if self.current_episode > self.training_episodes:
         probs = self.model.predict(np.array([information_state.vectorize()]))[0]
         available_probs = probs[np.array(available_actions)]
         probs_sum = np.sum(available_probs)
         if probs_sum > 0.001:
             chosen_action_index = np.argmax(available_probs)
             action = available_actions[chosen_action_index]
         else:
             action = random.choice(available_actions)
         return action
     for i in range(self.iteration_count):
         self.current_iteration_selected_nodes = {}
         gs = information_state.create_game_state_from_information_state()
         # SELECT
         gs, info_state, current_player, terminal = self.select(gs)
         if not terminal:
             # EXPAND
             node = self.current_trees[current_player][info_state]
             expansion_actions = gs.get_available_actions_id_for_player(current_player)
             node['a'] = [{'n': 0, 'r': 0, 'action_id': action_id} for action_id in expansion_actions]
             child_action = random.choice(node['a'])
             action_to_execute = child_action['action_id']
             self.add_visited_node(node, child_action, current_player)
             gs, reward, terminal = gs.step(current_player, action_to_execute)
         # EVALUATE
         scores = self.runner.run(initial_game_state=gs, max_rounds=1)
         # BACKPROPAGATE SCORE
         for player_id in self.current_iteration_selected_nodes.keys():
             visited_nodes = self.current_iteration_selected_nodes[player_id]
             for node, child_action in reversed(visited_nodes):
                 node['nprime'] += 1
                 child_action['n'] += 1
                 child_action['r'] += scores[player_id]
     child_action = max(self.current_iteration_selected_nodes[player_index][0][0]['a'], key=lambda child: child['n'])
     self.X.append(information_state.vectorize().tolist())
     self.Y.append(to_categorical(child_action['action_id'], self.action_size))
     return child_action['action_id']
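During the expert phase this method accumulates `self.X` (vectorized states) and `self.Y` (one-hot MCTS choices); once `self.current_episode` passes `self.training_episodes`, the apprentice branch at the top plays from `self.model` instead. A minimal sketch of the kind of supervised model those buffers could train; the layer sizes and the `build_apprentice` name are assumptions, not the agent's actual network:

from tensorflow import keras

def build_apprentice(state_size, action_size):
    # Hypothetical apprentice: a small softmax classifier mapping a
    # vectorized InformationState to a distribution over all actions,
    # trained on the expert's (state, one-hot action) pairs, e.g.
    # model.fit(np.array(self.X), np.array(self.Y)).
    model = keras.Sequential([
        keras.layers.Dense(128, activation='relu', input_shape=(state_size,)),
        keras.layers.Dense(action_size, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model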
Example #4
 def act(self, player_index: int, information_state: InformationState, available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `DeepQLearningAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     available_actions_list = list(available_actions)
     inputs_states = np.array([information_state.vectorize()] * len(available_actions_list))
     actions_vectorized = np.array(
         [keras.utils.to_categorical(action_id, self.action_size) for action_id in available_actions_list])
     if self.s is not None:
         self.s_next_duplicated = inputs_states
         self.s_next_available_actions = actions_vectorized
         self.t = False
         self.learn()
     if random.random() > self.epsilon:
         q_values = self.Q.predict([inputs_states, actions_vectorized]).flatten()
         best_id = q_values.argmax()
     else:
         best_id = random.randint(0, len(available_actions_list) - 1)
     self.s = inputs_states[best_id]
     self.a = actions_vectorized[best_id]
     return available_actions_list[best_id]
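Unlike the common DQN head that outputs one Q-value per action, the `self.Q.predict([inputs_states, actions_vectorized])` call scores each (state, one-hot action) pair individually, which is why the state is duplicated once per legal action. A sketch of the kind of Keras model that call implies; layer sizes and the function name are assumptions, not the agent's actual network:

from tensorflow import keras

def build_pairwise_q_network(state_size, action_size):
    # Q(s, a) network: concatenate the state with a one-hot action
    # and regress a single scalar value for that pair.
    state_in = keras.Input(shape=(state_size,))
    action_in = keras.Input(shape=(action_size,))
    hidden = keras.layers.Dense(128, activation='relu')(
        keras.layers.Concatenate()([state_in, action_in]))
    q_value = keras.layers.Dense(1, activation='linear')(hidden)
    return keras.Model([state_in, action_in], q_value)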
Example #5
    def act(self, player_index: int, information_state: InformationState,
            available_actions: Iterable[int]) -> int:
        """
        Choose an action to play for the `RandomRolloutAgent`
        :param player_index: The ID of the player playing
        :param information_state: The `InformationState` of the game
        :param available_actions: The legal actions to choose from
        :return: The selected action
        """
        actions = tuple(available_actions)
        action_count = len(actions)
        action_scores = np.zeros(action_count)
        for i in range(action_count):
            gs = information_state.create_game_state_from_information_state()
            (result_gs, score, terminal) = gs.step(player_index, actions[i])

            # Two player zero sum game hypothesis
            player_score = (1 if player_index == 0 else -1) * score
            if not terminal:
                history = self.runner.random_rollout_run(
                    gs, self.num_rollouts_per_available_action)
                player_score += history[player_index] - history[
                    (player_index + 1) % 2]
            player_score = player_score / (
                1.0 if terminal else self.num_rollouts_per_available_action)
            action_scores[i] = player_score
        return actions[np.argmax(action_scores)]
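`self.runner.random_rollout_run` is assumed to finish the game with uniformly random moves and return cumulative scores per player; the runner itself is not part of this excerpt. A hedged sketch of what such a rollout could look like, assuming the game state also exposes `copy()` and `current_player()` alongside the methods used above:

import random
import numpy as np

def random_rollout_run(game_state, num_rollouts):
    # Play the game out with uniformly random legal moves and
    # accumulate each player's terminal score (zero-sum assumption:
    # `score` is reported from player 0's perspective).
    totals = np.zeros(2)
    for _ in range(num_rollouts):
        gs, score, terminal = game_state.copy(), 0.0, False
        while not terminal:
            player = gs.current_player()
            actions = gs.get_available_actions_id_for_player(player)
            gs, score, terminal = gs.step(player, random.choice(list(actions)))
        totals += np.array([score, -score])
    return totals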
Example #6
 def act(self, player_index: int, information_state: InformationState,
         available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `PPOWithMultipleTrajectoriesMultiOutputsAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     available_actions = list(available_actions)
     num_actions = len(available_actions)
     vectorized_state = information_state.vectorize()
     full_actions_probability, value = self.brain.predict_policy_and_value(
         vectorized_state)
     available_actions_probabilities = full_actions_probability[
         available_actions]
     sum_available_action_probabilities = np.sum(
         available_actions_probabilities)
     if sum_available_action_probabilities > 1e-7:  # guard against an all-zero masked policy (rare)
         probabilities = available_actions_probabilities / sum_available_action_probabilities
         chosen_index = np.random.choice(list(range(num_actions)),
                                         p=probabilities)
         chosen_action = available_actions[chosen_index]
     else:
         print("No action eligible, this should be extremely rare")
         chosen_index = np.random.choice(list(range(num_actions)))
         chosen_action = available_actions[chosen_index]
     transition = {
         's': vectorized_state,
         'a': chosen_action,
         'r': 0.0,
         't': False,
         'p_old': full_actions_probability.tolist()
     }
     self.current_trajectory_buffer.append(transition)
     return chosen_action
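Storing the full behaviour policy in `p_old` lets the later PPO update form the probability ratio between the updated policy and the one that actually acted. A worked sketch of the clipped surrogate that ratio feeds, using hypothetical per-transition arrays (the agent's real update lives in `self.brain` and is not shown here):

import numpy as np

def ppo_clipped_objective(new_probs, old_probs, advantages, epsilon=0.2):
    # r_t = pi_new(a|s) / pi_old(a|s), clipped to [1 - eps, 1 + eps];
    # the pessimistic minimum of the two surrogates is averaged.
    ratios = new_probs / old_probs
    clipped = np.clip(ratios, 1.0 - epsilon, 1.0 + epsilon)
    return np.mean(np.minimum(ratios * advantages, clipped * advantages))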
Example #7
 def act(self, player_index: int, information_state: InformationState,
         available_actions: Iterable[int]) -> int:
     """
     Choose an action to play for the `MOISMCTSWithRandomRolloutsAgent`
     :param player_index: The ID of the player playing
     :param information_state: The `InformationState` of the game
     :param available_actions: The legal actions to choose from
     :return: The selected action
     """
     for i in range(self.iteration_count):
         self.current_iteration_selected_nodes = {}
         gs = information_state.create_game_state_from_information_state()
         # SELECT
         gs, info_state, current_player, terminal = self.select(gs)
         if not terminal:
             # EXPAND
             node = self.current_trees[current_player][info_state]
             expansion_actions = gs.get_available_actions_id_for_player(
                 current_player)
             node['a'] = [{
                 'n': 0,
                 'r': 0,
                 'action_id': action_id
             } for action_id in expansion_actions]
             child_action = random.choice(node['a'])
             action_to_execute = child_action['action_id']
             self.add_visited_node(node, child_action, current_player)
             gs, reward, terminal = gs.step(current_player,
                                            action_to_execute)
         # EVALUATE
         scores = self.runner.run(initial_game_state=gs, max_rounds=1)
         # BACKPROPAGATE SCORE
         for player_id in self.current_iteration_selected_nodes.keys():
             visited_nodes = self.current_iteration_selected_nodes[
                 player_id]
             for node, child_action in reversed(visited_nodes):
                 node['nprime'] += 1
                 child_action['n'] += 1
                 child_action['r'] += scores[player_id]
     child_action = max(
         self.current_iteration_selected_nodes[player_index][0][0]['a'],
         key=lambda child: child['n'])
     return child_action['action_id']
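All seven implementations share the same `act(player_index, information_state, available_actions)` signature, so any of these agents can be swapped behind a single game loop. A minimal usage sketch; `current_player()` and `get_information_state_for_player()` are assumed accessor names, while the other methods appear in the snippets above:

def play_episode(game_state, agents):
    # Alternate turns until the game ends, letting each agent pick
    # its move through the shared act() interface.
    terminal, reward = False, 0.0
    while not terminal:
        player = game_state.current_player()
        info_state = game_state.get_information_state_for_player(player)
        actions = game_state.get_available_actions_id_for_player(player)
        chosen = agents[player].act(player, info_state, actions)
        game_state, reward, terminal = game_state.step(player, chosen)
    return reward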