Python KuhnPokerGame.KuhnInfoset примеры использования

Язык программирования: Python

Пространство имен/Пакет: KuhnPoker

Класс/Тип: KuhnPokerGame

Метод/Функция: KuhnInfoset

Примеров на hotexamples.com: 4

Python KuhnPokerGame.KuhnInfoset — найдено 4 примера. Это лучшие примеры кода на Python для KuhnPoker.KuhnPokerGame.KuhnInfoset, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать / Скрыть

KuhnNode(9)

KuhnInfoset(4)

KuhnPokerGame(1)

Пример #1

Показать файл

Файл: PPO.py Проект: thomasj02/nfsp-pytorch

def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper,
                 global_step: int):
    """Log the policy's aggressive-action probability at four decision points.

    For each of the three cards, one scalar is written per decision point
    via ``writer.add_scalar``, giving twelve scalars per call.

    Args:
        writer: Object whose ``add_scalar(tag, value, global_step=...)``
            receives every logged value.
        policy: Object whose ``aggressive_action_prob(infoset)`` supplies
            the value to log.
        global_step: Step index forwarded unchanged to ``add_scalar``.
    """
    # (bet sequence, tag template) pairs, in the order they are logged.
    # Judging by the tag names, 0 encodes a check and 1 a bet -- confirm
    # against KuhnPokerGame.
    decision_points = (
        ((), "strategy/%s/p0_open"),
        ((0, ), "strategy/%s/p0_check/p1"),
        ((0, 1), "strategy/%s/p0_check/p1_bet/p0"),
        ((1, ), "strategy/%s/p0_bet/p1"),
    )

    # A single infoset object is reused and mutated for every query.
    infoset = KuhnPokerGame.KuhnInfoset(0, ())

    for card in range(3):
        infoset.card = card

        for bets, tag_template in decision_points:
            infoset.bet_sequence = bets
            prob = policy.aggressive_action_prob(infoset)
            writer.add_scalar(tag_template % card_to_str(card),
                              prob,
                              global_step=global_step)

Пример #2

Показать файл

Файл: Exploitability.py Проект: thomasj02/nfsp-pytorch

    def get_game_value(self) -> float:
        """Return this player's game value averaged over the three cards.

        For each card the player might hold, the root infoset (empty bet
        sequence) is evaluated with ``self._get_game_state_value`` and the
        entry at index ``self.player_num`` is kept.

        Returns:
            The arithmetic mean of the three per-card values.
        """
        game_values = []

        for my_card in range(3):
            # The opponent cannot hold our card; the two remaining cards
            # each get weight 0.5.
            opponent_card_probs = np.array([0.5, 0.5, 0.5])
            opponent_card_probs[my_card] = 0
            my_infoset = KuhnPoker.KuhnInfoset(card=my_card, bet_sequence=())
            game_value = self._get_game_state_value(my_infoset,
                                                    opponent_card_probs)
            game_values.append(game_value[self.player_num])

        return sum(game_values) / len(game_values)

Пример #3

Показать файл

Файл: Exploitability.py Проект: thomasj02/nfsp-pytorch

    def _get_opponent_game_value(
            self, my_infoset: KuhnPoker.KuhnInfoset,
            opponent_card_probs: np.ndarray) -> np.ndarray:
        """Return the value of a state in which the opponent acts next.

        The opponent's action probabilities come from
        ``self.opponent_policy.aggressive_action_prob`` for every card other
        than ``my_infoset.card``. Each action's continuation value, computed
        by ``self._get_game_state_value``, is weighted by that action's
        total probability under ``opponent_card_probs``.

        Args:
            my_infoset: Infoset giving this player's card and the bet
                sequence so far.
            opponent_card_probs: Array indexed by card (0..2) with the
                weight assigned to the opponent holding that card.

        Returns:
            A length-2 array: the sum of the continuation values, each
            weighted by its action's total probability.
        """
        action_probs_by_card = {}
        marginal_action_probs = np.zeros(2)
        for card in range(3):
            # The opponent cannot hold the card we hold.
            if card == my_infoset.card:
                continue
            infoset = KuhnPoker.KuhnInfoset(
                card=card, bet_sequence=my_infoset.bet_sequence)
            p_aggressive = self.opponent_policy.aggressive_action_prob(
                infoset)
            p_passive = 1.0 - p_aggressive
            # Index 0 is the passive action, index 1 the aggressive one.
            action_probs_by_card[card] = [p_passive, p_aggressive]
            marginal_action_probs += opponent_card_probs[card] * np.array(
                [p_passive, p_aggressive])

        expected_value = np.zeros(2)
        for action in (0, 1):
            # An action the opponent never takes contributes nothing; it is
            # skipped before its (all-zero) weights reach self._normalize.
            if marginal_action_probs[action] == 0:
                continue

            # Re-weight each possible opponent card by how likely that card
            # is to produce the observed action.
            posterior = opponent_card_probs.copy()
            for card, probs in action_probs_by_card.items():
                posterior[card] *= probs[action]
            posterior = self._normalize(posterior)

            next_infoset = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            continuation = self._get_game_state_value(next_infoset, posterior)
            expected_value += continuation * marginal_action_probs[action]

        return expected_value

Пример #4

Показать файл

Файл: Exploitability.py Проект: thomasj02/nfsp-pytorch

    def _get_game_state_value(self, my_infoset: KuhnPoker.KuhnInfoset,
                              opponent_card_probs: np.ndarray) -> np.ndarray:
        """Return the value array for the state described by ``my_infoset``.

        Dispatches on the kind of state:

        * terminal: delegates to ``self._get_terminal_game_state_value``;
        * opponent to act: delegates to ``self._get_opponent_game_value``;
        * this player to act: tries both actions and keeps the value array
          whose entry at ``self.player_num`` is strictly largest.

        Args:
            my_infoset: Infoset giving this player's card and the bet
                sequence so far.
            opponent_card_probs: Array of weights over the opponent's
                possible cards, passed on unchanged.

        Returns:
            A value array indexed by player number.
        """
        if my_infoset.is_terminal:
            return self._get_terminal_game_state_value(my_infoset,
                                                       my_infoset.card,
                                                       opponent_card_probs)

        if my_infoset.player_to_act != self.player_num:
            return self._get_opponent_game_value(my_infoset,
                                                 opponent_card_probs)

        # Our own decision: start from -inf so that any action whose value
        # is strictly greater replaces the placeholder.
        me = self.player_num
        best_value = np.array([float('-inf'), float('-inf')])
        for action in (0, 1):
            child = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            child_value = self._get_game_state_value(child,
                                                     opponent_card_probs)
            if child_value[me] > best_value[me]:
                best_value = child_value

        return best_value