示例#1
0
    def test_check_check(self):
        """Check-check payoffs: the higher card is expected to win 1."""
        # (player_cards, expected payoffs) for the bet sequence (0, 0).
        showdown_cases = (
            ([0, 1], [-1, 1]),
            ([1, 0], [1, -1]),
        )

        for cards, expected in showdown_cases:
            node = KuhnPokerGame.KuhnNode((0, 0))
            result = node.get_payoffs(player_cards=cards)
            self.assertEqual(expected, result.tolist())
示例#2
0
    def test_bet_call(self):
        """Bet-call payoffs: the higher card is expected to win 2."""
        # (player_cards, expected payoffs) for the bet sequence (1, 1).
        showdown_cases = (
            ([0, 1], [-2, 2]),
            ([2, 1], [2, -2]),
        )

        for cards, expected in showdown_cases:
            node = KuhnPokerGame.KuhnNode((1, 1))
            result = node.get_payoffs(player_cards=cards)
            self.assertEqual(expected, result.tolist())
示例#3
0
def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper,
                 global_step: int):
    """Record the policy's aggressive-action probabilities as scalars.

    For each of the three cards the policy is queried at four bet
    sequences, and every probability is written through
    ``writer.add_scalar`` under a ``strategy/<card>/...`` tag.

    Args:
        writer: Receives one ``add_scalar`` call per (card, decision point).
        policy: Queried through ``aggressive_action_prob(infoset)``.
        global_step: Forwarded unchanged to every ``add_scalar`` call.
    """
    # (bet sequence so far, tag template), listed in logging order.
    decision_points = (
        ((), "strategy/%s/p0_open"),
        ((0, ), "strategy/%s/p0_check/p1"),
        ((0, 1), "strategy/%s/p0_check/p1_bet/p0"),
        ((1, ), "strategy/%s/p0_bet/p1"),
    )

    # One infoset object is reused and mutated in place for every query.
    probe = KuhnPokerGame.KuhnInfoset(0, ())

    for card in range(3):
        probe.card = card

        for history, tag_template in decision_points:
            probe.bet_sequence = history
            prob = policy.aggressive_action_prob(probe)
            writer.add_scalar(tag_template % card_to_str(card),
                              prob,
                              global_step=global_step)
示例#4
0
    def get_game_value(self) -> float:
        """Return this player's game value averaged over its three cards.

        For each card the player could hold, the root infoset (empty bet
        sequence) is evaluated with ``_get_game_state_value`` and the entry
        for ``self.player_num`` is kept; the result is the plain mean of
        those three numbers.
        """
        per_card_values = []

        for held_card in range(3):
            # The opponent holds each of the two other cards with
            # probability 0.5 and can never hold our own card.
            opponent_card_probs = np.full(3, 0.5)
            opponent_card_probs[held_card] = 0

            root = KuhnPoker.KuhnInfoset(card=held_card, bet_sequence=())
            values = self._get_game_state_value(root, opponent_card_probs)
            per_card_values.append(values[self.player_num])

        return sum(per_card_values) / len(per_card_values)
示例#5
0
    def _get_opponent_game_value(
            self, my_infoset: KuhnPoker.KuhnInfoset,
            opponent_card_probs: np.ndarray) -> np.ndarray:
        """Return the expected value of a node where the opponent acts.

        The opponent's action is marginalised over the cards it may hold:
        each action's overall probability is accumulated from
        ``self.opponent_policy``, the card distribution is re-weighted by
        the per-card probability of that action and passed through
        ``self._normalize``, and the value of the resulting child node is
        weighted by the action's overall probability.

        Args:
            my_infoset: Infoset of this player (card plus bet sequence).
            opponent_card_probs: Weight per card index for the opponent's
                holding; the entry at ``my_infoset.card`` is never read or
                re-weighted here.

        Returns:
            A length-2 array, the probability-weighted sum of the child
            values returned by ``_get_game_state_value``.
        """
        # Pass 1: per-card [passive, aggressive] probabilities and the
        # overall probability of each action.
        action_probs_by_card = {}
        action_totals = np.zeros(2)
        for card in range(3):
            if card == my_infoset.card:
                continue
            aggressive = self.opponent_policy.aggressive_action_prob(
                KuhnPoker.KuhnInfoset(card=card,
                                      bet_sequence=my_infoset.bet_sequence))
            passive = 1.0 - aggressive
            action_probs_by_card[card] = [passive, aggressive]
            action_totals += opponent_card_probs[card] * np.array(
                [passive, aggressive])

        # Pass 2: for every action that can occur, update the card
        # distribution and recurse into the child node.
        expected_value = np.zeros(2)
        for action in (0, 1):
            weight = action_totals[action]
            if weight == 0:
                # The action is never taken, so it contributes nothing.
                continue

            posterior = opponent_card_probs.copy()
            for card, probs in action_probs_by_card.items():
                posterior[card] *= probs[action]
            posterior = self._normalize(posterior)

            child = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            child_value = self._get_game_state_value(child, posterior)
            expected_value += child_value * weight

        return expected_value
示例#6
0
def collect_trajectories(policy: Policies.Policy, num_games: int):
    """Play ``num_games`` hands against a Nash opponent and record the learner.

    Player 0 always acts with ``Policies.NashPolicy(0)``; the other seat
    samples its action from ``policy.forward``. Only the non-Nash seat's
    transitions are stored.

    Args:
        policy: Network whose ``forward`` maps an encoded infoset to a
            tensor whose first element is used as the probability of the
            aggressive action.
        num_games: Number of hands to play.

    Returns:
        A two-element list of ``PlayerTrajectories`` indexed by seat. The
        entry for the Nash seat is created but never written to.
    """
    nash_policy = Policies.NashPolicy(0)
    nash_player = 0
    player_trajectories = [PlayerTrajectories(), PlayerTrajectories()]

    for _ in range(num_games):
        game = KuhnPokerGame.KuhnPokerGame()

        while not game.game_state.is_terminal:
            player_to_act = game.game_state.player_to_act
            infoset = game.game_state.infosets[player_to_act]

            if player_to_act == nash_player:
                action = nash_policy.get_action(infoset)
            else:
                # Encode the infoset once: the same encoding feeds the
                # network and is stored in the trajectory below.
                state = infoset_to_state(infoset)
                infoset_state = torch.from_numpy(
                    np.array(state)).float().to(device)
                aggressive_action_prob = policy.forward(
                    infoset_state).cpu().detach()

                # Manually calculate the action so we don't have to re-evaluate the infoset
                action = int(
                    random.random() < aggressive_action_prob.numpy()[0])

            # Advance the game by appending the chosen action.
            new_bet_sequence = game.game_state.bet_sequence + (action, )
            game.game_state.bet_sequence = new_bet_sequence
            if game.game_state.is_terminal:
                game_rewards = game.game_state.get_payoffs()
            else:
                # Intermediate steps carry no reward.
                game_rewards = 0, 0

            if player_to_act != nash_player:
                player_trajectories[player_to_act].add_transition(
                    state, action, aggressive_action_prob,
                    game_rewards[player_to_act])

            if game.game_state.is_terminal:
                # If the Nash seat made the final move, the learner's last
                # stored transition was given reward 0; patch in the payoff.
                other_player = (player_to_act + 1) % 2
                if other_player != nash_player:
                    player_trajectories[other_player].amend_last_reward(
                        game_rewards[other_player])
                player_trajectories[(nash_player + 1) %
                                    2].complete_trajectory()

    return player_trajectories
示例#7
0
    def _get_game_state_value(self, my_infoset: KuhnPoker.KuhnInfoset,
                              opponent_card_probs: np.ndarray) -> np.ndarray:
        """Return the value array for the node described by ``my_infoset``.

        Terminal nodes are delegated to ``_get_terminal_game_state_value``
        and opponent decision nodes to ``_get_opponent_game_value``. At this
        player's own decision nodes both actions are evaluated recursively
        and the child whose entry for ``self.player_num`` is strictly
        largest is returned; on a tie the earlier action (0) wins.

        Args:
            my_infoset: Infoset of this player (card plus bet sequence).
            opponent_card_probs: Weight per card index for the opponent's
                holding; passed through unchanged on this player's moves.
        """
        if my_infoset.is_terminal:
            return self._get_terminal_game_state_value(
                my_infoset, my_infoset.card, opponent_card_probs)

        me = self.player_num
        if my_infoset.player_to_act != me:
            return self._get_opponent_game_value(my_infoset,
                                                 opponent_card_probs)

        # Our move: keep the child value that is best for us, starting from
        # -inf so any finite child value replaces the seed.
        best_value = np.full(2, -np.inf)
        for action in (0, 1):
            child = KuhnPoker.KuhnInfoset(
                my_infoset.card, my_infoset.bet_sequence + (action, ))
            candidate = self._get_game_state_value(child,
                                                   opponent_card_probs)
            if candidate[me] > best_value[me]:
                best_value = candidate

        return best_value
示例#8
0
 def test_first_action_is_not_terminal(self):
     """A node reached after a single action must not be terminal."""
     node = KuhnPokerGame.KuhnNode((0, ))
     self.assertFalse(node.is_terminal)
示例#9
0
 def test_game_start_is_not_terminal(self):
     """The root node (empty bet sequence) must not be terminal."""
     root = KuhnPokerGame.KuhnNode(())
     self.assertFalse(root.is_terminal)
示例#10
0
 def test_bet_fold(self):
     """After bet-fold, player 0 is expected to win 1 with the lower card."""
     node = KuhnPokerGame.KuhnNode((1, 0))
     result = node.get_payoffs(player_cards=[0, 1])
     self.assertEqual([1, -1], result.tolist())
示例#11
0
 def test_nonterminal_payoff_raises_exception(self):
     """Asking a non-terminal node for payoffs must raise RuntimeError."""
     with self.assertRaises(RuntimeError):
         # Construction stays inside the block so the conditions under
         # which this test passes are unchanged.
         root = KuhnPokerGame.KuhnNode(bet_sequence=())
         root.get_payoffs([0, 0])
示例#12
0
 def test_p0_calls_is_terminal(self):
     """The bet sequence (0, 1, 1) must be terminal."""
     node = KuhnPokerGame.KuhnNode((0, 1, 1))
     self.assertTrue(node.is_terminal)
示例#13
0
 def test_p1_calls_is_not_terminal(self):
     """The bet sequence (0, 1) must not be terminal.

     NOTE(review): despite the test name, (0, 1) looks like a check
     followed by a bet rather than a call - confirm the intended name.
     """
     node = KuhnPokerGame.KuhnNode((0, 1))
     self.assertFalse(node.is_terminal)
示例#14
0
 def test_p1_checks_is_terminal(self):
     """The bet sequence (0, 0) must be terminal."""
     node = KuhnPokerGame.KuhnNode((0, 0))
     self.assertTrue(node.is_terminal)