def test_check_check(self):
    # Bet sequence (0, 0): verify the payoffs for both orderings of the
    # two cards. A fresh node is built for every case.
    bet_sequence = (0, 0)
    cases = (
        ([0, 1], [-1, 1]),
        ([1, 0], [1, -1]),
    )
    for cards, expected in cases:
        node = KuhnPokerGame.KuhnNode(bet_sequence)
        payoffs = node.get_payoffs(player_cards=cards)
        self.assertEqual(expected, payoffs.tolist())
def test_bet_call(self):
    # Bet sequence (1, 1): the expected payoffs have magnitude two, with
    # the sign depending on which player holds the higher card.
    bet_sequence = (1, 1)
    cases = (
        ([0, 1], [-2, 2]),
        ([2, 1], [2, -2]),
    )
    for cards, expected in cases:
        node = KuhnPokerGame.KuhnNode(bet_sequence)
        payoffs = node.get_payoffs(player_cards=cards)
        self.assertEqual(expected, payoffs.tolist())
def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper,
                 global_step: int):
    """Log the policy's aggressive-action probability at each decision point.

    For every card (0..2) and each of the four bet sequences below, queries
    ``policy.aggressive_action_prob`` and writes the result to ``writer`` as
    a scalar tagged ``strategy/<card>/<decision point>``.

    Args:
        writer: Receives one ``add_scalar`` call per (card, decision point).
        policy: Object exposing ``aggressive_action_prob(infoset)``.
        global_step: Step value forwarded to every ``add_scalar`` call.
    """
    # (bet sequence, tag format) pairs, listed in the order they are logged.
    decision_points = (
        ((), "strategy/%s/p0_open"),
        ((0, ), "strategy/%s/p0_check/p1"),
        ((0, 1), "strategy/%s/p0_check/p1_bet/p0"),
        ((1, ), "strategy/%s/p0_bet/p1"),
    )

    # A single infoset object is reused and mutated for every query.
    infoset = KuhnPokerGame.KuhnInfoset(0, ())
    for card in range(3):
        infoset.card = card
        for history, tag_format in decision_points:
            infoset.bet_sequence = history
            prob = policy.aggressive_action_prob(infoset)
            tag = tag_format % card_to_str(card)
            writer.add_scalar(tag, prob, global_step=global_step)
def get_game_value(self) -> float:
    """Return this player's game value averaged over their three cards.

    For each card the player could hold (0..2), the opponent is assumed
    equally likely (0.5 each) to hold either of the two remaining cards.
    The value of the empty-bet-sequence infoset is then computed with
    ``_get_game_state_value`` and the entry for ``self.player_num`` kept.

    Returns:
        The arithmetic mean of the three per-card values.
    """
    game_values = []
    for my_card in range(3):
        # Uniform over the two cards we do not hold; our own card is
        # impossible for the opponent.
        opponent_card_probs = np.array([0.5, 0.5, 0.5])
        opponent_card_probs[my_card] = 0

        root_infoset = KuhnPoker.KuhnInfoset(card=my_card, bet_sequence=())
        root_value = self._get_game_state_value(root_infoset,
                                                opponent_card_probs)
        game_values.append(root_value[self.player_num])

    return sum(game_values) / len(game_values)
def _get_opponent_game_value(
        self, my_infoset: KuhnPoker.KuhnInfoset,
        opponent_card_probs: np.ndarray) -> np.ndarray:
    """Return the expected value of a state in which the opponent acts.

    The opponent's policy is queried once for every card they could hold
    (every card except ours). Each action's values are weighted by the
    total probability of that action; before recursing, the opponent card
    distribution is updated for the observed action and passed through
    ``self._normalize``.

    Args:
        my_infoset: Our infoset at the point where the opponent must act.
        opponent_card_probs: Per-card probabilities for the opponent's
            holding, indexed by card.

    Returns:
        A length-2 array: the probability-weighted sum of the successor
        state values.
    """
    candidate_cards = [c for c in range(3) if c != my_infoset.card]

    # Per-card [passive, aggressive] probabilities, plus the marginal
    # probability of each action across all candidate cards.
    action_probs_by_card = {}
    action_totals = np.zeros(2)
    for card in candidate_cards:
        their_infoset = KuhnPoker.KuhnInfoset(
            card=card, bet_sequence=my_infoset.bet_sequence)
        p_aggressive = self.opponent_policy.aggressive_action_prob(
            their_infoset)
        p_passive = 1.0 - p_aggressive
        action_probs_by_card[card] = [p_passive, p_aggressive]
        action_totals += opponent_card_probs[card] * np.array(
            [p_passive, p_aggressive])

    expected_value = np.zeros(2)
    for action in (0, 1):
        # An action the opponent never takes contributes nothing (and
        # skipping it avoids handing an all-zero vector to _normalize).
        if action_totals[action] == 0:
            continue

        # Re-weight the card distribution by how likely each card was to
        # produce this action.
        updated_card_probs = opponent_card_probs.copy()
        for card in candidate_cards:
            updated_card_probs[card] *= action_probs_by_card[card][action]
        updated_card_probs = self._normalize(updated_card_probs)

        next_bets = my_infoset.bet_sequence + (action, )
        successor_value = self._get_game_state_value(
            KuhnPoker.KuhnInfoset(my_infoset.card, next_bets),
            updated_card_probs)
        expected_value += successor_value * action_totals[action]

    return expected_value
def collect_trajectories(policy: Policies.Policy, num_games: int):
    """Play games of ``policy`` against a Nash policy and record transitions.

    Player 0 always follows ``Policies.NashPolicy(0)``; the other seat is
    driven by ``policy``. Only the non-Nash player's decisions are stored.

    Args:
        policy: Model whose ``forward`` maps an infoset state tensor to the
            probability of the aggressive action (element 0 is read).
        num_games: Number of games to play.

    Returns:
        A two-element list of ``PlayerTrajectories``, indexed by player.
        Only the non-Nash player's entry receives transitions.
    """
    nash_policy = Policies.NashPolicy(0)
    nash_player = 0

    player_trajectories = [PlayerTrajectories(), PlayerTrajectories()]

    for _ in range(num_games):
        game = KuhnPokerGame.KuhnPokerGame()
        while not game.game_state.is_terminal:
            player_to_act = game.game_state.player_to_act
            infoset = game.game_state.infosets[player_to_act]

            if player_to_act == nash_player:
                action = nash_policy.get_action(infoset)
            else:
                # Encode the infoset once; the same encoding feeds the
                # network and is stored in the transition.
                # NOTE(review): assumes infoset_to_state is side-effect
                # free -- it was previously called twice here.
                state = infoset_to_state(infoset)
                state_tensor = torch.from_numpy(
                    np.array(state)).float().to(device)
                aggressive_action_prob = policy.forward(
                    state_tensor).cpu().detach()
                # Sample the action directly from the probability so the
                # infoset does not have to be evaluated a second time.
                action = int(
                    random.random() < aggressive_action_prob.numpy()[0])

            game.game_state.bet_sequence = (
                game.game_state.bet_sequence + (action, ))

            # Rewards are zero until the game reaches a terminal state.
            if game.game_state.is_terminal:
                game_rewards = game.game_state.get_payoffs()
            else:
                game_rewards = 0, 0

            if player_to_act != nash_player:
                player_trajectories[player_to_act].add_transition(
                    state, action, aggressive_action_prob,
                    game_rewards[player_to_act])

            if game.game_state.is_terminal:
                # If the other player is the learner, their last recorded
                # transition was stored with a zero reward; patch in the
                # final payoff.
                other_player = (player_to_act + 1) % 2
                if other_player != nash_player:
                    player_trajectories[other_player].amend_last_reward(
                        game_rewards[other_player])

        # NOTE(review): closing the trajectory once per game is inferred
        # from a whitespace-collapsed original -- confirm the nesting.
        player_trajectories[(nash_player + 1) % 2].complete_trajectory()

    return player_trajectories
def _get_game_state_value(self, my_infoset: KuhnPoker.KuhnInfoset,
                          opponent_card_probs: np.ndarray) -> np.ndarray:
    """Return the value array of the state described by ``my_infoset``.

    Terminal states and opponent-to-act states are delegated to their
    dedicated helpers. When it is our turn, both actions are evaluated and
    the value array of the action that is best for ``self.player_num`` is
    returned (the first action wins ties).

    Args:
        my_infoset: Our current infoset.
        opponent_card_probs: Per-card probabilities for the opponent's
            holding.

    Returns:
        A length-2 value array for the state.
    """
    if my_infoset.is_terminal:
        return self._get_terminal_game_state_value(
            my_infoset, my_infoset.card, opponent_card_probs)

    if my_infoset.player_to_act != self.player_num:
        return self._get_opponent_game_value(my_infoset,
                                             opponent_card_probs)

    # Our move: keep the value array of whichever action is strictly
    # better for us than the best seen so far.
    best_value = np.full(2, float('-inf'))
    for action in (0, 1):
        next_bets = my_infoset.bet_sequence + (action, )
        successor = KuhnPoker.KuhnInfoset(my_infoset.card, next_bets)
        candidate = self._get_game_state_value(successor,
                                               opponent_card_probs)
        if candidate[self.player_num] > best_value[self.player_num]:
            best_value = candidate

    return best_value
def test_first_action_is_not_terminal(self):
    # A single action (0, ) must not end the game.
    node = KuhnPokerGame.KuhnNode((0, ))
    self.assertFalse(node.is_terminal)
def test_game_start_is_not_terminal(self):
    # The empty bet sequence (no actions taken yet) must not be terminal.
    node = KuhnPokerGame.KuhnNode(())
    self.assertFalse(node.is_terminal)
def test_bet_fold(self):
    # Bet sequence (1, 0): player 0 is expected to win one chip even
    # though they hold the lower card here.
    node = KuhnPokerGame.KuhnNode((1, 0))
    payoffs = node.get_payoffs(player_cards=[0, 1])
    self.assertEqual([1, -1], payoffs.tolist())
def test_nonterminal_payoff_raises_exception(self):
    # Asking for payoffs at the (non-terminal) start of the game must
    # raise RuntimeError. The node is built inside the context so the
    # guarded region is the same as a single chained expression.
    with self.assertRaises(RuntimeError):
        start_node = KuhnPokerGame.KuhnNode(bet_sequence=())
        start_node.get_payoffs([0, 0])
def test_p0_calls_is_terminal(self):
    # The three-action sequence (0, 1, 1) must end the game.
    node = KuhnPokerGame.KuhnNode((0, 1, 1))
    self.assertTrue(node.is_terminal)
def test_p1_calls_is_not_terminal(self):
    # After the sequence (0, 1) the game must still be in progress.
    node = KuhnPokerGame.KuhnNode((0, 1))
    self.assertFalse(node.is_terminal)
def test_p1_checks_is_terminal(self):
    # The sequence (0, 0) must end the game.
    node = KuhnPokerGame.KuhnNode((0, 0))
    self.assertTrue(node.is_terminal)