def log_strategy(writer: SummaryWriter, policy: NnPolicyWrapper, global_step: int):
    """Write the policy's aggressive-action probabilities to ``writer``.

    For each of the three cards, ``policy.aggressive_action_prob`` is queried
    at four bet sequences and every result is logged as a scalar under a
    ``strategy/<card>/<node>`` tag at ``global_step``.

    Args:
        writer: Receives one ``add_scalar`` call per (card, node) pair.
        policy: Queried via ``aggressive_action_prob(infoset)``.
        global_step: Step value forwarded to every ``add_scalar`` call.
    """
    # (bet sequence, tag template) for each logged node, in logging order.
    decision_nodes = (
        ((), "strategy/%s/p0_open"),
        ((0, ), "strategy/%s/p0_check/p1"),
        ((0, 1), "strategy/%s/p0_check/p1_bet/p0"),
        ((1, ), "strategy/%s/p0_bet/p1"),
    )

    # One infoset object is reused and mutated in place for every query.
    probe = KuhnPokerGame.KuhnInfoset(0, ())
    for card in range(3):
        probe.card = card
        for history, tag_template in decision_nodes:
            probe.bet_sequence = history
            prob = policy.aggressive_action_prob(probe)
            tag = tag_template % card_to_str(card)
            writer.add_scalar(tag, prob, global_step=global_step)
def get_game_value(self) -> float:
    """Return the game value for ``self.player_num``, averaged over own cards.

    For each of the three cards this player could hold, the root infoset
    (empty bet sequence) is evaluated with ``_get_game_state_value`` and the
    entry for ``self.player_num`` is collected; the result is the mean of the
    three collected values.
    """
    game_values = []
    for my_card in range(3):
        # Weight 0.5 on each card other than our own; our own card gets 0
        # because the opponent cannot hold it.
        opponent_card_probs = np.array([0.5, 0.5, 0.5])
        opponent_card_probs[my_card] = 0

        my_infoset = KuhnPoker.KuhnInfoset(card=my_card, bet_sequence=())
        game_value = self._get_game_state_value(my_infoset,
                                                opponent_card_probs)
        game_values.append(game_value[self.player_num])

    return sum(game_values) / len(game_values)
def _get_opponent_game_value(
        self, my_infoset: KuhnPoker.KuhnInfoset,
        opponent_card_probs: np.ndarray) -> np.ndarray:
    """Return the expected value of a node where the opponent chooses.

    The opponent's action distribution is read from ``self.opponent_policy``
    for every card other than ``my_infoset.card``. For each action that has
    non-zero total probability, the card weights are re-weighted by the
    probability of that action, passed through ``self._normalize``, and the
    successor state is evaluated with ``_get_game_state_value``. The
    successor values are summed, each weighted by its action's total
    probability.

    Args:
        my_infoset: This player's infoset at the node.
        opponent_card_probs: Weights over the opponent's card, indexed by
            card (0..2).

    Returns:
        A length-2 array accumulated from the successor state values.
    """
    own_card = my_infoset.card
    history = my_infoset.bet_sequence
    candidate_cards = [c for c in range(3) if c != own_card]

    # Per-card [passive, aggressive] probabilities, plus the total
    # probability of each action summed over the candidate cards.
    policy_by_card = {}
    action_mass = np.zeros(2)
    for card in candidate_cards:
        p_aggressive = self.opponent_policy.aggressive_action_prob(
            KuhnPoker.KuhnInfoset(card=card, bet_sequence=history))
        p_passive = 1.0 - p_aggressive
        policy_by_card[card] = [p_passive, p_aggressive]
        action_mass += opponent_card_probs[card] * np.array(
            [p_passive, p_aggressive])

    expected_value = np.zeros(2)
    for action in (0, 1):
        mass = action_mass[action]
        if mass == 0:
            # Action is never taken: it contributes nothing, and its
            # re-weighted card weights would all be zero.
            continue

        # Re-weight the card weights by how likely each card is to take
        # this action, then normalize.
        posterior = opponent_card_probs.copy()
        for card in candidate_cards:
            posterior[card] *= policy_by_card[card][action]
        posterior = self._normalize(posterior)

        successor = KuhnPoker.KuhnInfoset(own_card, history + (action, ))
        expected_value += self._get_game_state_value(successor,
                                                     posterior) * mass

    return expected_value
def _get_game_state_value(self, my_infoset: KuhnPoker.KuhnInfoset,
                          opponent_card_probs: np.ndarray) -> np.ndarray:
    """Return the value array for the state described by ``my_infoset``.

    Dispatches on the kind of node:

    * terminal: delegated to ``_get_terminal_game_state_value``;
    * another player to act: delegated to ``_get_opponent_game_value``;
    * ``self.player_num`` to act: both actions are evaluated recursively and
      the value array whose ``self.player_num`` entry is strictly greatest
      is returned (on a tie, action 0's array is kept).

    Args:
        my_infoset: This player's infoset at the state.
        opponent_card_probs: Weights over the opponent's card, passed
            through unchanged to the delegate or the recursive calls.
    """
    if my_infoset.is_terminal:
        return self._get_terminal_game_state_value(
            my_infoset, my_infoset.card, opponent_card_probs)

    if my_infoset.player_to_act != self.player_num:
        return self._get_opponent_game_value(my_infoset, opponent_card_probs)

    # Our own decision: keep the action value that is best for us.
    me = self.player_num
    best = np.full(2, float('-inf'))
    for action in (0, 1):
        successor = KuhnPoker.KuhnInfoset(
            my_infoset.card, my_infoset.bet_sequence + (action, ))
        candidate = self._get_game_state_value(successor,
                                               opponent_card_probs)
        # Strict comparison: an equal-valued later action does not replace
        # the earlier one.
        if candidate[me] > best[me]:
            best = candidate
    return best