def _fill_chance(self, node):
    ''' Fills all chance nodes of a subtree with the probability of each outcome.

    Params:
        node: the root of the subtree '''
    if node.terminal:
        return
    if node.current_player == constants.players.chance:
        # Chance node: fill a uniform strategy over the possible boards.
        # This only works for the chance node at the start of the second round.
        assert len(node.children) == self.board_count
        # The strategy is uniform over outcomes, but must stay zero for
        # hands that are impossible on the corresponding board.
        node.strategy = arguments.Tensor(
            len(node.children), game_settings.card_count).fill_(0)
        uniform_prob = 1.0 / (self.board_count - 2)
        for child_index, child in enumerate(node.children):
            possible = card_tools.get_possible_hand_indexes(child.board).byte()
            node.strategy[child_index][possible] = uniform_prob
    # recurse into every child so chance nodes deeper in the subtree get filled
    for child in node.children:
        self._fill_chance(child)
def _handle_blocking_cards(self, equity_matrix, board):
    ''' Zeroes entries in an equity matrix that correspond to invalid hands.

    A hand is invalid if it shares any cards with the board.

    Params:
        equity_matrix: the matrix to modify (card_count x card_count)
        board: a possibly empty vector of board cards '''
    valid_hands = card_tools.get_possible_hand_indexes(board)
    # broadcast the validity vector across columns (masks one player's hand)
    # and across rows (masks the other player's hand)
    column_mask = valid_hands.view(1, game_settings.card_count).expand_as(equity_matrix)
    row_mask = valid_hands.view(game_settings.card_count, 1).expand_as(equity_matrix)
    equity_matrix.mul_(column_mask).mul_(row_mask)
def set_board(self, board):
    ''' Sets the (possibly empty) board cards to sample ranges with.

    The sampled ranges will assign 0 probability to any private hands that
    share any cards with the board.

    Params:
        board: a possibly empty vector of board cards '''
    hand_strengths = evaluator.batch_eval(board)
    possible_hand_indexes = card_tools.get_possible_hand_indexes(board)
    # Count possible hands with a plain integer sum. The previous
    # `sum(0, dtype=torch.uint8)` silently wraps past 255, which breaks
    # any game where more than 255 private hands are possible.
    self.possible_hands_count = int(possible_hand_indexes.sum().item())
    self.possible_hands_mask = possible_hand_indexes.view(1, -1).bool()
    # keep only the strengths of hands that do not collide with the board
    # (the old pre-allocation of this tensor was dead code -- it was
    # immediately overwritten by masked_select)
    non_coliding_strengths = torch.masked_select(
        hand_strengths, self.possible_hands_mask)
    # order maps sorted position -> original index; reverse_order inverts it
    _, order = non_coliding_strengths.sort()
    _, self.reverse_order = order.sort()
    self.reverse_order = self.reverse_order.view(1, -1).long()
    self.reordered_range = arguments.Tensor()
def _process_chance_node(self, params):
    ''' Recursively fills a player's strategy for the subtree rooted at a chance node.

    Params:
        params: tree walk parameters (see @{_fill_strategies_dfs}) '''
    resolving = params.resolving
    node = params.node
    player = params.player
    _range = params.range
    # NOTE(review): cf_values is read from params but never used below --
    # presumably kept for symmetry with the other node handlers; confirm
    cf_values = params.cf_values
    our_last_action = params.our_last_action
    # a chance node can only be processed with a live re-solving object and
    # a recorded last action to query chance-action CFVs with
    assert (resolving)
    assert (our_last_action)
    assert (not node.terminal and node.current_player == constants.players.chance)
    # on chance node we need to recompute values in next round
    for i in range(len(node.children)):
        child_node = node.children[i]
        # every child of this chance node starts the next round with P1 acting
        assert (child_node.current_player == constants.players.P1)
        assert (not child_node.terminal)
        # computing cf_values for the child node
        child_cf_values = resolving.get_chance_action_cfv(
            our_last_action, child_node.board)
        # we need to remove impossible hands from the range and then renormalize it
        child_range = _range.clone()
        mask = card_tools.get_possible_hand_indexes(child_node.board)
        child_range.mul_(mask)
        range_weight = child_range.sum(
            dim=0)  # weight should be single number
        child_range.mul_(1 / range_weight)
        # we should never touch same re-solving again after the chance action, set it to None
        # (rebinding `params` here shadows the input argument, which has been
        # fully consumed above)
        params = Parameters()
        params.node = child_node
        params.range = child_range
        params.player = player
        params.cf_values = child_cf_values
        params.resolving = None
        params.our_last_action = None
        self._fill_strategies_dfs(params)
def _fill_chance(self, node):
    ''' Fills a chance node with the probability of each outcome.

    Params:
        node: the chance node '''
    assert not node.terminal
    # We fill the strategy with a uniform probability, but it has to be zero
    # for hands that are not possible on the corresponding board. The tensor
    # is created zero-filled, so only the possible entries need writing
    # (the per-row fill_(0) inside the loop was redundant and is removed).
    node.strategy = arguments.Tensor(
        len(node.children), game_settings.card_count).fill_(0)
    for i in range(len(node.children)):
        child_node = node.children[i]
        mask = card_tools.get_possible_hand_indexes(
            child_node.board).bool()
        # remove 2 because each player holds one card
        node.strategy[i][mask] = 1.0 / (game_settings.card_count - 2)
def __init__(self, board, player_range, opponent_cfvs):
    ''' Constructor.

    Params:
        board: board card
        player_range: an initial range vector for the opponent
        opponent_cfvs: the opponent counterfactual values vector used for re-solving '''
    super().__init__()
    # identity test, not `board != None`: on a tensor, `!=` performs an
    # elementwise comparison rather than the intended validity check
    assert board is not None
    self.input_opponent_range = player_range.clone()
    self.input_opponent_value = opponent_cfvs.clone()
    # NOTE(review): 'curent' spelling kept -- the attribute name is part of
    # the object's external interface and may be read elsewhere
    self.curent_opponent_values = arguments.Tensor(
        game_settings.card_count)
    # small constant keeping regret-matching denominators strictly positive
    self.regret_epsilon = 1.0 / 100000000
    # 2 stands for 2 actions: play/terminate
    self.opponent_reconstruction_regret = arguments.Tensor(
        2, game_settings.card_count)
    self.play_current_strategy = arguments.Tensor(
        game_settings.card_count).fill_(0)
    self.terminate_current_strategy = arguments.Tensor(
        game_settings.card_count).fill_(1)
    # holds achieved CFVs at each iteration so that we can compute regret
    self.total_values = arguments.Tensor(game_settings.card_count)
    self.terminate_regrets = arguments.Tensor(
        game_settings.card_count).fill_(0)
    self.play_regrets = arguments.Tensor(game_settings.card_count).fill_(0)
    # init range mask for masking out impossible hands
    self.range_mask = card_tools.get_possible_hand_indexes(board)
    # filled lazily during iteration
    self.total_values_p2 = None
    self.play_current_regret = None
    self.terminate_current_regret = None
def _fill_ranges_dfs(self, node, ranges_absolute):
    ''' Recursively walk the tree and calculate the probability of reaching each
    node using the saved strategy profile.

    The reach probabilities are saved in the `ranges_absolute` field of each node.

    Params:
        node: the current node of the tree
        ranges_absolute: a 2xK tensor containing the probabilities of each
            player reaching the current node with each private hand '''
    node.ranges_absolute = ranges_absolute.clone()
    if(node.terminal):
        return
    assert(node.strategy != None)
    actions_count = len(node.children)
    # check that it's a legal strategy
    strategy_to_check = node.strategy
    hands_mask = card_tools.get_possible_hand_indexes(node.board)
    if node.current_player != constants.players.chance:
        # per-hand probabilities over actions must be a valid distribution;
        # the ne(checksum) check catches NaNs (NaN != NaN)
        checksum = strategy_to_check.sum(dim=0)
        assert(not torch.any(strategy_to_check.lt(0)))
        assert(not torch.any(checksum.gt(1.001)))
        assert(not torch.any(checksum.lt(0.999)))
        assert(not torch.any(checksum.ne(checksum)))
    # reach probabilities must themselves be valid probabilities
    assert(node.ranges_absolute.lt(0).sum() == 0)
    assert(node.ranges_absolute.gt(1).sum() == 0)
    # check if the range consists only of cards that don't overlap with the board
    impossible_hands_mask = hands_mask.clone().fill_(1) - hands_mask
    impossible_range_sum = node.ranges_absolute.clone().mul(
        impossible_hands_mask.view(1, game_settings.card_count).expand_as(node.ranges_absolute)).sum()
    assert impossible_range_sum == 0, impossible_range_sum
    # one (players x hands) range slice per child action
    children_ranges_absolute = arguments.Tensor(
        len(node.children), constants.players_count, game_settings.card_count)
    # chance player
    if node.current_player == constants.players.chance:
        # multiply ranges of both players by the chance prob
        children_ranges_absolute[:, constants.players.P1, :].copy_(
            node.ranges_absolute[constants.players.P1].repeat(actions_count, 1))
        children_ranges_absolute[:, constants.players.P2, :].copy_(
            node.ranges_absolute[constants.players.P2].repeat(actions_count, 1))
        children_ranges_absolute[:, constants.players.P1, :].mul_(node.strategy)
        children_ranges_absolute[:, constants.players.P2, :].mul_(node.strategy)
    # player
    else:
        # copy the range for the non-acting player
        children_ranges_absolute[:, 1-node.current_player, :] = node.ranges_absolute[1-node.current_player].clone().repeat(actions_count, 1)
        # multiply the range for the acting player using his strategy
        ranges_mul_matrix = node.ranges_absolute[node.current_player].repeat(actions_count, 1)
        children_ranges_absolute[:, node.current_player, :] = torch.mul(node.strategy, ranges_mul_matrix)
    # fill the ranges for the children
    for i in range(len(node.children)):
        child_node = node.children[i]
        child_range = children_ranges_absolute[i]
        # go deeper
        self._fill_ranges_dfs(child_node, child_range)