Exemplo n.º 1
0
    def _fill_chance(self, node):
        ''' Fills all chance nodes of a subtree with the probability of each outcome.

        Params:
            node: the root of the subtree
        '''
        if node.terminal:
            return

        children = node.children
        if node.current_player == constants.players.chance:
            # works only for a chance node at the start of the second round
            assert len(children) == self.board_count
            # build a uniform strategy over outcomes; entries for hands that
            # are impossible on the corresponding board stay at zero
            node.strategy = arguments.Tensor(
                len(children), game_settings.card_count).fill_(0)
            uniform_probability = 1.0 / (self.board_count - 2)
            for child_index, child in enumerate(children):
                possible = card_tools.get_possible_hand_indexes(
                    child.board).byte()
                node.strategy[child_index][possible] = uniform_probability

        # recurse into every child of the subtree
        for child in children:
            self._fill_chance(child)
Exemplo n.º 2
0
    def _handle_blocking_cards(self, equity_matrix, board):
        ''' Zeroes entries in an equity matrix that correspond to invalid hands.

        A hand is invalid if it shares any cards with the board.

        Params:
            equity_matrix: the matrix to modify (in place)
            board: a possibly empty vector of board cards
        '''
        valid_hands = card_tools.get_possible_hand_indexes(board)
        card_count = game_settings.card_count
        # zero the columns of hands that collide with the board
        column_mask = valid_hands.view(1, card_count).expand_as(equity_matrix)
        equity_matrix.mul_(column_mask)
        # zero the rows of hands that collide with the board
        row_mask = valid_hands.view(card_count, 1).expand_as(equity_matrix)
        equity_matrix.mul_(row_mask)
Exemplo n.º 3
0
    def set_board(self, board):
        ''' Sets the (possibly empty) board cards to sample ranges with.

        The sampled ranges will assign 0 probability to any private hands that
        share any cards with the board.

        Params:
            board: a possibly empty vector of board cards'''
        hand_strengths = evaluator.batch_eval(board)
        possible_hand_indexes = card_tools.get_possible_hand_indexes(board)
        # NOTE(review): uint8 sum caps at 255 — assumes fewer than 256
        # possible hands; confirm against game_settings.card_count
        self.possible_hands_count = possible_hand_indexes.sum(0, dtype=torch.uint8).item()
        self.possible_hands_mask = possible_hand_indexes.view(1, -1).bool()
        # keep only strengths of hands that don't collide with the board
        # (the previous dead pre-allocation of this tensor was removed: it was
        # immediately overwritten by masked_select)
        non_colliding_strengths = torch.masked_select(hand_strengths, self.possible_hands_mask)
        # order = permutation that sorts the strengths; reverse_order maps a
        # sorted vector back to the original hand order
        _, order = non_colliding_strengths.sort()
        _, self.reverse_order = order.sort()
        self.reverse_order = self.reverse_order.view(1, -1).long()
        self.reordered_range = arguments.Tensor()
Exemplo n.º 4
0
    def _process_chance_node(self, params):
        ''' Recursively fills a player's strategy for the subtree rooted at a
        chance node.

        Params:
            params: tree walk parameters (see @{_fill_strategies_dfs})
        '''
        resolving = params.resolving
        node = params.node
        player = params.player
        current_range = params.range
        our_last_action = params.our_last_action
        assert resolving
        assert our_last_action
        assert not node.terminal and node.current_player == constants.players.chance

        # on a chance node we need to recompute values for the next round
        for child_node in node.children:
            assert child_node.current_player == constants.players.P1
            assert not child_node.terminal

            # counterfactual values for this particular chance outcome
            child_cf_values = resolving.get_chance_action_cfv(
                our_last_action, child_node.board)

            # remove impossible hands from the range, then renormalize it
            child_range = current_range.clone()
            child_range.mul_(card_tools.get_possible_hand_indexes(child_node.board))
            total_mass = child_range.sum(dim=0)  # weight should be single number
            child_range.mul_(1 / total_mass)

            # the same re-solving must never be reused after the chance
            # action, so the child gets resolving/our_last_action = None
            child_params = Parameters()
            child_params.node = child_node
            child_params.range = child_range
            child_params.player = player
            child_params.cf_values = child_cf_values
            child_params.resolving = None
            child_params.our_last_action = None
            self._fill_strategies_dfs(child_params)
Exemplo n.º 5
0
    def _fill_chance(self, node):
        ''' Fills a chance node with the probability of each outcome.

        Params:
            node: the chance node
        '''
        assert not node.terminal

        # fill the strategy with a uniform probability; entries for hands
        # that are impossible on the corresponding board stay at zero
        node.strategy = arguments.Tensor(len(node.children),
                                         game_settings.card_count).fill_(0)
        for i in range(len(node.children)):
            child_node = node.children[i]
            mask = card_tools.get_possible_hand_indexes(
                child_node.board).bool()
            # (removed a redundant per-row fill_(0): the tensor above is
            # already zero-filled at creation)
            # remove 2 because each player holds one card
            node.strategy[i][mask] = 1.0 / (game_settings.card_count - 2)
Exemplo n.º 6
0
    def __init__(self, board, player_range, opponent_cfvs):
        ''' Constructor

        Params:
            board: board card
            player_range: an initial range vector for the opponent
            opponent_cfvs: the opponent counterfactual values vector used for re-solving'''
        super().__init__()
        # identity comparison per PEP 8 (was `board != None`)
        assert board is not None

        # keep private copies so later mutation by the caller has no effect
        self.input_opponent_range = player_range.clone()
        self.input_opponent_value = opponent_cfvs.clone()

        self.curent_opponent_values = arguments.Tensor(
            game_settings.card_count)

        # small constant to keep regret-matching denominators positive
        self.regret_epsilon = 1.0 / 100000000

        # 2 stands for 2 actions: play/terminate
        self.opponent_reconstruction_regret = arguments.Tensor(
            2, game_settings.card_count)

        # start with pure "terminate" strategy (play = 0, terminate = 1)
        self.play_current_strategy = arguments.Tensor(
            game_settings.card_count).fill_(0)
        self.terminate_current_strategy = arguments.Tensor(
            game_settings.card_count).fill_(1)

        # holds achieved CFVs at each iteration so that we can compute regret
        self.total_values = arguments.Tensor(game_settings.card_count)

        self.terminate_regrets = arguments.Tensor(
            game_settings.card_count).fill_(0)
        self.play_regrets = arguments.Tensor(game_settings.card_count).fill_(0)

        # init range mask for masking out impossible hands
        self.range_mask = card_tools.get_possible_hand_indexes(board)

        # lazily initialized during iteration
        self.total_values_p2 = None
        self.play_current_regret = None
        self.terminate_current_regret = None
Exemplo n.º 7
0
    def _fill_ranges_dfs(self, node, ranges_absolute):
        ''' Recursively walk the tree and calculate the probability of reaching each
        node using the saved strategy profile.

        The reach probabilities are saved in the `ranges_absolute` field of each
        node.

        Params:
            node: the current node of the tree
            ranges_absolute: a 2xK tensor containing the probabilities of each
                player reaching the current node with each private hand
        '''
        node.ranges_absolute = ranges_absolute.clone()

        if(node.terminal):
            return

        # identity comparison per PEP 8 (was `node.strategy != None`)
        assert node.strategy is not None

        actions_count = len(node.children)

        # check that it's a legal strategy
        strategy_to_check = node.strategy

        hands_mask = card_tools.get_possible_hand_indexes(node.board)

        if node.current_player != constants.players.chance:
            # per-hand action probabilities must be non-negative and sum
            # to ~1 (with float tolerance), and contain no NaNs
            checksum = strategy_to_check.sum(dim=0)
            assert(not torch.any(strategy_to_check.lt(0)))
            assert(not torch.any(checksum.gt(1.001)))
            assert(not torch.any(checksum.lt(0.999)))
            assert(not torch.any(checksum.ne(checksum)))

        assert(node.ranges_absolute.lt(0).sum() == 0)
        assert(node.ranges_absolute.gt(1).sum() == 0)

        # check if the range consists only of cards that don't overlap with the board
        impossible_hands_mask = hands_mask.clone().fill_(1) - hands_mask
        impossible_range_sum = node.ranges_absolute.clone().mul(impossible_hands_mask.view(1, game_settings.card_count).expand_as(node.ranges_absolute)).sum()
        assert impossible_range_sum == 0, impossible_range_sum

        children_ranges_absolute = arguments.Tensor(len(node.children), constants.players_count, game_settings.card_count)

        # chance player
        if node.current_player == constants.players.chance:
            # multiply ranges of both players by the chance prob
            children_ranges_absolute[:, constants.players.P1, :].copy_(node.ranges_absolute[constants.players.P1].repeat(actions_count, 1))
            children_ranges_absolute[:, constants.players.P2, :].copy_(node.ranges_absolute[constants.players.P2].repeat(actions_count, 1))

            children_ranges_absolute[:, constants.players.P1, :].mul_(node.strategy)
            children_ranges_absolute[:, constants.players.P2, :].mul_(node.strategy)
        # player
        else:
            # copy the range for the non-acting player
            children_ranges_absolute[:, 1-node.current_player, :] = node.ranges_absolute[1-node.current_player].clone().repeat(actions_count, 1)

            # multiply the range for the acting player using his strategy
            ranges_mul_matrix = node.ranges_absolute[node.current_player].repeat(actions_count, 1)
            children_ranges_absolute[:, node.current_player, :] = torch.mul(node.strategy, ranges_mul_matrix)

        # fill the ranges for the children
        for i in range(len(node.children)):
            child_node = node.children[i]
            child_range = children_ranges_absolute[i]

            # go deeper
            self._fill_ranges_dfs(child_node, child_range)