Example #1
    def update_average_strategy(self, node, current_strategy, _iter,
                                actions_count):
        ''' Update a node's average strategy with the current iteration strategy.

        Params:
            node: the node to update
            current_strategy: the CFR strategy for the current iteration
            _iter: the iteration number of the current CFR iteration
            actions_count: the number of actions available at the node'''
        if _iter >= arguments.cfr_skip_iters:
            if node.strategy is None:
                node.strategy = arguments.Tensor(
                    actions_count, game_settings.card_count).fill_(0)
            if node.iter_weight_sum is None:
                node.iter_weight_sum = arguments.Tensor(
                    game_settings.card_count).fill_(0)
            iter_weight_contribution = node.ranges_absolute[
                node.current_player].clone()
            iter_weight_contribution[torch.le(iter_weight_contribution,
                                              0)] = self.regret_epsilon
            node.iter_weight_sum.add_(iter_weight_contribution)
            iter_weight = torch.div(iter_weight_contribution,
                                    node.iter_weight_sum)

            expanded_weight = iter_weight.view(
                1, game_settings.card_count).expand_as(node.strategy)
            old_strategy_scale = expanded_weight * (
                -1) + 1  # same as 1 - expanded weight
            node.strategy.mul_(old_strategy_scale)
            strategy_addition = current_strategy.mul(expanded_weight)
            node.strategy.add_(strategy_addition)
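The update above is an incremental weighted average: with per-iteration weight w_t and running weight sum W_t, the new average is (1 - w_t/W_t) * old + (w_t/W_t) * current. A minimal, self-contained check of that identity (toy numbers, independent of the surrounding classes):

    import torch

    weights = torch.tensor([1.0, 1.0, 2.0])      # per-iteration weights w_t
    strategies = torch.tensor([0.2, 0.6, 0.5])   # per-iteration strategies s_t
    avg, weight_sum = torch.tensor(0.0), torch.tensor(0.0)
    for w, s in zip(weights, strategies):
        weight_sum = weight_sum + w
        avg = (1 - w / weight_sum) * avg + (w / weight_sum) * s
    # the incremental update equals the explicit weighted average
    assert torch.isclose(avg, (weights * strategies).sum() / weights.sum())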
Example #2
    def _set_call_matrix(self, board):
        ''' Sets the evaluator's call matrix, which gives the equity for terminal
        nodes where no player has folded.
        
        For nodes in the last betting round, creates the matrix `A` such that for player ranges
        `x` and `y`, `x'Ay` is the equity for the first player when no player folds. For nodes
        in the first betting round, gives the weighted average of all such possible matrices.

        Params:
            board: a possibly empty vector of board cards
        '''
        street = card_tools.board_to_street(board)
        self.equity_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count).fill_(0)
        
        if street == 1:
            # iterate through all possible next round streets
            next_round_boards = card_tools.get_second_round_boards()
            boards_count = next_round_boards.size(0)
            next_round_equity_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count)
            for i in range(boards_count):
                self.get_last_round_call_matrix(next_round_boards[i], next_round_equity_matrix)
                self.equity_matrix.add_(next_round_equity_matrix)
            # averaging the values in the call matrix
            weight_constant = 1 / (game_settings.card_count - 2) if game_settings.board_card_count == 1 else 2 / ((game_settings.card_count - 2) * (game_settings.card_count - 3))
            self.equity_matrix.mul_(weight_constant)
        elif street == 2:
            # for last round we just return the matrix
            self.get_last_round_call_matrix(board, self.equity_matrix)
        else:
            # impossible street
            assert False, 'impossible street'
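A minimal usage sketch of the matrix built above (variable names hypothetical; assumes `_set_call_matrix` has already run on a `terminal_equity` object): the `x'Ay` bilinear form from the docstring becomes two matrix products.

    # x, y: range vectors over private cards for the two players
    x = card_tools.get_uniform_range(board)
    y = card_tools.get_uniform_range(board)
    # equity for the first player at a terminal call node
    first_player_equity = x.view(1, -1).mm(
        terminal_equity.equity_matrix).mm(y.view(-1, 1)).item()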
Example #3
    def get_possible_bucket_mask(self):
        ''' Gives a vector of possible buckets on the board.

        @{set_board} must be called first.

        Return a mask vector over buckets where each entry is 1 if the bucket is
        valid, 0 if not'''
        card_indicator = arguments.Tensor(1, game_settings.card_count).fill_(1)
        mask = torch.mm(card_indicator, self._range_matrix)
        return mask
Example #4
    def set_board(self, board):
        ''' Sets the board cards for the bucketer.

        Params:
            board: a non-empty vector of board cards'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        self._range_matrix = arguments.Tensor(game_settings.card_count,
                                              self.bucket_count).zero_()

        buckets = self.bucketer.compute_buckets(board)
        class_ids = torch.arange(0, self.bucket_count)

        if arguments.gpu:
            buckets = buckets.cuda()
            class_ids = class_ids.cuda()
        else:
            class_ids = class_ids.float()

        class_ids = class_ids.view(1, self.bucket_count).expand(
            game_settings.card_count, self.bucket_count)
        card_buckets = buckets.view(game_settings.card_count,
                                    1).expand(game_settings.card_count,
                                              self.bucket_count)

        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix[torch.eq(class_ids, card_buckets)] = 1

        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()
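A short sketch (hypothetical names) of how the two matrices built above are meant to be used: `_range_matrix` pushes a card range forward to a bucket range, and `_reverse_value_matrix` maps bucket values back to card values.

    # card_range: a 1 x card_count probability vector
    bucket_range = card_range.mm(conversion._range_matrix)            # 1 x bucket_count
    # bucket_values: a 1 x bucket_count value vector, e.g. from the nn
    card_values = bucket_values.mm(conversion._reverse_value_matrix)  # 1 x card_count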
Example #5
    def _fill_chance(self, node):
        ''' Fills all chance nodes of a subtree with the probability of each outcome.

        Params:
            node: the root of the subtree
        '''
        if node.terminal:
            return

        if node.current_player == constants.players.chance:  # chance node, we will fill uniform strategy
            # works only for chance node at start of second round
            assert (len(node.children) == self.board_count)
            # filling strategy
            # we fill the strategy with a uniform probability, but it has to be zero
            # for hands that are not possible on the corresponding board
            node.strategy = arguments.Tensor(len(node.children),
                                             game_settings.card_count).fill_(0)
            # impossible hands stay at zero; possible hands get uniform mass
            for i in range(len(node.children)):
                child_node = node.children[i]
                mask = card_tools.get_possible_hand_indexes(
                    child_node.board).bool()
                node.strategy[i][mask] = 1.0 / (self.board_count - 2)

        for i in range(len(node.children)):
            child_node = node.children[i]
            self._fill_chance(child_node)
Example #6
    def _build_tree_dfs(self, current_node):
        ''' Recursively build the (sub)tree rooted at the current node.

        Params:
            current_node: the root to build the (sub)tree from
        Return `current_node` after the (sub)tree has been built
        '''
        self._fill_additional_attributes(current_node)
        children = self._get_children_nodes(current_node)
        current_node.children = children

        depth = 0

        current_node.actions = arguments.Tensor(len(children))
        for i in range(len(children)):
            children[i].parent = current_node
            self._build_tree_dfs(children[i])
            depth = max(depth, children[i].depth)

            if i == 0:
                current_node.actions[i] = constants.actions.fold
            elif i == 1:
                current_node.actions[i] = constants.actions.ccall
            else:
                current_node.actions[i] = children[i].bets.max()

        current_node.depth = depth + 1

        return current_node
Example #7
    def compute_values(self, root, starting_ranges=None):
        ''' Compute the self play and best response values of a strategy profile on
        the given game tree.

        The cfvs for each player in the given strategy profile when playing against
        each other is stored in the `cf_values` field for each node. The cfvs for
        a best response against each player in the profile are stored in the 
        `cf_values_br` field for each node.

        Params:
            root: The root of the game tree. Each node of the tree is assumed to
                have a strategy saved in the `strategy` field.
            starting_ranges [opt]: probability vectors over player private hands
                at the root node (default uniform)'''
        # 1.0 set the starting range
        uniform_ranges = arguments.Tensor(constants.players_count, game_settings.card_count).fill_(1.0/game_settings.card_count)  
        if starting_ranges is None:
            starting_ranges = uniform_ranges
        
        # 2.0 check the starting ranges
        checksum = starting_ranges.sum(dim=1)
        assert abs(checksum[0] - 1) < 0.0001, 'starting range does not sum to 1'
        assert abs(checksum[1] - 1) < 0.0001, 'starting range does not sum to 1'
        assert starting_ranges.lt(0).sum() == 0, 'starting range has negative entries'
        
        # 3.0 compute the values  
        self._fill_ranges_dfs(root, starting_ranges)
        self._compute_values_dfs(root)
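A usage sketch (an assumption: the method above is `TreeValues.compute_values`, matching the `TreeValues` import in the tree-building example further below):

    tree_values = TreeValues()
    tree_values.compute_values(tree, starting_ranges)
    # afterwards each node carries node.cf_values and node.cf_values_br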
Example #8
    def build_tree(self, params):
        ''' Builds the tree.

        Params:
            params: table of tree parameters, containing the following fields:
                * `street`: the betting round of the root node
                * `bets`: the number of chips committed at the root node by each player
                * `current_player`: the acting player at the root node
                * `board`: a possibly empty vector of board cards at the root node
                * `limit_to_street`: if `True`, only build the current betting round
                * `bet_sizing` (optional): a @{bet_sizing} object which gives the allowed
                    bets for each player 
        Return the root node of the built tree'''
        root = TreeNode()
        # copy necessary stuff from the root_node not to touch the input
        root.street = params.root_node.street
        root.bets = params.root_node.bets.clone()
        root.current_player = params.root_node.current_player
        root.board = params.root_node.board.clone()

        if not params.bet_sizing:
            params.bet_sizing = BetSizing(
                arguments.Tensor(arguments.bet_sizing))

        assert params.bet_sizing

        self.bet_sizing = params.bet_sizing
        self.limit_to_street = params.limit_to_street

        self._build_tree_dfs(root)

        strategy_filling = StrategyFilling()
        strategy_filling.fill_uniform(root)

        return root
Example #9
    def get_possible_bets(self, node):
        ''' Gives the bets which are legal at a game state.

        Params:
            node: a representation of the current game state, with fields:
                * `bets`: the number of chips currently committed by each player
                * `current_player`: the currently acting player
        Return an Nx2 tensor where N is the number of new possible game states,
        containing N sets of new commitment levels for each player '''
        current_player = node.current_player
        assert current_player == 0 or current_player == 1, 'Wrong player for bet size computation'
        opponent = 1 - node.current_player 
        opponent_bet = node.bets[opponent]

        assert node.bets[current_player] <= opponent_bet
        
        # compute the min and max possible raise sizes
        max_raise_size = arguments.stack - opponent_bet
        min_raise_size = opponent_bet - node.bets[current_player]
        min_raise_size = max(min_raise_size, arguments.ante)
        min_raise_size = min(max_raise_size, min_raise_size)
        
        if min_raise_size == 0: 
            return arguments.Tensor()
        elif min_raise_size == max_raise_size:
            out = arguments.Tensor(1, 2).fill_(opponent_bet)
            out[0][current_player] = opponent_bet + min_raise_size
            return out
        else:
            # iterate through all bets and check if they are possible
            max_possible_bets_count = self.pot_fractions.size(0) + 1 # we can always go allin 
            out = arguments.Tensor(max_possible_bets_count, 2).fill_(opponent_bet)
            
            # take pot size after opponent bet is called
            pot = opponent_bet * 2
            used_bets_count = 0
            # try all pot fractions bet and see if we can use them 
            for i in range(self.pot_fractions.size(0)): 
                raise_size = pot * self.pot_fractions[i]
                if raise_size >= min_raise_size and raise_size < max_raise_size:
                    out[used_bets_count, current_player] = opponent_bet + raise_size
                    used_bets_count = used_bets_count + 1
            # adding allin
            assert used_bets_count < max_possible_bets_count
            out[used_bets_count, current_player] = opponent_bet + max_raise_size
            used_bets_count = used_bets_count + 1
            return out[:used_bets_count, :]
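To make the arithmetic concrete, a worked example with assumed settings (stack 1200, ante 100, pot fractions [1, 2]; none of these numbers come from the snippet itself):

    # node.bets = [100, 300], current_player = 0
    # min_raise_size = max(300 - 100, 100) = 200
    # max_raise_size = 1200 - 300 = 900
    # pot after the call = 2 * 300 = 600
    # 1x pot -> raise 600 (200 <= 600 < 900, kept)
    # 2x pot -> raise 1200 (>= 900, skipped)
    # all-in -> raise 900 (always added)
    # returned commitments: [[900, 300], [1200, 300]]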
Example #10
    def _generate_sorted_range(self, _range):
        ''' Samples a batch of ranges with hands sorted by strength on the board.

        Params:
            _range: a NxK tensor in which to store the sampled ranges, where N is
                the number of ranges to sample and K is the range size'''
        batch_size = _range.size(0)
        self._generate_recursion(_range, arguments.Tensor(batch_size).fill_(1))
Example #11
    def get_action_strategy(self, action):
        ''' Returns an arbitrary vector.

        Params:
            action [opt]: not used
        Return a vector of 1s
        '''
        return arguments.Tensor(game_settings.card_count).fill_(1)
Example #12
    def get_chance_action_cfv(self, player_action, board):
        ''' Returns an arbitrary vector.

        Params:
            player_action [opt]: not used
            board [opt]: not used
        Return a vector of 1s
        '''
        return arguments.Tensor(game_settings.card_count).fill_(1)
Example #13
    def set_board(self, board):
        ''' Sets the (possibly empty) board cards to sample ranges with.

        The sampled ranges will assign 0 probability to any private hands that
        share any cards with the board.
        
        Params:
            board: a possibly empty vector of board cards'''
        hand_strengths = evaluator.batch_eval(board)    
        possible_hand_indexes = card_tools.get_possible_hand_indexes(board)
        self.possible_hands_count = possible_hand_indexes.sum(0, dtype=torch.uint8).item()
        self.possible_hands_mask = possible_hand_indexes.view(1, -1).bool()
        non_colliding_strengths = torch.masked_select(hand_strengths, self.possible_hands_mask)
        _, order = non_colliding_strengths.sort()
        _, self.reverse_order = order.sort() 
        self.reverse_order = self.reverse_order.view(1, -1).long()
        self.reordered_range = arguments.Tensor()
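The `order`/`reverse_order` pair is the standard sort-and-undo idiom: sorting the sort indices yields the permutation that restores the original order. A self-contained demonstration:

    import torch

    strengths = torch.tensor([3.0, 1.0, 2.0])
    _, order = strengths.sort()
    _, reverse_order = order.sort()
    sorted_vals = strengths[order]                      # [1., 2., 3.]
    assert torch.equal(sorted_vals[reverse_order], strengths)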
Example #14
    def forward(ctx, outputs, targets, mask):
        ''' Computes the loss over a batch of neural net outputs and targets.

        Params:
            outputs: an NxM tensor containing N vectors of values over buckets,
                output by the neural net
            targets: an NxM tensor containing N vectors of actual values over
                buckets, produced by @{data_generation_call}
            mask: an NxM tensor containing N mask vectors generated with
                @{bucket_conversion.get_possible_bucket_mask}
        Return the sum of Huber loss applied elementwise on `outputs` and `targets`,
        masked so that only valid buckets are included'''
        batch_size = outputs.size(0)
        feature_size = outputs.size(1)
        
        # 1.0 zero out the outputs/target so that the error does not depend on these
        outputs.mul_(mask)
        targets.mul_(mask)
        
        loss = smoothL1LossForward(outputs, targets)
        
        # 2.0 create storage for the mask sums
        mask_placeholder = arguments.Tensor(mask.size()).fill_(0)
        mask_sum = arguments.Tensor(batch_size).fill_(0)
        mask_multiplier = mask_sum.clone().fill_(0).view(-1, 1)
        
        # 3.0 compute mask sum for each batch
        mask_placeholder.copy_(mask)
        mask_sum = mask_placeholder.sum(dim=1, keepdim=True)
        
        # 3.1 mask multiplier - note that mask is 1 for impossible features
        mask_multiplier.fill_(feature_size)
        mask_multiplier.sub_(mask_sum)
        mask_multiplier.div_(feature_size)
        
        # 4.0 multiply to get a new loss
        # loss is not really computed batch-wise correctly,
        # but that does not really matter now since gradients are correct
        loss_multiplier = (batch_size * feature_size) / (batch_size * feature_size - mask_sum.sum())
        new_loss = loss_multiplier * loss

        ctx.save_for_backward(outputs, targets, mask_multiplier)
        
        return new_loss
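The rescaling step is plain arithmetic: if Z of the B*F entries are zeroed out by the mask (taking the in-code comment's convention for `mask_sum` at face value), then multiplying the element-mean loss over all B*F entries by B*F / (B*F - Z) gives the mean over the remaining entries. With toy numbers:

    # B = 2 rows, F = 4 features, Z = 2 zeroed-out entries
    # mean over all entries:   total_loss / 8
    # mean over valid entries: total_loss / 6 = (total_loss / 8) * (8 / (8 - 2))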
Example #15
    def _fill_uniformly(self, node):
        ''' Fills a player node with a uniform strategy.

        Params:
            node: the player node
        '''
        assert node.current_player == constants.players.P1 or node.current_player == constants.players.P2

        if node.terminal:
            return

        node.strategy = arguments.Tensor(len(node.children),
                                         game_settings.card_count).fill_(
                                             1.0 / len(node.children))
Example #16
    def resolve_first_node(self):
        ''' Solves a depth-limited lookahead from the first node of the game to get 
        opponent counterfactual values.

        The cfvs are stored in the field `starting_cfvs_p1`. Because this is the
        first node of the game, exact ranges are known for both players, so
        opponent cfvs are not necessary for solving.'''
        first_node = TreeNode()
        first_node.board = arguments.Tensor()
        first_node.street = 1
        first_node.current_player = constants.players.P1
        first_node.bets = arguments.Tensor([arguments.ante, arguments.ante])

        # create the starting ranges
        player_range = card_tools.get_uniform_range(first_node.board)
        opponent_range = card_tools.get_uniform_range(first_node.board)

        # create re-solving and re-solve the first node
        self.first_node_resolving = Resolving()
        self.first_node_resolving.resolve_first_node(first_node, player_range,
                                                     opponent_range)
        # store the initial CFVs
        self.starting_cfvs_p1 = self.first_node_resolving.get_root_cfv()
Example #17
    def _set_fold_matrix(self, board):
        ''' Sets the evaluator's fold matrix, which gives the equity for terminal
        nodes where one player has folded.
        
        Creates the matrix `B` such that for player ranges `x` and `y`, `x'By` is the equity
        for the player who doesn't fold

        Params:
            board: a possibly empty vector of board cards
        '''
        self.fold_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count)
        self.fold_matrix.fill_(1)
        # set cards that block each other to zero - exactly the diagonal elements in Leduc variants
        self.fold_matrix.sub_(torch.eye(game_settings.card_count).type_as(self.fold_matrix))
        self._handle_blocking_cards(self.fold_matrix, board)
Example #18
    def _sample_bet(self, node, state):
        ''' Samples an action to take from the strategy at the given game state.

        Params:
            node: the game node where the re-solving player is to act (a table of 
                the type returned by @{protocol_to_node.parsed_state_to_node})
            state: the game state where the re-solving player is to act
                (a table of the type returned by @{protocol_to_node.parse_state})
        Return an index representing the action chosen
        '''
        # 1.0 get the possible bets in the node
        possible_bets = self.resolving.get_possible_actions()
        actions_count = possible_bets.size(0)

        # 2.0 get the strategy for the current hand since the strategy is computed for all hands
        hand_strategy = arguments.Tensor(actions_count)

        for i in range(actions_count):
            action_bet = possible_bets[i]
            action_strategy = self.resolving.get_action_strategy(action_bet)
            hand_strategy[i] = action_strategy[self.hand_id]

        assert (abs(1 - hand_strategy.sum()) < 0.001)

        print("strategy:")
        print(hand_strategy)

        # 3.0 sample the action by doing cumsum and uniform sample
        hand_strategy_cumsum = torch.cumsum(hand_strategy, dim=0)
        r = torch.rand(1)

        sampled_bet = possible_bets[hand_strategy_cumsum.gt(r)][0].item()
        print("playing action that has prob: " +
              hand_strategy[hand_strategy_cumsum.gt(r)][0].item())

        # 4.0 update the invariants based on our action
        self.current_opponent_cfvs_bound = self.resolving.get_action_cfv(
            sampled_bet)

        strategy = self.resolving.get_action_strategy(sampled_bet)
        self.current_player_range.mul_(strategy)
        self.current_player_range = card_tools.normalize_range(
            node.board, self.current_player_range)

        return sampled_bet
Example #19
    def get_possible_hand_indexes(self, board):
        ''' Gives the private hands which are valid with a given board.

        Params:
            board: a possibly empty vector of board cards
        Return a vector with an entry for every possible hand (private card), which
        is `1` if the hand shares no cards with the board and `0` otherwise'''
        out = arguments.Tensor(game_settings.card_count).fill_(0)
        if board.dim() == 0:
            out.fill_(1)
            return out

        whole_hand = arguments.IntTensor(board.size(0) + 1)
        whole_hand[:-1].copy_(board)
        for card in range(game_settings.card_count):
            whole_hand[-1] = card
            if self.hand_is_possible(whole_hand):
                out[card] = 1
        return out
Example #20
    def _init_bucketing(self):
        ''' Initializes the tensor that translates hand ranges to bucket ranges.
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.size(0)
        self._range_matrix = arguments.Tensor(
            game_settings.card_count,
            self.board_count * self.bucket_count).zero_()
        self._range_matrix_board_view = self._range_matrix.view(
            game_settings.card_count, self.board_count, self.bucket_count)

        for idx in range(self.board_count):
            board = boards[idx]

            buckets = self.bucketer.compute_buckets(board)
            class_ids = torch.arange(0, self.bucket_count)

            if arguments.gpu:
                buckets = buckets.cuda()
                class_ids = class_ids.cuda()
            else:
                class_ids = class_ids.float()

            class_ids = class_ids.view(1, self.bucket_count).expand(
                game_settings.card_count, self.bucket_count)
            card_buckets = buckets.view(game_settings.card_count,
                                        1).expand(game_settings.card_count,
                                                  self.bucket_count)

            # finding all strength classes
            # matrix for transformation from card ranges to strength class ranges
            self._range_matrix_board_view[:, idx, :][torch.eq(
                class_ids, card_buckets)] = 1

        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()
        # we need to divide the matrix by the number of possible boards (from the point of view of each hand)
        weight_constant = 1 / (self.board_count - 2)
        self._reverse_value_matrix.mul_(weight_constant)
Example #21
    def _fill_chance(self, node):
        ''' Fills a chance node with the probability of each outcome.

        Params:
            node: the chance node
        '''
        assert not node.terminal

        # filling strategy
        # we fill the strategy with a uniform probability, but it has to be zero
        # for hands that are not possible on the corresponding board
        node.strategy = arguments.Tensor(len(node.children),
                                         game_settings.card_count).fill_(0)
        # impossible hands stay at zero; possible hands get uniform mass
        for i in range(len(node.children)):
            child_node = node.children[i]
            mask = card_tools.get_possible_hand_indexes(
                child_node.board).bool()
            # remove 2 because each player holds one card
            node.strategy[i][mask] = 1.0 / (game_settings.card_count - 2)
Example #22
    def _fill_uniformly(self, node, player):
        ''' Recursively fills a subtree with a uniform random strategy for the given
        player.

        Used in sections of the game tree where the player doesn't act.

        Params:
            node: the root of the subtree
            player: the player which is given the uniform random strategy
        '''
        if node.terminal:
            return

        if node.current_player == player:
            # fill uniform strategy
            node.strategy = arguments.Tensor(len(node.children),
                                             game_settings.card_count).fill_(
                                                 1.0 / len(node.children))

        for i in range(len(node.children)):
            child_node = node.children[i]
            self._fill_uniformly(child_node, player)
Example #23
    def batch_eval(self, board, impossible_hand_value=-1):
        ''' Gives strength representations for all private hands on the given board.

        Params:
            board: a possibly empty vector of board cards
            impossible_hand_value: the value to assign to hands which are invalid on the board
        Return a vector containing a strength value or `impossible_hand_value` for
        every private hand'''
        hand_values = arguments.Tensor(game_settings.card_count).fill_(impossible_hand_value)
        if board.dim() == 0:
            for hand in range(game_settings.card_count):
                hand_values[hand] = (hand // game_settings.suit_count) + 1
        else:
            board_size = board.size(0)
            assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'
            whole_hand = arguments.IntTensor(board_size + 1)
            whole_hand[:-1].copy_(board)
            for card in range(game_settings.card_count):
                whole_hand[-1] = card
                hand_values[card] = self.evaluate(whole_hand,
                                                  impossible_hand_value)
        return hand_values
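For intuition, assuming a standard 6-card Leduc deck (`card_count = 6`, `suit_count = 2`), the empty-board branch assigns strengths by rank via `(hand // suit_count) + 1`:

    # cards 0, 1 -> (0 // 2) + 1 = 1    (lowest rank)
    # cards 2, 3 -> (2 // 2) + 1 = 2
    # cards 4, 5 -> (4 // 2) + 1 = 3    (highest rank)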
Example #24
    def __init__(self):
        ''' Constructor. Creates an equity matrix with entries for every possible
        pair of buckets.'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        self.equity_matrix = arguments.Tensor(self.bucket_count, self.bucket_count).zero_()
        # filling equity matrix
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.size(0)
        self.terminal_equity = TerminalEquity()
        for i in range(self.board_count):
            board = boards[i]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            for c1 in range(game_settings.card_count):
                for c2 in range(game_settings.card_count):
                    b1 = buckets[c1]
                    b2 = buckets[c2]
                    if b1 > 0 and b2 > 0:
                        matrix_entry = call_matrix[c1][c2]
                        self.equity_matrix[b1][b2] = matrix_entry
Example #25
    def generate_range(self, _range):
        ''' Samples a batch of random range vectors.
         
        Each vector is sampled independently by randomly splitting the probability
        mass between the bottom half and the top half of the range, and then
        recursing on the two halves.

        @{set_board} must be called first.
        
        Params:
            _range: a NxK tensor in which to store the sampled ranges, where N is
                the number of ranges to sample and K is the range size'''
        batch_size = _range.size(0)
        self.sorted_range = arguments.Tensor(batch_size, self.possible_hands_count)
        self._generate_sorted_range(self.sorted_range)
        # we have to reorder the range back to undo the sort by strength
        index = self.reverse_order.expand_as(self.sorted_range)
        if arguments.gpu:
            index = index.cuda()
        self.reordered_range = self.sorted_range.gather(1, index)
        
        _range.zero_()
        _range[self.possible_hands_mask.expand_as(_range)] = self.reordered_range.view(-1)
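A minimal sketch of the recursion the docstring describes (a hypothetical helper, not the `_generate_recursion` used above): the probability mass is split uniformly at random between the two halves, then each half is treated the same way.

    import torch

    def split_mass(mass, size):
        # returns a list of `size` probabilities summing to `mass`
        if size == 1:
            return [mass]
        half = size // 2
        p = torch.rand(1).item()
        return split_mass(mass * p, half) + split_mass(mass * (1 - p), size - half)

    sampled_range = split_mass(1.0, 6)   # e.g. one range over 6 sorted hands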
Example #26
    def __init__(self, board, player_range, opponent_cfvs):
        ''' Constructor

        Params:
            board: board card
            player_range: an initial range vector for the opponent
            opponent_cfvs: the opponent counterfactual values vector used for re-solving'''
        super().__init__()
        assert board is not None

        self.input_opponent_range = player_range.clone()
        self.input_opponent_value = opponent_cfvs.clone()

        self.curent_opponent_values = arguments.Tensor(
            game_settings.card_count)

        self.regret_epsilon = 1.0 / 100000000

        # 2 stands for 2 actions: play/terminate
        self.opponent_reconstruction_regret = arguments.Tensor(
            2, game_settings.card_count)

        self.play_current_strategy = arguments.Tensor(
            game_settings.card_count).fill_(0)
        self.terminate_current_strategy = arguments.Tensor(
            game_settings.card_count).fill_(1)

        # holds achieved CFVs at each iteration so that we can compute regret
        self.total_values = arguments.Tensor(game_settings.card_count)

        self.terminate_regrets = arguments.Tensor(
            game_settings.card_count).fill_(0)
        self.play_regrets = arguments.Tensor(game_settings.card_count).fill_(0)

        # init range mask for masking out impossible hands
        self.range_mask = card_tools.get_possible_hand_indexes(board)

        self.total_values_p2 = None
        self.play_current_regret = None
        self.terminate_current_regret = None
Example #27
from Source.Settings.constants import constants
from Source.Settings.game_settings import game_settings
from Source.Tree.tree_builder import *
from Source.Game.card_to_string_conversion import card_to_string
from Source.Tree.tree_values import TreeValues
from Source.Tree.tree_strategy_filling import TreeStrategyFilling
from Source.Tree.tree_visualiser import TreeVisualiser

if __name__ == "__main__":
    builder = PokerTreeBuilder()
    params = TreeParams()
    params.root_node = TreeNode()
    params.root_node.board = card_to_string.string_to_board('')
    params.root_node.street = 1
    params.root_node.current_player = constants.players.P1
    params.root_node.bets = arguments.Tensor([100, 100])

    tree = builder.build_tree(params)

    filling = TreeStrategyFilling()

    range1 = card_tools.get_uniform_range(params.root_node.board)
    range2 = card_tools.get_uniform_range(params.root_node.board)

    filling.fill_strategies(tree, 0, range1, range2)
    filling.fill_strategies(tree, 1, range1, range2)

    starting_ranges = arguments.Tensor(constants.players_count,
                                       game_settings.card_count)
    starting_ranges[0].copy_(range1)
    starting_ranges[1].copy_(range2)
Example #28
    def cfrs_iter_dfs(self, node, _iter):
        ''' Recursively walks the tree, applying the CFR algorithm.

        Params:
            node: the current node in the tree
            _iter: the current iteration number
        '''
        assert (node.current_player == constants.players.P1
                or node.current_player == constants.players.P2
                or node.current_player == constants.players.chance)

        opponent_index = 1 - node.current_player

        # dimensions in tensor
        action_dimension = 0
        card_dimension = 1

        # compute values using terminal_equity in terminal nodes
        if node.terminal:

            terminal_equity = self._get_terminal_equity(node)

            values = node.ranges_absolute.clone().fill_(0)

            if (node.type == constants.node_types.terminal_fold):
                terminal_equity.tree_node_fold_value(node.ranges_absolute,
                                                     values, opponent_index)
            else:
                terminal_equity.tree_node_call_value(node.ranges_absolute,
                                                     values)

            # multiply by the pot
            values = values * node.pot
            node.cf_values = values.view_as(node.ranges_absolute)
        else:

            actions_count = len(node.children)
            current_strategy = None

            if node.current_player == constants.players.chance:
                current_strategy = node.strategy
            else:
                # we have to compute the current strategy at the beginning of each iteration

                # initialize regrets in the first iteration
                if node.regrets is None:
                    node.regrets = arguments.Tensor(
                        actions_count, game_settings.card_count).fill_(
                            self.regret_epsilon
                        )  # [[actions_count x card_count]]
                if node.possitive_regrets is None:
                    node.possitive_regrets = arguments.Tensor(
                        actions_count,
                        game_settings.card_count).fill_(self.regret_epsilon)

                # compute positive regrets so that we can compute the current strategy from them
                node.possitive_regrets.copy_(node.regrets)
                node.possitive_regrets[torch.le(
                    node.possitive_regrets,
                    self.regret_epsilon)] = self.regret_epsilon

                # compute the current strategy
                regrets_sum = node.possitive_regrets.sum(action_dimension)
                current_strategy = node.possitive_regrets.clone()
                current_strategy.div_(regrets_sum.expand_as(current_strategy))

            # current cfv [[actions, players, ranges]]
            cf_values_allactions = arguments.Tensor(
                actions_count, constants.players_count,
                game_settings.card_count).fill_(0)

            children_ranges_absolute = {}

            if node.current_player == constants.players.chance:
                ranges_mul_matrix = node.ranges_absolute[0].repeat(
                    actions_count, 1)
                children_ranges_absolute[0] = torch.mul(
                    current_strategy, ranges_mul_matrix)

                ranges_mul_matrix = node.ranges_absolute[1].repeat(
                    actions_count, 1)
                children_ranges_absolute[1] = torch.mul(
                    current_strategy, ranges_mul_matrix)
            else:
                ranges_mul_matrix = node.ranges_absolute[
                    node.current_player].repeat(actions_count, 1)
                children_ranges_absolute[node.current_player] = torch.mul(
                    current_strategy, ranges_mul_matrix)

                children_ranges_absolute[
                    opponent_index] = node.ranges_absolute[
                        opponent_index].repeat(actions_count, 1).clone()

            for i in range(len(node.children)):
                child_node = node.children[i]
                # set new absolute ranges (after the action) for the child
                child_node.ranges_absolute = node.ranges_absolute.clone()

                child_node.ranges_absolute[0].copy_(
                    children_ranges_absolute[0][i])
                child_node.ranges_absolute[1].copy_(
                    children_ranges_absolute[1][i])
                self.cfrs_iter_dfs(child_node, _iter)
                cf_values_allactions[i] = child_node.cf_values

            node.cf_values = arguments.Tensor(
                constants.players_count, game_settings.card_count).fill_(0)

            if node.current_player != constants.players.chance:
                strategy_mul_matrix = current_strategy.view_as(
                    arguments.Tensor(actions_count, game_settings.card_count))

                node.cf_values[node.current_player] = torch.mul(
                    strategy_mul_matrix,
                    cf_values_allactions[:, node.current_player, :]).sum(dim=0)
                node.cf_values[opponent_index] = (
                    cf_values_allactions[:, opponent_index, :]).sum(dim=0)
            else:
                node.cf_values[0] = (cf_values_allactions[:, 0, :]).sum(dim=0)
                node.cf_values[1] = (cf_values_allactions[:, 1, :]).sum(dim=0)

            if node.current_player != constants.players.chance:
                # computing regrets
                current_regrets = cf_values_allactions[
                    :, node.current_player, :].reshape(
                        actions_count, game_settings.card_count).clone()
                current_regrets.sub_(node.cf_values[node.current_player].view(
                    1, game_settings.card_count).expand_as(current_regrets))

                self.update_regrets(node, current_regrets)

                # accumulating average strategy
                self.update_average_strategy(node, current_strategy, _iter,
                                             actions_count)
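The current-strategy computation above is regret matching: each action's probability is its positive regret divided by the sum of positive regrets over all actions. A tiny standalone check:

    import torch

    # positive regrets, shape [actions x hands], here 2 actions and 1 hand
    positive_regrets = torch.tensor([[0.5], [1.5]])
    strategy = positive_regrets / positive_regrets.sum(0)
    # -> [[0.25], [0.75]]: probabilities proportional to positive regret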
Example #29
    def get_value(self, ranges, values):
        ''' Gives the predicted counterfactual values at each evaluated state, given
        input ranges.

        @{start_computation} must be called first. Each state to be evaluated must
        be given in the same order that pot sizes were given for that function.
        Keeps track of iterations internally, so should be called exactly once for
        every iteration of continual re-solving.

        Params:
            ranges: An Nx2xK tensor, where N is the number of states evaluated
                (must match input to @{start_computation}), 2 is the number of players, and
                K is the number of private hands. Contains N sets of 2 range vectors.
            values: an Nx2xK tensor in which to store the N sets of 2 value vectors which are output'''
        assert ranges is not None and values is not None
        assert (ranges.size(0) == self.batch_size)
        self.iter = self.iter + 1
        if self.iter == 1:
            # initializing data structures
            self.next_round_inputs = arguments.Tensor(
                self.batch_size, self.board_count,
                (self.bucket_count * constants.players_count + 1)).zero_()
            self.next_round_values = arguments.Tensor(
                self.batch_size, self.board_count, constants.players_count,
                self.bucket_count).zero_()
            self.transposed_next_round_values = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count,
                self.bucket_count)
            self.next_round_extended_range = arguments.Tensor(
                self.batch_size, constants.players_count,
                self.board_count * self.bucket_count).zero_()
            self.next_round_serialized_range = self.next_round_extended_range.view(
                -1, self.bucket_count)
            self.range_normalization = arguments.Tensor()
            self.value_normalization = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count)
            # handling pot feature for the nn
            nn_bet_input = self.pot_sizes.clone().mul(1 / arguments.stack)
            nn_bet_input = nn_bet_input.view(-1,
                                             1).expand(self.batch_size,
                                                       self.board_count)
            self.next_round_inputs[:, :, -1].copy_(nn_bet_input)

        # check whether we need to remember something in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        if use_memory and self.iter == arguments.cfr_skip_iters + 1:
            # first iteration where we need to remember something: initialize the data structures
            self.range_normalization_memory = arguments.Tensor(
                self.batch_size * self.board_count * constants.players_count,
                1).zero_()
            self.counterfactual_value_memory = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count,
                self.bucket_count).zero_()

        # computing bucket range in next street for both players at once
        self._card_range_to_bucket_range(
            ranges.view(self.batch_size * constants.players_count, -1),
            self.next_round_extended_range.view(
                self.batch_size * constants.players_count, -1))
        self.range_normalization = self.next_round_serialized_range.sum(
            dim=1, keepdim=True)
        rn_view = self.range_normalization.view(self.batch_size,
                                                constants.players_count,
                                                self.board_count)
        for player in range(constants.players_count):
            self.value_normalization[:, player, :].copy_(
                rn_view[:, 1 - player, :])
        if use_memory:
            self.range_normalization_memory.add_(
                self.value_normalization.view(
                    self.range_normalization_memory.shape))
        # eliminating division by zero
        self.range_normalization[torch.eq(self.range_normalization, 0)] = 1
        self.next_round_serialized_range.div_(
            self.range_normalization.expand_as(
                self.next_round_serialized_range))
        serialized_range_by_player = self.next_round_serialized_range.view(
            self.batch_size, constants.players_count, self.board_count,
            self.bucket_count)
        for player in range(constants.players_count):
            player_slice = self.next_round_inputs[
                :, :,
                player * self.bucket_count:(player + 1) * self.bucket_count]
            player_slice.copy_(
                self.next_round_extended_range[:, player, :].view(
                    player_slice.shape))

        # using the nn to compute values
        serialized_inputs_view = self.next_round_inputs.view(
            self.batch_size * self.board_count, -1)
        serialized_values_view = self.next_round_values.view(
            self.batch_size * self.board_count, -1)

        # computing value in the next round
        self.nn.get_value(serialized_inputs_view, serialized_values_view)

        # normalizing values back according to the original range sum
        normalization_view = self.value_normalization.view(
            self.batch_size, constants.players_count, self.board_count,
            1).transpose(1, 2)
        self.next_round_values.mul_(
            normalization_view.expand_as(self.next_round_values))

        self.transposed_next_round_values.copy_(
            self.next_round_values.transpose(2, 1))
        # remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory.add_(
                self.transposed_next_round_values)
        # translating bucket values back to the card values
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.view(
                self.batch_size * constants.players_count, -1),
            values.view(self.batch_size * constants.players_count, -1))
Example #30
    def generate_data_file(data_count, file_name):
        ''' Generates data files containing examples of random poker situations with
        associated terminal equity.

        Each poker situation is randomly generated using @{range_generator} and 
        @{random_card_generator}. For description of neural net input and target
        type, see @{net_builder}.

        Params:
            data_count: the number of examples to generate
            file_name: the prefix of the files where the data is saved (appended 
                with `.inputs`, `.targets`, and `.mask`).'''
        range_generator = RangeGenerator()
        batch_size = arguments.gen_batch_size
        assert data_count % batch_size == 0, 'data count has to be divisible by the batch size'
        batch_count = data_count // batch_size
        bucketer = Bucketer()
        bucket_count = bucketer.get_bucket_count()
        player_count = 2
        target_size = bucket_count * player_count
        targets = arguments.Tensor(data_count, target_size)
        input_size = bucket_count * player_count + 1
        inputs = arguments.Tensor(data_count, input_size)
        mask = arguments.Tensor(data_count, bucket_count).zero_()
        bucket_conversion = BucketConversion()
        equity = TerminalEquity()
        for batch in range(batch_count):
            board = card_generator.generate_cards(game_settings.board_card_count)
            range_generator.set_board(board)
            bucket_conversion.set_board(board)
            equity.set_board(board)
            
            # generating ranges
            ranges = arguments.Tensor(player_count, batch_size, game_settings.card_count)
            for player in range(player_count):
                range_generator.generate_range(ranges[player])
            pot_sizes = arguments.Tensor(batch_size, 1)

            # generating pot features
            pot_sizes.copy_(torch.rand(batch_size, 1))
            
            # translating ranges to features 
            pot_feature_index = -1
            inputs[batch * batch_size : (batch + 1) * batch_size, pot_feature_index].copy_(pot_sizes.view(-1))
            for player in range(player_count):
                bucket_conversion.card_range_to_bucket_range(ranges[player], inputs[batch * batch_size : (batch + 1) * batch_size, player * bucket_count : (player + 1) * bucket_count])
            
            # computation of values using terminal equity
            values = arguments.Tensor(player_count, batch_size, game_settings.card_count)
            for player in range(player_count):
                opponent = 1 - player
                equity.call_value(ranges[opponent], values[player])
            
            # translating values to nn targets
            for player in range(player_count):
                bucket_conversion.card_range_to_bucket_range(values[player], targets[batch * batch_size : (batch + 1) * batch_size, player * bucket_count : (player + 1) * bucket_count])
            
            # computing a mask of possible buckets
            bucket_mask = bucket_conversion.get_possible_bucket_mask()
            mask[batch * batch_size : (batch + 1) * batch_size, :].copy_(bucket_mask.expand(batch_size, bucket_count))

        torch.save(inputs, file_name + '.inputs')
        torch.save(targets, file_name + '.targets')
        torch.save(mask, file_name + '.mask')
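A hypothetical invocation (the count and file prefix are arbitrary; `data_count` must be divisible by `arguments.gen_batch_size`):

    generate_data_file(1000, 'train_data/train')
    # writes train_data/train.inputs, train_data/train.targets, train_data/train.mask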