Exemplo n.º 1
0
    def set_board(self, board):
        ''' Builds the card-to-bucket conversion matrices for the given board.

        Stores a `card_count x bucket_count` indicator matrix in
        `self._range_matrix` (entry is 1 where the bucketer assigns the card
        to that bucket, 0 elsewhere) and a copy of its transpose in
        `self._reverse_value_matrix`.

        Params:
            board: a non-empty vector of board cards'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()

        card_count = game_settings.card_count
        n_buckets = self.bucket_count
        self._range_matrix = arguments.Tensor(card_count, n_buckets).zero_()

        hand_buckets = self.bucketer.compute_buckets(board)
        bucket_ids = torch.arange(0, n_buckets)

        # keep both operands of the comparison below on the same device
        if arguments.gpu:
            hand_buckets = hand_buckets.cuda()
            bucket_ids = bucket_ids.cuda()
        else:
            bucket_ids = bucket_ids.float()

        # row i of `id_grid` lists every bucket id; row i of `bucket_grid`
        # repeats the bucket of card i, so equality marks the card's bucket
        id_grid = bucket_ids.view(1, n_buckets).expand(card_count, n_buckets)
        bucket_grid = hand_buckets.view(card_count, 1).expand(
            card_count, n_buckets)
        membership = torch.eq(id_grid, bucket_grid)

        # matrix for transformation from card ranges to bucket ranges
        self._range_matrix[membership] = 1

        # matrix for transformation from bucket values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()
Exemplo n.º 2
0
    def _init_bucketing(self):
        ''' Builds the tensors that translate hand ranges to bucket ranges
        (and bucket values back to hand values) across all second-round boards.

        `self._range_matrix` has one block of `bucket_count` columns per board;
        `self._range_matrix_board_view` is a 3-D view of the same storage
        indexed as (card, board, bucket).
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        all_boards = card_tools.get_second_round_boards()
        self.board_count = all_boards.size(0)

        card_count = game_settings.card_count
        n_buckets = self.bucket_count
        self._range_matrix = arguments.Tensor(
            card_count, self.board_count * n_buckets).zero_()
        self._range_matrix_board_view = self._range_matrix.view(
            card_count, self.board_count, n_buckets)

        for board_idx in range(self.board_count):
            hand_buckets = self.bucketer.compute_buckets(all_boards[board_idx])
            bucket_ids = torch.arange(0, n_buckets)

            # keep both operands of the comparison on the same device
            if arguments.gpu:
                hand_buckets = hand_buckets.cuda()
                bucket_ids = bucket_ids.cuda()
            else:
                bucket_ids = bucket_ids.float()

            id_grid = bucket_ids.view(1, n_buckets).expand(
                card_count, n_buckets)
            bucket_grid = hand_buckets.view(card_count, 1).expand(
                card_count, n_buckets)
            membership = torch.eq(id_grid, bucket_grid)

            # the slice is a view, so writing through it fills the
            # corresponding column block of self._range_matrix
            board_block = self._range_matrix_board_view[:, board_idx, :]
            board_block[membership] = 1

        # matrix for transformation from bucket values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()
        # divide by the number of boards possible from each hand's point of view
        # NOTE(review): the `- 2` presumably removes boards that collide with
        # the hand itself - confirm against the game definition
        board_weight = 1 / (self.board_count - 2)
        self._reverse_value_matrix.mul_(board_weight)
Exemplo n.º 3
0
 def __init__(self):
     ''' Constructor. Creates an equity matrix with entries for every possible
     pair of buckets.

     For every second-round board, each pair of cards whose buckets are both
     positive copies its call-matrix entry into `self.equity_matrix[b1][b2]`.
     Entries are assigned, not accumulated, so a later board overwrites an
     earlier one for the same bucket pair.'''
     self.bucketer = Bucketer()
     self.bucket_count = self.bucketer.get_bucket_count()
     self.equity_matrix = arguments.Tensor(self.bucket_count, self.bucket_count).zero_()
     # filling equity matrix
     boards = card_tools.get_second_round_boards()
     self.board_count = boards.size(0)
     self.terminal_equity = TerminalEquity()
     for i in range(self.board_count):
         board = boards[i]
         self.terminal_equity.set_board(board)
         call_matrix = self.terminal_equity.get_call_matrix()
         buckets = self.bucketer.compute_buckets(board)
         for c1 in range(game_settings.card_count):
             # the first card's bucket does not depend on c2: look it up once
             # and skip the whole inner loop when it cannot contribute
             b1 = buckets[c1]
             # NOTE(review): bucket ids <= 0 are skipped; confirm whether 0 is
             # a valid bucket id in this port or marks an impossible hand
             if not (b1 > 0):
                 continue
             for c2 in range(game_settings.card_count):
                 b2 = buckets[c2]
                 if b2 > 0:
                     self.equity_matrix[b1][b2] = call_matrix[c1][c2]
Exemplo n.º 4
0
class MockNnTerminal:
    ''' Stand-in for a value network that answers with showdown equity
    computed from a fixed bucket-by-bucket equity matrix.'''

    def __init__(self):
        ''' Constructor. Creates an equity matrix with entries for every possible
        pair of buckets.

        For every second-round board, each pair of cards whose buckets are both
        positive copies its call-matrix entry into `self.equity_matrix[b1][b2]`.
        Entries are assigned, not accumulated, so a later board overwrites an
        earlier one for the same bucket pair.'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        self.equity_matrix = arguments.Tensor(self.bucket_count, self.bucket_count).zero_()
        # filling equity matrix
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.size(0)
        self.terminal_equity = TerminalEquity()
        for i in range(self.board_count):
            board = boards[i]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            for c1 in range(game_settings.card_count):
                # the first card's bucket does not depend on c2: look it up
                # once and skip the inner loop when it cannot contribute
                b1 = buckets[c1]
                # NOTE(review): bucket ids <= 0 are skipped; confirm whether 0
                # is a valid bucket id in this port or marks an impossible hand
                if not (b1 > 0):
                    continue
                for c2 in range(game_settings.card_count):
                    b2 = buckets[c2]
                    if b2 > 0:
                        self.equity_matrix[b1][b2] = call_matrix[c1][c2]

    def get_value(self, inputs, outputs):
        ''' Gives the expected showdown equity of the two players' ranges.

        For each player, the opponent's bucket range (read from `inputs`) is
        multiplied by `self.equity_matrix` and the product is written in place
        into that player's column block of `outputs`.

        Params:
            inputs: An NxI tensor containing N instances of neural net inputs.
                The first `2 * bucket_count` columns are read as the two
                players' bucket ranges; any further columns are ignored.
                See @{net_builder} for details of each input.
            outputs: An NxO tensor in which to store N sets of expected showdown
                counterfactual values for each player.'''
        assert outputs.dim() == 2
        width = self.bucket_count
        players_count = 2
        for player in range(players_count):
            opponent = 1 - player
            opponent_range = inputs[:, opponent * width:(opponent + 1) * width]
            player_values = outputs[:, player * width:(player + 1) * width]
            torch.mm(opponent_range, self.equity_matrix, out=player_values)
Exemplo n.º 5
0
class NextRoundValue:
    ''' Evaluates a value network on every second-round board and converts
    between card ranges/values and bucket ranges/values.

    Typical call order: @{start_computation}, then @{get_value} once per
    iteration, then @{get_value_on_board} after the last iteration.'''

    def __init__(self, nn):
        ''' Constructor.

        Creates a tensor that can translate hand ranges to bucket ranges
        on any board.

        Params:
            nn: the neural network; only its `get_value(inputs, outputs)`
                method is used'''
        super().__init__()
        self._values_are_prepared = False
        self.nn = nn
        self._init_bucketing()

    def _init_bucketing(self):
        ''' Initializes the tensor that translates hand ranges to bucket ranges.

        `self._range_matrix` has one block of `bucket_count` columns per board;
        `self._range_matrix_board_view` is a 3-D view of the same storage
        indexed as (card, board, bucket).
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.size(0)
        self._range_matrix = arguments.Tensor(
            game_settings.card_count,
            self.board_count * self.bucket_count).zero_()
        self._range_matrix_board_view = self._range_matrix.view(
            game_settings.card_count, self.board_count, self.bucket_count)

        for idx in range(self.board_count):
            board = boards[idx]

            buckets = self.bucketer.compute_buckets(board)
            class_ids = torch.arange(0, self.bucket_count)

            # keep both operands of the comparison on the same device
            if arguments.gpu:
                buckets = buckets.cuda()
                class_ids = class_ids.cuda()
            else:
                class_ids = class_ids.float()

            class_ids = class_ids.view(1, self.bucket_count).expand(
                game_settings.card_count, self.bucket_count)
            card_buckets = buckets.view(game_settings.card_count,
                                        1).expand(game_settings.card_count,
                                                  self.bucket_count)

            # finding all strength classes
            # matrix for transformation from card ranges to strength class ranges
            self._range_matrix_board_view[:, idx, :][torch.eq(
                class_ids, card_buckets)] = 1

        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()
        # divide by the number of boards possible from each hand's point of view
        # NOTE(review): the `- 2` presumably removes boards that collide with
        # the hand itself - confirm against the game definition
        weight_constant = 1 / (self.board_count - 2)
        self._reverse_value_matrix.mul_(weight_constant)

    def _card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.

        Params:
            card_range: a probability vector over private hands
            bucket_range: a vector in which to store the output probabilities over buckets
        '''
        torch.mm(card_range, self._range_matrix, out=bucket_range)

    def _bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.

        Params:
            bucket_value: a value vector over buckets
            card_value: a vector in which to store the output values over private hands
        '''
        torch.mm(bucket_value, self._reverse_value_matrix, out=card_value)

    def _bucket_value_to_card_value_on_board(self, board, bucket_value,
                                             card_value):
        ''' Converts a value vector over buckets to a value vector over private hands
        given a particular set of board cards.

        Params:
            board: a non-empty vector of board cards
            bucket_value: a value tensor over buckets, indexed on its third
                dimension by board
            card_value: a vector in which to store the output values over private hands
        '''
        board_idx = card_tools.get_board_index(board)
        # unlike _reverse_value_matrix, this per-board matrix is not scaled by
        # the board weight
        board_matrix = self._range_matrix_board_view[:, board_idx, :].T.clone()
        serialized_card_value = card_value.view(-1, game_settings.card_count)
        serialized_bucket_value = bucket_value[:, :,
                                               board_idx, :].clone().view(
                                                   -1, self.bucket_count)
        torch.mm(serialized_bucket_value,
                 board_matrix,
                 out=serialized_card_value)

    def start_computation(self, pot_sizes):
        ''' Initializes the value calculator with the pot size of each state that
        we are going to evaluate.

        During continual re-solving, there is one pot size for each initial state
        of the second betting round (before board cards are dealt).

        Resets the iteration counter and the "values are prepared" flag so that
        an instance can be reused for a new computation.

        Params:
            pot_sizes: a vector of pot sizes betting round ends'''
        self.iter = 0
        # a fresh computation accumulates fresh sums in get_value; without this
        # reset _prepare_next_round_values would skip averaging them
        self._values_are_prepared = False
        self.pot_sizes = pot_sizes.view(-1, 1).clone()
        self.batch_size = pot_sizes.size(0)

    def get_value(self, ranges, values):
        ''' Gives the predicted counterfactual values at each evaluated state, given
        input ranges.

        @{start_computation} must be called first. Each state to be evaluated must
        be given in the same order that pot sizes were given for that function.
        Keeps track of iterations internally, so should be called exactly once for
        every iteration of continual re-solving.

        Params:
            ranges: An Nx2xK tensor, where N is the number of states evaluated
                (must match input to @{start_computation}), 2 is the number of players, and
                K is the number of private hands. Contains N sets of 2 range vectors.
            values: an Nx2xK tensor in which to store the N sets of 2 value vectors which are output'''
        assert ranges is not None and values is not None
        assert ranges.size(0) == self.batch_size
        self.iter += 1
        if self.iter == 1:
            # initializing data structures
            self.next_round_inputs = arguments.Tensor(
                self.batch_size, self.board_count,
                (self.bucket_count * constants.players_count + 1)).zero_()
            self.next_round_values = arguments.Tensor(
                self.batch_size, self.board_count, constants.players_count,
                self.bucket_count).zero_()
            self.transposed_next_round_values = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count,
                self.bucket_count)
            self.next_round_extended_range = arguments.Tensor(
                self.batch_size, constants.players_count,
                self.board_count * self.bucket_count).zero_()
            # view sharing storage with next_round_extended_range: one row per
            # (state, player, board) triple
            self.next_round_serialized_range = self.next_round_extended_range.view(
                -1, self.bucket_count)
            self.range_normalization = arguments.Tensor()
            self.value_normalization = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count)
            # handling pot feature for the nn: the last input column holds the
            # pot size divided by the stack
            nn_bet_input = self.pot_sizes.clone().mul(1 / arguments.stack)
            nn_bet_input = nn_bet_input.view(-1,
                                             1).expand(self.batch_size,
                                                       self.board_count)
            self.next_round_inputs[:, :, -1].copy_(nn_bet_input)

        # we need to find if we need remember something in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        if use_memory and self.iter == arguments.cfr_skip_iters + 1:
            # first iter that we need to remember something - we need to init data structures
            self.range_normalization_memory = arguments.Tensor(
                self.batch_size * self.board_count * constants.players_count,
                1).zero_()
            self.counterfactual_value_memory = arguments.Tensor(
                self.batch_size, constants.players_count, self.board_count,
                self.bucket_count).zero_()

        # computing bucket range in next street for both players at once
        self._card_range_to_bucket_range(
            ranges.view(self.batch_size * constants.players_count, -1),
            self.next_round_extended_range.view(
                self.batch_size * constants.players_count, -1))
        self.range_normalization = self.next_round_serialized_range.sum(
            dim=1, keepdim=True)
        rn_view = self.range_normalization.view(self.batch_size,
                                                constants.players_count,
                                                self.board_count)
        # each player's values are later rescaled by the OPPONENT's range sum;
        # copied before the zero sums below are replaced by 1
        for player in range(constants.players_count):
            self.value_normalization[:,
                                     player, :].copy_(rn_view[:,
                                                              1 - player, :])
        if use_memory:
            self.range_normalization_memory.add_(
                self.value_normalization.view(
                    self.range_normalization_memory.shape))
        # eliminating division by zero
        self.range_normalization[torch.eq(self.range_normalization, 0)] = 1
        # in-place division also normalizes next_round_extended_range, which
        # shares storage with the serialized view
        self.next_round_serialized_range.div_(
            self.range_normalization.expand_as(
                self.next_round_serialized_range))
        for player in range(constants.players_count):
            input_slice = self.next_round_inputs[:, :, player *
                                                 self.bucket_count:(player + 1) *
                                                 self.bucket_count]
            input_slice.copy_(
                self.next_round_extended_range[:, player, :].view(
                    input_slice.shape))

        # using nn to compute values
        serialized_inputs_view = self.next_round_inputs.view(
            self.batch_size * self.board_count, -1)
        serialized_values_view = self.next_round_values.view(
            self.batch_size * self.board_count, -1)

        # computing value in the next round; the network is handed the values
        # view as its output buffer
        self.nn.get_value(serialized_inputs_view, serialized_values_view)

        # normalizing values back according to the original range sum
        normalization_view = self.value_normalization.view(
            self.batch_size, constants.players_count, self.board_count,
            1).transpose(1, 2)
        self.next_round_values.mul_(
            normalization_view.expand_as(self.next_round_values))

        self.transposed_next_round_values.copy_(
            self.next_round_values.transpose(2, 1))
        # remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory.add_(
                self.transposed_next_round_values)
        # translating bucket values back to the card values
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.view(
                self.batch_size * constants.players_count, -1),
            values.view(self.batch_size * constants.players_count, -1))

    def get_value_on_board(self, board, values):
        ''' Gives the average counterfactual values on the given board across previous
        calls to @{get_value}.

        Used to update opponent counterfactual values during re-solving after board
        cards are dealt.

        Params:
            board: a non-empty vector of board cards
            values: a tensor in which to store the values'''
        # check if we have evaluated correct number of iterations
        assert (self.iter == arguments.cfr_iters)
        batch_size = values.size(0)
        assert (batch_size == self.batch_size)

        self._prepare_next_round_values()

        self._bucket_value_to_card_value_on_board(
            board, self.counterfactual_value_memory, values)

    def _prepare_next_round_values(self):
        ''' Normalizes the counterfactual values remembered between @{get_value} calls
        so that they are an average rather than a sum.

        Runs at most once per computation: the division is done in place, so
        repeating it would scale the stored values again.
        '''
        assert (self.iter == arguments.cfr_iters)

        # do nothing if already prepared
        if self._values_are_prepared:
            return

        # eliminating division by zero
        self.range_normalization_memory[torch.eq(
            self.range_normalization_memory, 0)] = 1
        serialized_memory_view = self.counterfactual_value_memory.view(
            -1, self.bucket_count)
        serialized_memory_view.div_(
            self.range_normalization_memory.expand_as(serialized_memory_view))

        self._values_are_prepared = True
Exemplo n.º 6
0
class BucketConversion:
    ''' Converts between vectors over private hands and vectors over buckets
    for a single board. @{set_board} must be called before any conversion.'''

    def set_board(self, board):
        ''' Sets the board cards for the bucketer.

        Stores a `card_count x bucket_count` indicator matrix in
        `self._range_matrix` (entry is 1 where the bucketer assigns the card
        to that bucket, 0 elsewhere) and a copy of its transpose in
        `self._reverse_value_matrix`.

        Params:
            board: a non-empty vector of board cards'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        self._range_matrix = arguments.Tensor(game_settings.card_count,
                                              self.bucket_count).zero_()

        buckets = self.bucketer.compute_buckets(board)
        class_ids = torch.arange(0, self.bucket_count)

        # keep both operands of the comparison below on the same device
        if arguments.gpu:
            buckets = buckets.cuda()
            class_ids = class_ids.cuda()
        else:
            class_ids = class_ids.float()

        class_ids = class_ids.view(1, self.bucket_count).expand(
            game_settings.card_count, self.bucket_count)
        card_buckets = buckets.view(game_settings.card_count,
                                    1).expand(game_settings.card_count,
                                              self.bucket_count)

        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix[torch.eq(class_ids, card_buckets)] = 1

        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.clone()

    def card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.

        @{set_board} must be called first. Used to create inputs to the neural net.

        Params:
            card_range: a probability vector over private hands
            bucket_range: a vector in which to save the resulting probability vector over buckets'''
        torch.mm(card_range, self._range_matrix, out=bucket_range)

    def bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.

        @{set_board} must be called first. Used to process neural net outputs.

        Params:
            bucket_value: a vector of values over buckets
            card_value: a vector in which to save the resulting vector of values over private hands'''
        torch.mm(bucket_value, self._reverse_value_matrix, out=card_value)

    def get_possible_bucket_mask(self):
        ''' Gives a vector of possible buckets on the board.

        @{set_board} must be called first.

        Return a `1 x bucket_count` tensor whose entries are the number of
        cards assigned to each bucket: non-zero if the bucket is valid on this
        board, 0 if not.'''
        # summing the indicator matrix over its card dimension equals
        # multiplying it by a row of ones, without allocating that row
        return self._range_matrix.sum(dim=0, keepdim=True)
Exemplo n.º 7
0
    def generate_data_file(self, data_count, file_name):
        ''' Generates data files containing examples of random poker situations with
        counterfactual values from an associated solution.

        Each batch shares one randomly generated board; ranges come from
        @{range_generator} and boards from @{random_card_generator}. For a
        description of neural net input and target type, see @{net_builder}.
        If `file_name + '.inputs'` already exists, the new examples are
        appended to the three existing files rather than replacing them.

        Params:
            data_count: the number of examples to generate; must be a multiple
                of `arguments.gen_batch_size`
            file_name: the prefix of the files where the data is saved (appended
                with `.inputs`, `.targets`, and `.mask`).'''
        range_generator = RangeGenerator()
        batch_size = arguments.gen_batch_size
        assert data_count % batch_size == 0, 'data count has to be divisible by the batch size'
        batch_count = data_count // batch_size
        bucketer = Bucketer()
        bucket_count = bucketer.get_bucket_count()

        # one column block of `bucket_count` per player; inputs carry one
        # extra trailing column for the pot feature
        target_size = bucket_count * constants.players_count
        targets = arguments.Tensor(data_count, target_size)
        input_size = bucket_count * constants.players_count + 1
        inputs = arguments.Tensor(data_count, input_size)
        mask = arguments.Tensor(data_count, bucket_count).zero_()
        bucket_conversion = BucketConversion()

        for batch in tqdm(range(batch_count)):
            # rows of the output tensors that belong to this batch
            rows = slice(batch * batch_size, (batch + 1) * batch_size)

            board = card_generator.generate_cards(game_settings.board_card_count)
            range_generator.set_board(board)
            bucket_conversion.set_board(board)

            # generating ranges
            ranges = arguments.Tensor(constants.players_count, batch_size,
                                      game_settings.card_count)
            for player in range(constants.players_count):
                range_generator.generate_range(ranges[player])

            # generating pot sizes between ante and stack - 0.1
            min_pot = arguments.ante
            max_pot = arguments.stack - 0.1
            pot_span = max_pot - min_pot
            random_pot_sizes = torch.rand(batch_size, 1).mul(pot_span).add(min_pot)

            # pot features are pot sizes normalized between (ante/stack,1)
            pot_size_features = random_pot_sizes.clone().mul(1 / arguments.stack)

            # translating ranges to features; the pot feature is the last column
            inputs[rows, -1].copy_(pot_size_features.squeeze(1))
            for player in range(constants.players_count):
                player_cols = slice(player * bucket_count,
                                    (player + 1) * bucket_count)
                bucket_conversion.card_range_to_bucket_range(
                    ranges[player], inputs[rows, player_cols])

            # computation of values using re-solving, one example at a time
            values = arguments.Tensor(constants.players_count, batch_size,
                                      game_settings.card_count)
            for example in range(batch_size):
                resolving = Resolving()
                root = TreeNode()

                root.board = board
                root.street = 2
                root.current_player = constants.players.P1
                pot_size = pot_size_features[example][0] * arguments.stack
                root.bets = arguments.Tensor([pot_size, pot_size])

                resolving.resolve_first_node(root, ranges[0][example],
                                             ranges[1][example])
                root_values = resolving.get_root_cfv_both_players()
                # express the values as a fraction of the pot
                root_values.mul_(1 / pot_size)
                values[:, example, :].copy_(root_values)

            # translating values to nn targets
            for player in range(constants.players_count):
                player_cols = slice(player * bucket_count,
                                    (player + 1) * bucket_count)
                bucket_conversion.card_range_to_bucket_range(
                    values[player], targets[rows, player_cols])

            # computing a mask of possible buckets, shared by the whole batch
            bucket_mask = bucket_conversion.get_possible_bucket_mask()
            mask[rows, :].copy_(bucket_mask.expand(batch_size, bucket_count))

        inputs_path = file_name + '.inputs'
        targets_path = file_name + '.targets'
        mask_path = file_name + '.mask'

        # append to previously generated data when it exists
        if os.path.exists(inputs_path):
            saved_inputs = torch.load(inputs_path)
            saved_targets = torch.load(targets_path)
            saved_mask = torch.load(mask_path)
            inputs = torch.cat((saved_inputs, inputs), 0)
            targets = torch.cat((saved_targets, targets), 0)
            mask = torch.cat((saved_mask, mask), 0)

        torch.save(inputs, inputs_path)
        torch.save(targets, targets_path)
        torch.save(mask, mask_path)