Example #1
    def forward(self, outputs, targets, mask):
        batch_size = outputs.size(1)
        feature_size = outputs.size(2)

        # --1.0 zero out the outputs/targets so that the error does not depend on these
        outputs.cmul(mask)
        targets.cmul(mask)

        loss = self.criterion.forward(outputs, targets)

        # --2.0 if the batch size has changed, create new storage for the sum, otherwise reuse
        if self.mask_sum is None or (self.mask_sum.size(1) != batch_size):
            self.mask_placeholder = arguments.Tensor(mask.size()).fill(0)
            self.mask_sum = arguments.Tensor(batch_size).fill(0)
            self.mask_multiplier = self.mask_sum.clone().fill(0).view(-1, 1)

        # --3.0 compute mask sum for each batch
        self.mask_placeholder.copy(mask)
        torch.sum(self.mask_sum, self.mask_placeholder, 2)

        # --3.1 mask multiplier - note that mask is 1 for impossible features
        self.mask_multiplier.fill(feature_size)
        self.mask_multiplier.csub(self.mask_sum)
        self.mask_multiplier.div(feature_size)

        # --4.0 multiply to get a new loss
        # --loss is not really computed batch-wise correctly,
        # --but that does not really matter now since gradients are correct
        loss_multiplier = (batch_size * feature_size) / (batch_size * feature_size - self.mask_sum.sum())
        new_loss = loss_multiplier * loss
        return new_loss
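The key idea above is that the criterion averages over every element, including the masked-out ones, so the final loss is rescaled by total elements / unmasked elements. A minimal standalone sketch of that rescaling in plain PyTorch (the function and argument names are illustrative, not this repo's masked-loss API):

import torch
import torch.nn as nn

def masked_loss(outputs, targets, keep, criterion=nn.SmoothL1Loss()):
    # `keep` is 1 for entries that should contribute to the loss, 0 otherwise
    loss = criterion(outputs * keep, targets * keep)
    # the criterion averaged over all elements, so undo the dilution
    # caused by the zeroed-out entries
    return loss * outputs.numel() / keep.sum().clamp(min=1)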
Example #2
    def _set_call_matrix(self, board):
        street = card_tools.board_to_street(board)
        self.equity_matrix = arguments.Tensor(game_settings.card_count,
                                              game_settings.card_count).zero()

        if street == 1:
            ##--iterate through all possible next round streets
            next_round_boards = card_tools.get_second_round_boards()
            boards_count = next_round_boards.size(1)
            next_round_equity_matrix = arguments.Tensor(
                game_settings.card_count, game_settings.card_count)
            for board in range(boards_count):
                self.get_last_round_call_matrix(next_round_boards[board],
                                                next_round_equity_matrix)
                self.equity_matrix.add(next_round_equity_matrix)
            ##--averaging the values in the call matrix
            weight_constant = (1 / (game_settings.card_count - 2)
                               if game_settings.board_card_count == 1
                               else 2 / ((game_settings.card_count - 2) *
                                         (game_settings.card_count - 3)))
            self.equity_matrix.mul(weight_constant)
        elif street == 2:
            ##--for last round we just return the matrix
            self.get_last_round_call_matrix(board, self.equity_matrix)
        else:
            ##--impossible street
            assert False, 'impossible street'
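For intuition on the averaging weight, assuming standard Leduc-style settings (6 cards, a single board card): any pair of private cards blocks 2 of the 6 cards, leaving 4 equally likely next-round boards, hence the 1 / (card_count - 2) factor; the two-board-card branch is 1 over the number of unordered pairs of the remaining cards. A hedged worked example:

card_count, board_card_count = 6, 1      # assumed Leduc-style settings

if board_card_count == 1:
    weight_constant = 1 / (card_count - 2)                       # 1/4 = 0.25
else:
    weight_constant = 2 / ((card_count - 2) * (card_count - 3))  # 1 / C(card_count-2, 2)

print(weight_constant)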
Example #3
    def _sample_bet(self, node, state):
        ##--1.0 get the possible bets in the node
        possible_bets = self.resolving.get_possible_actions()
        actions_count = possible_bets.size(1)

        # --2.0 get the strategy for the current hand since the strategy is computed for all hands
        hand_strategy = arguments.Tensor(actions_count)

        for i in range(1, actions_count):
            action_bet = possible_bets[i]
            action_strategy = self.resolving.get_action_strategy(action_bet)
            hand_strategy[i] = action_strategy[self.hand_id]
        # end

        # assert(math.abs(1 - hand_strategy:sum()) < 0.001)

        print("strategy:")
        print(hand_strategy)

        # --3.0 sample the action by doing cumsum and uniform sample
        hand_strategy_cumsum = torch.cumsum(hand_strategy)
        r = torch.uniform()

        sampled_bet = possible_bets[hand_strategy_cumsum.gt(r)][1]
        print("playing action that has prob: " % (hand_strategy[hand_strategy_cumsum.gt(r)][1]))

        # --4.0 update the invariants based on our action
        self.current_opponent_cfvs_bound = self.resolving.get_action_cfv(sampled_bet)

        strategy = self.resolving.get_action_strategy(sampled_bet)
        self.current_player_range.cmul(strategy)
        self.current_player_range = card_tools.normalize_range(node.board, self.current_player_range)
        return sampled_bet
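The sampling step works by taking the cumulative sum of the per-action probabilities and picking the first action whose cumulative mass exceeds a uniform draw. A small self-contained sketch in plain PyTorch (illustrative names, not the resolver's API):

import torch

def sample_action(strategy):
    # strategy: 1-D tensor of action probabilities summing to 1
    cumulative = torch.cumsum(strategy, dim=0)
    r = torch.rand(1).item()
    idx = int((cumulative < r).sum())        # first bucket whose cumsum >= r
    return min(idx, strategy.numel() - 1)    # guard against float round-off

strategy = torch.tensor([0.1, 0.6, 0.3])
print(sample_action(strategy))               # returns 1 with probability 0.6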
Example #4
    def _init_bucketing(self):
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.size(1)
        self._range_matrix = arguments.Tensor(game_settings.card_count, self.board_count * self.bucket_count).zero()
        self._range_matrix_board_view = self._range_matrix.view(game_settings.card_count, self.board_count,
                                                                self.bucket_count)

        for idx in range(1, self.board_count):
            board = boards[idx]

            buckets = self.bucketer.compute_buckets(board)
            class_ids = torch.range(1, self.bucket_count)

            if arguments.gpu:
                buckets = buckets.cuda()
                class_ids = class_ids.cuda()
            else:
                class_ids = class_ids.float()

            class_ids = class_ids.view(1, self.bucket_count).expand(game_settings.card_count, self.bucket_count)
            card_buckets = buckets.view(game_settings.card_count, 1).expand(game_settings.card_count, self.bucket_count)

            '''
            --finding all strength classes
            --matrix for transformation from card ranges to strength class ranges
            '''
            self._range_matrix_board_view[:, idx, :][torch.eq(class_ids, card_buckets)] = 1

        # --matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.t().clone()
        # --we need to div the matrix by the sum of possible boards (from point of view of each hand)
        weight_constant = 1 / (self.board_count - 2) #-- count
        self._reverse_value_matrix.mul(weight_constant)
Example #5
 def _set_fold_matrix(self, board):
     self.fold_matrix = arguments.Tensor(game_settings.card_count,
                                         game_settings.card_count)
     self.fold_matrix.fill(1)
     ##--setting cards that block each other to zero - exactly elements on diagonal in leduc variants
     self.fold_matrix.csub(
         torch.eye(game_settings.card_count).typeAs(self.fold_matrix))
     self._handle_blocking_cards(self.fold_matrix, board)
Example #6
def batch_eval(board, impossible_hand_value):
    hand_values = arguments.Tensor(game_settings.card_count).fill(-1)
    ##--an empty board (dim 0) is the pre-flop case: a hand is valued by its rank
    if board.dim() == 0:
        for hand in range(game_settings.card_count):
            hand_values[hand] = math.floor(
                (hand - 1) / game_settings.suit_count) + 1
    else:
        board_size = board.size(1)
        assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'
        whole_hand = arguments.Tensor(board_size + 1)
        ##--copy the board into every element of whole_hand except the last one
        ##--(the Lua original is whole_hand[{{1, -2}}]:copy(board))
        whole_hand[:-1].copy(board)
        for card in range(1, game_settings.card_count):
            whole_hand[-1] = card
            hand_values[card] = evaluate(whole_hand, impossible_hand_value)
    return hand_values
Example #7
    def update_average_strategy(self, node, current_strategy, iter):
        if iter > arguments.cfr_skip_iters:
            actions_count = len(node.children)
            if getattr(node, 'strategy', None) is None:
                node.strategy = arguments.Tensor(
                    actions_count, game_settings.card_count).fill(0)
            if getattr(node, 'iter_weight_sum', None) is None:
                node.iter_weight_sum = arguments.Tensor(
                    game_settings.card_count).fill(0)
            iter_weight_contribution = node.ranges_absolute[
                node.current_player].clone()
            iter_weight_contribution[torch.le(iter_weight_contribution,
                                              0)] = self.regret_epsilon
            node.iter_weight_sum.add(iter_weight_contribution)
            iter_weight = torch.cdiv(iter_weight_contribution,
                                     node.iter_weight_sum)

            expanded_weight = iter_weight.view(
                1, game_settings.card_count).expandAs(node.strategy)
            old_strategy_scale = expanded_weight * (
                -1) + 1  ##--same as 1 - expanded weight
            node.strategy.cmul(old_strategy_scale)
            strategy_addition = current_strategy.cmul(expanded_weight)
            node.strategy.add(strategy_addition)
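The update above is a weighted running average: each iteration's strategy is folded into the accumulated average with a per-hand weight equal to this iteration's reach contribution divided by the total contribution so far. A minimal sketch of that update with plain PyTorch tensors (illustrative names, not this repo's node fields):

import torch

def update_average(avg_strategy, weight_sum, current_strategy, contribution):
    # avg_strategy, current_strategy: [actions x cards]; weight_sum, contribution: [cards]
    weight_sum += contribution
    w = contribution / weight_sum                     # per-card weight in (0, 1]
    avg_strategy.mul_(1 - w).add_(current_strategy * w)
    return avg_strategy, weight_sum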
Example #8
    def __init(self, board, player_range, opponent_cfvs):
        assert board is not None
        self.input_opponent_range = player_range.clone()
        self.input_opponent_value = opponent_cfvs.clone()
        self.curent_opponent_values = arguments.Tensor(
            game_settings.card_count)
        self.regret_epsilon = 1.0 / 100000000

        ##--2 stands for 2 actions: play/terminate
        #self.opponent_reconstruction_regret = arguments.params['Tensor'](2, game_settings.card_count)
        self.opponent_reconstruction_regret = np.zeros(
            (2, game_settings.card_count))
        self.play_current_strategy = np.zeros(game_settings.card_count)
        self.terminate_current_strategy = np.zeros(game_settings.card_count)
        self.terminate_regrets = np.zeros(game_settings.card_count)
        self.total_values = np.zeros(game_settings.card_count)
        self.play_regrets = np.zeros(game_settings.card_count)
        self.range_mask = np.zeros(game_settings.card_count)
Example #9
 def __init__(self):
     self.bucketer = Bucketer()
     self.bucket_count = self.bucketer.get_bucket_count()
     self.equity_matrix = arguments.Tensor(self.bucket_count,
                                           self.bucket_count).zero()
     ##--filling equity matrix
     boards = card_tools.get_second_round_boards()
     self.board_count = boards.size(1)
     self.terminal_equity = TerminalEquity()
     for i in range(1, self.board_count):
         board = boards[i]
         self.terminal_equity.set_board(board)
         call_matrix = self.terminal_equity.get_call_matrix()
         buckets = self.bucketer.compute_buckets(board)
         for c1 in range(1, game_settings.card_count):
             for c2 in range(1, game_settings.card_count):
                 b1 = buckets[c1]
                 b2 = buckets[c2]
                 if (b1 > 0 and b2 > 0):
                     matrix_entry = call_matrix[c1][c2]
                     self.equity_matrix[b1][b2] = matrix_entry
Example #10
    def set_board(self, board):
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        self._range_matrix = arguments.Tensor(game_settings.card_count, self.bucket_count).zero()

        buckets = self.bucketer.compute_buckets(board)
        class_ids = torch.range(1, self.bucket_count)

        if arguments.gpu:
            buckets = buckets.cuda()
            class_ids = class_ids.cuda()
        else:
            class_ids = class_ids.float()

        class_ids = class_ids.view(1, self.bucket_count).expand(game_settings.card_count, self.bucket_count)
        card_buckets = buckets.view(game_settings.card_count, 1).expand(game_settings.card_count, self.bucket_count)

        # --finding all strength classes
        # --matrix for transformation from card ranges to strength class ranges
        self._range_matrix[torch.eq(class_ids, card_buckets)] = 1

        # --matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.t().clone()
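Such a 0/1 matrix is used as a linear map: multiplying a card-range row vector by the (card_count x bucket_count) matrix sums the probability mass of all cards that land in each bucket (Example #16 below applies the same matrix to build a bucket mask). A hedged standalone sketch with made-up bucket assignments:

import torch

card_count, bucket_count = 6, 3
buckets = torch.tensor([0, 0, 1, 1, 2, 2])            # illustrative bucket ids

range_matrix = torch.zeros(card_count, bucket_count)
range_matrix[torch.arange(card_count), buckets] = 1   # one-hot row per card

card_range = torch.full((1, card_count), 1.0 / card_count)
bucket_range = card_range.mm(range_matrix)            # shape (1, bucket_count)
print(bucket_range)                                    # each bucket gets 2/6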
Example #11
    def set_datastructures_from_tree_dfs(self, node, layer, action_id, parent_id, gp_id):

        ##--fill the potsize
        assert (node.pot)
        self.lookahead.pot_size[layer][action_id, parent_id, gp_id, :, :] = node.pot

        node.lookahead_coordinates = arguments.Tensor([action_id, parent_id, gp_id])

        ###--transition call cannot be allin call
        if node.current_player == constants.players.chance:
            assert (parent_id <= self.lookahead.nonallinbets_count[layer - 2])

        if layer < self.lookahead.depth + 1:
            gp_nonallinbets_count = self.lookahead.nonallinbets_count[layer - 2]
            prev_layer_terminal_actions_count = self.lookahead.terminal_actions_count[layer - 1]
            gp_terminal_actions_count = self.lookahead.terminal_actions_count[layer - 2]
            prev_layer_bets_count = self.lookahead.bets_count[layer - 1]

            ##--compute next coordinates for parent and grandparent
            next_parent_id = action_id - prev_layer_terminal_actions_count
            next_gp_id = (gp_id - 1) * gp_nonallinbets_count + (parent_id)

            if (not node.terminal) and (node.current_player != constants.players.chance):

                ##--parent is not an allin raise
                assert (parent_id <= self.lookahead.nonallinbets_count[layer - 2])

                ##--do we need to mask some actions for that node? (that is, does the node have fewer children than the max number of children for any node on this layer)
                node_with_empty_actions = ( len(node.children) < self.lookahead.actions_count[layer])

                if node_with_empty_actions:
                    ###--we need to mask nonexisting padded bets
                    assert (layer > 1)

                terminal_actions_count = self.lookahead.terminal_actions_count[layer]
                assert (terminal_actions_count == 2)

                existing_bets_count = len(node.children) - terminal_actions_count

                ###--allin situations
                if existing_bets_count == 0:
                    assert (action_id == self.lookahead.actions_count[layer - 1])

                for child_id in range(1, terminal_actions_count):
                    child_node = node.children[child_id]
                    ###--go deeper
                    self.set_datastructures_from_tree_dfs(child_node, layer + 1, child_id, next_parent_id, next_gp_id)

                ##--we need to make sure that even though there are fewer actions, the last action/allin has the same last index as if we had the full number of actions
                ##--we manually set the action_id as the last action (allin)
                for b in range(1, existing_bets_count):
                    self.set_datastructures_from_tree_dfs(node.children[len(node.children) - b + 1], layer + 1,
                                                          self.lookahead.actions_count[layer] - b + 1,
                                                          next_parent_id, next_gp_id)

                ##--mask out empty actions
                self.lookahead.empty_action_mask[layer + 1][{
                    {terminal_actions_count + 1, -(existing_bets_count + 1)},
                    next_parent_id, next_gp_id, {}}] = 0

            else:
            ###--node has full action count, easy to handle
                for child_id in range(1, len(node.children)):
                    ##--go deeper
                    child_node = node.children[child_id]
                    self.set_datastructures_from_tree_dfs(child_node, layer + 1, child_id, next_parent_id,
                                                          next_gp_id)
Example #12
    def construct_data_structures(self):
        self._compute_structure()
        ##--lookahead main data structures
        ##--all the structures are per-layer tensors, that is, each layer holds the data in n-dimensional tensors
        self.lookahead.pot_size = {}
        self.lookahead.ranges_data = {}
        self.lookahead.average_strategies_data = {}
        self.lookahead.current_strategy_data = {}
        self.lookahead.cfvs_data = {}
        self.lookahead.average_cfvs_data = {}
        self.lookahead.regrets_data = {}
        self.lookahead.current_regrets_data = {}
        self.lookahead.positive_regrets_data = {}
        self.lookahead.placeholder_data = {}
        self.lookahead.regrets_sum = {}
        self.lookahead.empty_action_mask = {}
        ##--used to mask empty actions
        ##--used to hold and swap inner (nonterminal) nodes when doing some transpose operations
        self.lookahead.inner_nodes = {}
        self.lookahead.inner_nodes_p1 = {}
        self.lookahead.swap_data = {}

        ##--create the data structure for the first two layers

        ##--data structures [actions x parent_action x grandparent_id x batch x players x range]
        self.lookahead.ranges_data[1] = arguments.Tensor(1, 1, 1, constants.players_count,
                                                         game_settings.card_count).fill(
            1.0 / game_settings.card_count)
        self.lookahead.ranges_data[2] = arguments.Tensor(self.lookahead.actions_count[1], 1, 1,
                                                         constants.players_count, game_settings.card_count).fill(
            1.0 / game_settings.card_count)
        self.lookahead.pot_size[1] = self.lookahead.ranges_data[1].clone().fill(0)
        self.lookahead.pot_size[2] = self.lookahead.ranges_data[2].clone().fill(0)
        self.lookahead.cfvs_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
        self.lookahead.cfvs_data[2] = self.lookahead.ranges_data[2].clone().fill(0)
        self.lookahead.average_cfvs_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
        self.lookahead.average_cfvs_data[2] = self.lookahead.ranges_data[2].clone().fill(0)
        self.lookahead.placeholder_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
        self.lookahead.placeholder_data[2] = self.lookahead.ranges_data[2].clone().fill(0)

        ##--data structures for one player [actions x parent_action x grandparent_id x 1 x range]
        self.lookahead.average_strategies_data[1] = None
        self.lookahead.average_strategies_data[2] = arguments.Tensor(self.lookahead.actions_count[1], 1, 1,
                                                                     game_settings.card_count).fill(0)
        self.lookahead.current_strategy_data[1] = None
        self.lookahead.current_strategy_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
        self.lookahead.regrets_data[1] = None
        self.lookahead.regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
        self.lookahead.current_regrets_data[1] = None
        self.lookahead.current_regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
        self.lookahead.positive_regrets_data[1] = None
        self.lookahead.positive_regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
        self.lookahead.empty_action_mask[1] = None
        self.lookahead.empty_action_mask[2] = self.lookahead.average_strategies_data[2].clone().fill(1)

        ##--data structures for summing over the actions [1 x parent_action x grandparent_id x range]
        self.lookahead.regrets_sum[1] = arguments.Tensor(1, 1, 1, game_settings.card_count).fill(0)
        self.lookahead.regrets_sum[2] = arguments.Tensor(1, self.lookahead.bets_count[1], 1,
                                                         game_settings.card_count).fill(0)

        ##--data structures for inner nodes (not terminal nor allin) [bets_count x parent_nonallinbetscount x gp_id x batch x players x range]
        self.lookahead.inner_nodes[1] = arguments.Tensor(1, 1, 1, constants.players_count,
                                                         game_settings.card_count).fill(0)
        self.lookahead.swap_data[1] = self.lookahead.inner_nodes[1].transpose(2, 3).clone()
        self.lookahead.inner_nodes_p1[1] = arguments.Tensor(1, 1, 1, 1, game_settings.card_count).fill(0)

        if self.lookahead.depth > 2:
            self.lookahead.inner_nodes[2] = arguments.Tensor(self.lookahead.bets_count[1], 1, 1,
                                                             constants.players_count,
                                                             game_settings.card_count).fill(0)
            self.lookahead.swap_data[2] = self.lookahead.inner_nodes[2].transpose(2, 3).clone()
            self.lookahead.inner_nodes_p1[2] = arguments.Tensor(self.lookahead.bets_count[1], 1, 1, 1,
                                                                game_settings.card_count).fill(0)

        ##--create the data structures for the rest of the layers
        for d in range(3, self.lookahead.depth):
            ##--data structures [actions x parent_action x grandparent_id x batch x players x range]
            self.lookahead.ranges_data[d] = arguments.Tensor(self.lookahead.actions_count[d - 1],
                                                             self.lookahead.bets_count[d - 2],
                                                             self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                             constants.players_count,
                                                             game_settings.card_count).fill(0)
            self.lookahead.cfvs_data[d] = self.lookahead.ranges_data[d].clone()
            self.lookahead.placeholder_data[d] = self.lookahead.ranges_data[d].clone()
            self.lookahead.pot_size[d] = self.lookahead.ranges_data[d].clone().fill(arguments.stack)

            ##--data structures [actions x parent_action x grandparent_id x batch x 1 x range]
            self.lookahead.average_strategies_data[d] = arguments.Tensor(self.lookahead.actions_count[d - 1],
                                                                         self.lookahead.bets_count[d - 2],
                                                                         self.lookahead.nonterminal_nonallin_nodes_count[
                                                                             d - 2], game_settings.card_count).fill(0)
            self.lookahead.current_strategy_data[d] = self.lookahead.average_strategies_data[d].clone()
            self.lookahead.regrets_data[d] = self.lookahead.average_strategies_data[d].clone().fill(
                self.lookahead.regret_epsilon)
            self.lookahead.current_regrets_data[d] = self.lookahead.average_strategies_data[d].clone().fill(0)
            self.lookahead.empty_action_mask[d] = self.lookahead.average_strategies_data[d].clone().fill(1)
            self.lookahead.positive_regrets_data[d] = self.lookahead.regrets_data[d].clone()

            ##--data structures [1 x parent_action x grandparent_id x batch x players x range]
            self.lookahead.regrets_sum[d] = arguments.Tensor(1, self.lookahead.bets_count[d - 2],
                                                             self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                             constants.players_count,
                                                             game_settings.card_count).fill(0)

            ##--data structures for the layers except the last one (built inside the per-depth loop)
            if d < self.lookahead.depth:
                self.lookahead.inner_nodes[d] = arguments.Tensor(self.lookahead.bets_count[d - 1],
                                                                 self.lookahead.nonallinbets_count[d - 2],
                                                                 self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                                 constants.players_count,
                                                                 game_settings.card_count).fill(0)
                self.lookahead.inner_nodes_p1[d] = arguments.Tensor(self.lookahead.bets_count[d - 1],
                                                                    self.lookahead.nonallinbets_count[d - 2],
                                                                    self.lookahead.nonterminal_nonallin_nodes_count[
                                                                        d - 2], 1, game_settings.card_count).fill(0)

                self.lookahead.swap_data[d] = self.lookahead.inner_nodes[d].transpose(2, 3).clone()
Example #13
params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('')
params['root_node']['street'] = 1
params['root_node']['current_player'] = constants['players']['P1']
params['root_node']['bets'] = np.full((1, 1), 100)

tree = builder.build_tree(params)

filling = TreeStrategyFilling()

cardTool = CardTool()

range1 = cardTool.get_uniform_range(params['root_node']['board'])
range2 = cardTool.get_uniform_range(params['root_node']['board'])

filling.fill_strategies(tree, 1, range1, range2)
filling.fill_strategies(tree, 2, range1, range2)

starting_ranges = arguments.Tensor(constants.players_count,
                                   game_settings.card_count)
starting_ranges[1].copy(range1)
starting_ranges[2].copy(range2)

tree_values = TreeValues()
tree_values.compute_values(tree, starting_ranges)

print('Exploitability: %f [chips]' % (tree.exploitability))

#local visualiser = TreeVisualiser()
#visualiser:graphviz(tree)
Example #14
    def cfrs_iter_dfs(self, node, iter):
        assert (node.current_player == constants.players.P1
                or node.current_player == constants.players.P2
                or node.current_player == constants.players.chance)
        opponent_index = 3 - node.current_player
        ##--dimensions in tensor
        action_dimension = 1
        card_dimension = 2
        ##--compute values using terminal_equity in terminal nodes
        if (node.terminal):
            terminal_equity = self._get_terminal_equity(node)
            values = node.ranges_absolute.clone().fill(0)

            if (node.type == constants.node_types.terminal_fold):
                terminal_equity.tree_node_fold_value(node.ranges_absolute,
                                                     values, opponent_index)
            else:
                terminal_equity.tree_node_call_value(node.ranges_absolute,
                                                     values)

            ##--multiply by the pot
            values = values * node.pot
            node.cf_values = values.viewAs(node.ranges_absolute)
        else:

            actions_count = len(node.children)
            current_strategy = None

            if node.current_player == constants.players.chance:
                current_strategy = node.strategy
            else:
                ##--we have to compute current strategy at the beginning of each iteration

                ##--initialize regrets in the first iteration
                if getattr(node, 'regrets', None) is None:
                    ##--[[actions_count x card_count]]
                    node.regrets = arguments.Tensor(
                        actions_count, game_settings.card_count).fill(
                            self.regret_epsilon)
                if getattr(node, 'possitive_regrets', None) is None:
                    node.possitive_regrets = arguments.Tensor(
                        actions_count, game_settings.card_count).fill(
                            self.regret_epsilon)

                ##--compute positive regrets so that we can compute the current strategy from them
                node.possitive_regrets.copy(node.regrets)
                node.possitive_regrets[torch.le(
                    node.possitive_regrets,
                    self.regret_epsilon)] = self.regret_epsilon

                ##--compute the current strategy
                regrets_sum = node.possitive_regrets.sum(action_dimension)
                current_strategy = node.possitive_regrets.clone()
                current_strategy.cdiv(regrets_sum.expandAs(current_strategy))

            ##--current cfv [[actions, players, ranges]]
            cf_values_allactions = arguments.Tensor(
                actions_count, constants.players_count,
                game_settings.card_count).fill(0)

            children_ranges_absolute = {}

            if node.current_player == constants.players.chance:
                ranges_mul_matrix = node.ranges_absolute[1].repeatTensor(
                    actions_count, 1)
                children_ranges_absolute[1] = torch.cmul(
                    current_strategy, ranges_mul_matrix)

                ranges_mul_matrix = node.ranges_absolute[2].repeatTensor(
                    actions_count, 1)
                children_ranges_absolute[2] = torch.cmul(
                    current_strategy, ranges_mul_matrix)
            else:
                ranges_mul_matrix = node.ranges_absolute[
                    node.current_player].repeatTensor(actions_count, 1)
                children_ranges_absolute[node.current_player] = torch.cmul(
                    current_strategy, ranges_mul_matrix)

                children_ranges_absolute[
                    opponent_index] = node.ranges_absolute[
                        opponent_index].repeatTensor(actions_count, 1).clone()

            for i in range(len(node.children)):
                child_node = node.children[i]
                ##--set new absolute ranges (after the action) for the child
                child_node.ranges_absolute = node.ranges_absolute.clone()

                child_node.ranges_absolute[1].copy(
                    children_ranges_absolute[1][i])
                child_node.ranges_absolute[2].copy(
                    children_ranges_absolute[2][i])
                self.cfrs_iter_dfs(child_node, iter)
                cf_values_allactions[i] = child_node.cf_values

            node.cf_values = arguments.Tensor(constants.players_count,
                                              game_settings.card_count).fill(0)

            if node.current_player != constants.players.chance:
                strategy_mul_matrix = current_strategy.viewAs(
                    arguments.Tensor(actions_count, game_settings.card_count))

                node.cf_values[node.current_player] = torch.cmul(
                    strategy_mul_matrix,
                    cf_values_allactions[:, node.current_player, :]).sum(1)
                node.cf_values[opponent_index] = cf_values_allactions[
                    :, opponent_index, :].sum(1)
            else:
                node.cf_values[1] = cf_values_allactions[:, 1, :].sum(1)
                node.cf_values[2] = cf_values_allactions[:, 2, :].sum(1)

            if node.current_player != constants.players.chance:
                ##--computing regrets
                current_regrets = cf_values_allactions[
                    :, node.current_player, :].reshape(
                        actions_count, game_settings.card_count).clone()
                current_regrets.csub(node.cf_values[node.current_player].view(
                    1, game_settings.card_count).expandAs(current_regrets))

                self.update_regrets(node, current_regrets)

                ##--accumulating average strategy
                self.update_average_strategy(node, current_strategy, iter)
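The current-strategy computation in this example is regret matching: clamp the accumulated regrets at a small epsilon and normalize each column (hand) so the per-action probabilities are proportional to positive regret. A compact sketch in plain PyTorch:

import torch

def regret_matching(regrets, epsilon=1e-8):
    # regrets: [actions x cards]; returns a strategy whose columns sum to 1
    positive = regrets.clamp(min=epsilon)
    return positive / positive.sum(dim=0, keepdim=True)

regrets = torch.tensor([[2.0, -1.0],
                        [1.0, 3.0]])
print(regret_matching(regrets))    # first column -> [2/3, 1/3]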
Example #15
    def get_value(self, ranges, values):
        assert ranges is not None and values is not None
        assert (ranges.size(1) == self.batch_size)
        self.iter = self.iter + 1
        if self.iter == 1:
            # --initializing data structures
            self.next_round_inputs = arguments.Tensor(self.batch_size, self.board_count,
                                                      (self.bucket_count * constants.players_count + 1)).zero()
            self.next_round_values = arguments.Tensor(self.batch_size, self.board_count, constants.players_count,
                                                      self.bucket_count).zero()
            self.transposed_next_round_values = arguments.Tensor(self.batch_size, constants.players_count,
                                                                 self.board_count, self.bucket_count)
            self.next_round_extended_range = arguments.Tensor(self.batch_size, constants.players_count,
                                                              self.board_count * self.bucket_count).zero()
            self.next_round_serialized_range = self.next_round_extended_range.view(-1, self.bucket_count)
            self.range_normalization = arguments.Tensor()
            self.value_normalization = arguments.Tensor(self.batch_size, constants.players_count, self.board_count)
            ##--handling pot feature for the nn
            nn_bet_input = self.pot_sizes.clone().mul(1 / arguments.stack)
            nn_bet_input = nn_bet_input.view(-1, 1).expand(self.batch_size, self.board_count)
            self.next_round_inputs[:, :, -1].copy(nn_bet_input)

        # --we need to find if we need remember something in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        if use_memory and self.iter == arguments.cfr_skip_iters + 1:
            ##--first iter that we need to remember something - we need to init data structures
            self.range_normalization_memory = arguments.Tensor(
                self.batch_size * self.board_count * constants.players_count, 1).zero()
            self.counterfactual_value_memory = arguments.Tensor(self.batch_size, constants.players_count,
                                                                self.board_count, self.bucket_count).zero()

        ##--computing bucket range in next street for both players at once
        self._card_range_to_bucket_range(ranges.view(self.batch_size * constants.players_count, -1),
                                         self.next_round_extended_range.view(self.batch_size * constants.players_count,
                                                                             -1))
        self.range_normalization.sum(self.next_round_serialized_range, 2)
        rn_view = self.range_normalization.view(self.batch_size, constants.players_count, self.board_count)
        for player in range(1, constants.players_count):
            self.value_normalization[:, player, :].copy(rn_view[:, 3 - player, :])
        if use_memory:
            self.range_normalization_memory.add(self.value_normalization)

        # --eliminating division by zero
        self.range_normalization[torch.eq(self.range_normalization, 0)] = 1
        self.next_round_serialized_range.cdiv(self.range_normalization.expandAs(self.next_round_serialized_range))
        serialized_range_by_player = self.next_round_serialized_range.view(self.batch_size, constants.players_count,
                                                                           self.board_count, self.bucket_count)
        for player in range(1, constants.players_count):
            player_range_index = {(player - 1) * self.bucket_count + 1, player * self.bucket_count}
            self.next_round_inputs[{{}, {}, player_range_index}].copy(self.next_round_extended_range[{{}, player, {}}])

        ##--using nn to compute values
        serialized_inputs_view = self.next_round_inputs.view(self.batch_size * self.board_count, -1)
        serialized_values_view = self.next_round_values.view(self.batch_size * self.board_count, -1)

        ##--computing value in the next round
        self.nn.get_value(serialized_inputs_view, serialized_values_view)

        ##--normalizing values back according to the original range sum
        normalization_view = self.value_normalization.view(self.batch_size, constants.players_count, self.board_count,
                                                           1).transpose(2, 3)
        self.next_round_values.cmul(normalization_view.expandAs(self.next_round_values))

        self.transposed_next_round_values.copy(self.next_round_values.transpose(3, 2))
        ##--remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory.add(self.transposed_next_round_values)

        # --translating bucket values back to the card values
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.view(self.batch_size * constants.players_count, -1),
            values.view(self.batch_size * constants.players_count, -1))
Example #16
 def get_possible_bucket_mask(self):
     mask = arguments.Tensor(1, self.bucket_count)
     card_indicator = arguments.Tensor(1, game_settings.card_count).fill(1)
     mask.mm(card_indicator, self._range_matrix)
     return mask
Example #17
import mock_nn_terminal
from TerminalEquity import terminal_equity

import torch
import value_nn

import arguments
import game_settings
import card_to_string
import card_tools

next_round_value = NextRoundValue()
#--print(next_round_value._range_matrix)
#--[[ test of card to bucket range translation
range = torch.range(1, 6).float().view(1, -1)
next_round_range = arguments.Tensor(
    1, next_round_value.bucket_count * next_round_value.board_count)
next_round_value._card_range_to_bucket_range(range, next_round_range)
print(next_round_range)
#]]

#--test of get_value functionality
mock_nn = MockNnTerminal()
#--local mock_nn = ValueNn()
next_round_value = NextRoundValue(mock_nn)

#--local bets = torch.range(1,1):float():mul(100)
bets = torch.Tensor(1).fill(1200)

next_round_value.start_computation(bets)

ranges = arguments.Tensor(1, 2, game_settings.card_count).fill(1 / 4)