def forward(self, outputs, targets, mask):
    batch_size = outputs.size(1)
    feature_size = outputs.size(2)
    # 1.0 zero out the outputs/targets so that the error does not depend on the masked entries
    outputs.cmul(mask)
    targets.cmul(mask)
    loss = self.criterion.forward(outputs, targets)
    # 2.0 if the batch size has changed, create new storage for the sum, otherwise reuse
    if not self.mask_sum or (self.mask_sum.size(1) != batch_size):
        self.mask_placeholder = arguments.Tensor(mask.size()).fill(0)
        self.mask_sum = arguments.Tensor(batch_size).fill(0)
        self.mask_multiplier = self.mask_sum.clone().fill(0).view(-1, 1)
    # 3.0 compute the mask sum for each batch
    self.mask_placeholder.copy(mask)
    torch.sum(self.mask_sum, self.mask_placeholder, 2)
    # 3.1 mask multiplier - note that mask is 1 for impossible features
    self.mask_multiplier.fill(feature_size)
    self.mask_multiplier.csub(self.mask_sum)
    self.mask_multiplier.div(feature_size)
    # 4.0 multiply to get a new loss
    # the loss is not computed correctly batch-wise, but that does not matter
    # here since the gradients are correct
    loss_multiplier = (batch_size * feature_size) / (batch_size * feature_size - self.mask_sum.sum())
    new_loss = loss_multiplier * loss
    return new_loss
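# Illustrative sketch (plain Python, hypothetical numbers) of the rescaling in step 4.0:
# if the criterion averages over all batch_size * feature_size entries but mask_sum of
# them were zeroed out, multiplying by
#   (batch_size * feature_size) / (batch_size * feature_size - mask_sum)
# recovers the mean over the entries that actually count.
def _masked_loss_rescaling_example():
    batch_size, feature_size, mask_sum = 4, 10, 6
    total_error = 12.0
    raw_mean = total_error / (batch_size * feature_size)  # mean over all 40 entries
    corrected = raw_mean * (batch_size * feature_size) / (batch_size * feature_size - mask_sum)
    assert abs(corrected - total_error / 34) < 1e-9  # mean over the 34 unmasked entries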
def _set_call_matrix(self, board):
    street = card_tools.board_to_street(board)
    self.equity_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count).zero()
    if street == 1:
        # iterate over all possible next-round boards
        next_round_boards = card_tools.get_second_round_boards()
        boards_count = next_round_boards.size(1)
        next_round_equity_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count)
        for board_idx in range(boards_count):
            self.get_last_round_call_matrix(next_round_boards[board_idx], next_round_equity_matrix)
            self.equity_matrix.add(next_round_equity_matrix)
        # average the values in the call matrix
        if game_settings.board_card_count == 1:
            weight_constant = 1 / (game_settings.card_count - 2)
        else:
            weight_constant = 2 / ((game_settings.card_count - 2) * (game_settings.card_count - 3))
        self.equity_matrix.mul(weight_constant)
    elif street == 2:
        # on the last round we can fill the matrix directly
        self.get_last_round_call_matrix(board, self.equity_matrix)
    else:
        assert False, 'impossible street'
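# Sanity check (hedged, assuming standard Leduc settings: card_count = 6, board_card_count = 1):
# from the point of view of two dealt private cards, 4 board cards remain, so the averaging
# weight above is 1 / (card_count - 2) = 0.25, i.e. each next-round board is equally likely.
def _call_matrix_weight_example():
    card_count, board_card_count = 6, 1
    if board_card_count == 1:
        weight_constant = 1 / (card_count - 2)
    else:
        weight_constant = 2 / ((card_count - 2) * (card_count - 3))
    assert weight_constant == 0.25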
def _sample_bet(self, node, state):
    # 1.0 get the possible bets in the node
    possible_bets = self.resolving.get_possible_actions()
    actions_count = possible_bets.size(1)
    # 2.0 get the strategy for the current hand, since the strategy is computed for all hands
    hand_strategy = arguments.Tensor(actions_count)
    for i in range(1, actions_count):
        action_bet = possible_bets[i]
        action_strategy = self.resolving.get_action_strategy(action_bet)
        hand_strategy[i] = action_strategy[self.hand_id]
    # assert abs(1 - hand_strategy.sum()) < 0.001
    print("strategy:")
    print(hand_strategy)
    # 3.0 sample the action by doing a cumsum and a uniform sample
    hand_strategy_cumsum = torch.cumsum(hand_strategy)
    r = torch.uniform()
    sampled_bet = possible_bets[hand_strategy_cumsum.gt(r)][1]
    print("playing action that has prob: %s" % hand_strategy[hand_strategy_cumsum.gt(r)][1])
    # 4.0 update the invariants based on our action
    self.current_opponent_cfvs_bound = self.resolving.get_action_cfv(sampled_bet)
    strategy = self.resolving.get_action_strategy(sampled_bet)
    self.current_player_range.cmul(strategy)
    self.current_player_range = card_tools.normalize_range(node.board, self.current_player_range)
    return sampled_bet
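# Minimal sketch (plain Python, hypothetical numbers) of the cumsum + uniform sampling in step 3.0:
# with strategy [0.2, 0.5, 0.3] the cumulative sums are [0.2, 0.7, 1.0]; the sampled action is the
# first index whose cumulative sum exceeds the uniform draw r.
def _cumsum_sampling_example():
    import random
    strategy = [0.2, 0.5, 0.3]
    cumulative = [sum(strategy[:i + 1]) for i in range(len(strategy))]
    r = random.random()
    sampled_index = next(i for i, c in enumerate(cumulative) if c > r)
    return sampled_index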
def _init_bucketing(self):
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    boards = card_tools.get_second_round_boards()
    self.board_count = boards.size(1)
    self._range_matrix = arguments.Tensor(game_settings.card_count,
                                          self.board_count * self.bucket_count).zero()
    self._range_matrix_board_view = self._range_matrix.view(game_settings.card_count,
                                                            self.board_count, self.bucket_count)
    for idx in range(1, self.board_count):
        board = boards[idx]
        buckets = self.bucketer.compute_buckets(board)
        class_ids = torch.range(1, self.bucket_count)
        if arguments.gpu:
            buckets = buckets.cuda()
            class_ids = class_ids.cuda()
        else:
            class_ids = class_ids.float()
        class_ids = class_ids.view(1, self.bucket_count).expand(game_settings.card_count, self.bucket_count)
        card_buckets = buckets.view(game_settings.card_count, 1).expand(game_settings.card_count, self.bucket_count)
        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix_board_view[:, idx, :][torch.eq(class_ids, card_buckets)] = 1
    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.t().clone()
    # we need to divide the matrix by the number of possible boards (from the point of view of each hand)
    weight_constant = 1 / (self.board_count - 2)
    self._reverse_value_matrix.mul(weight_constant)
def _set_fold_matrix(self, board):
    self.fold_matrix = arguments.Tensor(game_settings.card_count, game_settings.card_count)
    self.fold_matrix.fill(1)
    # set cards that block each other to zero - exactly the diagonal elements in Leduc variants
    self.fold_matrix.csub(torch.eye(game_settings.card_count).typeAs(self.fold_matrix))
    self._handle_blocking_cards(self.fold_matrix, board)
def batch_eval(board, impossible_hand_value):
    hand_values = arguments.Tensor(game_settings.card_count).fill(-1)
    if board.dim() == 0:
        # no board cards - the hand value is given by the card rank alone
        for hand in range(game_settings.card_count):
            hand_values[hand] = math.floor((hand - 1) / game_settings.suit_count) + 1
    else:
        board_size = board.size(1)
        assert board_size == 1 or board_size == 2, 'Incorrect board size for Leduc'
        whole_hand = arguments.Tensor(board_size + 1)
        # copy the board into all but the last entry of the whole hand
        whole_hand[:-1].copy(board)
        for card in range(1, game_settings.card_count):
            whole_hand[-1] = card
            hand_values[card] = evaluate(whole_hand, impossible_hand_value)
    return hand_values
def update_average_strategy(self, node, current_strategy, iter):
    if iter > arguments.cfr_skip_iters:
        actions_count = len(node.children)
        # lazily initialize the average strategy and the iteration weight sum
        node.strategy = node.strategy or arguments.Tensor(actions_count, game_settings.card_count).fill(0)
        node.iter_weight_sum = node.iter_weight_sum or arguments.Tensor(game_settings.card_count).fill(0)
        iter_weight_contribution = node.ranges_absolute[node.current_player].clone()
        iter_weight_contribution[torch.le(iter_weight_contribution, 0)] = self.regret_epsilon
        node.iter_weight_sum.add(iter_weight_contribution)
        iter_weight = torch.cdiv(iter_weight_contribution, node.iter_weight_sum)
        expanded_weight = iter_weight.view(1, game_settings.card_count).expandAs(node.strategy)
        old_strategy_scale = expanded_weight * (-1) + 1  # same as 1 - expanded_weight
        node.strategy.cmul(old_strategy_scale)
        strategy_addition = current_strategy.cmul(expanded_weight)
        node.strategy.add(strategy_addition)
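# Worked example (plain Python, hypothetical numbers) of the incremental averaging above:
# with per-hand weight w = contribution / cumulative_weight_sum, the update
#   avg = (1 - w) * avg + w * current
# is a running, reach-probability-weighted average of the current strategies.
def _average_strategy_update_example():
    cumulative_weight, average = 0.0, 0.0
    for contribution, current in [(0.5, 1.0), (0.25, 0.0), (0.25, 1.0)]:
        cumulative_weight += contribution
        w = contribution / cumulative_weight
        average = (1 - w) * average + w * current
    # weighted mean: (0.5 * 1.0 + 0.25 * 0.0 + 0.25 * 1.0) / 1.0 = 0.75
    assert abs(average - 0.75) < 1e-9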
def __init__(self, board, player_range, opponent_cfvs):
    assert board is not None
    self.input_opponent_range = player_range.clone()
    self.input_opponent_value = opponent_cfvs.clone()
    self.curent_opponent_values = arguments.Tensor(game_settings.card_count)
    self.regret_epsilon = 1.0 / 100000000
    # 2 stands for 2 actions: play/terminate
    # self.opponent_reconstruction_regret = arguments.params['Tensor'](2, game_settings.card_count)
    self.opponent_reconstruction_regret = np.zeros((2, game_settings.card_count))
    self.play_current_strategy = np.zeros(game_settings.card_count)
    self.terminate_current_strategy = np.zeros(game_settings.card_count)
    self.terminate_regrets = np.zeros(game_settings.card_count)
    self.total_values = np.zeros(game_settings.card_count)
    self.play_regrets = np.zeros(game_settings.card_count)
    self.range_mask = np.zeros(game_settings.card_count)
def __init__(self):
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    self.equity_matrix = arguments.Tensor(self.bucket_count, self.bucket_count).zero()
    # fill the equity matrix
    boards = card_tools.get_second_round_boards()
    self.board_count = boards.size(1)
    self.terminal_equity = TerminalEquity()
    for i in range(1, self.board_count):
        board = boards[i]
        self.terminal_equity.set_board(board)
        call_matrix = self.terminal_equity.get_call_matrix()
        buckets = self.bucketer.compute_buckets(board)
        for c1 in range(1, game_settings.card_count):
            for c2 in range(1, game_settings.card_count):
                b1 = buckets[c1]
                b2 = buckets[c2]
                if b1 > 0 and b2 > 0:
                    matrix_entry = call_matrix[c1][c2]
                    self.equity_matrix[b1][b2] = matrix_entry
def set_board(self, board):
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    self._range_matrix = arguments.Tensor(game_settings.card_count, self.bucket_count).zero()
    buckets = self.bucketer.compute_buckets(board)
    class_ids = torch.range(1, self.bucket_count)
    if arguments.gpu:
        buckets = buckets.cuda()
        class_ids = class_ids.cuda()
    else:
        class_ids = class_ids.float()
    class_ids = class_ids.view(1, self.bucket_count).expand(game_settings.card_count, self.bucket_count)
    card_buckets = buckets.view(game_settings.card_count, 1).expand(game_settings.card_count, self.bucket_count)
    # finding all strength classes
    # matrix for transformation from card ranges to strength class ranges
    self._range_matrix[torch.eq(class_ids, card_buckets)] = 1
    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.t().clone()
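# Hedged usage sketch: _range_matrix is a card_count x bucket_count 0/1 indicator matrix, so a
# card range converts to a bucket range with one matrix product. The helper below is illustrative
# only (not part of the class); it mirrors what a card-range-to-bucket-range conversion would do.
def _card_range_to_bucket_range_sketch(card_range, range_matrix):
    # (1 x card_count) x (card_count x bucket_count) -> (1 x bucket_count)
    bucket_range = arguments.Tensor(1, range_matrix.size(2))
    bucket_range.mm(card_range, range_matrix)
    return bucket_range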
def set_datastructures_from_tree_dfs(self, node, layer, action_id, parent_id, gp_id):
    # fill the pot size
    assert node.pot
    self.lookahead.pot_size[layer][action_id, parent_id, gp_id, :, :] = node.pot
    node.lookahead_coordinates = arguments.Tensor([action_id, parent_id, gp_id])
    # a transition call cannot be an all-in call
    if node.current_player == constants.players.chance:
        assert parent_id <= self.lookahead.nonallinbets_count[layer - 2]
    if layer < self.lookahead.depth + 1:
        gp_nonallinbets_count = self.lookahead.nonallinbets_count[layer - 2]
        prev_layer_terminal_actions_count = self.lookahead.terminal_actions_count[layer - 1]
        gp_terminal_actions_count = self.lookahead.terminal_actions_count[layer - 2]
        prev_layer_bets_count = self.lookahead.bets_count[layer - 1]
        # compute the next coordinates for parent and grandparent
        next_parent_id = action_id - prev_layer_terminal_actions_count
        next_gp_id = (gp_id - 1) * gp_nonallinbets_count + parent_id
        if (not node.terminal) and (node.current_player != constants.players.chance):
            # the parent is not an all-in raise
            assert parent_id <= self.lookahead.nonallinbets_count[layer - 2]
            # do we need to mask some actions for this node?
            # (that is, does the node have fewer children than the max number of children on this layer)
            node_with_empty_actions = (len(node.children) < self.lookahead.actions_count[layer])
            if node_with_empty_actions:
                # we need to mask nonexistent padded bets
                assert layer > 1
                terminal_actions_count = self.lookahead.terminal_actions_count[layer]
                assert terminal_actions_count == 2
                existing_bets_count = len(node.children) - terminal_actions_count
                # all-in situations
                if existing_bets_count == 0:
                    assert action_id == self.lookahead.actions_count[layer - 1]
                for child_id in range(1, terminal_actions_count):
                    child_node = node.children[child_id]
                    # go deeper
                    self.set_datastructures_from_tree_dfs(child_node, layer + 1, child_id,
                                                          next_parent_id, next_gp_id)
                # we need to make sure that even though there are fewer actions, the last action/all-in
                # has the same last index as if we had the full number of actions,
                # so we manually set the action_id to the last action (all-in)
                for b in range(1, existing_bets_count):
                    self.set_datastructures_from_tree_dfs(node.children[len(node.children) - b + 1],
                                                          layer + 1,
                                                          self.lookahead.actions_count[layer] - b + 1,
                                                          next_parent_id, next_gp_id)
                # mask out the empty actions
                self.lookahead.empty_action_mask[layer + 1][terminal_actions_count + 1:-(existing_bets_count + 1),
                                                            next_parent_id, next_gp_id, :] = 0
            else:
                # the node has the full action count, easy to handle
                for child_id in range(1, len(node.children)):
                    # go deeper
                    child_node = node.children[child_id]
                    self.set_datastructures_from_tree_dfs(child_node, layer + 1, child_id,
                                                          next_parent_id, next_gp_id)
def construct_data_structures(self):
    self._compute_structure()
    # lookahead main data structures
    # all the structures are per-layer tensors, that is, each layer holds the data in n-dimensional tensors
    self.lookahead.pot_size = {}
    self.lookahead.ranges_data = {}
    self.lookahead.average_strategies_data = {}
    self.lookahead.current_strategy_data = {}
    self.lookahead.cfvs_data = {}
    self.lookahead.average_cfvs_data = {}
    self.lookahead.regrets_data = {}
    self.lookahead.current_regrets_data = {}
    self.lookahead.positive_regrets_data = {}
    self.lookahead.placeholder_data = {}
    self.lookahead.regrets_sum = {}
    self.lookahead.empty_action_mask = {}  # used to mask empty actions
    # used to hold and swap inner (nonterminal) nodes when doing some transpose operations
    self.lookahead.inner_nodes = {}
    self.lookahead.inner_nodes_p1 = {}
    self.lookahead.swap_data = {}

    # create the data structures for the first two layers
    # data structures [actions x parent_action x grandparent_id x batch x players x range]
    self.lookahead.ranges_data[1] = arguments.Tensor(1, 1, 1, constants.players_count,
                                                     game_settings.card_count).fill(1.0 / game_settings.card_count)
    self.lookahead.ranges_data[2] = arguments.Tensor(self.lookahead.actions_count[1], 1, 1,
                                                     constants.players_count,
                                                     game_settings.card_count).fill(1.0 / game_settings.card_count)
    self.lookahead.pot_size[1] = self.lookahead.ranges_data[1].clone().fill(0)
    self.lookahead.pot_size[2] = self.lookahead.ranges_data[2].clone().fill(0)
    self.lookahead.cfvs_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
    self.lookahead.cfvs_data[2] = self.lookahead.ranges_data[2].clone().fill(0)
    self.lookahead.average_cfvs_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
    self.lookahead.average_cfvs_data[2] = self.lookahead.ranges_data[2].clone().fill(0)
    self.lookahead.placeholder_data[1] = self.lookahead.ranges_data[1].clone().fill(0)
    self.lookahead.placeholder_data[2] = self.lookahead.ranges_data[2].clone().fill(0)

    # data structures for one player [actions x parent_action x grandparent_id x 1 x range]
    self.lookahead.average_strategies_data[1] = None
    self.lookahead.average_strategies_data[2] = arguments.Tensor(self.lookahead.actions_count[1], 1, 1,
                                                                 game_settings.card_count).fill(0)
    self.lookahead.current_strategy_data[1] = None
    self.lookahead.current_strategy_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
    self.lookahead.regrets_data[1] = None
    self.lookahead.regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
    self.lookahead.current_regrets_data[1] = None
    self.lookahead.current_regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
    self.lookahead.positive_regrets_data[1] = None
    self.lookahead.positive_regrets_data[2] = self.lookahead.average_strategies_data[2].clone().fill(0)
    self.lookahead.empty_action_mask[1] = None
    self.lookahead.empty_action_mask[2] = self.lookahead.average_strategies_data[2].clone().fill(1)

    # data structures for summing over the actions [1 x parent_action x grandparent_id x range]
    self.lookahead.regrets_sum[1] = arguments.Tensor(1, 1, 1, game_settings.card_count).fill(0)
    self.lookahead.regrets_sum[2] = arguments.Tensor(1, self.lookahead.bets_count[1], 1,
                                                     game_settings.card_count).fill(0)

    # data structures for inner nodes (neither terminal nor all-in)
    # [bets_count x parent_nonallinbets_count x gp_id x batch x players x range]
    self.lookahead.inner_nodes[1] = arguments.Tensor(1, 1, 1, constants.players_count,
                                                     game_settings.card_count).fill(0)
    self.lookahead.swap_data[1] = self.lookahead.inner_nodes[1].transpose(2, 3).clone()
    self.lookahead.inner_nodes_p1[1] = arguments.Tensor(1, 1, 1, 1, game_settings.card_count).fill(0)
    if self.lookahead.depth > 2:
        self.lookahead.inner_nodes[2] = arguments.Tensor(self.lookahead.bets_count[1], 1, 1,
                                                         constants.players_count,
                                                         game_settings.card_count).fill(0)
        self.lookahead.swap_data[2] = self.lookahead.inner_nodes[2].transpose(2, 3).clone()
        self.lookahead.inner_nodes_p1[2] = arguments.Tensor(self.lookahead.bets_count[1], 1, 1, 1,
                                                            game_settings.card_count).fill(0)

    # create the data structures for the rest of the layers
    for d in range(3, self.lookahead.depth):
        # data structures [actions x parent_action x grandparent_id x batch x players x range]
        self.lookahead.ranges_data[d] = arguments.Tensor(self.lookahead.actions_count[d - 1],
                                                         self.lookahead.bets_count[d - 2],
                                                         self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                         constants.players_count,
                                                         game_settings.card_count).fill(0)
        self.lookahead.cfvs_data[d] = self.lookahead.ranges_data[d].clone()
        self.lookahead.placeholder_data[d] = self.lookahead.ranges_data[d].clone()
        self.lookahead.pot_size[d] = self.lookahead.ranges_data[d].clone().fill(arguments.stack)

        # data structures [actions x parent_action x grandparent_id x batch x 1 x range]
        self.lookahead.average_strategies_data[d] = arguments.Tensor(self.lookahead.actions_count[d - 1],
                                                                     self.lookahead.bets_count[d - 2],
                                                                     self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                                     game_settings.card_count).fill(0)
        self.lookahead.current_strategy_data[d] = self.lookahead.average_strategies_data[d].clone()
        self.lookahead.regrets_data[d] = self.lookahead.average_strategies_data[d].clone().fill(
            self.lookahead.regret_epsilon)
        self.lookahead.current_regrets_data[d] = self.lookahead.average_strategies_data[d].clone().fill(0)
        self.lookahead.empty_action_mask[d] = self.lookahead.average_strategies_data[d].clone().fill(1)
        self.lookahead.positive_regrets_data[d] = self.lookahead.regrets_data[d].clone()

        # data structures [1 x parent_action x grandparent_id x batch x players x range]
        self.lookahead.regrets_sum[d] = arguments.Tensor(1, self.lookahead.bets_count[d - 2],
                                                         self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                         constants.players_count,
                                                         game_settings.card_count).fill(0)

        # data structures for the layers except the last one
        if d < self.lookahead.depth:
            self.lookahead.inner_nodes[d] = arguments.Tensor(self.lookahead.bets_count[d - 1],
                                                             self.lookahead.nonallinbets_count[d - 2],
                                                             self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                             constants.players_count,
                                                             game_settings.card_count).fill(0)
            self.lookahead.inner_nodes_p1[d] = arguments.Tensor(self.lookahead.bets_count[d - 1],
                                                                self.lookahead.nonallinbets_count[d - 2],
                                                                self.lookahead.nonterminal_nonallin_nodes_count[d - 2],
                                                                1, game_settings.card_count).fill(0)
            self.lookahead.swap_data[d] = self.lookahead.inner_nodes[d].transpose(2, 3).clone()
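# Hedged illustration of the per-layer layout used above: every layer d stores its data as
# [actions x parent_action x grandparent_id x players x range], so a single node's slice is
# addressed by the (action_id, parent_id, gp_id) coordinates that set_datastructures_from_tree_dfs
# assigns. The sizes below are made up for illustration.
def _layer_layout_example():
    import numpy as np
    actions, parents, grandparents = 3, 2, 2
    players_count, card_count = 2, 6
    ranges_data_d = np.zeros((actions, parents, grandparents, players_count, card_count))
    # the node at coordinates (action_id=1, parent_id=0, gp_id=1) holds one range per player
    node_slice = ranges_data_d[1, 0, 1]
    assert node_slice.shape == (players_count, card_count)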
params['root_node'] = {}
params['root_node']['board'] = card_to_string.string_to_board('')
params['root_node']['street'] = 1
params['root_node']['current_player'] = constants['players']['P1']
params['root_node']['bets'] = np.full((1, 1), 100)
tree = builder.build_tree(params)

filling = TreeStrategyFilling()
cardTool = CardTool()
range1 = cardTool.get_uniform_range(params['root_node']['board'])
range2 = cardTool.get_uniform_range(params['root_node']['board'])
filling.fill_strategies(tree, 1, range1, range2)
filling.fill_strategies(tree, 2, range1, range2)

starting_ranges = arguments.Tensor(constants.players_count, game_settings.card_count)
starting_ranges[1].copy(range1)
starting_ranges[2].copy(range2)

tree_values = TreeValues()
tree_values.compute_values(tree, starting_ranges)
print('Exploitability: %f [chips]' % tree.exploitability)

# visualiser = TreeVisualiser()
# visualiser.graphviz(tree)
def cfrs_iter_dfs(self, node, iter):
    assert (node.current_player == constants.players.P1 or
            node.current_player == constants.players.P2 or
            node.current_player == constants.players.chance)
    opponent_index = 3 - node.current_player
    # dimensions in the tensors
    action_dimension = 1
    card_dimension = 2
    # compute values using terminal_equity in terminal nodes
    if node.terminal:
        terminal_equity = self._get_terminal_equity(node)
        values = node.ranges_absolute.clone().fill(0)
        if node.type == constants.node_types.terminal_fold:
            terminal_equity.tree_node_fold_value(node.ranges_absolute, values, opponent_index)
        else:
            terminal_equity.tree_node_call_value(node.ranges_absolute, values)
        # multiply by the pot
        values = values * node.pot
        node.cf_values = values.viewAs(node.ranges_absolute)
    else:
        actions_count = len(node.children)
        current_strategy = None
        if node.current_player == constants.players.chance:
            current_strategy = node.strategy
        else:
            # we have to compute the current strategy at the beginning of each iteration
            # initialize regrets in the first iteration
            node.regrets = node.regrets or arguments.Tensor(
                actions_count, game_settings.card_count).fill(self.regret_epsilon)  # [actions_count x card_count]
            node.possitive_regrets = node.possitive_regrets or arguments.Tensor(
                actions_count, game_settings.card_count).fill(self.regret_epsilon)
            # compute positive regrets so that we can compute the current strategy from them
            node.possitive_regrets.copy(node.regrets)
            node.possitive_regrets[torch.le(node.possitive_regrets, self.regret_epsilon)] = self.regret_epsilon
            # compute the current strategy
            regrets_sum = node.possitive_regrets.sum(action_dimension)
            current_strategy = node.possitive_regrets.clone()
            current_strategy.cdiv(regrets_sum.expandAs(current_strategy))
        # current cfvs [actions x players x ranges]
        cf_values_allactions = arguments.Tensor(actions_count, constants.players_count,
                                                game_settings.card_count).fill(0)
        children_ranges_absolute = {}
        if node.current_player == constants.players.chance:
            ranges_mul_matrix = node.ranges_absolute[1].repeatTensor(actions_count, 1)
            children_ranges_absolute[1] = torch.cmul(current_strategy, ranges_mul_matrix)
            ranges_mul_matrix = node.ranges_absolute[2].repeatTensor(actions_count, 1)
            children_ranges_absolute[2] = torch.cmul(current_strategy, ranges_mul_matrix)
        else:
            ranges_mul_matrix = node.ranges_absolute[node.current_player].repeatTensor(actions_count, 1)
            children_ranges_absolute[node.current_player] = torch.cmul(current_strategy, ranges_mul_matrix)
            children_ranges_absolute[opponent_index] = node.ranges_absolute[opponent_index].repeatTensor(
                actions_count, 1).clone()
        for i in range(len(node.children)):
            child_node = node.children[i]
            # set new absolute ranges (after the action) for the child
            child_node.ranges_absolute = node.ranges_absolute.clone()
            child_node.ranges_absolute[1].copy(children_ranges_absolute[1][i])
            child_node.ranges_absolute[2].copy(children_ranges_absolute[2][i])
            self.cfrs_iter_dfs(child_node, iter)
            cf_values_allactions[i] = child_node.cf_values
        node.cf_values = arguments.Tensor(constants.players_count, game_settings.card_count).fill(0)
        if node.current_player != constants.players.chance:
            strategy_mul_matrix = current_strategy.viewAs(
                arguments.Tensor(actions_count, game_settings.card_count))
            node.cf_values[node.current_player] = torch.cmul(
                strategy_mul_matrix, cf_values_allactions[:, node.current_player, :]).sum(1)
            node.cf_values[opponent_index] = cf_values_allactions[:, opponent_index, :].sum(1)
        else:
            node.cf_values[1] = cf_values_allactions[:, 1, :].sum(1)
            node.cf_values[2] = cf_values_allactions[:, 2, :].sum(1)
        if node.current_player != constants.players.chance:
            # computing regrets
            current_regrets = cf_values_allactions[:, node.current_player, :].reshape(
                actions_count, game_settings.card_count).clone()
            current_regrets.csub(node.cf_values[node.current_player].view(
                1, game_settings.card_count).expandAs(current_regrets))
            self.update_regrets(node, current_regrets)
            # accumulating the average strategy
            self.update_average_strategy(node, current_strategy, iter)
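# Minimal sketch (plain Python, hypothetical numbers) of the regret-matching step used above:
# the current strategy is proportional to the positive part of the cumulative regrets, with a
# small epsilon keeping the distribution well defined when all regrets are non-positive.
def _regret_matching_example():
    regret_epsilon = 1e-9
    regrets = [2.0, -1.0, 1.0]
    positive = [max(r, regret_epsilon) for r in regrets]
    total = sum(positive)
    strategy = [p / total for p in positive]
    # probabilities are proportional to the positive regrets: roughly [2/3, ~0, 1/3]
    assert abs(sum(strategy) - 1.0) < 1e-9
    return strategy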
def get_value(self, ranges, values):
    assert ranges is not None and values is not None
    assert ranges.size(1) == self.batch_size
    self.iter = self.iter + 1
    if self.iter == 1:
        # initializing the data structures
        self.next_round_inputs = arguments.Tensor(self.batch_size, self.board_count,
                                                  (self.bucket_count * constants.players_count + 1)).zero()
        self.next_round_values = arguments.Tensor(self.batch_size, self.board_count,
                                                  constants.players_count, self.bucket_count).zero()
        self.transposed_next_round_values = arguments.Tensor(self.batch_size, constants.players_count,
                                                             self.board_count, self.bucket_count)
        self.next_round_extended_range = arguments.Tensor(self.batch_size, constants.players_count,
                                                          self.board_count * self.bucket_count).zero()
        self.next_round_serialized_range = self.next_round_extended_range.view(-1, self.bucket_count)
        self.range_normalization = arguments.Tensor()
        self.value_normalization = arguments.Tensor(self.batch_size, constants.players_count, self.board_count)
        # handling the pot feature for the nn
        nn_bet_input = self.pot_sizes.clone().mul(1 / arguments.stack)
        nn_bet_input = nn_bet_input.view(-1, 1).expand(self.batch_size, self.board_count)
        self.next_round_inputs[:, :, -1].copy(nn_bet_input)
    # we need to find out whether we have to remember something in this iteration
    use_memory = self.iter > arguments.cfr_skip_iters
    if use_memory and self.iter == arguments.cfr_skip_iters + 1:
        # first iteration in which we need to remember something - initialize the data structures
        self.range_normalization_memory = arguments.Tensor(
            self.batch_size * self.board_count * constants.players_count, 1).zero()
        self.counterfactual_value_memory = arguments.Tensor(self.batch_size, constants.players_count,
                                                            self.board_count, self.bucket_count).zero()
    # computing the bucket range on the next street for both players at once
    self._card_range_to_bucket_range(ranges.view(self.batch_size * constants.players_count, -1),
                                     self.next_round_extended_range.view(
                                         self.batch_size * constants.players_count, -1))
    self.range_normalization.sum(self.next_round_serialized_range, 2)
    rn_view = self.range_normalization.view(self.batch_size, constants.players_count, self.board_count)
    for player in range(1, constants.players_count):
        self.value_normalization[:, player, :].copy(rn_view[:, 3 - player, :])
    if use_memory:
        self.range_normalization_memory.add(self.value_normalization)
    # eliminating division by zero
    self.range_normalization[torch.eq(self.range_normalization, 0)] = 1
    self.next_round_serialized_range.cdiv(self.range_normalization.expandAs(self.next_round_serialized_range))
    serialized_range_by_player = self.next_round_serialized_range.view(self.batch_size, constants.players_count,
                                                                       self.board_count, self.bucket_count)
    for player in range(1, constants.players_count):
        player_range_index = slice((player - 1) * self.bucket_count, player * self.bucket_count)
        self.next_round_inputs[:, :, player_range_index].copy(self.next_round_extended_range[:, player, :])
    # using the nn to compute values
    serialized_inputs_view = self.next_round_inputs.view(self.batch_size * self.board_count, -1)
    serialized_values_view = self.next_round_values.view(self.batch_size * self.board_count, -1)
    # computing the value in the next round
    self.nn.get_value(serialized_inputs_view, serialized_values_view)
    # normalizing the values back according to the original range sum
    normalization_view = self.value_normalization.view(self.batch_size, constants.players_count,
                                                       self.board_count, 1).transpose(2, 3)
    self.next_round_values.cmul(normalization_view.expandAs(self.next_round_values))
    self.transposed_next_round_values.copy(self.next_round_values.transpose(3, 2))
    # remembering the values for the next round
    if use_memory:
        self.counterfactual_value_memory.add(self.transposed_next_round_values)
    # translating bucket values back to card values
    self._bucket_value_to_card_value(
        self.transposed_next_round_values.view(self.batch_size * constants.players_count, -1),
        values.view(self.batch_size * constants.players_count, -1))
def get_possible_bucket_mask(self):
    mask = arguments.Tensor(1, self.bucket_count)
    card_indicator = arguments.Tensor(1, game_settings.card_count).fill(1)
    mask.mm(card_indicator, self._range_matrix)
    return mask
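# Hedged usage sketch (illustrative only): the mask is nonzero exactly for buckets reachable by at
# least one card on the current board, so it can be used to zero out per-bucket data for
# unreachable buckets. `conversion` stands for an already constructed bucket-conversion object and
# `bucket_values` for a 1 x bucket_count tensor; both names are assumptions of this sketch.
def _mask_bucket_values_sketch(conversion, board, bucket_values):
    conversion.set_board(board)
    mask = conversion.get_possible_bucket_mask()
    bucket_values[torch.eq(mask, 0)] = 0
    return bucket_values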
import torch

import arguments
import game_settings
import card_to_string
import card_tools
import value_nn
from TerminalEquity import terminal_equity
from mock_nn_terminal import MockNnTerminal
from next_round_value import NextRoundValue  # assumed module path

next_round_value = NextRoundValue()
# print(next_round_value._range_matrix)

# test of card to bucket range translation
card_range = torch.range(1, 6).float().view(1, -1)
next_round_range = arguments.Tensor(1, next_round_value.bucket_count * next_round_value.board_count)
next_round_value._card_range_to_bucket_range(card_range, next_round_range)
print(next_round_range)

# test of get_value functionality
mock_nn = MockNnTerminal()
# mock_nn = ValueNn()
next_round_value = NextRoundValue(mock_nn)
# bets = torch.range(1, 1).float().mul(100)
bets = torch.Tensor(1).fill(1200)
next_round_value.start_computation(bets)
ranges = arguments.Tensor(1, 2, game_settings.card_count).fill(1 / 4)