def set_board(self, board):
    """Build the call and fold payoff matrices for a complete five-card board.

    The hand strengths of all 1326 hole-card pairs are obtained from
    ``dll.eval5Board``; a strength of -1 marks a pair that conflicts with the
    board.  Results are stored on the instance:

    * ``self.board_mask``  -- 1 for pairs with non-negative strength, else 0.
    * ``self.call_matrix`` -- entry ``[i, j]`` is 1 where
      ``strength[i] < strength[j]``, -1 where it is greater and 0.5 where the
      two are equal; zeroed wherever ``self.hole_mask < 1`` or either pair
      conflicts with the board.
    * ``self.fold_matrix`` -- a copy of ``self.hole_mask`` with the
      board-conflicting rows and columns zeroed.

    Only works for the last round (the board must hold exactly five cards).

    NOTE(review): ties are scored 0.5 while win/lose are +1/-1 -- confirm this
    is intended rather than 0.
    """
    assert board.size(0) == 5
    hole_count = arguments.hole_count

    # [1.0] call matrix -------------------------------------------------
    call_matrix = arguments.tensor(hole_count, hole_count)

    # Hand evaluation through the native library: fill a C int array with
    # the board cards and receive one strength value per hole-card pair.
    board_cards = (c_int * 5)()
    raw_strength = (c_int * 1326)()
    for position in range(board.size(0)):
        board_cards[position] = int(board[position])
    dll.eval5Board(board_cards, 5, raw_strength)
    strength = arguments.tensor(list(raw_strength))

    board_mask = strength.clone().fill_(1)
    board_mask[strength < 0] = 0
    self.board_mask = board_mask
    # C(47, 2) = 1081 pairs remain once five cards are on the board.
    assert int((self.board_mask > 0).sum()) == 1081

    # Row view (strength of hand i) and column view (strength of hand j),
    # both broadcast to the full matrix shape.
    row_strength = strength.view(hole_count, 1).expand_as(call_matrix)
    col_strength = strength.view(1, hole_count).expand_as(call_matrix)
    row_blocked = row_strength == -1
    col_blocked = col_strength == -1

    call_matrix[row_strength < col_strength] = 1
    call_matrix[row_strength > col_strength] = -1
    call_matrix[row_strength == col_strength] = 0.5
    # hole-card pairs that conflict with each other
    call_matrix[self.hole_mask < 1] = 0
    # hole-card pairs that conflict with the board
    call_matrix[row_blocked] = 0
    call_matrix[col_blocked] = 0

    # [2.0] fold matrix -------------------------------------------------
    fold_matrix = arguments.tensor(hole_count, hole_count)
    # player and opponent holes must not conflict with each other ...
    fold_matrix.copy_(self.hole_mask)
    # ... nor with the board
    fold_matrix[row_blocked] = 0
    fold_matrix[col_blocked] = 0

    self.call_matrix = call_matrix
    self.fold_matrix = fold_matrix
def _fill_ranges_dfs(self, node, ranges_absolute):
    """Recursively store the reach ranges of both players on every node.

    ``node.ranges_absolute`` receives a clone of ``ranges_absolute``.  For a
    non-terminal node the ranges handed to each child are the node's ranges
    multiplied by ``node.strategy``: both rows at a chance node, only the
    acting player's row otherwise (the opponent's row is passed unchanged).

    Sanity checks (via ``assert``): the strategy of a non-chance node is
    non-negative, NaN-free and sums to ~1 over actions; ranges lie in
    [0, 1]; and no range mass sits on hands masked out by
    ``Mask.get_board_mask(node.board)``.
    """
    node.ranges_absolute = ranges_absolute.clone()
    player_index = node.current_player
    opponent_index = 1 - player_index
    if node.terminal:
        return

    assert node.strategy is not None
    strategy = node.strategy
    actions_count = len(node.children)
    is_chance_node = node.current_player == constants.chance_player
    hand_mask = Mask.get_board_mask(node.board)

    if not is_chance_node:
        per_hand_total = strategy.sum(0)
        assert strategy.lt(0).sum() == 0
        assert per_hand_total.lt(0.999).sum() == 0
        assert per_hand_total.gt(1.001).sum() == 0
        # x != x holds only for NaN entries
        assert per_hand_total.ne(per_hand_total).sum() == 0

    own_ranges = node.ranges_absolute
    assert own_ranges.lt(0).sum() == 0
    assert own_ranges.gt(1).sum() == 0

    # Ranges must carry no probability on hands excluded by the board mask.
    blocked_hands = hand_mask.clone().fill_(1) - hand_mask
    blocked_row = blocked_hands.view(1, arguments.hole_count)
    blocked_mass = own_ranges.clone().mul_(blocked_row.expand_as(own_ranges))
    assert blocked_mass.sum() == 0

    # [action, player, hand]
    children_ranges_absolute = arguments.tensor(
        actions_count, 2, arguments.hole_count)

    if is_chance_node:
        # Chance scales the ranges of both players.
        for seat in (0, 1):
            seat_slice = children_ranges_absolute[:, seat, :]
            seat_slice.copy_(own_ranges[seat].repeat(actions_count, 1))
            seat_slice.mul_(node.strategy)
    else:
        # Opponent's range is untouched; the acting player's range is
        # weighted by the probability of taking each action.
        children_ranges_absolute[:, opponent_index, :] = \
            own_ranges[opponent_index].repeat(actions_count, 1)
        acting_range = own_ranges[player_index].repeat(actions_count, 1)
        children_ranges_absolute[:, player_index, :] = \
            node.strategy * acting_range

    # fill ranges for children
    for action, child_node in enumerate(node.children):
        self._fill_ranges_dfs(child_node, children_ranges_absolute[action])
def _fill_uniformly(cls, node):
    """Give a player node a uniform strategy over its children.

    For a non-terminal node, ``node.strategy`` is set to a tensor of shape
    ``(len(node.children), arguments.hole_count)`` in which every entry is
    ``1 / len(node.children)``.  Terminal nodes are left untouched.

    The node must belong to one of the two players (asserted); chance nodes
    are not accepted.
    """
    cp = node.current_player
    assert (cp == constants.p1_player or cp == constants.p2_player)
    if node.terminal:
        return
    children_number = len(node.children)
    # Size the hand dimension from the shared setting instead of a literal
    # 1326 so the strategy always matches the range tensors.
    node.strategy = arguments.tensor(
        children_number, arguments.hole_count).fill_(1.0 / children_number)
def get_board_mask(cls, board):
    """Return a 0/1 vector marking hole-card pairs compatible with ``board``.

    The result has ``arguments.hole_count`` entries.  The pair made of cards
    ``low < high`` lives at index ``high * (high - 1) // 2 + low``; its entry
    is 0 when either card appears on the board and 1 otherwise.
    """
    mask = arguments.tensor(arguments.hole_count).fill_(1)
    board_cards = {int(card) for card in board}
    for high in range(1, arguments.card_count):
        high_on_board = high in board_cards
        row_start = high * (high - 1) // 2
        for low in range(high):
            if low in board_cards or high_on_board:
                mask[row_start + low] = 0
    return mask
def get_possible_bets(self, node):
    """List the bet states reachable by a raise of the acting player.

    Each row of the returned tensor is a ``[bet_0, bet_1]`` pair in which the
    opponent's entry is the opponent's current bet and the acting player's
    entry is that bet plus the raise.  Candidate raises are
    ``pot * fraction`` for every fraction in ``self.pot_fractions`` (with
    ``pot = 2 * opponent_bet``) that fall in ``[min_raise, max_raise)``,
    followed by the all-in raise.

    Returns an empty ``torch.FloatTensor`` when no raise is possible and a
    single all-in row when the minimum raise already equals the maximum.
    """
    acting = node.current_player
    player_bet = node.bets[acting]
    opponent_bet = node.bets[1 - acting]
    assert (player_bet <= opponent_bet)

    max_raise_size = arguments.stack - opponent_bet
    # At least the amount needed to call, at least one big blind, and never
    # more than what is left in the stack.
    min_raise_size = min(
        max_raise_size,
        max(opponent_bet - node.bets[acting], arguments.BB))

    # [1]. raise is not valid (opponent is already all-in, player can only call)
    if min_raise_size == 0:
        return torch.FloatTensor()

    # [2]. can only go all-in
    if min_raise_size == max_raise_size:
        all_in = torch.FloatTensor(1, 2).fill_(opponent_bet)
        all_in[0, acting] = opponent_bet + min_raise_size
        return all_in

    # [3]. try every pot-fraction bet, keep the valid ones
    fractions = self.pot_fractions
    capacity = len(fractions) + 1
    bets = arguments.tensor(capacity, 2).fill_(opponent_bet)
    pot = 2 * opponent_bet
    filled = 0
    for fraction in fractions:
        raise_size = pot * fraction
        if min_raise_size <= raise_size < max_raise_size:
            bets[filled, acting] = opponent_bet + raise_size
            filled += 1

    # one slot is always reserved for the all-in bet
    assert (filled < capacity)
    bets[filled, acting] = opponent_bet + max_raise_size
    filled += 1
    return bets[0:filled]
def _build_tree_dfs(self, current_node):
    """Expand ``current_node`` depth-first and annotate the subtree.

    Sets on ``current_node``:

    * ``pot``      -- the smaller of the two bets,
    * ``children`` -- the nodes returned by ``self._get_children``,
    * ``actions``  -- one entry per child: ``constants.fold_action`` for the
      first child, ``constants.ccall_action`` for the second and the child's
      largest bet for every further child,
    * ``depth``    -- one more than the deepest child (1 for a leaf).

    Every child gets ``parent`` pointing back at ``current_node``.
    """
    current_node.pot = current_node.bets.min()
    children = self._get_children(current_node)
    current_node.children = children
    current_node.actions = arguments.tensor(len(children))

    deepest_child = 0
    for position, child in enumerate(children):
        child.parent = current_node
        self._build_tree_dfs(child)
        if child.depth > deepest_child:
            deepest_child = child.depth

        if position == 0:
            action = constants.fold_action
        elif position == 1:
            action = constants.ccall_action
        else:
            action = child.bets.max()
        current_node.actions[position] = action

    current_node.depth = deepest_child + 1
from game.enums import Round
from game.betsizing import BetSizing
from setting import arguments, constants
from tree.tree_builder import TreeBuilder
from cfr.tree_cfr import TreeCFR
from equity.mask import Mask
from cfr.tree_values import TreeValues

# Root of a river-only tree: both players have 3000 in the pot, player 1 acts,
# and the only bet sizes are one pot-sized bet plus all-in.
params = {
    'street': Round.RIVER,
    'bets': arguments.tensor(2).fill_(3000),
    'current_player': constants.p1_player,
    # alternative board: arguments.tensor([47, 48, 49, 50, 51])
    'board': arguments.tensor([0, 5, 16, 29, 34]),
    'bet_sizing': BetSizing(pot_fractions=[1]),
    'limit_to_street': True,
}

tb = TreeBuilder()
root = tb.build_tree(params)

# Start both players from a uniform range ...
ranges = arguments.tensor(2, arguments.hole_count).fill_(1)
tree_cfr = TreeCFR()

# ... drop the hands excluded by the board mask ...
valid_hole_mask = Mask.get_board_mask(root.board).view(
    1, arguments.hole_count)
expand_mask = valid_hole_mask.expand_as(ranges)
ranges.mul_(expand_mask)

# ... and renormalise each player's range so it sums to one.
ranges_sum = ranges.sum(1).view(2, 1)
ranges.div_(ranges_sum.expand(2, arguments.hole_count))

tree_cfr.run_cfr(root, ranges, 10000)
def _compute_values_dfs(self, node):
    """Compute counterfactual and best-response values for a subtree.

    Walks the tree depth-first and writes on every node:

    * ``cf_values``      -- ``[2, hands]`` counterfactual values under the
      strategies stored in the tree,
    * ``cf_br_values``   -- ``[2, hands]`` counterfactual values when the
      acting player best-responds,
    * ``cfv_infset``     -- per player, ``cf_values`` weighted by that
      player's ``ranges_absolute`` and summed over hands,
    * ``cfv_br_infset``  -- the same weighting applied to ``cf_br_values``,
    * ``epsilon``        -- ``cfv_br_infset - cfv_infset``,
    * ``exploitability`` -- mean of ``epsilon`` over both players.

    Requires ``node.ranges_absolute`` on every node and ``node.strategy`` on
    every non-terminal node.  ``TerminalEquity`` objects are cached per board
    in ``self._cached_terminal_equity``.
    """
    player_index = node.current_player
    opponent_index = 1 - player_index

    if node.terminal:
        is_call_node = node.node_type == constants.terminal_call_node
        assert (node.node_type == constants.terminal_fold_node
                or is_call_node)

        # Equity matrices depend only on the board, so build them once per
        # distinct board and reuse them.
        key = ' '.join([str(int(x)) for x in node.board])
        terminal_equity = self._cached_terminal_equity.get(key, None)
        if terminal_equity is None:
            terminal_equity = TerminalEquity()
            terminal_equity.set_board(node.board)
            self._cached_terminal_equity[key] = terminal_equity

        # Each player's values come from the *other* player's range.
        if is_call_node:
            payoff_matrix = terminal_equity.call_matrix
        else:
            payoff_matrix = terminal_equity.fold_matrix
        values = node.ranges_absolute.clone().fill_(0)
        values[0] = torch.matmul(node.ranges_absolute[1], payoff_matrix)
        values[1] = torch.matmul(node.ranges_absolute[0], payoff_matrix)
        if not is_call_node:
            # terminal fold node: flip the sign for the opponent of the
            # node's current player
            values[opponent_index, :].mul_(-1)
        values.mul_(node.pot)

        # Both attributes are views of the same tensor: at a terminal node
        # there is nothing to best-respond to.
        node.cf_values = values.view_as(node.ranges_absolute)
        node.cf_br_values = values.view_as(node.ranges_absolute)
    else:
        actions_count = len(node.children)
        hole_count = node.ranges_absolute.size(1)

        # [action, player, hand]
        cf_values_allactions = arguments.tensor(
            actions_count, 2, hole_count).fill_(0)
        cf_br_values_allactions = arguments.tensor(
            actions_count, 2, hole_count).fill_(0)
        for action, child_node in enumerate(node.children):
            self._compute_values_dfs(child_node)
            cf_values_allactions[action] = child_node.cf_values
            cf_br_values_allactions[action] = child_node.cf_br_values

        # compute values of this node according to its children's values
        node.cf_values = arguments.tensor(2, hole_count).fill_(0)
        node.cf_br_values = arguments.tensor(2, hole_count).fill_(0)

        # strategy [actions * range]
        strategy_mul_matrix = node.strategy.view(actions_count, hole_count)

        if player_index == constants.chance_player:
            node.cf_values = cf_values_allactions.sum(0)
            node.cf_br_values = cf_br_values_allactions.sum(0)
        else:
            # Acting player: expectation over own strategy.
            node.cf_values[player_index] = (
                strategy_mul_matrix
                * cf_values_allactions[:, player_index, :]).sum(0)
            # Opponent: children already carry the acting player's reach,
            # so the on-strategy values are simply summed over actions.
            node.cf_values[opponent_index] = \
                cf_values_allactions[:, opponent_index, :].sum(0)

            # Best response: opponent values are summed as above, the
            # acting player picks the best action for every hand.
            node.cf_br_values[opponent_index] = \
                cf_br_values_allactions[:, opponent_index, :].sum(0)
            node.cf_br_values[player_index] = \
                cf_br_values_allactions[:, player_index, :].max(0)[0]

    # cf values weighted by reach probability
    node.cfv_infset = arguments.tensor(2)
    node.cfv_infset[0] = (node.cf_values[0] * node.ranges_absolute[0]).sum()
    node.cfv_infset[1] = (node.cf_values[1] * node.ranges_absolute[1]).sum()

    # cfv-br values weighted by reach probability; these must be read from
    # cf_br_values, the freshly allocated output tensor holds no data yet.
    node.cfv_br_infset = arguments.tensor(2)
    node.cfv_br_infset[0] = (
        node.cf_br_values[0] * node.ranges_absolute[0]).sum()
    node.cfv_br_infset[1] = (
        node.cf_br_values[1] * node.ranges_absolute[1]).sum()

    node.epsilon = node.cfv_br_infset - node.cfv_infset
    node.exploitability = node.epsilon.mean()