def traverse_ESMCCFR(self, state, player): if state.is_terminal(): return state.get_utility(player) #default to chance player other_player = 3 - player player_turn = state.get_players_turn() possible_bets = self.available_bets.get_bets_as_numbers( state._my_contrib(player_turn), state._other_contrib(player_turn), self.abstracted) # Determine the strategy at this infoset infoset = state.get_infoset(player_turn) if infoset in self.infoset_strategy_map.keys(): strategy = self.infoset_strategy_map[infoset] else: strategy = Strategy(len(possible_bets)) self.infoset_strategy_map[infoset] = strategy player_strategy = strategy.calculate_strategy() if player_turn == player: # initialize expected value # value of a node h is the value player i expects to achieve if all players play according to given strategy, having reached h value = 0 value_bet = [0] * len(player_strategy) for bet_index, bet in enumerate(possible_bets): # need to define adding an bet to a bets, make bet class memento = state.update(bet) # Traverse each bet (per iteration of loop) (each bet changes the bets) va = self.traverse_ESMCCFR(state, player) state.reverse_update(memento) value_bet[bet_index] = va # Update the expected value value += player_strategy[bet_index] * va for bet_index in range(len(possible_bets)): # Update the cumulative regret of each bet strategy.regret_sum[bet_index] += value_bet[bet_index] - value return value elif player_turn == other_player: # Sample one bet and increment bet counter bet_index = self.get_random_bet(player_strategy) bet = possible_bets[bet_index] strategy.count[bet_index] += 1 memento = state.update(bet) val = self.traverse_ESMCCFR(state, player) state.reverse_update(memento) return val else: raise Exception('How did we get here? There are no other players')