def test_cpp_python_best_response_oracle(self, game_name, num_players):
  """Checks that the "py" and "cpp" oracle backends return matching policies.

  Loads `game_name` with `num_players` players, asks a BestResponseOracle
  built with each backend for a best response per player against the same
  single-policy populations, and then compares the resulting action
  probabilities on every state returned by
  `compute_states_and_info_states_if_none`.

  Args:
    game_name: Name of the game handed to `pyspiel.load_game`.
    num_players: Value used for the game's "players" parameter; also the
      number of populations and of training-parameter entries built here.
  """
  game = pyspiel.load_game(
      game_name, {"players": pyspiel.GameParameter(num_players)})
  all_states, _ = best_response.compute_states_and_info_states_if_none(
      game, all_states=None, state_to_information_state=None)

  # Every player starts with a population holding one copied TabularPolicy,
  # which is played with probability 1.
  current_best = []
  probabilities_of_playing_policies = []
  for _ in range(num_players):
    current_best.append([policy.TabularPolicy(game).__copy__()])
    probabilities_of_playing_policies.append([1.0])

  # Python backend: only the backend name is given at construction time.
  py_oracle = best_response_oracle.BestResponseOracle(
      best_response_backend="py")

  # C++ backend: the game is also passed to the constructor. Note that in
  # this regime, BestResponseOracle uses base_policy to construct and cache
  # TabularBestResponse internally.
  cpp_oracle = best_response_oracle.BestResponseOracle(
      game=game, best_response_backend="cpp")

  # One request per player; all requests share the same populations and
  # mixing probabilities and differ only in "current_player".
  training_params = []
  for player in range(num_players):
    player_params = {
        "total_policies": current_best,
        "current_player": player,
        "probabilities_of_playing_policies": probabilities_of_playing_policies,
    }
    training_params.append([player_params])

  py_best_rep = py_oracle(game, training_params)
  cpp_best_rep = cpp_oracle(game, training_params)

  # Compare the two backends state by state, using the best response of the
  # player to move. Actions are taken from both dictionaries in turn, and an
  # action missing from one side is treated as having probability 0.0.
  for state in all_states.values():
    player = state.current_player()
    py_probs = py_best_rep[player][0].action_probabilities(state)
    cpp_probs = cpp_best_rep[player][0].action_probabilities(state)
    for action_source in (py_probs, cpp_probs):
      for action in action_source:
        self.assertEqual(
            py_probs.get(action, 0.0), cpp_probs.get(action, 0.0))
def init_br_responder(env):
  """Builds the tabular best-response oracle and the initial agents.

  Args:
    env: Object exposing a `game` attribute; that game is used to build the
      TabularPolicy and is passed to the oracle.

  Returns:
    A tuple `(oracle, agents)`: the BestResponseOracle built from the game
    and the TabularPolicy, and a list of `FLAGS.n_players` copies of that
    policy.
  """
  game = env.game
  base_policy = policy.TabularPolicy(game)
  responder = best_response_oracle.BestResponseOracle(
      game=game, policy=base_policy)

  # Each agent gets its own copy of the base policy rather than a shared
  # reference to it.
  initial_agents = []
  for _ in range(FLAGS.n_players):
    initial_agents.append(base_policy.__copy__())
  return responder, initial_agents