def test_exploitability_is_zero_on_nash(self, alpha):
    """Checks that the Kuhn poker equilibrium policy has zero exploitability.

    A similar test exists in:
    open_spiel/python/algorithms/exploitability_test.py

    Args:
        alpha: Value passed as the `alpha` keyword to
            `data.kuhn_nash_equilibrium` when building the policy under test.
    """
    kuhn = pyspiel.load_game("kuhn_poker")
    equilibrium = data.kuhn_nash_equilibrium(alpha=alpha)
    measured = exploitability.exploitability(kuhn, equilibrium)
    self.assertAlmostEqual(0, measured)
def test_2p_nash_conv(self):
    """Checks NashConv of several fixed policies on Kuhn and Leduc poker.

    Every (game, policy, expected value) case runs inside its own sub-test,
    so a failure is reported together with the game and policy that caused
    it, and the remaining cases are still evaluated instead of being skipped
    after the first failing assertion.
    """
    kuhn_poker = pyspiel.load_game("kuhn_poker")
    leduc_poker = pyspiel.load_game("leduc_poker")
    # Note: the first-action policy is "AlwaysFold".
    test_parameters = [
        (kuhn_poker, policy.UniformRandomPolicy(kuhn_poker),
         0.9166666666666666),
        (kuhn_poker, policy.FirstActionPolicy(kuhn_poker), 2.),
        (kuhn_poker, data.kuhn_nash_equilibrium(alpha=0.2), 0.),
        (leduc_poker, policy.FirstActionPolicy(leduc_poker), 2.),
        (leduc_poker, policy.UniformRandomPolicy(leduc_poker),
         4.747222222222222),
    ]
    for game, test_policy, expected_value in test_parameters:
        # The sub-test parameters only label the failure output; they do not
        # affect what is being asserted.
        with self.subTest(game=str(game),
                          policy=type(test_policy).__name__,
                          expected=expected_value):
            self.assertAlmostEqual(
                exploitability.nash_conv(game, test_policy), expected_value)
def test_kuhn_poker_optimal(self):
    """Checks that the alpha=0.2 Kuhn equilibrium policy has NashConv 0."""
    kuhn = pyspiel.load_game("kuhn_poker")
    equilibrium = data.kuhn_nash_equilibrium(alpha=0.2)
    measured = exploitability.nash_conv(kuhn, equilibrium)
    self.assertAlmostEqual(measured, 0)
class ExploitabilityTest(parameterized.TestCase):
    """Tests exploitability, best response and NashConv on poker games."""

    def test_exploitability_on_kuhn_poker_uniform_random(self):
        """Exploitability of the uniform policy is half its NashConv."""
        # NashConv of the uniform random policy is taken from (found on
        # Google books):
        # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        reference_nash_conv = 11 / 12
        measured = exploitability.exploitability(kuhn, uniform)
        self.assertAlmostEqual(measured, reference_nash_conv / 2)

    def test_kuhn_poker_uniform_random_best_response_pid0(self):
        """Best response of player 0 against the uniform policy on Kuhn."""
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        response = exploitability.best_response(kuhn, uniform, player_id=0)
        expected_actions = {
            "0": 1,  # Bet in case opponent folds when winning
            "1": 1,  # Bet in case opponent folds when winning
            "2": 0,  # Both equally good (we return the lowest action)
            # Some of these will never happen under the best-response
            # policy, but we have computed best-response actions anyway.
            "0pb": 0,  # Fold - we're losing
            "1pb": 1,  # Call - we're 50-50
            "2pb": 1,  # Call - we've won
        }
        self.assertEqual(response["best_response_action"], expected_actions)
        self.assertGreater(response["nash_conv"], 0.1)

    def test_kuhn_poker_uniform_random_best_response_pid1(self):
        """Best response of player 1 against the uniform policy on Kuhn."""
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        response = exploitability.best_response(kuhn, uniform, player_id=1)
        expected_actions = {
            # Bet is always best
            "0p": 1,
            "1p": 1,
            "2p": 1,
            # Call unless we know we're beaten
            "0b": 0,
            "1b": 1,
            "2b": 1,
        }
        self.assertEqual(response["best_response_action"], expected_actions)
        self.assertGreater(response["nash_conv"], 0.1)

    def test_kuhn_poker_uniform_random(self):
        """NashConv of the uniform random policy on Kuhn poker is 11/12."""
        # NashConv of the uniform random policy is taken from (found on
        # Google books):
        # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
        kuhn = pyspiel.load_game("kuhn_poker")
        uniform = policy.UniformRandomPolicy(kuhn)
        measured = exploitability.nash_conv(kuhn, uniform)
        self.assertAlmostEqual(measured, 11 / 12)

    def test_kuhn_poker_always_fold(self):
        """NashConv of the first-action policy on Kuhn poker is 2."""
        kuhn = pyspiel.load_game("kuhn_poker")
        first_action = policy.FirstActionPolicy(kuhn)
        measured = exploitability.nash_conv(kuhn, first_action)
        self.assertAlmostEqual(measured, 2)

    def test_kuhn_poker_optimal(self):
        """NashConv of the alpha=0.2 Kuhn equilibrium policy is 0."""
        kuhn = pyspiel.load_game("kuhn_poker")
        equilibrium = data.kuhn_nash_equilibrium(alpha=0.2)
        measured = exploitability.nash_conv(kuhn, equilibrium)
        self.assertAlmostEqual(measured, 0)

    def test_leduc_poker_uniform_random(self):
        """NashConv of the uniform random policy on Leduc poker."""
        # NashConv taken from independent implementations
        leduc = pyspiel.load_game("leduc_poker")
        uniform = policy.UniformRandomPolicy(leduc)
        measured = exploitability.nash_conv(leduc, uniform)
        self.assertAlmostEqual(measured, 4.747222222222222)

    def test_leduc_poker_always_fold(self):
        """NashConv of the first-action policy on Leduc poker is 2."""
        leduc = pyspiel.load_game("leduc_poker")
        first_action = policy.FirstActionPolicy(leduc)
        measured = exploitability.nash_conv(leduc, first_action)
        self.assertAlmostEqual(measured, 2)

    # Values for uniform policies taken from
    # https://link.springer.com/chapter/10.1007/978-3-319-75931-9_5
    # (including multiplayer games below). However, the value for Leduc
    # against the uniform policy is wrong in the paper. This has been
    # verified independently in a number of separate code bases. The 4.7472
    # value is correct.
    # Value for AlwaysFold is trivial: if you always fold, you win 0 chips,
    # but if you switch to AlwaysBet, you win 1 chip every time if playing
    # against a player who always folds.
    @parameterized.parameters(
        ("kuhn_poker", policy.UniformRandomPolicy, 0.9166666666666666),
        ("kuhn_poker", policy.FirstActionPolicy, 2.0),
        ("kuhn_poker", lambda _: data.kuhn_nash_equilibrium(alpha=0.2), 0.0),
        ("leduc_poker", policy.FirstActionPolicy, 2.0),
        ("leduc_poker", policy.UniformRandomPolicy, 4.7472222222222),
    )
    def test_2p_nash_conv(self, game_name, policy_func, expected):
        """Checks NashConv of `policy_func(game)` on the named game.

        Args:
            game_name: Name passed to `pyspiel.load_game`.
            policy_func: Callable that receives the loaded game and returns
                the policy to evaluate.
            expected: NashConv value the policy is expected to have.
        """
        loaded_game = pyspiel.load_game(game_name)
        candidate = policy_func(loaded_game)
        measured = exploitability.nash_conv(loaded_game, candidate)
        self.assertAlmostEqual(measured, expected)

    @parameterized.parameters(3, 4)
    def test_kuhn_poker_uniform_random_nash_conv(self, num_players):
        """Uniform play on multiplayer Kuhn poker has NashConv above 2."""
        kuhn = pyspiel.load_game("kuhn_poker", {"players": num_players})
        uniform = policy.UniformRandomPolicy(kuhn)
        measured = exploitability.nash_conv(kuhn, uniform)
        self.assertGreater(measured, 2)

    @parameterized.parameters(
        ("kuhn_poker", 2),
        ("kuhn_poker", 3),
        ("kuhn_poker", 4),
    )
    def test_python_same_as_cpp_for_multiplayer_uniform_random_nash_conv(
            self, game_name, num_players):
        """Python and pyspiel NashConv agree for a default tabular policy."""
        loaded_game = pyspiel.load_game(game_name, {"players": num_players})
        # TabularPolicy defaults to being a uniform random policy.
        tabular = policy.TabularPolicy(loaded_game)
        from_python = exploitability.nash_conv(loaded_game, tabular)
        as_dict = policy_utils.policy_to_dict(tabular, loaded_game)
        from_cpp = pyspiel.nash_conv(loaded_game, as_dict)
        self.assertAlmostEqual(from_python, from_cpp)

    def test_cpp_python_cfr_kuhn(self):
        """Python and pyspiel NashConv agree on a CFR average policy."""
        kuhn = pyspiel.load_game("kuhn_poker")
        cfr = pyspiel.CFRSolver(kuhn)
        iterations = 100
        for _ in range(iterations):
            cfr.evaluate_and_update_policy()
        average_policy = cfr.tabular_average_policy()
        from_cpp = pyspiel.nash_conv(kuhn, average_policy)
        converted = policy.pyspiel_policy_to_python_policy(
            kuhn, average_policy)
        from_python = exploitability.nash_conv(kuhn, converted)
        self.assertAlmostEqual(from_python, from_cpp)