def test_kuhn_poker_always_pass_p0(self):
     """Checks the calculator when player 0 uses `FirstActionPolicy`.

     Runs the action-value-vs-best-response calculator on Kuhn poker for
     player 0 at the six listed information states and verifies the
     exploitability, the per-action values against the best response, the
     counterfactual reach probabilities and player 0's own reach
     probabilities.
     """
     game = pyspiel.load_game("kuhn_poker")
     calc = action_value_vs_best_response.Calculator(game)
     (expl, avvbr, cfrp,
      player_reach_probs) = calc(0, policy.FirstActionPolicy(game),
                                 ["0", "1", "2", "0pb", "1pb", "2pb"])
     self.assertAlmostEqual(expl, 1.)
     np.testing.assert_allclose(
         avvbr,
         [
             # Opening bet. If we pass, we always lose (pass-pass with op's K,
             # otherwise pass-bet-pass).
             # If we bet, we always win (because op's best response is to pass,
             # because this is an unreachable state and we break ties in favour
             # of the lowest action).
             [-1, 1],
             [-1, 1],
             [-1, 1],
             # We pass, opp bets into us. This can be either J or Q (K will pass
             # because of the tie-break rules).
             # So we are guaranteed to be winning with Q or K.
             [-1, -2],  # 0pb
             [-1, 2],  # 1pb
             [-1, 2],  # 2pb
         ])
     np.testing.assert_allclose(cfrp,
                                [1 / 3, 1 / 3, 1 / 3, 1 / 6, 1 / 6, 1 / 3])
     # Computed value goes first: assert_allclose(actual, desired) scales the
     # relative tolerance by `desired` and labels its failure report that way.
     np.testing.assert_allclose(player_reach_probs,
                                [1., 1., 1., 1., 1., 1.])
 def __init__(self, game):
   """Initializes a loss calculation for the given game.

   Builds a tabular policy and an action-value-vs-best-response calculator
   for `game` and stores both on the instance.

   Args:
     game: The game to compute the loss for. Its `num_players()` must equal
       `_NUM_PLAYERS`.

   Raises:
     ValueError: If the game's player count differs from `_NUM_PLAYERS`.
   """
   player_count_matches = game.num_players() == _NUM_PLAYERS
   if not player_count_matches:
     message = "Game {} does not have {} players.".format(game, _NUM_PLAYERS)
     raise ValueError(message)
   # Construction order is kept as in the original: policy first, then the
   # calculator.
   tabular_policy = policy.TabularPolicy(game)
   self.tabular_policy = tabular_policy
   q_value_calculator = action_value_vs_best_response.Calculator(game)
   self.q_value_calculator = q_value_calculator
Exemplo n.º 3
0
 def test_kuhn_poker_uniform(self):
     """Checks the calculator when player 0 uses `UniformRandomPolicy`.

     Runs the action-value-vs-best-response calculator on Kuhn poker for
     player 0 at the six listed information states and verifies the
     exploitability, the per-action values against the best response, the
     counterfactual reach probabilities and player 0's own reach
     probabilities.
     """
     game = pyspiel.load_game("kuhn_poker")
     calc = action_value_vs_best_response.Calculator(game)
     # The calculator result is unpacked into four values, matching the
     # always-pass test for the same Calculator; unpacking only three would
     # raise ValueError if four are returned.
     (expl, avvbr, cfrp,
      player_reach_probs) = calc(0, policy.UniformRandomPolicy(game),
                                 ["0", "1", "2", "0pb", "1pb", "2pb"])
     self.assertAlmostEqual(expl, 15 / 36)
     np.testing.assert_allclose(
         avvbr,
         [
             [-1.5, -2.0],  # 0 (better to pass)
             [-0.5, -0.5],  # 1 (same)
             [0.5, 1.5],  # 2 (better to bet)
             [-1.0, -2.0],  # 0pb - losing
             [-1.0, 0.0],  # 1pb - best response is bet always
             [-1.0, 2.0],  # 2pb - winning
         ])
     np.testing.assert_allclose(cfrp,
                                [1 / 3, 1 / 3, 1 / 3, 1 / 3, 1 / 3, 1 / 3])
     # Player 0 reaches the "pb" states only by passing first, which the
     # uniform policy does with probability 1/2; the opening states are
     # reached with probability 1.
     np.testing.assert_allclose(player_reach_probs,
                                [1, 1, 1, 1 / 2, 1 / 2, 1 / 2])