Example #1
    def test_policy_zero_is_uniform(self, linear_averaging,
                                    regret_matching_plus, alternating_updates):
        # We use Leduc rather than Kuhn because Leduc has information states
        # with illegal actions and Kuhn does not, so this also exercises the
        # legal-action masking of the uniform policy.
        game = pyspiel.load_game("leduc_poker")
        cfr_solver = cfr._CFRSolver(game,
                                    regret_matching_plus=regret_matching_plus,
                                    linear_averaging=linear_averaging,
                                    alternating_updates=alternating_updates)

        # Before any iterations have run, both the current policy and the
        # average policy should be exactly uniform over legal actions.
        np.testing.assert_array_equal(
            _LEDUC_UNIFORM_POLICY.action_probability_array,
            cfr_solver.current_policy().action_probability_array)
        np.testing.assert_array_equal(
            _LEDUC_UNIFORM_POLICY.action_probability_array,
            cfr_solver.average_policy().action_probability_array)
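The fixture `_LEDUC_UNIFORM_POLICY` is not shown in this snippet. A minimal sketch of how it could be built, assuming OpenSpiel's `policy.TabularPolicy`, which initializes every information state to a uniform distribution over its legal actions:

import pyspiel
from open_spiel.python import policy

# Hypothetical reconstruction of the missing fixture: a freshly constructed
# TabularPolicy is uniform over legal actions, which is exactly what the
# test compares against.
_LEDUC_UNIFORM_POLICY = policy.TabularPolicy(pyspiel.load_game("leduc_poker"))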
Example #2
    def test_cfr_kuhn_poker_runs_with_multiple_players(self, linear_averaging,
                                                       regret_matching_plus,
                                                       alternating_updates):
        num_players = 3

        game = pyspiel.load_game("kuhn_poker", {"players": num_players})
        cfr_solver = cfr._CFRSolver(game,
                                    regret_matching_plus=regret_matching_plus,
                                    linear_averaging=linear_averaging,
                                    alternating_updates=alternating_updates)
        for _ in range(10):
            cfr_solver.evaluate_and_update_policy()
        average_policy = cfr_solver.average_policy()
        average_policy_values = expected_game_score.policy_value(
            game.new_initial_state(), [average_policy] * num_players)
        # Smoke test: we only check that the three-player value computation
        # runs without error; the values themselves are not asserted.
        del average_policy_values
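Outside the test class, the same loop can be run with OpenSpiel's public `cfr.CFRSolver` wrapper, and convergence tracked with `exploitability.nash_conv`, which also handles games with more than two players. A minimal sketch, with an illustrative iteration count:

import pyspiel
from open_spiel.python.algorithms import cfr
from open_spiel.python.algorithms import exploitability

game = pyspiel.load_game("kuhn_poker", {"players": 3})
solver = cfr.CFRSolver(game)
for _ in range(10):
    solver.evaluate_and_update_policy()
# NashConv sums each player's best-response gain against the average
# policy; it approaches 0 as the policy approaches a Nash equilibrium.
print(exploitability.nash_conv(game, solver.average_policy()))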
Example #3
    def test_simultaneous_two_step_avg_1b_seq_in_kuhn_poker(
            self, regret_matching_plus, initialize_cumulative_values):
        num_players = 2
        game = pyspiel.load_game("kuhn_poker", {"players": num_players})
        cfr_solver = cfr._CFRSolver(
            game,
            initialize_cumulative_values=initialize_cumulative_values,
            regret_matching_plus=regret_matching_plus,
            linear_averaging=False,
            alternating_updates=False)

        def check_avg_policy_is_uniform_random():
            """Asserts the average policy is uniform at every info state."""
            policy = cfr_solver.average_policy()
            for player_info_states in policy.states_per_player:
                for info_state in player_info_states:
                    state_policy = policy.policy_for_key(info_state)
                    np.testing.assert_allclose(state_policy,
                                               [1.0 / len(state_policy)] *
                                               len(state_policy))

        check_avg_policy_is_uniform_random()

        cfr_solver.evaluate_and_update_policy()
        check_avg_policy_is_uniform_random()

        cfr_solver.evaluate_and_update_policy()

        # The acting player at information state 1b is player 1, who has not
        # acted before, so their reach probability for 1b is 1 and the
        # sequence probability of each action equals the probability of that
        # action at 1b. On the first iteration this probability is 0.5 for
        # both actions. On the second iteration the current policy is [0, 1],
        # so the cumulative policy sums become [0.5, 0.5 + 1] = [0.5, 1.5].
        # Normalizing these sums yields the average policy.
        normalization = 0.5 + 0.5 + 1
        np.testing.assert_allclose(
            cfr_solver.average_policy().policy_for_key("1b"),
            [0.5 / normalization, (0.5 + 1) / normalization])
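The averaging arithmetic in that comment can be verified by hand. A tiny check, independent of the solver, using the cumulant values stated in the comment above:

import numpy as np

# Iteration 1 adds the uniform policy [0.5, 0.5] at "1b"; iteration 2 adds
# the then-current policy [0, 1], giving cumulative sums [0.5, 1.5].
cumulants = np.array([0.5, 0.5]) + np.array([0.0, 1.0])
average = cumulants / cumulants.sum()  # [0.25, 0.75]
np.testing.assert_allclose(average, [0.25, 0.75])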