示例#1
0
def CFR_BR_Solving(game, iterations, save_every=0, save_prefix='base'):
    """Run CFR-BR on ``game`` and save snapshots of its average policy.

    The file written under label ``k`` always holds the average policy
    obtained after exactly ``k`` solver updates (label ``0`` is the solver's
    initial policy).

    Args:
        game: Game object handed to ``cfr_br.CFRBRSolver``.
        iterations: Number of ``evaluate_and_update_policy`` calls to perform.
            Must be non-negative.
        save_every: When non-zero, a snapshot is also saved before every
            update whose index is a multiple of this value. ``0`` disables
            intermediate snapshots.
        save_prefix: Sub-directory name used in the snapshot path
            ``policies/CFRBR/<save_prefix>/<label>``.

    Raises:
        ValueError: If ``iterations`` is negative.
    """
    if iterations < 0:
        raise ValueError(
            "iterations must be non-negative, got {}".format(iterations))

    def save_cfr_br(completed_updates):
        # The label is passed explicitly rather than read from the loop
        # variable, so the helper also works when the loop never runs.
        policy = cfr_solver.average_policy()
        # NOTE(review): pairs each key of `state_lookup` with one row of
        # `action_probability_array`; presumably iteration order of the
        # lookup matches row order -- confirm against the policy class.
        policy = dict(zip(policy.state_lookup, policy.action_probability_array))
        policy_handler.save_to_tabular_policy(
            game, policy,
            "policies/CFRBR/{}/{}".format(save_prefix, completed_updates))

    cfr_solver = cfr_br.CFRBRSolver(game)
    for it in range(iterations):
        # Snapshot BEFORE updating: at this point exactly `it` updates have
        # been applied, which is what the label promises.
        if save_every != 0 and it % save_every == 0:
            save_cfr_br(it)
        cfr_solver.evaluate_and_update_policy()
    # Final snapshot: exactly `iterations` updates have been applied.
    save_cfr_br(iterations)
示例#2
0
  def test_policy_and_average_policy(self):
    """Average policy after 300 CFR-BR updates matches Kuhn poker's Nash value."""
    kuhn = pyspiel.load_game("kuhn_poker")
    solver = cfr_br.CFRBRSolver(kuhn)

    num_updates = 300
    for _ in range(num_updates):
      solver.evaluate_and_update_policy()

    # Both players use the same averaged policy when scoring the game.
    avg = solver.average_policy()
    values = expected_game_score.policy_value(
        kuhn.new_initial_state(), [avg, avg])

    # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker
    nash_value = 1 / 18
    np.testing.assert_allclose(values, [-nash_value, nash_value], atol=1e-3)

    # Only checks that the current policy can be requested without raising.
    solver.current_policy()
示例#3
0
  def test_policy_zero_is_uniform(self, linear_averaging, regret_matching_plus):
    """A freshly built solver's current and average policies equal the uniform one."""
    leduc = pyspiel.load_game("leduc_poker")
    solver = cfr_br.CFRBRSolver(
        leduc,
        linear_averaging=linear_averaging,
        regret_matching_plus=regret_matching_plus)

    # No update has been run yet, so both accessors are compared against the
    # module-level uniform Leduc policy.
    for get_policy in (solver.current_policy, solver.average_policy):
      np.testing.assert_array_equal(
          _LEDUC_UNIFORM_POLICY.action_probability_array,
          get_policy().action_probability_array)