def CFR_BR_Solving(game, iterations, save_every=0, save_prefix='base'):
    """Run CFR-BR on `game`, optionally checkpointing the average policy.

    Args:
        game: a pyspiel game instance to solve.
        iterations: number of solver iterations to run.
        save_every: checkpoint period; 0 disables periodic checkpoints.
        save_prefix: subdirectory name under "policies/CFRBR/" for checkpoints.
    """
    cfr_solver = cfr_br.CFRBRSolver(game)

    def save_cfr_br():
        # Snapshot the current average policy as {info_state: action_probs}
        # and persist it, labelled with the current iteration index `it`.
        avg = cfr_solver.average_policy()
        tabular = dict(zip(avg.state_lookup, avg.action_probability_array))
        policy_handler.save_to_tabular_policy(
            game, tabular, "policies/CFRBR/{}/{}".format(save_prefix, it))

    for it in range(iterations + 1):
        if save_every != 0 and it % save_every == 0:  # order is important
            # Checkpoint reflects the state *before* this iteration's update.
            save_cfr_br()
        cfr_solver.evaluate_and_update_policy()
    # Final checkpoint after the last update.
    save_cfr_br()
def test_policy_and_average_policy(self):
    """After 300 CFR-BR iterations on Kuhn poker, the average policy's
    expected values are close to the known Nash value of the game."""
    game = pyspiel.load_game("kuhn_poker")
    cfrbr_solver = cfr_br.CFRBRSolver(game)
    for _ in range(300):
        cfrbr_solver.evaluate_and_update_policy()

    avg_policy = cfrbr_solver.average_policy()
    values = expected_game_score.policy_value(
        game.new_initial_state(), [avg_policy] * 2)
    # 1/18 is the Nash value. See https://en.wikipedia.org/wiki/Kuhn_poker
    np.testing.assert_allclose(values, [-1 / 18, 1 / 18], atol=1e-3)
    # Smoke-check that the current policy remains retrievable post-solve.
    cfrbr_solver.current_policy()
def test_policy_zero_is_uniform(self, linear_averaging, regret_matching_plus):
    """Before any updates, both the current and the average policy of a
    freshly constructed CFR-BR solver must equal the uniform policy."""
    game = pyspiel.load_game("leduc_poker")
    solver = cfr_br.CFRBRSolver(
        game,
        regret_matching_plus=regret_matching_plus,
        linear_averaging=linear_averaging)
    # Check current policy first, then average policy, against the
    # precomputed uniform Leduc tabular policy.
    for policy in (solver.current_policy(), solver.average_policy()):
        np.testing.assert_array_equal(
            _LEDUC_UNIFORM_POLICY.action_probability_array,
            policy.action_probability_array)