def evaluate_players(p1: Player, p2: Player, games_per_battle=100, num_battles=100, silent=False):
    """Run a sequence of battles between two players and record per-battle results.

    Creates a fresh TF session (registered with TFSessionManager), initializes
    all global variables, runs `num_battles` battles of `games_per_battle`
    games each, and clears the session afterwards.

    Args:
        p1: First player (e.g. a learning agent).
        p2: Second player (e.g. a fixed opponent).
        games_per_battle: Number of games played per battle.
        num_battles: Number of battles to run.
        silent: Passed through to `battle` to suppress per-game output.
            Defaults to False, matching the previous hard-coded behavior.
            (Added because callers invoke this with `silent=True`, which
            previously raised TypeError.)

    Returns:
        Tuple (game_number, p1_wins, p2_wins, draws) — four parallel lists,
        one entry per battle; `game_number` is the 1-based battle index.
    """
    p1_wins = []
    p2_wins = []
    draws = []
    game_number = []

    # Fresh session for this evaluation run; registered globally so the
    # players' TF ops can find it.
    TFSessionManager.set_session(tf.Session())
    TFSessionManager.get_session().run(tf.global_variables_initializer())

    # 1-based battle index replaces the former manual game_counter increment.
    for battle_idx in range(1, num_battles + 1):
        p1win, p2win, draw = battle(p1, p2, games_per_battle, silent)
        p1_wins.append(p1win)
        p2_wins.append(p2win)
        draws.append(draw)
        game_number.append(battle_idx)

    # Release the shared session reference when done.
    TFSessionManager.set_session(None)
    return game_number, p1_wins, p2_wins, draws
from tic_tac_toe.RndMinMaxAgent import RndMinMaxAgent
from tic_tac_toe.DirectPolicyAgent import DirectPolicyAgent

# Sweep over (loss_value, draw_value) reward pairs for the policy-gradient agent.
min_reward = -3
max_reward = 3
num_reward_steps = 1 + max_reward - min_reward
# NOTE(review): `rewards` is allocated but never written in the visible code —
# presumably filled in further down the file; verify against the full source.
rewards = np.zeros((num_reward_steps, num_reward_steps))

# Only pairs with draw_reward > loss_reward are evaluated; loss_reward stops at
# max_reward - 1 so that at least one strictly larger draw_reward exists.
for loss_reward in range(min_reward, max_reward):
    for draw_reward in range(loss_reward + 1, max_reward + 1):
        # Fresh TF graph + session per configuration so successive runs do not
        # share variables or accumulate graph nodes.
        tf.reset_default_graph()
        TFSessionManager.set_session(tf.Session())
        sess = TFSessionManager.get_session()

        nnplayer = DirectPolicyAgent("PolicyLearner1", loss_value=loss_reward, draw_value=draw_reward)
        rm_player = RndMinMaxAgent()
        sess.run(tf.global_variables_initializer())

        # NOTE(review): `evaluate_players` as defined earlier in this file takes
        # no `silent` keyword — this call raises TypeError unless the signature
        # is extended to accept it; confirm.
        game_number, p1_wins, p2_wins, draws = evaluate_players(
            nnplayer, rm_player, num_battles=1000, silent=True)  # , num_battles = 20)
        # NOTE(review): this print statement is truncated in the visible chunk;
        # its closing arguments lie beyond this view.
        print("With loss reward {} and draw reward {} we get draws: {}".format(