# Play `evaluate_num` evaluation games and keep the first player's payoff
# from each one.
reward_list = []
for eval_episode in range(evaluate_num):
    # Progress line: '\r' + end='' rewrites the same console line each game.
    elapsed = time_difference_good_format(seconds, time.time())
    progress = '\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'.format(
        episode, eval_episode, evaluate_num, total_game_played, elapsed)
    print(progress, end='')

    _, payoffs = eval_env.run(is_training=False)
    total_game_played += 1
    reward_list.append(payoffs[0])

# Total payoff of player 0 over all evaluation games.
reward = sum(reward_list)

logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
average_reward = float(reward) / evaluate_num
logger.log('Timestep: {} Average reward is {}'.format(env.timestep, average_reward))

# Record the average reward against the current training timestep.
logger.add_point(x=env.timestep, y=average_reward)

# Periodic plots: every `save_plot_every` episodes, skipping episode 0.
if episode > 0 and episode % save_plot_every == 0:
    plot_prefix = figure_path + str(episode)
    logger.make_plot(save_path=plot_prefix + '.png')
    logger.make_plot_hist(
        save_path_1=plot_prefix + '_hist.png',
        save_path_2=plot_prefix + '_freq.png',
        reward_list=reward_list,
    )

# Final plots.
# NOTE(review): the original indentation was lost in this paste; this part
# presumably runs once after the training loop has finished -- confirm its
# placement against the full file.
# NOTE(review): the histogram paths below carry no 'final_' prefix, so they
# share file names with the periodic histogram of the same episode -- confirm
# this is intended.
hist_prefix = figure_path + str(episode)
logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
logger.make_plot_hist(
    save_path_1=hist_prefix + '_hist.png',
    save_path_2=hist_prefix + '_freq.png',
    reward_list=reward_list,
)
.format(episode)) logger_random.log( 'Timestep: {} Average reward against random is {}'.format( env.timestep, float(reward_random) / evaluate_num)) # Add point to logger logger_random.add_point(x=env.timestep, y=float(reward_random) / evaluate_num) # Make plot logger_random.make_plot(save_path=figure_path_random + str(episode) + '.png') logger_random.make_plot_hist( save_path_1=figure_path_random + str(episode) + '_hist.png', save_path_2=figure_path_random + str(episode) + '_freq.png', reward_list=reward_random_list, taking_list=taking_list) # Eval against last agent reward_opponent = 0 reward_opponent_list = [] taking_list = [] eval_env.set_agents([agent] + [opponent_agent] * (env.player_num - 1)) for eval_episode in range(evaluate_num): print( '\rEPISODE {} - Eval Opponent {} over {} - Number of game played {} - {}' .format(episode, eval_episode, evaluate_num, total_game_played, time_difference_good_format(seconds, time.time())),