Пример #1
0
            # --- Evaluation pass -------------------------------------------
            # Play `evaluate_num` games with learning disabled and accumulate
            # the payoff of seat 0 (presumably the agent under evaluation —
            # TODO confirm against the set_agents call earlier in this file).
            reward = 0
            reward_list = []  # per-game payoffs, later fed to the histogram plot
            for eval_episode in range(evaluate_num):
                # '\r' + end='' keeps this as a single in-place progress line.
                print('\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'.format(episode, eval_episode,
                                                                                              evaluate_num,
                                                                                              total_game_played,
                                                                                              time_difference_good_format(
                                                                                                  seconds,
                                                                                                  time.time())),
                      end='')
                # Run one full game without training; keep only the payoffs.
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_list.append(payoffs[0])
                reward += payoffs[0]

            # Log the mean payoff over the evaluation batch.
            logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
            logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

        # Make plot
        # Periodic snapshot of the learning curve and reward distribution.
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')
            logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                                  save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)

    # Make the final plot
    # NOTE(review): only the curve gets the 'final_' prefix; the hist/freq
    # paths below reuse the periodic filenames and will overwrite the last
    # per-episode histogram — confirm whether that is intended.
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)
Пример #2
0
                .format(episode))
            # Log the mean payoff measured against the random baseline.
            logger_random.log(
                'Timestep: {} Average reward against random is {}'.format(
                    env.timestep,
                    float(reward_random) / evaluate_num))

            # Add point to logger
            logger_random.add_point(x=env.timestep,
                                    y=float(reward_random) / evaluate_num)

            # Make plot
            # Curve plus reward histogram/frequency plots for the
            # vs-random evaluation batch just finished.
            logger_random.make_plot(save_path=figure_path_random +
                                    str(episode) + '.png')
            logger_random.make_plot_hist(
                save_path_1=figure_path_random + str(episode) + '_hist.png',
                save_path_2=figure_path_random + str(episode) + '_freq.png',
                reward_list=reward_random_list,
                taking_list=taking_list)

            # Eval against last agent
            # Reset accumulators (taking_list was just consumed by the plot
            # above) and seat the trained agent against copies of
            # opponent_agent — presumably a frozen earlier snapshot of the
            # agent; TODO confirm where opponent_agent is assigned.
            reward_opponent = 0
            reward_opponent_list = []
            taking_list = []
            eval_env.set_agents([agent] + [opponent_agent] *
                                (env.player_num - 1))
            for eval_episode in range(evaluate_num):
                # In-place progress line for the vs-opponent evaluation.
                print(
                    '\rEPISODE {} - Eval Opponent {} over {} - Number of game played {} - {}'
                    .format(episode, eval_episode, evaluate_num,
                            total_game_played,
                            time_difference_good_format(seconds, time.time())),