Example #1
    def test_make_plot(self):
        logger = Logger(xlabel="x", ylabel="y", legend="test")
        for x in range(10):
            logger.add_point(x=x, y=x * x)
        self.assertEqual(9 * 9, logger.ys[9])
        save_path = './newtest/test.png'
        save_dir = os.path.dirname(save_path)
        if os.path.exists(save_dir):
            shutil.rmtree(save_dir)

        logger.make_plot(save_path=save_path)
        shutil.rmtree(save_dir)
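The test above only exercises a small slice of the Logger API: a constructor that takes axis labels and a legend, add_point(x=..., y=...) accumulating values into xs/ys, and make_plot(save_path=...) writing a PNG (and, judging by the directory cleanup, creating the target directory itself). Below is a minimal sketch of an interface with that shape, assuming matplotlib; the names and behaviour are inferred from these examples only, not taken from the rlcard implementation.

# Minimal, hypothetical sketch of the interface the test above exercises.
# This is NOT the rlcard Logger; everything here is inferred from the calls
# made in the examples on this page.
import os
import matplotlib
matplotlib.use('Agg')  # write files without a display
import matplotlib.pyplot as plt

class MinimalLogger(object):
    def __init__(self, xlabel='', ylabel='', legend=''):
        self.xlabel, self.ylabel, self.legend = xlabel, ylabel, legend
        self.xs, self.ys = [], []

    def add_point(self, x=None, y=None):
        self.xs.append(x)
        self.ys.append(y)

    def make_plot(self, save_path=''):
        # Create the target directory first, since the test deletes it beforehand.
        save_dir = os.path.dirname(save_path)
        if save_dir and not os.path.exists(save_dir):
            os.makedirs(save_dir)
        fig, ax = plt.subplots()
        ax.plot(self.xs, self.ys, label=self.legend)
        ax.set_xlabel(self.xlabel)
        ax.set_ylabel(self.ylabel)
        ax.legend()
        fig.savefig(save_path)
        plt.close(fig)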
Example #2
def train():
    env = rlcard.make('mahjong', {'allow_step_back': True})
    # env = rlcard.make('mahjong')

    # Set the number of iterations and how frequently we evaluate/save the plot
    evaluate_every = 100
    save_plot_every = 1000
    evaluate_num = 10000
    episode_num = 100000

    # The paths for saving the logs and learning curves
    root_path = './experiments/mahjong_cfr_result/'
    log_path = root_path + 'log.txt'
    csv_path = root_path + 'performance.csv'
    figure_path = root_path + 'figures/'

    # Set a global seed
    set_global_seed(0)

    # Initialize the MCCFR agent
    agent = MCCFRAgent(env)
    # Init a Logger to plot the learning curve
    logger = Logger(root_path)

    for episode in range(episode_num + 1):
        agent.train()
        print('\rIteration {}'.format(episode), end='')
        if episode % 5000 == 0:
            agent.save(episode)
        # # Evaluate the performance. Play with NFSP agents.
        # if episode % evaluate_every == 0:
        #     reward = 0
        #     for eval_episode in range(evaluate_num):
        #         _, payoffs = eval_env.run(is_training=False)
        #
        #         reward += payoffs[0]
        #
        #     logger.log('\n########## Evaluation ##########')
        #     logger.log('Iteration: {} Average reward is {}'.format(episode, float(reward)/evaluate_num))
        #
        #     # Add point to logger
        #     logger.add_point(x=env.timestep, y=float(reward)/evaluate_num)
        #
        # # Make plot
        # if episode % save_plot_every == 0 and episode > 0:
        #     logger.make_plot(save_path=figure_path+str(episode)+'.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
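Every path in this example is built by plain string concatenation, which only works while each prefix keeps its trailing slash. A small sketch of the same setup using os.path.join, plus an explicit makedirs that is harmless if make_plot already creates the directory (Example #1 suggests it does):

import os

root_path = './experiments/mahjong_cfr_result/'
log_path = os.path.join(root_path, 'log.txt')
csv_path = os.path.join(root_path, 'performance.csv')
figure_path = os.path.join(root_path, 'figures')

# Create the figure directory up front; redundant if make_plot handles it.
os.makedirs(figure_path, exist_ok=True)

episode_num = 100000  # as in the example above
# Final learning-curve path, written after the last episode.
final_plot_path = os.path.join(figure_path, 'final_{}.png'.format(episode_num))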
Example #3
            reward = 0
            reward_list = []
            for eval_episode in range(evaluate_num):
                print('\rEPISODE {} - Eval {} over {} - Number of game played {} - {}'.format(
                    episode, eval_episode, evaluate_num, total_game_played,
                    time_difference_good_format(seconds, time.time())), end='')
                _, payoffs = eval_env.run(is_training=False)
                total_game_played += 1
                reward_list.append(payoffs[0])
                reward += payoffs[0]

            logger.log('\n########## Evaluation - Episode {} ##########'.format(episode))
            logger.log('Timestep: {} Average reward is {}'.format(env.timestep, float(reward) / evaluate_num))

            # Add point to logger
            logger.add_point(x=env.timestep, y=float(reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path=figure_path + str(episode) + '.png')
            logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                                  save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(episode) + '.png')
    logger.make_plot_hist(save_path_1=figure_path + str(episode) + '_hist.png',
                          save_path_2=figure_path + str(episode) + '_freq.png', reward_list=reward_list)
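Examples #3 and #4 call make_plot_hist next to make_plot, saving a histogram of the evaluation rewards and a second frequency plot (Example #4 also passes a taking_list, so that argument is presumably optional). The real implementation lives in the project's Logger; below is only a rough, hypothetical sketch of what such a helper could look like with matplotlib.

import collections
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

def make_plot_hist(save_path_1, save_path_2, reward_list, taking_list=None):
    # Hypothetical sketch, not the project's implementation.
    # Histogram of the evaluation rewards.
    fig, ax = plt.subplots()
    ax.hist(reward_list, bins=20)
    ax.set_xlabel('reward')
    ax.set_ylabel('count')
    fig.savefig(save_path_1)
    plt.close(fig)

    # Frequency of each distinct reward value.
    counts = collections.Counter(reward_list)
    values = sorted(counts)
    fig, ax = plt.subplots()
    ax.bar(range(len(values)), [counts[v] for v in values])
    ax.set_xticks(range(len(values)))
    ax.set_xticklabels([str(v) for v in values])
    ax.set_xlabel('reward value')
    ax.set_ylabel('frequency')
    fig.savefig(save_path_2)
    plt.close(fig)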
Example #4
                taking_list.append(eval_env.game.players[0].taking)

            logger_random.log(
                '\n########## Evaluation Against Random - Episode {} ##########'
                .format(episode))
            logger_random.log(
                'Timestep: {} Average reward against random is {}'.format(
                    env.timestep,
                    float(reward_random) / evaluate_num))

            # Add point to logger
            logger_random.add_point(x=env.timestep,
                                    y=float(reward_random) / evaluate_num)

            # Make plot
            logger_random.make_plot(save_path=figure_path_random +
                                    str(episode) + '.png')
            logger_random.make_plot_hist(
                save_path_1=figure_path_random + str(episode) + '_hist.png',
                save_path_2=figure_path_random + str(episode) + '_freq.png',
                reward_list=reward_random_list,
                taking_list=taking_list)

            # Eval against last agent
            reward_opponent = 0
            reward_opponent_list = []
            taking_list = []
            eval_env.set_agents([agent] + [opponent_agent] *
                                (env.player_num - 1))
            for eval_episode in range(evaluate_num):
                print(
                    '\rEPISODE {} - Eval Opponent {} over {} - Number of game played {} - {}'
Example #5
        next_state, reward, done = env.step(action)
        ts = (state, action, reward, next_state, done)
        agent.feed(ts)

        train_count = timestep - (memory_init_size + norm_step)
        if train_count > 0:
            loss = agent.train()
            print('\rINFO - Step {}, loss: {}'.format(timestep, loss), end='')

        if timestep % evaluate_every == 0:
            rewards = []
            state = eval_env.reset()
            for _ in range(evaluate_num):
                action = agent.eval_step(state)
                # Step the evaluation env (not the training env) and keep the
                # returned state so the next action is chosen from it.
                state, reward, done = eval_env.step(action)
                if done:
                    rewards.append(reward)
                    state = eval_env.reset()
            logger.log('\n########## Evaluation ##########')
            logger.log('Timestep: {} Average reward is {}'.format(
                timestep, np.mean(rewards)))

            # Add point to logger (use the same average that was just logged)
            logger.add_point(x=env.timestep, y=np.mean(rewards))

        # Make plot
        if timestep % save_plot_every == 0:
            logger.make_plot(save_path=figure_path + str(timestep) + '.png')

    # Make the final plot
    logger.make_plot(save_path=figure_path + 'final_' + str(timestep) + '.png')
Example #6
    def test_make_plot(self):
        logger = Logger(xlabel="x", ylabel="y", legend="test")
        for x in range(10):
            logger.add_point(x=x, y=x * x)
        self.assertEqual(9 * 9, logger.ys[9])
        logger.make_plot(save_path='./newtest/test.png')
Example #7
            bet_logger.log(
                'Timestep: {} Average bet reward is {}. Average change reward is {}'
                .format(env.timestep,
                        float(bet_reward) / evaluate_num,
                        float(change_reward) / evaluate_num))

            # send_slack('Episode: {} Average bet reward is {}. Average change reward is {}'.format(episode, float(bet_reward)/evaluate_num, float(change_reward)/evaluate_num))

            # Add point to logger
            bet_logger.add_point(x=env.timestep,
                                 y=float(bet_reward) / evaluate_num)
            change_logger.add_point(x=env.timestep,
                                    y=float(change_reward) / evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            bet_logger.make_plot(save_path=figure_path + 'bet/' +
                                 str(episode) + '.png')
            change_logger.make_plot(save_path=figure_path + 'change/' +
                                    str(episode) + '.png')

        if episode % checkpoint_every == 0 and episode > 0:
            bet_path, change_path = agent.save(checkpoint_path, episode)
            print('Saved to {}, {}'.format(bet_path, change_path))

    # Make the final plot
    bet_logger.make_plot(save_path=figure_path + 'bet/' + str(episode) +
                         '.png')
    change_logger.make_plot(save_path=figure_path + 'change/' + str(episode) +
                            '.png')
Example #8
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1

            # Train the agent
            if step_counter > memory_init_size + norm_step:
                agent.train()

        # Evaluate the performance
        if episode % evaluate_every == 0:
            reward = 0
            for eval_episode in range(evaluate_num):
                _, payoffs = env.run(is_training=False)
                reward += payoffs[0]

            logger.log('\n########## Evaluation ##########')
            logger.log('Episode: {} Average reward is {}'.format(episode, float(reward)/evaluate_num))

            # Add point to logger
            logger.add_point(x=episode, y=float(reward)/evaluate_num)

        # Make plot
        if episode % save_plot_every == 0 and episode > 0:
            logger.make_plot(save_path='./experiments/blackjack_dqn_result/'+str(episode)+'.png')

    # Make the final plot
    logger.make_plot(save_path='./experiments/blackjack_dqn_result/'+'final_'+str(episode)+'.png')
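Examples #3, #5, #7 and #8 all repeat the same evaluate-and-log pattern: play evaluate_num hands with is_training=False, average the first player's payoff, log it, and add a point to the learning curve. A small helper capturing that pattern is sketched below; the function name and signature are placeholders, not part of rlcard, and the env is assumed to expose run(is_training=False) returning (trajectories, payoffs) as in the examples above.

def evaluate_and_log(eval_env, logger, evaluate_num, x):
    # Run evaluate_num evaluation episodes and record the average payoff
    # of player 0 as one point on the learning curve.
    total_reward = 0.0
    for _ in range(evaluate_num):
        _, payoffs = eval_env.run(is_training=False)
        total_reward += payoffs[0]
    avg_reward = total_reward / evaluate_num
    logger.log('\n########## Evaluation ##########')
    logger.log('Timestep: {} Average reward is {}'.format(x, avg_reward))
    logger.add_point(x=x, y=avg_reward)
    return avg_reward

# Usage inside the training loop, e.g.:
# if episode % evaluate_every == 0:
#     evaluate_and_log(eval_env, logger, evaluate_num, env.timestep)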