Example #1
def test_log(self):
    log_dir = "./newtest/test_log.txt"
    if os.path.exists(log_dir):
        shutil.rmtree(log_dir)
    logger = Logger(log_dir)
    logger.log("test text")
    logger.log_performance(1, 1)
    logger.log_performance(2, 2)
    logger.log_performance(3, 3)
    logger.close_files()
    logger.plot('aaa')
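For context, the test above exercises the RLCard Logger end to end: it writes log text, records three performance points, closes the files, and renders a plot. Below is a minimal follow-up sketch that checks what the run left on disk; the file names log.txt, performance.csv and fig.png are assumptions about the Logger's output and may differ between versions.

import os

# Hypothetical check of the files the test run is expected to leave behind.
# The file names are assumptions; adjust them to the Logger version in use.
log_dir = "./newtest/test_log.txt"
for name in ("log.txt", "performance.csv", "fig.png"):
    path = os.path.join(log_dir, name)
    print(path, "exists:", os.path.exists(path))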
Example #2
# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0],
                               episode=episode)

# Save model
save_dir = 'models/mocsar_dqn_ra_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('DQN RA PyTorch')
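The fragment above saves the PyTorch DQN agent's weights with torch.save. A minimal sketch of reloading that checkpoint for inspection, using only plain PyTorch; how the dict is fed back into an agent is fork-specific and left as a comment.

import os
import torch

# Reload the checkpoint written above and inspect its keys (plain PyTorch).
save_dir = 'models/mocsar_dqn_ra_pytorch'
state_dict = torch.load(os.path.join(save_dir, 'model.pth'))
print(state_dict.keys())
# Restoring the weights into a fresh agent depends on the RLCard fork in use,
# e.g. something like agent.load(state_dict); that call is an assumption,
# not a confirmed API.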
Example #3
        env.game.num_players, env.game.num_cards, episode_num))

    # logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
    # logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)
        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep,
                                   tournament(eval_env, evaluate_num)[0],
                                   episode=episode)

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN RA')

    # Save model
    save_dir = 'models/mocsar_dqn_ra'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.compat.v1.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))
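The TensorFlow variant persists the session with tf.compat.v1.train.Saver; restoring it later is symmetric. A minimal sketch, assuming the TF1-style API and that the checkpoint written above exists:

import os
import tensorflow as tf

save_dir = 'models/mocsar_dqn_ra'
with tf.compat.v1.Session() as sess:
    # Rebuild the saved graph from the .meta file, then restore the weights.
    saver = tf.compat.v1.train.import_meta_graph(os.path.join(save_dir, 'model.meta'))
    saver.restore(sess, os.path.join(save_dir, 'model'))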
Example #4
        # First sample a policy for the episode
        for agent in agents:
            agent.sample_episode_policy()

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for i in range(env.player_num):
            for ts in trajectories[i]:
                agents[i].feed(ts)

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep,
                                   tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('NFSP')

    # Save model
    save_dir = 'models/limit_holdem_nfsp'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))
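The NFSP loop above assumes an evaluation environment whose agents were set earlier. A minimal sketch of that elided step, reusing the older RLCard API seen elsewhere in these examples (RandomAgent(action_num=...), env.set_agents, env.player_num); the import path varies between RLCard versions.

from rlcard.agents import RandomAgent

# Hypothetical evaluation setup assumed by the loop above: the first trained
# NFSP agent plays against random agents in the evaluation environment.
random_agent = RandomAgent(action_num=eval_env.action_num)
eval_env.set_agents([agents[0]] + [random_agent] * (eval_env.player_num - 1))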
Example #5
    # Init a Logger to plot the learning curve
    logger = Logger(log_dir)

    for episode in range(episode_num):

        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
        for ts in trajectories[0]:
            agent.feed(ts)

        # Evaluate the performance. Play with random agents.
        if episode % evaluate_every == 0:
            logger.log_performance(env.timestep,
                                   tournament(eval_env, evaluate_num)[0])

    # Close files in the logger
    logger.close_files()

    # Plot the learning curve
    logger.plot('DQN')

    # Save model
    save_dir = 'models/gin_rummy_dqn'
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    saver = tf.train.Saver()
    saver.save(sess, os.path.join(save_dir, 'model'))
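This fragment presupposes a training environment and an evaluation environment created earlier. A minimal sketch of that setup, assuming the older RLCard API (rlcard.make, set_global_seed); the 'gin-rummy' environment id and the log directory are assumptions and may differ between versions.

import rlcard
from rlcard.utils import set_global_seed, Logger

# Hypothetical setup assumed by the training loop above.
set_global_seed(0)
env = rlcard.make('gin-rummy')
eval_env = rlcard.make('gin-rummy')
log_dir = './experiments/gin_rummy_dqn_result/'
logger = Logger(log_dir)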
Example #6
log_dir = './experiments/leduc_holdem_br_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
opponent = CFRAgent(env)
#opponent = RandomAgent(action_num=env.action_num)
#opponent.load()  # If we have a saved model, load it first

#agent = RandomAgent(action_num=env.action_num)
agent = BRAgent(eval_env, opponent)
#agent = CFRAgent(env)

eval_env.set_agents([agent, opponent])
# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    opponent.train()
    #agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play against the CFR opponent.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()
logger.plot('BR')
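CFRAgent training traverses the game tree and relies on the environment being able to step back, so the env used above normally has to be created with step-back enabled. A minimal sketch of that elided setup, assuming the config-dict style of rlcard.make; the exact keyword is an assumption and differs across versions.

import rlcard

# Hypothetical environment setup for the fragment above; 'allow_step_back'
# is an assumption about the config key.
env = rlcard.make('leduc-holdem', config={'allow_step_back': True})
eval_env = rlcard.make('leduc-holdem')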
Example #7
# The paths for saving the logs and learning curves
log_dir = './experiments/leduc_holdem_cfr_result/'

# Set a global seed
set_global_seed(0)

# Initialize CFR Agent
agent = CFRAgent(env)
agent.load()  # If we have a saved model, load it first

# Evaluate CFR against pre-trained NFSP
eval_env.set_agents([agent, models.load('leduc-holdem-nfsp').agents[0]])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

for episode in range(episode_num):
    agent.train()
    print('\rIteration {}'.format(episode), end='')
    # Evaluate the performance. Play with NFSP agents.
    if episode % evaluate_every == 0:
        agent.save()  # Save model
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0])

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('CFR')
Example #8
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0],
                               episode=episode)

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('NFSP Torch')

# Save model
save_dir = 'models/mocsar_nfsp_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = {}
for agent in agents:
    state_dict.update(agent.get_state_dict())
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))
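Note that merging the per-agent state dicts with dict.update only preserves every agent's weights if get_state_dict returns distinct key names per agent (for example a per-agent scope prefix, which is an assumption here); identical keys from a later agent silently overwrite the earlier ones, as the plain-Python sketch below shows.

# Plain-Python illustration of the dict.update behaviour the snippet relies on.
merged = {}
merged.update({'nfsp0/fc1.weight': 'agent 0 tensor'})
merged.update({'nfsp1/fc1.weight': 'agent 1 tensor'})    # distinct key: both kept
merged.update({'nfsp1/fc1.weight': 'agent 1 tensor v2'})  # same key: overwritten
print(merged)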
Example #9
    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)


# Save model
save_dir = 'models/mocsar_nfsp_pytorch_ra'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
logger.plot('NFSP Torch Rule Agent')