Example #1
def test_tournament(self):
    env = rlcard3.make('leduc-holdem')
    env.set_agents(
        [RandomAgent(env.action_num),
         RandomAgent(env.action_num)])
    payoffs = tournament(env, 1000)
    self.assertEqual(len(payoffs), 2)
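A closely related check, not in the original test, is that the two average payoffs mirror each other. This is a sketch that assumes Leduc Hold'em payoffs are zero-sum per game, as in upstream rlcard:

def test_tournament_zero_sum(self):
    # Hedged sketch: with zero-sum per-game payoffs, the two averages
    # returned by tournament() should cancel out (up to float noise).
    env = rlcard3.make('leduc-holdem')
    env.set_agents(
        [RandomAgent(env.action_num),
         RandomAgent(env.action_num)])
    payoffs = tournament(env, 1000)
    self.assertAlmostEqual(payoffs[0] + payoffs[1], 0, places=6)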
Example #2
logger.log('Players: {}, cards: {}, episodes: {}'.format(
    env.game.num_players, env.game.num_cards, episode_num))

# logger.log(f'\nTrain Agents:{get_agent_str(env_agent_list)}')
# logger.log(f'\nEval Agents:{get_agent_str(eval_agent_list)}')
for episode in range(episode_num):

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)
    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep,
                               tournament(eval_env, evaluate_num)[0],
                               episode=episode)

# Save model
save_dir = 'models/mocsar_dqn_ra_pytorch'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
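The snippet ends at the plotting comment without the actual call. Below is a minimal stand-in, assuming the Logger wrote its evaluation results to a CSV with timestep and reward columns; the path and column names are assumptions, not taken from the snippet:

import csv
import os

import matplotlib.pyplot as plt

# Hypothetical log location and column names; point this at wherever the
# Logger in your run actually writes its performance CSV.
csv_path = os.path.join('experiments', 'mocsar_dqn_ra_pytorch', 'performance.csv')
timesteps, rewards = [], []
with open(csv_path) as f:
    for row in csv.DictReader(f):
        timesteps.append(int(row['timestep']))
        rewards.append(float(row['reward']))

plt.plot(timesteps, rewards)
plt.xlabel('timestep')
plt.ylabel('average payoff vs. random agents')
plt.savefig('learning_curve.png')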
Example #3
# Load pretrained model
graph = tf.Graph()
sess = tf.Session(graph=graph)

with graph.as_default():
    nfsp_agents = []
    for i in range(env.player_num):
        agent = NFSPAgent(sess,
                          scope='nfsp' + str(i),
                          action_num=env.action_num,
                          state_shape=env.state_shape,
                          hidden_layers_sizes=[128, 128],
                          q_mlp_layers=[128, 128])
        nfsp_agents.append(agent)

# We have a pretrained model here. Change the path for your model.
check_point_path = os.path.join(rlcard3.__path__[0], 'models/pretrained/leduc_holdem_nfsp')

with sess.as_default():
    with graph.as_default():
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(check_point_path))

# Evaluate the performance. Play with random agents.
evaluate_num = 10000
random_agent = RandomAgent(env.action_num)
env.set_agents([nfsp_agents[0], random_agent])
reward = tournament(env, evaluate_num)[0]
print('Average reward against random agent: ', reward)
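As a variation on the evaluation above (not part of the original snippet), the two restored NFSP agents can also be pitted against each other, reusing only the set_agents and tournament calls already shown:

# Hedged sketch: self-play evaluation of the two pretrained NFSP agents.
env.set_agents([nfsp_agents[0], nfsp_agents[1]])
self_play_reward = tournament(env, evaluate_num)[0]
print('Average reward of nfsp0 against nfsp1: ', self_play_reward)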

print(f"mocsar_pl_dqn_pytorch_load_model_cfg, Agents:{agents}")

# # Here we directly load NFSP models from /models module
# rl_agents = models.load(agent_str,
#                         num_players=env.game.get_player_num(),
#                         action_num=env.action_num,
#                         state_shape=env.state_shape).agents

# Evaluate the performance. Play with random agents.

env.game.set_game_params(num_players=4, num_cards=nr_cards)
env.model.create_agents(agents)
env.set_agents(env.model.rule_agents)

if NR_GAMES % 2 == 0:
    reward = tournament(env, NR_GAMES)[0]
    print(
        f'Average reward for {agent_str} against random agent: {reward}, cards: {nr_cards} '
    )

else:
    stat.reset_game_nr(agents=env.model.rule_agents)
    print(f"Game for cards:{nr_cards}, agents:{stat.agentstr} ")
    payoff_total = 0
    for i in range(NR_GAMES):
        state, payoffs, done = env.run_multi_agent(stat=stat)
        payoff_total += payoffs[0]
        print(f"-----------\nGame Finished.{i}.game, payoff: {payoffs[0]}")
    print(
        f'Average reward for {agent_str} against random agent: {payoff_total / NR_GAMES}, cards: {nr_cards} '
    )
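The manual loop above only tracks seat 0. A small sketch of an extension, reusing env.run_multi_agent and stat exactly as in the snippet, accumulates every seat's payoff:

import numpy as np

# Hedged sketch: average payoff per seat over the same manual game loop.
totals = np.zeros(env.game.num_players)
for _ in range(NR_GAMES):
    _, payoffs, _ = env.run_multi_agent(stat=stat)
    totals += np.asarray(payoffs)
print(f"Average payoff per seat over {NR_GAMES} games: {totals / NR_GAMES}")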
Example #5
for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0], episode=episode)


# Save model
save_dir = 'models/mocsar_nfsp_pytorch_ra'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
state_dict = agent.get_state_dict()
logger.log('\n########## Pytorch Save model ##########')
logger.log('\n' + str(state_dict.keys()))
torch.save(state_dict, os.path.join(save_dir, 'model.pth'))

# Close files in the logger
logger.close_files()

# Plot the learning curve
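Both PyTorch training snippets stop at the plotting comment. Reloading the checkpoint they save is the reverse of the torch.save call above; how the agent consumes the state dict is an assumption here, so check the agent class in rlcard3 for the actual loader method:

import os
import torch

# torch.load itself is standard; the agent-side method name is hypothetical.
checkpoint = torch.load(os.path.join(save_dir, 'model.pth'))
agent.load_state_dict(checkpoint)  # hypothetical counterpart to get_state_dict()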