Example #1
 def __init__(self):
     self.name = 'PreDQNAgent'
     self.id = "d"
     # Set up the DQN agent and load the pre-trained model
     self.graph = tf.Graph()
     self.sess = tf.Session(graph=self.graph)
     self.use_raw = False
     # Config
     conf = Config('environ.properties')
     # Set the number of steps for collecting normalization statistics
     # and the initial memory size
     memory_init_size = conf.get_int('memory_init_size')
     norm_step = conf.get_int('norm_step')
     env = rlcard3.make('mocsar_dqn')
     with self.graph.as_default():
         self.agent = DQNAgent(self.sess,
                               scope='dqn',
                               action_num=env.action_num,
                               state_shape=env.state_shape,
                               replay_memory_size=20000,
                               replay_memory_init_size=memory_init_size,
                               norm_step=norm_step,
                               mlp_layers=[512, 512])
         self.normalize(env, 1000)
         self.sess.run(tf.compat.v1.global_variables_initializer())
     check_point_path = os.path.join(ROOT_PATH, 'mocsar_dqn')
     with self.sess.as_default():
         with self.graph.as_default():
             saver = tf.train.Saver(tf.model_variables())
             saver.restore(self.sess,
                           tf.train.latest_checkpoint(check_point_path))
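# A usage sketch, not part of the original snippet: assuming the enclosing class is the
# PreDQNAgent named above and that it exposes the standard rlcard3 agent interface, it could
# be matched against random opponents like this (environment id and RandomAgent usage follow
# the later examples on this page).
import rlcard3
from rlcard3.agents.random_agent import RandomAgent

env = rlcard3.make('mocsar')
agent = PreDQNAgent()  # hypothetical class name, taken from self.name above
env.set_agents([agent] + [RandomAgent(action_num=env.action_num)
                          for _ in range(env.player_num - 1)])
trajectories, payoffs = env.run(is_training=False)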
Example #2
def init_vars(conf: Config) -> Tuple:
    """
    Get the properties from the configuration
    :param conf: Mocsár config, based on environ.properties
    :return: evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
    """
    # Set the iterations numbers and how frequently we evaluate/save plot
    evaluate_num = conf.get_int('evaluate_num')
    evaluate_every = conf.get_int('evaluate_every')
    # Set the number of steps for collecting normalization statistics
    # and the initial memory size
    memory_init_size = conf.get_int('memory_init_size')
    train_every = conf.get_int('train_every')
    episode_num = conf.get_int('episode_num')
    return evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
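# A short usage sketch (not in the original excerpt), assuming the same environ.properties
# file used throughout these examples:
conf = Config('environ.properties')
evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars(conf)
print(f"Episodes: {episode_num}, evaluate every {evaluate_every}, memory init size: {memory_init_size}")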
Example #3
def init_environment(conf: Config, env_id: str, config: Dict = {}) -> Tuple:
    """
    Initialize the Mocsár environments and return them
    :param conf: Mocsár config, based on environ.properties
    :param env_id: Mocsár environment id, like 'mocsar'
    :param config: environment configuration dictionary passed to rlcard3.make
    :return: (env, eval_env)
    """
    # Make environment
    env = rlcard3.make(env_id=env_id, config=config)
    eval_env = rlcard3.make(env_id=env_id, config=config)

    # Set Nr of players and cards
    env.game.set_game_params(num_players=conf.get_int('nr_players'),
                             num_cards=conf.get_int('nr_cards'))
    eval_env.game.set_game_params(num_players=conf.get_int('nr_players'),
                                  num_cards=conf.get_int('nr_cards'))

    return env, eval_env
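# A hedged usage sketch (not part of the original excerpt); 'mocsar_dqn' is one of the
# environment ids that appears elsewhere on this page:
conf = Config('environ.properties')
env, eval_env = init_environment(conf=conf, env_id='mocsar_dqn')
print(f"Players: {env.game.num_players}, Cards: {env.game.num_cards}")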
Example #4
"""
    Compare different sets of bots
    Repeats random games for the defined players and sums the points received.
    File name: examples/mocsar_pl_cfg_config.py
    Author: József Varga
    Date created: 4/01/2020
"""

import rlcard3
from rlcard3.games.mocsar.agentdb import str_to_agent_list
from rlcard3.games.mocsar.stat import MocsarStat
from rlcard3.utils.config_read import Config

conf = Config('environ.properties')
NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games')

# Make environment and enable human mode
env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True})

# Create statistics
stat = MocsarStat(game=env.game,
                  agents=env.model.rule_agents,
                  nr_of_games=NR_GAMES,
                  batch_name=conf.get_str(section='cfg.compare',
                                          key='batch_name'),
                  log_dir=conf.get_str(section='cfg.compare',
                                       key='stat_dir_path'))

# Register agents
agents_list = str_to_agent_list(agent_str_list=conf.get_str(section='cfg.compare', key="agent_list"))
print(f"mocsar_pl_cfg_config, Agents:{agents_list}")
Example #5
def test_conf():
    conf = Config('environ.properties')
    memory_init_size = conf.get_int('memory_init_size')
    assert 1000 == memory_init_size
Example #6
                      q_mlp_layers=[512, 512],
                      device=torch.device('cuda'))
    agents.append(agent)

random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent, random_agent, random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
"""
    Compare different agents against random agents
    File name: examples/mocsar_pl_dqn_pytorch_load_model_cfg.py
    Author: József Varga
    Date created: 4/14/2020
"""
import rlcard3
from rlcard3.games.mocsar.stat import MocsarStat
from rlcard3.utils.config_read import Config
from rlcard3.utils.utils import tournament

conf = Config('environ.properties')
NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games')

# Make environment and enable human mode
env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True})

# Create statistics
stat = MocsarStat(game=env.game,
                  agents=env.model.rule_agents,
                  nr_of_games=NR_GAMES,
                  batch_name=conf.get_str(section='cfg.compare',
                                          key='batch_name'),
                  log_dir=conf.get_str(section='cfg.compare',
                                       key='stat_dir_path'))

# Register agents
agent_str = conf.get_str(section='cfg.compare', key="agent_str")
nr_cards = conf.get_int(section='global', key='nr_cards')

agents = {agent_str: 1, "mocsar_random": 3}
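# The excerpt ends with the agent-count dictionary; what follows is only a hedged guess at the
# next steps, reusing the calls that appear in Examples #4 and #9:
env.model.create_agents(agents)        # instantiate rule agents from the {name: count} dict
env.set_agents(env.model.rule_agents)
payoffs = tournament(env, NR_GAMES)    # tournament is imported above
print(f"Average payoffs over {NR_GAMES} games: {payoffs}")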
Example #8
''' Another example of loading a pre-trained model, this time for the Mocsár environment.
    Here, we directly load the model from the model zoo.
'''
import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models
from rlcard3.utils.config_read import Config
# Make environment
env = rlcard3.make('mocsar')

# Get parameters
conf = Config('environ.properties')
evaluate_num = conf.get_int(section='cfg.compare', key='nr_games')
agent_str = conf.get_str(section='cfg.compare', key="agent_str")
nr_cards = conf.get_int(section='global', key='nr_cards')

# Set a global seed
#set_global_seed(0)

# Here we directly load NFSP models from /models module
dqn_agents = models.load(agent_str,
                         num_players=env.game.get_player_num(),
                         action_num=env.action_num,
                         state_shape=env.state_shape).agents

# Evaluate the performance. Play with random agents.

random_agent = RandomAgent(env.action_num)
env.game.set_game_params(num_players=4, num_cards=nr_cards)
env.set_agents([dqn_agents[0], random_agent, random_agent, random_agent])
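# The snippet stops after wiring up the agents; a minimal evaluation sketch (assumed, not part
# of the original) using the tournament helper imported above:
payoffs = tournament(env, evaluate_num)
for position, payoff in enumerate(payoffs):
    print(f"Player {position} average payoff: {payoff}")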
Example #9
env.set_agents(env_agent_list)

# Evaluation agent
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log Game info
logger.log('\n########## Game information, NFSP, RuleAgents, Pytorch ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(env.game.num_players,
                                                                 env.game.num_cards,
                                                                 conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
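        # Hedged completion of the truncated branch above (assumed, not part of the original
        # excerpt): evaluate against the random agents and log the result. tournament would
        # need to be imported here, as in Examples #7 and #8.
        payoffs = tournament(eval_env, conf.get_int('evaluate_num'))
        logger.log(f"Episode {episode}: average payoff {payoffs[0]}")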