def __init__(self):
    self.name = 'PreDQNAgent'
    self.id = "d"
    # Set up the DQN agent and load the pre-trained model
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    self.use_raw = False
    # Config
    conf = Config('environ.properties')
    # Set the number of steps for collecting normalization statistics
    # and the initial memory size
    memory_init_size = conf.get_int('memory_init_size')
    norm_step = conf.get_int('norm_step')
    env = rlcard3.make('mocsar_dqn')
    with self.graph.as_default():
        self.agent = DQNAgent(self.sess,
                              scope='dqn',
                              action_num=env.action_num,
                              state_shape=env.state_shape,
                              replay_memory_size=20000,
                              replay_memory_init_size=memory_init_size,
                              norm_step=norm_step,
                              mlp_layers=[512, 512])
        self.normalize(env, 1000)
        self.sess.run(tf.compat.v1.global_variables_initializer())
    check_point_path = os.path.join(ROOT_PATH, 'mocsar_dqn')
    with self.sess.as_default():
        with self.graph.as_default():
            saver = tf.train.Saver(tf.model_variables())
            saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))
def init_vars(conf: Config) -> Tuple:
    """ Get the properties from the configuration

    :param conf: Mocsár config, based on environ.properties
    :return: evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
    """
    # Set the iteration numbers and how frequently we evaluate/save the plot
    evaluate_num = conf.get_int('evaluate_num')
    evaluate_every = conf.get_int('evaluate_every')
    # Set the number of steps for collecting normalization statistics
    # and the initial memory size
    memory_init_size = conf.get_int('memory_init_size')
    train_every = conf.get_int('train_every')
    episode_num = conf.get_int('episode_num')
    return evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
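For context, a minimal usage sketch of init_vars (the unpacking order follows the documented return value; the print line is illustrative, not from the source):

# Illustrative usage sketch: read the training hyper-parameters once at start-up.
conf = Config('environ.properties')
evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars(conf)
print(f'Running {episode_num} episodes, evaluating every {evaluate_every} episodes')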
def init_environment(conf: Config, env_id: str, config: Dict = {}) -> Tuple:
    """ Initialize Mocsár environments and return them

    :param conf: Mocsár config, based on environ.properties
    :param env_id: Mocsár environment id, like 'mocsar'
    :param config: environment configuration dictionary
    :return: (env, eval_env)
    """
    # Make the training and evaluation environments
    env = rlcard3.make(env_id=env_id, config=config)
    eval_env = rlcard3.make(env_id=env_id, config=config)
    # Set the number of players and cards
    env.game.set_game_params(num_players=conf.get_int('nr_players'),
                             num_cards=conf.get_int('nr_cards'))
    eval_env.game.set_game_params(num_players=conf.get_int('nr_players'),
                                  num_cards=conf.get_int('nr_cards'))
    return env, eval_env
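A hedged usage sketch (the env_id 'mocsar' appears in other examples in this section; this exact call is illustrative, not taken from the source):

# Illustrative usage sketch: build a matched training/evaluation pair with the
# player and card counts taken from environ.properties.
conf = Config('environ.properties')
env, eval_env = init_environment(conf, env_id='mocsar', config={})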
""" Compare different set of bots Repeat random games for defined players and sums the points received. File name: examples/mocsar_pl_cfg_config.py Author: József Varga Date created: 4/01/2020 """ import rlcard3 from rlcard3.games.mocsar.agentdb import str_to_agent_list from rlcard3.games.mocsar.stat import MocsarStat from rlcard3.utils.config_read import Config conf = Config('environ.properties') NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games') # Make environment and enable human mode env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True}) # Create statistics stat = MocsarStat(game=env.game, agents=env.model.rule_agents, nr_of_games=NR_GAMES, batch_name=conf.get_str(section='cfg.compare', key='batch_name'), log_dir=conf.get_str(section='cfg.compare', key='stat_dir_path')) # Register agents agents_list = str_to_agent_list(agent_str_list=conf.get_str(section='cfg.compare', key="agent_list")) print(f"mocsar_pl_cfg_config, Agents:{agents_list}")
def test_conf():
    conf = Config('environ.properties')
    memory_init_size = conf.get_int('memory_init_size')
    assert 1000 == memory_init_size
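test_conf pins memory_init_size to 1000. For orientation, an illustrative environ.properties sketch consistent with the keys read across these examples; only memory_init_size = 1000 and the cfg.compare and global section names are confirmed by the code, while the default section name and every other value are assumptions:

# Illustrative sketch only; values other than memory_init_size are assumed.
[default]
evaluate_num = 1000
evaluate_every = 100
memory_init_size = 1000
norm_step = 100
train_every = 1
episode_num = 10000
nr_players = 4
nr_cards = 32

[global]
nr_cards = 32

[cfg.compare]
nr_games = 100
batch_name = compare_batch
stat_dir_path = ./stats
agent_str = mocsar_dqn
agent_list = mocsar_dqn,mocsar_random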
                      q_mlp_layers=[512, 512],
                      device=torch.device('cuda'))
    agents.append(agent)

random_agent = RandomAgent(action_num=eval_env.action_num)

env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent, random_agent, random_agent])

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log game info
logger.log('\n########## Game information ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
""" Compare different agents against random agents File name: examples/mocsar_pl_dqn_pytorch_load_model_cfg.py Author: József Varga Date created: 4/14/2020 """ import rlcard3 from rlcard3.games.mocsar.stat import MocsarStat from rlcard3.utils.config_read import Config from rlcard3.utils.utils import tournament conf = Config('environ.properties') NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games') # Make environment and enable human mode env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True}) # Create statistics stat = MocsarStat(game=env.game, agents=env.model.rule_agents, nr_of_games=NR_GAMES, batch_name=conf.get_str(section='cfg.compare', key='batch_name'), log_dir=conf.get_str(section='cfg.compare', key='stat_dir_path')) # Register agents agent_str = conf.get_str(section='cfg.compare', key="agent_str") nr_cards = conf.get_int(section='global', key='nr_cards') agents = {agent_str: 1, "mocsar_random": 3}
''' Another example of loading a pre-trained model on Mocsár
Here, we directly load the model from the model zoo
'''
import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models
from rlcard3.utils.config_read import Config

# Make environment
env = rlcard3.make('mocsar')

# Get parameters
conf = Config('environ.properties')
evaluate_num = conf.get_int(section='cfg.compare', key='nr_games')
agent_str = conf.get_str(section='cfg.compare', key="agent_str")
nr_cards = conf.get_int(section='global', key='nr_cards')

# Set a global seed
# set_global_seed(0)

# Here we directly load the pre-trained agents from the /models module
dqn_agents = models.load(agent_str,
                         num_players=env.game.get_player_num(),
                         action_num=env.action_num,
                         state_shape=env.state_shape).agents

# Evaluate the performance. Play with random agents.
random_agent = RandomAgent(env.action_num)
env.game.set_game_params(num_players=4, num_cards=nr_cards)
env.set_agents([dqn_agents[0], random_agent, random_agent, random_agent])
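From here the evaluation is typically a single tournament call; a hedged sketch using the helper already imported above (the reporting format is illustrative, not from the source):

# Illustrative evaluation sketch: average payoffs over evaluate_num games,
# with seat 0 held by the loaded pre-trained agent.
payoffs = tournament(env, evaluate_num)
print(f'Average payoff of {agent_str} vs. three random agents: {payoffs[0]}')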
env.set_agents(env_agent_list)

# Evaluation agents
eval_env.model.create_agents({"mocsar_random": 4})
eval_agent_list = [eval_env.model.rule_agents[i] for i in range(1, 4)]
eval_agent_list.insert(0, agent)
eval_env.set_agents(eval_agent_list)

# Init a Logger to plot the learning curve
logger = Logger(log_dir)

# Log game info
logger.log('\n########## Game information, NFSP, RuleAgents, Pytorch ##########')
logger.log('\nNumPlayers: {}, NumCards: {}, Episodes: {}'.format(
    env.game.num_players, env.game.num_cards, conf.get_int('episode_num')))

for episode in range(conf.get_int('episode_num')):

    # First sample a policy for the episode
    agent.sample_episode_policy()

    # Generate data from the environment
    trajectories, _ = env.run(is_training=True)

    # Feed transitions into agent memory, and train the agent
    for ts in trajectories[0]:
        agent.feed(ts)

    # Evaluate the performance. Play with random agents.
    if episode % evaluate_every == 0:
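        # (Sketch, not from this file.) The fragment ends at the evaluation
        # check; in the standard rlcard example scripts the body plays
        # evaluate_num games against the fixed evaluation line-up and logs
        # seat 0's average payoff for the learning curve, e.g.:
        logger.log_performance(env.timestep, tournament(eval_env, evaluate_num)[0])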