def __init__(self):
    self.name = 'PreDQNAgent'
    self.id = "d"
    # Set up the DQN agent and load the pre-trained model
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    self.use_raw = False
    # Config
    conf = Config('environ.properties')
    # Set the number of steps for collecting normalization statistics
    # and the initial memory size
    memory_init_size = conf.get_int('memory_init_size')
    norm_step = conf.get_int('norm_step')
    env = rlcard3.make('mocsar_dqn')
    with self.graph.as_default():
        self.agent = DQNAgent(self.sess,
                              scope='dqn',
                              action_num=env.action_num,
                              state_shape=env.state_shape,
                              replay_memory_size=20000,
                              replay_memory_init_size=memory_init_size,
                              norm_step=norm_step,
                              mlp_layers=[512, 512])
        self.normalize(env, 1000)
        self.sess.run(tf.compat.v1.global_variables_initializer())
    check_point_path = os.path.join(ROOT_PATH, 'mocsar_dqn')
    with self.sess.as_default():
        with self.graph.as_default():
            saver = tf.train.Saver(tf.model_variables())
            saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))
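# Plausible companion methods for the wrapper above, assuming it forwards to
# the restored agent's standard rlcard3 step/eval_step interface; the bodies
# below are hypothetical sketches, not taken from the source.
def step(self, state):
    # Training-time action from the restored DQN (hypothetical delegation)
    return self.agent.step(state)

def eval_step(self, state):
    # Evaluation-time action from the restored DQN (hypothetical delegation)
    return self.agent.eval_step(state)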
def init_vars(conf: Config) -> Tuple:
    """
    Get the properties from the configuration

    :param conf: Mocsár config, based on environ.properties
    :return: evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
    """
    # Set the iteration numbers and how frequently we evaluate/save the plot
    evaluate_num = conf.get_int('evaluate_num')
    evaluate_every = conf.get_int('evaluate_every')
    # Set the number of steps for collecting normalization statistics
    # and the initial memory size
    memory_init_size = conf.get_int('memory_init_size')
    train_every = conf.get_int('train_every')
    episode_num = conf.get_int('episode_num')
    return evaluate_num, evaluate_every, memory_init_size, train_every, episode_num
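# For orientation, a sketch of the environ.properties file these helpers read
# via Config. The (section, key) names are taken from the example scripts and
# memory_init_size=1000 matches the test further below; every other value, and
# the section placement of keys read without an explicit section, is an
# illustrative assumption.
#
# [global]
# nr_players = 4
# nr_cards = 32
# memory_init_size = 1000
# norm_step = 100
# evaluate_num = 1000
# evaluate_every = 100
# train_every = 1
# episode_num = 10000
#
# [cfg.compare]
# nr_games = 100
# batch_name = compare
# stat_dir_path = ./experiments/stats/
# agent_str = mocsar_dqn
# agent_list = mocsar_min,mocsar_random
#
# [cfg.visual]
# png_dir_path = ./experiments/png/
# dir_name = compare
# file_name = stats.csv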
def init_environment(conf: Config, env_id: str, config: Dict = None) -> Tuple:
    """
    Initialize the Mocsár environments and return them

    :param conf: Mocsár config, based on environ.properties
    :param env_id: Mocsár environment id, like 'mocsar'
    :param config: Extra environment configuration, e.g. {'multi_agent_mode': True}
    :return: (env, eval_env)
    """
    config = config or {}  # avoid sharing a mutable default argument
    # Make the training and evaluation environments
    env = rlcard3.make(env_id=env_id, config=config)
    eval_env = rlcard3.make(env_id=env_id, config=config)
    # Set the number of players and cards
    env.game.set_game_params(num_players=conf.get_int('nr_players'),
                             num_cards=conf.get_int('nr_cards'))
    eval_env.game.set_game_params(num_players=conf.get_int('nr_players'),
                                  num_cards=conf.get_int('nr_cards'))
    return env, eval_env
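# A minimal usage sketch of the two helpers, mirroring the example scripts
# below (it assumes environ.properties is present in the working directory):
#
# conf = Config('environ.properties')
# env, eval_env = init_environment(conf=conf, env_id='mocsar')
# evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars(conf=conf)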
"""
Author: József Varga
Date created: 4/06/2020
Compare various agents
"""
import os
from typing import List
import io
from urllib.request import urlopen

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from rlcard3.games.mocsar.agentdb import str_to_agent_dict, get_by_id
from rlcard3.utils.config_read import Config

conf = Config('environ.properties')

# Path constants
LOG_SAVE_PRFX = conf.get_str(section='cfg.compare', key="stat_dir_path")
PNG_SAVE_PRFX = conf.get_str(section='cfg.visual', key="png_dir_path")
log_dirname = conf.get_str(section='cfg.visual', key="dir_name")
log_filename = conf.get_str(section='cfg.visual', key="file_name")


def read_data_local() -> pd.DataFrame:
    """Read the comparison statistics from the local CSV log."""
    csv_file_name = os.path.join(LOG_SAVE_PRFX, log_dirname, log_filename)
    dfr = pd.read_csv(csv_file_name, sep=";",
                      usecols=["cardnr", "agentid", "agentstr", "payoff"])
    return dfr


def read_data_github(csv_url: str) -> pd.DataFrame:
    """Read the comparison statistics from a CSV published on GitHub."""
    r1 = urlopen(csv_url)
    # Assumed completion: the source is cut off here; reading the response
    # body into a dataframe with the same columns is a plausible next step.
    dfr = pd.read_csv(io.BytesIO(r1.read()), sep=";",
                      usecols=["cardnr", "agentid", "agentstr", "payoff"])
    return dfr
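# A minimal plotting sketch for the dataframe returned above, assuming the
# columns read in read_data_local/read_data_github; the chart type, grouping,
# and output file name are illustrative assumptions, not taken from the source.
def plot_payoffs(dfr: pd.DataFrame) -> None:
    # Average payoff per agent, grouped by the number of cards in the game
    ax = sns.barplot(x='agentstr', y='payoff', hue='cardnr', data=dfr)
    ax.set(xlabel='Agent', ylabel='Payoff')
    plt.savefig(os.path.join(PNG_SAVE_PRFX, 'payoff_compare.png'))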
""" An example of learning a DQN Agent on Mocsár """ import torch import os from rlcard3.agents.dqn_agent_pytorch import DQNAgent from rlcard3.agents.random_agent import RandomAgent from rlcard3.utils.utils import set_global_seed, tournament from rlcard3.utils.logger import Logger from rlcard3.utils.config_read import Config from rlcard3.games.mocsar.util_examples import init_environment, init_vars # Config conf = Config('environ.properties') # Environemtn env, eval_env = init_environment(conf=conf, env_id='mocsar-cfg', config={'multi_agent_mode': True}) # parameter variables evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars( conf=conf) # The paths for saving the logs and learning curves log_dir = './experiments/mocsar_dqn_ra_pytorch_result/' # Set a global seed set_global_seed(0) agent = DQNAgent(scope='dqn', action_num=env.action_num, replay_memory_init_size=memory_init_size,
""" Compare different set of bots Repeat random games for defined players and sums the points received. File name: examples/mocsar_pl_cfg_config.py Author: József Varga Date created: 4/01/2020 """ import rlcard3 from rlcard3.games.mocsar.agentdb import str_to_agent_list from rlcard3.games.mocsar.stat import MocsarStat from rlcard3.utils.config_read import Config conf = Config('environ.properties') NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games') # Make environment and enable human mode env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True}) # Create statistics stat = MocsarStat(game=env.game, agents=env.model.rule_agents, nr_of_games=NR_GAMES, batch_name=conf.get_str(section='cfg.compare', key='batch_name'), log_dir=conf.get_str(section='cfg.compare', key='stat_dir_path')) # Register agents agents_list = str_to_agent_list(agent_str_list=conf.get_str(section='cfg.compare', key="agent_list")) print(f"mocsar_pl_cfg_config, Agents:{agents_list}")
def test_conf():
    conf = Config('environ.properties')
    memory_init_size = conf.get_int('memory_init_size')
    assert memory_init_size == 1000
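# A possible companion test, assuming the (section, key) accessors used by the
# example scripts; it only checks that the values exist and parse, since the
# actual configured values (beyond memory_init_size) are not given here.
def test_conf_sections():
    conf = Config('environ.properties')
    assert conf.get_int(section='cfg.compare', key='nr_games') > 0
    assert conf.get_str(section='cfg.compare', key='agent_str') != ''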
""" An example of learning a NFSP Agent on Mocsár """ import torch import os from rlcard3.games.mocsar.util_examples import init_environment, init_vars from rlcard3.agents.nfsp_agent_pytorch import NFSPAgent from rlcard3.agents.random_agent import RandomAgent from rlcard3.utils.config_read import Config from rlcard3.utils.utils import set_global_seed, tournament from rlcard3.utils.logger import Logger # Config conf = Config('environ.properties') # Environemtn env, eval_env = init_environment(conf=conf, env_id='mocsar') # parameter variables evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars( conf=conf) # The paths for saving the logs and learning curves log_dir = './experiments/mocsar_nfsp_pytorch_result/' # Set a global seed set_global_seed(0) # Set agents agents = [] for i in range(env.player_num): agent = NFSPAgent(scope='nfsp' + str(i),
""" Compare different agents against random agents File name: examples/mocsar_pl_dqn_pytorch_load_model_cfg.py Author: József Varga Date created: 4/14/2020 """ import rlcard3 from rlcard3.games.mocsar.stat import MocsarStat from rlcard3.utils.config_read import Config from rlcard3.utils.utils import tournament conf = Config('environ.properties') NR_GAMES = conf.get_int(section='cfg.compare', key='nr_games') # Make environment and enable human mode env = rlcard3.make('mocsar-cfg', config={'multi_agent_mode': True}) # Create statistics stat = MocsarStat(game=env.game, agents=env.model.rule_agents, nr_of_games=NR_GAMES, batch_name=conf.get_str(section='cfg.compare', key='batch_name'), log_dir=conf.get_str(section='cfg.compare', key='stat_dir_path')) # Register agents agent_str = conf.get_str(section='cfg.compare', key="agent_str") nr_cards = conf.get_int(section='global', key='nr_cards') agents = {agent_str: 1, "mocsar_random": 3}
'''
Another example of loading a pre-trained model, this time on Mocsár
Here, we directly load the model from the model zoo
'''
import rlcard3
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3 import models
from rlcard3.utils.config_read import Config

# Make environment
env = rlcard3.make('mocsar')

# Get parameters
conf = Config('environ.properties')
evaluate_num = conf.get_int(section='cfg.compare', key='nr_games')
agent_str = conf.get_str(section='cfg.compare', key="agent_str")
nr_cards = conf.get_int(section='global', key='nr_cards')

# Set a global seed
# set_global_seed(0)

# Here we directly load the pre-trained agents from the /models module
dqn_agents = models.load(agent_str,
                         num_players=env.game.get_player_num(),
                         action_num=env.action_num,
                         state_shape=env.state_shape).agents

# Evaluate the performance: play against random agents
random_agent = RandomAgent(env.action_num)
env.game.set_game_params(num_players=4, num_cards=nr_cards)
env.set_agents([dqn_agents[0], random_agent, random_agent, random_agent])
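# A plausible final step, assuming the standard rlcard3 tournament helper
# imported above, which returns the average payoff per player:
payoffs = tournament(env, evaluate_num)
print(f"Average payoff of the loaded model against random agents: {payoffs[0]}")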
"""
File name: examples/mocsar_ln_nfsp_pytorch_ra.py
Author: József Varga
Date created: 4/07/2020
"""
import torch
import os

from rlcard3.games.mocsar.util_examples import init_environment, init_vars
from rlcard3.agents.nfsp_agent_pytorch import NFSPAgent
from rlcard3.agents.random_agent import RandomAgent
from rlcard3.utils.config_read import Config
from rlcard3.utils.utils import set_global_seed, tournament
from rlcard3.utils.logger import Logger

# Config
conf = Config('environ.properties')

# Environment
env, eval_env = init_environment(conf=conf, env_id='mocsar-cfg',
                                 config={'multi_agent_mode': True})

# Parameter variables
evaluate_num, evaluate_every, memory_init_size, train_every, episode_num = init_vars(conf=conf)

# The paths for saving the logs and learning curves
log_dir = './experiments/mocsar_nfsp_pytorch_ra_result/'

# Set a global seed
set_global_seed(0)

# Set agents
agent = NFSPAgent(scope='nfsp',
                  action_num=env.action_num,
                  state_shape=env.state_shape,
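# The NFSPAgent call above is cut off in the source. A plausible continuation,
# assuming the single NFSP learner is paired with random opponents (the "ra"
# suffix in the file name and the RandomAgent import both point this way; four
# players are assumed, as in the other examples):
#
# random_agent = RandomAgent(env.action_num)
# env.set_agents([agent, random_agent, random_agent, random_agent])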