def test_step_back(self):
    """Stepping back restores the acting player; disabled envs raise."""
    env = rlcard3.make('blackjack', config={'allow_step_back': True})
    _, player_id = env.init_game()
    env.step(1)
    _, back_player_id = env.step_back()
    self.assertEqual(player_id, back_player_id)
    # With no history left, step_back reports failure instead of raising.
    self.assertEqual(env.step_back(), False)
    # The default config disallows stepping back entirely.
    env = rlcard3.make('blackjack')
    with self.assertRaises(Exception):
        env.step_back()
def test_step_back(self):
    """step_back rewinds to the previous player when enabled, raises when not."""
    env = rlcard3.make('mahjong', config={'allow_step_back': True})
    state, player_id = env.init_game()
    env.step(np.random.choice(state['legal_actions']))
    env.step_back()
    self.assertEqual(env.game.round.current_player, player_id)

    env = rlcard3.make('mahjong', config={'allow_step_back': False})
    state, player_id = env.init_game()
    env.step(np.random.choice(state['legal_actions']))
    self.assertRaises(Exception, env.step_back)
def __init__(self):
    """Build one NFSP agent per player and restore the pretrained weights."""
    import tensorflow as tf
    from rlcard3.agents.nfsp_agent import NFSPAgent

    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    env = rlcard3.make('leduc-holdem')
    with self.graph.as_default():
        self.nfsp_agents = [
            NFSPAgent(self.sess,
                      scope='nfsp' + str(i),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[128, 128],
                      q_mlp_layers=[128, 128])
            for i in range(env.player_num)
        ]
    check_point_path = os.path.join(ROOT_PATH, 'leduc_holdem_nfsp')
    with self.sess.as_default():
        with self.graph.as_default():
            saver = tf.train.Saver()
            saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))
def test_step(self):
    """Stepping with the last action id passes the turn downstream."""
    env = rlcard3.make('doudizhu')
    _, player_id = env.init_game()
    current = env.game.players[player_id]
    _, next_player_id = env.step(env.action_num - 1)
    expected = get_downstream_player_id(current, env.game.players)
    self.assertEqual(next_player_id, expected)
def test_run(self):
    """A full blackjack episode yields one trajectory per player in both modes."""
    env = rlcard3.make('blackjack')
    env.set_agents([RandomAgent(env.action_num)])
    trajectories, _ = env.run(is_training=False)
    self.assertEqual(len(trajectories), 1)
    # Training mode with a fixed seed must produce the same structure.
    trajectories, _ = env.run(is_training=True, seed=1)
    self.assertEqual(len(trajectories), 1)
def test_make(self):
    """Registered env ids resolve to a playable env; unknown ids raise."""
    register(env_id='test_make', entry_point='rlcard3.envs.blackjack:BlackjackEnv')
    env = rlcard3.make('test_make')
    _, player = env.init_game()
    self.assertEqual(player, 0)
    with self.assertRaises(ValueError):
        make('test_random_make')
def __init__(self):
    """Share a single UNO rule agent across every seat."""
    env = rlcard3.make('uno')
    shared_agent = UNORuleAgentV1()
    self.rule_agents = [shared_agent] * env.player_num
def test_tournament(self):
    """A leduc-holdem tournament returns one payoff entry per player."""
    env = rlcard3.make('leduc-holdem')
    agents = [RandomAgent(env.action_num) for _ in range(2)]
    env.set_agents(agents)
    payoffs = tournament(env, 1000)
    self.assertEqual(len(payoffs), 2)
def __init__(self):
    """Set up the DQN agent and restore its pre-trained checkpoint."""
    self.name = 'PreDQNAgent'
    self.id = "d"
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)
    self.use_raw = False

    # Number of steps for collecting normalization statistics and the
    # initial replay-memory size come from the properties file.
    conf = Config('environ.properties')
    memory_init_size = conf.get_int('memory_init_size')
    norm_step = conf.get_int('norm_step')

    env = rlcard3.make('mocsar_dqn')
    with self.graph.as_default():
        self.agent = DQNAgent(self.sess,
                              scope='dqn',
                              action_num=env.action_num,
                              state_shape=env.state_shape,
                              replay_memory_size=20000,
                              replay_memory_init_size=memory_init_size,
                              norm_step=norm_step,
                              mlp_layers=[512, 512])
        self.normalize(env, 1000)
        self.sess.run(tf.compat.v1.global_variables_initializer())

    check_point_path = os.path.join(ROOT_PATH, 'mocsar_dqn')
    with self.sess.as_default(), self.graph.as_default():
        saver = tf.train.Saver(tf.model_variables())
        saver.restore(self.sess, tf.train.latest_checkpoint(check_point_path))
def test_single_agent_mode(self):
    """reset() only works in single-agent mode; set_agents/run are rejected there."""
    env = rlcard3.make('leduc-holdem')
    with self.assertRaises(ValueError):
        env.reset()

    env = rlcard3.make('leduc-holdem', config={'single_agent_mode': True})
    with self.assertRaises(ValueError):
        env.set_agents([])
    with self.assertRaises(ValueError):
        env.run()

    state = env.reset()
    self.assertIsInstance(state, dict)
    for _ in range(100):
        state, _, _ = env.step(np.random.choice(state['legal_actions']))
def __init__(self):
    """Share a single limit-holdem rule agent across every seat."""
    env = rlcard3.make('limit-holdem')
    shared_agent = LimitholdemRuleAgentV1()
    self.rule_agents = [shared_agent] * env.player_num
def test_step(self):
    """step() keeps the reported player id in sync with get_player_id()."""
    env = rlcard3.make('no-limit-holdem')
    state, player_id = env.init_game()
    self.assertEqual(player_id, env.get_player_id())
    first_action = state['legal_actions'][0]
    _, player_id = env.step(first_action)
    self.assertEqual(player_id, env.get_player_id())
def test_decode_action(self):
    """Decoded legal actions never exceed their ACTION_LIST entry."""
    env = rlcard3.make('uno')
    env.init_game()
    for legal_action in env._get_legal_actions():
        decoded = env._decode_action(legal_action)
        self.assertLessEqual(decoded, ACTION_LIST[legal_action])
def __init__(self):
    """Load the pretrained CFR agent for leduc-holdem from disk."""
    env = rlcard3.make('leduc-holdem')
    model_path = os.path.join(ROOT_PATH, 'leduc_holdem_cfr')
    self.agent = CFRAgent(env, model_path=model_path)
    self.agent.load()
def test_get_legal_actions(self):
    """Every legal action id is below the env's action count."""
    env = rlcard3.make('mahjong')
    agents = [RandomAgent(env.action_num) for _ in range(env.player_num)]
    env.set_agents(agents)
    env.init_game()
    for legal_action in env._get_legal_actions():
        self.assertLessEqual(legal_action, env.action_num - 1)
def init_environment(conf: Config, env_id: str, config: Dict = None) -> Tuple:
    """ Initialize Mocsár environments and return them.

    :param conf: Mocsár config, based on environ.properties
    :param env_id: Mocsár environment id, like 'mocsar'
    :param config: optional env config forwarded to ``rlcard3.make``
    :return: (env, eval_env)
    """
    # Bug fix: the default used to be a mutable ``{}`` shared across calls;
    # any mutation by a callee would leak into later invocations.
    if config is None:
        config = {}
    # Make the training and the evaluation environment.
    env = rlcard3.make(env_id=env_id, config=config)
    eval_env = rlcard3.make(env_id=env_id, config=config)
    # Set number of players and cards on both games.
    num_players = conf.get_int('nr_players')
    num_cards = conf.get_int('nr_cards')
    env.game.set_game_params(num_players=num_players, num_cards=num_cards)
    eval_env.game.set_game_params(num_players=num_players, num_cards=num_cards)
    return env, eval_env
def test_step():
    """After one step the reported player matches the round's current player."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env}")
    state, _ = env.init_game()
    chosen = np.random.choice(state['legal_actions'])
    state, player_id = env.step(chosen)
    expected_id = env.game.players.get_playerid(env.game.round.current_player_index)
    assert player_id == expected_id
    assert state['obs'].size == np.array(env.state_shape).prod()
def test_init_game_and_extract_state():
    """Smoke-test that the environment can be created and initialised at all."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_init_game_and_extract_state")
    state, player_id = env.init_game()
    assert 0 <= player_id <= env.game.get_player_num()
    assert state['obs'].size == np.array(env.state_shape).prod()
def test_get_payoffs(self):
    """Blackjack payoffs are always one of -1, 0, or 1."""
    env = rlcard3.make('blackjack')
    for _ in range(100):
        env.init_game()
        while not env.is_over():
            env.step(np.random.choice([0, 1]))
        for payoff in env.get_payoffs():
            self.assertIn(payoff, [-1, 1, 0])
def test_run(self):
    """A heads-up no-limit game is zero-sum with one trajectory per player."""
    env = rlcard3.make('no-limit-holdem')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 2)
    self.assertEqual(sum(payoffs), 0)
def test_get_legal_actions():
    """All legal action ids stay within the game's action space."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_get_legal_actions")
    env.set_agents([RandomAgent(action_num=env.action_num),
                    RandomAgent(action_num=env.action_num)])
    env.init_game()
    for legal_action in env._get_legal_actions():
        assert legal_action <= env.game.get_action_num()
def test_decode_action(self):
    """Action ids decode to the matching playable-card combination."""
    env = rlcard3.make('doudizhu')
    env.init_game()
    env.game.judger.playable_cards[0] = [
        '5', '6', '55', '555', '33366', '33355']
    env.game.state['actions'] = ['33366', '33355']
    self.assertEqual(env._decode_action(54), '33366')
    env.game.state['actions'] = ['444', '44466', '44455']
    self.assertEqual(env._decode_action(29), '444')
def test_decode_action(self):
    """Action ids decode to the matching playable-card combination."""
    env = rlcard3.make('simple-doudizhu')
    env.init_game()
    env.game.judger.playable_cards[0] = [
        '9', 'T', '99', '999', '888TT', '88899']
    env.game.state['actions'] = ['888TT', '88899']
    self.assertEqual(env._decode_action(28), '888TT')
    env.game.state['actions'] = ['888', '88899', '888TT']
    self.assertEqual(env._decode_action(14), '888')
def test_step_back_enabled():
    """With allow_step_back, step_back restores both player and observation."""
    # Bug fix: ``random.seed = 42`` replaced the seed *function* with the
    # integer 42 instead of seeding the RNG; call it properly.
    random.seed(42)
    np.random.seed(42)
    env = rlcard3.make('mocsar', config={'allow_step_back': True})
    print(f"Env:{env} test_step_back_enabled")
    state_before, player_id_before = env.init_game()
    print(player_id_before, state_before)
    env.step(state_before['legal_actions'][0])
    state, player_id = env.step_back()
    print(player_id, state)
    assert player_id == player_id_before
    assert np.array_equal(state['obs'], state_before['obs'])
def test_decode_action(self):
    """Legal actions decode to known action names; id 3 is fold, id 0 is check."""
    env = rlcard3.make('no-limit-holdem')
    state, _ = env.init_game()
    for action in state['legal_actions']:
        self.assertIn(env._decode_action(action), env.actions)
    self.assertEqual(env._decode_action(3), 'fold')
    env.step(0)
    self.assertEqual(env._decode_action(0), 'check')
def test_train(self):
    """A briefly trained CFR agent only picks from the legal actions."""
    env = rlcard3.make('leduc-holdem', config={'allow_step_back': True})
    agent = CFRAgent(env)
    for _ in range(100):
        agent.train()
    state = {'obs': np.array([1., 1., 0., 0., 0., 0.]),
             'legal_actions': [0, 2]}
    action, _ = agent.eval_step(state)
    self.assertIn(action, [0, 2])
def test_step_back_disabled():
    """Without allow_step_back, step_back raises with the documented message."""
    # Bug fix: ``random.seed = 42`` overwrote the seed function with an int;
    # call it instead so the RNG is actually seeded.
    random.seed(42)
    np.random.seed(42)
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_step_back_disabled")
    state, _ = env.init_game()
    env.step(state['legal_actions'][0])
    with pytest.raises(Exception) as excinfo:
        _ = env.step_back()
    exception_msg = excinfo.value.args[0]
    assert exception_msg == "Step back is off. To use step_back, please set allow_step_back=True in rlcard3.make"
def test_run(self):
    """A four-player mahjong game is zero-sum in both run modes."""
    env = rlcard3.make('mahjong')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 4)
    self.assertEqual(sum(payoffs), 0)
    # Training mode with a fixed seed must also be zero-sum.
    _, payoffs = env.run(is_training=True, seed=1)
    self.assertEqual(sum(payoffs), 0)
def test_run(self):
    """Winners in doudizhu are either the landlord alone or both peasants."""
    env = rlcard3.make('doudizhu')
    env.set_agents([RandomAgent(env.action_num) for _ in range(env.player_num)])
    trajectories, payoffs = env.run(is_training=False)
    self.assertEqual(len(trajectories), 3)
    winners = [pid for pid, payoff in enumerate(payoffs) if payoff == 1]
    if len(winners) == 1:
        self.assertEqual(env.game.players[winners[0]].role, 'landlord')
    if len(winners) == 2:
        self.assertEqual(env.game.players[winners[0]].role, 'peasant')
        self.assertEqual(env.game.players[winners[1]].role, 'peasant')
def test_save_and_load(self):
    """A saved CFR agent reloads with identical policy sizes and iteration."""
    env = rlcard3.make('leduc-holdem', config={'allow_step_back': True})
    agent = CFRAgent(env)
    for _ in range(100):
        agent.train()
    agent.save()

    restored = CFRAgent(env)
    restored.load()
    self.assertEqual(len(agent.policy), len(restored.policy))
    self.assertEqual(len(agent.average_policy), len(restored.average_policy))
    self.assertEqual(len(agent.regrets), len(restored.regrets))
    self.assertEqual(agent.iteration, restored.iteration)