Exemplo n.º 1
0
    def test_step_back(self):
        """Stepping back restores the initial player; step_back fails or raises when exhausted/disabled."""
        step_env = rlcard3.make('blackjack', config={'allow_step_back': True})
        _, initial_player = step_env.init_game()
        step_env.step(1)
        _, restored_player = step_env.step_back()
        self.assertEqual(initial_player, restored_player)
        # No history remains, so a second step_back reports failure.
        self.assertEqual(step_env.step_back(), False)

        # Without allow_step_back the call must raise.
        plain_env = rlcard3.make('blackjack')
        with self.assertRaises(Exception):
            plain_env.step_back()
Exemplo n.º 2
0
    def test_step_back(self):
        """step_back restores the acting player; with step-back disabled it raises."""
        enabled = rlcard3.make('mahjong', config={'allow_step_back': True})
        first_state, first_player = enabled.init_game()
        enabled.step(np.random.choice(first_state['legal_actions']))
        enabled.step_back()
        self.assertEqual(enabled.game.round.current_player, first_player)

        disabled = rlcard3.make('mahjong', config={'allow_step_back': False})
        next_state, _ = disabled.init_game()
        disabled.step(np.random.choice(next_state['legal_actions']))
        self.assertRaises(Exception, disabled.step_back)
Exemplo n.º 3
0
    def __init__(self):
        ''' Load pretrained model.

        Builds one NFSP agent per leduc-holdem player inside a dedicated
        TF1 graph/session, then restores their weights from the latest
        checkpoint under ROOT_PATH/leduc_holdem_nfsp.
        '''
        import tensorflow as tf
        from rlcard3.agents.nfsp_agent import NFSPAgent
        # A dedicated graph + session keeps these variables isolated from
        # any other TF graphs living in the same process.
        self.graph = tf.Graph()
        self.sess = tf.Session(graph=self.graph)

        env = rlcard3.make('leduc-holdem')
        with self.graph.as_default():
            self.nfsp_agents = []
            for i in range(env.player_num):
                # Unique variable scope per player ('nfsp0', 'nfsp1', ...)
                # so checkpointed variables map back unambiguously.
                agent = NFSPAgent(self.sess,
                                  scope='nfsp' + str(i),
                                  action_num=env.action_num,
                                  state_shape=env.state_shape,
                                  hidden_layers_sizes=[128, 128],
                                  q_mlp_layers=[128, 128])
                self.nfsp_agents.append(agent)

        check_point_path = os.path.join(ROOT_PATH, 'leduc_holdem_nfsp')
        with self.sess.as_default():
            with self.graph.as_default():
                # Restore all saved variables from the newest checkpoint.
                saver = tf.train.Saver()
                saver.restore(self.sess,
                              tf.train.latest_checkpoint(check_point_path))
Exemplo n.º 4
0
 def test_step(self):
     """After an action, the turn passes to the downstream player."""
     env = rlcard3.make('doudizhu')
     _, current_id = env.init_game()
     current_player = env.game.players[current_id]
     _, follower_id = env.step(env.action_num - 1)
     expected = get_downstream_player_id(current_player, env.game.players)
     self.assertEqual(follower_id, expected)
Exemplo n.º 5
0
 def test_run(self):
     """A full blackjack episode always yields exactly one trajectory."""
     env = rlcard3.make('blackjack')
     env.set_agents([RandomAgent(env.action_num)])
     for run_kwargs in ({'is_training': False}, {'is_training': True, 'seed': 1}):
         trajectories, _ = env.run(**run_kwargs)
         self.assertEqual(len(trajectories), 1)
Exemplo n.º 6
0
 def test_make(self):
     """A freshly registered env id works; an unknown id raises ValueError."""
     register(env_id='test_make', entry_point='rlcard3.envs.blackjack:BlackjackEnv')
     registered_env = rlcard3.make('test_make')
     _, first_player = registered_env.init_game()
     self.assertEqual(first_player, 0)
     with self.assertRaises(ValueError):
         make('test_random_make')
Exemplo n.º 7
0
    def __init__(self):
        ''' Load pretrained model: one shared UNO rule agent per player.
        '''
        env = rlcard3.make('uno')
        shared_agent = UNORuleAgentV1()
        # The rule agent is stateless between calls, so one instance is
        # shared by all player seats (same as the original list-building).
        self.rule_agents = [shared_agent] * env.player_num
Exemplo n.º 8
0
 def test_tournament(self):
     """A 1000-game tournament returns one payoff per player."""
     env = rlcard3.make('leduc-holdem')
     agents = [RandomAgent(env.action_num) for _ in range(2)]
     env.set_agents(agents)
     payoffs = tournament(env, 1000)
     self.assertEqual(len(payoffs), 2)
Exemplo n.º 9
0
 def __init__(self):
     ''' Set up a DQN agent inside a dedicated TF1 graph/session and
     restore its pretrained weights from ROOT_PATH/mocsar_dqn.
     '''
     self.name = 'PreDQNAgent'
     self.id = "d"
     # Set up the DQN agent and load the pre-trained model
     self.graph = tf.Graph()
     self.sess = tf.Session(graph=self.graph)
     # This agent consumes encoded (non-raw) state observations.
     self.use_raw = False
     # Config
     conf = Config('environ.properties')
     # Set the number of steps for collecting normalization statistics
     # and the initial memory size
     memory_init_size = conf.get_int('memory_init_size')
     norm_step = conf.get_int('norm_step')
     env = rlcard3.make('mocsar_dqn')
     with self.graph.as_default():
         self.agent = DQNAgent(self.sess,
                               scope='dqn',
                               action_num=env.action_num,
                               state_shape=env.state_shape,
                               replay_memory_size=20000,
                               replay_memory_init_size=memory_init_size,
                               norm_step=norm_step,
                               mlp_layers=[512, 512])
         # Collect normalization statistics, then initialize variables
         # before the checkpoint restore below overwrites model weights.
         self.normalize(env, 1000)
         self.sess.run(tf.compat.v1.global_variables_initializer())
     check_point_path = os.path.join(ROOT_PATH, 'mocsar_dqn')
     with self.sess.as_default():
         with self.graph.as_default():
             # Restore only model variables from the newest checkpoint.
             saver = tf.train.Saver(tf.model_variables())
             saver.restore(self.sess,
                           tf.train.latest_checkpoint(check_point_path))
Exemplo n.º 10
0
    def test_single_agent_mode(self):
        """reset/run/set_agents guardrails in and out of single-agent mode."""
        multi_env = rlcard3.make('leduc-holdem')
        # reset() is only valid in single-agent mode.
        with self.assertRaises(ValueError):
            multi_env.reset()

        single_env = rlcard3.make('leduc-holdem', config={'single_agent_mode': True})
        # set_agents() and run() are only valid in multi-agent mode.
        with self.assertRaises(ValueError):
            single_env.set_agents([])

        with self.assertRaises(ValueError):
            single_env.run()

        state = single_env.reset()
        self.assertIsInstance(state, dict)
        # Random rollout: every step must keep returning a usable state.
        for _ in range(100):
            state, _, _ = single_env.step(np.random.choice(state['legal_actions']))
    def __init__(self):
        ''' Load pretrained model: one shared limit-holdem rule agent per player.
        '''
        env = rlcard3.make('limit-holdem')
        shared_agent = LimitholdemRuleAgentV1()
        # One stateless rule agent instance serves every seat.
        self.rule_agents = [shared_agent] * env.player_num
Exemplo n.º 12
0
 def test_step(self):
     """get_player_id stays consistent with the id returned by init_game/step."""
     env = rlcard3.make('no-limit-holdem')
     initial_state, acting_id = env.init_game()
     self.assertEqual(acting_id, env.get_player_id())
     first_legal = initial_state['legal_actions'][0]
     _, acting_id = env.step(first_legal)
     self.assertEqual(acting_id, env.get_player_id())
Exemplo n.º 13
0
 def test_decode_action(self):
     """Each legal UNO action id decodes to its entry in ACTION_LIST."""
     env = rlcard3.make('uno')
     env.init_game()
     for action_id in env._get_legal_actions():
         self.assertLessEqual(env._decode_action(action_id), ACTION_LIST[action_id])
Exemplo n.º 14
0
 def __init__(self):
     ''' Load pretrained model: a CFR agent restored from disk.
     '''
     env = rlcard3.make('leduc-holdem')
     model_path = os.path.join(ROOT_PATH, 'leduc_holdem_cfr')
     self.agent = CFRAgent(env, model_path=model_path)
     self.agent.load()
Exemplo n.º 15
0
 def test_get_legal_actions(self):
     """All legal action ids fall inside the env's action space."""
     env = rlcard3.make('mahjong')
     agents = [RandomAgent(env.action_num) for _ in range(env.player_num)]
     env.set_agents(agents)
     env.init_game()
     for action_id in env._get_legal_actions():
         self.assertLessEqual(action_id, env.action_num - 1)
Exemplo n.º 16
0
def init_environment(conf: Config, env_id: str, config: Dict = None) -> Tuple:
    """
    Initialize a pair of Mocsar environments (training and evaluation).

    :param conf: Mocsar configuration, based on environ.properties
    :param env_id: Mocsar environment id, like 'mocsar'
    :param config: optional environment config forwarded to rlcard3.make
    :return: (env, eval_env)
    """
    # BUG FIX: the default was a mutable `{}` shared across all calls;
    # use None and create a fresh dict per call instead.
    if config is None:
        config = {}

    # Make the environments
    env = rlcard3.make(env_id=env_id, config=config)
    eval_env = rlcard3.make(env_id=env_id, config=config)

    # Apply the configured number of players and cards to both games.
    num_players = conf.get_int('nr_players')
    num_cards = conf.get_int('nr_cards')
    for environment in (env, eval_env):
        environment.game.set_game_params(num_players=num_players,
                                         num_cards=num_cards)

    return env, eval_env
Exemplo n.º 17
0
def test_step():
    """Stepping once advances to the round's current player and keeps the obs shape."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env}")
    initial_state, _ = env.init_game()
    chosen = np.random.choice(initial_state['legal_actions'])
    next_state, next_player = env.step(chosen)
    current_index = env.game.round.current_player_index
    assert next_player == env.game.players.get_playerid(current_index)
    assert next_state['obs'].size == np.array(env.state_shape).prod()
Exemplo n.º 18
0
def test_init_game_and_extract_state():
    """
    Check that the environment can be created at all and yields a sane state.
    """
    env = rlcard3.make('mocsar')

    print(f"Env:{env} test_init_game_and_extract_state")
    initial_state, first_player = env.init_game()
    assert 0 <= first_player <= env.game.get_player_num()
    assert initial_state['obs'].size == np.array(env.state_shape).prod()
Exemplo n.º 19
0
 def test_get_payoffs(self):
     """Blackjack payoffs are always one of -1, 0, or 1."""
     env = rlcard3.make('blackjack')
     for _ in range(100):
         env.init_game()
         # Play the game out with random hit/stand actions.
         while not env.is_over():
             env.step(np.random.choice([0, 1]))
         for payoff in env.get_payoffs():
             self.assertIn(payoff, [-1, 1, 0])
Exemplo n.º 20
0
 def test_run(self):
     """A heads-up no-limit-holdem game is zero-sum across two trajectories."""
     env = rlcard3.make('no-limit-holdem')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 2)
     self.assertEqual(sum(payoffs), 0)
Exemplo n.º 21
0
def test_get_legal_actions():
    """Legal action ids never exceed the game's action-space size."""
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_get_legal_actions")
    env.set_agents([
        RandomAgent(action_num=env.action_num),
        RandomAgent(action_num=env.action_num)
    ])
    env.init_game()
    for candidate in env._get_legal_actions():
        assert candidate <= env.game.get_action_num()
Exemplo n.º 22
0
 def test_decode_action(self):
     """Abstract doudizhu action ids decode to the matching concrete move."""
     env = rlcard3.make('doudizhu')
     env.init_game()
     # Force a known game state so the decoding is deterministic.
     env.game.state['actions'] = ['33366', '33355']
     env.game.judger.playable_cards[0] = [
         '5', '6', '55', '555', '33366', '33355'
     ]
     self.assertEqual(env._decode_action(54), '33366')
     env.game.state['actions'] = ['444', '44466', '44455']
     self.assertEqual(env._decode_action(29), '444')
 def test_decode_action(self):
     """Abstract simple-doudizhu action ids decode to the matching concrete move."""
     env = rlcard3.make('simple-doudizhu')
     env.init_game()
     # Force a known game state so the decoding is deterministic.
     env.game.state['actions'] = ['888TT', '88899']
     env.game.judger.playable_cards[0] = [
         '9', 'T', '99', '999', '888TT', '88899'
     ]
     self.assertEqual(env._decode_action(28), '888TT')
     env.game.state['actions'] = ['888', '88899', '888TT']
     self.assertEqual(env._decode_action(14), '888')
Exemplo n.º 24
0
def test_step_back_enabled():
    """step_back restores both the acting player and the observation."""
    # BUG FIX: `random.seed = 42` rebound the seed function to the int 42
    # instead of seeding the RNG; call it properly for determinism.
    random.seed(42)
    np.random.seed(42)
    env = rlcard3.make('mocsar', config={'allow_step_back': True})
    print(f"Env:{env} test_step_back_enabled")
    state_before, player_id_before = env.init_game()

    print(player_id_before, state_before)
    env.step(state_before['legal_actions'][0])
    state, player_id = env.step_back()
    print(player_id, state)
    # Undoing the single step must return to the starting situation.
    assert player_id == player_id_before
    assert np.array_equal(state['obs'], state_before['obs'])
Exemplo n.º 25
0
    def test_decode_action(self):
        """Decoded actions are valid; ids 3 and 0 map to fold and check."""
        env = rlcard3.make('no-limit-holdem')
        initial_state, _ = env.init_game()
        for candidate in initial_state['legal_actions']:
            self.assertIn(env._decode_action(candidate), env.actions)

        self.assertEqual(env._decode_action(3), 'fold')

        # After a call, action 0 becomes a check.
        env.step(0)
        self.assertEqual(env._decode_action(0), 'check')
Exemplo n.º 26
0
    def test_train(self):
        """After training, CFR's eval_step picks one of the legal actions."""
        env = rlcard3.make('leduc-holdem', config={'allow_step_back': True})
        agent = CFRAgent(env)
        for _ in range(100):
            agent.train()

        # Probe with a fixed observation offering only actions 0 and 2.
        probe_state = {
            'obs': np.array([1., 1., 0., 0., 0., 0.]),
            'legal_actions': [0, 2]
        }
        chosen_action, _ = agent.eval_step(probe_state)
        self.assertIn(chosen_action, [0, 2])
Exemplo n.º 27
0
def test_step_back_disabled():
    """step_back raises with the documented message when not enabled."""
    # BUG FIX: `random.seed = 42` rebound the seed function to the int 42
    # instead of seeding the RNG; call it properly for determinism.
    random.seed(42)
    np.random.seed(42)
    env = rlcard3.make('mocsar')
    print(f"Env:{env} test_step_back_disabled")
    state, player_id = env.init_game()
    legal_actions = state['legal_actions']
    action = legal_actions[0]
    env.step(action)

    with pytest.raises(Exception) as excinfo:
        _ = env.step_back()
    exception_msg = excinfo.value.args[0]
    assert exception_msg == "Step back is off. To use step_back, please set allow_step_back=True in rlcard3.make"
Exemplo n.º 28
0
 def test_run(self):
     """Mahjong runs are zero-sum over four trajectories, training or not."""
     env = rlcard3.make('mahjong')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 4)
     self.assertEqual(sum(payoffs), 0)
     _, payoffs = env.run(is_training=True, seed=1)
     self.assertEqual(sum(payoffs), 0)
Exemplo n.º 29
0
 def test_run(self):
     """Winners' roles are consistent: a lone winner is the landlord, two winners are peasants."""
     env = rlcard3.make('doudizhu')
     env.set_agents(
         [RandomAgent(env.action_num) for _ in range(env.player_num)])
     trajectories, payoffs = env.run(is_training=False)
     self.assertEqual(len(trajectories), 3)
     winners = [pid for pid, payoff in enumerate(payoffs) if payoff == 1]
     if len(winners) == 1:
         self.assertEqual(env.game.players[winners[0]].role, 'landlord')
     if len(winners) == 2:
         self.assertEqual(env.game.players[winners[0]].role, 'peasant')
         self.assertEqual(env.game.players[winners[1]].role, 'peasant')
Exemplo n.º 30
0
    def test_save_and_load(self):
        """A reloaded CFR agent matches the trained one's tables and iteration count."""
        env = rlcard3.make('leduc-holdem', config={'allow_step_back': True})
        trained = CFRAgent(env)
        for _ in range(100):
            trained.train()
        trained.save()

        reloaded = CFRAgent(env)
        reloaded.load()
        self.assertEqual(len(trained.policy), len(reloaded.policy))
        self.assertEqual(len(trained.average_policy),
                         len(reloaded.average_policy))
        self.assertEqual(len(trained.regrets), len(reloaded.regrets))
        self.assertEqual(trained.iteration, reloaded.iteration)