Example No. 1
    def testCase1(self):
        # Two players that always fold against one that never folds; compete plays one full game.
        env = SevenKingEnv()
        players = [
            AlwaysFoldPlayer(),
            AlwaysFoldPlayer(),
            AlwaysNotFoldPlayer()
        ]
        env.compete(env, players)
Example No. 2
    def testScores1(self):
        """
        An empty action string must not appear in available_actions and must be
        rejected by is_action_valid.
        """
        env = SevenKingEnv()
        infos, public_state, person_states, private_state = env.init()

        self.assertTrue("" not in infos[public_state.turn].person_state.available_actions)
        self.assertFalse(env.is_action_valid(SevenKingAction.lookup(""), public_state, person_states[public_state.turn]))
Example No. 3
    def testRandom(self):
        """

        """
        env = SevenKingEnv()
        env.num_players = 2
        players = [roomai.common.RandomPlayer() for i in range(2)]

        for i in range(100):
            SevenKingEnv.compete(env, players)
Example No. 4
    def testScores(self):
        """

        """
        env = SevenKingEnv()
        env.num_players = 3

        print("aaa")
        players = [AlwaysFoldPlayer(), AlwaysFoldPlayer(), AlwaysNotFoldPlayer()]
        scores  = env.compete(env, players)
        print(scores)

        self.assertEqual(scores[0],-1)
        self.assertEqual(scores[1],-1)
        self.assertEqual(scores[2],2)
Example No. 5
def Train(params=None):
    # initialization (avoid a mutable default argument)
    if params is None:
        params = dict()
    env = SevenKingEnv()
    player = SevenKingPlayer()

    num_players = params.get("num_players", 2)
    num_iter = params.get("num_iter", 10000)

    # one (initially uniform) reach probability per player
    probs = [1.0 for i in range(num_players)]

    # train each player position once per iteration
    for i in range(num_iter):
        for p in range(num_players):
            CRMTrain(env, p, player, probs)
            # OutcomeSamplingCRM(env, p, player, probs, 1)

    return player
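
For orientation, a minimal call to this trainer might look like the sketch below. It only passes the two keys the function reads from params ("num_players" and "num_iter") and assumes Train and its dependencies (SevenKingEnv, SevenKingPlayer, CRMTrain) are importable from the surrounding module; treat it as an illustration, not part of the original code.

# Hypothetical driver: train a two-player Seven King strategy with a reduced
# iteration count, then keep the returned player for later evaluation.
trained_player = Train({"num_players": 2, "num_iter": 1000})
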
Example No. 6
    def testEnv(self):
        """

        """
        env = SevenKingEnv()

        infos, public_state, person_states, private_state = env.init({"num_players":2})
        assert(len(infos) == 2)
        turn = public_state.turn
        self.show_hand_card(person_states[turn].hand_cards)
        print(turn)
        print("available_actions=", person_states[turn].available_actions.keys())
        print("available_actions_v=", person_states[turn].available_actions.values())


        action = SevenKingAction("%s" % (person_states[turn].hand_cards[0].key))
        infos, public_state, person_states, private_state = env.forward(action)
Example No. 7
    def testScores(self):
        """

        """
        env = SevenKingEnv()

        print("aaa")
        players = [
            AlwaysFoldPlayer(),
            AlwaysFoldPlayer(),
            AlwaysNotFoldPlayer(),
            roomai.common.RandomPlayerChance()
        ]
        scores = env.compete(env, players)
        print(scores)

        self.assertEqual(scores[0], -1)
        self.assertEqual(scores[1], -1)
        self.assertEqual(scores[2], 2)
Example No. 8
    def testScores1(self):
        """

        """
        env = SevenKingEnv()
        infos, public_state, person_states, private_state = env.init()

        self.assertTrue(
            "" not in infos[public_state.turn].person_state.available_actions)
        self.assertFalse(
            env.is_action_valid(SevenKingAction.lookup(""), public_state,
                                person_states[public_state.turn]))


if __name__ == "__main__":
    env = SevenKingEnv()
    players = [
        AlwaysMaxPlayer(),
        AlwaysNotFoldPlayer(),
        AlwaysMinPlayer(),
        roomai.common.RandomPlayer()
    ]
    import time
    start = time.time()
    for i in range(10):
        scores = env.compete(env, players)
        print(scores)
    end = time.time()
    print(end - start)
Example No. 9
#
# output = RNN(xs, weights, biases)
# output_reshape = tf.reshape(output, [-1, TIME_STEPS, OUTPUT_SIZE])
# cost = tf.losses.mean_squared_error(labels=ys, predictions=output_reshape)
# train = tf.train.AdamOptimizer(LR).minimize(cost)
# check = tf.add_check_numerics_ops()

if __name__ == '__main__':
    # model = LSTMRNN(TIME_STEPS, INPUT_SIZE, OUTPUT_SIZE, CELL_SIZE, BATCH_SIZE)

    # sess = tf.Session()
    # sess.run(tf.global_variables_initializer())

    num_players = 2
    env = SevenKingEnv({
        'param_num_normal_players': num_players,
        'param_backward_enable': True
    })
    player = SevenKingPlayer()
    player.rnn_model.model()
    player.rnn_model.sess.run(tf.global_variables_initializer())
    for i in range(2000):
        # each iteration: Train returns (seq, res), which is fed to the RNN's train_func
        seq, res = Train(player, env, num_players)
        player.rnn_model.train_func(seq, res)

        # k = 0
        # while (k + BATCH_SIZE) < len(res):
        #
        #     batch_x = seq[k:k + BATCH_SIZE]
        #     batch_y = res[k:k + BATCH_SIZE]
Example No. 10
    def testCase1(self):
        import roomai.sevenking
        env = SevenKingEnv()
        # list() keeps this working under Python 3, where .values() is a view
        all_cards = list(roomai.sevenking.AllSevenKingPokerCards.values())
        infos, public_state, person_states, private_state = env.init(
            {"allcards": all_cards})
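
Putting Example No. 6 and Example No. 10 together, the short sketch below deals from a fully specified deck and then plays the first card of the player to act. It only reuses calls that already appear in the examples above; the import path roomai.sevenking for SevenKingEnv and SevenKingAction is an assumption, so treat this as an illustration rather than a verified test.

import roomai.sevenking
from roomai.sevenking import SevenKingEnv, SevenKingAction

# Deal from a fixed, fully specified deck so the opening hands are reproducible.
env = SevenKingEnv()
all_cards = list(roomai.sevenking.AllSevenKingPokerCards.values())
infos, public_state, person_states, private_state = env.init({"allcards": all_cards})

# Play the first card held by the player whose turn it is (as in Example No. 6).
turn = public_state.turn
action = SevenKingAction("%s" % person_states[turn].hand_cards[0].key)
infos, public_state, person_states, private_state = env.forward(action)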