def testCase1(self):
    """Smoke test: run one full game with two folding players and one non-folder."""
    env = SevenKingEnv()
    contestants = [
        AlwaysFoldPlayer(),
        AlwaysFoldPlayer(),
        AlwaysNotFoldPlayer(),
    ]
    env.compete(env, contestants)
def testScores1(self):
    """The empty action string must be neither available nor valid at game start."""
    env = SevenKingEnv()
    infos, public_state, person_states, private_state = env.init()
    current = public_state.turn
    self.assertTrue("" not in infos[current].person_state.available_actions)
    empty_action = SevenKingAction.lookup("")
    self.assertFalse(
        env.is_action_valid(empty_action, public_state, person_states[current])
    )
def testRandom(self):
    """Smoke test: play 100 two-player games with purely random players."""
    env = SevenKingEnv()
    env.num_players = 2
    random_players = [roomai.common.RandomPlayer() for _ in range(2)]
    for _ in range(100):
        SevenKingEnv.compete(env, random_players)
def testScores(self):
    """Two always-fold players should each score -1; the non-folder scores 2.

    Fixed: the original used Python 2 print statements (`print "aaa"`,
    `print scores`), which are syntax errors under Python 3 and inconsistent
    with the other tests in this suite; converted to print() calls.
    """
    env = SevenKingEnv()
    env.num_players = 3
    print("aaa")  # debug marker retained from original
    players = [AlwaysFoldPlayer(), AlwaysFoldPlayer(), AlwaysNotFoldPlayer()]
    scores = env.compete(env, players)
    print(scores)  # debug output retained from original
    self.assertEqual(scores[0], -1)
    self.assertEqual(scores[1], -1)
    self.assertEqual(scores[2], 2)
def Train(params=None):
    """Train a SevenKing player with counterfactual-regret minimization.

    Fixed: the original signature used a mutable default argument
    (`params=dict()`), which is shared across calls; replaced with the
    None-sentinel idiom. The key-presence `if/else` pairs were collapsed
    into `dict.get` with the same defaults.

    Args:
        params: optional dict of settings. Recognized keys:
            "num_players" — number of players (default 2);
            "num_iter"    — number of training iterations (default 10000).

    Returns:
        The trained SevenKingPlayer instance.
    """
    if params is None:  # avoid the shared mutable-default pitfall
        params = dict()

    # initialization
    env = SevenKingEnv()
    player = SevenKingPlayer()
    num_players = params.get("num_players", 2)
    num_iter = params.get("num_iter", 10000)

    # Start every player with reach probability 1.0.
    probs = [1.0 for _ in range(num_players)]
    for _ in range(num_iter):
        for p in range(num_players):
            CRMTrain(env, p, player, probs)
            # OutcomeSamplingCRM(env, p, player, probs, 1)
    return player
def testEnv(self):
    """Initialize a two-player game, inspect the first hand, and take one step."""
    env = SevenKingEnv()
    infos, public_state, person_states, private_state = env.init({"num_players": 2})
    assert len(infos) == 2
    turn = public_state.turn
    self.show_hand_card(person_states[turn].hand_cards)
    print(turn)
    print("available_actions=", person_states[turn].available_actions.keys())
    print("available_actions_v=", person_states[turn].available_actions.values())
    first_key = person_states[turn].hand_cards[0].key
    action = SevenKingAction("%s" % (first_key))
    infos, public_state, person_states, private_state = env.forward(action)
def testScores(self):
    """Folding players lose (-1 each); the non-folder wins (+2)."""
    env = SevenKingEnv()
    print("aaa")
    contestants = [
        AlwaysFoldPlayer(),
        AlwaysFoldPlayer(),
        AlwaysNotFoldPlayer(),
        roomai.common.RandomPlayerChance(),
    ]
    scores = env.compete(env, contestants)
    print(scores)
    self.assertEqual(scores[0], -1)
    self.assertEqual(scores[1], -1)
    self.assertEqual(scores[2], 2)
def testScores1(self):
    """The empty action must be neither available nor valid after init()."""
    env = SevenKingEnv()
    infos, public_state, person_states, private_state = env.init()
    turn = public_state.turn
    self.assertTrue("" not in infos[turn].person_state.available_actions)
    self.assertFalse(
        env.is_action_valid(
            SevenKingAction.lookup(""), public_state, person_states[turn]
        )
    )


if __name__ == "__main__":
    # Benchmark: time ten complete four-player games.
    env = SevenKingEnv()
    players = [
        AlwaysMaxPlayer(),
        AlwaysNotFoldPlayer(),
        AlwaysMinPlayer(),
        roomai.common.RandomPlayer(),
    ]
    import time

    start = time.time()
    for i in range(10):
        scores = env.compete(env, players)
        print(scores)
    end = time.time()
    print(end - start)
# NOTE(review): the commented-out RNN/LSTM graph setup and mini-batch slicing
# code that previously surrounded this script was dead code and has been
# removed; recover it from version control if it is ever needed again.

if __name__ == '__main__':
    # Self-play training loop for the SevenKing RNN player.
    num_players = 2
    env = SevenKingEnv({
        'param_num_normal_players': num_players,
        'param_backward_enable': True,
    })
    player = SevenKingPlayer()
    # Build the TensorFlow graph, then initialize all variables in its session.
    player.rnn_model.model()
    player.rnn_model.sess.run(tf.global_variables_initializer())
    for i in range(2000):
        # Each Train call returns a (sequence, result) batch from self-play
        # games; the RNN is fit on that batch immediately.
        # NOTE(review): assumes Train here is the (player, env, num_players)
        # variant from this script's module — TODO confirm against caller.
        seq, res = Train(player, env, num_players)
        player.rnn_model.train_func(seq, res)
def testCase1(self):
    """Initialize the environment with the full deck supplied explicitly."""
    import roomai.sevenking
    env = SevenKingEnv()
    deck = roomai.sevenking.AllSevenKingPokerCards.values()
    infos, public_state, person_states, private_state = env.init(
        {"allcards": deck}
    )