def test_player_game(self):
    """Play a forced-deal game to the end and check recorded Q and features.

    All three seats share a single ``LearningPlayer`` instance (the list is
    built with ``* 3``), so ``players[0]`` is the same object as every seat.
    """
    players = [
        LearningPlayer(name='random', estimation_mode=LearningPlayer.ACTUAL_Q)
    ] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST:
        [Card.ACE] * 4 + [Card.KING] * 4 + [Card.QUEEN] * 4 +
        [Card.JACK] * 4 + [Card.THREE],
        TurnPosition.SECOND:
        [Card.TEN] * 4 + [Card.NINE] * 4 + [Card.EIGHT] * 4 +
        [Card.SEVEN] * 4 + [Card.THREE],
        TurnPosition.THIRD:
        [Card.FIVE] * 4 + [Card.FOUR] * 4 + [Card.SIX] * 4 +
        [Card.TWO] * 4 + [Card.THREE] * 2 + [Card.LITTLE_JOKER] +
        [Card.BIG_JOKER],
    }
    # Skip the betting phase and start the main game from a fixed deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 3)
    game.main_game()
    players[0].compute_future_q(game)

    # game is over
    self.assertGreater(np.sum(np.abs(game.get_scores())), 0)
    self.assertGreater(np.abs(players[0]._record_future_q[-1]), 0.5)

    features = players[0]._derive_features(game)
    landlord_col = players[0].get_feature_index('I_AM_LANDLORD')
    self.assertNotEqual(np.sum(features[:, landlord_col]), 0)
    # it is possible this guy never plays, eventually
    before_landlord_col = players[0].get_feature_index('I_AM_BEFORE_LANDLORD')
    self.assertNotEqual(np.sum(features[:, before_landlord_col]), 0)
def test_endgame_scenario(self):
    """With a tiny forced deal, FIRST must be a winner after two logged moves."""
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST: [Card.ACE] * 1,
        TurnPosition.SECOND: [Card.TEN] * 1,
        TurnPosition.THIRD: [Card.JACK, Card.QUEEN],
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 2)
    game.main_game()
    self.assertIn(TurnPosition.FIRST, game.get_winners())
    self.assertEqual(len(game.get_move_logs()), 2)
def test_llord_winning(self):
    """THIRD holds only four fives and must win with a single logged move."""
    players = [LearningPlayer(name='random')] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST:
        [Card.ACE] * 4 + [Card.KING] * 4 + [Card.QUEEN] * 4 +
        [Card.JACK] * 4 + [Card.THREE],
        TurnPosition.SECOND:
        [Card.TEN] * 4 + [Card.NINE] * 4 + [Card.EIGHT] * 4 +
        [Card.SEVEN] * 4 + [Card.THREE],
        TurnPosition.THIRD: [Card.FIVE] * 4,
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 3)
    game.main_game()
    self.assertIn(TurnPosition.THIRD, game.get_winners())
    self.assertEqual(len(game.get_move_logs()), 1)
def test_bet_3(self):
    """A bet of 3 completes betting immediately without ending the round."""
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    game.play_move(BetMove(0))
    game.play_move(BetMove(3))
    self.assertFalse(game.is_round_over())
    self.assertTrue(game.is_betting_complete())
    self.assertEqual(game.get_bet_amount(), 3)
def test_extended_game(self):
    """A ``copy`` of a finished game reports the same logs, hand and last play."""
    game = LandlordGame(players=[RandomPlayer(name='random')] * 3)
    game.play_round()
    game2 = copy(game)
    self.assertEqual(game.get_move_logs(), game2.get_move_logs())
    self.assertEqual(game.get_hand(TurnPosition.SECOND),
                     game2.get_hand(TurnPosition.SECOND))
    self.assertEqual(game.get_last_played(), game2.get_last_played())
def test_hand_vector_v2(self):
    """Playing SECOND's whole hand leaves an all-zero remaining-hand vector.

    The last three entries of the remaining-hand vector are excluded from the
    check.
    """
    players = [
        LearningPlayer_v2(name='random',
                          estimation_mode=LearningPlayer.ACTUAL_Q)
        for _ in range(3)
    ]
    game = LandlordGame(players=players)
    game._betting_complete = True
    game.force_setup(
        TurnPosition.SECOND,
        {
            TurnPosition.FIRST: [Card.ACE] * 4,
            TurnPosition.SECOND: [Card.TEN] * 3 + [Card.THREE],
            TurnPosition.THIRD: [Card.FIVE] * 3 + [Card.THREE] + [Card.FOUR],
        },
        3,
    )

    # Three tens plus a three is exactly the hand dealt to SECOND above.
    move_type = RankedMoveType(MoveType.TRIPLE_SINGLE_KICKER, Card.TEN)
    best_move = SpecificMove(move_type,
                             cards=Counter({
                                 Card.TEN: 3,
                                 Card.THREE: 1
                             }))
    second_player = players[1]
    move_vector = second_player.compute_move_vector(
        TurnPosition.SECOND, game.get_landlord_position(), best_move)
    full_hand_vector = second_player.compute_remaining_hand_vector(
        game, move_vector, TurnPosition.SECOND)
    remaining_hand_vector = full_hand_vector[:-3]
    self.assertEqual(np.sum(remaining_hand_vector), 0)
def test_landlord_bombing(self):
    """Playing a bomb from a forced bet of 2 raises the bet amount to 4."""
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST: [Card.ACE] * 4 + [Card.THREE],
        TurnPosition.SECOND: [Card.TEN] * 4 + [Card.THREE],
        TurnPosition.THIRD: [Card.FIVE] * 4 + [Card.THREE],
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 2)
    game.play_move(
        SpecificMove(RankedMoveType(MoveType.BOMB, Card.FIVE),
                     Counter({Card.FIVE: 4})))
    self.assertEqual(game._bet_amount, 4)
def test_player_move(self):
    """A move played on the original game must not change a prior copy's hand."""
    players = [LearningPlayer(name='random')] * 3
    game = LandlordGame(players=players)
    first_hand = ([Card.ACE] * 4 + [Card.KING] * 4 + [Card.QUEEN] * 4 +
                  [Card.JACK] * 4 + [Card.THREE])
    second_hand = ([Card.TEN] * 4 + [Card.NINE] * 4 + [Card.EIGHT] * 4 +
                   [Card.SEVEN] * 4 + [Card.THREE])
    third_hand = ([Card.FIVE] * 4 + [Card.FOUR] * 4 + [Card.SIX] * 4 +
                  [Card.TWO] * 4 + [Card.THREE] * 2 + [Card.LITTLE_JOKER] +
                  [Card.BIG_JOKER])
    hands = {
        TurnPosition.FIRST: first_hand,
        TurnPosition.SECOND: second_hand,
        TurnPosition.THIRD: third_hand,
    }
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 3)

    # Snapshot before the move so the two hands can be compared afterwards.
    snapshot = copy(game)
    bomb_of_fives = SpecificMove(RankedMoveType(MoveType.BOMB, Card.FIVE),
                                 Counter({Card.FIVE: 4}))
    game.play_move(bomb_of_fives)
    self.assertNotEqual(snapshot.get_hand(TurnPosition.THIRD),
                        game.get_hand(TurnPosition.THIRD))
def test_full_game(self):
    """Play random rounds until one is scored, then check every player's features.

    A round whose scores are all zero is discarded and replayed with fresh
    players.
    """
    while True:
        players = [LearningPlayer(name='random') for _ in range(3)]
        game = LandlordGame(players=players)
        game.play_round()
        if np.sum(np.abs(game.get_scores())) != 0:
            break

    # game is over
    for player in players:
        # print(players[i].record_future_q[-1])
        # self.assertTrue(np.abs(players[i].record_future_q[-1]) > 0.5)
        features = player._derive_features(game)
        landlord_col = player.get_feature_index('I_AM_LANDLORD')
        self.assertNotEqual(np.sum(features[:, landlord_col]), 0)
        # it is possible this guy never plays, eventually
        before_landlord_col = player.get_feature_index('I_AM_BEFORE_LANDLORD')
        self.assertNotEqual(np.sum(features[:, before_landlord_col]), 0)
def test_landlord_game_ending(self):
    """``move_ends_game`` is true for the bomb of fives and false for the bomb of tens."""
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    game._betting_complete = True
    game.force_setup(
        TurnPosition.THIRD,
        {
            TurnPosition.FIRST: [Card.ACE] * 4,
            TurnPosition.SECOND: [Card.TEN] * 4,
            TurnPosition.THIRD: [Card.FIVE] * 4,
        },
        2,
    )

    bomb_of_fives = SpecificMove(RankedMoveType(MoveType.BOMB, Card.FIVE),
                                 Counter({Card.FIVE: 4}))
    self.assertTrue(game.move_ends_game(bomb_of_fives))

    bomb_of_tens = SpecificMove(RankedMoveType(MoveType.BOMB, Card.TEN),
                                Counter({Card.TEN: 4}))
    self.assertFalse(game.move_ends_game(bomb_of_tens))
def play_game(self):
    """Play rounds with freshly picked players until one ends with winners.

    The winning round is recorded for its winners, or for every position
    with probability ``self.record_everyone_pct``, and its result is passed
    to ``track_stats``. Rounds without winners have their players' records
    reset before the next attempt.
    """
    while True:
        players = self.pick_players()
        game = LandlordGame(players=players)
        # play a meaningful game
        game.play_round()
        if game.has_winners():
            record_all = random.random() < self.record_everyone_pct
            positions = list(TurnPosition) if record_all else game._winners
            for position in positions:
                self.record_player(game, game.get_ai(position))
            self.track_stats(game)
            break
        # clear out in case a full game wasn't played
        for unfinished_player in players:
            unfinished_player._reset_records()
def test_record_actual_q(self):
    """Check that each estimated Q lies strictly between consecutive state Qs.

    Three copies of a saved network play one round; afterwards, wherever
    player 0's recorded state Q changes between consecutive steps, the
    estimated Q for the earlier step must fall strictly between the two.
    """

    def load_best_sim_net(net):
        return LearningPlayer(name=net,
                              net_dir='../models/' + net,
                              estimation_mode=LearningPlayer.ACTUAL_Q,
                              epsilon=0,
                              discount_factor=1)

    players = [load_best_sim_net('4_2_sim4_model10') for _ in range(3)]
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        best_move, _ = curr_player.decide_best_move(game)
        curr_player.make_move(game)
        game.play_move(best_move)

    for player in players:
        player.compute_future_q(game)

    record_state = players[0]._record_state_q
    future_q = players[0].get_estimated_qs()
    # assert in bounds based on update function
    for i in range(len(record_state) - 1):
        current_q, next_q = record_state[i], record_state[i + 1]
        if next_q < current_q:
            self.assertLess(next_q, future_q[i])
            self.assertLess(future_q[i], current_q)
        elif next_q > current_q:
            self.assertGreater(next_q, future_q[i])
            self.assertGreater(future_q[i], current_q)

    self.assertEqual(len(players[0].get_record_hand_vectors()),
                     len(players[0].get_estimated_qs()))
def test_nobet_game(self):
    """A round played by three ``NoBetPlayer`` seats leaves all scores at zero."""
    game = LandlordGame(players=[NoBetPlayer(name='random')] * 3)
    game.play_round()
    self.assertEqual(np.sum(np.abs(game._scores)), 0)
def test_self_feed(self):
    """Check that recorded inputs reproduce the Q-values chosen during play.

    During the round, every move by ``players[0]`` is re-scored by feeding
    its history, move and hand vectors through the player's nets and compared
    with the Q returned by ``decide_best_move``. After the round, the
    player's recorded matrices and vectors must match the ones captured here,
    and re-predicting from the records must match the estimated Qs.
    """
    players = [self.load_v2_net("4_8_actualq1_model20") for _ in range(3)]
    #players = [self.load_v2_net("4_2_sim4_model15") for _ in range(3)]
    game = LandlordGame(players=players)
    tracked = players[0]

    all_history_features = []
    history_vectors = []
    all_hand_vectors = []
    all_move_vectors = []
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()
        best_move, best_move_q = current_player.decide_best_move(
            game, current_position)
        current_player.record_move(game, best_move, best_move_q,
                                   current_position)
        if current_player == tracked:
            history_features = tracked._derive_features(game)
            all_history_features.append(history_features)
            # all the moves we make from here will not affect the history, so assess it and copy
            history_vectors.append(
                tracked.history_net.predict(np.array([history_features]),
                                            batch_size=1)[0])
            # create features for each of the possible moves from this position
            all_move_vectors.append(
                tracked.compute_move_vector(current_position,
                                            game.get_landlord_position(),
                                            best_move))
            all_hand_vectors.append(
                tracked.compute_remaining_hand_vector(
                    game, all_move_vectors[-1], current_position))
            predicted_q = tracked.position_net.predict([
                np.array([history_vectors[-1]]),
                np.array([all_move_vectors[-1]]),
                np.array([all_hand_vectors[-1]])
            ])[0][0]
            self.assertAlmostEqual(predicted_q, best_move_q, places=4)
        game.play_move(best_move)

    tracked.compute_future_q(game)

    history_matrices = tracked.get_record_history_matrices()
    for expected, recorded in zip(all_history_features, history_matrices):
        self.assertTrue(np.allclose(expected, recorded))
    move_vectors = tracked.get_record_move_vectors()
    for expected, recorded in zip(all_move_vectors, move_vectors):
        self.assertTrue(np.allclose(expected, recorded))
    hand_vectors = tracked.get_record_hand_vectors()
    for expected, recorded in zip(all_hand_vectors, hand_vectors):
        self.assertTrue(np.allclose(expected, recorded))

    qs = tracked.get_estimated_qs()
    pred_qs = []
    # recreate
    # qs stays in the zip so the iteration length is unchanged; its items are unused.
    for i, (history_matrix, move_vector, hand_vector, _q) in enumerate(
            zip(history_matrices, move_vectors, hand_vectors, qs)):
        history_vector = tracked.history_net.predict(
            np.array([history_matrix]))[0]
        self.assertTrue(np.allclose(history_vector, history_vectors[i]))
        pred_qs.append(
            tracked.position_net.predict([[history_vector], [move_vector],
                                          [hand_vector]])[0][0])
    # works only if learning rate is 0
    self.assertTrue(np.allclose(qs, pred_qs))
def test_features_v2(self):
    """Each recorded history/move/hand entry must match the one computed just before the move.

    After the round, if there are winners, the largest absolute estimated Q
    of player 0 must be 1; the first recorded history row must be ``int8``.
    """
    players = [
        LearningPlayer_v2(name='random',
                          epsilon=0,
                          estimation_mode=LearningPlayer.ACTUAL_Q,
                          learning_rate=1) for _ in range(3)
    ]
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        curr_position = game.get_current_position()
        curr_features = curr_player._derive_features(game)
        best_move, best_move_q = curr_player.decide_best_move(game)
        curr_move_vector = curr_player.compute_move_vector(
            curr_position, game.get_landlord_position(), best_move)
        curr_hand_vector = curr_player.compute_remaining_hand_vector(
            game, curr_move_vector, curr_position)
        curr_player.record_move(game, best_move, best_move_q, curr_position)
        game.play_move(best_move)

        self.assertTrue(
            np.allclose(curr_features,
                        curr_player.record_history_matrices[-1]))
        self.assertTrue(
            np.allclose(curr_move_vector,
                        curr_player.record_move_vectors[-1]))
        self.assertTrue(
            np.allclose(curr_hand_vector,
                        curr_player.record_hand_vectors[-1]))

    players[0].compute_future_q(game)
    if game.has_winners():
        max_abs_q = np.max(np.abs(players[0].get_estimated_qs()))
        print(max_abs_q)
        self.assertEqual(max_abs_q, 1)
    self.assertEqual(players[0].record_history_matrices[0][0].dtype, np.int8)
def track_stats(self, game: LandlordGame):
    """Append the finished round's (winner names, loser names) to ``self.results``.

    Both elements of the appended pair are tuples of player names. The round
    must already be over; this is checked with an ``assert``.
    """
    assert game.is_round_over()
    winner_names = tuple(ai.get_name() for ai in game.get_winner_ais())
    loser_names = tuple(ai.get_name() for ai in game.get_loser_ais())
    outcome = (winner_names, loser_names)
    self.results.append(outcome)
def play_against_two(players, show_q=True):
    """Play one round with ``players``, printing every move as it is made.

    Args:
        players: the players handed to ``LandlordGame``.
        show_q: when true, the Q-value returned by ``decide_best_move`` is
            printed in parentheses after each move.
    """
    game = LandlordGame(players)
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()
        best_move, best_move_q = current_player.decide_best_move(game)
        best_move_q_str = f'({best_move_q})' if show_q else ''
        role_name = game.get_position_role_name(current_position)
        hand_size = len(game.get_hand(current_position))
        print(current_player.get_name(), f"({role_name}, {hand_size}):",
              best_move, best_move_q_str)
        game.play_move(best_move)
        # isinstance also accepts subclasses, unlike an exact type() comparison.
        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())
    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
def test_features(self):
    """After every move, the newest recorded entries must equal the values computed around it.

    The features and hand vector are computed before ``make_move``; the move
    vector is computed from the move it returns.
    """
    players = [
        LearningPlayer(name='random',
                       estimation_mode=LearningPlayer.ACTUAL_Q)
        for _ in range(3)
    ]
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        expected_features = curr_player._derive_features(game)
        expected_hand = game.get_current_player().get_hand_vector(
            game, game.get_current_position())
        move = game.get_current_player().make_move(
            game, game.get_current_position())
        expected_move = game.get_current_player().compute_move_vector(
            game.get_current_position(), game.get_landlord_position(), move)
        game.play_move(move)

        checks = (
            (expected_features, curr_player.record_history_matrices[-1]),
            (expected_move, curr_player.record_move_vectors[-1]),
            (expected_hand, curr_player.record_hand_vectors[-1]),
        )
        for expected, recorded in checks:
            self.assertTrue(np.allclose(expected, recorded))
def test_best_montecarlo(self):
    """Smoke test: one round with a shared ``LearningPlayer`` runs without raising."""
    shared_player = LearningPlayer(name='random')
    players = [shared_player] * 3
    game = LandlordGame(players=players)
    game.play_round(debug=False)
def test_peasant_winning(self):
    """Check FIRST's hand vector, then that FIRST and SECOND win after two logged moves."""
    players = [LearningPlayer(name='random')] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST: [Card.ACE] * 4,
        TurnPosition.SECOND: [Card.TEN] + [Card.THREE],
        TurnPosition.THIRD: [Card.FIVE] * 3 + [Card.THREE] + [Card.FOUR],
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 3)

    hand_vector = players[0].get_hand_vector(game, TurnPosition.FIRST)
    self.assertEqual(hand_vector[11], 4)
    # The last three entries equal the hand sizes dealt above (5, 2, 4).
    self.assertEqual(hand_vector[-2], 2)
    self.assertEqual(hand_vector[-3], 5)
    self.assertEqual(hand_vector[-1], 4)
    # self.assertTrue(np.sum(hand_vector) == 4)

    game.main_game()
    self.assertNotIn(TurnPosition.THIRD, game.get_winners())
    self.assertIn(TurnPosition.SECOND, game.get_winners())
    self.assertIn(TurnPosition.FIRST, game.get_winners())
    self.assertEqual(len(game.get_move_logs()), 2)
def test_bet_1(self):
    """Three zero bets end the round; a bet of 1 in the last slot would not."""
    game = LandlordGame(players=[LearningPlayer('v1')] * 3)
    for _ in range(2):
        game.play_move(BetMove(0))
    self.assertFalse(game.is_round_over())
    self.assertFalse(game.is_betting_complete())

    # Only a third pass ends the game; an actual bet keeps it going.
    self.assertFalse(game.move_ends_game(BetMove(1)))
    self.assertTrue(game.move_ends_game(BetMove(0)))

    game.play_move(BetMove(0))
    self.assertTrue(game.is_round_over())
    self.assertTrue(game.is_betting_complete())
def test_many_games(self):
    """Smoke test: ten rounds between random players run without raising."""
    rounds_to_play = 10
    for _ in range(rounds_to_play):
        random_players = [RandomPlayer(name='random')] * 3
        game = LandlordGame(players=random_players)
        game.play_round()
def test_sweep(self):
    """THIRD goes out on a bomb while the others still hold full deals.

    The score for THIRD must be ``2 * 2 * 2 * SWEEP_MULTIPLIER``, with
    ``get_r()`` equal to 24 and ``get_winbased_r()`` equal to 1.
    """
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST: [Card.ACE] * LandlordGame.DEAL_SIZE,
        TurnPosition.SECOND: [Card.TEN] * LandlordGame.DEAL_SIZE,
        TurnPosition.THIRD: [Card.FIVE] * 4,
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 2)
    game.play_move(
        SpecificMove(RankedMoveType(MoveType.BOMB, Card.FIVE),
                     Counter({Card.FIVE: 4})))
    self.assertTrue(game.peasants_have_no_plays())
    self.assertEqual(game.get_scores()[TurnPosition.THIRD],
                     2 * 2 * 2 * LandlordGame.SWEEP_MULTIPLIER)
    self.assertEqual(game.get_r(), 24)
    self.assertEqual(game.get_winbased_r(), 1)
def test_bet_2(self):
    """A lone bet of 1 followed by passes wins the bet; play returns to that bettor."""
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    game.play_move(BetMove(0))
    self.assertEqual(len(game.get_legal_moves()), 4)
    # Remember who is about to bet 1 so the turn can be checked afterwards.
    one_better = game.get_current_position()
    game.play_move(BetMove(1))
    self.assertFalse(game.is_round_over())
    self.assertFalse(game.is_betting_complete())
    game.play_move(BetMove(0))
    self.assertFalse(game.is_round_over())
    self.assertTrue(game.is_betting_complete())
    self.assertEqual(game.get_bet_amount(), 1)
    self.assertEqual(game.get_current_position(), one_better)
def test_setup(self):
    """Check turn order, move logging and the derived feature matrix after a forced setup.

    THIRD passes (``None``), the next position plays a bomb of kings, and the
    feature matrix is then derived and spot-checked.
    """
    players = [LearningPlayer('v1', None)] * 3
    game = LandlordGame(players=players)
    hands = {
        TurnPosition.FIRST:
        [Card.ACE] * 4 + [Card.KING] * 4 + [Card.QUEEN] * 4 +
        [Card.JACK] * 4 + [Card.THREE],
        TurnPosition.SECOND:
        [Card.TEN] * 4 + [Card.NINE] * 4 + [Card.EIGHT] * 4 +
        [Card.SEVEN] * 4 + [Card.THREE],
        TurnPosition.THIRD:
        [Card.FIVE] * 4 + [Card.FOUR] * 4 + [Card.SIX] * 4 +
        [Card.TWO] * 4 + [Card.THREE] * 2 + [Card.LITTLE_JOKER] +
        [Card.BIG_JOKER],
    }
    # Skip betting and start directly from the forced deal.
    game._betting_complete = True
    game.force_setup(TurnPosition.THIRD, hands, 2)
    self.assertEqual(game.get_current_position(), TurnPosition.THIRD)

    game.play_move(None)
    self.assertEqual(game.get_current_position(), TurnPosition.THIRD.next())
    self.assertEqual(len(game.get_move_logs()), 1)
    self.assertIsNone(game.get_move_logs()[0][1])

    game.play_move(
        SpecificMove(RankedMoveType(MoveType.BOMB, Card.KING),
                     cards=Counter({Card.KING: 4})))
    self.assertEqual(game.get_current_position(), TurnPosition.SECOND)

    feature_matrix = players[1]._derive_features(game)
    self.assertEqual(feature_matrix[0][-6], 1)
    self.assertEqual(feature_matrix[0][-2], 1)
    self.assertEqual(feature_matrix[1][10], 4)
    self.assertEqual(np.sum(feature_matrix), 7)
def test_betting(self):
    """Run the betting phase by hand, then check features after a bomb of aces.

    A ``None`` bet must leave the last played move as the earlier
    ``BetMove(2)``.
    """
    players = [LearningPlayer('v1')] * 3
    game = LandlordGame(players=players)
    game.force_current_position(TurnPosition.SECOND)
    game.force_kitty([Card.LITTLE_JOKER, Card.BIG_JOKER, Card.THREE])
    game._make_bet_move(BetMove(2))
    game._make_bet_move(None)
    self.assertEqual(game.get_last_played(), BetMove(2))
    game._make_bet_move(BetMove(3))

    hands = {
        TurnPosition.FIRST:
        [Card.ACE] * 4 + [Card.KING] * 4 + [Card.QUEEN] * 4 +
        [Card.JACK] * 4 + [Card.THREE],
        TurnPosition.SECOND:
        [Card.TEN] * 4 + [Card.NINE] * 4 + [Card.EIGHT] * 4 +
        [Card.SEVEN] * 4 + [Card.THREE],
        TurnPosition.THIRD:
        [Card.FIVE] * 4 + [Card.FOUR] * 4 + [Card.SIX] * 4 +
        [Card.TWO] * 4 + [Card.THREE] * 1,
    }
    game.force_setup(TurnPosition.FIRST, hands, 2)
    game.play_move(
        SpecificMove(RankedMoveType(MoveType.BOMB, Card.ACE),
                     cards=Counter({Card.ACE: 4})))

    feature_matrix = players[1]._derive_features(game)
    self.assertEqual(feature_matrix[0][-3], 2)
    self.assertEqual(np.sum(players[1]._derive_features(game)), 16)
learning_rate=0.3)
# NOTE(review): the line above is the tail of a definition that begins
# outside this view; it is left untouched.


# Script entry point: play rounds between three saved v2 networks until a
# round ends with winners, then print each player's estimated Q-values and
# recorded state Q-values.
if __name__ == "__main__":
    #players = [load_net('4_11_actualq4_model20'),
    #           load_net('4_11_actualq4_model20'),
    #           load_net('4_11_actualq4_model20')]
    players = [
        load_v2_net('4_13_stream2_model3_170', '../stream_models/'),
        load_v2_net('4_13_stream2_model2_194', '../stream_models/'),
        load_v2_net('4_13_stream2_model1_141', '../stream_models/')
    ]
    # Replay until a round produces winners; the same player objects are
    # reused for every attempt.
    while True:
        game = LandlordGame(players=players)
        game.play_round(debug=True)
        if game.has_winners():
            break

    def printout_floats(array):
        """Print the values of ``array`` with three decimals, comma-separated."""
        print(', '.join(["%.3f" % val for val in array]))

    print('\n')
    for i in range(3):
        # Compute the future Q-values for the finished game before printing.
        players[i].compute_future_q(game)
        print(players[i].get_name())
        printout_floats(players[i].get_estimated_qs())
        printout_floats(players[i]._record_state_q)
        print('\n')
def human_game(player_names, perspective):
    """Run an interactive round with human players, seen from one player's hand.

    Args:
        player_names: names used to build one ``HumanPlayer`` per entry.
        perspective: name of the player whose hand is entered through
            ``manual_hand()`` and whose moves are played with
            ``hand_known=True``.

    Each move is printed with the mover's role, hand size and Q-value; kitty
    reveals and the winners' names are printed as they occur.
    """
    perspective_hand = None
    players = []
    for player_name in player_names:
        player_is_perspective = (player_name == perspective)
        if player_is_perspective:
            # Ask for the hand before this player is constructed.
            perspective_hand = manual_hand()
        players.append(
            HumanPlayer(name=player_name,
                        reference_player=reference_player,
                        known_hand=player_is_perspective,
                        ai_before=player_is_perspective))

    game = LandlordGame(players, kitty_callback=manual_kitty)
    first_player = get_first_player(game)
    game.force_current_position(first_player)
    game.force_hand(perspective_position(game, perspective), perspective_hand)

    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()
        best_move, best_move_q = current_player.decide_best_move(game)
        role_name = game.get_position_role_name(current_position)
        hand_size = len(game.get_hand(current_position))
        print(current_player.get_name(), f"({role_name}, {hand_size}):",
              best_move, f'({best_move_q})')
        # play with known hand if it matches perspective
        game.play_move(best_move,
                       hand_known=current_player.get_name() == perspective)
        # isinstance also accepts subclasses, unlike an exact type() comparison.
        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())

    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())