def test_features(self):
    players = [
        LearningPlayer(name='random',
                       estimation_mode=LearningPlayer.ACTUAL_Q)
        for _ in range(3)
    ]
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        curr_features = curr_player._derive_features(game)
        curr_hand_vector = curr_player.get_hand_vector(
            game, game.get_current_position())
        move = curr_player.make_move(game, game.get_current_position())
        curr_move_vector = curr_player.compute_move_vector(
            game.get_current_position(), game.get_landlord_position(), move)
        game.play_move(move)
        self.assertTrue(
            np.allclose(curr_features,
                        curr_player.record_history_matrices[-1]))
        self.assertTrue(
            np.allclose(curr_move_vector,
                        curr_player.record_move_vectors[-1]))
        self.assertTrue(
            np.allclose(curr_hand_vector,
                        curr_player.record_hand_vectors[-1]))
def test_features_v2(self):
    players = [
        LearningPlayer_v2(name='random',
                          epsilon=0,
                          estimation_mode=LearningPlayer.ACTUAL_Q,
                          learning_rate=1) for _ in range(3)
    ]
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        curr_features = curr_player._derive_features(game)
        best_move, best_move_q = curr_player.decide_best_move(game)
        curr_move_vector = curr_player.compute_move_vector(
            game.get_current_position(), game.get_landlord_position(),
            best_move)
        curr_hand_vector = curr_player.compute_remaining_hand_vector(
            game, curr_move_vector, game.get_current_position())
        curr_player.record_move(game, best_move, best_move_q,
                                game.get_current_position())
        game.play_move(best_move)
        self.assertTrue(
            np.allclose(curr_features,
                        curr_player.record_history_matrices[-1]))
        self.assertTrue(
            np.allclose(curr_move_vector,
                        curr_player.record_move_vectors[-1]))
        self.assertTrue(
            np.allclose(curr_hand_vector,
                        curr_player.record_hand_vectors[-1]))

    # round is over: back the final result up into the recorded Q estimates
    players[0].compute_future_q(game)
    if game.has_winners():
        print(np.max(np.abs(players[0].get_estimated_qs())))
        # with learning_rate=1, at least one recorded Q should equal the
        # final reward of +/-1 exactly
        self.assertTrue(np.max(np.abs(players[0].get_estimated_qs())) == 1)
    self.assertTrue(
        players[0].record_history_matrices[0][0].dtype == np.int8)
def test_record_actual_q(self):
    def load_best_sim_net(net):
        return LearningPlayer(name=net,
                              net_dir='../models/' + net,
                              estimation_mode=LearningPlayer.ACTUAL_Q,
                              epsilon=0,
                              discount_factor=1)

    players = [load_best_sim_net('4_2_sim4_model10') for _ in range(3)]
    player_0_scores = []
    game = LandlordGame(players=players)
    while not game.is_round_over():
        curr_player = game.get_current_player()
        best_move, best_move_q = curr_player.decide_best_move(game)
        if curr_player == players[0]:
            player_0_scores.append(best_move_q)
        curr_player.make_move(game)
        game.play_move(best_move)

    for player in players:
        player.compute_future_q(game)
    record_state = players[0]._record_state_q
    future_q = players[0].get_estimated_qs()
    # assert in bounds based on update function: each updated Q must land
    # strictly between the recorded Q and the next recorded Q
    for i in range(len(record_state) - 1):
        if record_state[i + 1] < record_state[i]:
            self.assertTrue(
                record_state[i + 1] < future_q[i] < record_state[i])
        elif record_state[i + 1] > record_state[i]:
            self.assertTrue(
                record_state[i + 1] > future_q[i] > record_state[i])
    self.assertEqual(len(players[0].get_record_hand_vectors()),
                     len(players[0].get_estimated_qs()))
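# The bounds assertion in test_record_actual_q implies a Q update of the
# form q + learning_rate * (next_q - q) with 0 < learning_rate < 1. The
# helper below is only a sketch of that assumed form; the real update lives
# in LearningPlayer.compute_future_q and is not shown in this excerpt.
def sketch_future_q_update(record_state_q, learning_rate=0.5):
    # blend each recorded Q toward the next recorded Q; with a learning rate
    # strictly between 0 and 1 the result lands strictly between the two
    # values whenever they differ, which is exactly what the test checks
    return [
        q + learning_rate * (next_q - q)
        for q, next_q in zip(record_state_q, record_state_q[1:])
    ]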
def human_game(player_names, perspective, reference_player):
    # reference_player: AI player passed through to each HumanPlayer
    perspective_hand = None
    players = []
    for player_name in player_names:
        player_is_perspective = (player_name == perspective)
        if player_is_perspective:
            perspective_hand = manual_hand()
        players.append(
            HumanPlayer(name=player_name,
                        reference_player=reference_player,
                        known_hand=player_is_perspective,
                        ai_before=player_is_perspective))
    game = LandlordGame(players, kitty_callback=manual_kitty)
    first_player = get_first_player(game)
    game.force_current_position(first_player)
    game.force_hand(perspective_position(game, perspective), perspective_hand)
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()
        best_move, best_move_q = current_player.decide_best_move(game)
        print(current_player.get_name(),
              "(" + game.get_position_role_name(current_position) + ", " +
              str(len(game.get_hand(current_position))) + "):", best_move,
              '(' + str(best_move_q) + ')')
        # play with known hand if it matches perspective
        game.play_move(best_move,
                       hand_known=current_player.get_name() == perspective)
        if type(game.get_last_played()) == KittyReveal:
            print(game.get_last_played())
    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
def play_against_two(players, show_q=True):
    game = LandlordGame(players)
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()
        best_move, best_move_q = current_player.decide_best_move(game)
        if show_q:
            best_move_q_str = '(' + str(best_move_q) + ')'
        else:
            best_move_q_str = ''
        print(current_player.get_name(),
              "(" + game.get_position_role_name(current_position) + ", " +
              str(len(game.get_hand(current_position))) + "):", best_move,
              best_move_q_str)
        game.play_move(best_move)
        if type(game.get_last_played()) == KittyReveal:
            print(game.get_last_played())
    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
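# Illustrative wiring for play_against_two: one HumanPlayer seated with two
# trained nets. This is a sketch only; the model name, directory, and the
# HumanPlayer arguments simply mirror the constructor calls used elsewhere
# in this file and may need adjusting for a real session.
def sketch_play_against_two():
    ai_players = [
        LearningPlayer(name='4_2_sim4_model10',
                       net_dir='../models/4_2_sim4_model10',
                       estimation_mode=LearningPlayer.ACTUAL_Q,
                       epsilon=0,
                       discount_factor=1) for _ in range(2)
    ]
    human = HumanPlayer(name='human',
                        reference_player=ai_players[0],
                        known_hand=True,
                        ai_before=True)
    play_against_two([human] + ai_players, show_q=True)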
def test_self_feed(self):
    players = [self.load_v2_net("4_8_actualq1_model20") for _ in range(3)]
    # players = [self.load_v2_net("4_2_sim4_model15") for _ in range(3)]
    game = LandlordGame(players=players)
    best_move_qs = []
    all_history_features = []
    history_vectors = []
    all_hand_vectors = []
    all_move_vectors = []
    while not game.is_round_over():
        best_move, best_move_q = game.get_current_player().decide_best_move(
            game, game.get_current_position())
        game.get_current_player().record_move(game, best_move, best_move_q,
                                              game.get_current_position())
        if game.get_current_player() == players[0]:
            history_features = players[0]._derive_features(game)
            all_history_features.append(history_features)
            # all the moves we make from here will not affect the history,
            # so assess it and copy
            history_vectors.append(players[0].history_net.predict(
                np.array([history_features]), batch_size=1)[0])
            # create features for each of the possible moves from this position
            all_move_vectors.append(players[0].compute_move_vector(
                game.get_current_position(), game.get_landlord_position(),
                best_move))
            all_hand_vectors.append(players[0].compute_remaining_hand_vector(
                game, all_move_vectors[-1], game.get_current_position()))
            predicted_q = players[0].position_net.predict([
                np.array([history_vectors[-1]]),
                np.array([all_move_vectors[-1]]),
                np.array([all_hand_vectors[-1]])
            ])[0][0]
            self.assertAlmostEqual(predicted_q, best_move_q, places=4)
            best_move_qs.append(best_move_q)
        game.play_move(best_move)

    players[0].compute_future_q(game)
    history_matrices = players[0].get_record_history_matrices()
    for i, j in zip(all_history_features, history_matrices):
        self.assertTrue(np.allclose(i, j))
    move_vectors = players[0].get_record_move_vectors()
    for i, j in zip(all_move_vectors, move_vectors):
        self.assertTrue(np.allclose(i, j))
    hand_vectors = players[0].get_record_hand_vectors()
    for i, j in zip(all_hand_vectors, hand_vectors):
        self.assertTrue(np.allclose(i, j))

    qs = players[0].get_estimated_qs()
    pred_qs = []
    # recreate the recorded Qs directly from the two networks
    for i, records in enumerate(
            zip(history_matrices, move_vectors, hand_vectors, qs)):
        history_matrix, move_vector, hand_vector, q = records
        history_vector = players[0].history_net.predict(
            np.array([history_matrix]))[0]
        self.assertTrue(np.allclose(history_vector, history_vectors[i]))
        pred_qs.append(players[0].position_net.predict([[history_vector],
                                                        [move_vector],
                                                        [hand_vector]])[0][0])
    # works only if learning rate is 0
    self.assertTrue(np.allclose(qs, pred_qs))
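# The closing loop of test_self_feed chains the two networks by hand; the
# helper below pulls that chaining out for reference. It is a sketch only:
# the attribute names (history_net, position_net) and input shapes are taken
# from the test above and are assumed to match what those models expect.
def sketch_predict_q(player, history_matrix, move_vector, hand_vector):
    # encode the full game history first...
    history_vector = player.history_net.predict(np.array([history_matrix]))[0]
    # ...then score the (history, candidate move, remaining hand) triple
    return player.position_net.predict([
        np.array([history_vector]),
        np.array([move_vector]),
        np.array([hand_vector])
    ])[0][0]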