Example #1
    def test_features(self):
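        """Play a full ACTUAL_Q round and check that the feature, move, and
        hand vectors each player records match values computed directly
        from the live game state."""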
        players = [
            LearningPlayer(name='random',
                           estimation_mode=LearningPlayer.ACTUAL_Q)
            for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_features = curr_player._derive_features(game)
            curr_hand_vector = curr_player.get_hand_vector(
                game, game.get_current_position())
            move = curr_player.make_move(game, game.get_current_position())
            curr_move_vector = curr_player.compute_move_vector(
                game.get_current_position(), game.get_landlord_position(),
                move)

            game.play_move(move)

            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))
Example #2
 def test_setup(self):
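     """Force a fixed deal with THIRD acting first, then check turn order,
     move logging, and the feature matrix derived after a pass and a bomb."""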
     players = [LearningPlayer('v1', None)] * 3
     game = LandlordGame(players=players)
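     # fixed deal: 17 cards each for FIRST and SECOND, 20 for THIRD
     # (a full hand plus the three-card kitty)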
     hands = {
         TurnPosition.FIRST: [Card.ACE] * 4 + [Card.KING] * 4 +
         [Card.QUEEN] * 4 + [Card.JACK] * 4 + [Card.THREE],
         TurnPosition.SECOND: [Card.TEN] * 4 + [Card.NINE] * 4 +
         [Card.EIGHT] * 4 + [Card.SEVEN] * 4 + [Card.THREE],
         TurnPosition.THIRD: [Card.FIVE] * 4 + [Card.FOUR] * 4 +
         [Card.SIX] * 4 + [Card.TWO] * 4 + [Card.THREE] * 2 +
         [Card.LITTLE_JOKER] + [Card.BIG_JOKER]
     }
     game._betting_complete = True  # bypass the betting phase for this scripted deal
     game.force_setup(TurnPosition.THIRD, hands, 2)
     self.assertEqual(game.get_current_position(), TurnPosition.THIRD)
     game.play_move(None)
     self.assertEqual(game.get_current_position(), TurnPosition.THIRD.next())
     self.assertEqual(len(game.get_move_logs()), 1)
     self.assertIsNone(game.get_move_logs()[0][1])
     game.play_move(
         SpecificMove(RankedMoveType(MoveType.BOMB, Card.KING),
                      cards=Counter({Card.KING: 4})))
     self.assertEqual(game.get_current_position(), TurnPosition.SECOND)
     feature_matrix = players[1]._derive_features(game)
     self.assertEqual(feature_matrix[0][-6], 1)
     self.assertEqual(feature_matrix[0][-2], 1)
     self.assertEqual(feature_matrix[1][10], 4)
     self.assertEqual(np.sum(feature_matrix), 7)
Example #3
 def test_bet_2(self):
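     """Run a 0-1-0 betting sequence and check that betting completes with a
     bet of 1 and the turn comes back around to the player who bet."""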
     players = [LearningPlayer('v1')] * 3
     game = LandlordGame(players=players)
     game.play_move(BetMove(0))
     self.assertEqual(len(game.get_legal_moves()), 4)
     one_better = game.get_current_position()  # the player about to bet 1
     game.play_move(BetMove(1))
     self.assertFalse(game.is_round_over())
     self.assertFalse(game.is_betting_complete())
     game.play_move(BetMove(0))
     self.assertFalse(game.is_round_over())
     self.assertTrue(game.is_betting_complete())
     self.assertEqual(game.get_bet_amount(), 1)
     self.assertEqual(game.get_current_position(), one_better)
Example #4
    def test_features_v2(self):
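        """Same consistency check as test_features, but for LearningPlayer_v2
        playing greedily (epsilon=0), plus checks on the Qs produced by
        compute_future_q and on the stored feature dtype."""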
        players = [
            LearningPlayer_v2(name='random',
                              epsilon=0,
                              estimation_mode=LearningPlayer.ACTUAL_Q,
                              learning_rate=1) for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_features = curr_player._derive_features(game)

            best_move, best_move_q = curr_player.decide_best_move(game)
            curr_move_vector = curr_player.compute_move_vector(
                game.get_current_position(), game.get_landlord_position(),
                best_move)
            curr_hand_vector = curr_player.compute_remaining_hand_vector(
                game, curr_move_vector, game.get_current_position())

            curr_player.record_move(game, best_move, best_move_q,
                                    game.get_current_position())
            game.play_move(best_move)

            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))

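        # with learning_rate=1 the realized outcome should fully overwrite the
        # stored estimates, so the largest |Q| is expected to be exactly 1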
        players[0].compute_future_q(game)

        if game.has_winners():
            print(np.max(np.abs(players[0].get_estimated_qs())))
            self.assertEqual(np.max(np.abs(players[0].get_estimated_qs())), 1)

        self.assertEqual(
            players[0].record_history_matrices[0][0].dtype, np.int8)
Example #5
def human_game(player_names, perspective):
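    """Play an interactive round from one player's perspective: that player's
    hand is entered manually and every seat is a HumanPlayer. Relies on
    helpers from the surrounding module (manual_hand, manual_kitty,
    get_first_player, perspective_position) and a module-level
    reference_player."""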
    perspective_hand = None
    players = []
    for player_name in player_names:
        player_is_perspective = (player_name == perspective)
        if player_is_perspective:
            perspective_hand = manual_hand()
        players.append(
            HumanPlayer(name=player_name,
                        # reference_player is assumed to come from module scope
                        reference_player=reference_player,
                        known_hand=player_is_perspective,
                        ai_before=player_is_perspective))

    game = LandlordGame(players, kitty_callback=manual_kitty)

    first_player = get_first_player(game)

    game.force_current_position(first_player)
    game.force_hand(perspective_position(game, perspective), perspective_hand)

    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()

        best_move, best_move_q = current_player.decide_best_move(game)

        print(f"{current_player.get_name()} "
              f"({game.get_position_role_name(current_position)}, "
              f"{len(game.get_hand(current_position))}): "
              f"{best_move} ({best_move_q})")

        # play with known hand if it matches perspective
        game.play_move(best_move,
                       hand_known=current_player.get_name() == perspective)

        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())

    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
Example #6
def play_against_two(players, show_q=True):
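    """Play out one round between the given players, printing each chosen
    move (with its estimated Q value when show_q is set) until the round
    ends, then print the winners."""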
    game = LandlordGame(players)
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()

        best_move, best_move_q = current_player.decide_best_move(game)

        best_move_q_str = f'({best_move_q})' if show_q else ''

        print(f"{current_player.get_name()} "
              f"({game.get_position_role_name(current_position)}, "
              f"{len(game.get_hand(current_position))}): "
              f"{best_move} {best_move_q_str}")
        game.play_move(best_move)

        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())

    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
Example #7
    def test_self_feed(self):
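        """Play a round between three copies of a trained v2 net and check
        that everything the player records can be reproduced from the raw
        features via the history and position nets."""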
        players = [self.load_v2_net("4_8_actualq1_model20") for _ in range(3)]
        #players = [self.load_v2_net("4_2_sim4_model15") for _ in range(3)]
        game = LandlordGame(players=players)
        best_move_qs = []
        all_history_features = []
        history_vectors = []
        all_hand_vectors = []
        all_move_vectors = []
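        # accumulate everything the player should also be recording
        # internally, so its records can be cross-checked after the round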
        while not game.is_round_over():

            current_player = game.get_current_player()
            current_position = game.get_current_position()

            best_move, best_move_q = current_player.decide_best_move(
                game, current_position)

            current_player.record_move(game, best_move, best_move_q,
                                       current_position)

            if current_player == players[0]:
                history_features = players[0]._derive_features(game)
                all_history_features.append(history_features)

                # moves made from here will not affect the history features,
                # so run the history net once now and keep a copy
                history_vectors.append(players[0].history_net.predict(
                    np.array([history_features]), batch_size=1)[0])

                # vector encoding of the move actually chosen from this position
                all_move_vectors.append(players[0].compute_move_vector(
                    current_position, game.get_landlord_position(),
                    best_move))

                all_hand_vectors.append(
                    players[0].compute_remaining_hand_vector(
                        game, all_move_vectors[-1], current_position))

                predicted_q = players[0].position_net.predict([
                    np.array([history_vectors[-1]]),
                    np.array([all_move_vectors[-1]]),
                    np.array([all_hand_vectors[-1]])
                ])[0][0]

                self.assertAlmostEqual(predicted_q, best_move_q, places=4)

                best_move_qs.append(best_move_q)

            game.play_move(best_move)

        players[0].compute_future_q(game)

        history_matrices = players[0].get_record_history_matrices()

        for expected, recorded in zip(all_history_features, history_matrices):
            self.assertTrue(np.allclose(expected, recorded))

        move_vectors = players[0].get_record_move_vectors()

        for expected, recorded in zip(all_move_vectors, move_vectors):
            self.assertTrue(np.allclose(expected, recorded))

        hand_vectors = players[0].get_record_hand_vectors()

        for expected, recorded in zip(all_hand_vectors, hand_vectors):
            self.assertTrue(np.allclose(expected, recorded))

        qs = players[0].get_estimated_qs()
        pred_qs = []
        # re-predict every recorded Q from the stored features
        for i, records in enumerate(
                zip(history_matrices, move_vectors, hand_vectors, qs)):
            history_matrix, move_vector, hand_vector, q = records

            history_vector = players[0].history_net.predict(
                np.array([history_matrix]))[0]
            self.assertTrue(np.allclose(history_vector, history_vectors[i]))

            pred_qs.append(players[0].position_net.predict([[history_vector],
                                                            [move_vector],
                                                            [hand_vector]
                                                            ])[0][0])

        # the recorded Qs match the fresh predictions only when the learning
        # rate is 0, i.e. nothing updated the Qs after they were recorded
        self.assertTrue(np.allclose(qs, pred_qs))