Example #1
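The four examples below reference names without showing their imports. A minimal header they would all need; the module paths here are assumptions, since the snippets do not show the package layout:

# Assumed imports for all four examples. The paths after "landlord." are
# guesses; adjust them to the actual package layout.
import numpy as np
from collections import Counter

from landlord.game import LandlordGame, TurnPosition
from landlord.card import Card
from landlord.move import SpecificMove, RankedMoveType, MoveType
from landlord.player import LearningPlayer, LearningPlayer_v2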
    def test_hand_vector_v2(self):
        players = [
            LearningPlayer_v2(name='random',
                              estimation_mode=LearningPlayer.ACTUAL_Q)
            for _ in range(3)
        ]
        game = LandlordGame(players=players)
        # Fix the deal so the second player can empty their hand in one move.
        hands = {
            TurnPosition.FIRST: [Card.ACE] * 4,
            TurnPosition.SECOND: [Card.TEN] * 3 + [Card.THREE],
            TurnPosition.THIRD: [Card.FIVE] * 3 + [Card.THREE, Card.FOUR]
        }
        game._betting_complete = True
        game.force_setup(TurnPosition.SECOND, hands, 3)
        # Triple tens with a three kicker: all four of the second player's cards.
        best_move = SpecificMove(RankedMoveType(MoveType.TRIPLE_SINGLE_KICKER,
                                                Card.TEN),
                                 cards=Counter({
                                     Card.TEN: 3,
                                     Card.THREE: 1
                                 }))
        move_vector = players[1].compute_move_vector(
            TurnPosition.SECOND, game.get_landlord_position(), best_move)
        remaining_hand_vector = players[1].compute_remaining_hand_vector(
            game, move_vector, TurnPosition.SECOND)[:-3]

        # The move consumes the whole hand, so the remaining-hand vector is all zeros.
        self.assertEqual(np.sum(remaining_hand_vector), 0)
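The assertion holds because the move consumes the entire hand. The same arithmetic with plain Counters, independent of the game API (a standalone sketch, not library code):

from collections import Counter

hand = Counter({'TEN': 3, 'THREE': 1})  # the second player's dealt hand
move = Counter({'TEN': 3, 'THREE': 1})  # triple tens plus the three kicker
remaining = hand - move                 # Counter subtraction drops emptied keys
assert sum(remaining.values()) == 0     # nothing left, so the vector sums to 0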
Example #2
    def test_features(self):
        players = [
            LearningPlayer(name='random',
                           estimation_mode=LearningPlayer.ACTUAL_Q)
            for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()
            # Capture what the player sees before the move is played.
            curr_features = curr_player._derive_features(game)
            curr_hand_vector = curr_player.get_hand_vector(game, curr_position)
            move = curr_player.make_move(game, curr_position)
            curr_move_vector = curr_player.compute_move_vector(
                curr_position, game.get_landlord_position(), move)

            game.play_move(move)

            # Each recorded entry must match the values computed before the move.
            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))
Example #3
    def test_features_v2(self):
        players = [
            LearningPlayer_v2(name='random',
                              epsilon=0,
                              estimation_mode=LearningPlayer.ACTUAL_Q,
                              learning_rate=1) for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()
            curr_features = curr_player._derive_features(game)

            best_move, best_move_q = curr_player.decide_best_move(game)
            curr_move_vector = curr_player.compute_move_vector(
                curr_position, game.get_landlord_position(), best_move)
            curr_hand_vector = curr_player.compute_remaining_hand_vector(
                game, curr_move_vector, curr_position)

            curr_player.record_move(game, best_move, best_move_q, curr_position)
            game.play_move(best_move)

            # Each recorded entry must match the values computed before the move.
            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))

        players[0].compute_future_q(game)

        if game.has_winners():
            # A decided round should put the terminal reward (magnitude 1) in the Qs.
            self.assertEqual(np.max(np.abs(players[0].get_estimated_qs())), 1)

        self.assertEqual(players[0].record_history_matrices[0][0].dtype, np.int8)
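One plausible reading of the max |Q| == 1 check: with ACTUAL_Q estimation and a decided round, compute_future_q backfills the terminal reward (magnitude 1) into the recorded steps, and no intermediate estimate exceeds it. A toy sketch of that backfill, assuming an undiscounted actual-Q scheme (not the library's implementation):

# Toy actual-Q backfill: every recorded step of a finished round receives
# the terminal reward. Hypothetical helper, for illustration only.
def backfill_actual_q(num_steps, terminal_reward):
    return [terminal_reward] * num_steps

qs = backfill_actual_q(5, 1.0)
assert max(abs(q) for q in qs) == 1.0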
Example #4
    def test_self_feed(self):
        players = [self.load_v2_net("4_8_actualq1_model20") for _ in range(3)]
        #players = [self.load_v2_net("4_2_sim4_model15") for _ in range(3)]
        game = LandlordGame(players=players)
        best_move_qs = []
        all_history_features = []
        history_vectors = []
        all_hand_vectors = []
        all_move_vectors = []
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()

            best_move, best_move_q = curr_player.decide_best_move(
                game, curr_position)
            curr_player.record_move(game, best_move, best_move_q, curr_position)

            if curr_player == players[0]:
                # No move made from here can change the history visible at this
                # point, so assess it now and keep a copy.
                history_features = players[0]._derive_features(game)
                all_history_features.append(history_features)
                history_vectors.append(players[0].history_net.predict(
                    np.array([history_features]), batch_size=1)[0])

                # Build the feature vectors for the chosen move from this position.
                all_move_vectors.append(players[0].compute_move_vector(
                    curr_position, game.get_landlord_position(), best_move))
                all_hand_vectors.append(
                    players[0].compute_remaining_hand_vector(
                        game, all_move_vectors[-1], curr_position))

                # Fed the same features, the position net must reproduce the Q
                # that decide_best_move reported.
                predicted_q = players[0].position_net.predict([
                    np.array([history_vectors[-1]]),
                    np.array([all_move_vectors[-1]]),
                    np.array([all_hand_vectors[-1]])
                ])[0][0]
                self.assertAlmostEqual(predicted_q, best_move_q, places=4)

                best_move_qs.append(best_move_q)

            game.play_move(best_move)

        players[0].compute_future_q(game)

        # The recorded features must match the copies taken during play.
        history_matrices = players[0].get_record_history_matrices()
        for expected, recorded in zip(all_history_features, history_matrices):
            self.assertTrue(np.allclose(expected, recorded))

        move_vectors = players[0].get_record_move_vectors()
        for expected, recorded in zip(all_move_vectors, move_vectors):
            self.assertTrue(np.allclose(expected, recorded))

        hand_vectors = players[0].get_record_hand_vectors()
        for expected, recorded in zip(all_hand_vectors, hand_vectors):
            self.assertTrue(np.allclose(expected, recorded))

        # Recreate each recorded Q from the stored features.
        qs = players[0].get_estimated_qs()
        pred_qs = []
        for i, records in enumerate(
                zip(history_matrices, move_vectors, hand_vectors, qs)):
            history_matrix, move_vector, hand_vector, q = records

            history_vector = players[0].history_net.predict(
                np.array([history_matrix]))[0]
            self.assertTrue(np.allclose(history_vector, history_vectors[i]))

            pred_qs.append(players[0].position_net.predict([
                np.array([history_vector]),
                np.array([move_vector]),
                np.array([hand_vector])
            ])[0][0])

        # This only holds if the learning rate is 0, i.e. the nets were not
        # updated while the round was played.
        self.assertTrue(np.allclose(qs, pred_qs))
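These methods read as unittest.TestCase tests (self.assertTrue, self.load_v2_net). A minimal harness for running them; the class name TestLearningPlayer is hypothetical, and load_v2_net is only stubbed, since its loading logic is not shown in the examples:

import unittest

class TestLearningPlayer(unittest.TestCase):
    def load_v2_net(self, model_name):
        # Hypothetical stub: would deserialize a trained LearningPlayer_v2
        # checkpoint by name. The real loader is not shown in the examples.
        raise NotImplementedError

    # test_hand_vector_v2, test_features, test_features_v2 and test_self_feed
    # from the examples above would be defined here.

if __name__ == '__main__':
    unittest.main()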