Пример #1
0
    def test_bid(self):
        """Bid on every suit and check the market records the bid.

        For each suit the card matrix is refilled with 2s, the active player
        is confirmed to be allowed to bid, the bid is performed, and the
        market's buying price and operation counter are checked before the
        game is reset.
        """
        bid_price = 7
        figgie = Figgie()
        for suit in SUITS:
            figgie.cards = np.full((4, 4), 2, dtype=int)

            bidder = figgie.active_player
            market = figgie.markets[suit.value]

            # can_bid returns a pair; only its first element is asserted on.
            allowed, _ = market.can_bid(bidder, bid_price)
            self.assertTrue(allowed)

            figgie.preform(('bid', suit, bid_price))
            self.assertEqual(
                bid_price, market.buying_price,
                'Buying price not set properly with bid operation')
            self.assertEqual(1, market.operations)

            # Start the next suit from a fresh game state.
            figgie.reset()
Пример #2
0
    def test_ask(self):
        """Ask on every suit and check the market records the ask.

        For each suit the test first empties the active player's holding of
        that suit and expects can_ask to refuse, then restores the holding
        and expects can_ask to allow it. The ask is then performed and the
        market's selling price and operation counter are checked.
        """
        ask_price = 7
        figgie = Figgie()
        for suit in SUITS:
            figgie.cards = np.full((4, 4), 2, dtype=int)

            seller = figgie.active_player
            # Row of the card matrix for the seller; written through below.
            seller_hand = figgie.cards[seller]
            market = figgie.markets[suit.value]

            # With no card of this suit, asking must be rejected.
            seller_hand[suit.value] = 0
            allowed, _ = market.can_ask(seller, ask_price)
            self.assertFalse(allowed)

            # With cards of this suit back in hand, asking must be allowed.
            seller_hand[suit.value] = 2
            allowed, _ = market.can_ask(seller, ask_price)
            self.assertTrue(allowed)

            figgie.preform(('ask', suit, ask_price))
            self.assertEqual(
                ask_price, market.selling_price,
                'Selling price not set properly with ask operation')
            self.assertEqual(1, market.operations)

            # Start the next suit from a fresh game state.
            figgie.reset()
Пример #3
0
    def test_buy(self):
        """Check that buying against a standing ask settles the trade.

        For every suit: the active player posts an ask at 7, the then-active
        player buys, and the test verifies that every market's prices are
        cleared, that chips and cards moved between the two players, and that
        the traded market's transactions, last_price and operations counters
        were updated.
        """
        ask_price = 7
        figgie = Figgie()
        for suit in SUITS:
            figgie.cards = np.full((4, 4), 2, dtype=int)

            asking_player = figgie.active_player
            market = figgie.markets[suit.value]

            # Check the player who will actually post the ask (the active
            # player), not a hard-coded index, mirroring test_ask.
            can, _ = market.can_ask(asking_player, ask_price)
            self.assertTrue(can)

            figgie.preform(('ask', suit, ask_price))
            self.assertEqual(
                ask_price, market.selling_price,
                'Selling price not set properly with ask operation')
            self.assertEqual(1, market.operations)

            # If the asker is still the active player, pass so the buy below
            # is made by whoever is active next (presumably a different
            # player -- confirm against Figgie's turn logic).
            if figgie.active_player == asking_player:
                figgie.preform(('pass', ))

            buying_player = figgie.active_player

            figgie.preform(('buy', suit))

            # A completed trade is expected to clear the prices on every
            # market, not only the one that was traded.
            for s in SUITS:
                self.assertIsNone(figgie.markets[s.value].buying_price,
                                  'Market not reset after buy')
                self.assertIsNone(figgie.markets[s.value].selling_price,
                                  'Market not reset after buy')

            self.assertEqual(STARTING_CHIPS - ask_price,
                             figgie.chips[buying_player],
                             'Chips not properly subtracted')
            self.assertEqual(STARTING_CHIPS + ask_price,
                             figgie.chips[asking_player],
                             'Chips not properly added')
            # Both hands started with 2 cards of the suit; one card moved.
            self.assertEqual(3, figgie.cards[buying_player][suit.value],
                             'card not properly added')
            self.assertEqual(1, figgie.cards[asking_player][suit.value],
                             'card not properly subtracted')
            self.assertEqual(1, market.transactions)
            self.assertEqual(ask_price, market.last_price)
            self.assertEqual(2, market.operations)

            # Start the next suit from a fresh game state.
            figgie.reset()
Пример #4
0
    def __train(self, figgie: Figgie, pi: float, pi_prime: float,
                training_player: int) -> tuple:
        """Play one sampled line of the game recursively and update nodes.

        NOTE(review): the update rule looks like an outcome-sampling style
        counterfactual-regret update -- confirm against the intended
        algorithm.

        Args:
            figgie: Game state; it is mutated in place through
                ``figgie.preform`` as the recursion advances.
            pi: Running product of the node-strategy probabilities of the
                actions sampled on the training player's turns.
            pi_prime: Running product of the sampling probabilities of the
                actions sampled on the training player's turns.
            training_player: Index of the player whose regrets are updated.

        Returns:
            A ``(utility, tail_probability)`` pair. On a terminal state this
            is the active player's utility divided by ``pi_prime`` and
            ``1.0``. After a sampled price action it is the negated utility
            of the recursive call together with its tail probability
            (multiplied by the sampled action's strategy probability on the
            training player's turn). On forced moves (best transaction, pass,
            unaffordable price) the recursive result is returned unchanged.
        """
        acting_player = figgie.active_player
        if figgie.is_terminal():
            payoffs = figgie.get_utility()
            return payoffs[acting_player] / pi_prime, 1.0

        card_values = self.util_model.get_card_values(figgie, acting_player)

        # A profitable transaction is taken directly; no node is involved.
        transaction = ModularAgent.get_best_transaction(figgie, card_values)
        if transaction is not None:
            figgie.preform(transaction)
            return self.__train(figgie, pi, pi_prime, training_player)

        operation, suit = ModularAgent.get_best_operation(figgie, card_values)
        if operation is None:
            # Nothing worth doing: pass and continue with the next state.
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)

        suit_card_value = card_values[suit.value]
        info_set = self.info_set_generator.generate_info_set(
            figgie, suit_card_value, operation, suit)
        actions = self.info_set_generator.generate_actions(
            figgie, suit_card_value, operation, suit)
        assert len(actions) != 0, 'Length of actions == 0'

        # Fetch the node for this information set, creating it on first visit.
        if info_set not in self.game_tree:
            self.game_tree[info_set] = GameNode(len(actions))
        node = self.game_tree[info_set]

        strategy = node.get_strategy()
        is_training_turn = acting_player == training_player

        # On the training player's turn, sample from a mix of the uniform
        # distribution (weight epsilon) and the node strategy; otherwise
        # sample from the node strategy itself.
        epsilon = 0.6
        if is_training_turn:
            probability = epsilon / len(actions) + ((1.0 - epsilon) * strategy)
        else:
            probability = np.copy(strategy)

        action_index = random.choices(range(len(actions)),
                                      weights=probability)[0]
        price = actions[action_index]

        # A price the active player cannot cover is replaced by a pass. For
        # the 'at' operation only the first element of the price is checked.
        cannot_afford = (
            (operation == 'bid'
             and price > figgie.chips[figgie.active_player])
            or (operation == 'at'
                and price[0] > figgie.chips[figgie.active_player]))
        if cannot_afford:
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)

        figgie.preform(self.create_action(operation, suit, price))

        # Reach probabilities only accumulate on the training player's turns.
        if is_training_turn:
            child_util, p_tail = self.__train(
                figgie,
                pi * strategy[action_index],
                pi_prime * probability[action_index],
                training_player)
        else:
            child_util, p_tail = self.__train(
                figgie, pi, pi_prime, training_player)

        # The recursive call's utility is negated for this level.
        util = -child_util

        if is_training_turn:
            w = util * p_tail
            regret = w * (1 - strategy)
            regret[action_index] = -w * strategy[action_index]
            node.sum_regret += regret
            node.observations += 1
            return util, p_tail * strategy[action_index]

        node.sum_strategy += strategy / pi_prime
        node.observations += 1
        return util, p_tail