def test_bid(self):
    """Verify that a bid of 7 is accepted and recorded for every suit.

    Each iteration overwrites the card table with a 4x4 array of 2s, checks
    that the active player may bid, performs the bid through the game, and
    confirms the suit's market stored the price and counted one operation.
    The game is reset before moving on to the next suit.
    """
    game = Figgie()
    bid_price = 7

    for suit in SUITS:
        game.cards = np.full((4, 4), 2, dtype=int)
        bidder = game.active_player
        suit_market = game.markets[suit.value]

        # can_bid returns a pair; only the first element is checked here.
        allowed, _unused = suit_market.can_bid(bidder, bid_price)
        self.assertTrue(allowed)

        game.preform(('bid', suit, bid_price))

        self.assertEqual(
            bid_price,
            suit_market.buying_price,
            'Buying price not set properly with bid operation',
        )
        self.assertEqual(1, suit_market.operations)

        game.reset()
def test_ask(self):
    """Verify ask permission and bookkeeping for every suit.

    With the active player's count for the suit set to 0 the ask must be
    refused; with the count set to 2 it must be allowed. The ask of 7 is
    then performed and the suit's market is expected to hold that selling
    price with exactly one operation recorded. The game is reset before the
    next suit.
    """
    game = Figgie()
    ask_price = 7

    for suit in SUITS:
        game.cards = np.full((4, 4), 2, dtype=int)
        seller = game.active_player
        seller_hand = game.cards[seller]
        suit_market = game.markets[suit.value]

        # First pass: no cards of the suit -> refused.
        # Second pass: two cards of the suit -> allowed (and the hand is
        # left at 2 for the ask that follows).
        expectations = ((0, self.assertFalse), (2, self.assertTrue))
        for held, check in expectations:
            seller_hand[suit.value] = held
            allowed, _unused = suit_market.can_ask(seller, ask_price)
            check(allowed)

        game.preform(('ask', suit, ask_price))

        self.assertEqual(
            ask_price,
            suit_market.selling_price,
            'Selling price not set properly with ask operation',
        )
        self.assertEqual(1, suit_market.operations)

        game.reset()
def test_buy(self):
    """Verify that buying against a posted ask settles the trade.

    For every suit: the card table is overwritten with a 4x4 array of 2s,
    the active player posts an ask at 7, and the then-active player buys.
    The test checks that all markets have their prices cleared, that 7
    chips moved from buyer to seller, that one card of the suit moved from
    seller to buyer, and that the suit's market recorded the transaction.
    The game is reset before the next suit.
    """
    figgie = Figgie()
    price = 7

    for suit in SUITS:
        figgie.cards = np.full((4, 4), 2, dtype=int)
        asking_player = figgie.active_player
        market = figgie.markets[suit.value]

        # Validate the ask for the player who will actually place it,
        # rather than for a fixed seat.
        can, _ = market.can_ask(asking_player, price)
        self.assertTrue(can)

        figgie.preform(('ask', suit, price))
        self.assertEqual(
            price, market.selling_price,
            'Selling price not set properly with ask operation')
        self.assertEqual(1, market.operations)

        # If the turn is still with the asker, pass once before buying
        # (presumably so that a different player takes the ask).
        if figgie.active_player == asking_player:
            figgie.preform(('pass', ))
        buyer = figgie.active_player
        figgie.preform(('buy', suit))

        # A completed trade is expected to clear the prices on every
        # market, not only on the traded suit.
        for s in SUITS:
            self.assertIsNone(figgie.markets[s.value].buying_price,
                              'Market not reset after buy')
            self.assertIsNone(figgie.markets[s.value].selling_price,
                              'Market not reset after buy')

        self.assertEqual(STARTING_CHIPS - price, figgie.chips[buyer],
                         'Chips not properly subtracted')
        self.assertEqual(STARTING_CHIPS + price, figgie.chips[asking_player],
                         'Chips not properly added')
        self.assertEqual(3, figgie.cards[buyer][suit.value],
                         'card not properly added')
        self.assertEqual(1, figgie.cards[asking_player][suit.value],
                         'card not properly subtracted')
        self.assertEqual(1, market.transactions)
        self.assertEqual(price, market.last_price)
        self.assertEqual(2, market.operations)

        figgie.reset()
def __train(self, figgie: Figgie, pi: float, pi_prime: float, training_player: int) -> tuple:
    """Play one sampled line of the game to the end and update the game tree.

    The method advances ``figgie`` in place (via ``figgie.preform``) and
    recurses until ``figgie.is_terminal()`` is true. Decision points that
    reach the info-set stage get a ``GameNode`` in ``self.game_tree`` whose
    ``sum_regret`` or ``sum_strategy`` is updated on the way back.
    NOTE(review): the structure resembles sampled counterfactual-regret
    training -- confirm against the intended algorithm.

    Args:
        figgie: Game state; mutated by every step of the recursion.
        pi: Product of ``strategy[action_index]`` over the training
            player's sampled actions so far. It is only threaded through
            the recursion and never read in this method.
        pi_prime: Product of the sampling probabilities of the training
            player's sampled actions so far; used as a divisor.
        training_player: Index of the player whose regrets are updated.

    Returns:
        A 2-tuple. At a terminal state it is
        ``(utility[player] / pi_prime, 1.0)``. After a forced transaction
        or a pass the recursive result is returned unchanged. After a
        sampled action it is ``(util, p_tail)`` where ``util`` is the
        negated first element of the recursive result and ``p_tail`` is
        the second element, additionally multiplied by
        ``strategy[action_index]`` when the acting player is
        ``training_player``.
    """
    player = figgie.active_player
    if figgie.is_terminal():
        utility = figgie.get_utility()
        return utility[player] / pi_prime, 1.0
    # Same value as above: nothing has changed the state in between.
    player = figgie.active_player
    model_values = self.util_model.get_card_values(figgie, player)
    # A transaction suggested by the model is executed directly; no
    # info set is created and no node is updated for it.
    best_transaction = ModularAgent.get_best_transaction(
        figgie, model_values)
    if best_transaction is not None:
        figgie.preform(best_transaction)
        return self.__train(figgie, pi, pi_prime, training_player)
    best_operation, best_suit = ModularAgent.get_best_operation(
        figgie, model_values)
    if best_operation is None:
        # Nothing worth doing: pass and continue with the next state.
        figgie.preform(('pass', ))
        return self.__train(figgie, pi, pi_prime, training_player)
    else:
        info_set = self.info_set_generator.generate_info_set(
            figgie, model_values[best_suit.value], best_operation, best_suit)
        actions = self.info_set_generator.generate_actions(
            figgie, model_values[best_suit.value], best_operation, best_suit)
        assert len(actions) != 0, 'Length of actions == 0'
        # Fetch the node for this info set, creating it on first visit.
        if info_set in self.game_tree:
            node = self.game_tree[info_set]
        else:
            node = GameNode(len(actions))
            self.game_tree[info_set] = node
        strategy = node.get_strategy()
        epsilon = 0.6
        if player == training_player:
            # Training player samples from a mix of a uniform distribution
            # (weight epsilon) and the node's strategy (weight 1 - epsilon).
            probability = epsilon / len(actions) + ((1.0 - epsilon) * strategy)
        else:
            # Other players sample from the node's strategy as is.
            probability = np.copy(strategy)
        action_index = random.choices(range(len(actions)), weights=probability)[0]
        price = actions[action_index]
        # If the sampled price exceeds the active player's chips, pass
        # instead; the node is not updated in that case. For 'at' the
        # price is indexed, so it is presumably a (bid, ask) pair --
        # TODO confirm.
        if best_operation == 'bid' and price > figgie.chips[figgie.active_player]\
                or best_operation == 'at' and price[0] > figgie.chips[figgie.active_player]:
            figgie.preform(('pass', ))
            return self.__train(figgie, pi, pi_prime, training_player)
        action = self.create_action(best_operation, best_suit, price)
        figgie.preform(action)
        # Only the training player's choices scale the reach terms.
        # `player` was captured before preform, so it still names the
        # player who just acted.
        result = self.__train(
            figgie, pi * strategy[action_index],
            pi_prime * probability[action_index],
            training_player) if player == training_player else self.__train(
                figgie, pi, pi_prime, training_player)
        # The value coming back from the deeper call is sign-flipped.
        util = -result[0]
        p_tail = result[1]
        if player == training_player:
            w = util * p_tail
            # Every action gets w * (1 - strategy); the sampled action is
            # then overwritten with -w * strategy[action_index].
            regret = w * (1 - strategy)
            regret[action_index] = -w * strategy[action_index]
            node.sum_regret += regret
        else:
            node.sum_strategy += strategy / pi_prime
        # NOTE(review): the original indentation of the next statement could
        # not be determined; confirm whether the counter is meant to be
        # incremented on every visit or only in the else branch above.
        node.observations += 1
        if player == training_player:
            return util, p_tail * strategy[action_index]
        else:
            return util, p_tail