Пример #1
0
def test_agent():
    """Check the agent's private-state vector and the model input built from it."""
    available_actions = [-1, 0, 1, 2, 3, 4]
    agent = Agent(player_index=0, actions=available_actions)
    game_state = State(n_players=3)

    observation = agent.get_private_state(game_state)
    assert len(observation) == agent.len_private_state

    # Slot 0 holds the game stage; the test expects a new State to be pre-flop
    assert observation[0] == GameStage.PRE_FLOP

    # Slots 1-4 hold (rank, suit) of the first hole card, then of the second
    for card_index in (0, 1):
        offset = 1 + 2 * card_index
        hole_card = game_state.hole_cards[agent.player_index][card_index]
        assert observation[offset] == hole_card.rank
        assert observation[offset + 1] == hole_card.suit

    # One row per candidate action: the action sits in the last column and
    #  the game stage in the first
    features = agent.get_model_input(observation, available_actions)
    assert features.shape == (len(available_actions), agent.n_inputs)
    assert np.allclose(features[:, -1], available_actions)
    assert np.allclose(features[:, 0], observation[0])

    single_row = agent.get_model_input(observation, actions=[-1])
    assert single_row.shape == (1, agent.n_inputs)
Пример #2
0
def test_minimum_legal_bet():
    """Check the minimum legal bet for each seat during the first pre-flop round."""

    state = State(n_players=3)

    # Note: the first person to act is the player after the big blind,
    #  and their minimum legal bet is therefore the big blind amount
    assert state.minimum_legal_bet() == state.big_blind

    # A negative action is a fold, so the turn passes to the next player
    state.update(-1)

    # The next player (presumably the small blind) only has to make up the
    #  difference between the two blinds
    min_legal_bet = state.minimum_legal_bet()
    assert min_legal_bet == (state.big_blind - state.small_blind)
    state.update(min_legal_bet)

    # Note: the big blind is allowed to bet zero dollars (to "knock")
    assert state.minimum_legal_bet() == 0
Пример #3
0
    def describe_learned_q_function(self, n_iter=20):
        """Print the model's predictions over ``self.actions`` for fresh deals.

        For each of ``n_iter`` newly built three-player states, prints the
        stage name, this player's hole cards, and the first column of the
        model's predictions for the model input built from ``self.actions``.
        """
        for _sample in range(n_iter):
            game_state = State(n_players=3)
            private_cards = game_state.hole_cards[self.player_index]

            features = self.get_model_input(
                self.get_private_state(game_state), self.actions
            )
            predictions = self.model.predict(features)
            q = predictions[:, 0]

            message = f"Value at stage {game_state.game_stage.name} with private cards {private_cards}: {q}"
            print(message)
Пример #4
0
def get_bot(name, hole, possible_actions, state):
    """Return the bot based on its name.

    Args:
        name: Bot identifier such as ``'simple'``, ``'random'`` or
            ``'passive-tight'``.
        hole: Forwarded to ``State``.
        possible_actions: Forwarded to ``State``.
        state: Forwarded to ``State``.

    Returns:
        An instance of the bot class registered for ``name``, built from the
        parsed state. Unrecognised names fall back to ``SmartBot``.
    """
    parsed_state = State(name, hole, possible_actions, state)

    # Note: the misspelt 'agressive-*' keys are what callers send; keep them
    bot_classes = {
        'simple': SimpleBot,
        'random': RandomBot,
        'threshold': ThresholdBot,
        'agressive-loose': AgressiveLooseBot,
        'agressive-tight': AgressiveTightBot,
        'passive-loose': PassiveLooseBot,
        'passive-tight': PassiveTightBot,
        'smart': SmartBot,
        'randomized-smart': RandomizedSmartBot,
    }

    # Only the selected bot is instantiated; previously a SmartBot was always
    #  built first and thrown away whenever another name matched
    bot_class = bot_classes.get(name, SmartBot)
    bot = bot_class(parsed_state)

    LOG.info("\n\nname of the bot: %s", bot.name)
    return bot
Пример #5
0
def run_one_episode(episode, players, initial_wealth=100):
    """Play one episode and update the learning player's q function as it goes.

    The player who acts first in the new state is the learning player. Its
    model weights are copied to every other player before play starts. After
    each of its actions the other players act until the turn returns (or the
    state becomes terminal), the change in the learning player's wealth is
    taken as the reward, and ``update_q`` is called for the state/action pair
    the learning player actually chose.

    Args:
        episode: Episode counter; larger values lower the probability of
            random (exploratory) actions.
        players: Agents indexed by seat. Each must provide
            ``get_private_state``, ``get_action``, ``predicted_q``,
            ``update_q`` and a ``model`` with ``get_weights``/``set_weights``.
        initial_wealth: Starting wealth passed to ``State`` for every player.

    Returns:
        None.
    """

    # Note: the probability of random (exploratory) actions decreases over time
    proba_random_action = 0.02 + 0.98 * np.exp(-episode / 500)

    state = State(n_players=len(players),
                  initial_wealth=initial_wealth,
                  verbose=False)

    learning_player = state.current_player

    private_state = players[learning_player].get_private_state(state)
    action = players[learning_player].get_action(state, proba_random_action)

    cumulative_reward = 0

    # Note: at the beginning of every episode, we push updates to the q function
    #  from the learning player to all other players
    for player_index in range(state.n_players):
        if player_index != learning_player:
            players[player_index].model.set_weights(
                players[learning_player].model.get_weights())

    while not state.terminal:

        wealth_before_action = state.wealth[learning_player]

        state.update(action)

        # Note: we need to make the other players act so that we can get back to the learning player
        #  Even though the other (non-learning) players are acting, it
        #  is possible that the learning player is forced
        #  to act (bet the small blind or big blind) in this block
        while state.current_player != learning_player:

            # Keep opponents' actions in their own name: `action` must still
            #  hold the learning player's action when update_q is called below
            other_action = players[state.current_player].get_action(
                state, proba_random_action)
            state.update(other_action)

            # TODO Is this needed?
            if state.terminal:
                break

        # Note: we calculate the player's reward _after_ the other players act
        # TODO This can create odd rewards when the other players fold and
        #  the learning player is either small or big blind in the next round
        wealth_after_action = state.wealth[learning_player]
        reward = wealth_after_action - wealth_before_action
        cumulative_reward += reward
        # print(f" *** Reward for player {learning_player} is ${reward} ***")

        # TODO Put this in pytest
        assert state.wealth[
            learning_player] == initial_wealth + cumulative_reward

        next_private_state = players[learning_player].get_private_state(state)

        next_action = players[learning_player].get_action(
            state, proba_random_action)

        continuation_value = players[learning_player].predicted_q(
            next_private_state, next_action)

        if state.terminal:
            print(
                f"Reached a terminal state: player wealths are {state.wealth}")
            # No future value after a terminal state
            updated_guess_for_q = reward

        else:
            updated_guess_for_q = reward + continuation_value

        players[learning_player].update_q(private_state, action,
                                          updated_guess_for_q)

        action = next_action
        private_state = next_private_state
Пример #6
0
def test_state_with_low_wealth():
    """Play a hand that leaves two players with $1, then check the forced all-in blinds."""
    starting_stack = 5
    big_blind = 2
    small_blind = 1

    # Note: cards are dealt (popped) off of the _end_ of the list
    ordered_cards = (
        (Rank.KING, Suit.HEARTS),
        (Rank.KING, Suit.DIAMONDS),
        (Rank.KING, Suit.CLUBS),
        (Rank.THREE, Suit.HEARTS),
        (Rank.NINE, Suit.CLUBS),
        (Rank.TWO, Suit.SPADES),
        (Rank.SEVEN, Suit.SPADES),
        (Rank.TWO, Suit.CLUBS),
        (Rank.SEVEN, Suit.CLUBS),
        (Rank.TWO, Suit.DIAMONDS),
        (Rank.SEVEN, Suit.DIAMONDS),
        (Rank.ACE, Suit.HEARTS),
        (Rank.ACE, Suit.DIAMONDS),
    )
    deck = [Card(rank, suit) for rank, suit in ordered_cards]

    state = State(
        n_players=4,
        initial_wealth=starting_stack,
        big_blind=big_blind,
        small_blind=small_blind,
        deck=deck,
    )

    def apply_repeatedly(amount, times):
        # Feed the same action to the state `times` times in a row
        for _ in range(times):
            state.update(amount)

    # First hand, pre-flop: two calls of the big blind, then the small blind completes
    apply_repeatedly(big_blind, 2)
    state.update(big_blind - small_blind)
    assert state.game_stage == GameStage.FLOP

    apply_repeatedly(small_blind, state.n_players)
    assert state.game_stage == GameStage.TURN

    apply_repeatedly(small_blind, state.n_players)
    assert state.game_stage == GameStage.RIVER

    # Note: the players all knock (bet zero)
    apply_repeatedly(0, state.n_players)

    # Note: player index 0 had a pair of aces as their hole cards, and they win the round. The next
    #  player to be big blind has only $1 left (and the big blind is $2), so they are forced to go all in
    #  as part of the blind
    loss_per_player = big_blind + 2 * small_blind
    assert state.wealth[0] == starting_stack + (state.n_players - 1) * loss_per_player

    stack_after_loss = starting_stack - loss_per_player
    assert state.wealth[2] == state.wealth[3]
    assert state.wealth[3] == stack_after_loss
    assert stack_after_loss == 1

    # Second hand
    assert state.game_stage == GameStage.PRE_FLOP

    apply_repeatedly(small_blind, 2)
    assert state.game_stage == GameStage.FLOP

    assert state.dealer == 1
    assert state.current_player == 2

    # Note: the low wealth players are all in, so bets > 0 are not allowed
    assert state.maximum_legal_bet() == 0

    apply_repeatedly(0, state.n_players)
    assert state.game_stage == GameStage.TURN

    # One knock, then every other player folds
    state.update(0)
    apply_repeatedly(-1, state.n_players - 1)

    # Note: player 2 wins because everyone else has folded
    assert state.wealth[2] == 1 + small_blind * (state.n_players - 1)
    assert state.wealth[3] == 0
Пример #7
0
def test_state():
    """Walk through two hands of a three-player game.

    The first hand ends early when player 2 folds on the turn; the second is
    played to a showdown on the river. Along the way the test checks stage
    transitions, dealt card counts, per-stage bet bookkeeping, and that
    wealth is conserved and transferred to the winner(s).
    """

    initial_wealth = 200
    initial_dealer = 0
    state = State(
        n_players=3, initial_wealth=initial_wealth, initial_dealer=initial_dealer
    )

    # Two hole cards per player have already been dealt from a 52-card deck
    n_cards_in_deck_after_initial_deal = 52 - state.n_players * 2
    assert len(state.shuffled_deck) == n_cards_in_deck_after_initial_deal

    assert state.game_stage == GameStage.PRE_FLOP
    assert len(state.public_cards) == 0

    assert all(player_has_folded is False for player_has_folded in state.has_folded)

    # Note: any action < 0 means the current player is folding
    action_fold = -1

    # Note: player index 1 is the small blind, player index 2 is the big blind,
    #  so player index 0 (the dealer) is the first to act pre flop
    assert state.current_player == state.dealer

    state.update(action_fold)
    assert state.has_folded[state.dealer]

    # Player 1 (the small blind) completes the big blind and raises by $10
    assert state.current_player == 1
    ten_dollars = 10
    complete_blind_plus_ten = (state.big_blind - state.small_blind) + ten_dollars

    state.update(complete_blind_plus_ten)
    assert state.bets_by_stage[GameStage.PRE_FLOP][1] == [
        state.small_blind,
        complete_blind_plus_ten,
    ]
    assert state.game_stage == GameStage.PRE_FLOP

    # Note: the big blind player calls. At this point,
    #  the dealer has folded, and the other two have both bet the big blind plus $10.
    #  We move to the next stage (the flop) and the dealer deals three public cards
    state.update(ten_dollars)
    assert state.bets_by_stage[GameStage.PRE_FLOP][2] == [state.big_blind, ten_dollars]

    assert state.game_stage == GameStage.FLOP
    assert len(state.public_cards) == 3
    assert len(state.shuffled_deck) == n_cards_in_deck_after_initial_deal - 3

    # Note: the player after the dealer is the first to act after the flop
    assert state.current_player == 1

    # Note: player 1 bets $10, then player 2 bets $20; the dealer (player 0)
    #  has already folded and does nothing, and then player 1 calls with $10 more
    state.update(ten_dollars)
    state.update(2 * ten_dollars)
    state.update(ten_dollars)

    # Note: the dealer folded before making any bets
    assert state.bets_by_stage[GameStage.FLOP][state.dealer] == []
    assert state.bets_by_stage[GameStage.FLOP][1] == [ten_dollars, ten_dollars]
    assert state.bets_by_stage[GameStage.FLOP][2] == [2 * ten_dollars]

    assert state.game_stage == GameStage.TURN
    assert len(state.public_cards) == 4

    assert state.current_player == 1
    state.update(ten_dollars)

    # Player 2 folds, leaving player 1 as the only player still in the hand
    assert state.current_player == 2
    state.update(action_fold)

    # Player 2 put in the big blind plus $10 pre flop and $20 on the flop,
    #  all of which goes to player 1
    total_bets_by_player_2 = ten_dollars * 3 + state.big_blind
    assert state.wealth[2] == initial_wealth - total_bets_by_player_2
    assert state.wealth[1] == initial_wealth + total_bets_by_player_2
    assert state.wealth[0] == initial_wealth

    assert sum(state.wealth) == initial_wealth * state.n_players

    # A new hand has started: no public cards, and hole cards dealt again
    assert len(state.public_cards) == 0
    assert len(state.shuffled_deck) == n_cards_in_deck_after_initial_deal

    # Note: the dealer shifts to the next player
    assert state.dealer == 1

    assert all(player_has_folded is False for player_has_folded in state.has_folded)

    assert sum(state.wealth) == initial_wealth * state.n_players

    action_knock = 0
    action_small_bet = 10
    action_big_bet = 20

    assert state.game_stage == GameStage.PRE_FLOP

    # Note: all players bet (or complete) the big blind, and we move to the next stage
    state.update(state.big_blind)
    state.update(state.small_blind)
    state.update(action_knock)

    assert state.game_stage == GameStage.FLOP
    assert len(state.public_cards) == 3

    for _ in range(state.n_players):
        # Note: all players bet $10 after the flop
        #  and we move to the next stage
        state.update(action_small_bet)

    assert state.game_stage == GameStage.TURN
    assert len(state.public_cards) == 4

    for _ in range(state.n_players):
        # Note: all players bet $10 after the turn
        #  and we move to the next stage
        state.update(action_small_bet)

    assert state.game_stage == GameStage.RIVER
    assert len(state.public_cards) == 5

    # Note: even though the stage has not ended, all public cards have
    #  been dealt and we can calculate hand strengths
    hand_strengths, hand_descriptions = state.calculate_best_hand_strengths()
    winning_players = argmax(hand_strengths)

    # Note: there may be multiple winning players (a tie)
    #  We assert on the first winning player's wealth
    wealth_before_winning = state.wealth[winning_players[0]]

    # Note: no players have folded, so all hand strengths must be positive
    assert max(hand_strengths) > 0

    state.update(action_small_bet)
    state.update(action_big_bet)
    state.update(action_big_bet)

    # Note: this player initially made a small bet, and the next person raised them with a large bet
    #  The first person to act now completes their bet (calls the raise)
    state.update(action_big_bet - action_small_bet)

    # Note: the bets were small during the flop and turn, and big during the river
    #  Each losing player's total bets are split evenly between the winners
    amount_won = (
        (state.n_players - len(winning_players))
        * (2 * action_small_bet + action_big_bet + state.big_blind)
        / len(winning_players)
    )
    assert state.wealth[winning_players[0]] == wealth_before_winning + amount_won
Пример #8
0
def test_ties():
    """Two players tie on the board's full house and split the folded player's bets."""
    starting_stack = 200.0

    # Note: there will be a full house (aces and kings) on the board
    #  Cards are dealt (popped) off of the _end_ of the list
    ordered_cards = (
        (Rank.ACE, Suit.HEARTS),
        (Rank.ACE, Suit.DIAMONDS),
        (Rank.KING, Suit.HEARTS),
        (Rank.KING, Suit.DIAMONDS),
        (Rank.KING, Suit.CLUBS),
        (Rank.TWO, Suit.HEARTS),
        (Rank.SEVEN, Suit.HEARTS),
        (Rank.TWO, Suit.SPADES),
        (Rank.SEVEN, Suit.SPADES),
        (Rank.TWO, Suit.CLUBS),
        (Rank.SEVEN, Suit.CLUBS),
    )
    deck = [Card(rank, suit) for rank, suit in ordered_cards]

    # Note: we pass in a deck so that the order in which cards are dealt is known and deterministic
    state = State(
        n_players=3,
        initial_wealth=starting_stack,
        initial_dealer=0,
        deck=deck,
    )

    fold, knock, bet = -1, 0, 1

    # Pre flop: everyone ends up having put in the big blind
    state.update(state.big_blind)
    state.update(state.small_blind)
    state.update(knock)
    assert state.game_stage == GameStage.FLOP

    # Every player bets once on the flop and once on the turn
    for stage_reached in (GameStage.TURN, GameStage.RIVER):
        for _ in range(state.n_players):
            state.update(bet)
        assert state.game_stage == stage_reached

    # Total put in over the first three stages by the player about to fold
    folding_player = state.current_player
    amount_bet_by_player_who_folds = sum(
        sum(state.bets_by_stage[stage_index][folding_player])
        for stage_index in range(3)
    )

    state.update(fold)
    assert state.has_folded[1]

    # Note: the other two players stay in the game. They tie and split the pot
    state.update(bet)
    state.update(bet)

    share_per_winner = amount_bet_by_player_who_folds / (state.n_players - 1)
    expected_winner_wealth = starting_stack + share_per_winner
    assert state.wealth[0] == expected_winner_wealth
    assert state.wealth[2] == expected_winner_wealth
    assert sum(state.wealth) == starting_stack * state.n_players