def test_short_deck_1():
    """Test the short deck poker game state works as expected."""
    from pluribus.games.short_deck.player import ShortDeckPokerPlayer
    from pluribus.games.short_deck.state import ShortDeckPokerState
    from pluribus.poker.pot import Pot

    n_players = 3
    pot = Pot()
    players = [
        ShortDeckPokerPlayer(player_i=player_i, pot=pot, initial_chips=10000)
        for player_i in range(n_players)
    ]
    state = ShortDeckPokerState(players=players, load_pickle_files=False)
    # Call for all players.
    for player_i in range(n_players):
        assert state.current_player.name == f"player_{player_i}"
        assert len(state.legal_actions) == 3
        assert state._betting_stage == "pre_flop"
        state = state.apply_action(action_str="call")
    # Fold for all but last player.
    for player_i in range(n_players - 1):
        assert state.current_player.name == f"player_{player_i}"
        assert len(state.legal_actions) == 3
        assert state._betting_stage == "flop"
        state = state.apply_action(action_str="fold")
    # Only one player left, so game state should be terminal.
    assert state.is_terminal, "state was not terminal"
    assert state._betting_stage == "terminal"
Example #2
def cfrp(state: ShortDeckPokerState, i: int, t: int) -> float:
    """
    CFR with pruning (CFR-P). The pruning may need adjusting so that it only
    applies when not in the final betting round and not at a terminal node.

    :param state: the game state
    :param i: the player index
    :param t: the iteration
    :return: expected value of this node for player i
    """
    ph = state.player_i

    if state.is_terminal:
        # payout is already computed from player i's perspective.
        return state.payout[i]
    # elif p_i not in hand:
    #   cfrp()
    # TODO: Does this need to be added or does the game logic account for this?
    # elif h is chance_node:  -- we don't care about chance nodes here, but we will for No Limit
    #   sample action from strategy for h
    #   cfrp()
    # TODO: Does the game logic appropriately account for chance samplings? In other words, make sure
    #  that chance actions (i.e., dealing cards) are performed the appropriate number of times.
    elif ph == i:
        I = state.info_set
        # calculate strategy
        calculate_strategy(regret, sigma, I, state)
        # TODO: Does updating sigma here (as opposed to after regret) miss out on any updates?
        #  If so, is there any benefit to having it up here?
        vo = 0.0
        voa = {}
        explored = {}  # tracks which actions were explored (not pruned)
        for a in state.legal_actions:
            if regret[I][a] > C:
                new_state: ShortDeckPokerState = state.apply_action(a)
                voa[a] = cfrp(new_state, i, t)
                explored[a] = True
                vo += sigma[t][I][a] * voa[a]
            else:
                explored[a] = False
        for a in state.legal_actions:
            if explored[a]:
                regret[I][a] += voa[a] - vo
                # No need to update the strategy from regret here;
                # calculate_strategy derives it from regret via sigma.
        return vo
    else:
        Iph = state.info_set
        calculate_strategy(regret, sigma, Iph, state)
        try:
            a = np.random.choice(list(sigma[t][Iph].keys()),
                                 1,
                                 p=list(sigma[t][Iph].values()))[0]
        except ValueError:
            p = 1 / len(state.legal_actions)
            probabilities = np.full(len(state.legal_actions), p)
            a = np.random.choice(state.legal_actions, p=probabilities)
            sigma[t][Iph] = {action: p for action in state.legal_actions}
        new_state: ShortDeckPokerState = state.apply_action(a)
        return cfrp(new_state, i, t)
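# Both cfr and cfrp assume a calculate_strategy helper that is not shown in
# these examples. Below is a minimal regret-matching sketch, assuming regret
# and sigma are nested dicts keyed as regret[I][a] and sigma[t][I][a]; the
# exact table layout and the use of the global iteration counter t are
# assumptions, not confirmed details of the original code.
def calculate_strategy(regret, sigma, I, state):
    """Derive sigma[t][I] from regrets by regret matching."""
    # Total positive regret over the legal actions at this info set.
    rsum = sum(max(regret[I][a], 0) for a in state.legal_actions)
    for a in state.legal_actions:
        if rsum > 0:
            # Play each action in proportion to its positive regret.
            sigma[t][I][a] = max(regret[I][a], 0) / rsum
        else:
            # No positive regret anywhere: fall back to a uniform strategy.
            sigma[t][I][a] = 1 / len(state.legal_actions)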
Example #3
def cfr(state: ShortDeckPokerState, i: int, t: int) -> float:
    """
    regular cfr algo

    :param state: the game state
    :param i: player
    :param t: iteration
    :return: expected value for node for player i
    """
    ph = state.player_i

    if state.is_terminal:
        # payout is already computed from player i's perspective.
        return state.payout[i]
    # elif p_i not in hand:
    #   cfr()
    # TODO: Does this need to be added or does the game logic account for this?
    # elif h is chance_node:
    #   sample action from strategy for h
    #   cfr()
    # TODO: Does the game logic appropriately account for chance samplings? In other words, make sure
    #  that chance actions (i.e., dealing cards) are performed the appropriate number of times.
    elif ph == i:
        I = state.info_set
        # calculate strategy
        calculate_strategy(regret, sigma, I, state)
        # TODO: Does updating sigma here (as opposed to after regret) miss out on any updates?
        #  If so, is there any benefit to having it up here?
        vo = 0.0
        voa = {}
        for a in state.legal_actions:
            new_state: ShortDeckPokerState = state.apply_action(a)
            voa[a] = cfr(new_state, i, t)
            vo += sigma[t][I][a] * voa[a]
        for a in state.legal_actions:
            regret[I][a] += voa[a] - vo
            # No need to update the strategy from regret here;
            # calculate_strategy derives it from regret via sigma.
        return vo
    else:
        Iph = state.info_set
        calculate_strategy(regret, sigma, Iph, state)
        try:
            a = np.random.choice(list(sigma[t][Iph].keys()),
                                 1,
                                 p=list(sigma[t][Iph].values()))[0]
        except ValueError:
            p = 1 / len(state.legal_actions)
            probabilities = np.full(len(state.legal_actions), p)
            a = np.random.choice(state.legal_actions, p=probabilities)
            sigma[t][Iph] = {action: p for action in state.legal_actions}
        new_state: ShortDeckPokerState = state.apply_action(a)
        return cfr(new_state, i, t)
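# The functions above read module-level tables (regret, sigma, strategy), a
# pruning threshold C, and numpy as np. A minimal setup and training-loop
# sketch follows; the defaultdict layout, the constant values, and the warm-up
# schedule are all assumptions, and new_game is the helper from a later
# example in this collection.
from collections import defaultdict

import numpy as np

n_players = 3
n_iterations = 10000
C = -300_000_000  # regrets below this threshold are pruned by cfrp (assumed value)

regret = defaultdict(lambda: defaultdict(float))    # regret[I][a]
strategy = defaultdict(lambda: defaultdict(float))  # strategy[I][a] action counts
sigma = defaultdict(                                # sigma[t][I][a]
    # default: uniform over this game's 3 actions (an assumption)
    lambda: defaultdict(lambda: defaultdict(lambda: 1 / 3))
)

for t in range(1, n_iterations + 1):
    for i in range(n_players):
        cfr(new_game(n_players), i, t)  # or cfrp(...) once regrets warm up
        update_strategy(new_game(n_players), i)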
Example #4
def generate_all_action_sequences(
    state: ShortDeckPokerState,
    action_sequences: ActionSequences,
    n_players: int
):
    """
    DFS to return action combos
    """
    if state.is_terminal:
        lst = action_sequences.action_combo.copy()
        action_sequences.action_combos.append(lst)
        nodes_found = len(action_sequences.action_combos)
        if nodes_found % 1000 == 0:
            print(f"Found {nodes_found} of ceiling {133**4}")
            size_of_lst = sys.getsizeof(action_sequences.action_combos)
            print(f"Size of list: {size_of_lst}")
        action_sequences.action_combo.pop()
        return action_sequences.action_combo

    for a in state.legal_actions:
        action_sequences.action_combo.append(a)
        new_state: ShortDeckPokerState = state.apply_action(a)

        action_sequences.action_combo = generate_all_action_sequences(
            new_state, action_sequences, n_players
        )
    if action_sequences.action_combo:
        action_sequences.action_combo.pop()
    return action_sequences.action_combo
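# generate_all_action_sequences accumulates results on an ActionSequences
# container that is not defined in this example. Below is a minimal sketch
# inferred from the attribute accesses above (the class layout is an
# assumption), plus a driver that uses the new_game helper from a later
# example in this collection.
import sys
from dataclasses import dataclass, field
from typing import List


@dataclass
class ActionSequences:
    action_combo: List[str] = field(default_factory=list)         # current DFS path
    action_combos: List[List[str]] = field(default_factory=list)  # completed paths


action_sequences = ActionSequences()
generate_all_action_sequences(new_game(3), action_sequences, n_players=3)
print(f"Collected {len(action_sequences.action_combos)} terminal action sequences")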
Example #5
def new_game(n_players: int) -> ShortDeckPokerState:
    """Create a new game of short deck poker."""
    pot = Pot()
    players = [
        ShortDeckPokerPlayer(player_i=player_i, initial_chips=10000, pot=pot)
        for player_i in range(n_players)
    ]
    state = ShortDeckPokerState(players=players)
    return state
Example #6
def update_strategy(state: ShortDeckPokerState, i: int) -> None:
    """
    Update the average-strategy counts by sampling actions according to sigma.

    :param state: the game state
    :param i: the player. i = 1 always acts first and i = 2 always acts second,
        but the players take turns updating the (single, shared) strategy
    :return: nothing; increments the count of each action chosen according to
        sigma. This simple counting of chosen actions is what lets the
        algorithm build up a preference for one action over another in a given
        spot
    """
    ph = state.player_i  # this is always the case no matter what i is

    if state.is_terminal or state.players[i].is_active is False:
        # Could also return when the betting round is > 0: Pluribus only
        # updates the strategy in the first betting round, but this example
        # updates in all rounds.
        return
    # elif h is chance_node:
    #   sample action from strategy for h
    #   update_strategy(rs, h + a, i)
    # TODO: Does the game logic appropriately account for chance samplings? In other words, make sure
    #  that chance actions (i.e., dealing cards) are performed the appropriate number of times.
    elif ph == i:
        I = state.info_set
        # calculate the strategy
        calculate_strategy(regret, sigma, I, state)
        # choose an action based on sigma
        try:
            a = np.random.choice(list(sigma[t][I].keys()),
                                 1,
                                 p=list(sigma[t][I].values()))[0]
        except ValueError:
            p = 1 / len(state.legal_actions)
            probabilities = np.full(len(state.legal_actions), p)
            a = np.random.choice(state.legal_actions, p=probabilities)
            sigma[t][I] = {action: p for action in state.legal_actions}
        strategy[I][a] += 1
        # strategy holds counts of actions sampled according to sigma; the
        # sampling already accounts for the reach probability, so there is no
        # need to pass reach probabilities (pi) around.
        new_state: ShortDeckPokerState = state.apply_action(a)
        update_strategy(new_state, i)
    else:
        for a in state.legal_actions:
            # the strategy is not updated when ph != i; only one player's
            # strategy is updated at a time
            new_state: ShortDeckPokerState = state.apply_action(a)
            update_strategy(new_state, i)
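# update_strategy only accumulates raw action counts in strategy[I][a]; the
# final (average) strategy comes from normalizing those counts per info set.
# A minimal sketch of that step (the function name is an assumption):
def normalize_strategy(strategy):
    """Turn per-info-set action counts into probability distributions."""
    normalized = {}
    for I, action_counts in strategy.items():
        total = sum(action_counts.values())
        if total > 0:
            normalized[I] = {a: n / total for a, n in action_counts.items()}
    return normalized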
Example #7
def new_game(
    n_players: int,
    small_blind: int = 50,
    big_blind: int = 100,
    initial_chips: int = 10000,
) -> Tuple[ShortDeckPokerState, Pot]:
    """Create a new game."""
    pot = Pot()
    players = [
        ShortDeckPokerPlayer(player_i=player_i, pot=pot, initial_chips=initial_chips)
        for player_i in range(n_players)
    ]
    state = ShortDeckPokerState(
        players=players,
        load_pickle_files=False,
        small_blind=small_blind,
        big_blind=big_blind,
    )
    return state, pot
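# Unlike the earlier new_game, this variant returns the pot alongside the
# state, so callers unpack both values, e.g.:
state, pot = new_game(n_players=3, small_blind=50, big_blind=100)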
Example #8
def generate_preflop_action_sequences(state: ShortDeckPokerState,
                                      action_sequences: ActionSequences,
                                      n_players: int):
    """
    DFS to return action combos
    """
    if state.is_terminal or state.betting_round > 0:
        lst = action_sequences.action_combo.copy()
        print(lst)
        action_sequences.action_combos[n_players].append(lst)
        action_sequences.action_combo.pop()
        return action_sequences.action_combo

    for a in state.legal_actions:
        action_sequences.action_combo.append(a)
        new_state: ShortDeckPokerState = state.apply_action(a)

        action_sequences.action_combo = generate_preflop_action_sequences(
            new_state, action_sequences, n_players)
    if action_sequences.action_combo:
        action_sequences.action_combo.pop()
    return action_sequences.action_combo
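# Note: unlike generate_all_action_sequences, this function appends to
# action_combos[n_players], so here the container's action_combos field would
# need to be a mapping from player count to lists of sequences, e.g. (an
# assumption) Dict[int, List[List[str]]] backed by a defaultdict(list).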
Example #9
from typing import Any, Dict

from flask import Flask, jsonify
from flask_cors import CORS

from pluribus import utils
from pluribus.games.short_deck.player import ShortDeckPokerPlayer
from pluribus.games.short_deck.state import ShortDeckPokerState
from pluribus.poker.card import Card
from pluribus.poker.pot import Pot

utils.random.seed(42)
app = Flask(__name__, static_folder="./dist/static", template_folder="./dist")
cors = CORS(app, resources={r"/api/*": {"origins": "*"}})
colours = [
    "cyan", "lightcoral", "crimson", "#444", "forestgreen", "goldenrod", "gold"
]
pot = Pot()
n_players = 3
players = [
    ShortDeckPokerPlayer(player_i=player_i, initial_chips=10000, pot=pot)
    for player_i in range(n_players)
]
state = ShortDeckPokerState(players=players,
                            pickle_dir="../../research/blueprint_algo/")


def _to_player_dict(
    player_i: int,
    player: ShortDeckPokerPlayer,
    pot: Pot,
) -> Dict[str, Any]:
    """Create dictionary to describe player for frontend."""
    return {
        "name": player.name,
        "color": colours[player_i],
        "bank": player.n_chips,
        "onTable": pot[player],
        "hasCards": True,
    }
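# A hypothetical endpoint sketch showing how _to_player_dict might be served
# to the frontend; the route path and response shape are assumptions, not the
# app's actual API.
@app.route("/api/players")
def players_endpoint() -> Any:
    """Return the current table's players as JSON."""
    return jsonify([
        _to_player_dict(player_i, player, pot)
        for player_i, player in enumerate(state.players)
    ])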