Example #1
from agent.simpleAgent import simpleAgent
from agent.NFSPLimit import NFSPLimit
import gym
import holdem
import torch
from holdem.env import LimitTexasHoldemEnv
from holdem.utils import toLimitDiscreteAction, toLimitContinuesAction, correctLimitAction, get_card_dict, toCardState, toLimitBettingState, random_actions, uniform_random_actions, fold_actions, safe_actions
import numpy as np

num_player = 4
num_NFSP = 2
big = False
hid = 64
num_versions = 91
checkpoint_dir = '4players_64unit/4players_64hid_checkpoints'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = LimitTexasHoldemEnv(num_player, max_limit=1e9,
                          debug=False)  # initialize a 4-player game
env.add_player(0, stack=20000)  # add a player to seat 0 with 20000 "chips"
env.add_player(1, stack=20000)  # add a player to seat 1 with 20000 "chips"
env.add_player(2, stack=20000)  # add a player to seat 2 with 20000 "chips"
env.add_player(3, stack=20000)  # add a player to seat 3 with 20000 "chips"
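# build the card-to-index dictionary used by toCardState to encode hands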
card_dictionary = get_card_dict()

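# sweep over all saved checkpoint versions (one written every 1000 training iterations)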
for epoch in range(num_versions):
    rl_dir = checkpoint_dir + '/rl_checkpoints/checkpoint_' + str(epoch + 1) + '000.pt'
    sl_dir = checkpoint_dir + '/sl_checkpoints/checkpoint_' + str(epoch + 1) + '000.pt'
    # policy_net = DQN(num_player=num_player,big=False).to(device)
    policy_net = DQN_limit(num_player=num_player,
                           big=big,
                           num_action=3,
Example #2
def evaluate(policy_net, rl_optimizer, sl_net, sl_optimizer, steps_done,
             iteration, type_of_eval):
    env = LimitTexasHoldemEnv(num_player, max_limit=1e9,
                              debug=False)  # initialize a 4-player game
    env.add_player(0, stack=20000)  # add a player to seat 0 with 20000 "chips"
    env.add_player(1, stack=20000)  # add a player to seat 1 with 20000 "chips"
    env.add_player(2, stack=20000)  # add a player to seat 2 with 20000 "chips"
    env.add_player(3, stack=20000)  # add a player to seat 3 with 20000 "chips"

    results = []
    for experiment in range(1):

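        # game_board tracks each seat's current stack; sum_board accumulates net winnings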
        game_board = {}
        sum_board = {}

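        # NFSP agents take the first num_NFSP seats; the remaining seats get baseline agents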
        nfsp_players = {}
        for i in range(num_NFSP):
            nfsp_players[i] = NFSPLimit(card_dict=card_dictionary,
                                        device=device)
            game_board[i] = 20000
            sum_board[i] = 0
        random_players = {}
        for i in range(num_player - num_NFSP):
            random_players[i + num_NFSP] = simpleAgent()
            game_board[i + num_NFSP] = 20000
            sum_board[i + num_NFSP] = 0

        for i_episode in range(25000):
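            # betting-history tensor; the dimensions presumably index (seat, betting round, raise count, action)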
            betting_state = np.zeros((num_player, 4, 5, 3))
            # print('-------------Playing Game:{}------------'.format(i_episode))
            (player_states, (community_infos, community_cards)) = env.reset()
            (player_infos, player_hands) = zip(*player_states)
            for i in range(num_NFSP):
                nfsp_players[i].setInitState(
                    betting_state,
                    toCardState(community_cards, player_hands[i],
                                card_dictionary))

            current_round = 0
            terminal = False
            # if i_episode % 1000 == 0:
            #     print(i_episode)
            #     env.render()
            while not terminal:
                current_player = community_infos[-1]  # seat whose turn it is
                current_round = community_infos[5]  # betting round index
                current_raises = community_infos[6]  # raises so far in this round
                current_pot = community_infos[3]  # current pot size
                if current_player in nfsp_players.keys():
                    card_embed = toCardState(community_cards,
                                             player_hands[current_player],
                                             card_dictionary)
                    nfsp_players[current_player].setState(
                        nfsp_players[current_player].toStateRepre(
                            betting_state, card_embed))
                    action, f_rl = nfsp_players[current_player].act(
                        sl_net=sl_net, policy_net=policy_net)
                    action_c = correctLimitAction(action.item(),
                                                  community_infos,
                                                  player_infos, num_player)
                    actions = toLimitContinuesAction(action_c, community_infos,
                                                     player_infos, num_player)
                else:
                    if type_of_eval == 'call':
                        actions = safe_actions(community_infos,
                                               player_infos,
                                               n_seats=num_player)
                    else:
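                        # simple_heuristic (defined elsewhere in this project) is a rule-based baseline opponent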
                        actions = simple_heuristic(
                            community_infos,
                            player_infos,
                            n_seats=num_player,
                            community_cards=community_cards,
                            player_hands=player_hands)
                    action_c = toLimitDiscreteAction(current_player,
                                                     current_pot, player_infos,
                                                     actions)

                action = torch.tensor([[action_c]],
                                      dtype=torch.long).to(device)
                # take actions
                (player_states,
                 (community_infos,
                  community_cards)), rews, terminal, info = env.step(actions)
                (player_infos, player_hands) = zip(*player_states)
                # if i_episode % 1000 == 0:
                # env.render()
                if terminal:
                    # set None state
                    for i in range(num_NFSP):
                        nfsp_players[i].reset()
                    for i in range(num_player - num_NFSP):
                        random_players[i + num_NFSP].reset()
                else:
                    # not terminal
                    if current_player in nfsp_players.keys():
                        betting_state = toLimitBettingState(
                            betting_state, current_round, current_raises,
                            current_player, action)
                        nfsp_players[current_player].setState(
                            nfsp_players[current_player].toStateRepre(
                                betting_state, card_embed))
                    else:
                        betting_state = toLimitBettingState(
                            betting_state, current_round, current_raises,
                            current_player, action)

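                    # the betting round advanced; reset the agents (mirrors the terminal-state handling above)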
                    if current_round != community_infos[5]:
                        for i in range(num_NFSP):
                            nfsp_players[i].reset()
                        for i in range(num_player - num_NFSP):
                            random_players[i + num_NFSP].reset()

            # record each player's net result for this hand
            for player_id in range(num_player):
                sum_board[player_id] += player_infos[player_id][2] - game_board[player_id]
                game_board[player_id] = player_infos[player_id][2]

            # reset all stacks to 20000 once any player's stack falls to 100 or below
            lost_players = [
                p for p in env._seats if not p.emptyplayer and p.stack <= 100
            ]
            if lost_players:
                for p in range(num_player):
                    env.remove_player(p)
                    env.add_player(p, stack=20000)
                    game_board[p] = 20000

            if (i_episode + 1) % 1000 == 0:
                with open(
                        'log_' + str(num_player) + 'players_' + str(num_hid) +
                        'hid_' + str(num_layer) + 'layer_' + str(use_res_net) +
                        'res' + str(iteration) + 'nfsp' + str(type_of_eval) +
                        str(experiment) + '.txt', 'a+') as f:
                    line = [
                        str(sum_board[p] / (i_episode + 1))
                        for p in range(num_player)
                    ]
                    line = ','.join([str(i_episode + 1)] + line)
                    f.write(line + '\n')
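        # average per-hand winnings of seat 0 (an NFSP seat) over this experiment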
        results.append(sum_board[0] / (i_episode + 1))
    with open(
            str(num_player) + 'players_' + str(num_hid) + 'hid_' +
            str(num_layer) + 'layer_' + str(use_res_net) + 'res' + 'nfsp' +
            str(type_of_eval) + 'results.txt', 'a+') as f:
        f.write(','.join(
            [str(iteration),
             str(np.mean(results)),
             str(np.std(results))]) + '\n')
Example #3
from agent.NFSPAgent import NFSPAgent
from agent.simpleAgent import simpleAgent
from agent.NFSPLimit import NFSPLimit
import gym
import holdem
import torch
from holdem.env import LimitTexasHoldemEnv
from holdem.utils import hand_to_str, toLimitDiscreteAction, toLimitContinuesAction, correctLimitAction, get_card_dict, toCardState, toLimitBettingState, random_actions, uniform_random_actions, fold_actions, safe_actions
import numpy as np

from treys import Card, Deck, Evaluator

num_player = 4
num_NFSP = 4
hid = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = LimitTexasHoldemEnv(num_player, max_limit=1e9,
                          debug=False)  # initialize a 4-player game
env.add_player(0, stack=20000)  # add a player to seat 0 with 20000 "chips"
env.add_player(1, stack=20000)  # add a player to seat 1 with 20000 "chips"
env.add_player(2, stack=20000)  # add a player to seat 2 with 20000 "chips"
env.add_player(3, stack=20000)  # add a player to seat 3 with 20000 "chips"
card_dictionary = get_card_dict()
checkpoint_dir = '4players_64unit/4players_64hid_checkpoints'
version = 73

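# load a different checkpoint version for each NFSP seat (version + 1 + i)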
policy_net = {}
sl_net = {}
rl_dir = {}
sl_dir = {}
for i in range(num_NFSP):
    rl_dir[i] = checkpoint_dir + '/rl_checkpoints/checkpoint_' + str(version + 1 + i) + '000.pt'
Example #4
GAMMA = 1  # discount factor
POLICY_UPDATE = 128  # policy-network update interval (steps)
TARGET_UPDATE = 128 * 300  # target-network update interval (steps)
SAVE_INTERVAL = 5000  # checkpoint save interval (iterations)
big = False
use_res_net = False
num_hid = 64
num_layer = 1
num_NFSP = 1
# init objects
save_dir = str(num_player) + 'players_' + str(num_hid) + 'hid_' + str(
    num_layer) + 'layer_' + str(use_res_net) + 'res' + '_checkpoints'
log_dir = 'log'

# environment
env = LimitTexasHoldemEnv(num_player, max_limit=1e5, debug=False)  # num_player-seat game
# game board
game_board = {}
sum_board = {}
for p in range(num_player):
    env.add_player(p, stack=20000)
    game_board[p] = 20000
    sum_board[p] = 0

# cards to index
card_dictionary = get_card_dict()

# players
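# every seat is controlled by an NFSP agent (self-play setup)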
players = {}
for i in range(num_player):
    players[i] = NFSPLimit(card_dict=card_dictionary, device=device)