# Training-loop fragment: every seat is an NFSP agent with its own networks.
if i_episode % 1000 == 0:
    print(i_episode)
    env.render()
while not terminal:
    current_player = community_infos[-1]
    current_round = community_infos[5]
    current_raises = community_infos[6]
    current_pot = community_infos[3]
    card_embed = toCardState(community_cards, player_hands[current_player], card_dictionary)
    nfsp_players[current_player].setState(
        nfsp_players[current_player].toStateRepre(betting_state, card_embed))
    action, follow_rl = nfsp_players[current_player].act(
        sl_net=sl_net[current_player], policy_net=policy_net[current_player])
    action_c = correctLimitAction(action.item(), community_infos, player_infos, num_player)
    actions = toLimitContinuesAction(action_c, community_infos, player_infos, num_player)
    action = torch.tensor([[action_c]], dtype=torch.long).to(device)
    # take actions
    (player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
    (player_infos, player_hands) = zip(*player_states)
    if i_episode % 1000 == 0:
        env.render()
    if terminal:
        # set None state
        for i in range(num_NFSP):
            nfsp_players[i].reset()
    else:  # not terminal
        betting_state = toLimitBettingState(
            betting_state, current_round, current_raises, current_player, action)
        nfsp_players[current_player].setState(
            nfsp_players[current_player].toStateRepre(betting_state, card_embed))
        if current_round != community_infos[5]:
            # a new betting round has started
            for i in range(num_NFSP):
                nfsp_players[i].reset()
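
# For orientation: NFSPLimit.act(...) above returns an action together with a
# flag (follow_rl) marking which policy produced it. In NFSP this is typically
# an anticipatory mixture: with probability eta the agent plays the greedy best
# response from the RL network, otherwise the average policy from the SL
# network. A minimal sketch of that selection rule -- an assumption about
# NFSPLimit's internals, not its verified implementation:
import random

def nfsp_act_sketch(rl_action, sl_action, eta=0.1):
    # Returns (action, follow_rl), mirroring the pair unpacked above.
    if random.random() < eta:
        return rl_action, True    # greedy best response (RL net)
    return sl_action, False       # average policy (SL net)
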
# Training-loop fragment: mixed table -- NFSP seats plus scripted seats that
# fall back to safe (call/check) actions.
if current_player in nfsp_players.keys():
    card_embed = toCardState(community_cards, player_hands[current_player], card_dictionary)
    nfsp_players[current_player].setState(
        nfsp_players[current_player].toStateRepre(betting_state, card_embed))
    action, follow_rl = nfsp_players[current_player].act(
        sl_net=sl_net[current_player], policy_net=policy_net[current_player])
    action_c = correctLimitAction(action.item(), community_infos, player_infos, num_player)
    actions = toLimitContinuesAction(action_c, community_infos, player_infos, num_player)
else:
    actions = safe_actions(community_infos, player_infos, n_seats=num_player)
    action_c = toLimitDiscreteAction(current_player, current_pot, player_infos, actions)
action = torch.tensor([[action_c]], dtype=torch.long).to(device)
# take actions
(player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
(player_infos, player_hands) = zip(*player_states)
# if i_episode % 1000 == 0:
#     env.render()
if terminal:
    # set None state
    for i in range(num_NFSP):
        nfsp_players[i].reset()
    for i in range(num_player - num_NFSP):
        random_players[i + num_NFSP].reset()
else:  # not terminal
    if current_player in nfsp_players.keys():
        betting_state = toLimitBettingState(
            betting_state, current_round, current_raises, current_player, action)
        nfsp_players[current_player].setState(
            nfsp_players[current_player].toStateRepre(betting_state, card_embed))
    else:
        betting_state = toLimitBettingState(
            betting_state, current_round, current_raises, current_player, action)
    if current_round != community_infos[5]:
        for i in range(num_NFSP):
            nfsp_players[i].reset()
        for i in range(num_player - num_NFSP):
            random_players[i + num_NFSP].reset()
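
# For orientation: betting_state is created as np.zeros((num_player, 4, 5, 3))
# below, which suggests one plane per player with axes (betting round, raise
# number within the round, discrete action). A minimal self-contained sketch of
# an update in that layout -- an assumption about toLimitBettingState, not its
# verified implementation (the 0=fold/1=call/2=raise ordering is also assumed):
import numpy as np

def to_limit_betting_state_sketch(betting_state, current_round, current_raises,
                                  current_player, action_c):
    # Record action_c as a one-hot entry for current_player at the cell indexed
    # by (round, raise count); copy so the caller's array is left untouched.
    state = betting_state.copy()
    state[current_player, current_round, min(current_raises, 4), action_c] = 1
    return state

# Example: player 0 raises as the first action of the pre-flop round.
_state = to_limit_betting_state_sketch(np.zeros((4, 4, 5, 3)), 0, 0, 0, 2)
assert _state.sum() == 1
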
def evaluate(policy_net, rl_optimizer, sl_net, sl_optimizer, steps_done, iteration, type_of_eval):
    env = LimitTexasHoldemEnv(num_player, max_limit=1e9, debug=False)  # initialize the game
    env.add_player(0, stack=20000)  # add a player to seat 0 with 20000 "chips"
    env.add_player(1, stack=20000)  # add a player to seat 1 with 20000 "chips"
    env.add_player(2, stack=20000)  # add a player to seat 2 with 20000 "chips"
    env.add_player(3, stack=20000)  # add a player to seat 3 with 20000 "chips"
    results = []
    for experiment in range(1):
        game_board = {}
        sum_board = {}
        nfsp_players = {}
        for i in range(num_NFSP):
            nfsp_players[i] = NFSPLimit(card_dict=card_dictionary, device=device)
            game_board[i] = 20000
            sum_board[i] = 0
        random_players = {}
        for i in range(num_player - num_NFSP):
            random_players[i + num_NFSP] = simpleAgent()
            game_board[i + num_NFSP] = 20000
            sum_board[i + num_NFSP] = 0
        for i_episode in range(25000):
            betting_state = np.zeros((num_player, 4, 5, 3))
            # print('-------------Playing Game:{}------------'.format(i_episode))
            (player_states, (community_infos, community_cards)) = env.reset()
            (player_infos, player_hands) = zip(*player_states)
            for i in range(num_NFSP):
                nfsp_players[i].setInitState(
                    betting_state,
                    toCardState(community_cards, player_hands[i], card_dictionary))
            current_round = 0
            terminal = False
            # if i_episode % 1000 == 0:
            #     print(i_episode)
            #     env.render()
            while not terminal:
                current_player = community_infos[-1]
                current_round = community_infos[5]
                current_raises = community_infos[6]
                current_pot = community_infos[3]
                if current_player in nfsp_players.keys():
                    card_embed = toCardState(community_cards, player_hands[current_player], card_dictionary)
                    nfsp_players[current_player].setState(
                        nfsp_players[current_player].toStateRepre(betting_state, card_embed))
                    action, f_rl = nfsp_players[current_player].act(
                        sl_net=sl_net, policy_net=policy_net)
                    action_c = correctLimitAction(action.item(), community_infos, player_infos, num_player)
                    actions = toLimitContinuesAction(action_c, community_infos, player_infos, num_player)
                else:
                    if type_of_eval == 'call':
                        actions = safe_actions(community_infos, player_infos, n_seats=num_player)
                    else:
                        actions = simple_heuristic(
                            community_infos, player_infos, n_seats=num_player,
                            community_cards=community_cards, player_hands=player_hands)
                    action_c = toLimitDiscreteAction(current_player, current_pot, player_infos, actions)
                action = torch.tensor([[action_c]], dtype=torch.long).to(device)
                # take actions
                (player_states, (community_infos, community_cards)), rews, terminal, info = env.step(actions)
                (player_infos, player_hands) = zip(*player_states)
                # if i_episode % 1000 == 0:
                #     env.render()
                if terminal:
                    # set None state
                    for i in range(num_NFSP):
                        nfsp_players[i].reset()
                    for i in range(num_player - num_NFSP):
                        random_players[i + num_NFSP].reset()
                else:  # not terminal
                    if current_player in nfsp_players.keys():
                        betting_state = toLimitBettingState(
                            betting_state, current_round, current_raises, current_player, action)
                        nfsp_players[current_player].setState(
                            nfsp_players[current_player].toStateRepre(betting_state, card_embed))
                    else:
                        betting_state = toLimitBettingState(
                            betting_state, current_round, current_raises, current_player, action)
                    if current_round != community_infos[5]:
                        for i in range(num_NFSP):
                            nfsp_players[i].reset()
                        for i in range(num_player - num_NFSP):
                            random_players[i + num_NFSP].reset()
            # record winnings since the previous hand (player_infos[...][2] holds the current stack)
            for player_id in range(num_player):
                sum_board[player_id] += player_infos[player_id][2] - game_board[player_id]
                game_board[player_id] = player_infos[player_id][2]
            # reset players to 20000 if anyone's stack is down to 100
            lost_players = [
                p for p in env._seats if not p.emptyplayer and p.stack <= 100
            ]
            if lost_players:
                for p in range(num_player):
                    env.remove_player(p)
                    env.add_player(p, stack=20000)  # re-seat with a fresh 20000 stack
                    game_board[p] = 20000
            if (i_episode + 1) % 1000 == 0:
                log_name = ('log_' + str(num_player) + 'players_' + str(num_hid) + 'hid_'
                            + str(num_layer) + 'layer_' + str(use_res_net) + 'res'
                            + str(iteration) + 'nfsp' + str(type_of_eval)
                            + str(experiment) + '.txt')
                with open(log_name, 'a+') as f:
                    line = [str(sum_board[p] / (i_episode + 1)) for p in range(num_player)]
                    line = ','.join([str(i_episode + 1)] + line)
                    f.write(line + '\n')
        results.append(sum_board[0] / (i_episode + 1))
    with open(str(num_player) + 'players_' + str(num_hid) + 'hid_' + str(num_layer)
              + 'layer_' + str(use_res_net) + 'res' + 'nfsp' + str(type_of_eval)
              + 'results.txt', 'a+') as f:
        f.write(','.join([str(iteration), str(np.mean(results)), str(np.std(results))]) + '\n')
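
# Hypothetical driver (assumption: the surrounding script builds policy_net,
# sl_net and their optimizers before this point). type_of_eval='call' pits the
# NFSP seats against always-call opponents; any other value (e.g. 'heuristic')
# routes the scripted seats through simple_heuristic instead, as handled inside
# evaluate():
#
#   evaluate(policy_net, rl_optimizer, sl_net, sl_optimizer, steps_done,
#            iteration=0, type_of_eval='call')
#   evaluate(policy_net, rl_optimizer, sl_net, sl_optimizer, steps_done,
#            iteration=0, type_of_eval='heuristic')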