def testStreetIncrement(self):
    """Check that completed betting rounds move the hand to the next street.

    Plays one hand that starts on the turn (bet/call twice) and one hand
    that starts preflop and is checked down from the flop onwards.
    """

    def latest(field):
        # Entry for `field` in the newest row of the state history.
        return state[:, -1][:, env.state_mapping[field]]

    cfg = copy.deepcopy(self.env_params)
    cfg.update({'starting_street': pdt.Street.TURN, 'pot': 1})
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    # The final two board entries are zero until the next street is dealt.
    assert env.board[-2] == 0
    assert env.board[-1] == 0
    for move in (ACTION_BET, ACTION_CALL):
        state, obs, done, mask, betsize_mask = env.step(move)
    assert env.street == pdt.Street.RIVER
    assert env.board[-2] != 0
    for move in (ACTION_BET, ACTION_CALL):
        state, obs, done, mask, betsize_mask = env.step(move)
    assert done == True
    del env

    # Second hand: start preflop with an empty pot.
    cfg.update({'starting_street': pdt.Street.PREFLOP, 'pot': 0})
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    assert latest('hero_position') == pdt.Position.BB
    assert latest('last_position') == pdt.Position.SB
    assert env.pot == 2
    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    assert env.players['BB'].stack == 2
    assert env.players['SB'].stack == 4
    assert env.pot == 4
    assert latest('hero_position') == pdt.Position.SB
    assert latest('last_position') == pdt.Position.BB
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    assert latest('hero_position') == pdt.Position.BB
    assert latest('player2_position') == pdt.Position.SB
    assert latest('last_position') == pdt.Position.BTN
    assert latest('last_aggressive_position') == pdt.Position.BB
    assert env.street == pdt.Street.FLOP
    # Two checks close each remaining street.
    for following_street in (pdt.Street.TURN, pdt.Street.RIVER):
        for _ in range(2):
            state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
        assert env.street == following_street
    for _ in range(2):
        state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
    assert done == True
def testEnvCategoryMapping(self):
    """Check `convert_to_category` for (action, betsize) pairs during a hand.

    Each table row is (network action, betsize, expected first element of
    the value returned by `env.convert_to_category`).
    """

    def check(cases):
        for action, size, category in cases:
            assert env.convert_to_category(action, size)[0] == category

    actions = pdt.NetworkActions
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 50,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    check((
        (actions.RAISE, 3, 4),
        (actions.RAISE, 2, 3),
        (actions.CALL, 0.5, 2),
        (actions.CHECK, 0, 0),
    ))
    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    check((
        (actions.RAISE, 9, 4),
        (actions.RAISE, 5, 3),
        (actions.CALL, 2, 2),
        (actions.CHECK, 0, 0),
    ))
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    check((
        (actions.BET, 6, 4),
        (actions.BET, 3, 3),
        (actions.FOLD, 0, 1),
    ))
    state, obs, done, mask, betsize_mask = env.step(ACTION_BET)
    check((
        (actions.RAISE, 24, 4),
        (actions.RAISE, 12, 3),
        (actions.CALL, 6, 2),
        (actions.FOLD, 0, 1),
    ))
    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    check((
        (actions.RAISE, 47, 4),
        (actions.RAISE, 42, 3),
        (actions.CALL, 18, 2),
        (actions.FOLD, 0, 1),
    ))
    del env

    # Same mapping with a three-unit stack.
    cfg.update({
        'stacksize': 3,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    check(((actions.RAISE, 3, 4),))
    print('check', env.convert_to_category(actions.RAISE, 2)[0])
    check((
        (actions.RAISE, 2, 3),
        (actions.CALL, 0, 2),
        (actions.FOLD, 0, 1),
    ))
def preflopTests(self):
    """Preflop masks and stacks after an SB call, then after an SB min raise."""

    def latest(field):
        # Entry for `field` in the newest row of the state history.
        return state[:, -1][:, env.state_mapping[field]]

    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 5,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    for stack_field in ('player2_stacksize', 'player1_stacksize'):
        assert latest(stack_field) == 4
    assert np.array_equal(mask, np.array([1, 0, 0, 0, 1]))
    del env

    # Fresh hand with the same settings: the first action is a min raise.
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    state, obs, done, mask, betsize_mask = env.step(ACTION_MIN_RAISE)
    assert latest('player2_stacksize') == 3
    assert latest('player1_stacksize') == 4
def testCritic(self):
    """The critic's output for the reset observation holds a tensor under 'value'."""
    env = Poker(copy.deepcopy(self.env_params))
    n_actions, n_betsizes, n_state = (
        env.action_space, env.betsize_space, env.state_space)
    critic = OmahaObsQCritic(
        152, n_state, n_actions, n_betsizes, self.network_params)
    _state, obs, _done, _mask, _betsize_mask = env.reset()
    result = critic(obs)
    assert isinstance(result['value'], torch.Tensor)
def testMasks(self):
    """Track state fields and action/betsize masks through two preflop raises."""

    def expect_latest(expected):
        # Each pair is (state field name, value expected in the newest row).
        for field, value in expected:
            assert state[:, -1][:, env.state_mapping[field]] == value

    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 5,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    expect_latest((
        ('pot', 1.5),
        ('player1_stacksize', 4.5),
        ('player1_position', pdt.Position.SB),
        ('player2_stacksize', 4),
        ('player2_position', pdt.Position.BB),
        ('street', pdt.Street.PREFLOP),
    ))
    assert env.current_player == 'SB'
    assert np.array_equal(betsize_mask, np.array([1, 1]))
    assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))

    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    assert env.current_player == 'BB'
    expect_latest((
        ('pot', 4),
        ('player1_stacksize', 4),
        ('player1_position', pdt.Position.BB),
        ('player2_stacksize', 2),
        ('player2_position', pdt.Position.SB),
        ('street', pdt.Street.PREFLOP),
    ))
    assert np.array_equal(mask, np.array([0, 1, 1, 0, 1]))
    assert np.array_equal(betsize_mask, np.array([1, 0]))

    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    expect_latest((
        ('pot', 8),
        ('player1_stacksize', 2),
        ('player1_position', pdt.Position.SB),
        ('player2_stacksize', 0),
        ('player2_position', pdt.Position.BB),
        ('street', pdt.Street.PREFLOP),
    ))
    assert np.array_equal(mask, np.array([0, 1, 1, 0, 0]))
    assert np.array_equal(betsize_mask, np.array([0, 0]))
def testStreetInitialization(self):
    """A hand started on the river reports BB as both player1 and hero."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 50,
        'n_players': 2,
        'starting_street': pdt.Street.RIVER,
        'pot': 1,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    for field in ('player1_position', 'hero_position'):
        assert state[:, -1][:, env.state_mapping[field]] == pdt.Position.BB
def testActor(self):
    """The actor runs on the reset state and after a bet, returning tensors."""
    env = Poker(copy.deepcopy(self.env_params))
    n_actions, n_betsizes, n_state = (
        env.action_space, env.betsize_space, env.state_space)
    actor = OmahaActor(
        152, n_state, n_actions, n_betsizes, self.network_params)
    state, obs, done, mask, betsize_mask = env.reset()
    # First forward pass only has to complete; its result is overwritten.
    result = actor(state, mask, betsize_mask)
    state, obs, done, mask, betsize_mask = env.step(ACTION_BET)
    result = actor(state, mask, betsize_mask)
    for key in ('action_probs', 'action_prob'):
        assert isinstance(result[key], torch.Tensor)
def testReset(self):
    """Verify the state, masks, hands and deck right after `reset` on the river."""
    cfg = copy.deepcopy(self.env_params)
    cfg['starting_street'] = pdt.Street.RIVER
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()

    # One batch entry, one timestep.
    assert state.ndim == 3
    assert obs.ndim == 3
    assert state.shape == (1, 1, STATE_SHAPE)
    assert obs.shape == (1, 1, OBS_SHAPE)
    assert state[0, 0, env.state_mapping['street']] == pdt.Street.RIVER

    starting_stack = self.env_params['stacksize']
    expected_fields = (
        ('hero_position', pdt.Position.BB),
        ('hero_stacksize', starting_stack),
        ('player1_position', pdt.Position.BB),
        ('player1_stacksize', starting_stack),
        ('player1_street_total', 0),
        ('player2_position', pdt.Position.SB),
        ('player2_stacksize', starting_stack),
        ('player2_street_total', 0),
        ('last_action', pdt.Action.UNOPENED),
        ('last_aggressive_action', pdt.Action.UNOPENED),
        ('last_betsize', 0),
        ('last_position', pdt.Position.BTN),
        ('amount_to_call', 0),
        ('pot_odds', 0),
    )
    for field, value in expected_fields:
        assert state[0, -1, env.state_mapping[field]] == value

    assert env.players_remaining == 2
    assert done == False
    assert np.array_equal(mask, np.array([1., 0., 0., 1., 0.]))
    assert np.array_equal(betsize_mask, np.array([1., 1.]))

    hand_size = self.env_params['cards_per_player']
    for seat in ('SB', 'BB'):
        assert len(env.players.players[seat].hand) == hand_size
    # Cards removed from the deck: every hole card plus the initial board.
    dealt = (
        hand_size * self.env_params['n_players']
        + pdt.Globals.INITIALIZE_BOARD_CARDS[cfg['starting_street']])
    assert len(env.deck) == 52 - dealt
def testTies(self):
    """After a bet and call on a rigged river, both stacks end at 5.5."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'starting_street': pdt.Street.RIVER,
        'stacksize': 5,
        'pot': 1,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    # Overwrite the dealt board and both hands with fixed cards.
    env.board = [14, 0, 13, 1, 12, 2, 2, 2, 2, 3]
    env.players['SB'].hand = [[11, 3], [10, 3], [3, 2], [3, 3]]
    env.players['BB'].hand = [[11, 2], [10, 2], [4, 0], [4, 3]]
    for move in (ACTION_BET, ACTION_CALL):
        state, obs, done, mask, betsize_mask = env.step(move)
    assert done == True
    for seat in ('SB', 'BB'):
        assert env.players[seat].stack == 5.5
def testCheckBetFold(self):
    """River line check, bet, fold: inspect the state after the bet and the payout."""

    def latest(field):
        # Entry for `field` in the newest row of the state history.
        return state[:, -1][:, env.state_mapping[field]]

    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'starting_street': pdt.Street.RIVER,
        'stacksize': 5,
        'pot': 1,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    for move in (ACTION_CHECK, ACTION_BET):
        state, obs, done, mask, betsize_mask = env.step(move)

    # Reset row plus one row per action taken so far.
    assert state.ndim == 3
    assert obs.ndim == 3
    assert state.shape == (1, 3, STATE_SHAPE)
    assert obs.shape == (1, 3, OBS_SHAPE)
    sb, bb = env.players['SB'], env.players['BB']
    assert sb.stack == 4
    assert bb.stack == 5
    assert sb.street_total == 1
    assert bb.street_total == 0
    assert env.pot == 2
    assert latest('street') == pdt.Street.RIVER
    assert latest('hero_position') == pdt.Position.BB
    assert latest('last_position') == pdt.Position.SB
    assert latest('last_action') == pdt.Action.BET
    assert latest('last_betsize') == 1
    assert latest('hero_stacksize') == cfg['stacksize']
    assert latest('player2_stacksize') == cfg['stacksize'] - 1
    assert latest('amount_to_call') == 1
    self.assertAlmostEqual(latest('pot_odds')[0], 0.333, places=2)
    assert done == False
    assert np.array_equal(mask, np.array([0., 1., 1., 0., 1.]))
    assert np.array_equal(betsize_mask, np.array([1., 1.]))

    state, obs, done, mask, betsize_mask = env.step(ACTION_FOLD)
    assert done == True
    assert env.players['SB'].stack == 6
    assert env.players['BB'].stack == 5
    assert env.players['BB'].status == Status.FOLDED
def testBlindInitialization(self):
    """A preflop reset posts the blinds and leaves SB as the hero to act."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({'starting_street': pdt.Street.PREFLOP, 'pot': 0})
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    sb, bb = env.players['SB'], env.players['BB']
    assert sb.stack == 4.5
    assert bb.stack == 4.
    assert sb.street_total == 0.5
    assert bb.street_total == 1.
    assert state[0, -1, env.state_mapping['blind']] == pdt.Blind.POSTED
    newest = state[:, -1]
    assert newest[:, env.state_mapping['hero_position']] == pdt.Position.SB
    assert newest[:, env.state_mapping['last_position']] == pdt.Position.BB
    assert done == False
def betsizingTests(self):
    """Check `return_potlimit_betsize` before and after the SB calls preflop."""

    def expect_sizes(cases):
        # Each case is (action, betsize_category, expected betsize).
        for action, category, expected in cases:
            size = env.return_potlimit_betsize(
                action=action, betsize_category=category)
            assert size == expected

    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 5,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    expect_sizes(((4, 0, 1.5), (4, 1, 2.5), (2, 0, 0.5)))
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    expect_sizes(((4, 0, 1), (4, 1, 2)))
def additionalTests(self):
    """Raise/call preflop then bet/call: both stacks reach zero and the hand ends."""

    def latest(field):
        # Entry for `field` in the newest row of the state history.
        return state[:, -1][:, env.state_mapping[field]]

    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'stacksize': 5,
        'n_players': 2,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    assert latest('player2_stacksize') == 2
    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    assert latest('player1_stacksize') == 2
    assert latest('street') == 1
    for move in (ACTION_BET, ACTION_CALL):
        state, obs, done, mask, betsize_mask = env.step(move)
    for stack_field in ('player1_stacksize', 'player2_stacksize'):
        assert latest(stack_field) == 0
    assert latest('street') == 3
    assert done == True
def testCheckCheck(self):
    """River check/check: inspect the state after the first check, then the payout."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'starting_street': pdt.Street.RIVER,
        'stacksize': 5,
        'pot': 1,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
    assert state.ndim == 3
    assert obs.ndim == 3
    assert state.shape == (1, 2, STATE_SHAPE)
    assert obs.shape == (1, 2, OBS_SHAPE)

    # Row 1 is the row appended by the first check.
    expected_fields = (
        ('street', cfg['starting_street']),
        ('hero_position', pdt.Position.SB),
        ('player2_position', pdt.Position.BB),
        ('last_position', pdt.Position.BB),
        ('last_action', pdt.Action.CHECK),
        ('hero_stacksize', cfg['stacksize']),
        ('player2_stacksize', cfg['stacksize']),
        ('amount_to_call', 0),
        ('pot_odds', 0),
    )
    for field, value in expected_fields:
        assert state[:, 1][:, env.state_mapping[field]] == value
    assert done == False
    assert np.array_equal(mask, np.array([1., 0., 0., 1., 0.]))
    assert np.array_equal(betsize_mask, np.array([1., 1.]))

    state, obs, done, mask, betsize_mask = env.step(ACTION_CHECK)
    assert done == True
    assert env.players['SB'].stack == 6
    assert env.players['BB'].stack == 5
def testBetRestrictions(self):
    """Bet, raise, min raise on the river: BB is all-in and SB may no longer raise."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'starting_street': pdt.Street.RIVER,
        'stacksize': 5,
        'pot': 1,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()
    for move in (ACTION_BET, ACTION_RAISE, ACTION_MIN_RAISE):
        state, obs, done, mask, betsize_mask = env.step(move)

    sb, bb = env.players['SB'], env.players['BB']
    assert sb.stack == 1
    assert sb.status == Status.ACTIVE
    assert bb.stack == 0
    assert bb.status == Status.ALLIN
    assert state[0, -1, env.state_mapping['blind']] == pdt.Blind.NO_BLIND
    assert np.array_equal(mask, np.array([0., 1., 1., 0., 0.]))
    assert np.array_equal(betsize_mask, np.array([0., 0.]))

    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    assert done == True
    assert env.players['SB'].stack == 11
    assert env.players['BB'].stack == 0
def testAllin(self):
    """Three-handed preflop: BTN raises, SB folds, BB goes all-in, BTN calls."""
    cfg = copy.deepcopy(self.env_params)
    cfg.update({
        'n_players': 3,
        'starting_street': pdt.Street.PREFLOP,
        'pot': 0,
    })
    env = Poker(cfg)
    state, obs, done, mask, betsize_mask = env.reset()

    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    button = env.players['BTN']
    assert button.stack == 1.5
    assert button.street_total == 3.5

    state, obs, done, mask, betsize_mask = env.step(ACTION_FOLD)
    assert env.players['SB'].status == Status.FOLDED

    state, obs, done, mask, betsize_mask = env.step(ACTION_RAISE)
    big_blind = env.players['BB']
    assert big_blind.stack == 0
    assert big_blind.street_total == 5
    assert big_blind.status == Status.ALLIN

    state, obs, done, mask, betsize_mask = env.step(ACTION_CALL)
    assert env.players['BB'].stack == 10.5
    assert env.players['SB'].stack == 4.5
    assert env.players['BTN'].stack == 0
    assert env.players['BTN'].street_total == 0
    assert env.street == pdt.Street.RIVER
    assert done == True
class API(object):
    """Server-side wrapper for one human playing heads-up against the bot.

    Owns the poker environment, the actor and critic networks and a MongoDB
    handle. It records per-position trajectories for the hand in progress
    and serialises the environment state to JSON for the frontend.
    """

    def __init__(self):
        """Connect to the database, build the environment and load both models."""
        # With two seats, "the other position" is a simple swap.
        self.increment_position = {'SB': 'BB', 'BB': 'SB'}
        self.seed = 1458
        self.connect()
        self.game_object = pdt.Globals.GameTypeDict[pdt.GameTypes.OMAHAHI]
        self.config = Config()
        self.env_params = {
            'game': pdt.GameTypes.OMAHAHI,
            'betsizes': self.game_object.rule_params['betsizes'],
            'bet_type': self.game_object.rule_params['bettype'],
            'n_players': 2,
            'pot': 1,
            'stacksize': 10,  # self.game_object.state_params['stacksize'],
            'cards_per_player': self.game_object.state_params['cards_per_player'],
            'starting_street': pdt.Street.FLOP,  # self.game_object.starting_street,
            'global_mapping': self.config.global_mapping,
            'state_mapping': self.config.state_mapping,
            'obs_mapping': self.config.obs_mapping,
            'shuffle': True
        }
        self.env = Poker(self.env_params)
        self.network_params = self.instantiate_network_params()
        self.actor = OmahaActor(self.seed, self.env.state_space,
                                self.env.action_space, self.env.betsize_space,
                                self.network_params)
        self.critic = OmahaObsQCritic(self.seed, self.env.state_space,
                                      self.env.action_space,
                                      self.env.betsize_space,
                                      self.network_params)
        self.load_model(self.actor, self.config.production_actor)
        self.load_model(self.critic, self.config.production_critic)
        self.player = {'name': None, 'position': 'BB'}
        self.reset_trajectories()

    def reset_trajectories(self):
        """Start empty per-position containers for the next hand."""
        self.trajectories = defaultdict(list)
        self.trajectory = defaultdict(
            lambda: {
                'states': [],
                'obs': [],
                'betsize_masks': [],
                'action_masks': [],
                'actions': [],
                'action_category': [],
                'action_probs': [],
                'action_prob': [],
                'betsize': [],
                'rewards': [],
                'value': []
            })

    def instantiate_network_params(self):
        """Return a copy of the configured network params pinned to the CPU."""
        device = 'cpu'
        network_params = copy.deepcopy(self.config.network_params)
        network_params['maxlen'] = 10
        network_params['device'] = device
        return network_params

    def load_model(self, model, path):
        """Load weights from `path` into `model` and disable gradient tracking.

        Raises:
            ValueError: if `path` is not an existing file.
        """
        if os.path.isfile(path):
            model.load_state_dict(load(path, map_location=D('cpu')))
            # Process-wide switch: this object only runs inference.
            set_grad_enabled(False)
        else:
            raise ValueError('File does not exist')

    def connect(self):
        """Open the MongoDB client and keep a handle to the `baseline` database."""
        client = MongoClient('localhost', 27017, maxPoolSize=10000)
        self.db = client.baseline

    def update_player_name(self, name: str):
        """updates player name"""
        self.player['name'] = name

    def update_player_position(self, position):
        """Set the position the human player occupies."""
        self.player['position'] = position

    def insert_model_outputs(self, model_outputs, action_mask):
        """Store one bot decision (actor outputs, critic value, mask) in `bot_data`."""
        outputs_json = {
            'action': model_outputs['action'],
            'action_category': model_outputs['action_category'],
            'betsize': model_outputs['betsize'],
            'action_prob': model_outputs['action_prob'].detach().numpy().tolist(),
            'action_probs': model_outputs['action_probs'].detach().numpy().tolist(),
            'value': model_outputs['value'].detach().numpy().tolist(),
            'action_mask': action_mask.tolist(),
            'player': self.player['name']
        }
        self.db['bot_data'].insert_one(outputs_json)

    def insert_into_db(self, training_data: dict):
        """
        stores player data in the player_stats collection.
        takes trajectories and inserts them into db for data analysis and learning.
        """
        stats_json = {
            'game': self.env.game,
            'player': self.player['name'],
            'reward': training_data[self.player['position']][0]['rewards'][0],
            'position': self.player['position'],
        }
        self.db['player_stats'].insert_one(stats_json)
        positions = [
            position for position in training_data
            if position in ('SB', 'BB')
        ]
        for position in positions:
            for poker_round in training_data[position]:
                states = poker_round['states']
                actions = poker_round['actions']
                action_prob = poker_round['action_prob']
                action_probs = poker_round['action_probs']
                action_categories = poker_round['action_category']
                betsize_masks = poker_round['betsize_masks']
                action_masks = poker_round['action_masks']
                rewards = poker_round['rewards']
                betsizes = poker_round['betsize']
                values = poker_round['value']
                assert (isinstance(rewards, list))
                assert (isinstance(actions, list))
                assert (isinstance(action_prob, list))
                assert (isinstance(action_probs, list))
                assert (isinstance(states, list))
                assert (isinstance(values, list))
                for step, state in enumerate(states):
                    # NOTE(review): 'poker_round' stores the step index within
                    # the round, not the round's own index -- confirm intended.
                    state_json = {
                        'game': self.env.game,
                        'player': self.player['name'],
                        'poker_round': step,
                        'state': state.tolist(),
                        'action_probs': action_probs[step].tolist(),
                        'action_prob': action_prob[step].tolist(),
                        'action': actions[step],
                        'action_category': action_categories[step],
                        'betsize_mask': betsize_masks[step].tolist(),
                        'action_mask': action_masks[step].tolist(),
                        'betsize': betsizes[step],
                        'reward': rewards[step],
                        'value': values[step].tolist()
                    }
                    self.db['game_data'].insert_one(state_json)

    def return_model_outputs(self):
        """Return the bot's most recent masked action probabilities and values.

        Reads the newest `bot_data` document for the current player. The
        probabilities are masked and renormalised; the values are masked and
        divided by `stacksize + pot`. When no document exists, both entries
        are zero-filled lists of length `env.action_space`.
        """
        query = {'player': self.player['name']}
        # Only the newest document is needed, so fetch exactly one.
        newest = self.db['bot_data'].find_one(query, sort=[('_id', -1)])
        action_mask = []
        if newest is not None:
            action_probs = np.array(newest['action_probs'])
            values = np.array(newest['value'])
            action_mask = np.array(newest['action_mask'])
            if np.sum(action_probs) > 0:
                action_probs *= action_mask
                masked_total = np.sum(action_probs)
                # The mask can remove every non-zero probability; dividing
                # by that zero sum would turn the whole vector into NaN.
                if masked_total > 0:
                    action_probs /= masked_total
            # scale values
            if np.max(np.abs(values)) > 0:
                values *= action_mask
                values /= self.env_params['stacksize'] + self.env_params['pot']
            model_outputs = {
                'action_probs': action_probs.tolist(),
                'q_values': [values.tolist()]
            }
        else:
            model_outputs = {
                'action_probs': [0] * self.env.action_space,
                'q_values': [0] * self.env.action_space
            }
        print(model_outputs)
        print(action_mask)
        return model_outputs

    def return_player_stats(self):
        """Returns dict of current player stats against the bot."""
        query = {'player': self.player['name']}
        player_data = self.db['player_stats'].find(query)
        total_hands = self.db['player_stats'].count_documents(query)
        results = []
        position_results = {'SB': 0, 'BB': 0}
        for result in player_data:
            results.append(result['reward'])
            position_results[result['position']] += result['reward']

        def per_hand(amount):
            # Averages are taken over all hands, whatever the position.
            return amount / total_hands if total_hands > 0 else 0

        player_stats = {
            'results': sum(results),
            'bb_per_hand': round(per_hand(sum(results)), 2),
            'total_hands': total_hands,
            'SB': round(per_hand(position_results['SB']), 2),
            'BB': round(per_hand(position_results['BB']), 2),
        }
        return player_stats

    def parse_env_outputs(self, state, action_mask, betsize_mask, done):
        """Wraps state and passes to frontend. Can be the dummy last state. In which case hero mappings are reversed."""
        mapping = self.env.state_mapping
        newest = state[:, -1]

        def field(name):
            # Value of `name` in the newest row of the state history.
            return newest[:, mapping[name]][0]

        hero = self.env.players[self.player['position']]
        villain = self.env.players[self.increment_position[
            self.player['position']]]
        state_object = {
            'history': state.tolist(),
            'betsizes': self.env.betsizes.tolist(),
            'mapping': self.env.state_mapping,
            'current_player': pdt.Globals.POSITION_MAPPING[self.env.current_player],
            'hero_stack': hero.stack,
            'hero_position': pdt.Globals.POSITION_MAPPING[hero.position],
            'hero_cards': flatten(hero.hand),
            'hero_street_total': hero.street_total,
            'pot': float(field('pot')),
            'board_cards': field('board').tolist(),
            'villain_stack': villain.stack,
            'villain_position': pdt.Globals.POSITION_MAPPING[villain.position],
            'villain_cards': flatten(villain.hand),
            'villain_street_total': villain.street_total,
            'last_action': int(field('last_action')),
            'last_betsize': float(field('last_betsize')),
            'last_position': int(field('last_position')),
            'last_aggressive_action': int(field('last_aggressive_action')),
            'last_aggressive_betsize': float(field('last_aggressive_betsize')),
            'last_aggressive_position': int(field('last_aggressive_position')),
            'done': done,
            'action_mask': action_mask.tolist(),
            'betsize_mask': betsize_mask.tolist(),
            'street': int(field('street')),
            'blind': bool(field('blind'))
        }
        outcome_object = {
            'player1_reward': hero.stack - self.env.starting_stack,
            'player1_hand': flatten(hero.hand),
            'player2_reward': villain.stack - self.env.starting_stack,
            'player2_hand': flatten(villain.hand),
            'player1_handrank': hero.handrank,
            'player2_handrank': villain.handrank
        }
        json_obj = {'state': state_object, 'outcome': outcome_object}
        return json.dumps(json_obj)

    def store_state(self, state, obs, action_mask, betsize_mask):
        """Append copies of the state and both masks to the acting player's trajectory.

        `obs` is accepted but not stored.
        """
        cur_player = self.env.current_player
        self.trajectory[cur_player]['states'].append(copy.copy(state))
        self.trajectory[cur_player]['action_masks'].append(
            copy.copy(action_mask))
        self.trajectory[cur_player]['betsize_masks'].append(
            copy.copy(betsize_mask))

    def store_actions(self, actor_outputs):
        """Append one decision's outputs to the acting player's trajectory."""
        cur_player = self.env.current_player
        self.trajectory[cur_player]['actions'].append(actor_outputs['action'])
        self.trajectory[cur_player]['action_category'].append(
            actor_outputs['action_category'])
        self.trajectory[cur_player]['action_prob'].append(
            actor_outputs['action_prob'])
        self.trajectory[cur_player]['action_probs'].append(
            actor_outputs['action_probs'])
        self.trajectory[cur_player]['betsize'].append(actor_outputs['betsize'])
        self.trajectory[cur_player]['value'].append(actor_outputs['value'])

    def query_bot(self, state, obs, action_mask, betsize_mask, done):
        """Let the bot act until it is the human's turn or the hand is done.

        Returns the latest `(state, obs, done, action_mask, betsize_mask)`.
        """
        while self.env.current_player != self.player['position'] and not done:
            actor_outputs = self.actor(state, action_mask, betsize_mask)
            critic_outputs = self.critic(obs)
            actor_outputs['value'] = critic_outputs['value']
            self.insert_model_outputs(actor_outputs, action_mask)
            self.store_actions(actor_outputs)
            state, obs, done, action_mask, betsize_mask = self.env.step(
                actor_outputs)
            if not done:
                self.store_state(state, obs, action_mask, betsize_mask)
        return state, obs, done, action_mask, betsize_mask

    def reset(self):
        """Swap the human's position, deal a new hand and return its JSON state.

        If the bot is first to act it plays before the state is returned.
        """
        assert self.player['name'] is not None
        assert isinstance(self.player['position'], str)
        self.reset_trajectories()
        self.update_player_position(
            self.increment_position[self.player['position']])
        state, obs, done, action_mask, betsize_mask = self.env.reset()
        self.store_state(state, obs, action_mask, betsize_mask)
        if self.env.current_player != self.player['position'] and not done:
            state, obs, done, action_mask, betsize_mask = self.query_bot(
                state, obs, action_mask, betsize_mask, done)
        assert self.env.current_player == self.player['position']
        return self.parse_env_outputs(state, action_mask, betsize_mask, done)

    def step(self, action: str, betsize: float):
        """Maps action + betsize -> to a flat action category"""
        assert self.player['name'] is not None
        assert isinstance(self.player['position'], str)
        if isinstance(betsize, str):
            betsize = float(betsize)
        action_type = pdt.Globals.SERVER_ACTION_DICT[action]
        flat_action_category, betsize_category = self.env.convert_to_category(
            action_type, betsize)
        assert isinstance(flat_action_category, int)
        # The human has no network outputs; zero placeholders keep the
        # trajectory entries the same shape as the bot's.
        n_flat = self.env.action_space + self.env.betsize_space - 2
        player_outputs = {
            'action': flat_action_category,
            'action_category': action_type,
            'betsize': betsize_category,
            'action_prob': np.array([0]),
            'action_probs': np.zeros(n_flat),
            'value': np.zeros(n_flat)
        }
        self.store_actions(player_outputs)
        state, obs, done, action_mask, betsize_mask = self.env.step(
            player_outputs)
        if not done:
            self.store_state(state, obs, action_mask, betsize_mask)
            if self.env.current_player != self.player['position']:
                state, obs, done, action_mask, betsize_mask = self.query_bot(
                    state, obs, action_mask, betsize_mask, done)
        if done:
            rewards = self.env.player_rewards()
            for position in self.trajectory:
                # One reward entry per state stored for that position.
                N = len(self.trajectory[position]['betsize_masks'])
                self.trajectory[position]['rewards'] = [rewards[position]] * N
                self.trajectories[position].append(self.trajectory[position])
            self.insert_into_db(self.trajectories)
        return self.parse_env_outputs(state, action_mask, betsize_mask, done)

    @property
    def current_player(self):
        """The dict holding the human player's name and position."""
        return self.player
# Disabled timing sweep over `generate_epochs`, kept for reference.
# times = []
# for i,val in enumerate([1,2,5,10,25,50]):
#     print(f'Generating {val} samples')
#     tic = time.time()
#     training_params['generate_epochs'] = val
#     train_dual(rank,env,actor,critic,target_actor,target_critic,training_params,learning_params,network_params,validation_params)
#     toc = time.time()
#     print(f'{val} samples took {toc-tic} seconds')
#     times.append(toc-tic)
# mongo = MongoDB()
# mongo.clean_db()
# mongo.close()
# plt.scatter([1,2,5,10,25,50],[0.26,0.73,1.75,3,7.9,14.5])
# plt.savefig(f'generate_times.png',bbox_inches='tight')

# Profile whichever routine was selected through `args.function`.
tic = time.time()
with profiler.profile(record_shapes=True) as prof:
    if args.function == 'train':
        train_dual(
            rank, env, actor, critic, target_actor, target_critic,
            training_params, learning_params, network_params,
            validation_params)
    elif args.function == 'learn':
        dual_learning_update(
            actor, critic, target_actor, target_critic, learning_params)
    elif args.function == 'generate':
        generate_trajectories(env, actor, critic, training_params, rank=0)
    else:
        # Fallback: play 100 hands with the actor, without gradients.
        with torch.no_grad():
            for i in range(100):
                state, obs, done, action_mask, betsize_mask = env.reset()
                while not done:
                    actor_outputs = actor(state, action_mask, betsize_mask)
                    state, obs, done, action_mask, betsize_mask = env.step(
                        actor_outputs)
elapsed = time.time() - tic
print(f'Computation took {elapsed} seconds')
print(prof)
def testThreePlayers(self):
    """Play three 3-handed hands from preflop, checking seats, stacks and pot.

    Hand 1: raise, fold, call preflop; the two remaining players then check
    every street until the hand is done.
    Hand 2: call, call, check preflop; all three players check every street.
    Hand 3: a raise followed by two folds finishes the hand.
    """

    def latest(snapshot, game, key):
        # Entry `key` (looked up via the env's state_mapping) of the newest state row.
        return snapshot[:, -1][:, game.state_mapping[key]]

    config = copy.deepcopy(self.env_params)
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0

    # ---- Hand 1 ----
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    seating = (
        ('hero_position', pdt.Position.BTN),
        ('player1_position', pdt.Position.BTN),
        ('player2_position', pdt.Position.SB),
        ('player3_position', pdt.Position.BB),
    )
    for key, position in seating:
        assert latest(state, env, key) == position
    assert env.street == pdt.Street.PREFLOP
    assert env.players.num_active_players == 3

    # Hero (BTN) raises.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    for seat, stack in (('SB', 4.5), ('BB', 4.), ('BTN', 1.5)):
        assert env.players[seat].stack == stack
    for seat, committed in (('SB', 0.5), ('BB', 1.), ('BTN', 3.5)):
        assert env.players[seat].street_total == committed

    # Next player folds; the assertions show SB is the one marked folded.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_FOLD)
    statuses = (
        ('SB', Status.FOLDED),
        ('BB', Status.ACTIVE),
        ('BTN', Status.ACTIVE),
    )
    for seat, status in statuses:
        assert env.players[seat].status == status

    # The call closes preflop: street totals are zeroed and the flop starts.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    for seat, stack in (('SB', 4.5), ('BB', 1.5), ('BTN', 1.5)):
        assert env.players[seat].stack == stack
    for seat, committed in (('SB', 0.), ('BB', 0.), ('BTN', 0.)):
        assert env.players[seat].street_total == committed
    assert latest(state, env, 'pot') == 7.5
    assert env.pot == 7.5
    assert env.street == pdt.Street.FLOP
    assert env.players.num_active_players == 2
    assert latest(state, env, 'hero_position') == pdt.Position.BB

    # Two checks per street: flop -> turn -> river -> done.
    for street_after in (pdt.Street.TURN, pdt.Street.RIVER):
        for _ in range(2):
            state, obs, done, action_mask, betsize_mask = env.step(ACTION_CHECK)
        assert env.street == street_after
    for _ in range(2):
        state, obs, done, action_mask, betsize_mask = env.step(ACTION_CHECK)
    assert done == True
    for seat, stack in (('SB', 4.5), ('BB', 9), ('BTN', 1.5)):
        assert env.players[seat].stack == stack

    # ---- Hand 2 ----
    del env
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    assert latest(state, env, 'hero_position') == pdt.Position.BTN

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    assert latest(state, env, 'hero_position') == pdt.Position.SB
    assert env.players['SB'].street_total == 0.5

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    assert env.players['SB'].street_total == 1
    assert latest(state, env, 'hero_position') == pdt.Position.BB

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CHECK)
    assert env.street == pdt.Street.FLOP
    assert env.pot == 3

    # Three checks per street with all players still in.
    for street_after in (pdt.Street.TURN, pdt.Street.RIVER):
        for _ in range(3):
            state, obs, done, action_mask, betsize_mask = env.step(ACTION_CHECK)
        assert env.street == street_after
    for _ in range(3):
        state, obs, done, action_mask, betsize_mask = env.step(ACTION_CHECK)
    assert done == True
    for seat, stack in (('SB', 7), ('BB', 4), ('BTN', 4.)):
        assert env.players[seat].stack == stack

    # ---- Hand 3 ----
    del env
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    for action in (ACTION_RAISE, ACTION_FOLD, ACTION_FOLD):
        state, obs, done, action_mask, betsize_mask = env.step(action)
    assert done == True
    for seat, stack in (('SB', 4.5), ('BB', 4), ('BTN', 6.5)):
        assert env.players[seat].stack == stack
def testBetLimits(self):
    """Check bet sizing, stacks and pot under LIMIT and POT_LIMIT, 3-handed.

    Part 1 (LIMIT): raise, raise, call, call preflop, ending on the flop.
    Part 2 (POT_LIMIT, stacksize 100): raise, raise, fold, raise preflop.
    Part 3 (POT_LIMIT, stacksize 100): call, raise, call, call preflop, then
    a bet and a raise on the flop.
    """

    def latest(snapshot, game, key):
        # Entry `key` (looked up via the env's state_mapping) of the newest state row.
        return snapshot[:, -1][:, game.state_mapping[key]]

    config = copy.deepcopy(self.env_params)
    # Limit
    config['bet_type'] = pdt.LimitTypes.LIMIT
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    assert latest(state, env, 'pot') == 1.5
    assert latest(state, env, 'hero_position') == pdt.Position.BTN

    # Hero (BTN) raises.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    assert latest(state, env, 'pot') == 3.5
    for seat, stack in (('BTN', 3), ('BB', 4), ('SB', 4.5)):
        assert env.players[seat].stack == stack
    assert env.players['SB'].street_total == 0.5
    assert latest(state, env, 'hero_position') == pdt.Position.SB
    assert latest(state, env, 'last_aggressive_betsize') == 2
    assert env.street == pdt.Street.PREFLOP

    # Hero (SB) re-raises.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    for seat, stack in (('BTN', 3), ('BB', 4), ('SB', 2)):
        assert env.players[seat].stack == stack
    assert env.players['SB'].street_total == 3.
    assert latest(state, env, 'hero_position') == pdt.Position.BB
    assert latest(state, env, 'last_aggressive_betsize') == 2.5
    assert latest(state, env, 'pot') == 6
    assert env.street == pdt.Street.PREFLOP

    # Two calls: the first keeps the street at preflop, the second reaches the flop.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    assert latest(state, env, 'pot') == 8
    assert latest(state, env, 'hero_position') == pdt.Position.BTN
    assert env.street == pdt.Street.PREFLOP

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    assert latest(state, env, 'pot') == 9
    assert latest(state, env, 'hero_position') == pdt.Position.SB
    assert env.street == pdt.Street.FLOP

    # ---- Pot limit, first hand ----
    del env
    config['bet_type'] = pdt.LimitTypes.POT_LIMIT
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0
    config['stacksize'] = 100
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    assert latest(state, env, 'hero_position') == pdt.Position.BTN

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    assert latest(state, env, 'hero_position') == pdt.Position.SB
    assert env.players['BTN'].stack == 96.5
    assert env.players['BTN'].street_total == 3.5

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    assert env.players['SB'].stack == 88.5
    assert env.players['SB'].street_total == 11.5
    assert latest(state, env, 'last_aggressive_betsize') == 11
    assert latest(state, env, 'pot') == 16

    # A fold, then another raise; the BTN figures are checked afterwards.
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_FOLD)
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    assert env.players['BTN'].stack == 64.5
    assert env.players['BTN'].street_total == 35.5
    assert latest(state, env, 'last_aggressive_betsize') == 32
    assert latest(state, env, 'pot') == 48

    # ---- Pot limit, second hand ----
    del env
    config['bet_type'] = pdt.LimitTypes.POT_LIMIT
    config['n_players'] = 3
    config['starting_street'] = pdt.Street.PREFLOP
    config['pot'] = 0
    config['stacksize'] = 100
    env = Poker(config)
    state, obs, done, action_mask, betsize_mask = env.reset()
    for action in (ACTION_CALL, ACTION_RAISE):
        state, obs, done, action_mask, betsize_mask = env.step(action)
    assert env.players['SB'].stack == 96
    assert env.players['SB'].street_total == 4

    for _ in range(2):
        state, obs, done, action_mask, betsize_mask = env.step(ACTION_CALL)
    assert env.street == pdt.Street.FLOP
    assert latest(state, env, 'pot') == 12

    # Flop: a bet (SB figures checked) and then a raise (BB figures checked).
    state, obs, done, action_mask, betsize_mask = env.step(ACTION_BET)
    assert latest(state, env, 'pot') == 24
    assert env.players['SB'].stack == 84
    assert env.players['SB'].street_total == 12

    state, obs, done, action_mask, betsize_mask = env.step(ACTION_RAISE)
    assert env.players['BB'].stack == 48
    assert env.players['BB'].street_total == 48
    assert latest(state, env, 'pot') == 72