Пример #1
0
    def _get_environment(self):
        """Rebuild the observation, reward and info from the current table state.

        Refreshes ``self.community_data`` and ``self.player_data``, flattens
        them together with ``self.stage_data`` into ``self.array_everything``
        (also stored as ``self.observation``), fills ``self.info`` and renders
        when ``self.render_switch`` is truthy. Pots and stacks are divided by
        ``big_blind * 100``; the reward is reset to 0. Nothing is returned:
        every result is written to an attribute of ``self``.
        """

        def _flat_values(record):
            # Every attribute value of ``record`` flattened into one list.
            return list(flatten(vars(record).values()))

        if not self.done:
            self._get_legal_moves()

        self.observation = None
        self.reward = 0
        self.info = None

        # NOTE: the attribute assignments below are kept in a fixed order.
        # The observation vector is built from ``__dict__.values()``, so any
        # attribute not pre-created by the constructor is emitted in the
        # order it is assigned here.
        self.community_data = CommunityData(len(self.players))
        community = self.community_data
        money_scale = self.big_blind * 100
        community.community_pot = self.community_pot / money_scale
        community.current_round_pot = self.current_round_pot / money_scale
        community.small_blind = self.small_blind
        community.big_blind = self.big_blind
        # Stage values above 3 are clamped onto index 3 of the stage vector.
        stage_slot = np.minimum(self.stage.value, 3)
        community.stage[stage_slot] = 1  # pylint: disable= invalid-sequence-index
        # One boolean per member of ``Action``: is it currently legal?
        community.legal_moves = [
            candidate in self.legal_moves for candidate in Action
        ]

        self.player_data = PlayerData()
        player_state = self.player_data
        player_state.stack = [
            participant.stack / money_scale for participant in self.players
        ]

        # The original author marks a falsy current player as "game over";
        # in that case the winner is observed instead.
        if not self.current_player:
            self.current_player = self.players[self.winner_ix]

        hero = self.current_player
        player_state.position = hero.seat
        alive_count = sum(self.player_cycle.alive)
        hero.equity_alive = self.get_equity(
            set(hero.cards), set(self.table_cards), alive_count, 1000)
        player_state.equity_to_river_alive = hero.equity_alive

        player_vec = np.array(_flat_values(player_state))
        community_vec = np.array(_flat_values(community))
        stage_vec = np.array(
            [_flat_values(entry) for entry in self.stage_data]).flatten()

        self.array_everything = np.concatenate(
            [player_vec, community_vec, stage_vec]).flatten()
        self.observation = self.array_everything

        # Legal moves are recomputed before being published through ``info``.
        self._get_legal_moves()

        self.info = {
            'player_data': vars(self.player_data),
            'community_data': vars(self.community_data),
            'stage_data': [vars(entry) for entry in self.stage_data],
            'legal_moves': self.legal_moves
        }

        self.observation_space = self.array_everything.shape

        if self.render_switch:
            self.render()
Пример #2
0
    def _get_environment(self):
        """Rebuild the observation and reward from the current table state.

        Refreshes ``self.community_data`` and ``self.player_data`` (raw,
        unscaled amounts), flattens them together with ``self.stage_data``
        into ``self.array_everything`` and exposes everything through the
        ``self.observation`` dict. The reward is the current player's stack
        plus that player's equity multiplied by the community pot.
        ``self.info`` is left as ``None``. Renders when ``self.render_switch``
        is truthy. Nothing is returned: results are stored on ``self``.
        """

        def _flat_values(record):
            # Every attribute value of ``record`` flattened into one list.
            return list(flatten(vars(record).values()))

        if not self.done:
            self._get_legal_moves()

        self.observation = None
        self.reward = None
        self.info = None

        # NOTE: the attribute assignments below are kept in a fixed order.
        # The observation vector is built from ``__dict__.values()``, so any
        # attribute not pre-created by the constructor is emitted in the
        # order it is assigned here.
        self.community_data = CommunityData(len(self.players))
        community = self.community_data
        community.community_pot = self.community_pot
        community.current_round_pot = self.current_round_pot
        community.small_blind = self.small_blind
        community.big_blind = self.big_blind
        # Stage values above 3 are clamped onto index 3 of the stage vector.
        stage_slot = np.minimum(self.stage.value, 3)
        community.stage[stage_slot] = 1

        self.player_data = PlayerData()
        player_state = self.player_data
        player_state.stack = [participant.stack for participant in self.players]

        # The original author marks a falsy current player as "game over";
        # in that case the winner is observed instead.
        if not self.current_player:
            self.current_player = self.players[self.winner_ix]

        hero = self.current_player
        player_state.position = hero.seat
        alive_count = sum(self.player_cycle.alive)
        hero.equity_alive = get_equity(hero.cards, self.table_cards, alive_count)
        player_state.equity_to_river_alive = hero.equity_alive

        player_vec = np.array(_flat_values(player_state))
        community_vec = np.array(_flat_values(community))
        stage_vec = np.array(
            [_flat_values(entry) for entry in self.stage_data]).flatten()

        self.array_everything = np.concatenate(
            [player_vec, community_vec, stage_vec]).flatten()

        self.observation = {
            'array_everything': self.array_everything,
            'player_data': self.player_data,
            'community_data': self.community_data,
            'stage_data': self.stage_data
        }
        self._get_legal_moves()

        # Reward: stack in hand plus equity-weighted community pot.
        pot_share = self.player_data.equity_to_river_alive * self.community_pot
        self.reward = self.current_player.stack + pot_share

        if self.render_switch:
            self.render()
Пример #3
0
    def _get_environment(self):
        """Rebuild the observation, reward and info from the current table state.

        Refreshes ``self.community_data`` (pots, blinds, stage, legal moves,
        dealer position, active players, minimum call) and
        ``self.player_data`` (stacks, seat, equity, own stack, first-decision
        flag, hand rank), flattens them together with ``self.stage_data`` into
        ``self.array_everything`` (also stored as ``self.observation``), fills
        ``self.info`` and renders when ``self.render_switch`` is truthy. The
        reward is reset to 0. Nothing is returned: results are stored on
        ``self``.
        """

        def _flat_values(record):
            # Every attribute value of ``record`` flattened into one list.
            return list(flatten(vars(record).values()))

        if not self.done:
            self._get_legal_moves()

        self.observation = None
        self.reward = 0
        self.info = None

        # NOTE: the attribute assignments below are kept in a fixed order.
        # The observation vector is built from ``__dict__.values()``, so any
        # attribute not pre-created by the constructor is emitted in the
        # order it is assigned here.
        self.community_data = CommunityData(len(self.players))
        community = self.community_data
        money_scale = self.big_blind * 100
        community.community_pot = self.community_pot / money_scale
        community.current_round_pot = self.current_round_pot / money_scale
        community.small_blind = self.small_blind
        community.big_blind = self.big_blind
        # Stage values above 3 are clamped onto index 3 of the stage vector;
        # the unclamped value is stored separately as ``game_stage``.
        stage_slot = np.minimum(self.stage.value, 3)
        community.stage[stage_slot] = 1
        community.game_stage = self.stage.value
        # One boolean per member of ``Action``: is it currently legal?
        community.legal_moves = [
            candidate in self.legal_moves for candidate in Action
        ]
        community.dealer_position = self.player_cycle.dealer_idx
        community.active_players = self.player_cycle.alive
        # NOTE(review): this divisor is big_blind * 10000, unlike the
        # big_blind * 100 used for pots and stacks -- confirm it is intended.
        community.min_call = self.min_call / (self.big_blind * 10000)

        self.player_data = PlayerData()
        player_state = self.player_data
        player_state.stack = [
            participant.stack / money_scale for participant in self.players
        ]

        # The original author marks a falsy current player as "game over";
        # in that case the winner is observed instead.
        if not self.current_player:
            self.current_player = self.players[self.winner_ix]

        hero = self.current_player
        player_state.position = hero.seat
        alive_count = sum(self.player_cycle.alive)
        hero.equity_alive = get_equity(hero.cards, self.table_cards, alive_count)
        player_state.equity_to_river_alive = hero.equity_alive
        # Stored as-is, without the scaling applied to ``stack`` above.
        player_state.stack_amount = hero.stack
        # Flag is 1 while the player has no recorded action in this stage.
        if hero.last_action_in_stage == '':
            player_state.first_decision = 1
        else:
            player_state.first_decision = 0
        player_state.hand_rank = self.get_rank(
            self.stage, hero.cards, self.table_cards)

        player_vec = np.array(_flat_values(player_state))
        community_vec = np.array(_flat_values(community))
        stage_vec = np.array(
            [_flat_values(entry) for entry in self.stage_data]).flatten()

        self.array_everything = np.concatenate(
            [player_vec, community_vec, stage_vec]).flatten()
        self.observation = self.array_everything

        self.info = {
            'player_data': vars(self.player_data),
            'community_data': vars(self.community_data),
            'stage_data': [vars(entry) for entry in self.stage_data]
        }

        # Legal moves are recomputed only after ``info`` has been assembled.
        self._get_legal_moves()

        self.observation_space = self.array_everything.shape

        if self.render_switch:
            self.render()