示例#1
0
    def update_model(self, experiences):
        '''
        Run one training step of the Q network on a batch of experiences.

        For every experience the regression target
        ``reward + gamma * max(Q(next_info, a'))`` is computed by evaluating
        ``self.q`` on every next available action; ``self.train_op`` is then
        run once on the whole batch.

        :param experiences: iterable of experiences exposing ``info_feat``,
            ``action_feat``, ``reward``, ``next_info_feat`` and
            ``next_available_action_feats``
        '''
        reward_plus_gamma_q = []
        info_feats = []
        action_feats = []

        for experience in experiences:
            next_action_feats = list(experience.next_available_action_feats)

            if next_action_feats:
                # The same next info feature is paired with each candidate action.
                next_info_feats = [experience.next_info_feat
                                   ] * len(next_action_feats)
                q = self.sess.run(self.q,
                                  feed_dict={
                                      self.info_feats: next_info_feats,
                                      self.action_feats: next_action_feats
                                  })
                target = experience.reward + self.gamma * np.max(q)
            else:
                # Without a follow-up action there is nothing to bootstrap
                # from (np.max raises on an empty array), so the target is
                # the immediate reward alone.
                target = experience.reward

            reward_plus_gamma_q.append(target)
            info_feats.append(experience.info_feat)
            action_feats.append(experience.action_feat)

        if not info_feats:
            # Nothing to learn from; skip the training step on an empty batch.
            return

        # self.loss and self.q are still fetched together with the train op;
        # their values are not used here.
        self.sess.run(
            (self.train_op, self.loss, self.q),
            feed_dict={
                self.info_feats: info_feats,
                self.action_feats: action_feats,
                self.reward_plus_gamma_q: reward_plus_gamma_q
            })
示例#2
0
    def compete_silent(cls, env, players):
        '''
        Play a fixed number of games without console output and average the scores.

        :param env: The game environment
        :param players: The normal players (the chance player is appended here)
        :return: a list with the average score of every normal player
        '''

        num_normal_players = len(players)
        total_count = 10
        total_scores = [0] * num_normal_players

        # A new list is built, so the caller's list is left untouched; the
        # chance player takes the last seat.
        players = players + [roomai.games.common.RandomPlayerChance()]
        num_seats = len(players)

        def broadcast(current_infos):
            # Give every seat (chance player included) its own info.
            for seat in range(num_seats):
                players[seat].receive_info(current_infos[seat])

        for count in range(total_count):

            # Chips are drawn first and the dealer afterwards.
            chips = [
                1000 + int(random.random() * 200) for _ in range(num_seats)
            ]
            init_params = {
                "chips": chips,
                "param_num_normal_players": num_normal_players,
                "dealer_id": int(random.random() * num_seats),
                "big_blind_bet": 50,
            }

            infos, public, _, _, _ = env.init(init_params)
            broadcast(infos)

            while public[-1].is_terminal == False:
                mover = players[public[-1].turn]
                infos, public, _, _, _ = env.forward(mover.take_action())
                broadcast(infos)

            # The final infos are delivered a second time once the game is over.
            broadcast(infos)
            for seat in range(num_normal_players):
                total_scores[seat] += public[-1].scores[seat]

            games_played = count + 1
            if games_played % 500 == 0:
                tmp_scores = [score / games_played for score in total_scores]
                roomai.get_logger().info(
                    "TexasHoldem completes %d competitions, scores=%s" %
                    (games_played, ",".join([str(s) for s in tmp_scores])))

        for seat in range(len(total_scores)):
            total_scores[seat] /= 1.0 * total_count

        return total_scores
示例#3
0
    def compete(cls, env, players):
        """
        Hold a fixed series of games among the players and average their scores.

        Args:
            env: the game environment; the results of its ``init`` and
                ``forward`` are unpacked here as
                (infos, public_state, person_states, private_state)
            players: the players taking part; player ``i`` receives
                ``infos[i]`` through ``receive_info`` and is asked for a move
                through ``take_action`` when it is its turn

        Returns:
            A list holding the average score of every player over all games.
        """

        num_players = len(players)
        total_scores = [0] * num_players
        total_count = 1000

        # ``range`` is used throughout: ``xrange`` does not exist on Python 3.
        for count in range(total_count):

            chips = [(1000 + int(random.random() * 200))
                     for _ in range(num_players)]
            dealer_id = int(random.random() * num_players)
            big_blind_bet = 50

            infos, public, persons, private = env.init({
                "chips": chips,
                "num_players": num_players,
                "dealer_id": dealer_id,
                "big_blind_bet": big_blind_bet
            })
            for i, player in enumerate(players):
                player.receive_info(infos[i])

            while not public.is_terminal:
                action = players[public.turn].take_action()
                infos, public, persons, private = env.forward(action)
                for i, player in enumerate(players):
                    player.receive_info(infos[i])

            # The terminal infos are delivered once more while the scores are
            # accumulated.
            for i, player in enumerate(players):
                player.receive_info(infos[i])
                total_scores[i] += public.scores[i]

            if (count + 1) % 500 == 0:
                tmp_scores = [score / (count + 1) for score in total_scores]
                roomai.get_logger().info(
                    "TexasHoldem completes %d competitions, scores=%s" %
                    (count + 1, ",".join([str(s) for s in tmp_scores])))

        return [score / (1.0 * total_count) for score in total_scores]
示例#4
0
    def init(self, params=None):
        '''
        Initialize the Bang game environment with the initialization params.\n
        The initialization is a dict with only an option: \n
        param_num_normal_players: how many players are in the game, the option must be in {2, 4, 5}, default 5. An example of the initialization param is {"param_num_normal_players":2} \n

        :param params: the initialization params; None is treated as an empty dict
        :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
        :raises ValueError: if the number of normal players is not 2, 4 or 5
        '''
        # Build the empty dict per call instead of sharing one mutable default.
        if params is None:
            params = dict()

        logger = roomai.get_logger()
        ############ public state and private state ##########
        public_state = BangStatePublic()
        private_state = BangStatePrivate()
        # Both states are recorded before the player count is validated.
        self.__public_state_history__.append(public_state)
        self.__private_state_history__.append(private_state)
        public_state.__param_num_normal_players__ = params.get(
            "param_num_normal_players", 5)

        if public_state.param_num_normal_players not in [2, 4, 5]:
            logger.fatal("The number of normal players must be in [2,4,5]")
            raise ValueError("The number of normal players must be in [2,4,5]")

        public_state.__public_player_infos__ = [
            PublicPlayerInfo()
            for i in range(public_state.__param_num_normal_players__)
        ]
        for player_info in public_state.__public_player_infos__:
            player_info.__num_hand_cards__ = 0
            player_info.__character_card__ = None
            player_info.__equipment_cards__ = []

        # The chance player (id == number of normal players) moves first.
        public_state.__phase_info__ = PhaseInfo()
        public_state.__phase_info__.__playid__ = public_state.__param_num_normal_players__
        public_state.__phase_info__.__phase__ = PhaseInfo.ChancePlay
        public_state.__turn__ = public_state.__param_num_normal_players__

        ########### person states #########
        # One state per normal player plus one for the chance player.
        person_states = [
            BangStatePerson()
            for i in range(public_state.param_num_normal_players + 1)
        ]
        for i in range(public_state.param_num_normal_players):
            self.__person_states_history__[i].append(person_states[i])
            # NOTE(review): person_states[i] is a BangStatePerson instance, yet
            # it is indexed with [0] below (and for the chance player further
            # down); unless that class supports indexing this raises --
            # confirm whether person_states[i] itself was meant.
            person_states[i][0].__id__ = i
            person_states[i][0].__hand_cards__ = []
            person_states[i][0].__role__ = None
            person_states[i][0].__available_actions__ = dict()

        # NOTE(review): the chance player's state is not appended to
        # __person_states_history__ in this method -- confirm.
        person_states[public_state.__param_num_normal_players__][
            0].__available_actions__ = self.available_actions()

        return self.__gen_infos__(
        ), self.__public_state_history__, self.__person_states_history__, self.__private_state_history__, self.__playerid_action_history__
示例#5
0
def sampling(probs):
    '''
    Draw an index at random according to the given probabilities.

    A uniform number r in [0, 1) is drawn and the first index whose
    cumulative probability exceeds r is returned. If the cumulative sum never
    exceeds r (the probabilities sum to r or less), a warning is logged and the
    last index, len(probs) - 1, is returned.

    :param probs: a sequence of probabilities
    :return: the sampled index
    '''
    r = random.random()
    cumulative = 0
    for i, prob in enumerate(probs):
        cumulative += prob
        if cumulative > r:
            return i

    # Only the fallback path needs the logger, so it is fetched here.
    logger = roomai.get_logger()
    logger.warning("Sampling probs(%s) with r = %f occurs sum(probs) <= r",
                   ",".join([str(p) for p in probs]), r)
    return len(probs) - 1
示例#6
0
    def lookup(cls, key):
        '''
        lookup a PokerCard with the specified key

        :param key: The specified key
        :return: The PokerCard with the specified key
        :raises ValueError: if the key is not a key of AllPokerCards
        '''

        if key not in AllPokerCards:
            # (key,) keeps the formatting valid even when key is a tuple.
            message = "key (%s) is not a valid poker card key" % (key,)
            roomai.get_logger().fatal(message)
            raise ValueError(message)

        return AllPokerCards[key]
示例#7
0
    def gen_info_feat(self, info):
        '''
        Encode an info as a (num_point, num_suit, info_dim) array of card counts.

        The last axis holds eight channels in two groups of four. Stage 0
        fills channels 0-3 and any other stage fills channels 4-7. Within a
        group the channels count, in order: the hand cards, the cards played
        by the current player, by the previous player and by the next player.
        Player ids are taken modulo 3.

        :param info: the info of the player; its person_state (id, hand_cards)
            and public_state (stage, action_history) are read
        :return: the feature array
        '''
        info_feat = np.zeros((self.num_point, self.num_suit, self.info_dim))

        current_id = info.person_state.id
        previous_id = (current_id + 3 - 1) % 3
        next_id = (current_id + 1) % 3

        # Both stages share the same layout, shifted by four channels.
        base = 0 if info.public_state.stage == 0 else 4

        for card in info.person_state.hand_cards:
            info_feat[card.point_rank, card.suit_rank, base] += 1

        for person_action in info.public_state.action_history:
            person_id = person_action[0]
            action = person_action[1]
            for card in action.cards:
                if person_id == current_id:
                    info_feat[card.point_rank, card.suit_rank, base + 1] += 1
                elif person_id == previous_id:
                    info_feat[card.point_rank, card.suit_rank, base + 2] += 1
                elif person_id == next_id:
                    info_feat[card.point_rank, card.suit_rank, base + 3] += 1
                elif person_id == 3:
                    # Id 3 is the chance player; its cards are not encoded.
                    # The logger is only needed on this path.
                    roomai.get_logger().debug(
                        "SevenKingModel finds the chance player-action pair in public_state.action_history"
                    )

        return info_feat
示例#8
0
    def init(self, params=None):
        '''
        Initialize the Bang game environment with the initialization params.\n
        The initialization is a dict with some options\n

        1. param_num_normal_players: how many players are in the game, the option must be in {2, 4, 5}, default 5\n
        2. param_start_turn: The param_start_turn is the id of a normal player, who is the first to take an action \n
           (NOTE(review): this option is documented but not read by this method)

        An example of the initialization param is {"param_num_normal_players":2}

        :param params: the initialization params; None is treated as an empty dict
        :return: None. NOTE(review): the infos built by __gen_infos__() are not
            returned -- confirm whether a return statement is missing
        :raises ValueError: if the number of normal players is not 2, 4 or 5
        '''
        # Build the empty dict per call instead of sharing one mutable default.
        if params is None:
            params = dict()

        logger = roomai.get_logger()
        public_state = BangStatePublic()
        private_state = BangStatePrivate()

        public_state.__param_num_normal_players__ = params.get(
            "param_num_normal_players", 5)

        if public_state.param_num_normal_players not in [2, 4, 5]:
            logger.fatal("The number of normal players must be in [2,4,5]")
            raise ValueError("The number of normal players must be in [2,4,5]")

        public_state.__public_person_info__ = [
            PublicPersonInfo()
            for i in range(public_state.__param_num_normal_players__)
        ]
        for person_info in public_state.__public_person_info__:
            person_info.__num_hand_cards__ = 0
            person_info.__charactor_card__ = None
            person_info.__equipment_cards__ = []

        # One state per normal player plus one for the chance player.
        person_states = [
            BangStatePerson()
            for i in range(public_state.param_num_normal_players + 1)
        ]

        # The histories are only touched once the parameters have been validated.
        self.__public_state_history__.append(public_state)
        self.__private_state_history__.append(private_state)
        for i in range(public_state.param_num_normal_players):
            self.__person_states_history__[i].append(person_states[i])
            self.__person_states_history__[i][0].__id__ = i
            self.__person_states_history__[i][0].__hand_cards__ = []
            self.__person_states_history__[i][0].__role__ = ""

        # NOTE(review): the chance player's state is never appended to
        # __person_states_history__ above, so this [0] access relies on an
        # entry created elsewhere -- confirm.
        self.__person_states_history__[
            public_state.__param_num_normal_players__][
                0].__available_actions__ = self.available_actions()

        self.__gen_infos__()
示例#9
0
    def lookup(cls, key):
        '''
        lookup a Card with the specified key

        :param key: The specified key
        :return: The Card with the specified key
        :raises ValueError: if the key is not a key of AllPlayingCardsDict
        '''

        if key not in AllPlayingCardsDict:
            # (key,) keeps the formatting valid even when key is a tuple.
            message = "key (%s) is not a valid poker normalcard key" % (key,)
            roomai.get_logger().fatal(message)
            raise ValueError(message)

        return AllPlayingCardsDict[key]
示例#10
0
 def lookup(self, key):
     '''
     Return the chance action registered under the given key.

     :param key: the key of the chance action, a str
     :return: the entry of AllBangActionChancesDict stored under key
     :raises TypeError: if key is not a str
     :raises ValueError: if key is not in AllBangActionChancesDict
     '''
     logger = roomai.get_logger()

     # None is not a str, so a single isinstance test covers both cases.
     if not isinstance(key, str):
         type_message = "In the constructor BangActionChance.lookup(key), the key must be a str"
         logger.fatal(type_message)
         raise TypeError(type_message)

     if key in AllBangActionChancesDict:
         return AllBangActionChancesDict[key]

     value_message = "In the constructor BangActionChance.lookup(key), the key must be the key of CharacterCard, RoleCard or PlayingCard"
     logger.fatal(value_message)
     raise ValueError(value_message)
示例#11
0
    def __init__(self, key):
        '''
        Create a card action from its key.

        The part of the key before the first "_" is looked up in
        AllPlayingCardsDict. An unknown card key is only reported through the
        logger; the card then stays None.

        :param key: the action key, a str
        '''
        logger = roomai.get_logger()

        # Every field starts from a neutral value.
        self.__type__ = BangActionType.card
        self.__key__ = None

        self.__skill__ = None
        self.__seen_cards__ = set()
        self.__choosen_cards__ = set()

        self.__card__ = None
        self.__card_targets__ = []

        self.__other__ = None
        self.__other_targets__ = []

        card_key = key.split("_", 1)[0]
        if card_key not in AllPlayingCardsDict:
            logger.info("%s is invalid action key, since the cardkey %s is invalid"%(key, card_key))
        else:
            self.__card__ = AllPlayingCardsDict[card_key]
示例#12
0
    def __action_chance__(self, action):
        '''
        Apply a chance action, i.e. place the card it carries.

        The number of cards used so far decides where the card goes: the first
        two cards per person state (all but the last one) become hand cards,
        the next five become keep cards. Any later card is only reported
        through the logger. The card is recorded as used in every case.

        :param action: the chance action; its card is read
        '''
        public_state = self.__public_state_history__[-1]
        latest_person_states = [
            history[-1] for history in self.__person_states_history__
        ]
        private_state = self.__private_state_history__[-1]

        card = action.card
        num_used = len(private_state.all_used_cards)
        # Two hand cards for every person state except the last one.
        num_hand_cards = (len(latest_person_states) - 1) * 2

        if num_used < num_hand_cards:
            receiver = latest_person_states[num_used // 2]
            receiver.__hand_cards__.append(card)
        elif num_used < num_hand_cards + 5:
            private_state.__keep_cards__.append(card)
        else:
            roomai.get_logger().fatal(
                "the chance action in the invalid condition")

        private_state.__all_used_cards__.append(card)
示例#13
0
 def __init__(self, key):
     '''
     Create the chance action for the card registered under the given key.

     :param key: a key of AllPlayingCardsDict, AllCharacterCardsDict or AllRoleCardsDict
     :raises TypeError: if the key is in none of the three dictionaries
     '''
     logger = roomai.get_logger()
     self.__is_public__ = False

     # The dictionaries are probed in a fixed order: playing, character, role.
     if key in AllPlayingCardsDict:
         self.__type__ = BangActionChanceType.playingcard
         card_class = PlayingCard
     elif key in AllCharacterCardsDict:
         self.__type__ = BangActionChanceType.charactercard
         card_class = CharacterCard
     elif key in AllRoleCardsDict:
         self.__type__ = BangActionChanceType.rolecard
         card_class = RoleCard
     else:
         message = "In the constructor BangActionChance(card), the parameter card must be NormalCard, CharacterCard or RoleCard"
         logger.fatal(message)
         raise TypeError(message)

     self.__card__ = card_class.lookup(key)
     self.__key__ = key
示例#14
0
 def __init__(self, card):
     '''
     Create the chance action that carries the given card.

     :param card: a PlayingCard, a CharacterCard or a RoleCard
     :raises TypeError: if card is an instance of none of the three classes
     '''
     logger = roomai.get_logger()
     self.__is_public__ = False

     # The classes are tested in a fixed order: playing, character, role.
     if isinstance(card, PlayingCard):
         action_type = BangActionChanceType.playingcard
     elif isinstance(card, CharacterCard):
         action_type = BangActionChanceType.charactercard
     elif isinstance(card, RoleCard):
         action_type = BangActionChanceType.rolecard
     else:
         message = "In the constructor BangActionChance(card), the parameter card must be NormalCard, CharacterCard or RoleCard"
         logger.fatal(message)
         raise TypeError(message)

     self.__type__ = action_type
     self.__card__ = card
     self.__key__ = card.key
示例#15
0
    def __init__(self, role):
        '''
        Create a role card.

        :param role: the role name, one of the four RoleCardNames values
            (sheriff, deputy_sheriff, outlaw, renegade)
        :raises TypeError: if role is not a str, or is not one of the four role names
        '''
        # The original test was inverted and rejected every str.
        if not isinstance(role, str):
            message = "In the constructor RoleCard(rolecard), the rolecard must be a str."
            roomai.get_logger().fatal(message)
            raise TypeError(message)

        valid_roles = (RoleCardNames.sheriff, RoleCardNames.deputy_sheriff,
                       RoleCardNames.outlaw, RoleCardNames.renegade)
        if role not in valid_roles:
            message = (
                "In the constructor RoleCard(rolecard), the rolecard must be one of [%s,%s,%s,%s]"
                % valid_roles)
            roomai.get_logger().fatal(message)
            # TypeError is kept (rather than ValueError) for existing callers.
            raise TypeError(message)

        self.__name1__ = role
示例#16
0
#!/bin/python
#coding=utf8

import roomai
import roomai.common
logger = roomai.get_logger()



######################################################################### Basic Concepts #####################################################
class AbstractPublicState(object):
    '''
    The abstract class of the public state. The information in the public state is public to every player
    '''
    def __init__(self):
        self.__turn__            = None
        self.__action_history__  = []

        self.__is_terminal__     = False
        self.__scores__          = None

    def __get_turn__(self): return self.__turn__
    turn = property(__get_turn__, doc = "The players[turn] is expected to take an action.")

    def __get_action_history__(self):   return tuple(self.__action_history__)
    action_history = property(__get_action_history__, doc = "The action_history so far. For example, action_history = [(0, roomai.kuhn.KuhnAction.lookup(\"check\"),(1,roomai.kuhn.KuhnAction.lookup(\"bet\")]")

    ''' 
    def __get_previous_id__(self):  return self.__previous_id__
    previous_id = property(__get_previous_id__,doc = "The players[previous_id] took an action in the previous epoch. In the first epoch, previous_id is None")
示例#17
0
 def lookup(cls, key):
     '''
     Return the role card registered under the given key.

     :param key: a key of AllRoleCardsDict
     :return: the entry of AllRoleCardsDict stored under key
     :raises TypeError: if key is not in AllRoleCardsDict
     '''
     logger = roomai.get_logger()
     if key in AllRoleCardsDict:
         return AllRoleCardsDict[key]

     message = "%s is not valid rolecard key" % (key)
     logger.fatal(message)
     raise TypeError(message)
示例#18
0
    def init(self, params=dict()):
        '''
        Initialize the TexasHoldem game environment with the initialization params.\n
        The initialization is a dict with some options\n
        1) allcards: the order of all poker cards appearing\n
        2) record_history: whether to record all history states. If you need to call the backward function, please set it to True. default False\n
        3) num_players: how many players are in the game, default 3\n
        4) dealer_id: the player id of the dealer, default random\n
        5) chips: the initialization chips, default [1000,1000,...]\n
        6) big_blind_bet: the number of chips for the big blind bet, default 10\n
        An example of the initialization param is {"num_players":2,"record_history":True}

        The blinds are posted here: the player after the dealer posts half the
        big blind, the one after that posts the big blind, and the player
        after the big blind is the first to act.

        :param params: the initialization params
        :return: infos, public_state, person_states, private_state
        '''

        self.logger = roomai.get_logger()

        # ---- copy each option into self.__params__, using its default when absent ----
        if "num_players" in params:
            self.__params__["num_players"] = params["num_players"]
        else:
            self.__params__["num_players"] = 3

        if "dealer_id" in params:
            self.__params__["dealer_id"] = params["dealer_id"]
        else:
            # Random dealer among the players.
            self.__params__["dealer_id"] = int(random.random() *
                                               self.__params__["num_players"])

        if "chips" in params:
            # NOTE(review): the caller's list is stored as is and later becomes
            # pu.__chips__, so posting the blinds below mutates the caller's
            # list -- confirm this aliasing is intended.
            self.__params__["chips"] = params["chips"]
        else:
            self.__params__["chips"] = [
                1000 for i in range(self.__params__["num_players"])
            ]

        if "big_blind_bet" in params:
            self.__params__["big_blind_bet"] = params["big_blind_bet"]
        else:
            self.__params__["big_blind_bet"] = 10

        if "allcards" in params:
            # A given deck is copied card by card, keeping its order.
            self.__params__["allcards"] = [
                c.__deepcopy__() for c in params["allcards"]
            ]
        else:
            # Otherwise a shuffled deck without the kings (jokers) is used.
            self.__params__["allcards"] = list(
                roomai.common.AllPokerCards_Without_King.values())
            random.shuffle(self.__params__["allcards"])

        if "record_history" in params:
            self.__params__["record_history"] = params["record_history"]
        else:
            self.__params__["record_history"] = False

        # NOTE(review): self is passed explicitly to a method looked up on
        # self; the callee is defined outside this view -- confirm its signature.
        self.__check_initialization_configuration__(self)

        ## public info
        # Small blind sits right after the dealer, big blind after the small blind.
        small = (self.__params__["dealer_id"] +
                 1) % self.__params__["num_players"]
        big = (self.__params__["dealer_id"] +
               2) % self.__params__["num_players"]

        self.public_state = TexasHoldemPublicState()
        pu = self.public_state
        pu.__num_players__ = self.__params__["num_players"]
        pu.__dealer_id__ = self.__params__["dealer_id"]
        pu.__big_blind_bet__ = self.__params__["big_blind_bet"]
        pu.__raise_account__ = self.__params__["big_blind_bet"]

        # Nobody has folded or gone all-in yet; everybody still has to act.
        pu.__is_fold__ = [False for i in range(self.__params__["num_players"])]
        pu.__num_fold__ = 0
        pu.__is_allin__ = [
            False for i in range(self.__params__["num_players"])
        ]
        pu.__num_allin__ = 0
        pu.__is_needed_to_action__ = [
            True for i in range(self.__params__["num_players"])
        ]
        pu.__num_needed_to_action__ = pu.num_players

        pu.__bets__ = [0 for i in range(self.__params__["num_players"])]
        pu.__chips__ = self.__params__["chips"]
        pu.__stage__ = StageSpace.firstStage
        # The player after the big blind acts first.
        pu.__turn__ = (big + 1) % pu.num_players
        pu.__public_cards__ = []

        pu.__previous_id__ = None
        pu.__previous_action__ = None

        # Post the big blind; a player who cannot cover more than it goes all-in.
        if pu.chips[big] > self.__params__["big_blind_bet"]:
            pu.__chips__[big] -= self.__params__["big_blind_bet"]
            pu.__bets__[big] += self.__params__["big_blind_bet"]
        else:
            pu.__bets__[big] = pu.chips[big]
            pu.__chips__[big] = 0
            pu.__is_allin__[big] = True
            pu.__num_allin__ += 1
        pu.__max_bet_sofar__ = pu.bets[big]
        pu.__raise_account__ = self.__params__["big_blind_bet"]

        # Post the small blind (half the big blind) under the same all-in rule.
        # NOTE(review): "/ 2" is true division on Python 3, so chips and bets
        # can become floats -- confirm whether "//" was intended.
        if pu.chips[small] > self.__params__["big_blind_bet"] / 2:
            pu.__chips__[small] -= self.__params__["big_blind_bet"] / 2
            pu.__bets__[small] += self.__params__["big_blind_bet"] / 2
        else:
            pu.__bets__[small] = pu.chips[small]
            pu.__chips__[small] = 0
            pu.__is_allin__[small] = True
            pu.__num_allin__ += 1

        pu.__is_terminal__ = False
        pu.__scores__ = [0 for i in range(self.__params__["num_players"])]

        # private info
        # The five cards following the hand cards in the deck are kept aside.
        self.private_state = TexasHoldemPrivateState()
        pr = self.private_state
        pr.__keep_cards__ = self.__params__[
            "allcards"][self.__params__["num_players"] *
                        2:self.__params__["num_players"] * 2 + 5]

        ## person info
        self.person_states = [
            TexasHoldemPersonState()
            for i in range(self.__params__["num_players"])
        ]
        pes = self.person_states
        for i in range(self.__params__["num_players"]):
            pes[i].__id__ = i
            # Player i holds deck cards 2*i and 2*i + 1.
            pes[i].__hand_cards__ = self.__params__["allcards"][i * 2:(i + 1) *
                                                                2]
        # Only the player to act gets its available actions filled in.
        pes[pu.turn].__available_actions__ = self.available_actions(
            pu, pes[pu.turn])

        self.__gen_history__()
        infos = self.__gen_infos__()

        # The message is only built when debug logging is enabled.
        if self.logger.level <= logging.DEBUG:
            self.logger.debug("TexasHoldemEnv.init: num_players = %d, dealer_id = %d, chip = %d, big_blind_bet = %d"%(\
                pu.num_players,\
                pu.dealer_id,\
                pu.chips[0],\
                pu.big_blind_bet
            ))

        return infos, pu, pes, pr
示例#19
0
    def available_actions(self):
        '''
        Generate all valid actions given the public state and the person state

        The chance actions are handled first (character cards, then role
        cards, then dealing), followed by the responses to the entry on top of
        the response stack.

        NOTE(review): in the visible code the dealing actions are built but
        never returned, and nothing is returned when the response stack is
        empty or its reason matches none of the handled cases -- the method
        looks unfinished; confirm.

        :return: all valid actions, as a dict from action key to action
        '''
        logger = roomai.get_logger()

        ######################################   chance action  #################################
        ## charactercard
        # While the last player has no character card, every character card
        # not yet given to a player is available.
        if self.__public_state_history__[-1].__public_person_infos__[-1].__character_card__ is None:
            available_actions = dict()
            # Keys of the character cards already handed out.
            tmp_set = set()
            for i in range(len(self.__public_state_history__[-1].__public_person_infos__)):
                if self.__public_state_history__[-1].__public_person_infos__[i].__character_card__ is not None:
                    tmp_set.add(self.__public_state_history__[-1].__public_person_infos__[i].__character_card__.key)

            for key in AllCharacterCardsDict:
                if key not in tmp_set:
                    available_actions[key] = BangActionChance.lookup(key)
            return available_actions

        ## rolecard
        # The first player without a role card triggers the role dealing.
        # NOTE(review): __person_states_history__[i] is indexed with [-1]
        # further down, yet here a .person_states attribute is read from it --
        # confirm which one is right.
        for i in range(self.__public_state_history__[-1].param_num_normal_players):
            if self.__person_states_history__[i].person_states[-1].__role_card__ is None:
                available_actions = dict()
                # Keys of the role cards already handed out.
                tmp_set = set()
                num_sheriff = 0
                num_deputy_sheriff = 0
                num_renegade = 0
                num_outlaw = 0

                # NOTE(review): len() is applied to param_num_normal_players,
                # which is used as an int in the range() call above -- confirm.
                for j in range(len(self.__public_state_history__[-1].param_num_normal_players)):
                    if self.__person_states_history__[j].person_states[-1].__role_card__ is not None:
                        tmp_set.add(self.__person_states_history__[j].person_states[-1].__role_card__.key)

                # Number of role cards of each kind for the given player count;
                # with two players no role card is offered.
                if self.__public_state_history__[-1].__param_num_normal_players__ == 2:
                    return available_actions

                elif self.__public_state_history__[-1].__param_num_normal_players__ == 4:
                    num_sheriff = 1
                    num_renegade = 1
                    num_outlaw = 2

                elif self.__public_state_history__[-1].__param_num_normal_players__ == 5:
                    num_sheriff = 1
                    num_deputy_sheriff = 1
                    num_renegade = 1
                    num_outlaw = 2

                else:
                    logger.fatal("param_num_normal_players not in [2,4,5]")
                    raise ValueError("param_num_normal_players not in [2,4,5]")

                # Subtract the roles already handed out.
                # NOTE(review): tmp_set is a set, so a role key held by several
                # players (e.g. two outlaws) is subtracted only once -- confirm.
                for key in tmp_set:
                    if key == CardRole.RoleCardNames.sheriff:
                        num_sheriff = num_sheriff - 1
                    if key == CardRole.RoleCardNames.deputy_sheriff:
                        num_deputy_sheriff = num_deputy_sheriff - 1
                    if key == CardRole.RoleCardNames.renegade:
                        num_renegade = num_renegade - 1
                    if key == CardRole.RoleCardNames.outlaw:
                        num_outlaw = num_outlaw - 1
                # Every role with cards left can still be dealt.
                if num_sheriff > 0:
                    available_actions[CardRole.RoleCardNames.sheriff] = BangActionChance.lookup(CardRole.RoleCardNames.sheriff)
                if num_deputy_sheriff > 0:
                    available_actions[CardRole.RoleCardNames.deputy_sheriff] = BangActionChance.lookup(CardRole.RoleCardNames.deputy_sheriff)
                if num_renegade > 0:
                    available_actions[CardRole.RoleCardNames.renegade] = BangActionChance.lookup(CardRole.RoleCardNames.renegade)
                if num_outlaw > 0:
                    available_actions[CardRole.RoleCardNames.outlaw] = BangActionChance.lookup(CardRole.RoleCardNames.outlaw)
                return available_actions

        ## deal cards
        # NOTE(review): .deck is read from the history itself, while the
        # history is indexed with [-1] elsewhere in this method; the values
        # are cards rather than BangActionChance objects, and the dict is not
        # returned in the visible code -- confirm.
        available_actions = dict()
        for card in self.__private_state_history__.deck:
            available_actions[card.key] = card



        ####################################### action ####################################
        turn = self.__public_state_history__[-1].turn
        # Despite its name, tmp_set is a dict from action key to action here.
        tmp_set = dict()
        if len(self.__public_state_history__[-1].response_infos_stack) > 0:
            # NOTE(review): [-1] is applied to the outer person-state history,
            # and the result is then indexed by the subject id -- confirm the
            # layout of __person_states_history__.
            person_states = self.__person_states_history__[-1]
            private_state = self.__private_state_history__[-1]
            # The entry on top of the stack is the one to respond to
            # ("object" shadows the builtin of the same name).
            subject = self.__public_state_history__[-1].response_infos_stack[-1].subject
            object  = self.__public_state_history__[-1].response_infos_stack[-1].object
            reason  = self.__public_state_history__[-1].response_infos_stack[-1].reason
            # Response to UseIndian: a Bang card from the hand, or giving up.
            if  reason == ResponseInfo.UseIndian:
                for card in person_states[subject].hand_cards:
                    if  card.name == PlayingCardNames.Bang:
                        tmp_set[card.name] = BangAction.lookup(card.name)
                tmp_set[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
                return tmp_set

            # Response to UseCatling: a Miss card from the hand, or giving up.
            elif reason == ResponseInfo.UseCatling:
                for card in person_states[subject].hand_cards:
                    if  card.name == PlayingCardNames.Miss:
                        tmp_set[card.name] = BangAction.lookup(card.name)
                tmp_set[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
                return tmp_set

            # Response to ToDead: a Beer card aimed at the object, or giving up.
            elif reason == ResponseInfo.ToDead:
                for card in person_states[subject].hand_cards:
                    if card.name == PlayingCardNames.Beer:
                        tmp_set[card.name+"_%d"%(object)] = BangAction.lookup(card.name+"_%d"%(object))
                tmp_set[OtherActionNames.giveup] = BangAction.lookup(OtherActionNames.giveup)
                return tmp_set

            # Shuffle: every card of the shuffle deck is a possible chance action.
            elif reason == ResponseInfo.Shuffle:
                for card in private_state.shuffle_deck:
                    tmp_set[card.key] = BangActionChance.lookup(card.key)
                return tmp_set
示例#20
0
    def init(self, params=dict()):
        '''
        Initialize the TexasHoldem game environment with the initialization params.

        ``params`` is a dict of options; every option is optional:

        1. param_num_normal_players: how many players are in the game, default 3
        2. param_start_turn: stored on the public state, default random in [0, param_num_normal_players)
        3. param_dealer_id: the player id of the dealer, default random in [0, param_num_normal_players)
        4. param_init_chips: the initialization chips, default [1000,1000,...]
        5. param_big_blind_bet: the number of chips for the big blind bet, default 10

        An example of the initialization param is {"param_num_normal_players":2,"param_big_blind_bet":20}

        The blinds are posted here: the seat two after the dealer pays the big blind and
        the seat one after the dealer pays half of it. The first turn is set to index
        ``param_num_normal_players`` (presumably the chance player's seat -- confirm),
        regardless of ``param_start_turn``.

        :param params: the initialization params (only read, never modified)
        :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
        :raises ValueError: if len(param_init_chips) != param_num_normal_players, or if there are more than 6 players
        '''

        logger = roomai.get_logger()
        public_state = TexasHoldemStatePublic()
        self.__public_state_history__.append(public_state)

        public_state.__param_num_normal_players__ = params.get(
            "param_num_normal_players", 3)
        num_players = public_state.param_num_normal_players

        # The random defaults are drawn only when the option is missing, so a
        # fully specified configuration consumes no random numbers here.
        if "param_start_turn" in params:
            public_state.__param_start_turn__ = params["param_start_turn"]
        else:
            public_state.__param_start_turn__ = int(
                random.random() * num_players)

        if "param_dealer_id" in params:
            public_state.__param_dealer_id__ = params["param_dealer_id"]
        else:
            public_state.__param_dealer_id__ = int(
                random.random() * num_players)

        if "param_init_chips" in params:
            public_state.__param_init_chips__ = params["param_init_chips"]
        else:
            public_state.__param_init_chips__ = [1000] * num_players

        public_state.__param_big_blind_bet__ = params.get(
            "param_big_blind_bet", 10)

        ## check initialization config
        if len(public_state.param_init_chips) != num_players:
            # The message reports param_num_normal_players, the same attribute
            # that was compared above.
            raise ValueError(
                "len(env.param_initialization_chips) %d != param_num_normal_players %d"
                % (len(public_state.param_init_chips), num_players))
        if num_players > 6:
            raise ValueError(
                "The maximum of the number of players is 6. Now, the number of players = %d"
                % (num_players))

        ## public info
        small = (public_state.param_dealer_id + 1) % num_players
        big = (public_state.param_dealer_id + 2) % num_players
        big_blind_bet = public_state.param_big_blind_bet

        pu = public_state

        pu.__is_fold__ = [False] * num_players
        pu.__num_fold__ = 0
        pu.__is_allin__ = [False] * num_players
        pu.__num_allin__ = 0
        pu.__is_needed_to_action__ = [True] * num_players
        pu.__num_needed_to_action__ = num_players

        pu.__bets__ = [0] * num_players
        # Work on a copy so that the configured param_init_chips stay untouched.
        pu.__chips__ = list(public_state.param_init_chips)
        pu.__stage__ = Stage.firstStage
        pu.__turn__ = num_players
        pu.__public_cards__ = []

        pu.__previous_id__ = None
        pu.__previous_action__ = None

        def post_blind(seat, amount):
            # A seat holding no more than the blind bets everything it has
            # and is marked all-in.
            if pu.chips[seat] > amount:
                pu.__chips__[seat] -= amount
                pu.__bets__[seat] += amount
            else:
                pu.__bets__[seat] = pu.chips[seat]
                pu.__chips__[seat] = 0
                pu.__is_allin__[seat] = True
                pu.__num_allin__ += 1

        post_blind(big, big_blind_bet)
        pu.__max_bet_sofar__ = pu.bets[big]
        pu.__raise_account__ = big_blind_bet

        # NOTE(review): "/" is true division on Python 3, so the small blind is
        # a float there (5.0 for a blind of 10) -- confirm this is intended.
        post_blind(small, big_blind_bet / 2)

        pu.__is_terminal__ = False
        pu.__scores__ = [0] * num_players

        # private info
        pr = TexasHoldemStatePrivate()
        self.__private_state_history__.append(pr)
        pr.__keep_cards__ = []  # starts empty; no cards are dealt in init

        ## person info
        # One history per normal player plus one extra entry (index num_players).
        self.__person_states_history__ = [[] for _ in range(num_players + 1)]
        for person_id, history in enumerate(self.__person_states_history__):
            person_state = TexasHoldemStatePerson()
            person_state.__id__ = person_id
            person_state.__hand_cards__ = []
            history.append(person_state)

        self.__person_states_history__[
            pu.turn][0].__available_actions__ = self.available_actions()

        infos = self.__gen_infos__()

        if logger.level <= logging.DEBUG:
            logger.debug(
                "TexasHoldemEnv.init: param_num_normal_players = %d, param_dealer_id = %d, param_initialization_chip = %d, param_big_blind_bet = %d"
                % (pu.param_num_normal_players, pu.param_dealer_id,
                   pu.param_init_chips[0], pu.param_big_blind_bet))

        return infos, self.__public_state_history__, self.__person_states_history__, self.__private_state_history__, self.__playerid_action_history__
示例#21
0
    def forward(self, action):
        '''
        The TexasHoldem game environment steps with the action taken by the current player.

        The action is validated against the current player's available actions before
        anything is modified, so an invalid action leaves all histories unchanged.
        Otherwise copies of the latest public, person and private states are appended
        to the histories and updated in place.

        :param action: The action taken by the current player (a chance action or a normal player's action)
        :return: infos, public_state_history, person_states_history, private_state_history, playerid_action_history
        :raises ValueError: if action.key is not among the current player's available actions
        :raises Exception: if a normal action's option is not one of Fold, Check, Call, Raise, AllIn
        '''

        logger = roomai.get_logger()

        # Reject invalid actions first; checking after the append below would
        # leave a phantom step in every history when the ValueError is raised.
        current_turn = self.__public_state_history__[-1].turn
        current_person = self.__person_states_history__[current_turn][-1]
        if action.key not in current_person.available_actions:
            logger.critical("action=%s is invalid" % (action.key))
            raise ValueError("action=%s is invalid" % (action.key))

        pu = self.__public_state_history__[-1].__deepcopy__()
        pes = [
            person_history[-1].__deepcopy__()
            for person_history in self.__person_states_history__
        ]
        pr = self.__private_state_history__[-1].__deepcopy__()

        # The copies become the newest history entries; the helpers called
        # below operate on them through the histories.
        self.__public_state_history__.append(pu)
        for person_history, person_state in zip(self.__person_states_history__,
                                                pes):
            person_history.append(person_state)
        self.__private_state_history__.append(pr)

        pes[pu.turn].__available_actions__ = dict()
        self.__playerid_action_history__.append(
            roomai.games.common.ActionRecord(pu.turn, action))

        if isinstance(action, TexasHoldemActionChance):
            self.__action_chance__(action)

            # Two cards per non-last person state plus five more have been
            # used: move the turn to the seat three after the dealer.
            if len(pr.all_used_cards) == (len(pes) - 1) * 2 + 5:
                pu.__turn__ = (pu.param_dealer_id + 2 +
                               1) % pu.param_num_normal_players

            pes[pu.turn].__available_actions__ = self.available_actions()

            infos = self.__gen_infos__()
            return infos, self.__public_state_history__, self.__person_states_history__, self.__private_state_history__, self.__playerid_action_history__

        if action.option == TexasHoldemAction.Fold:
            self.__action_fold__(action)
        elif action.option == TexasHoldemAction.Check:
            self.__action_check__(action)
        elif action.option == TexasHoldemAction.Call:
            self.__action_call__(action)
        elif action.option == TexasHoldemAction.Raise:
            self.__action_raise__(action)
        elif action.option == TexasHoldemAction.AllIn:
            self.__action_allin__(action)
        else:
            raise Exception(
                "action.option(%s) not in [Fold, Check, Call, Raise, AllIn]" %
                (action.option))

        pu.__previous_id__ = pu.turn
        pu.__previous_action__ = action
        pu.__is_terminal__ = False
        pu.__scores__ = [0 for _ in range(pu.param_num_normal_players)]

        if TexasHoldemEnv.__is_compute_scores__(pu):
            # The game is over: reveal the first five keep cards and score.
            pu.__public_cards__ = pr.keep_cards[0:5]
            pu.__is_terminal__ = True
            pu.__scores__ = self.__compute_scores__()

        elif TexasHoldemEnv.__is_nextround__(pu):
            # Enter the next stage: reveal that stage's public cards.
            add_cards = []
            if pu.stage == Stage.firstStage:
                add_cards = pr.keep_cards[0:3]
            elif pu.stage == Stage.secondStage:
                add_cards = [pr.keep_cards[3]]
            elif pu.stage == Stage.thirdStage:
                add_cards = [pr.keep_cards[4]]

            pu.__public_cards__.extend(add_cards)
            pu.__stage__ = pu.stage + 1

            # Everyone who has neither folded nor gone all-in must act again.
            pu.__num_needed_to_action__ = 0
            pu.__is_needed_to_action__ = [
                False for _ in range(pu.param_num_normal_players)
            ]
            for seat in range(pu.param_num_normal_players):
                if pu.__is_fold__[seat] != True and pu.__is_allin__[seat] != True:
                    pu.__is_needed_to_action__[seat] = True
                    pu.__num_needed_to_action__ += 1

            # Restart the search for the next player from the dealer's seat.
            pu.__turn__ = pu.param_dealer_id
            pu.__turn__ = self.__next_player__(pu)
            pes[pu.turn].__available_actions__ = self.available_actions()

        else:
            # Normal case: pass the turn on within the current stage.
            pu.__turn__ = self.__next_player__(pu)
            pes[pu.turn].__available_actions__ = self.available_actions()

        if logger.level <= logging.DEBUG:
            logger.debug(
                "TexasHoldemEnv.forward: num_fold+num_allin = %d+%d = %d, action = %s, stage = %d"
                % (pu.num_fold, pu.num_allin, pu.num_fold + pu.num_allin,
                   action.key, pu.stage))

        infos = self.__gen_infos__()
        return infos, self.__public_state_history__, self.__person_states_history__, self.__private_state_history__, self.__playerid_action_history__
示例#22
0
    def forward(self, action):

        """
        The Bang game environment steps with the given action.

        The action is first checked for being a BangActionChance. A chance action is
        handled according to its type (dealing a character card, dealing a role card,
        or dealing/shuffling playing cards). Any other action is treated as a normal
        player's action; the part visible here only handles responses to an Indian
        or Catling card on top of the response stack.

        Deep copies of the latest private, public and person states are appended to
        the histories before the action is processed.

        :param action: the action to apply
        :returns: for chance actions: infos, public_state_history, person_states_history,
            private_state_history. The normal-player branch below ends without a return
            statement, so it returns None.
        :raises Exception: if a normal player's response to Indian/Catling is neither
            a give-up nor the required card
        """

        logger = roomai.get_logger()
        private_state = copy.deepcopy(self.__private_state_history__[-1])
        public_state = copy.deepcopy(self.__public_state_history__[-1])
        num_players = public_state.param_num_normal_players
        person_states = [copy.deepcopy(self.__person_states_history__[i][-1]) for i in range(num_players)]
        person_states[public_state.turn].__available_actions__ = dict()

        self.__public_state_history__.append(public_state)
        self.__private_state_history__.append(private_state)
        for i in range(len(person_states)):
            self.__person_states_history__[i].append(person_states[i])

        def finish_chance_step():
            # Every chance branch ends the same way: move the turn index back
            # by one (modulo the number of normal players) and report the histories.
            public_state.__turn__ = (public_state.turn - 1) % num_players
            return self.__gen_infos__(), self.__public_state_history__, self.__person_states_history__, self.__private_state_history__

        def resolve_response(required_card_name, responded_card_label):
            # The player whose turn it is either gives up (losing one hp) or
            # answers with the required card, which leaves the hand.
            responder = person_states[public_state.turn]
            if action.type == BangActionType.other and action.other == OtherActionNames.giveup:
                responder.__hp__ -= 1
            elif action.type == BangActionType.card and action.card.name == required_card_name:
                # person_states is a list, so the card must be removed from
                # the responder's own hand.
                responder.__hand_cards__.remove(action.card)
                public_state.__turn__ = (public_state.turn + 1) % num_players
            else:
                message = "BangEnv generates %s action for responding %s" % (action.key, responded_card_label)
                logger.fatal(message)
                raise Exception(message)

        if isinstance(action, BangActionChance):
            chance_type = BangActionChance.BangActionChanceType

            if action.type == chance_type.charactercard:  # chance player deals character cards
                candidates = self.available_actions()
                person_states[public_state.turn].__available_actions__ = candidates
                # NOTE(review): "__character_card" has no trailing underscores, so
                # inside a class body Python name-mangles it -- confirm the attribute name.
                for person_info in public_state.__public_person_infos__:
                    if person_info.__character_card is None:  # sample a character card to that player
                        # choice() needs a sequence, hence the materialised key list.
                        person_info.__character_card = candidates[choice(list(candidates.keys()))]
                        break
                # Same result whether a card was assigned or every player already had one.
                return finish_chance_step()

            if action.type == chance_type.rolecard:  # chance player deals role cards
                candidates = self.available_actions()
                person_states[public_state.turn].__available_actions__ = candidates
                for i in range(num_players):
                    if person_states[i].__role__ is None:  # sample a role card to that player
                        person_states[i].__role__ = candidates[choice(list(candidates.keys()))]
                        if person_states[i].__role__ == CardRole.RoleCard(CardRole.RoleCardNames.sheriff):
                            public_state.__sheriff_id__ = i
                        break
                return finish_chance_step()

            if action.type == chance_type.playingcard:  # chance player deals/shuffles cards
                person_states[public_state.turn].__available_actions__ = self.available_actions()

                private_state.__deal_cards__.append(action)
                # NOTE(review): pop(action.key) presumes the deck can be popped by
                # card key, while the refill below assigns a list copy -- confirm.
                private_state.__deck__.pop(action.key)
                if len(private_state.__deck__) == 0:
                    # there is no card, then the chance player needs to shuffle discard cards
                    private_state.__deck__ = public_state.__discard_pile__[:]
                    public_state.__discard_pile__ = []
                return finish_chance_step()

        else:

            if len(self.__public_state_history__[-1].response_infos_stack) > 0:
                response_action = self.__public_state_history__[-1].response_infos_stack[-1].action
                responds_to_card = isinstance(response_action, BangAction) \
                        and response_action.type == BangActionType.card

                if responds_to_card and response_action.card.name == PlayingCardNames.Indian:
                    # Indian is answered with a Bang card.
                    resolve_response(PlayingCardNames.Bang, "Indian")

                elif responds_to_card and response_action.card.name == PlayingCardNames.Catling:
                    # Catling is answered with a Miss card.
                    resolve_response(PlayingCardNames.Miss, "Catling")
示例#23
0
 def lookup(cls, key):
     """
     Return the entry of AllCharacterCardsDict stored under ``key``.

     :param cls: unused
     :param key: the character card key to look up
     :return: AllCharacterCardsDict[key]
     :raises ValueError: if ``key`` is not in AllCharacterCardsDict
     """
     logger = roomai.get_logger()
     if key not in AllCharacterCardsDict:
         # (key,) keeps the formatting valid even when key itself is a tuple.
         message = "key (%s) is not a valid character key" % (key,)
         logger.fatal(message)
         raise ValueError(message)
     return AllCharacterCardsDict[key]
示例#24
0
    def init(self, params=dict()):
        """
        Initialize a TexasHoldem game: read the options, deal the cards and post the blinds.

        Args:
            params: dict of options (only read, never modified); every key is optional:
                num_players: number of players, default 3
                dealer_id: seat of the dealer, default random in [0, num_players)
                chips: chips of each player, default 1000 each. The list is copied,
                    so posting the blinds does not change the caller's list.
                big_blind_bet: size of the big blind, default 10
                allcards: the deck to deal from in order (each card is copied with
                    __deepcopy__), default a shuffled deck of PokerCard(i, j)
                    for i in 0..12 and j in 0..3
                record_history: stored on self.record_history, default False

        Returns:
            infos, public_state, person_states, private_state

        """
        self.logger = roomai.get_logger()

        self.num_players = params.get("num_players", 3)

        # Drawn only when absent, so a configured dealer consumes no random number.
        if "dealer_id" in params:
            self.dealer_id = params["dealer_id"]
        else:
            self.dealer_id = int(random.random() * self.num_players)

        if "chips" in params:
            # Copy: the blinds below are deducted in place from this list.
            self.chips = list(params["chips"])
        else:
            self.chips = [1000] * self.num_players

        self.big_blind_bet = params.get("big_blind_bet", 10)

        if "allcards" in params:
            self.allcards = [c.__deepcopy__() for c in params["allcards"]]
        else:
            self.allcards = [
                roomai.common.PokerCard(i, j)
                for i in range(13)
                for j in range(4)
            ]
            random.shuffle(self.allcards)

        self.record_history = params.get("record_history", False)

        # NOTE(review): self is passed explicitly in addition to the bound call;
        # presumably the checker is a static/class method taking the env -- confirm.
        self.check_initialization_configuration(self)

        # The first two cards per player are the hands; the next five are kept.
        num_players = self.num_players
        hand_cards = [
            self.allcards[i * 2:(i + 1) * 2] for i in range(num_players)
        ]
        keep_cards = self.allcards[num_players * 2:num_players * 2 + 5]

        ## public info
        small = (self.dealer_id + 1) % num_players
        big = (self.dealer_id + 2) % num_players

        public_state = TexasHoldemPublicState()
        self.public_state = public_state
        public_state.num_players = num_players
        public_state.dealer_id = self.dealer_id
        public_state.big_blind_bet = self.big_blind_bet

        public_state.is_fold = [False] * num_players
        public_state.num_quit = 0
        public_state.is_allin = [False] * num_players
        public_state.num_allin = 0
        public_state.is_needed_to_action = [True] * num_players
        public_state.num_needed_to_action = public_state.num_players

        public_state.bets = [0] * num_players
        # Same list object as self.chips.
        public_state.chips = self.chips
        public_state.stage = StageSpace.firstStage
        # The seat after the big blind acts first.
        public_state.turn = (big + 1) % public_state.num_players
        public_state.public_cards = []

        public_state.previous_id = None
        public_state.previous_action = None

        def post_blind(seat, amount):
            # A seat holding no more than the blind bets everything it has
            # and is marked all-in.
            if public_state.chips[seat] > amount:
                public_state.chips[seat] -= amount
                public_state.bets[seat] += amount
            else:
                public_state.bets[seat] = public_state.chips[seat]
                public_state.chips[seat] = 0
                public_state.is_allin[seat] = True
                public_state.num_allin += 1

        post_blind(big, self.big_blind_bet)
        public_state.max_bet_sofar = public_state.bets[big]
        public_state.raise_account = self.big_blind_bet

        # NOTE(review): "/" floors for ints on Python 2 only; on Python 3 the
        # small blind becomes a float -- confirm the intended interpreter.
        post_blind(small, self.big_blind_bet / 2)

        public_state.is_terminal = False
        public_state.scores = None

        # private info
        self.private_state = TexasHoldemPrivateState()
        self.private_state.hand_cards = [
            [card.__deepcopy__() for card in hand] for hand in hand_cards
        ]
        self.private_state.keep_cards = [
            card.__deepcopy__() for card in keep_cards
        ]

        ## person info
        # Each person state gets its own copies of that player's hand cards.
        self.person_states = [
            TexasHoldemPersonState() for _ in range(num_players)
        ]
        for player_id, person_state in enumerate(self.person_states):
            person_state.id = player_id
            person_state.hand_cards = [
                card.__deepcopy__() for card in hand_cards[player_id]
            ]

        current_person = self.person_states[public_state.turn]
        current_person.available_actions = self.available_actions(
            public_state, current_person)

        self.__gen_history__()
        infos = self.__gen_infos__()

        if self.logger.level <= logging.DEBUG:
            self.logger.debug(
                "TexasHoldemEnv.init: num_players = %d, dealer_id = %d, chip = %d, big_blind_bet = %d"
                % (public_state.num_players, public_state.dealer_id,
                   public_state.chips[0], public_state.big_blind_bet))

        return infos, self.public_state, self.person_states, self.private_state