def take_action(self):
    """Sample an action from the current strategy, or pick one uniformly.

    ``self.get_strategies(self.states, self.available_actions)`` is expected
    to return a mapping from a state key to a weight.  A key is drawn by
    roulette-wheel selection over those weights.  When the drawn key does
    not end with ``'_'`` its last two ``'_'``-separated fields are combined
    into an action key and looked up; otherwise (or when nothing was drawn)
    a uniformly random available action is returned.

    Returns:
        The selected action object.
    """
    cur_strategies = self.get_strategies(self.states, self.available_actions)

    new_state = ''
    val = random.random()
    total = 0
    for key in cur_strategies:
        total += cur_strategies[key]
        if total > 0 and val < total:
            new_state = key
            # Stop at the first key whose cumulative weight exceeds the draw.
            # Without this the running total stays above ``val`` for every
            # later key, so the last key would always win.
            break

    if new_state != '' and new_state[-1] != '_':
        fields = new_state.split("_")
        action1 = fields[-2]
        action2 = fields[-1]
        # Only the final character of the second-to-last field is kept.
        if len(action1) > 1:
            action1 = action1[-1]
        action = action1 + '_' + action2
        return SevenKingAction.lookup(action)

    # Nothing usable was drawn: fall back to a uniform random choice.
    idx = int(random.random() * len(self.available_actions))
    return list(self.available_actions.values())[idx]
def take_action(self):
    """Choose an action, preferring the empty action whenever it is offered.

    If ``""`` is among the available actions a fresh ``SevenKingAction("")``
    is returned.  Otherwise the actions whose pattern name is ``"p_0"`` are
    scanned for the lowest first hand card; if one is found the action keyed
    by that card is looked up, else the first available action is returned.
    """
    actions = self.available_actions
    if "" in actions:
        return SevenKingAction("")

    # NOTE(review): "p_0" appears to be the pattern of the empty action, which
    # is absent on this path, so this filter may never match -- confirm that
    # "p_0" (and the ``hand_card`` attribute) are what was intended.
    lowest = None
    for candidate in actions.values():
        if candidate.pattern[0] != "p_0":
            continue
        card = candidate.hand_card[0]
        if lowest is None or SevenKingPokerCard.compare(card, lowest) < 0:
            lowest = card

    if lowest is not None:
        return SevenKingAction.lookup(lowest.key)
    return list(actions.values())[0]
def testScores1(self):
    """The player who opens a fresh game must not be offered the "" action."""
    env = SevenKingEnv()
    infos, public_state, person_states, private_state = env.init()
    turn = public_state.turn

    # The empty action is missing from the advertised actions ...
    offered = infos[turn].person_state.available_actions
    self.assertTrue("" not in offered)

    # ... and the environment rejects it as invalid for that player.
    empty_action = SevenKingAction.lookup("")
    is_valid = env.is_action_valid(empty_action, public_state, person_states[turn])
    self.assertFalse(is_valid)
def take_action(self):
    """Return the first available action whose key is not the empty string.

    Returns:
        The first such action in iteration order, or the looked-up empty
        action when every available action has the key ``""``.
    """
    chosen = next(
        (act for act in self.available_actions.values() if act.key != ""),
        None,
    )
    if chosen is None:
        return SevenKingAction.lookup("")
    return chosen
def take_action(self):
    """Sample an action using regret estimates produced by ``self.rnn_model``.

    For every available action key, the encoded action history followed by
    the encoded candidate is fed to the model as a window of ``TIME_STEPS``
    rows of ``INPUT_SIZE`` values (zero-padded at the front when shorter).
    The positive parts of the model outputs are normalised into a strategy
    (uniform when no output is positive) and one action key is drawn from it.

    Returns:
        The action looked up from the drawn key, or a uniformly random
        available action if no key was drawn.
    """
    time_steps = self.rnn_model.TIME_STEPS
    input_size = self.rnn_model.INPUT_SIZE

    history = [input_trans(action) for action in self.action_history]

    regret_list = dict()
    for action in self.available_actions:
        # Build a fresh sequence per candidate; appending to the shared
        # history would leak earlier candidates into later evaluations.
        this_action = history + [input_trans(action)]
        if len(this_action) < time_steps:
            padding = [[0] * input_size] * (time_steps - len(this_action))
            action_t = padding + this_action
        else:
            # Keep the last ``time_steps`` rows *including* the candidate, so
            # the reshape below always receives a full window.
            action_t = this_action[len(this_action) - time_steps:]
        # presumably a TensorFlow session run -- TODO confirm
        regret_list[action] = self.rnn_model.sess.run(
            self.rnn_model.output,
            feed_dict={
                self.rnn_model.xs:
                np.array(action_t).reshape(-1, time_steps, input_size)
            })

    # Regret matching: only positive estimates contribute.  The estimate is
    # read at [-1][0][0] of the model output (shape not visible here).
    positive = {
        key: max(regret_list[key][-1][0][0], 0) for key in regret_list
    }
    normalizing_sum = 0
    for key in positive:
        normalizing_sum += positive[key]

    cur_strategies = dict()
    for key in positive:
        if normalizing_sum > 0:
            cur_strategies[key] = positive[key] / normalizing_sum
        else:
            cur_strategies[key] = 1.0 / len(self.available_actions)

    # Roulette-wheel draw.  ``None`` is the "nothing drawn" marker because
    # "" is itself a legitimate action key.
    val = random.random()
    total = 0
    new_state = None
    for key in cur_strategies:
        total += cur_strategies[key]
        if total > 0 and val < total:
            new_state = key
            break  # first key whose cumulative probability exceeds the draw

    if new_state is not None:
        return SevenKingAction.lookup(new_state)

    idx = int(random.random() * len(self.available_actions))
    return list(self.available_actions.values())[idx]
def take_action(self):
    """Pick the available action with the largest ``pattern[1]`` value.

    Ties on ``pattern[1]`` are broken in favour of the action whose last
    card compares higher; actions whose pattern name is ``'p_0'`` never win
    a tie.  The looked-up empty action is the starting choice.
    ``pattern[1]`` is presumably the number of cards in the pattern.
    """
    best = SevenKingAction.lookup("")
    best_size = 0
    for candidate in self.available_actions.values():
        size = candidate.pattern[1]
        if size > best_size:
            best_size, best = size, candidate
            continue
        if size != best_size or candidate.pattern[0] == 'p_0':
            continue
        # Same size: keep whichever ends on the higher card.
        if SevenKingPokerCard.compare(candidate.cards[-1], best.cards[-1]) > 0:
            best = candidate
    return best
def testEnv(self):
    """Smoke test: start a game, print the opener's options, play one card."""
    env = SevenKingEnv()
    infos, public_state, person_states, private_state = env.init({"num_players": 2})
    assert len(infos) == 2

    turn = public_state.turn
    current = person_states[turn]
    self.show_hand_card(current.hand_cards)
    print(turn)
    print("available_actions=", current.available_actions.keys())
    print("available_actions_v=", current.available_actions.values())

    # Play the first card of the current player's hand as a single-card action.
    first_card = current.hand_cards[0]
    action = SevenKingAction("%s" % (first_card.key))
    infos, public_state, person_states, private_state = env.forward(action)
def forward(self, action):
    '''
    Step the SevenKing game environment with the action taken by the current player.

    :param action: the action played by the player whose turn it is
    :return: infos, public_state, person_states, private_state
    :raises ValueError: if ``is_action_valid`` rejects the action
    '''
    public = self.public_state
    private = self.private_state
    persons = self.person_states
    turn = public.turn
    actor = persons[turn]

    if self.is_action_valid(action, public, actor) == False:
        raise ValueError("The (%s) is an invalid action " % (action.key))

    actor.__available_actions__ = dict()
    public.__action_history__.append((turn, action))

    def begin_round(next_turn):
        # Hand the lead to ``next_turn`` with fold state and license cleared.
        public.__turn__ = next_turn
        public.__num_fold__ = 0
        public.__is_fold__ = [False] * public.num_normal_players
        public.__license_action__ = SevenKingAction.lookup("")
        persons[next_turn].__available_actions__ = SevenKingEnv.available_actions(
            public, persons[next_turn])

    ## apply the action
    if action.pattern[0] == "p_0":
        # The empty pattern marks the player as folded.
        public.__is_fold__[turn] = True
        public.__num_fold__ += 1
        actor.__available_actions__ = dict()
    else:
        actor.__del_cards__(action.cards)
        if public.stage == 0:
            # Stage 0: refill the hand to five cards from the kept cards.
            missing = 5 - len(actor.hand_cards)
            drawn = [private.__keep_cards__.pop() for _ in range(missing)]
            actor.__add_cards__(drawn)
        elif public.stage == 1:
            public.__num_hand_cards__[turn] = len(actor.hand_cards)
        # A non-empty action becomes the license for the following players.
        public.__license_action__ = action

    if public.stage == 1 and len(actor.hand_cards) == 0:
        ## terminal: the acting player emptied the hand in stage 1
        public.__is_terminal__ = True
        public.__scores__ = self.__compute_scores__()
        public.__turn__ = None
        public.__license_action__ = SevenKingAction.lookup("")
    elif len(private.keep_cards) < 5 and public.stage == 0:
        ## stage 0 to 1: the holder of the lowest card leads and must use it
        next_turn, min_card = self.__choose_player_with_lowest_card__()
        begin_round(next_turn)
        opener = persons[next_turn]
        blocked = [key for key in opener.available_actions.keys()
                   if min_card.key not in key]
        for key in blocked:
            del opener.__available_actions__[key]
        public.__stage__ = 1
    elif public.num_fold + 1 == public.num_normal_players:
        ## next round: all players but one have folded
        begin_round(self.__choose_player_with_nofold__())
    else:
        next_turn = (turn + 1) % public.num_normal_players
        public.__turn__ = next_turn
        persons[next_turn].__available_actions__ = SevenKingEnv.available_actions(
            public, persons[next_turn])

    self.__gen_history__()
    infos = self.__gen_infos__()
    return infos, self.public_state, self.person_states, self.private_state
def available_actions(cls, public_state, person_state):
    """Enumerate the actions a player may take given the current license.

    When the license action has pattern ``"p_0"`` every non-``"p_0"`` pattern
    is considered; otherwise only the license pattern plus ``"p_0"`` are.  A
    candidate is kept when the license is ``"p_0"`` or when its highest-index
    card compares greater than the last card of the license action.

    Args:
        public_state: Provides ``license_action`` (``None`` is treated as the
            empty action).
        person_state: Provides ``hand_cards`` and the point-rank grouping.

    Returns:
        A dict mapping action key to the looked-up action.

    Raises:
        ValueError: If a considered pattern name is not ``p_0`` .. ``p_4``.
    """
    actions = dict()

    license_action = public_state.license_action
    if license_action is None:
        license_action = SevenKingAction("")
    hand_cards = person_state.hand_cards
    free_lead = license_action.pattern[0] == "p_0"

    patterns = set()
    if free_lead:
        patterns.update(
            p for p in AllSevenKingPatterns.values() if p[0] != "p_0")
    else:
        patterns.add(license_action.pattern)
        patterns.add(AllSevenKingPatterns["p_0"])

    def beats_license(card):
        # Anything goes on a free lead; otherwise the card must outrank the
        # last card of the license action.
        return free_lead or SevenKingPokerCard.compare(
            card, license_action.cards[-1]) > 0

    def register(fmt, *cards):
        # Build the comma-separated key and store the matching action.
        key = fmt % tuple(card.key for card in cards)
        actions[key] = SevenKingAction.lookup(key)

    for pattern in patterns:
        kind, size = pattern[0], pattern[1]
        if size >= 2:
            point2cards = person_state.__gen_pointrank2cards__()

        if len(hand_cards) < size:
            continue

        if kind == "p_0":
            actions[""] = SevenKingAction.lookup("")
        elif kind == "p_1":
            for card in hand_cards:
                if beats_license(card):
                    actions[card.key] = SevenKingAction.lookup(card.key)
        elif kind == "p_2":
            for p in point2cards:
                group = point2cards[p]
                count = len(group)
                if count not in (2, 3, 4):
                    continue
                # Pairs are emitted grouped by their higher-index card.
                # presumably each group is sorted ascending -- TODO confirm
                for hi in range(1, count):
                    if beats_license(group[hi]):
                        for lo in range(hi):
                            register("%s,%s", group[lo], group[hi])
        elif kind == "p_3":
            for p in point2cards:
                group = point2cards[p]
                count = len(group)
                if count not in (3, 4):
                    continue
                for hi in range(2, count):
                    if beats_license(group[hi]):
                        for first in range(hi):
                            for second in range(first + 1, hi):
                                register("%s,%s,%s",
                                         group[first], group[second], group[hi])
        elif kind == "p_4":
            for p in point2cards:
                group = point2cards[p]
                if len(group) >= 4 and beats_license(group[3]):
                    register("%s,%s,%s,%s",
                             group[0], group[1], group[2], group[3])
        else:
            raise ValueError("The %s pattern is invalid" % (pattern[0]))

    return actions
def init(self, params = dict()):
    '''
    Initialize the SevenKing game environment from the given options.

    Recognised keys of ``params``:
    1) backward_enable: stored in ``self.__params__``; defaults to False
    2) num_normal_players: number of players in the game; defaults to 3

    Example: {"num_normal_players":2,"backward_enable":True}

    :param params: the initialization params
    :return: infos, public_state, person_states, private_state
    '''
    self.__params__["num_normal_players"] = params.get("num_normal_players", 3)
    self.__params__["backward_enable"] = params.get("backward_enable", False)
    num_players = self.__params__["num_normal_players"]

    self.public_state = SevenKingPublicState()
    self.private_state = SevenKingPrivateState()
    # One more person state than there are normal players is created.
    self.person_states = [SevenKingPersonState() for _ in range(num_players + 1)]

    self.public_state_history = []
    self.private_state_history = []
    self.person_states_history = []

    ## private_state: shuffle a copy of the deck and deal five cards each
    deck = [card.__deepcopy__() for card in AllSevenKingPokerCards.values()]
    random.shuffle(deck)
    self.private_state.__keep_cards__ = deck
    for person in self.person_states[:num_players]:
        dealt = [self.private_state.__keep_cards__.pop() for _ in range(5)]
        person.__add_cards__(dealt)

    ## public_state
    public = self.public_state
    first_turn, _ = self.__choose_player_with_lowest_card__()
    public.__turn__ = first_turn
    public.__is_terminal__ = False
    public.__scores__ = []
    public.__license_action__ = SevenKingAction.lookup("")
    public.__stage__ = 0
    public.__num_normal_players__ = num_players
    public.__num_keep_cards__ = len(self.private_state.keep_cards)
    public.__num_hand_cards__ = [len(p.hand_cards) for p in self.person_states]
    public.__is_fold__ = [False] * public.num_normal_players
    public.__num_fold__ = 0

    ## person_state: number the players; only the opener gets actions
    for index, person in enumerate(self.person_states):
        person.__id__ = index
        if index == public.turn:
            person.__available_actions__ = SevenKingEnv.available_actions(public, person)

    self.__gen_history__()
    infos = self.__gen_infos__()
    return infos, self.public_state, self.person_states, self.private_state
def init(self, params=dict()):
    """Set up a new SevenKing game.

    Args:
        params: Options dict.  ``"num_players"`` (default 3),
            ``"allcards"`` (deck to copy instead of the full card set) and
            ``"record_history"`` (default False) are read.

    Returns:
        A tuple ``(infos, public_state, person_states, private_state)``.
    """
    self.num_players = params.get("num_players", 3)

    if "allcards" in params:
        deck = [card.__deepcopy__() for card in params["allcards"]]
    else:
        deck = [card.__deepcopy__() for card in AllSevenKingPokerCards.values()]
        # NOTE(review): only the default deck is shuffled; a caller-supplied
        # deck keeps its order -- confirm this is the intended nesting.
        random.shuffle(deck)

    self.record_history = params.get("record_history", False)

    self.public_state = SevenKingPublicState()
    self.private_state = SevenKingPrivateState()
    self.person_states = [SevenKingPersonState() for _ in range(self.num_players)]

    self.public_state_history = []
    self.private_state_history = []
    self.person_states_history = []

    ## private_state: every player is dealt five cards from the deck
    self.private_state._SevenKingPrivateState__keep_cards = deck
    for seat in range(self.num_players):
        dealt = [
            self.private_state._SevenKingPrivateState__keep_cards.pop()
            for _ in range(5)
        ]
        self.person_states[seat]._SevenKingPersonState__add_cards(dealt)

    ## public_state
    public = self.public_state
    first_turn, _ = self.choose_player_with_lowest_card()
    public.turn = first_turn
    public.is_terminal = False
    public.scores = []
    public.previous_id = None
    public.previous_action = None
    public._SevenKingPublicState__license_action = SevenKingAction.lookup("")
    public._SevenKingPublicState__stage = 0
    public._SevenKingPublicState__num_players = self.num_players
    public._SevenKingPublicState__num_keep_cards = len(self.private_state.keep_cards)
    public._SevenKingPublicState__num_hand_cards = [
        len(person.hand_cards) for person in self.person_states
    ]
    public._SevenKingPublicState__is_fold = [False] * public.num_players
    public._SevenKingPublicState__num_fold = 0

    ## person_state: number the players; only the opener gets actions
    for seat in range(self.num_players):
        person = self.person_states[seat]
        person.id = seat
        if seat == public.turn:
            person.available_actions = SevenKingEnv.available_actions(public, person)

    self.__gen_history__()
    infos = self.__gen_infos__()
    return infos, self.public_state, self.person_states, self.private_state
def forward(self, action):
    """Advance the game by one step with the current player's action.

    Args:
        action: The action taken by the player whose turn it is.

    Returns:
        A tuple ``(infos, public_state, person_states, private_state)``.

    Raises:
        ValueError: If ``is_action_valid`` rejects the action for the
            current player.
    """
    pu = self.public_state
    pr = self.private_state
    pes = self.person_states
    turn = pu.turn

    if self.is_action_valid(action, pu, pes[turn]) == False:
        raise ValueError("The (%s) is an invalid action " % (action.key))

    ## the action plays its role
    if action.pattern[0] == "p_0":
        # The empty pattern marks the player as folded.
        pu._SevenKingPublicState__is_fold[turn] = True
        pu._SevenKingPublicState__num_fold += 1
        pes[turn].available_actions = dict()
    else:
        pes[turn]._SevenKingPersonState__del_cards(action.cards)
        if pu.stage == 0:
            # Stage 0: refill the hand to five cards from the kept cards.
            tmp = []
            for _ in range(5 - len(pes[turn].hand_cards)):
                tmp.append(pr._SevenKingPrivateState__keep_cards.pop())
            pes[turn]._SevenKingPersonState__add_cards(tmp)
        elif pu.stage == 1:
            pu._SevenKingPublicState__num_hand_cards[turn] = len(
                pes[turn]._SevenKingPersonState__hand_cards)

    pes[turn].available_actions = dict()
    pu.previous_id = turn
    pu.previous_action = action

    # A non-empty action becomes the license for the following players.
    if action.pattern[0] != "p_0":
        pu._SevenKingPublicState__license_action = action

    ## terminal: the acting player emptied the hand in stage 1
    if pu.stage == 1 and len(pes[turn].hand_cards) == 0:
        pu.is_terminal = True
        pu.scores = self.compute_scores()
        new_turn = None
        pu.turn = new_turn
        pu._SevenKingPublicState__license_action = SevenKingAction.lookup("")

    ## stage 0 to 1
    elif len(pr.keep_cards) < 5 and pu.stage == 0:
        new_turn, min_card = self.choose_player_with_lowest_card()
        pu.turn = new_turn
        pu._SevenKingPublicState__num_fold = 0
        pu._SevenKingPublicState__is_fold = [
            False for _ in range(pu.num_players)
        ]
        pu._SevenKingPublicState__license_action = SevenKingAction.lookup("")
        pes[new_turn].available_actions = SevenKingEnv.available_actions(
            pu, pes[new_turn])
        # The opener must play the lowest card.  Iterate over a snapshot of
        # the keys: deleting while iterating the live view raises
        # RuntimeError on Python 3.
        keys = list(pes[new_turn].available_actions.keys())
        for key in keys:
            if min_card.key not in key:
                del pes[new_turn].available_actions[key]
        pu._SevenKingPublicState__stage = 1

    ## round next: all players but one have folded
    elif pu.num_fold + 1 == pu.num_players:
        new_turn = self.choose_player_with_nofold()
        pu.turn = new_turn
        pu._SevenKingPublicState__num_fold = 0
        pu._SevenKingPublicState__is_fold = [
            False for _ in range(pu.num_players)
        ]
        pu._SevenKingPublicState__license_action = SevenKingAction.lookup("")
        pes[new_turn].available_actions = SevenKingEnv.available_actions(
            pu, pes[new_turn])

    else:
        new_turn = (turn + 1) % pu.num_players
        pu.turn = new_turn
        pes[new_turn].available_actions = SevenKingEnv.available_actions(
            pu, pes[new_turn])

    self.__gen_history__()
    infos = self.__gen_infos__()
    return infos, self.public_state, self.person_states, self.private_state