Exemplo n.º 1
0
    def restore_other_player_cards(self, env: Env):
        """Put the opponents' saved hands back into ``env``.

        Reads ``self.other_hands`` and ``self.other_singles`` (presumably
        filled in earlier by ``save_other_player_cards``) and, for every seat
        other than the one returned by ``env.get_player_id()``, restores the
        player's hand and ``singles`` and recomputes the judger's playable
        cards for that seat.

        Args:
            env: Environment whose ``game.players`` and ``game.judger`` are
                updated in place.
        """
        cur_player_id = env.get_player_id()
        opponents = [seat for seat in range(3) if seat != cur_player_id]

        for seat in opponents:
            player = env.game.players[seat]
            # Give the env its own list so that later in-place changes to the
            # player's hand cannot alter the saved snapshot (and a second
            # restore still sees the hand that was originally saved).
            player._current_hand = list(self.other_hands[seat])
            player.singles = self.other_singles[seat]
            # Playable cards depend on the hand, so recompute them.
            cardstr = cards2str(player._current_hand)
            env.game.judger.playable_cards[seat] = (
                env.game.judger.playable_cards_from_hand(cardstr))
Exemplo n.º 2
0
    def save_other_player_cards(self, env: Env):
        """Snapshot the hand and ``singles`` of every player but the current one.

        The results are stored on ``self.other_hands`` and
        ``self.other_singles``, each a 3-element list indexed by player id.
        The entry for the current player (``env.get_player_id()``) is left as
        an empty list.

        Args:
            env: Environment whose ``game.players`` are read.
        """
        me = env.get_player_id()
        opponents = [seat for seat in range(3) if seat != me]

        hands = [[] for _ in range(3)]  # hand cards, per seat
        singles = [[] for _ in range(3)]  # de-duplicated cards, per seat
        self.other_hands = hands
        self.other_singles = singles

        for seat in opponents:
            player = env.game.players[seat]
            # Deep copy so the snapshot is independent of the live hand.
            hands[seat] = copy.deepcopy(player._current_hand)
            singles[seat] = player.singles
Exemplo n.º 3
0
 def default_policy(self, node: MPMCTSTreeNode, env: Env):
     """Play uniformly random legal actions on ``env`` until the game ends.

     The first action is drawn from ``node.legal_actions`` for the player
     returned by ``env.get_player_id()``; every following action is drawn
     from the ``'legal_actions'`` entry of the state returned by
     ``env.step``.

     Args:
         node: Tree node supplying the legal actions for the first move.
         env: Environment that is stepped forward in place.

     Returns:
         Whatever ``env.get_payoffs()`` returns once ``env.is_over()``.
     """
     if env.is_over():
         return env.get_payoffs()

     candidates = node.legal_actions[env.get_player_id()]
     while True:
         move = random.sample(candidates, 1)[0]
         # Step forward; only the resulting state is needed here.
         state, _ = env.step(move, False)
         if env.is_over():
             break
         candidates = state['legal_actions']

     # Game over.
     return env.get_payoffs()
Exemplo n.º 4
0
    def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
        """Pick the next node below ``root_node``: expand, or descend by UCB.

        If ``self.get_untried_action`` yields an action for the current
        player, that action is expanded through ``self.expand_action_on_node``.
        Otherwise the child chosen by ``self.get_max_UCB_child_node`` is
        taken, its action is stepped on ``env``, and the resulting legal
        actions are recorded on the child for the next player.

        Args:
            root_node: Node to expand or descend from.
            env: Environment that is advanced in place.

        Returns:
            The expanded or selected node.
        """
        seat = env.get_player_id()
        pending = self.get_untried_action(root_node, seat)
        if pending is not None:
            # The expansion helper receives env together with the action.
            return self.expand_action_on_node(root_node, pending, env)

        # Every action has been tried: follow the max-UCB child.
        best = self.get_max_UCB_child_node(root_node, CP_VAL, seat)
        state, upcoming = env.step(best.action, False)
        best.legal_actions[upcoming] = state['legal_actions']
        return best
Exemplo n.º 5
0
    def shuffle_other_player_cards(self, env: Env):
        """Randomly redistribute the opponents' cards among the opponents.

        The hands of every player except the current one
        (``env.get_player_id()``) are pooled, permuted at random, and dealt
        back so that each opponent keeps the same number of cards. For each
        opponent the new hand is sorted with ``doudizhu_sort_card`` and
        written to ``env`` together with its ``singles`` string (the hand
        string with repeated characters removed, first occurrence kept) and
        the judger's recomputed playable cards.

        Args:
            env: Environment whose ``game.players`` and ``game.judger`` are
                updated in place.
        """
        cur_player_id = env.get_player_id()
        opponents = [seat for seat in range(3) if seat != cur_player_id]

        # Pool the opponents' cards, remembering how many each one held.
        hand_sizes = {}
        pooled = []
        for seat in opponents:
            hand = copy.deepcopy(env.game.players[seat]._current_hand)
            hand_sizes[seat] = len(hand)
            pooled.extend(hand)

        # Random permutation of indices into the pool.
        order = random.sample(range(len(pooled)), len(pooled))

        # Deal consecutive slices of the permutation back to each opponent.
        dealt = {}
        start = 0
        for seat in opponents:
            stop = start + hand_sizes[seat]
            hand = [pooled[i] for i in order[start:stop]]
            start = stop
            hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))
            # The hand string is computed once and reused for both the
            # de-duplicated singles and the playable-card lookup.
            dealt[seat] = (hand, cards2str(hand))

        # Write the new hands to env only after all of them were built.
        for seat in opponents:
            hand, cardstr = dealt[seat]
            player = env.game.players[seat]
            player._current_hand = hand
            player.singles = "".join(OrderedDict.fromkeys(cardstr))
            env.game.judger.playable_cards[seat] = (
                env.game.judger.playable_cards_from_hand(cardstr))
Exemplo n.º 6
0
    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Roll the game out to the end using the agents in ``self.drqn_agents``.

        At every turn the agent indexed by the player to move is asked for an
        action through ``eval_step`` and that action is stepped on ``env``.

        Args:
            node: Tree node the rollout starts from (not read here).
            env: Environment that is stepped forward in place.

        Returns:
            Whatever ``env.get_payoffs()`` returns once ``env.is_over()``.
        """
        if env.is_over():
            return env.get_payoffs()

        seat = env.get_player_id()
        observation = env.get_state(seat)
        while True:
            move, _ = self.drqn_agents[seat].eval_step(observation)
            # Step forward; env reports the next state and who moves next.
            observation, seat = env.step(move, False)
            if env.is_over():
                break

        # Game over.
        return env.get_payoffs()