Exemplo n.º 1
0
    def default_policy(self, node: MCTSTreeNode, env: Env):
        """Play uniformly random moves on ``env`` until the game ends.

        The first move is drawn from ``node.legal_actions``; every later move
        is drawn from the ``'legal_actions'`` entry of the state returned by
        the preceding ``env.step`` call. ``env`` is advanced in place.

        Args:
            node: Node whose ``legal_actions`` supply the first move.
            env: Environment to roll out; it is mutated by the rollout.

        Returns:
            The first entry of ``env.get_payoffs()`` once the game is over.
        """
        # A finished game has nothing left to play, and its node may carry no
        # legal actions at all, so check before drawing the first move.
        if env.is_over():
            return env.get_payoffs()[0]

        action = random.choice(node.legal_actions)
        while True:
            # Step forward; the id of the next player is not needed here.
            next_state, _ = env.step(action, False)
            if env.is_over():
                break
            action = random.choice(next_state['legal_actions'])

        # Game over.
        return env.get_payoffs()[0]
Exemplo n.º 2
0
    def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
        """Apply ``action`` to ``env`` and attach the resulting child to ``node``.

        Args:
            node: Parent node that receives the new child.
            action: Action to play; also used as the child's key.
            env: Environment to step; it is advanced in place.

        Returns:
            The newly created ``MCTSTreeNode``, which is also stored in
            ``node.children[action]``.
        """
        # Step first: the returned state carries the legal actions that are
        # available after this action has been played.
        next_state, _ = env.step(action, False)
        child = MCTSTreeNode(
            key=action,
            action=action,
            legal_actions=next_state['legal_actions'],
            game_over=env.is_over(),
            parent=node,
        )
        node.children[action] = child
        return child
Exemplo n.º 3
0
    def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
        """Pick the next node below ``root_node`` and advance ``env`` onto it.

        If the current player still has an untried action at ``root_node``,
        that action is expanded via ``expand_action_on_node``. Otherwise the
        child returned by ``get_max_UCB_child_node`` is selected, its action
        is played on ``env``, and the legal actions of the player who moves
        next are recorded on that child.

        Args:
            root_node: Node to descend from.
            env: Environment to step; it is advanced in place.

        Returns:
            The expanded or selected child node.
        """
        player_id = env.get_player_id()
        pending_action = self.get_untried_action(root_node, player_id)
        if pending_action is not None:
            # Expansion plays the action on env itself.
            return self.expand_action_on_node(root_node, pending_action, env)

        # Everything has been tried: take the child with the highest UCB value
        # and replay its action on env.
        best_child = self.get_max_UCB_child_node(root_node, CP_VAL, player_id)
        next_state, next_player_id = env.step(best_child.action, False)
        best_child.legal_actions[next_player_id] = next_state['legal_actions']
        return best_child
Exemplo n.º 4
0
    def restore_other_player_cards(self, env: Env):
        """Put the saved hands of the non-acting players back into ``env``.

        For every seat in ``range(3)`` other than ``env.get_player_id()``,
        the player's ``_current_hand`` and ``singles`` are reassigned from
        ``self.other_hands`` / ``self.other_singles`` and the judger's
        playable cards for that seat are recomputed from the restored hand.

        Args:
            env: Environment whose game state is modified in place.
        """
        acting_seat = env.get_player_id()
        for seat in range(3):
            if seat == acting_seat:
                continue
            player = env.game.players[seat]
            # The saved list is assigned as-is (no copy is made here).
            player._current_hand = self.other_hands[seat]
            player.singles = self.other_singles[seat]
            hand_str = cards2str(player._current_hand)
            env.game.judger.playable_cards[seat] = (
                env.game.judger.playable_cards_from_hand(hand_str))
Exemplo n.º 5
0
    def save_other_player_cards(self, env: Env):
        """Snapshot the hands of every player except the one about to act.

        Stores two three-element lists on ``self``, indexed by seat:
        ``other_hands`` holds a deep copy of each other player's
        ``_current_hand`` and ``other_singles`` holds that player's
        ``singles`` value. The slot of the acting player
        (``env.get_player_id()``) is left as an empty list in both.

        Args:
            env: Environment to read from; it is not modified.
        """
        acting_seat = env.get_player_id()
        players = env.game.players
        # Hands are deep-copied so later changes in env cannot alter the
        # snapshot.
        self.other_hands = [
            copy.deepcopy(players[seat]._current_hand)
            if seat != acting_seat else []
            for seat in range(3)
        ]
        self.other_singles = [
            players[seat].singles if seat != acting_seat else []
            for seat in range(3)
        ]
Exemplo n.º 6
0
    def shuffle_other_player_cards(self, env: Env):
        """Randomly redeal the non-acting players' cards among themselves.

        The hands of every seat in ``range(3)`` other than
        ``env.get_player_id()`` are pooled, permuted at random, and dealt
        back so that each of those players keeps the same number of cards.
        For each of them ``env`` is updated in place: ``_current_hand`` (sorted
        with ``doudizhu_sort_card``), ``singles`` (the hand string with
        repeated characters removed, first occurrence kept) and the judger's
        ``playable_cards`` entry.

        Args:
            env: Environment whose game state is modified in place.
        """
        acting_seat = env.get_player_id()
        other_seats = [seat for seat in range(3) if seat != acting_seat]
        game = env.game

        # Pool deep copies of the other hands, so the redealt hands never
        # share card objects with the lists env held before the shuffle.
        hand_sizes = {}
        pooled = []
        for seat in other_seats:
            hand = copy.deepcopy(game.players[seat]._current_hand)
            hand_sizes[seat] = len(hand)
            pooled.extend(hand)

        # Sampling the whole population yields a random permutation of it.
        shuffled = random.sample(pooled, len(pooled))

        start = 0
        for seat in other_seats:
            # Deal this player as many cards as they held before.
            stop = start + hand_sizes[seat]
            hand = shuffled[start:stop]
            start = stop
            hand.sort(key=functools.cmp_to_key(doudizhu_sort_card))

            hand_str = cards2str(hand)
            player = game.players[seat]
            player._current_hand = hand
            # De-duplicated card string, keeping first-occurrence order.
            player.singles = "".join(OrderedDict.fromkeys(hand_str))
            # Recompute which cards this player can play from the new hand.
            game.judger.playable_cards[seat] = (
                game.judger.playable_cards_from_hand(hand_str))
Exemplo n.º 7
0
    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Roll ``env`` out to the end of the game using ``self.drqn_agents``.

        Every move is the first element returned by ``eval_step`` of the
        agent indexed by the player who is about to act. ``env`` is advanced
        in place; ``node`` is not used.

        Args:
            node: Unused.
            env: Environment to roll out; it is mutated by the rollout.

        Returns:
            ``env.get_payoffs()`` once the game is over.
        """
        if env.is_over():
            return env.get_payoffs()

        seat = env.get_player_id()
        observation = env.get_state(seat)
        while True:
            # Ask the agent of the player to move, then step forward.
            action, _ = self.drqn_agents[seat].eval_step(observation)
            observation, seat = env.step(action, False)
            if env.is_over():
                break

        # Game over.
        return env.get_payoffs()
Exemplo n.º 8
0
    def expand_action_on_node(self, node: MPMCTSTreeNode, action, env: Env):
        """Play ``action`` on ``env`` and return the matching child of ``node``.

        If ``node`` already has a child for ``action``, that child is reused
        and only the legal actions of the player who moves next are
        refreshed. Otherwise a new ``MPMCTSTreeNode`` is created with one
        legal-action slot per player (``env.player_num`` slots, all empty
        except the next player's) and registered in ``node.children``.

        Args:
            node: Parent node.
            action: Action to play; also the key of the child.
            env: Environment to step; it is advanced in place.

        Returns:
            The reused or newly created child node.
        """
        # Move forward.
        next_state, next_player_id = env.step(action, False)
        upcoming_actions = next_state['legal_actions']

        if action in node.children:
            # The action has been expanded before: update the existing child.
            child = node.children[action]
            child.legal_actions[next_player_id] = upcoming_actions
            return child

        per_player_actions = [[] for _ in range(env.player_num)]
        per_player_actions[next_player_id] = upcoming_actions
        child = MPMCTSTreeNode(
            key=action,
            action=action,
            legal_actions=per_player_actions,
            parent=node,
            player_num=env.player_num,
        )
        node.children[action] = child
        return child