Python Env примеры использования

Язык программирования: Python

Пространство имен/Пакет: rlcard.envs

Класс/Тип: Env

Примеров на hotexamples.com: 8

Python Env — найдено 8 примеров. Это лучшие примеры кода на Python для rlcard.envs.Env, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

get_player_id(6)

step(5)

is_over(4)

get_payoffs(2)

get_state(1)

Пример #1

Показать файл

    def default_policy(self, node: MCTSTreeNode, env: Env):
        """Play uniformly random legal actions on ``env`` until the game ends.

        The first action is drawn from ``node.legal_actions``; every later
        action is drawn from the ``'legal_actions'`` entry of the state
        returned by ``env.step``. ``env`` is advanced in place.

        Args:
            node: Tree node whose ``legal_actions`` seed the rollout.
            env: Environment to roll out.

        Returns:
            The first entry of ``env.get_payoffs()`` once the game is over.
        """
        # Nothing to simulate on a finished game. Returning early also keeps
        # random.sample from raising ValueError when a terminal node carries
        # no legal actions.
        if env.is_over():
            return env.get_payoffs()[0]

        action = random.sample(node.legal_actions, 1)[0]
        while True:
            # The second value returned by step is not needed for a random
            # rollout, so it is discarded.
            next_state, _ = env.step(action, False)
            if env.is_over():
                break
            action = random.sample(next_state['legal_actions'], 1)[0]

        return env.get_payoffs()[0]

Пример #2

Показать файл

    def expand_action_on_node(self, node: MCTSTreeNode, action, env: Env):
        """Apply ``action`` to ``env`` and attach the resulting child to ``node``.

        Args:
            node: Parent node being expanded.
            action: Action to take; also used as the child's key.
            env: Environment, advanced in place by one step.

        Returns:
            The newly created ``MCTSTreeNode``, which is also stored in
            ``node.children[action]``.
        """
        # Step first: the child needs the legal actions that follow `action`.
        state_after, _ = env.step(action, False)
        child = MCTSTreeNode(
            key=action,
            action=action,
            legal_actions=state_after['legal_actions'],
            game_over=env.is_over(),
            parent=node,
        )
        node.children[action] = child
        return child

Пример #3

Показать файл

    def tree_policy(self, root_node: MPMCTSTreeNode, env: Env):
        """Pick the next node below ``root_node`` and advance ``env`` to it.

        If ``self.get_untried_action`` yields an action for the current
        player, that action is expanded via ``self.expand_action_on_node``.
        Otherwise the child returned by ``self.get_max_UCB_child_node`` is
        chosen, its action is applied to ``env``, and its legal actions for
        the next player are refreshed from the resulting state.

        Args:
            root_node: Node to descend from.
            env: Environment, advanced in place by one step.

        Returns:
            The expanded or selected child node.
        """
        acting_player = env.get_player_id()
        candidate = self.get_untried_action(root_node, acting_player)

        if candidate is not None:
            # Expansion: the action is executed on env by the helper.
            return self.expand_action_on_node(root_node, candidate, env)

        # Selection: take the child with the highest UCB value, then step.
        best_child = self.get_max_UCB_child_node(root_node, CP_VAL,
                                                 acting_player)
        state_after, next_mover = env.step(best_child.action, False)
        best_child.legal_actions[next_mover] = state_after['legal_actions']
        return best_child

Пример #4

Показать файл

    def restore_other_player_cards(self, env: Env):
        """Write the saved hands of the non-current players back into ``env``.

        For every seat in ``range(3)`` other than ``env.get_player_id()``,
        the player's ``_current_hand`` and ``singles`` are reset from
        ``self.other_hands`` / ``self.other_singles`` and the judger's
        playable cards for that seat are recomputed from the restored hand.

        NOTE(review): the saved hand list is assigned without copying, so
        the env and ``self.other_hands`` share it afterwards. Confirm this
        is intended.

        Args:
            env: Environment whose game state is modified in place.
        """
        me = env.get_player_id()
        opponents = [seat for seat in range(3) if seat != me]

        for seat in opponents:
            player = env.game.players[seat]
            player._current_hand = self.other_hands[seat]
            player.singles = self.other_singles[seat]

            hand_str = cards2str(player._current_hand)
            playable = env.game.judger.playable_cards_from_hand(hand_str)
            env.game.judger.playable_cards[seat] = playable

Пример #5

Показать файл

    def save_other_player_cards(self, env: Env):
        """Snapshot the hands of the non-current players onto ``self``.

        ``self.other_hands`` and ``self.other_singles`` are rebuilt as
        three-slot lists. The slot of the current player stays an empty
        list; each other slot receives a deep copy of that player's
        ``_current_hand`` and a reference to that player's ``singles``.

        Args:
            env: Environment to read the players' cards from.
        """
        me = env.get_player_id()
        opponents = [seat for seat in range(3) if seat != me]

        self.other_hands = [[] for _ in range(3)]  # hands
        self.other_singles = [[] for _ in range(3)]  # de-duplicated cards
        for seat in opponents:
            player = env.game.players[seat]
            self.other_hands[seat] = copy.deepcopy(player._current_hand)
            self.other_singles[seat] = player.singles

Пример #6

Показать файл

    def shuffle_other_player_cards(self, env: Env):
        """Randomly redistribute the other players' cards among themselves.

        The hands of every seat in ``range(3)`` other than
        ``env.get_player_id()`` are deep-copied, pooled, permuted with
        ``random.sample`` and dealt back so that each player keeps the same
        number of cards. Each new hand is sorted with
        ``doudizhu_sort_card``. The player's ``_current_hand`` and
        ``singles`` and the judger's playable cards are then updated in
        ``env``.

        Args:
            env: Environment whose game state is modified in place.
        """
        cur_player_id = env.get_player_id()
        other_player_ids = [i for i in range(3) if i != cur_player_id]
        players = env.game.players

        # Pool deep copies of the other players' hands, remembering how many
        # cards each of them held.
        hand_sizes = {}
        merged_hands = []
        for pid in other_player_ids:
            hand_copy = copy.deepcopy(players[pid]._current_hand)
            hand_sizes[pid] = len(hand_copy)
            merged_hands.extend(hand_copy)
        merged_hands = np.asarray(merged_hands)

        # A random permutation of the pooled card indices.
        shuffled_idx = random.sample(range(len(merged_hands)),
                                     len(merged_hands))
        # The comparison key is loop-invariant, so build it once.
        sort_key = functools.cmp_to_key(doudizhu_sort_card)

        # Phase 1: compute every new hand before touching env, so a failure
        # here leaves the game state unchanged.
        dealt = {}
        idx_start = 0
        for pid in other_player_ids:
            idx_stop = idx_start + hand_sizes[pid]
            # Select this player's share of the permuted pool.
            hand = list(merged_hands[shuffled_idx[idx_start:idx_stop]])
            # Sort the selected cards.
            hand.sort(key=sort_key)
            idx_start = idx_stop

            hand_str = cards2str(hand)
            # De-duplicate while preserving first-seen order.
            singles = "".join(OrderedDict.fromkeys(hand_str))
            dealt[pid] = (hand, hand_str, singles)

        # Phase 2: write the data into env.
        judger = env.game.judger
        for pid in other_player_ids:
            hand, hand_str, singles = dealt[pid]
            # Update the hand.
            players[pid]._current_hand = hand
            # Update the de-duplicated cards.
            players[pid].singles = singles
            # Recompute the playable cards.
            judger.playable_cards[pid] = judger.playable_cards_from_hand(
                hand_str)

Пример #7

Показать файл

    def default_policy(self, node: MPMCTSTreeNode, env: Env):
        """Roll ``env`` out to the end using the per-player DRQN agents.

        If the game is already over the payoffs are returned immediately.
        Otherwise the agent of the player to move picks an action with
        ``eval_step``, the action is applied, and the agent of the next
        player picks the following action, until the game ends. ``env`` is
        advanced in place.

        Args:
            node: Current tree node (not read by this method).
            env: Environment to roll out.

        Returns:
            Whatever ``env.get_payoffs()`` returns once the game is over.
        """
        if env.is_over():
            return env.get_payoffs()

        mover = env.get_player_id()
        observation = env.get_state(mover)
        action, _ = self.drqn_agents[mover].eval_step(observation)

        while not env.is_over():
            # Step forward with the action chosen for the current mover.
            observation, mover = env.step(action, False)
            if env.is_over():
                break
            action, _ = self.drqn_agents[mover].eval_step(observation)

        # Game over.
        return env.get_payoffs()

Пример #8

Показать файл

    def expand_action_on_node(self, node: MPMCTSTreeNode, action, env: Env):
        """Apply ``action`` to ``env`` and return the matching child of ``node``.

        If ``node`` already has a child for ``action``, that child's legal
        actions for the next player are refreshed from the new state.
        Otherwise a new ``MPMCTSTreeNode`` is created with one legal-action
        list per player, with only the next player's slot filled, and
        stored in ``node.children[action]``.

        Args:
            node: Parent node being expanded.
            action: Action to take; also used as the child's key.
            env: Environment, advanced in place by one step.

        Returns:
            The existing or newly created child node for ``action``.
        """
        # Move forward.
        next_state, next_player_id = env.step(action, False)
        legal_actions = next_state['legal_actions']

        if action in node.children:
            # The action has been expanded before: reuse its node.
            child = node.children[action]
            child.legal_actions[next_player_id] = legal_actions
            return child

        per_player_actions = [[] for _ in range(env.player_num)]
        per_player_actions[next_player_id] = legal_actions
        child = MPMCTSTreeNode(key=action,
                               action=action,
                               legal_actions=per_player_actions,
                               parent=node,
                               player_num=env.player_num)
        node.children[action] = child
        return child