Example #1
    def backpropagate(self, node: Node, value: Value,
                      min_max_stats: MinMaxStats) -> None:
        # Walk from the leaf node back up to the root.
        while node is not None:
            min_max_stats.update(node.update_value(value))
            # Discount and add the node's reward before passing the value to its
            # parent; nodes without a reward propagate NaN.
            value = (node.reward + self.effective_discount * value
                     if node.reward is not None else Value(float('nan')))
            node = node.parent
Example #2
def backpropagate(search_path: List[Node], value: float, to_play: Player,
                  discount: float, min_max_stats: MinMaxStats):
    for node in reversed(search_path):
        # Accumulate the value from the perspective of the player to move at this node.
        node.value_sum += value if node.to_play == to_play else -value
        node.visit_count += 1
        min_max_stats.update(node.value())

        # Add the node's reward and discount before passing the value to the parent.
        value = node.reward + discount * value
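The value() used in Example #2 is typically the running mean of the values backed up through the node. For reference, here is a minimal sketch of the Node fields these backpropagate snippets rely on; the field names follow Example #2, and anything beyond them is an assumption.

class Node:
    def __init__(self, prior: float = 0.0):
        self.prior = prior        # P(s, a) from the policy head
        self.to_play = None       # player to move at this node
        self.reward = 0.0         # predicted reward for reaching this node
        self.value_sum = 0.0      # sum of backed-up values
        self.visit_count = 0
        self.children = {}        # action -> child Node

    def value(self) -> float:
        # Mean backed-up value; 0 for an unvisited node.
        return self.value_sum / self.visit_count if self.visit_count > 0 else 0.0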
Example #3
    def run_mcts(self, obs: np.ndarray, network: Network) -> Node:
        # Expand the root node
        root = Node(0)
        state, policy, value = network.initial_inference(obs)
        root.expand_node(0, state.squeeze().detach().numpy(), 0, policy.squeeze().detach().numpy())
        root.add_exploration_noise(self.dirichlet_alpha, self.exploration_fraction)  # if train:

        min_max_stats = MinMaxStats(None)
        for _ in range(self.num_simulations):
            node = root
            search_path = [node]

            while node.expanded:
                # Descend until reaching a child that has not been expanded yet
                action, node = self._select_child(node, min_max_stats)
                search_path.append(node)

            # Expand the selected child node
            parent = search_path[-2]
            next_state, reward, policy, value = network.recurrent_inference(
                torch.from_numpy(parent.hidden_state).unsqueeze(0), np.array([action])
            )
            node.expand_node(reward.item(), next_state.squeeze().detach().numpy(), 0, policy.squeeze().detach().numpy())

            # Back up the search result along the path to the root
            self._backpropagate(search_path, value.item(), 0, min_max_stats)

        return root
Example #4
    def play_game(self) -> Game:
        game = Game(self.config.discount)
        min_max_stats = MinMaxStats(self.config.known_bounds)

        # Use exponential decay to reduce the temperature over time
        temperature = max(
            self.temperature * (1 - self.config.temperature_decay_factor)**
            self.network.training_steps(), self.config.temperature_min)
        self.metrics_temperature(temperature)

        while not game.terminal() and len(
                game.history) < self.config.max_moves:

            # At the root of the search tree we use the representation function to
            # obtain a hidden state given the current observation.
            root = Node(0)
            current_observation = game.get_observation_from_index(-1)
            network_output = self.network.initial_inference(
                current_observation)
            expand_node(root, game.to_play(), game.legal_actions(),
                        network_output)
            backpropagate([root], network_output.value, game.to_play(),
                          self.config.discount, min_max_stats)
            add_exploration_noise(self.config, root)

            # We then run a Monte Carlo Tree Search using only action sequences and the
            # model learned by the network.
            run_mcts(self.config, root, game.action_history(), self.network,
                     min_max_stats)
            action = select_action(root, temperature)
            game.apply(action)
            game.store_search_statistics(root)

        return game
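The select_action call above is not shown in these examples; in MuZero it usually samples from the root's children in proportion to their visit counts softened by the temperature. A minimal sketch under that assumption (the function body here is illustrative, not the source implementation):

import numpy as np

def select_action(root: Node, temperature: float):
    # Sample an action proportional to visit_count ** (1 / temperature);
    # temperature == 0 degenerates to the greedy (most visited) action.
    actions = list(root.children.keys())
    visit_counts = np.array(
        [root.children[a].visit_count for a in actions], dtype=np.float64)
    if temperature == 0:
        return actions[int(np.argmax(visit_counts))]
    distribution = visit_counts ** (1.0 / temperature)
    distribution /= distribution.sum()
    return actions[np.random.choice(len(actions), p=distribution)]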
Example #5
    def _ucb_score(self, parent: Node, child: Node, min_max_stats: MinMaxStats) -> float:
        """
        Compute the UCB (pUCT) score for a child node.
        """
        pb_c = np.log((parent.visit_count + self.pb_c_base + 1) / self.pb_c_base) + self.pb_c_init
        pb_c *= np.sqrt(parent.visit_count) / (child.visit_count + 1)

        prior_score = pb_c * child.prior
        value_score = min_max_stats.normalize(child.value)
        return prior_score + value_score
Example #6
def ucb_score(config: MuZeroConfig, parent: Node, child: Node,
              min_max_stats: MinMaxStats) -> float:
    pb_c = math.log((parent.visit_count + config.pb_c_base + 1) /
                    config.pb_c_base) + config.pb_c_init
    pb_c *= math.sqrt(parent.visit_count) / (child.visit_count + 1)

    prior_score = pb_c * child.prior
    if child.visit_count > 0:
        value_score = min_max_stats.normalize(child.reward +
                                              config.discount * child.value())
    else:
        value_score = 0
    return prior_score + value_score
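Both ucb_score variants normalize the exploitation term with MinMaxStats, which tracks the minimum and maximum values seen during the search so Q-values can be rescaled into [0, 1]. A minimal sketch in the spirit of the MuZero pseudocode (the known_bounds handling is an assumption):

class MinMaxStats:
    def __init__(self, known_bounds=None):
        self.minimum = known_bounds.min if known_bounds else float('inf')
        self.maximum = known_bounds.max if known_bounds else -float('inf')

    def update(self, value: float) -> None:
        self.minimum = min(self.minimum, value)
        self.maximum = max(self.maximum, value)

    def normalize(self, value: float) -> float:
        if self.maximum > self.minimum:
            # Only rescale once meaningful bounds have been observed.
            return (value - self.minimum) / (self.maximum - self.minimum)
        return value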
Example #7
    def run_mcts(self, root, num_moves):
        min_max_stats = MinMaxStats(self.config.known_bounds)

        for _ in range(self.config.num_simulations):
            # root.print()
            action, leaf, cur_moves = self.select_leaf(root, num_moves, min_max_stats)
            to_play = Player(cur_moves % self.config.game_config.num_players)

            batch_hidden_state = tf.expand_dims(leaf.parent.hidden_state, axis=0)
            network_output = self.network.recurrent_inference(batch_hidden_state, [action]).split_batch()[0]
            self.expand_node(node=leaf, to_play=to_play, actions=self.config.game_config.action_space,
                             network_output=network_output)
            self.backpropagate(leaf, network_output.value, to_play, min_max_stats)
Example #8
    def make_move(self, game: Game) -> Action:
        root = Node()
        min_max_stats = MinMaxStats(
            known_bounds=self.config.value_config.known_bounds)
        observation = ObservationBatch(
            tf.expand_dims(game.history.make_image(), axis=0))
        self.expand_node(node=root,
                         actions=game.legal_actions(),
                         network_output=self.network.initial_inference(
                             observation).split_batch()[0])
        self.add_exploration_noise(root)
        self.run_mcts(root, min_max_stats)

        action_space = self.config.action_space()
        policy = [
            root.children[a].visit_count /
            root.visit_count if a in root.children else 0 for a in action_space
        ]
        game.store_search_statistics(root.value, Policy(tf.constant(policy)))
        return self.select_action(root, len(game.history))
Example #9
    def _backpropagate(self, search_path: List[Node], value: float, player: int, min_max_stats: MinMaxStats):
        for node in reversed(search_path):
            # Accumulate the value from the perspective of the player at each node.
            node.value_sum += value if node.player == player else -value
            node.visit_count += 1
            min_max_stats.update(node.value)
            value = node.reward + self.discount * value
Example #10
    def ucb_score(self, node: Node, min_max_stats: MinMaxStats) -> float:
        # Unvisited nodes carry a NaN value, so fall back to the configured default.
        exploitation_score = (self.config.mcts_config.default_value
                              if isnan(node.value)
                              else node.reward + self.effective_discount * node.value)
        exploration_score = node.prior * self.config.exploration_function(
            node.parent.visit_count, node.visit_count)
        return min_max_stats.normalize(exploitation_score) + exploration_score
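The exploration_function used in Example #10 is not shown; it plausibly computes the same pb_c term that Examples #5 and #6 multiply by the prior. A sketch under that assumption (the default constants are assumptions as well):

import math

def exploration_function(parent_visits: int, child_visits: int,
                         pb_c_base: float = 19652, pb_c_init: float = 1.25) -> float:
    # pUCT exploration term without the prior; the caller multiplies by node.prior.
    pb_c = math.log((parent_visits + pb_c_base + 1) / pb_c_base) + pb_c_init
    return pb_c * math.sqrt(parent_visits) / (child_visits + 1)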