class CatDqnAgent(DqnAgent): """Agent for Categorical DQN algorithm.""" def __init__(self, n_atoms=51, **kwargs): """Standard init, and set the number of probability atoms (bins).""" super().__init__(**kwargs) self.n_atoms = self.model_kwargs["n_atoms"] = n_atoms def initialize(self, env_spaces, share_memory=False, global_B=1, env_ranks=None): super().initialize(env_spaces, share_memory, global_B, env_ranks) # Overwrite distribution. self.distribution = CategoricalEpsilonGreedy(dim=env_spaces.action.n, z=torch.linspace(-1, 1, self.n_atoms)) # z placeholder for init. def give_V_min_max(self, V_min, V_max): self.V_min = V_min self.V_max = V_max self.distribution.set_z(torch.linspace(V_min, V_max, self.n_atoms)) @torch.no_grad() def step(self, observation, prev_action, prev_reward): """Compute the discrete distribution for the Q-value for each action for each state/observation (no grad).""" prev_action = self.distribution.to_onehot(prev_action) model_inputs = buffer_to((observation, prev_action, prev_reward), device=self.device) p = self.model(*model_inputs) p = p.cpu() action = self.distribution.sample(p) agent_info = AgentInfo(p=p) # Only change from DQN: q -> p. action, agent_info = buffer_to((action, agent_info), device="cpu") return AgentStep(action=action, agent_info=agent_info)
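# -----------------------------------------------------------------------------
# A minimal usage sketch, assuming rlpyt's Atari variants (AtariCatDqnAgent,
# CategoricalDQN) and illustrative V_min/V_max values. The support bounds are
# normally supplied by the algorithm rather than the user: CategoricalDQN
# calls agent.give_V_min_max(V_min, V_max) during its own initialization,
# which replaces the placeholder z set in initialize() above.

from rlpyt.agents.dqn.atari.atari_catdqn_agent import AtariCatDqnAgent
from rlpyt.algos.dqn.cat_dqn import CategoricalDQN

agent = AtariCatDqnAgent(n_atoms=51)  # n_atoms is forwarded to the model kwargs.
algo = CategoricalDQN(V_min=-10., V_max=10.)  # Calls agent.give_V_min_max(...)
                                              # when the runner initializes it.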