Example #1
File: agents.py  Project: schouhy/erlyx
 def select_action(self,
                   observation: types.ObservationType) -> types.ActionData:
     # Epsilon-greedy exploration: with probability epsilon, return a
     # uniformly random action from the action space.
     if np.random.uniform() < self.epsilon:
         return types.ActionData(action=np.random.choice(self.action_space))
     # Otherwise sample an action according to the policy's distribution.
     distribution = self.policy.get_distribution(observation)
     return types.ActionData(
         action=np.random.choice(self.action_space, p=distribution))
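
The snippet above is a standard epsilon-greedy rule: with probability epsilon it explores with a uniformly random action, otherwise it samples an action from the policy's probability distribution. A minimal, framework-free sketch of the same idea (the function name, epsilon, action_space and policy_probs are illustrative stand-ins, not part of erlyx):

 import numpy as np

 def epsilon_greedy_sample(action_space, policy_probs, epsilon, rng=np.random):
     # With probability epsilon, explore with a uniformly random action.
     if rng.uniform() < epsilon:
         return rng.choice(action_space)
     # Otherwise sample an action according to the policy's probabilities.
     return rng.choice(action_space, p=policy_probs)

 # Usage: three actions, a fixed policy distribution, 5% exploration.
 actions = np.array([0, 1, 2])
 probs = np.array([0.1, 0.7, 0.2])
 action = epsilon_greedy_sample(actions, probs, epsilon=0.05)
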
Example #2
File: agents.py  Project: schouhy/erlyx
 def select_action(self,
                   observation: types.ObservationType) -> types.ActionData:
     # Keep a rolling window of recent observations (e.g. for frame stacking).
     self._memory_buffer.append(observation)
     # Epsilon-greedy exploration: occasionally return a uniformly random action.
     if np.random.uniform() < self.epsilon:
         return types.ActionData(action=np.random.choice(self.action_space))
     # Greedy branch: stack the buffered observations, query the policy, and
     # take the index of the highest-probability action.
     observation = np.asarray(list(self._memory_buffer))
     distribution = self.policy.get_distribution(observation)
     return types.ActionData(action=np.argmax(distribution))
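
This variant additionally maintains _memory_buffer, presumably a fixed-length deque whose contents are stacked into a single observation before querying the policy, and its greedy branch returns the index of the most probable action rather than sampling. A standalone sketch of that pattern (the class name, buffer_size and policy_fn are illustrative assumptions, not the erlyx API):

 from collections import deque

 import numpy as np

 class StackedGreedyAgent:
     # Illustrative only: stacks the last buffer_size observations, then acts greedily.
     def __init__(self, policy_fn, n_actions, buffer_size=4, epsilon=0.05):
         self._memory_buffer = deque(maxlen=buffer_size)  # rolling observation window
         self._policy_fn = policy_fn  # maps stacked observations -> action probabilities
         self._n_actions = n_actions
         self._epsilon = epsilon

     def select_action(self, observation):
         self._memory_buffer.append(observation)
         if np.random.uniform() < self._epsilon:
             return np.random.randint(self._n_actions)  # random exploration
         stacked = np.asarray(list(self._memory_buffer))  # shape: (<=buffer_size, *obs_shape)
         return int(np.argmax(self._policy_fn(stacked)))  # index of the most probable action
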
Example #3
File: agents.py  Project: schouhy/erlyx
 def select_action(self,
                   observation: types.ObservationType) -> types.ActionData:
     # Exploration via additive Gaussian noise: perturb the policy's action
     # distribution with zero-mean noise of scale sigma, then act greedily.
     distribution = self.policy.get_distribution(observation)
     noise = np.random.normal(loc=0.,
                              scale=self.sigma,
                              size=len(self.action_space))
     distribution += noise
     return types.ActionData(action=np.argmax(distribution))
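
In this last variant exploration comes from additive Gaussian noise on the policy's action distribution instead of an epsilon branch; the noise scale sigma controls how often a non-greedy action wins. A standalone sketch of the same idea (the function name, action_scores and the example values are illustrative):

 import numpy as np

 def noisy_greedy_action(action_scores, sigma, rng=np.random):
     # Add zero-mean Gaussian noise to each action's score, then act greedily.
     noise = rng.normal(loc=0.0, scale=sigma, size=len(action_scores))
     return int(np.argmax(action_scores + noise))

 # Usage: with a small sigma the highest-scoring action is almost always chosen;
 # a larger sigma makes the other actions increasingly likely.
 scores = np.array([0.2, 0.5, 0.3])
 action = noisy_greedy_action(scores, sigma=0.1)
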