Exemplo n.º 1
0
 def step(self, obs):
     """Sample one action for ``obs`` from the actor's categorical policy.

     An observation with fewer than two dimensions gets a leading axis
     added before it is handed to ``self.actor``.

     Args:
         obs: Array-like observation exposing ``ndim`` and NumPy-style
             indexing.

     Returns:
         A pair ``(action, log_prob)``: element 0 of the sampled action
         after ``.numpy()`` conversion, and the log-probability of the
         sampled action under the distribution.
     """
     # Add a leading axis when the input has fewer than two dimensions.
     actor_input = obs[np.newaxis, :] if obs.ndim < 2 else obs
     policy = Categorical(probs=self.actor(actor_input))
     sampled = policy.sample()
     # Only the first sampled entry is returned as the action.
     first_action = sampled.numpy()[0]
     return first_action, policy.log_prob(sampled)
Exemplo n.º 2
0
 def infer(self, obs, act):
     """Score the actions ``act`` against the policy and critic for ``obs``.

     Args:
         obs: Observations passed to both ``self.actor`` and ``self.critic``.
         act: Actions whose log-probability is evaluated.

     Returns:
         A triple ``(log_probs, values, entropy)``: the log-probability of
         ``act`` under the categorical distribution built from the actor
         output, the critic output after ``tf.squeeze``, and the entropy of
         that distribution.
     """
     policy = Categorical(probs=self.actor(obs))
     log_probs = policy.log_prob(act)
     entropy = policy.entropy()
     # Drop size-1 dimensions from the critic output before returning it.
     values = tf.squeeze(self.critic(obs))
     return log_probs, values, entropy
Exemplo n.º 3
0
 def call(self, state):
     """Draw an action for ``state`` and report its log-probability.

     A ``state`` with fewer than two dimensions gets a leading axis added
     before it is passed through ``self.net``.

     Args:
         state: Array-like input exposing ``ndim`` and NumPy-style indexing.

     Returns:
         A pair ``(action, log_pi)``: the sampled action converted with
         ``.numpy()``, and the log-probability of that sample under the
         categorical distribution built from the network output.
     """
     needs_leading_axis = state.ndim < 2
     net_input = state[np.newaxis, :] if needs_leading_axis else state
     probs = self.net(net_input)
     policy = Categorical(probs=probs)
     drawn = policy.sample()
     drawn_log_prob = policy.log_prob(drawn)
     return drawn.numpy(), drawn_log_prob