Пример #1
0
 def sample(self, obs):
     """Draw an action from the Gaussian described by the policy output.

     Args:
         obs: Observation, forwarded unchanged to ``self.policy``.

     Returns:
         ``means + stddev * noise``, where ``stddev`` is
         ``exp(logvars / 2)`` and ``noise`` comes from
         ``layers.gaussian_random`` with shape ``(self.act_dim,)``.
     """
     means, logvars = self.policy(obs)
     # The second policy output is treated as a log-variance here, so
     # halving it before exponentiating yields the standard deviation.
     stddev = layers.exp(logvars / 2.0)
     # NOTE(review): the noise shape has no batch axis; if `obs` is batched
     # the same draw is presumably broadcast to every row -- confirm intended.
     noise = layers.gaussian_random(shape=(self.act_dim,), dtype='float32')
     return means + stddev * noise
Пример #2
0
 def policy(self, obs):
     """Compute the mean and the exponentiated scale head for ``obs``.

     Args:
         obs: Observation fed through ``fc1`` and then ``fc2``.

     Returns:
         A ``(mu, scale)`` pair. ``mu`` is the output of ``mean_linear``.
         ``scale`` is ``exp`` of the rescaled ``log_std_linear`` output,
         i.e. it has already been exponentiated and is not a logarithm.
     """
     features = self.fc2(self.fc1(obs))
     mu = self.mean_linear(features)
     raw = self.log_std_linear(features)

     # Affine rescale: raw == -1 maps to LOG_STD_MIN, raw == +1 maps to
     # LOG_STD_MAX.
     # NOTE(review): nothing here bounds `raw`; presumably log_std_linear
     # squashes its output to [-1, 1] -- confirm against its definition.
     half_span = 0.5 * (LOG_STD_MAX - LOG_STD_MIN)
     rescaled_log_std = LOG_STD_MIN + half_span * (raw + 1)

     return mu, layers.exp(rescaled_log_std)