def sample(self, obs):
    """Draw a stochastic action for ``obs`` by perturbing the policy mean.

    Calls ``self.policy(obs)`` and returns ``means + scale * noise``, where
    ``scale = exp(second_policy_output / 2)`` and ``noise`` comes from
    ``layers.gaussian_random`` with shape ``(self.act_dim,)`` and dtype
    ``float32`` (its mean/std arguments are left at the library defaults).

    Args:
        obs: Observation input, forwarded unchanged to ``self.policy``.

    Returns:
        The sampled action (mean plus scaled Gaussian noise).
    """
    means, logvars = self.policy(obs)

    # NOTE(review): the expression below treats the second policy output as
    # a log-variance (exp(x / 2) == stddev). The `policy` method visible next
    # to this one already applies `exp` to that output before returning it,
    # which would make this a second exponentiation. The math is kept as it
    # was -- confirm which of the two methods reflects the intended contract.
    stddev = layers.exp(logvars / 2.0)
    noise = layers.gaussian_random(shape=(self.act_dim,), dtype='float32')
    return means + stddev * noise
def policy(self, obs):
    """Run the actor network and return the action mean and a scale term.

    ``obs`` goes through ``self.fc1`` then ``self.fc2``; the result feeds two
    separate heads, ``self.mean_linear`` and ``self.log_std_linear``. The
    second head's output ``x`` is rescaled affinely with
    ``LOG_STD_MIN + 0.5 * (LOG_STD_MAX - LOG_STD_MIN) * (x + 1)`` and then
    exponentiated.

    Args:
        obs: Observation input to the first layer.

    Returns:
        A ``(mu, scale)`` pair, where ``scale`` is the exponential of the
        rescaled second-head output (i.e. it is NOT a log-quantity).
    """
    features = self.fc2(self.fc1(obs))
    mu = self.mean_linear(features)
    raw_log_std = self.log_std_linear(features)

    # The affine map sends -1 -> LOG_STD_MIN and +1 -> LOG_STD_MAX.
    # NOTE(review): nothing visible here bounds `raw_log_std` to [-1, 1];
    # presumably the head is expected to squash its output -- confirm.
    half_range = 0.5 * (LOG_STD_MAX - LOG_STD_MIN)
    rescaled_log_std = LOG_STD_MIN + half_range * (raw_log_std + 1)

    # NOTE(review): the value returned is already exponentiated; callers
    # that exponentiate it again should be checked.
    scale = layers.exp(rescaled_log_std)
    return mu, scale