def _build_actor(self):
    """Instantiate the actor's Gaussian policy network.

    Builds a normal-distribution policy from the stored observation shape,
    action size and actor encoder factory, and assigns it to ``self.policy``.
    """
    # NOTE(review): logstd clamp range and use_std_parameter semantics are
    # defined by create_normal_policy — confirm against its definition.
    self.policy = create_normal_policy(
        self.observation_shape,
        self.action_size,
        self.actor_encoder_factory,
        min_logstd=-6.0,
        max_logstd=0.0,
        use_std_parameter=True,
    )
def _build_actor(self):
    """Instantiate the actor's Gaussian policy network.

    Builds a normal-distribution policy from the stored observation shape,
    action size, batch-norm flag and encoder parameters, and assigns it to
    ``self.policy``.
    """
    # NOTE(review): logstd clamp range and use_std_parameter semantics are
    # defined by create_normal_policy — confirm against its definition.
    self.policy = create_normal_policy(
        self.observation_shape,
        self.action_size,
        self.use_batch_norm,
        min_logstd=-6.0,
        max_logstd=0.0,
        use_std_parameter=True,
        encoder_params=self.encoder_params,
    )
def test_create_normal_policy(observation_shape, action_size, batch_size, use_batch_norm):
    """Check that create_normal_policy builds a NormalPolicy whose forward
    pass maps a batch of observations to a (batch_size, action_size) tensor.

    Original was four statements fused onto one line with no separators,
    which is a SyntaxError; restored to valid multi-line form.
    """
    policy = create_normal_policy(observation_shape, action_size, use_batch_norm)
    assert isinstance(policy, NormalPolicy)

    # Forward a random batch and verify the output action shape.
    x = torch.rand((batch_size,) + observation_shape)
    y = policy(x)
    assert y.shape == (batch_size, action_size)
def _build_actor(self):
    """Instantiate the actor's Gaussian policy network.

    Builds a normal-distribution policy from the stored observation shape,
    action size, batch-norm flag and encoder parameters, and assigns it to
    ``self.policy``. Unlike sibling variants, no logstd bounds are passed,
    so create_normal_policy's defaults apply.
    """
    self.policy = create_normal_policy(
        self.observation_shape,
        self.action_size,
        self.use_batch_norm,
        encoder_params=self.encoder_params,
    )
def _build_actor(self):
    """Instantiate the actor's Gaussian policy network.

    Builds a normal-distribution policy from the stored observation shape,
    action size and actor encoder factory, and assigns it to ``self.policy``.
    All other policy options fall back to create_normal_policy's defaults.
    """
    self.policy = create_normal_policy(
        self.observation_shape,
        self.action_size,
        self.actor_encoder_factory,
    )