def _build_actor(self):
    """Create the deterministic residual policy and store it on ``self.policy``.

    The policy is built by ``create_deterministic_residual_policy`` from
    this object's ``observation_shape``, ``action_size``,
    ``action_flexibility`` and ``use_batch_norm`` attributes (passed
    positionally, in that order), with ``encoder_params`` forwarded as a
    keyword argument.
    """
    residual_policy = create_deterministic_residual_policy(
        self.observation_shape,
        self.action_size,
        self.action_flexibility,
        self.use_batch_norm,
        encoder_params=self.encoder_params,
    )
    self.policy = residual_policy
def test_create_deterministic_residual_policy(observation_shape, action_size,
                                              scale, batch_size,
                                              use_batch_norm):
    """Check the factory's return type and the policy's output shape.

    Builds a policy via ``create_deterministic_residual_policy``, verifies
    it is a ``DeterministicResidualPolicy``, then feeds it a random batch of
    observations and actions and asserts the result has shape
    ``(batch_size, action_size)``.

    NOTE(review): the arguments are presumably supplied by pytest
    parametrization or fixtures that are not visible in this excerpt.
    """
    residual_policy = create_deterministic_residual_policy(
        observation_shape, action_size, scale, use_batch_norm)
    assert isinstance(residual_policy, DeterministicResidualPolicy)

    # Random inputs: observations are drawn first, then actions, so the
    # random number generator is consumed in the same order as before.
    batched_obs_shape = (batch_size, ) + observation_shape
    observations = torch.rand(batched_obs_shape)
    actions = torch.rand(batch_size, action_size)

    output = residual_policy(observations, actions)

    expected_shape = (batch_size, action_size)
    assert output.shape == expected_shape
def _build_actor(self):
    """Create the deterministic residual policy and store it on ``self.policy``.

    The policy is built by ``create_deterministic_residual_policy`` from
    this object's ``observation_shape``, ``action_size``,
    ``action_flexibility`` and ``actor_encoder_factory`` attributes, passed
    positionally in that order.
    """
    residual_policy = create_deterministic_residual_policy(
        self.observation_shape,
        self.action_size,
        self.action_flexibility,
        self.actor_encoder_factory,
    )
    self.policy = residual_policy