def init(
        self,
        discrete_state,
        discrete_action,
        dim_state,
        dim_action,
        num_heads,
        layers=None,
        biased_head=True,
    ):
        """Set up state/action metadata and build an ensemble Q-function.

        Discrete spaces store a cardinality and an empty shape; continuous
        spaces store -1 as the cardinality and a 1-tuple dimension.
        """
        if discrete_state:
            self.num_states, self.dim_state = dim_state, ()
        else:
            self.num_states, self.dim_state = -1, (dim_state,)

        if discrete_action:
            self.num_actions, self.dim_action = dim_action, ()
        else:
            self.num_actions, self.dim_action = -1, (dim_action,)

        # Default hidden architecture when none is given.
        if layers is None:
            layers = [32, 32]

        self.q_function = NNEnsembleQFunction(
            dim_state=self.dim_state,
            dim_action=self.dim_action,
            num_states=self.num_states,
            num_actions=self.num_actions,
            num_heads=num_heads,
            layers=layers,
            biased_head=biased_head,
        )
# Exemplo n.º 2
# 0
    def default(
        cls,
        environment,
        critic=None,
        policy=None,
        lr=3e-4,
        policy_update_frequency=2,
        clip_gradient_val=10,
        *args,
        **kwargs,
    ):
        """See `AbstractAgent.default'."""
        # Fall back to the standard networks for this environment.
        critic = NNEnsembleQFunction.default(environment) if critic is None else critic
        policy = NNPolicy.default(environment) if policy is None else policy

        # Jointly optimize actor and critic parameters with a single Adam.
        parameters = chain(policy.parameters(), critic.parameters())
        optimizer = Adam(parameters, lr=lr)

        return super().default(
            environment,
            critic=critic,
            policy=policy,
            optimizer=optimizer,
            policy_update_frequency=policy_update_frequency,
            clip_gradient_val=clip_gradient_val,
            *args,
            **kwargs,
        )
# Exemplo n.º 3
# 0
 def default(cls, environment, critic=None, exploration_noise=None, *args, **kwargs):
     """Get Default TD3 agent.

     Parameters
     ----------
     environment: environment to build the agent for.
     critic: optional critic; defaults to an ensemble Q-function.
     exploration_noise: optional exploration noise; defaults to Constant(0.1).
     """
     if critic is None:
         critic = NNEnsembleQFunction.default(environment)
     # BUG FIX: the original only bound `noise` inside the `is None` branch,
     # so passing an explicit exploration_noise raised NameError below.
     noise = exploration_noise
     if noise is None:
         noise = Constant(0.1)
     return super().default(
         environment, critic=critic, exploration_noise=noise, *args, **kwargs
     )
# Exemplo n.º 4
# 0
    def init(
        self,
        discrete_state,
        discrete_action,
        dim_state,
        dim_action,
        num_heads,
        num_samples=1,
        layers=None,
        biased_head=True,
    ):
        """Build critic (single or ensemble), policy, and integrated value function.

        Discrete spaces store a cardinality and an empty shape; continuous
        spaces store -1 as the cardinality and a 1-tuple dimension. When
        `num_heads` is None a single NNQFunction is used, otherwise an
        NNEnsembleQFunction with that many heads.
        """
        if discrete_state:
            self.num_states, self.dim_state = dim_state, ()
        else:
            self.num_states, self.dim_state = -1, (dim_state,)

        if discrete_action:
            self.num_actions, self.dim_action = dim_action, ()
        else:
            self.num_actions, self.dim_action = -1, (dim_action,)

        # Default hidden architecture when none is given.
        if layers is None:
            layers = [32, 32]

        # Keyword arguments shared by every network built below.
        common = dict(
            dim_state=self.dim_state,
            dim_action=self.dim_action,
            num_states=self.num_states,
            num_actions=self.num_actions,
            layers=layers,
            biased_head=biased_head,
        )

        if num_heads is None:
            self.q_function = NNQFunction(**common)
        else:
            self.q_function = NNEnsembleQFunction(num_heads=num_heads, **common)

        self.policy = NNPolicy(**common)

        # Value function obtained by integrating Q over policy samples.
        self.value_function = IntegrateQValueFunction(
            q_function=self.q_function,
            policy=self.policy,
            num_samples=num_samples,
        )
 def test_input_transform(self, num_heads, batch_size):
     """Check output shape/dtype of an ensemble Q-function with an input transform."""
     q_function = NNEnsembleQFunction(
         dim_state=(2, ),
         dim_action=(1, ),
         num_heads=num_heads,
         layers=[64, 64],
         non_linearity="Tanh",
         input_transform=StateTransform(),
     )
     state = random_tensor(False, 2, batch_size)
     action = random_tensor(False, 1, batch_size)
     value = q_function(state, action)

     # Batched calls prepend the batch dimension to the per-head values.
     expected = [batch_size, num_heads] if batch_size else [num_heads]
     assert value.shape == torch.Size(expected)
     assert value.dtype is torch.get_default_dtype()
# Exemplo n.º 6
# 0
    def default(cls, environment, policy=None, critic=None, lr=3e-4, *args, **kwargs):
        """See `AbstractAgent.default'."""
        # Fall back to the default (non-jitted) networks for this environment.
        critic = (
            NNEnsembleQFunction.default(environment, jit_compile=False)
            if critic is None
            else critic
        )
        policy = (
            NNPolicy.default(environment, jit_compile=False)
            if policy is None
            else policy
        )

        # One optimizer over the joint actor/critic parameter set.
        parameters = chain(policy.parameters(), critic.parameters())

        return super().default(
            environment,
            critic=critic,
            policy=policy,
            optimizer=Adam(parameters, lr=lr),
            *args,
            **kwargs,
        )
    def test_from_q_function(self, discrete_state, discrete_action, dim_state,
                             dim_action, num_heads):
        """Check that from_q_function yields a new ensemble with the right head count."""
        if discrete_state:
            num_states, dim_state = dim_state, ()
        else:
            num_states, dim_state = -1, (dim_state,)

        if discrete_action:
            num_actions, dim_action = dim_action, ()
        else:
            num_actions, dim_action = -1, (dim_action,)

        # Skip the unsupported discrete-state / continuous-action combination.
        if not discrete_state or discrete_action:
            q_function = NNQFunction(
                dim_state=dim_state,
                num_states=num_states,
                dim_action=dim_action,
                num_actions=num_actions,
            )

            other = NNEnsembleQFunction.from_q_function(q_function, num_heads)

            # The factory must return a distinct object with the requested heads.
            assert q_function is not other
            assert other.num_heads == num_heads