示例#1
0
    def test_forward(self, num_states, num_actions, batch_size):
        """Evaluate a tabular Q-function on a (state, action) pair.

        The result must have shape ``[batch_size]`` (``[]`` when
        ``batch_size`` is falsy) and use torch's default dtype.
        """
        table = TabularQFunction(num_states=num_states, num_actions=num_actions)

        # The state is drawn before the action; keep that order.
        state = random_tensor(True, num_states, batch_size)
        action = random_tensor(True, num_actions, batch_size)
        q_value = table(state, action)

        expected_dims = [batch_size] if batch_size else []
        assert q_value.shape == torch.Size(expected_dims)
        assert q_value.dtype is torch.get_default_dtype()
示例#2
0
 def test_forward(
     self, discrete_state, discrete_action, dim_state, dim_action, batch_size
 ):
     """Evaluate ``self.q_function`` on a random (state, action) pair.

     Nothing is checked when the state is discrete and the action is
     continuous.  Otherwise the value must have shape ``[batch_size]``
     (``[]`` when ``batch_size`` is falsy) and torch's default dtype.
     """
     if discrete_state and not discrete_action:
         # This combination is not exercised by the test.
         return

     self.init(discrete_state, discrete_action, dim_state, dim_action)
     state = random_tensor(discrete_state, dim_state, batch_size)
     action = random_tensor(discrete_action, dim_action, batch_size)
     q_value = self.q_function(state, action)

     expected_dims = [batch_size] if batch_size else []
     assert q_value.shape == torch.Size(expected_dims)
     assert q_value.dtype is torch.get_default_dtype()
示例#3
0
 def test_input_transform(self, batch_size):
     """Build an ``NNQFunction`` with a ``StateTransform`` input transform.

     The Q-value of a random (state, action) pair must have shape
     ``[batch_size]`` (``[]`` when ``batch_size`` is falsy) and torch's
     default dtype.
     """
     q_function = NNQFunction(
         dim_state=(2,),
         dim_action=(1,),
         layers=[64, 64],
         non_linearity="Tanh",
         input_transform=StateTransform(),
     )

     # State first, then action: same draw order as the arguments.
     state = random_tensor(False, 2, batch_size)
     action = random_tensor(False, 1, batch_size)
     q_value = q_function(state, action)

     expected_dims = [batch_size] if batch_size else []
     assert q_value.shape == torch.Size(expected_dims)
     assert q_value.dtype is torch.get_default_dtype()
示例#4
0
    def test_goal(self, batch_size):
        """Check a goal-conditioned policy reacts to a change of goal.

        The policy is initialised with a random goal and an action is
        sampled and checked for shape and dtype.  After ``set_goal`` is
        called with another random goal, no entry of the new distribution
        mean may equal the corresponding entry of the old one.
        """
        first_goal = random_tensor(False, 3, None)
        self.init(False, False, 4, 2, goal=first_goal)
        state = random_tensor(False, 4, batch_size)

        pi = tensor_to_distribution(self.policy(state))
        action = pi.sample()
        expected_dims = [batch_size, 2] if batch_size else [2]
        assert action.shape == torch.Size(expected_dims)
        assert action.dtype is torch.get_default_dtype()

        second_goal = random_tensor(False, 3, None)
        self.policy.set_goal(second_goal)
        other_pi = tensor_to_distribution(self.policy(state))

        unchanged = other_pi.mean == pi.mean
        assert not torch.any(unchanged)
示例#5
0
    def test_call(self, discrete_state, discrete_action, dim_state, dim_action,
                  batch_size):
        """Check the distribution returned by the policy and a sample of it.

        A distribution with enumerable support must be a ``Categorical``
        whose logits span ``self.num_actions``.  Any other must be a
        ``MultivariateNormal`` whose mean and covariance match
        ``self.dim_action``.  Every expected shape gains a leading
        ``batch_size`` axis when ``batch_size`` is truthy.
        """
        self.init(discrete_state, discrete_action, dim_state, dim_action)
        state = random_tensor(discrete_state, dim_state, batch_size)
        distribution = tensor_to_distribution(self.policy(state))
        sample = distribution.sample()

        # Leading axes shared by all expected shapes below.
        batch_dims = (batch_size, ) if batch_size else ()

        if not distribution.has_enumerate_support:  # Continuous
            assert isinstance(distribution, MultivariateNormal)
            assert distribution.mean.shape == batch_dims + self.dim_action
            cov_dims = (self.dim_action[0], self.dim_action[0])
            assert distribution.covariance_matrix.shape == batch_dims + cov_dims
            assert sample.shape == batch_dims + (dim_action, )
            return

        # Discrete
        assert isinstance(distribution, Categorical)
        assert distribution.logits.shape == batch_dims + (self.num_actions, )
        assert sample.shape == batch_dims
示例#6
0
    def test_forward(self, dim_state, dim_action, batch_size, deterministic):
        """Check the policy output for deterministic and stochastic modes.

        A deterministic policy must produce a ``Delta`` distribution, a
        stochastic one a ``MultivariateNormal``.  The mean and sample shapes
        are checked in both modes; the covariance shape is checked only for
        the stochastic case.
        """
        self.init(False, False, dim_state, dim_action, deterministic)
        state = random_tensor(False, dim_state, batch_size)
        distribution = tensor_to_distribution(self.policy(state))
        sample = distribution.sample()

        expected_cls = Delta if deterministic else MultivariateNormal
        assert isinstance(distribution, expected_cls)

        # Collect the batch-dependent expectations once, then assert.
        if batch_size:
            mean_shape = (batch_size,) + self.dim_action
            batch_dims = (batch_size,)
            sample_shape = (batch_size, dim_action)
        else:
            mean_shape = self.dim_action
            batch_dims = ()
            sample_shape = torch.Size((dim_action,))

        assert distribution.mean.shape == mean_shape
        if not deterministic:
            cov_dims = (self.dim_action[0], self.dim_action[0])
            assert distribution.covariance_matrix.shape == batch_dims + cov_dims
        assert sample.shape == sample_shape
示例#7
0
    def test_from_nn(self, discrete_state, dim_state, dim_action, batch_size):
        """Wrap a hand-built ``HomoGaussianNN`` with ``NNPolicy.from_nn``.

        The network reuses the ``in_dim``/``out_dim`` of the policy created
        by ``self.init``.  A sampled action and the policy embeddings are
        checked for shape and dtype; the embeddings are expected to have
        width 20, the hidden-layer size used here.
        """
        self.init(discrete_state, False, dim_state, dim_action)

        net_kwargs = self.policy.nn.kwargs
        network = HomoGaussianNN(
            net_kwargs["in_dim"],
            net_kwargs["out_dim"],
            layers=[20, 20],
            biased_head=False,
        )
        policy = NNPolicy.from_nn(
            network,
            self.dim_state,
            self.dim_action,
            num_states=self.num_states,
            num_actions=self.num_actions,
        )

        state = random_tensor(discrete_state, dim_state, batch_size)
        action = tensor_to_distribution(policy(state)).sample()
        embeddings = policy.embeddings(state)

        if batch_size:
            action_dims = [batch_size, dim_action]
            embedding_dims = [batch_size, 20]
        else:
            action_dims = [dim_action]
            embedding_dims = [20]

        assert action.shape == torch.Size(action_dims)
        assert embeddings.shape == torch.Size(embedding_dims)
        assert action.dtype is torch.get_default_dtype()
        assert embeddings.dtype is torch.get_default_dtype()
示例#8
0
    def test_embeddings(self, discrete_state, dim_state, batch_size):
        """Check the shape and dtype of the value-function embeddings.

        The embeddings of a random state are expected to have a trailing
        dimension of 33, preceded by ``batch_size`` when it is truthy.
        """
        self.init(discrete_state, dim_state)
        state = random_tensor(discrete_state, dim_state, batch_size)
        features = self.value_function.embeddings(state)

        expected_dims = [batch_size, 33] if batch_size else [33]
        assert features.shape == torch.Size(expected_dims)
        assert features.dtype is torch.get_default_dtype()
    def test_forward(self, discrete_state, dim_state, num_heads, batch_size):
        """Evaluate the multi-head value function on a random state.

        The output must have one entry per head, with a leading
        ``batch_size`` axis when ``batch_size`` is truthy, and torch's
        default dtype.
        """
        self.init(discrete_state, dim_state, num_heads)
        state = random_tensor(discrete_state, dim_state, batch_size)
        head_values = self.value_function(state)

        expected_dims = [batch_size, num_heads] if batch_size else [num_heads]
        assert head_values.shape == torch.Size(expected_dims)
        assert head_values.dtype is torch.get_default_dtype()
示例#10
0
    def test_partial_q_function(self, num_states, num_actions, batch_size):
        """Call a tabular Q-function with a state only.

        Without an action argument the result must hold one value per
        action: shape ``[batch_size, num_actions]`` (``[num_actions]`` when
        ``batch_size`` is falsy), in torch's default dtype.
        """
        table = TabularQFunction(num_states=num_states, num_actions=num_actions)
        state = random_tensor(True, num_states, batch_size)

        per_action = table(state)

        expected_dims = [batch_size, num_actions] if batch_size else [num_actions]
        assert per_action.shape == torch.Size(expected_dims)
        assert per_action.dtype is torch.get_default_dtype()
示例#11
0
    def test_goal(self, batch_size):
        """Check that an ``NNPolicy`` built with a goal reacts to ``set_goal``.

        An action sampled under the initial goal is checked for shape and
        dtype.  After the goal is replaced by another random one, no entry
        of the new distribution mean may equal the matching old entry.
        """
        first_goal = random_tensor(False, 3, None)
        policy = NNPolicy(
            dim_state=(4, ),
            dim_action=(2, ),
            layers=[32, 32],
            goal=first_goal,
        )
        state = random_tensor(False, 4, batch_size)

        pi = tensor_to_distribution(policy(state))
        action = pi.sample()
        expected_dims = [batch_size, 2] if batch_size else [2]
        assert action.shape == torch.Size(expected_dims)
        assert action.dtype is torch.get_default_dtype()

        second_goal = random_tensor(False, 3, None)
        policy.set_goal(second_goal)
        other_pi = tensor_to_distribution(policy(state))

        unchanged = other_pi.mean == pi.mean
        assert not torch.any(unchanged)
示例#12
0
    def test_from_nn(
        self, discrete_state, discrete_action, dim_state, dim_action, batch_size
    ):
        """Wrap an ``nn.Linear`` layer with ``NNQFunction.from_nn``.

        Nothing is checked when the state is discrete and the action is
        continuous.  Otherwise the linear layer takes its sizes from the
        first entries of the ``in_dim``/``out_dim`` of the Q-function
        created by ``self.init``, and the resulting Q-value is checked for
        shape and dtype.
        """
        if discrete_state and not discrete_action:
            # This combination is not exercised by the test.
            return

        self.init(discrete_state, discrete_action, dim_state, dim_action)

        net_kwargs = self.q_function.nn.kwargs
        linear = nn.Linear(net_kwargs["in_dim"][0], net_kwargs["out_dim"][0])
        q_function = NNQFunction.from_nn(
            linear,
            self.dim_state,
            self.dim_action,
            num_states=self.num_states,
            num_actions=self.num_actions,
        )

        state = random_tensor(discrete_state, dim_state, batch_size)
        action = random_tensor(discrete_action, dim_action, batch_size)
        q_value = q_function(state, action)

        expected_dims = [batch_size] if batch_size else []
        assert q_value.shape == torch.Size(expected_dims)
        assert q_value.dtype is torch.get_default_dtype()
示例#13
0
 def test_input_transform(self, batch_size):
     """Build an ``NNPolicy`` with a ``StateTransform`` input transform.

     An action sampled for a random state must have shape
     ``[batch_size, 4]`` (``[4]`` when ``batch_size`` is falsy) and
     torch's default dtype.
     """
     policy = NNPolicy(
         dim_state=(2, ),
         dim_action=(4, ),
         layers=[64, 64],
         input_transform=StateTransform(),
     )

     state = random_tensor(False, 2, batch_size)
     pi = tensor_to_distribution(policy(state))
     action = pi.sample()

     expected_dims = [batch_size, 4] if batch_size else [4]
     assert action.shape == torch.Size(expected_dims)
     assert action.dtype is torch.get_default_dtype()
    def test_input_transform(self, num_heads, batch_size):
        """Build an ensemble value function with a ``StateTransform``.

        The value of a random state must have one entry per head, with a
        leading ``batch_size`` axis when ``batch_size`` is truthy, and
        torch's default dtype.
        """
        ensemble = NNEnsembleValueFunction(
            dim_state=(2, ),
            num_heads=num_heads,
            layers=[64, 64],
            non_linearity="Tanh",
            input_transform=StateTransform(),
        )
        state = random_tensor(False, 2, batch_size)
        head_values = ensemble(state)

        expected_dims = [batch_size, num_heads] if batch_size else [num_heads]
        assert head_values.shape == torch.Size(expected_dims)
        assert head_values.dtype is torch.get_default_dtype()
    def test_embeddings(self, discrete_state, dim_state, num_heads, batch_size,
                        biased_head):
        """Check the shape and dtype of the ensemble embeddings.

        The expected embedding width is the size of the last hidden layer,
        plus one when ``biased_head`` is set.  The embeddings carry a
        trailing ``num_heads`` axis and a leading ``batch_size`` axis when
        ``batch_size`` is truthy.
        """
        hidden_sizes = [64, 64]
        self.init(
            discrete_state,
            dim_state,
            num_heads,
            layers=hidden_sizes,
            biased_head=biased_head,
        )

        if biased_head:
            width = hidden_sizes[-1] + 1
        else:
            width = hidden_sizes[-1]

        state = random_tensor(discrete_state, dim_state, batch_size)
        features = self.value_function.embeddings(state)

        expected_dims = [width, num_heads]
        if batch_size:
            expected_dims = [batch_size, width, num_heads]
        assert features.shape == torch.Size(expected_dims)
        assert features.dtype is torch.get_default_dtype()
示例#16
0
    def test_partial_q_function(
        self, discrete_state, discrete_action, dim_state, dim_action, batch_size
    ):
        """Call ``self.q_function`` with a state only.

        Nothing is checked when the state is discrete and the action is
        continuous.  With discrete actions the result must hold one value
        per action; with continuous actions the call must raise
        ``NotImplementedError``.
        """
        if discrete_state and not discrete_action:
            # This combination is not exercised by the test.
            return

        self.init(discrete_state, discrete_action, dim_state, dim_action)
        state = random_tensor(discrete_state, dim_state, batch_size)

        if discrete_action:
            per_action = self.q_function(state)
            if batch_size:
                expected_dims = [batch_size, self.num_actions]
            else:
                expected_dims = [self.num_actions]
            assert per_action.shape == torch.Size(expected_dims)
            assert per_action.dtype is torch.get_default_dtype()
        else:
            with pytest.raises(NotImplementedError):
                self.q_function(state)
示例#17
0
    def test_call(self, dim_state, dim_action, batch_size):
        """Check the ``MultivariateNormal`` returned by the policy.

        The mean, covariance matrix and a sample are compared against shapes
        derived from ``dim_action``, each with a leading ``batch_size`` axis
        when ``batch_size`` is truthy.
        """
        self.init(dim_state, dim_action)
        state = random_tensor(False, dim_state, batch_size)

        distribution = tensor_to_distribution(self.policy(state))
        sample = distribution.sample()

        assert isinstance(distribution, MultivariateNormal)

        # Leading axes shared by all expected shapes below.
        batch_dims = (batch_size, ) if batch_size else ()
        vector_shape = batch_dims + (dim_action, )
        matrix_shape = batch_dims + (dim_action, dim_action)

        assert distribution.mean.shape == vector_shape
        assert distribution.covariance_matrix.shape == matrix_shape
        assert sample.shape == vector_shape
示例#18
0
    def test_from_nn(self, discrete_state, dim_state, batch_size):
        """Script a value function built with ``NNValueFunction.from_nn``.

        A ``DeterministicNN`` reusing the ``in_dim``/``out_dim`` of the value
        function created by ``self.init`` is wrapped and passed through
        ``torch.jit.script``.  The value and embeddings of a random state
        are then checked for shape and dtype; the embeddings are expected to
        have width 20, the hidden-layer size used here.
        """
        self.init(discrete_state, dim_state)

        net_kwargs = self.value_function.nn.kwargs
        network = DeterministicNN(
            net_kwargs["in_dim"],
            net_kwargs["out_dim"],
            layers=[20, 20],
            biased_head=False,
        )
        wrapped = NNValueFunction.from_nn(
            network,
            self.dim_state,
            num_states=self.num_states,
        )
        value_function = torch.jit.script(wrapped)

        state = random_tensor(discrete_state, dim_state, batch_size)
        value = value_function(state)
        embeddings = value_function.embeddings(state)

        if batch_size:
            value_dims = [batch_size]
            embedding_dims = [batch_size, 20]
        else:
            value_dims = []
            embedding_dims = [20]

        assert value.shape == torch.Size(value_dims)
        assert embeddings.shape == torch.Size(embedding_dims)
        assert value.dtype is torch.get_default_dtype()
        assert embeddings.dtype is torch.get_default_dtype()