def __init__(self,
                 env_spec,
                 n_agents,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False,
                 name='CentralizedCategoricalMLPPolicy'):
        assert isinstance(env_spec.action_space, akro.Discrete), (
            'Categorical policy only works with akro.Discrete action space.')

        self.centralized = True
        self.vectorized = True
        
        self._n_agents = n_agents
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.n

        self.name = name

        MLPModule.__init__(self,
                           input_dim=self._obs_dim,
                           output_dim=self._action_dim * self._n_agents,
                           hidden_sizes=hidden_sizes,
                           hidden_nonlinearity=hidden_nonlinearity,
                           hidden_w_init=hidden_w_init,
                           hidden_b_init=hidden_b_init,
                           output_nonlinearity=output_nonlinearity,
                           output_w_init=output_w_init,
                           output_b_init=output_b_init,
                           layer_normalization=layer_normalization)
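
The policy above emits a single flat vector of action_dim * n_agents logits for all agents at once. A minimal, hypothetical sketch (not from the source) of how such a flat output can be split back into per-agent categorical distributions:

# Hypothetical sketch: split flat centralized logits into per-agent categoricals.
import torch
from torch.distributions import Categorical

n_agents, action_dim = 3, 5
logits = torch.zeros(1, n_agents * action_dim)        # stands in for the MLP output
per_agent = logits.reshape(-1, n_agents, action_dim)  # (batch, agent, action)
dist = Categorical(logits=per_agent)                  # one categorical per agent
actions = dist.sample()                               # shape: (1, n_agents)
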
Example #2
    def __init__(self, env_spec, name="CategoricalMLPPolicy", **kwargs):
        self._obs_dim = env_spec.input_space.flat_dim
        self._action_dim = env_spec.output_space.flat_dim

        Policy.__init__(self, env_spec, name)
        MLPModule.__init__(self, input_dim=self._obs_dim,
                           output_dim=self._action_dim,
                           **kwargs)
Example #3
def test_dueling_output_values(output_dim, kernel_sizes, hidden_channels,
                               strides, paddings):

    batch_size = 64
    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (batch_size, in_channel, input_height, input_width)
    obs = torch.rand(input_shape)

    module = DiscreteDuelingCNNModule(input_shape=input_shape,
                                      output_dim=output_dim,
                                      hidden_channels=hidden_channels,
                                      hidden_sizes=hidden_channels,
                                      kernel_sizes=kernel_sizes,
                                      strides=strides,
                                      paddings=paddings,
                                      padding_mode='zeros',
                                      hidden_w_init=nn.init.ones_,
                                      output_w_init=nn.init.ones_,
                                      is_image=False)

    cnn = CNNModule(input_var=obs,
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_,
                    is_image=False)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]

    mlp_adv = MLPModule(
        flat_dim,
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    mlp_val = MLPModule(
        flat_dim,
        1,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    cnn_out = cnn(obs)
    val = mlp_val(torch.flatten(cnn_out, start_dim=1))
    adv = mlp_adv(torch.flatten(cnn_out, start_dim=1))
    output = val + (adv - adv.mean(1).unsqueeze(1))

    assert torch.all(torch.eq(output.detach(), module(obs).detach()))
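
The assertion checks the dueling aggregation that DiscreteDuelingCNNModule is expected to perform internally: Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')). Subtracting the mean advantage makes the decomposition identifiable: without it, any constant could be shifted between V and A while leaving Q unchanged.
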
Example #4
    def test_is_pickleable(self, input_dim, output_dim, hidden_sizes):
        """Check MLPModule is pickeable.

        Args:
            input_dim (int): Input dimension.
            output_dim (int): Output dimension.
            hidden_sizes (list[int]): Size of hidden layers.

        """
        input_val = torch.ones([1, input_dim], dtype=torch.float32)
        module = MLPModule(input_dim=input_dim,
                           output_dim=output_dim,
                           hidden_nonlinearity=torch.relu,
                           hidden_sizes=hidden_sizes,
                           hidden_w_init=nn.init.ones_,
                           output_w_init=nn.init.ones_,
                           output_nonlinearity=torch.nn.ReLU)

        output1 = module(input_val)

        h = pickle.dumps(module)
        model_pickled = pickle.loads(h)
        output2 = model_pickled(input_val)

        assert torch.all(torch.eq(output1, output2))
Example #5
    def __init__(self, env_spec, **kwargs):
        """
        Initialize class with multiple attributes.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            nn_module (nn.Module): Neural network module in PyTorch.
        """
        self._env_spec = env_spec
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim

        MLPModule.__init__(self,
                           input_dim=self._obs_dim + self._action_dim,
                           output_dim=1,
                           **kwargs)
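
The module above is a continuous Q-function head: it consumes the concatenation of observation and action and emits one scalar per sample. A hypothetical usage sketch (q_function, obs and act are assumed names, not from the source):

# Hypothetical sketch: the Q-network input is [observation, action].
q_input = torch.cat([obs, act], dim=1)   # shape: (batch, obs_dim + action_dim)
q_value = q_function(q_input)            # shape: (batch, 1)
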
Example #6
    def __init__(self,
                 input_shape,
                 output_dim,
                 kernel_sizes,
                 hidden_channels,
                 strides,
                 hidden_sizes=(32, 32),
                 cnn_hidden_nonlinearity=nn.ReLU,
                 mlp_hidden_nonlinearity=nn.ReLU,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False,
                 is_image=True):

        super().__init__()

        input_var = torch.zeros(input_shape)
        cnn_module = CNNModule(input_var=input_var,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               hidden_w_init=hidden_w_init,
                               hidden_b_init=hidden_b_init,
                               hidden_channels=hidden_channels,
                               hidden_nonlinearity=cnn_hidden_nonlinearity,
                               paddings=paddings,
                               padding_mode=padding_mode,
                               max_pool=max_pool,
                               layer_normalization=layer_normalization,
                               pool_shape=pool_shape,
                               pool_stride=pool_stride,
                               is_image=is_image)

        with torch.no_grad():
            cnn_out = cnn_module(input_var)
        flat_dim = torch.flatten(cnn_out, start_dim=1).shape[1]

        mlp_module = MLPModule(flat_dim,
                               output_dim,
                               hidden_sizes,
                               hidden_nonlinearity=mlp_hidden_nonlinearity,
                               hidden_w_init=hidden_w_init,
                               hidden_b_init=hidden_b_init,
                               output_nonlinearity=output_nonlinearity,
                               output_w_init=output_w_init,
                               output_b_init=output_b_init,
                               layer_normalization=layer_normalization)

        if mlp_hidden_nonlinearity is None:
            self._module = nn.Sequential(cnn_module, nn.Flatten(), mlp_module)
        else:
            self._module = nn.Sequential(cnn_module, mlp_hidden_nonlinearity(),
                                         nn.Flatten(), mlp_module)
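
Note the shape-inference idiom here: a zero tensor of the full input shape is pushed through the CNN under torch.no_grad() purely to discover the flattened feature size the MLP head needs. Example #8 below obtains the same number without a dummy forward pass by reading cnn_module.spec.output_space.flat_dim.
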
Example #7
    def __init__(self, env_spec, **kwargs):
        """Initialize class with multiple attributes.

        Args:
            env_spec (EnvSpec): Environment specification.
            **kwargs: Keyword arguments.

        """
        self._env_spec = env_spec
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim

        MLPModule.__init__(self,
                           input_dim=self._obs_dim + self._action_dim,
                           output_dim=1,
                           **kwargs)
Example #8
    def __init__(self,
                 spec,
                 image_format,
                 *,
                 kernel_sizes,
                 hidden_channels,
                 strides,
                 hidden_sizes=(32, 32),
                 cnn_hidden_nonlinearity=nn.ReLU,
                 mlp_hidden_nonlinearity=nn.ReLU,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 paddings=0,
                 padding_mode='zeros',
                 max_pool=False,
                 pool_shape=None,
                 pool_stride=1,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False):

        super().__init__()

        cnn_spec = InOutSpec(input_space=spec.input_space, output_space=None)
        cnn_module = CNNModule(spec=cnn_spec,
                               image_format=image_format,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               hidden_w_init=hidden_w_init,
                               hidden_b_init=hidden_b_init,
                               hidden_channels=hidden_channels,
                               hidden_nonlinearity=cnn_hidden_nonlinearity,
                               paddings=paddings,
                               padding_mode=padding_mode,
                               max_pool=max_pool,
                               layer_normalization=layer_normalization,
                               pool_shape=pool_shape,
                               pool_stride=pool_stride)
        flat_dim = cnn_module.spec.output_space.flat_dim

        output_dim = spec.output_space.flat_dim
        mlp_module = MLPModule(flat_dim,
                               output_dim,
                               hidden_sizes,
                               hidden_nonlinearity=mlp_hidden_nonlinearity,
                               hidden_w_init=hidden_w_init,
                               hidden_b_init=hidden_b_init,
                               output_nonlinearity=output_nonlinearity,
                               output_w_init=output_w_init,
                               output_b_init=output_b_init,
                               layer_normalization=layer_normalization)

        if mlp_hidden_nonlinearity is None:
            self._module = nn.Sequential(cnn_module, nn.Flatten(), mlp_module)
        else:
            self._module = nn.Sequential(cnn_module, mlp_hidden_nonlinearity(),
                                         nn.Flatten(), mlp_module)
Example #9
    def __init__(self, env_spec, name='DeterministicMLPPolicy', **kwargs):
        """Initialize class with multiple attributes.

        Args:
            env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
            name (str): Policy name.
            **kwargs: Additional keyword arguments passed to the MLPModule.

        """
        self._obs_dim = env_spec.observation_space.flat_dim
        self._action_dim = env_spec.action_space.flat_dim

        Policy.__init__(self, env_spec, name)

        MLPModule.__init__(self,
                           input_dim=self._obs_dim,
                           output_dim=self._action_dim,
                           **kwargs)
Example #10
    def test_mlp_with_learnable_non_linear_function(self):
        """Test MLPModule with learnable non-linear functions."""
        input_dim, output_dim, hidden_sizes = 1, 1, (3, 2)

        input_val = -torch.ones([1, input_dim], dtype=torch.float32)
        module = MLPModule(input_dim=input_dim,
                           output_dim=output_dim,
                           hidden_nonlinearity=torch.nn.PReLU(init=10.),
                           hidden_sizes=hidden_sizes,
                           hidden_w_init=nn.init.ones_,
                           output_w_init=nn.init.ones_,
                           output_nonlinearity=torch.nn.PReLU(init=1.))

        output = module(input_val)
        output.sum().backward()

        for tt in module.parameters():
            assert torch.all(torch.ne(tt.grad, 0))
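
PReLU(x) = max(0, x) + a * min(0, x), where the slope a is itself a learnable parameter. Driving a negative input through the network with init=10. keeps the negative branch active with a large nonzero slope, so the linear weights and both PReLU slopes all receive nonzero gradients, which is exactly what the loop asserts.
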
Example #11
    def test_output_values(self, input_dim, output_dim, hidden_sizes):
        """Test output values from MLPModule.

        Args:
            input_dim (int): Input dimension.
            output_dim (int): Output dimension.
            hidden_sizes (list[int]): Size of hidden layers.

        """
        input_val = torch.ones([1, input_dim], dtype=torch.float32)
        module_with_nonlinear_function_and_module = MLPModule(
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_nonlinearity=torch.relu,
            hidden_sizes=hidden_sizes,
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_,
            output_nonlinearity=torch.nn.ReLU)

        module_with_nonlinear_module_instance_and_function = MLPModule(
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_nonlinearity=torch.nn.ReLU(),
            hidden_sizes=hidden_sizes,
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_,
            output_nonlinearity=torch.relu)

        output1 = module_with_nonlinear_function_and_module(input_val)
        output2 = module_with_nonlinear_module_instance_and_function(input_val)

        expected_output = torch.full([1, output_dim],
                                     fill_value=5 * np.prod(hidden_sizes),
                                     dtype=torch.float32)

        assert torch.all(torch.eq(expected_output, output1))
        assert torch.all(torch.eq(expected_output, output2))
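
The expected value follows from the all-ones initialization: with zero biases and ReLU, every hidden unit outputs the (positive) sum of its inputs, so a ones-vector input grows to input_dim * prod(hidden_sizes) at the output. For instance, with input_dim = 5 and hidden_sizes = (2, 3): each first-layer unit outputs 5, each second-layer unit outputs 5 * 2 = 10, and each output unit sums three 10s to 30 = 5 * 2 * 3. The hard-coded factor of 5 thus implies the (elided) parametrize fixture always passes input_dim = 5.
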
Example #12
    def test_is_pickleable(self, input_dim, output_dim, hidden_sizes):
        input_val = torch.ones([1, 5], dtype=torch.float32)
        module = MLPModule(input_dim=input_dim,
                           output_dim=output_dim,
                           hidden_nonlinearity=None,
                           hidden_sizes=hidden_sizes,
                           hidden_w_init=nn.init.ones_,
                           output_w_init=nn.init.ones_)
        output1 = module(input_val)

        h = pickle.dumps(module)
        model_pickled = pickle.loads(h)
        output2 = model_pickled(input_val)

        assert torch.all(torch.eq(output1, output2))
Example #13
    def test_output_values(self, input_dim, output_dim, hidden_sizes):
        input_val = torch.ones([1, 5], dtype=torch.float32)
        module = MLPModule(input_dim=input_dim,
                           output_dim=output_dim,
                           hidden_nonlinearity=None,
                           hidden_sizes=hidden_sizes,
                           hidden_w_init=nn.init.ones_,
                           output_w_init=nn.init.ones_)
        output = module(input_val)

        expected_output = torch.full([1, output_dim],
                                     fill_value=5 * np.prod(hidden_sizes),
                                     dtype=torch.float32)

        self.assertEqual(torch.all(torch.eq(output, expected_output)), True)
Example #14
    def test_output_shape(self, obs_dim, act_dim, output_dim, hidden_sizes):
        env_spec = TfEnv(DummyBoxEnv())
        obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
        act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
        nn_module = MLPModule(input_dim=obs_dim + act_dim,
                              output_dim=output_dim,
                              hidden_nonlinearity=None,
                              hidden_sizes=hidden_sizes,
                              hidden_w_init=nn.init.ones_,
                              output_w_init=nn.init.ones_)

        qf = ContinuousNNQFunction(env_spec, nn_module)
        output = qf.get_qval(obs, act)

        assert output.shape == (1, 1)
Example #15
    def test_get_actions(self, obs_dim, act_dim, batch_size, hidden_sizes):
        env_spec = TfEnv(DummyBoxEnv())
        obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
        nn_module = MLPModule(
            input_dim=obs_dim,
            output_dim=act_dim,
            hidden_nonlinearity=None,
            hidden_sizes=hidden_sizes,
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_)

        policy = DeterministicPolicy(env_spec, nn_module)
        expected_output = np.full([batch_size, act_dim],
                                  fill_value=obs_dim * np.prod(hidden_sizes),
                                  dtype=np.float32)
        assert np.array_equal(policy.get_actions(obs), expected_output)
Example #16
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):

    input_width = 32
    input_height = 32
    in_channel = 3
    input_shape = (in_channel, input_height, input_width)
    spec = InOutSpec(akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
                     akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    obs = torch.rand(input_shape)

    module = DiscreteCNNModule(spec=spec,
                               image_format='NCHW',
                               hidden_channels=hidden_channels,
                               hidden_sizes=hidden_channels,
                               kernel_sizes=kernel_sizes,
                               strides=strides,
                               paddings=paddings,
                               padding_mode='zeros',
                               hidden_w_init=nn.init.ones_,
                               output_w_init=nn.init.ones_)

    cnn = CNNModule(spec=InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf), None),
                    image_format='NCHW',
                    hidden_channels=hidden_channels,
                    kernel_sizes=kernel_sizes,
                    strides=strides,
                    paddings=paddings,
                    padding_mode='zeros',
                    hidden_w_init=nn.init.ones_)
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]

    mlp = MLPModule(
        flat_dim,
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    cnn_out = cnn(obs)
    output = mlp(torch.flatten(cnn_out, start_dim=1))

    assert torch.all(torch.eq(output.detach(), module(obs).detach()))
Example #17
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 learn_std=True,
                 init_std=1.0,
                 min_std=1e-6,
                 max_std=None,
                 std_parameterization='exp',
                 layer_normalization=False,
                 normal_distribution_cls=Normal):
        super().__init__(input_dim=input_dim,
                         output_dim=output_dim,
                         hidden_sizes=hidden_sizes,
                         hidden_nonlinearity=hidden_nonlinearity,
                         hidden_w_init=hidden_w_init,
                         hidden_b_init=hidden_b_init,
                         output_nonlinearity=output_nonlinearity,
                         output_w_init=output_w_init,
                         output_b_init=output_b_init,
                         learn_std=learn_std,
                         init_std=init_std,
                         min_std=min_std,
                         max_std=max_std,
                         std_parameterization=std_parameterization,
                         layer_normalization=layer_normalization,
                         normal_distribution_cls=normal_distribution_cls)

        self._mean_module = MLPModule(
            input_dim=self._input_dim,
            output_dim=self._action_dim,
            hidden_sizes=self._hidden_sizes,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearity=self._output_nonlinearity,
            output_w_init=self._output_w_init,
            output_b_init=self._output_b_init,
            layer_normalization=self._layer_normalization)
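
Only the mean head is built here; the standard deviation is handled by the parent class (not shown). With std_parameterization='exp' the convention is sigma = exp(log_sigma) for an unconstrained log_sigma parameter, with softplus as the usual alternative, while learn_std, init_std, min_std and max_std control whether and how that parameter is trained and clamped.
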
Example #18
    def test_no_head_invalid_settings(self, hidden_nonlinear,
                                      output_nonlinear):
        """Check MLPModule throws exception with invalid non-linear functions.

        Args:
            hidden_nonlinear (callable or torch.nn.Module): Non-linear
                functions for hidden layers.
            output_nonlinear (callable or torch.nn.Module): Non-linear
                functions for output layer.

        """
        expected_msg = 'Non linear function .* is not supported'
        with pytest.raises(ValueError, match=expected_msg):
            MLPModule(input_dim=3,
                      output_dim=5,
                      hidden_sizes=(2, 3),
                      hidden_nonlinearity=hidden_nonlinear,
                      output_nonlinearity=output_nonlinear)
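
MLPModule accepts a nonlinearity as a callable (torch.relu), an nn.Module subclass (torch.nn.ReLU), or a module instance (torch.nn.ReLU()), as Examples #4 and #11 exercise; the (elided) parametrize fixture here presumably feeds values outside those forms to trigger the ValueError.
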
Example #19
    def test_is_pickleable(self, obs_dim, act_dim, batch_size, hidden_sizes):
        env_spec = TfEnv(DummyBoxEnv())
        obs = torch.ones([batch_size, obs_dim], dtype=torch.float32)
        nn_module = MLPModule(
            input_dim=obs_dim,
            output_dim=act_dim,
            hidden_nonlinearity=None,
            hidden_sizes=hidden_sizes,
            hidden_w_init=nn.init.ones_,
            output_w_init=nn.init.ones_)

        policy = DeterministicPolicy(env_spec, nn_module)
        output1 = policy.get_actions(obs)

        p = pickle.dumps(policy)
        policy_pickled = pickle.loads(p)
        output2 = policy_pickled.get_actions(obs)
        assert np.array_equal(output1, output2)
Example #20
    def test_get_qval(self, obs_dim, act_dim, output_dim, hidden_sizes):
        env_spec = TfEnv(DummyBoxEnv())
        obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
        act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
        nn_module = MLPModule(input_dim=obs_dim + act_dim,
                              output_dim=output_dim,
                              hidden_nonlinearity=None,
                              hidden_sizes=hidden_sizes,
                              hidden_w_init=nn.init.ones_,
                              output_w_init=nn.init.ones_)

        qf = ContinuousNNQFunction(env_spec, nn_module)
        output = qf.get_qval(obs, act)
        expected_output = torch.full([1, output_dim],
                                     fill_value=(obs_dim + act_dim) *
                                     np.prod(hidden_sizes),
                                     dtype=torch.float32)
        assert torch.eq(output, expected_output)
Example #21
    def test_is_pickleable(self, obs_dim, act_dim, output_dim, hidden_sizes):
        env_spec = TfEnv(DummyBoxEnv())
        obs = torch.ones(obs_dim, dtype=torch.float32).unsqueeze(0)
        act = torch.ones(act_dim, dtype=torch.float32).unsqueeze(0)
        nn_module = MLPModule(input_dim=obs_dim + act_dim,
                              output_dim=output_dim,
                              hidden_nonlinearity=None,
                              hidden_sizes=hidden_sizes,
                              hidden_w_init=nn.init.ones_,
                              output_w_init=nn.init.ones_)

        qf = ContinuousNNQFunction(env_spec, nn_module)
        output1 = qf.get_qval(obs, act)

        p = pickle.dumps(qf)
        qf_pickled = pickle.loads(p)
        output2 = qf_pickled.get_qval(obs, act)

        assert torch.eq(output1, output2)
Example #22
    def __init__(
            self,
            K,
            Dx,
            mlp_input_dim=None,
            hidden_layer_sizes=(124, 124),
            reg=0.001,
            reparameterize=True,
    ):
        self._reg = reg
        self._reparameterize = reparameterize

        self._Dx = Dx
        self._K = K
        if mlp_input_dim is None:
            self._w_and_mu_logsig_t = torch.distributions.normal.Normal(0, 0.1)
            self._use_mlp = False
        else:
            self._w_and_mu_logsig_t = MLPModule(
                input_dim=mlp_input_dim,
                output_dim=K * (2 * Dx + 1),
                hidden_sizes=hidden_layer_sizes)
            self._use_mlp = True
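
The output width K * (2 * Dx + 1) packs, for each of the K mixture components, Dx means and Dx log-sigmas plus one unnormalized mixture weight: K * (Dx + Dx + 1).
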
    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_sizes=(64, 64),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 layer_normalization=False):
        super().__init__()

        self._input_dim = input_dim
        self._hidden_sizes = hidden_sizes
        self._action_dim = output_dim
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._layer_normalization = layer_normalization
        # Output nonlinearity stays None: the straight-through Gumbel-softmax
        # estimator needs the raw logits.
        self._output_nonlinearity = None

        self.categorical_logits_module = MLPModule(
            input_dim=self._input_dim,
            output_dim=self._action_dim,
            hidden_sizes=self._hidden_sizes,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearity=None,
            output_w_init=self._output_w_init,
            output_b_init=self._output_b_init,
            layer_normalization=self._layer_normalization)
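
Since the comment notes that raw logits feed a straight-through Gumbel-softmax estimator, here is a hypothetical sketch of that consumption (policy and obs are assumed names, not from the source):

# Hypothetical sketch: straight-through Gumbel-softmax on the raw logits.
import torch.nn.functional as F

logits = policy.categorical_logits_module(obs)          # raw logits, no output nonlinearity
one_hot = F.gumbel_softmax(logits, tau=1.0, hard=True)  # one-hot forward, soft gradient backward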