示例#1
0
    def test_recurrent_param(self):
        """Check the evaluated value of the tensor from recurrent_parameter.

        The parameter is created with ``length=3`` and a constant
        initializer built from ``self.initial_params``.  After running the
        global variable initializer, the first returned tensor is evaluated
        with ``self.feed_dict`` and must equal
        ``np.full((5, 2, 3), self.initial_params)``.
        """
        const_init = tf.constant_initializer(self.initial_params)
        param_var, _ = recurrent_parameter(
            input_var=self.input_vars,
            step_input_var=self.step_input_vars,
            length=3,
            initializer=const_init)

        init_op = tf.compat.v1.global_variables_initializer()
        self.sess.run(init_op)
        value = self.sess.run(param_var, feed_dict=self.feed_dict)

        expected_shape = (5, 2, 3)
        expected = np.full(expected_shape, self.initial_params)
        assert value.shape == expected_shape
        assert np.array_equal(value, expected)
示例#2
0
    def _build(self, state_input, step_input, hidden_input, name=None):
        """Build the GRU mean output and the log-std output.

        Args:
            state_input: Forwarded to ``gru`` as ``all_input_var`` and to
                ``recurrent_parameter`` as ``input_var``.
            step_input: Forwarded to both helpers as ``step_input_var``.
            hidden_input: Forwarded to ``gru`` as ``step_hidden_var``.
            name: Not used by this method.

        Returns:
            tuple: ``(mean_var, step_mean_var, log_std_var,
                step_log_std_var, step_hidden, hidden_init_var, dist)``.
                The first six entries come from ``gru`` /
                ``recurrent_parameter`` (or slices of the shared ``gru``
                output); ``dist`` is ``DiagonalGaussian(self._output_dim)``.

        """
        out_dim = self._output_dim
        # Keyword arguments that are identical for every gru() call below.
        common_gru_kwargs = {
            'all_input_var': state_input,
            'step_input_var': step_input,
            'step_hidden_var': hidden_input,
            'hidden_state_init': self._hidden_state_init,
            'hidden_state_init_trainable': self._hidden_state_init_trainable,
        }

        with tf.compat.v1.variable_scope('dist_params'):
            if not self._std_share_network:
                # Mean comes from its own GRU; log-std is a separate
                # parameter created by recurrent_parameter().
                mean_var, step_mean_var, step_hidden, hidden_init_var = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer),
                    **common_gru_kwargs)
                log_std_init = tf.constant_initializer(self._init_std_param)
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=out_dim,
                    initializer=log_std_init,
                    trainable=self._learn_std,
                    name='log_std_param')
            else:
                # One GRU produces both quantities: the leading `out_dim`
                # channels are the mean, the remaining ones the log-std.
                seq_out, step_out, step_hidden, hidden_init_var = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer),
                    **common_gru_kwargs)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = seq_out[..., :out_dim]
                    step_mean_var = step_out[..., :out_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = seq_out[..., out_dim:]
                    step_log_std_var = step_out[..., out_dim:]

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, hidden_init_var, dist)
示例#3
0
    def _build(self, state_input, step_input, step_hidden, name=None):
        """Build the GRU policy model and its output distribution.

        Args:
            state_input (tf.Tensor): Observation input for the whole
                time series, with shape :math:`(N, T, S^*)`.
            step_input (tf.Tensor): Observation input for a single
                timestep, with shape :math:`(N, S^*)`.
            step_hidden (tf.Tensor): Hidden state for the step, with shape
                :math:`(N, S^*)`.
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential. Discarded by this method.

        Returns:
            tfp.distributions.MultivariateNormalDiag: Policy distribution.
            tf.Tensor: Step means, with shape :math:`(N, S^*)`.
            tf.Tensor: Step log std, with shape :math:`(N, S^*)`.
            tf.Tensor: Step hidden state, with shape :math:`(N, S^*)`.
            tf.Tensor: Initial hidden state, with shape :math:`(S^*)`.

        """
        del name
        out_dim = self._output_dim
        # Keyword arguments that are identical for every gru() call below.
        common_gru_kwargs = {
            'all_input_var': state_input,
            'step_input_var': step_input,
            'step_hidden_var': step_hidden,
            'hidden_state_init': self._hidden_state_init,
            'hidden_state_init_trainable': self._hidden_state_init_trainable,
        }

        with tf.compat.v1.variable_scope('dist_params'):
            if not self._std_share_network:
                # Mean comes from its own GRU; log-std is a separate
                # parameter created by recurrent_parameter().
                mean_var, step_mean_var, next_hidden, hidden_init_var = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer),
                    **common_gru_kwargs)
                log_std_init = tf.constant_initializer(self._init_std_param)
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=out_dim,
                    initializer=log_std_init,
                    trainable=self._learn_std,
                    name='log_std_param')
            else:
                # One GRU produces both quantities: the leading `out_dim`
                # channels are the mean, the remaining ones the log-std.
                seq_out, step_out, next_hidden, hidden_init_var = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer),
                    **common_gru_kwargs)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = seq_out[..., :out_dim]
                    step_mean_var = step_out[..., :out_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = seq_out[..., out_dim:]
                    step_log_std_var = step_out[..., out_dim:]

        # The distribution's scale is the exponential of the log-std output.
        scale_diag = tf.exp(log_std_var)
        dist = tfp.distributions.MultivariateNormalDiag(
            loc=mean_var, scale_diag=scale_diag)

        return (dist, step_mean_var, step_log_std_var, next_hidden,
                hidden_init_var)
示例#4
0
    def _build(self, state_input, step_input, hidden_input, name=None):
        """Build the model from the given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for the inputs of the
                entire time series.
            step_input (tf.Tensor): Placeholder for the step inputs.
            hidden_input (tf.Tensor): Placeholder for the step hidden state.
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential. Discarded by this method.

        Returns:
            tf.Tensor: Entire time-series means.
            tf.Tensor: Step mean.
            tf.Tensor: Entire time-series std_log.
            tf.Tensor: Step std_log.
            tf.Tensor: Step hidden state.
            tf.Tensor: Initial hidden state.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        out_dim = self._output_dim
        # Keyword arguments that are identical for every gru() call below.
        common_gru_kwargs = {
            'all_input_var': state_input,
            'step_input_var': step_input,
            'step_hidden_var': hidden_input,
            'hidden_state_init': self._hidden_state_init,
            'hidden_state_init_trainable': self._hidden_state_init_trainable,
        }

        with tf.compat.v1.variable_scope('dist_params'):
            if not self._std_share_network:
                # Mean comes from its own GRU; log-std is a separate
                # parameter created by recurrent_parameter().
                mean_var, step_mean_var, step_hidden, hidden_init_var = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer),
                    **common_gru_kwargs)
                log_std_init = tf.constant_initializer(self._init_std_param)
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=out_dim,
                    initializer=log_std_init,
                    trainable=self._learn_std,
                    name='log_std_param')
            else:
                # One GRU produces both quantities: the leading `out_dim`
                # channels are the mean, the remaining ones the log-std.
                seq_out, step_out, step_hidden, hidden_init_var = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer),
                    **common_gru_kwargs)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = seq_out[..., :out_dim]
                    step_mean_var = step_out[..., :out_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = seq_out[..., out_dim:]
                    step_log_std_var = step_out[..., out_dim:]

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, hidden_init_var, dist)