Example #1
    def _build(self, state_input, step_input, hidden_input, name=None):
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer))
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer))
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, hidden_init_var, dist)
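
For reference, sampling an action from the diagonal Gaussian parameterized by the mean and log std returned above reduces to the reparameterization a = mean + exp(log_std) * eps. A minimal NumPy sketch (the helper name is illustrative, not garage's DiagonalGaussian API):

    import numpy as np

    def sample_diag_gaussian(mean, log_std, rng=None):
        """Reparameterized sample: a = mean + exp(log_std) * eps, eps ~ N(0, I)."""
        rng = rng or np.random.default_rng()
        eps = rng.standard_normal(mean.shape)
        return mean + np.exp(log_std) * eps

    action = sample_diag_gaussian(np.zeros(4), np.full(4, -0.5))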
Example #2
    def _build(self, all_input_var, step_input_var, step_hidden_var,
               name=None):
        """Build model given input placeholder(s).

        Args:
            all_input_var (tf.Tensor): Placeholder for entire time-series
                inputs.
            step_input_var (tf.Tensor): Placeholder for step inputs.
            step_hidden_var (tf.Tensor): Placeholder for step hidden state.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Entire time-series outputs.
            tf.Tensor: Step output.
            tf.Tensor: Step hidden state.
            tf.Tensor: Initial hidden state.

        """
        del name
        return gru(
            name='gru',
            gru_cell=self._gru_cell,
            all_input_var=all_input_var,
            step_input_var=step_input_var,
            step_hidden_var=step_hidden_var,
            hidden_state_init=self._hidden_state_init,
            hidden_state_init_trainable=self._hidden_state_init_trainable,
            output_nonlinearity_layer=self._output_nonlinearity_layer)
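
The step branch of the gru helper applies one cell update per call. As a minimal NumPy sketch, a single GRU step under the Keras convention (the update gate z interpolates between the previous state and the candidate); the weight names here are illustrative, not the helper's actual variables:

    import numpy as np

    def sigmoid(x):
        return 1. / (1. + np.exp(-x))

    def gru_step(x, h, w_z, u_z, b_z, w_r, u_r, b_r, w_c, u_c, b_c):
        """One GRU step on a batch: gates, candidate state, interpolation."""
        z = sigmoid(x @ w_z + h @ u_z + b_z)        # update gate
        r = sigmoid(x @ w_r + h @ u_r + b_r)        # reset gate
        c = np.tanh(x @ w_c + (r * h) @ u_c + b_c)  # candidate state
        return z * h + (1. - z) * c                 # new hidden state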
Example #3
    def _build(self,
               all_input_var,
               step_input_var,
               step_hidden_var,
               name=None):
        return gru(
            name='gru',
            gru_cell=self._gru_cell,
            all_input_var=all_input_var,
            step_input_var=step_input_var,
            step_hidden_var=step_hidden_var,
            hidden_state_init=self._hidden_state_init,
            hidden_state_init_trainable=self._hidden_state_init_trainable,
            output_nonlinearity_layer=self._output_nonlinearity_layer)
Example #4
    def test_output_shapes(self, time_step, input_dim, output_dim, hidden_init,
                           cell_init):
        obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
        obs_input = np.full((self.batch_size, input_dim), 1.)

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None, input_dim),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, input_dim),
                                                  name='step_input')
        output_nonlinearity = tf.keras.layers.Dense(
            units=output_dim,
            activation=None,
            kernel_initializer=tf.constant_initializer(1))
        with tf.compat.v1.variable_scope('GRU'):
            self.gru = gru(
                all_input_var=input_var,
                name='gru',
                gru_cell=self.gru_cell,
                step_input_var=step_input_var,
                step_hidden_var=self.step_hidden_var,
                hidden_state_init=tf.constant_initializer(hidden_init),
                output_nonlinearity_layer=output_nonlinearity)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Compute the output by calling step() t times on the GRU cell
        outputs_t, output_t, h_t, hidden_init_var = self.gru
        hidden = np.full((self.batch_size, self.hidden_dim),
                         hidden_init_var.eval())

        for _ in range(time_step):
            output, hidden = self.sess.run([output_t, h_t],
                                           feed_dict={
                                               step_input_var: obs_input,
                                               self.step_hidden_var: hidden,
                                           })  # noqa: E126
            assert output.shape == (self.batch_size, output_dim)
            assert hidden.shape == (self.batch_size, self.hidden_dim)

        full_output = self.sess.run(outputs_t,
                                    feed_dict={input_var: obs_inputs})

        assert full_output.shape == (self.batch_size, time_step, output_dim)
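
The same shape contract can be verified against a bare tf.keras.layers.GRUCell, without the garage fixtures. A minimal sketch, assuming TF1-style graph execution:

    import numpy as np
    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    batch_size, input_dim, hidden_dim = 2, 3, 4
    cell = tf.keras.layers.GRUCell(units=hidden_dim)
    step_input = tf.compat.v1.placeholder(tf.float32, (None, input_dim))
    step_hidden = tf.compat.v1.placeholder(tf.float32, (None, hidden_dim))
    output, new_states = cell(step_input, [step_hidden])

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        out, h = sess.run(
            [output, new_states[0]],
            feed_dict={step_input: np.ones((batch_size, input_dim)),
                       step_hidden: np.zeros((batch_size, hidden_dim))})
        # A GRU's step output is its hidden state, so both share one shape.
        assert out.shape == h.shape == (batch_size, hidden_dim)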
Example #5
    def _build(self, state_input, step_input, step_hidden, name=None):
        """Build model.

        Args:
            state_input (tf.Tensor): Entire time-series observation input,
                with shape :math:`(N, T, S^*)`.
            step_input (tf.Tensor): Single timestep observation input,
                with shape :math:`(N, S^*)`.
            step_hidden (tf.Tensor): Hidden state for step, with shape
                :math:`(N, S^*)`.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tfp.distributions.MultivariateNormalDiag: Policy distribution.
            tf.Tensor: Step means, with shape :math:`(N, S^*)`.
            tf.Tensor: Step log std, with shape :math:`(N, S^*)`.
            tf.Tensor: Step hidden state, with shape :math:`(N, S^*)`.
            tf.Tensor: Initial hidden state, with shape :math:`(S^*)`.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=step_hidden,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer))
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=step_hidden,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer))
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')

        dist = tfp.distributions.MultivariateNormalDiag(
            loc=mean_var, scale_diag=tf.exp(log_std_var))

        return (dist, step_mean_var, step_log_std_var, step_hidden,
                hidden_init_var)
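
Once built, the returned distribution supports the usual tfp operations. A minimal standalone sketch, with constants standing in for the network outputs:

    import numpy as np
    import tensorflow as tf
    import tensorflow_probability as tfp

    mean = tf.constant(np.zeros((1, 2), dtype=np.float32))
    log_std = tf.constant(np.full((1, 2), -1., dtype=np.float32))
    dist = tfp.distributions.MultivariateNormalDiag(loc=mean,
                                                    scale_diag=tf.exp(log_std))
    sample = dist.sample(seed=0)    # shape (1, 2)
    log_prob = dist.log_prob(mean)  # log-density at the mean, shape (1,)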
Example #6
    def _build(self, state_input, step_input, hidden_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for entire time-series
                inputs.
            step_input (tf.Tensor): Placeholder for step inputs.
            hidden_input (tf.Tensor): Placeholder for step hidden state.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Entire time-series means.
            tf.Tensor: Step mean.
            tf.Tensor: Entire time-series log std.
            tf.Tensor: Step log std.
            tf.Tensor: Step hidden state.
            tf.Tensor: Initial hidden state.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                    name='mean_std_network',
                    gru_cell=self._mean_std_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_std_output_nonlinearity_layer))
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_var = outputs[..., :action_dim]
                    step_mean_var = step_outputs[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_var = outputs[..., action_dim:]
                    step_log_std_var = step_outputs[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                    name='mean_network',
                    gru_cell=self._mean_gru_cell,
                    all_input_var=state_input,
                    step_input_var=step_input,
                    step_hidden_var=hidden_input,
                    hidden_state_init=self._hidden_state_init,
                    hidden_state_init_trainable=(
                        self._hidden_state_init_trainable),
                    output_nonlinearity_layer=(
                        self._mean_output_nonlinearity_layer))
                log_std_var, step_log_std_var = recurrent_parameter(
                    input_var=state_input,
                    step_input_var=step_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_param')

        dist = DiagonalGaussian(self._output_dim)

        return (mean_var, step_mean_var, log_std_var, step_log_std_var,
                step_hidden, hidden_init_var, dist)
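
The log-likelihood that the returned DiagonalGaussian assigns to an action follows directly from mean_var and log_std_var. A minimal NumPy sketch of the density (not garage's method):

    import numpy as np

    def diag_gaussian_log_prob(x, mean, log_std):
        """Log-density of a diagonal Gaussian, summed over the last axis."""
        var = np.exp(2. * log_std)
        return -0.5 * np.sum(
            (x - mean) ** 2 / var + 2. * log_std + np.log(2. * np.pi), axis=-1)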
Example #7
    def test_output_same_as_rnn(self, time_step, input_dim, output_dim,
                                hidden_init, cell_init):
        obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
        obs_input = np.full((self.batch_size, input_dim), 1.)

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None, input_dim),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, input_dim),
                                                  name='step_input')
        output_nonlinearity = tf.keras.layers.Dense(
            units=output_dim,
            activation=None,
            kernel_initializer=tf.constant_initializer(1))
        with tf.compat.v1.variable_scope('GRU'):
            self.gru = gru(
                all_input_var=input_var,
                name='gru',
                gru_cell=self.gru_cell,
                step_input_var=step_input_var,
                step_hidden_var=self.step_hidden_var,
                hidden_state_init=tf.constant_initializer(hidden_init),
                output_nonlinearity_layer=output_nonlinearity)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Create an RNN and compute the entire output sequence
        rnn_layer = tf.keras.layers.RNN(cell=self.gru_cell,
                                        return_sequences=True,
                                        return_state=True)

        # Set the initial state to hidden_init
        hidden_var = tf.compat.v1.get_variable(
            name='initial_hidden',
            shape=(self.batch_size, self.hidden_dim),
            initializer=tf.constant_initializer(hidden_init),
            trainable=False,
            dtype=tf.float32)
        outputs, hiddens = rnn_layer(input_var, initial_state=[hidden_var])
        outputs = output_nonlinearity(outputs)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        outputs, hiddens = self.sess.run([outputs, hiddens],
                                         feed_dict={input_var: obs_inputs})

        # Compute the output by calling step() t times on the GRU cell
        hidden = np.full((self.batch_size, self.hidden_dim), hidden_init)
        _, output_t, hidden_t, _ = self.gru
        for i in range(time_step):
            output, hidden = self.sess.run([output_t, hidden_t],
                                           feed_dict={
                                               step_input_var: obs_input,
                                               self.step_hidden_var: hidden,
                                           })  # noqa: E126
            # The output from the i-th timestep
            assert np.array_equal(output, outputs[:, i, :])
        assert np.array_equal(hidden, hiddens)

        # Also check the full output from the GRU
        full_outputs = self.sess.run(self.gru[0],
                                     feed_dict={input_var: obs_inputs})
        assert np.array_equal(outputs, full_outputs)
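
The equivalence this test relies on, that the final state of tf.keras.layers.RNN matches the last step of the sequence output for a GRU, can be checked in isolation. A minimal sketch:

    import numpy as np
    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    rnn = tf.keras.layers.RNN(tf.keras.layers.GRUCell(units=4),
                              return_sequences=True,
                              return_state=True)
    x = tf.compat.v1.placeholder(tf.float32, (None, None, 3))
    seq_out, final_h = rnn(x)

    with tf.compat.v1.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        out, h = sess.run([seq_out, final_h],
                          feed_dict={x: np.ones((2, 5, 3), np.float32)})
        # A GRU's output equals its hidden state, so the last timestep of
        # the sequence output is the final state.
        assert np.allclose(out[:, -1, :], h)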
Example #8
    def test_gradient_paths(self):
        time_step = 3
        input_dim = 2
        output_dim = 4
        obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
        obs_input = np.full((self.batch_size, input_dim), 1.)

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None, input_dim),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, input_dim),
                                                  name='step_input')
        output_nonlinearity = tf.keras.layers.Dense(
            units=output_dim,
            activation=None,
            kernel_initializer=tf.constant_initializer(1))
        with tf.compat.v1.variable_scope('GRU'):
            self.gru = gru(all_input_var=input_var,
                           name='gru',
                           gru_cell=self.gru_cell,
                           step_input_var=step_input_var,
                           step_hidden_var=self.step_hidden_var,
                           output_nonlinearity_layer=output_nonlinearity)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Compute the output by calling step() t times on the GRU cell
        outputs_t, output_t, h_t, hidden_init = self.gru
        hidden = np.full((self.batch_size, self.hidden_dim),
                         hidden_init.eval())

        grads_step_o_i = tf.gradients(output_t, step_input_var)
        grads_step_o_h = tf.gradients(output_t, self.step_hidden_var)
        grads_step_h = tf.gradients(h_t, step_input_var)

        self.sess.run([grads_step_o_i, grads_step_o_h, grads_step_h],
                      feed_dict={
                          step_input_var: obs_input,
                          self.step_hidden_var: hidden,
                      })  # noqa: E126

        grads_full = tf.gradients(outputs_t, input_var)
        self.sess.run(grads_full, feed_dict={input_var: obs_inputs})

        grads_step_o_i = tf.gradients(outputs_t, step_input_var)
        grads_step_o_h = tf.gradients(outputs_t, self.step_hidden_var)
        grads_step_h = tf.gradients(h_t, input_var)

        # No gradient flow: these tensors are disconnected, so tf.gradients
        # returns [None] and Session.run raises TypeError
        with pytest.raises(TypeError):
            self.sess.run(grads_step_o_i,
                          feed_dict={
                              step_input_var: obs_input,
                              self.step_hidden_var: hidden,
                          })
        with pytest.raises(TypeError):
            self.sess.run(grads_step_o_h,
                          feed_dict={
                              step_input_var: obs_input,
                              self.step_hidden_var: hidden,
                          })
        with pytest.raises(TypeError):
            self.sess.run(grads_step_h, feed_dict={input_var: obs_inputs})
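
The TypeError this test expects comes from tf.gradients returning [None] for disconnected tensors, which Session.run cannot fetch. A minimal sketch of the same behavior:

    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    a = tf.compat.v1.placeholder(tf.float32, (None,))
    b = tf.compat.v1.placeholder(tf.float32, (None,))
    y = 2. * a

    # b does not feed into y, so there is no gradient path.
    print(tf.gradients(y, b))  # [None]

    with tf.compat.v1.Session() as sess:
        try:
            sess.run(tf.gradients(y, b), feed_dict={b: [1.]})
        except TypeError:
            print('no gradient flow from b to y')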
Example #9
    def test_output_value_trainable_hidden_and_cell(self, time_step, input_dim,
                                                    output_dim):
        obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
        obs_input = np.full((self.batch_size, input_dim), 1.)

        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None, input_dim),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(tf.float32,
                                                  shape=(None, input_dim),
                                                  name='step_input')
        output_nonlinearity = tf.keras.layers.Dense(
            units=output_dim,
            activation=None,
            kernel_initializer=tf.constant_initializer(1))
        with tf.compat.v1.variable_scope('GRU'):
            self.gru = gru(all_input_var=input_var,
                           name='gru',
                           gru_cell=self.gru_cell,
                           step_input_var=step_input_var,
                           step_hidden_var=self.step_hidden_var,
                           hidden_state_init_trainable=True,
                           output_nonlinearity_layer=output_nonlinearity)

        self.sess.run(tf.compat.v1.global_variables_initializer())

        # Compute the output by calling step() t times on the GRU cell
        outputs_t, output_t, h_t, hidden_init = self.gru
        hidden = np.full((self.batch_size, self.hidden_dim),
                         hidden_init.eval())

        _, hidden = self.sess.run([output_t, h_t],
                                  feed_dict={
                                      step_input_var: obs_input,
                                      self.step_hidden_var: hidden,
                                  })  # noqa: E126
        with tf.compat.v1.variable_scope('GRU/gru', reuse=True):
            hidden_init_var = tf.compat.v1.get_variable(name='initial_hidden')
            assert hidden_init_var in tf.compat.v1.trainable_variables()

        full_output1 = self.sess.run(outputs_t,
                                     feed_dict={input_var: obs_inputs})

        hidden2 = np.full((self.batch_size, self.hidden_dim),
                          hidden_init.eval())
        stack_hidden = None
        for i in range(time_step):
            hidden2 = recurrent_step_gru(
                input_val=obs_inputs[:, i, :],
                num_units=self.hidden_dim,
                step_hidden=hidden2,
                w_x_init=1.,
                w_h_init=1.,
                b_init=0.,
                nonlinearity=np.tanh,
                gate_nonlinearity=lambda x: 1. / (1. + np.exp(-x)))
            if stack_hidden is None:
                stack_hidden = hidden2[:, np.newaxis, :]
            else:
                stack_hidden = np.concatenate(
                    (stack_hidden, hidden2[:, np.newaxis, :]), axis=1)
        output_weights = np.full((np.prod(hidden2.shape[1:]), output_dim), 1.)
        full_output2 = np.matmul(stack_hidden, output_weights)
        assert np.allclose(full_output1, full_output2)
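
recurrent_step_gru is a NumPy reference helper imported by the test fixtures. A plausible reconstruction, assuming constant-initialized weights and the Keras update convention (the real helper may differ in detail):

    import numpy as np

    def recurrent_step_gru(input_val, num_units, step_hidden, w_x_init,
                           w_h_init, b_init, nonlinearity, gate_nonlinearity):
        """One constant-weight GRU step mirroring the TF cell's arithmetic."""
        w_x = np.full((input_val.shape[1], num_units), w_x_init)
        w_h = np.full((num_units, num_units), w_h_init)
        b = np.full(num_units, b_init)
        z = gate_nonlinearity(input_val @ w_x + step_hidden @ w_h + b)  # update
        r = gate_nonlinearity(input_val @ w_x + step_hidden @ w_h + b)  # reset
        c = nonlinearity(input_val @ w_x + (r * step_hidden) @ w_h + b)
        return z * step_hidden + (1. - z) * c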