def test_recurrent_param(self):
    """Check the full-sequence output of ``recurrent_parameter``.

    The parameter is built with length 3 and a constant initializer, then
    evaluated with the fixture's feed dict. The result must have shape
    (5, 2, 3) and every entry must equal ``self.initial_params``.
    """
    expected_shape = (5, 2, 3)
    constant_init = tf.constant_initializer(self.initial_params)

    # Only the full-sequence output is checked; the step output is ignored.
    full_param, _ = recurrent_parameter(
        input_var=self.input_vars,
        step_input_var=self.step_input_vars,
        length=3,
        initializer=constant_init)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    value = self.sess.run(full_param, feed_dict=self.feed_dict)

    assert value.shape == expected_shape
    assert np.array_equal(value,
                          np.full(expected_shape, self.initial_params))
def _build(self, state_input, step_input, hidden_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Input for the entire time series, passed
            to the GRU as ``all_input_var``.
        step_input (tf.Tensor): Input for a single step, passed to the GRU
            as ``step_input_var``.
        hidden_input (tf.Tensor): Hidden state for a single step, passed to
            the GRU as ``step_hidden_var``.
        name (str): Inner model name. Not used by this method.

    Returns:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series log std.
        tf.Tensor: Step log std.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
        DiagonalGaussian: Distribution object constructed from the output
            dimension.

    """
    # `name` is part of the signature but is not needed here; discard it
    # explicitly so the unused argument is clearly intentional.
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # One GRU emits mean and log std side by side on the last axis.
            (outputs, step_outputs, step_hidden, hidden_init_var) = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_std_output_nonlinearity_layer)
            # The first `action_dim` channels are the mean ...
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = outputs[..., :action_dim]
                step_mean_var = step_outputs[..., :action_dim]
            # ... and the remaining channels are the log std.
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = outputs[..., action_dim:]
                step_log_std_var = step_outputs[..., action_dim:]
        else:
            # The GRU produces the mean only; the log std comes from a
            # separate parameter built by `recurrent_parameter`.
            (mean_var, step_mean_var, step_hidden, hidden_init_var) = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                all_input_var=state_input,
                step_input_var=step_input,
                step_hidden_var=hidden_input,
                hidden_state_init=self._hidden_state_init,
                hidden_state_init_trainable=self.
                _hidden_state_init_trainable,
                output_nonlinearity_layer=self.
                _mean_output_nonlinearity_layer)
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=action_dim,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')

    dist = DiagonalGaussian(self._output_dim)

    return (mean_var, step_mean_var, log_std_var, step_log_std_var,
            step_hidden, hidden_init_var, dist)
def _build(self, state_input, step_input, step_hidden, name=None):
    """Construct the recurrent Gaussian model graph.

    Args:
        state_input (tf.Tensor): Observation input over the entire time
            series, with shape :math:`(N, T, S^*)`.
        step_input (tf.Tensor): Observation input for a single timestep,
            with shape :math:`(N, S^*)`.
        step_hidden (tf.Tensor): Hidden state for the step, with shape
            :math:`(N, S^*)`.
        name (str): Inner model name, also the variable scope of the inner
            model, if one exists (garage.tf.models.Sequential is one
            example). It is discarded here.

    Returns:
        tfp.distributions.MultivariateNormalDiag: Policy distribution.
        tf.Tensor: Step means, with shape :math:`(N, S^*)`.
        tf.Tensor: Step log std, with shape :math:`(N, S^*)`.
        tf.Tensor: Step hidden state, with shape :math:`(N, S^*)`.
        tf.Tensor: Initial hidden state, with shape :math:`(S^*)`.

    """
    del name
    n_actions = self._output_dim

    def run_gru(scope, cell, out_nonlinearity):
        # Both branches feed the same inputs and hidden-state settings to
        # gru(); only the scope name, cell and output layer differ.
        return gru(
            name=scope,
            gru_cell=cell,
            all_input_var=state_input,
            step_input_var=step_input,
            step_hidden_var=step_hidden,
            hidden_state_init=self._hidden_state_init,
            hidden_state_init_trainable=self._hidden_state_init_trainable,
            output_nonlinearity_layer=out_nonlinearity)

    with tf.compat.v1.variable_scope('dist_params'):
        if not self._std_share_network:
            # The GRU yields the mean only; the log std is a separate
            # parameter built by recurrent_parameter().
            mean_var, step_mean_var, next_hidden, hidden_init_var = run_gru(
                'mean_network', self._mean_gru_cell,
                self._mean_output_nonlinearity_layer)
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=n_actions,
                initializer=tf.constant_initializer(self._init_std_param),
                trainable=self._learn_std,
                name='log_std_param')
        else:
            # A single GRU emits mean and log std together; the last axis
            # is split at n_actions.
            joint, step_joint, next_hidden, hidden_init_var = run_gru(
                'mean_std_network', self._mean_std_gru_cell,
                self._mean_std_output_nonlinearity_layer)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = joint[..., :n_actions]
                step_mean_var = step_joint[..., :n_actions]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = joint[..., n_actions:]
                step_log_std_var = step_joint[..., n_actions:]

    # The distribution takes a scale, so exponentiate the log std.
    scale = tf.exp(log_std_var)
    dist = tfp.distributions.MultivariateNormalDiag(loc=mean_var,
                                                    scale_diag=scale)

    return dist, step_mean_var, step_log_std_var, next_hidden, hidden_init_var
def _build(self, state_input, step_input, hidden_input, name=None):
    """Assemble the model graph from the input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for the entire time-series
            inputs.
        step_input (tf.Tensor): Placeholder for the step inputs.
        hidden_input (tf.Tensor): Placeholder for the step hidden state.
        name (str): Inner model name, also the variable scope of the inner
            model, if one exists (garage.tf.models.Sequential is one
            example). It is discarded here.

    Returns:
        tf.Tensor: Entire time-series means.
        tf.Tensor: Step mean.
        tf.Tensor: Entire time-series log std.
        tf.Tensor: Step log std.
        tf.Tensor: Step hidden state.
        tf.Tensor: Initial hidden state.
        garage.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    out_dim = self._output_dim

    # Keyword arguments that are identical for either gru() call below.
    shared_gru_kwargs = dict(
        all_input_var=state_input,
        step_input_var=step_input,
        step_hidden_var=hidden_input,
        hidden_state_init=self._hidden_state_init,
        hidden_state_init_trainable=self._hidden_state_init_trainable,
    )

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # Mean and log std come out of one network, concatenated on
            # the last axis.
            combined, step_combined, step_hidden, hidden_init_var = gru(
                name='mean_std_network',
                gru_cell=self._mean_std_gru_cell,
                output_nonlinearity_layer=(
                    self._mean_std_output_nonlinearity_layer),
                **shared_gru_kwargs)

            with tf.compat.v1.variable_scope('mean_network'):
                mean_var = combined[..., :out_dim]
                step_mean_var = step_combined[..., :out_dim]

            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_var = combined[..., out_dim:]
                step_log_std_var = step_combined[..., out_dim:]
        else:
            # The network outputs the mean only; the log std is a
            # standalone parameter from recurrent_parameter().
            mean_var, step_mean_var, step_hidden, hidden_init_var = gru(
                name='mean_network',
                gru_cell=self._mean_gru_cell,
                output_nonlinearity_layer=(
                    self._mean_output_nonlinearity_layer),
                **shared_gru_kwargs)

            std_init = tf.constant_initializer(self._init_std_param)
            log_std_var, step_log_std_var = recurrent_parameter(
                input_var=state_input,
                step_input_var=step_input,
                length=out_dim,
                initializer=std_init,
                trainable=self._learn_std,
                name='log_std_param')

    dist = DiagonalGaussian(self._output_dim)

    return (mean_var, step_mean_var, log_std_var, step_log_std_var,
            step_hidden, hidden_init_var, dist)