def test_without_std_share_network_output_values(self, mock_normal,
                                                 output_dim, hidden_dim,
                                                 init_std):
    """Step the model and a NumPy GRU reference in lockstep and compare.

    With ``std_share_network=False`` the log-std is a standalone parameter
    initialized to ``log(init_std)``, so it should stay constant while the
    mean follows the recurrent computation.
    """
    # Pin the normal sample so the action is deterministic:
    # action = sample * exp(log_std) + mean.
    mock_normal.return_value = 0.5
    model = GaussianGRUModel(output_dim=output_dim,
                             hidden_dim=hidden_dim,
                             std_share_network=False,
                             hidden_nonlinearity=None,
                             recurrent_nonlinearity=None,
                             hidden_w_init=self.default_initializer,
                             recurrent_w_init=self.default_initializer,
                             output_w_init=self.default_initializer,
                             init_std=init_std)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, hidden_dim),
        name='step_hidden',
        dtype=tf.float32)
    (action_var, mean_var, step_mean_var, log_std_var, step_log_std_var,
     step_hidden, hidden_init_var,
     dist) = model.build(self.input_var, self.step_input_var,
                         step_hidden_var)

    # Both the model's hidden state and the NumPy reference start from the
    # model's learned initial hidden value.
    model_hidden = np.full((self.batch_size, hidden_dim),
                           hidden_init_var.eval())
    ref_hidden = model_hidden

    # Run the full-sequence outputs once (exercises the unrolled graph).
    _seq_mean, _seq_log_std = self.sess.run(
        [mean_var, log_std_var],
        feed_dict={self.input_var: self.obs_inputs})

    expected_log_std = np.full((self.batch_size, output_dim),
                               np.log(init_std))
    for _ in range(self.time_step):
        action, step_mean, step_log_std, model_hidden = self.sess.run(
            [action_var, step_mean_var, step_log_std_var, step_hidden],
            feed_dict={
                self.step_input_var: self.obs_input,
                step_hidden_var: model_hidden
            })

        # NumPy reference GRU step with the same (constant) weights.
        ref_hidden = recurrent_step_gru(input_val=self.obs_input,
                                        num_units=hidden_dim,
                                        step_hidden=ref_hidden,
                                        w_x_init=0.1,
                                        w_h_init=0.1,
                                        b_init=0.,
                                        nonlinearity=None,
                                        gate_nonlinearity=None)

        # Output layer is a plain matmul with weights filled with 0.1.
        out_w = np.full((np.prod(ref_hidden.shape[1:]), output_dim), 0.1)
        ref_mean = np.matmul(ref_hidden, out_w)

        assert np.allclose(step_mean, ref_mean)
        # log-std is its own parameter, so it never moves from log(init_std).
        assert np.allclose(step_log_std, expected_log_std)
        assert np.allclose(model_hidden, ref_hidden)
        # Reparameterized sample with the mocked normal draw of 0.5.
        assert np.allclose(action, 0.5 * np.exp(step_log_std) + step_mean)
def test_std_share_network_output_values(self, mock_normal, output_dim,
                                         hidden_dim):
    """Verify per-step outputs when mean and std share one output network.

    With ``std_share_network=True`` the mean and log-std come from the same
    linear head, so with identical weight fills both must equal the NumPy
    reference output at every step.
    """
    mock_normal.return_value = 0.5
    model = GaussianGRUModel(output_dim=output_dim,
                             hidden_dim=hidden_dim,
                             std_share_network=True,
                             hidden_nonlinearity=None,
                             recurrent_nonlinearity=None,
                             hidden_w_init=self.default_initializer,
                             recurrent_w_init=self.default_initializer,
                             output_w_init=self.default_initializer)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, hidden_dim),
        name='step_hidden',
        dtype=tf.float32)
    (_, step_mean_var, step_log_std_var, step_hidden,
     hidden_init_var) = model.build(self.input_var, self.step_input_var,
                                    step_hidden_var).outputs

    # Seed both the model state and the NumPy reference with the model's
    # initial hidden value.
    model_hidden = np.full((self.batch_size, hidden_dim),
                           hidden_init_var.eval())
    ref_hidden = model_hidden

    for _ in range(self.time_step):
        step_mean, step_log_std, model_hidden = self.sess.run(
            [step_mean_var, step_log_std_var, step_hidden],
            feed_dict={
                self.step_input_var: self.obs_input,
                step_hidden_var: model_hidden
            })

        # Equivalent GRU step computed in NumPy with the same weight fills.
        ref_hidden = recurrent_step_gru(input_val=self.obs_input,
                                        num_units=hidden_dim,
                                        step_hidden=ref_hidden,
                                        w_x_init=0.1,
                                        w_h_init=0.1,
                                        b_init=0.,
                                        nonlinearity=None,
                                        gate_nonlinearity=None)
        out_w = np.full((np.prod(ref_hidden.shape[1:]), output_dim), 0.1)
        ref_out = np.matmul(ref_hidden, out_w)

        assert np.allclose(step_mean, ref_out)
        # Shared head: the log-std branch produces the same values.
        assert np.allclose(step_log_std, ref_out)
        assert np.allclose(model_hidden, ref_hidden)
def test_output_value_trainable_hidden_and_cell(self, time_step, input_dim,
                                                output_dim):
    """Check GRU outputs with a trainable initial hidden state.

    Builds the ``gru`` layer with ``hidden_state_init_trainable=True``,
    asserts the initial-hidden variable is registered as trainable, then
    compares the layer's full-sequence output against a step-by-step
    NumPy reference GRU with the same weight initializers.

    Fix: use ``tf.compat.v1`` for placeholder/variable-scope/variable
    APIs. The sibling tests in this file already use
    ``tf.compat.v1.placeholder``; the bare ``tf.placeholder`` /
    ``tf.variable_scope`` / ``tf.get_variable`` /
    ``tf.global_variables_initializer`` / ``tf.trainable_variables``
    names were removed from the top-level namespace in TensorFlow 2, so
    this test would raise AttributeError there.
    """
    obs_inputs = np.full((self.batch_size, time_step, input_dim), 1.)
    obs_input = np.full((self.batch_size, input_dim), 1.)
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, None, input_dim),
                                         name='input')
    step_input_var = tf.compat.v1.placeholder(tf.float32,
                                              shape=(None, input_dim),
                                              name='step_input')
    # Identity output head with all weights initialized to 1.
    output_nonlinearity = tf.keras.layers.Dense(
        units=output_dim,
        activation=None,
        kernel_initializer=tf.constant_initializer(1))
    with tf.compat.v1.variable_scope('GRU'):
        self.gru = gru(all_input_var=input_var,
                       name='gru',
                       gru_cell=self.gru_cell,
                       step_input_var=step_input_var,
                       step_hidden_var=self.step_hidden_var,
                       hidden_state_init_trainable=True,
                       output_nonlinearity_layer=output_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())

    # Compute output by doing t step() on the gru cell
    outputs_t, output_t, h_t, hidden_init = self.gru
    hidden = np.full((self.batch_size, self.hidden_dim),
                     hidden_init.eval())

    output, hidden = self.sess.run(
        [output_t, h_t],
        feed_dict={
            step_input_var: obs_input,
            self.step_hidden_var: hidden,
        })  # noqa: E126

    # The trainable initial hidden state must appear among the
    # trainable variables.
    with tf.compat.v1.variable_scope('GRU/gru', reuse=True):
        hidden_init_var = tf.compat.v1.get_variable(name='initial_hidden')
        assert hidden_init_var in tf.compat.v1.trainable_variables()

    full_output1 = self.sess.run(outputs_t,
                                 feed_dict={input_var: obs_inputs})

    # NumPy reference: roll the GRU forward one timestep at a time with
    # the standard tanh / sigmoid nonlinearities and weights of 1.
    hidden2 = np.full((self.batch_size, self.hidden_dim),
                      hidden_init.eval())
    stack_hidden = None
    for i in range(time_step):
        hidden2 = recurrent_step_gru(
            input_val=obs_inputs[:, i, :],
            num_units=self.hidden_dim,
            step_hidden=hidden2,
            w_x_init=1.,
            w_h_init=1.,
            b_init=0.,
            nonlinearity=np.tanh,
            gate_nonlinearity=lambda x: 1. / (1. + np.exp(-x)))
        if stack_hidden is None:
            stack_hidden = hidden2[:, np.newaxis, :]
        else:
            stack_hidden = np.concatenate(
                (stack_hidden, hidden2[:, np.newaxis, :]), axis=1)
    output_nonlinearity = np.full(
        (np.prod(hidden2.shape[1:]), output_dim), 1.)
    full_output2 = np.matmul(stack_hidden, output_nonlinearity)
    assert np.allclose(full_output1, full_output2)