def test_without_std_share_network_shapes(self, output_dim, hidden_dim):
    """Parameter shapes are correct when mean and std use separate heads."""
    model = GaussianGRUModel2(output_dim=output_dim,
                              hidden_dim=hidden_dim,
                              std_share_network=False,
                              hidden_nonlinearity=None,
                              recurrent_nonlinearity=None,
                              hidden_w_init=self.default_initializer,
                              recurrent_w_init=self.default_initializer,
                              output_w_init=self.default_initializer)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, hidden_dim),
        name='step_hidden',
        dtype=tf.float32)
    model.build(self.input_var, self.step_input_var, step_hidden_var)

    # The output layer is a tf.keras.layers.Dense object, which cannot be
    # reached via tf.compat.v1.variable_scope; as a workaround, scan the
    # global variable collection by name instead.
    for var in tf.compat.v1.global_variables():
        if 'output_layer/kernel' in var.name:
            mean_kernel = var
        if 'output_layer/bias' in var.name:
            mean_bias = var
        if 'log_std_param/parameter' in var.name:
            log_std_param = var

    assert mean_kernel.shape[1] == output_dim
    assert mean_bias.shape == output_dim
    assert log_std_param.shape == output_dim
def test_dist(self):
    """The built network exposes a MultivariateNormalDiag distribution."""
    model = GaussianGRUModel2(output_dim=1, hidden_dim=1)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, 1), name='step_hidden', dtype=tf.float32)
    model.build(self.input_var, self.step_input_var, step_hidden_var)
    dist = model.networks['default'].dist
    assert isinstance(dist, tfp.distributions.MultivariateNormalDiag)
def test_without_std_share_network_output_values(self, mock_normal,
                                                 output_dim, hidden_dim,
                                                 init_std):
    """Step outputs match a NumPy re-implementation of the GRU cell.

    The model's stepwise mean, log-std and hidden state are compared
    against `recurrent_step_gru` run with the same (constant) weight
    initialization, unrolled for `self.time_step` steps.
    """
    mock_normal.return_value = 0.5
    model = GaussianGRUModel2(output_dim=output_dim,
                              hidden_dim=hidden_dim,
                              std_share_network=False,
                              hidden_nonlinearity=None,
                              recurrent_nonlinearity=None,
                              hidden_w_init=self.default_initializer,
                              recurrent_w_init=self.default_initializer,
                              output_w_init=self.default_initializer,
                              init_std=init_std)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, hidden_dim),
        name='step_hidden',
        dtype=tf.float32)
    (_, step_mean_var, step_log_std_var, step_hidden,
     hidden_init_var) = model.build(self.input_var, self.step_input_var,
                                    step_hidden_var)

    # Both the model's and the reference implementation's hidden states
    # start from the model's initial hidden value.
    init_hidden = np.full((self.batch_size, hidden_dim),
                          hidden_init_var.eval())
    model_hidden = init_hidden
    ref_hidden = init_hidden

    for _ in range(self.time_step):
        mean_out, log_std_out, model_hidden = self.sess.run(
            [step_mean_var, step_log_std_var, step_hidden],
            feed_dict={
                self.step_input_var: self.obs_input,
                step_hidden_var: model_hidden
            })
        ref_hidden = recurrent_step_gru(input_val=self.obs_input,
                                        num_units=hidden_dim,
                                        step_hidden=ref_hidden,
                                        w_x_init=0.1,
                                        w_h_init=0.1,
                                        b_init=0.,
                                        nonlinearity=None,
                                        gate_nonlinearity=None)

        # Expected mean: the reference hidden state through a dense layer
        # whose weights were all initialized to 0.1.
        out_weights = np.full((np.prod(ref_hidden.shape[1:]), output_dim),
                              0.1)
        expected_mean = np.matmul(ref_hidden, out_weights)
        assert np.allclose(mean_out, expected_mean)

        expected_log_std = np.full((self.batch_size, output_dim),
                                   np.log(init_std))
        assert np.allclose(log_std_out, expected_log_std)
        assert np.allclose(model_hidden, ref_hidden)
def __init__(self,
             env_spec,
             hidden_dim=32,
             name='GaussianGRUPolicy',
             hidden_nonlinearity=tf.nn.tanh,
             hidden_w_init=tf.initializers.glorot_uniform(),
             hidden_b_init=tf.zeros_initializer(),
             recurrent_nonlinearity=tf.nn.sigmoid,
             recurrent_w_init=tf.initializers.glorot_uniform(),
             output_nonlinearity=None,
             output_w_init=tf.initializers.glorot_uniform(),
             output_b_init=tf.zeros_initializer(),
             hidden_state_init=tf.zeros_initializer(),
             hidden_state_init_trainable=False,
             learn_std=True,
             std_share_network=False,
             init_std=1.0,
             layer_normalization=False,
             state_include_action=True):
    """Gaussian GRU policy: stores the configuration and builds the model.

    Raises:
        ValueError: If the environment's action space is not akro.Box.
    """
    # Only continuous (Box) action spaces are supported.
    if not isinstance(env_spec.action_space, akro.Box):
        raise ValueError('GaussianGRUPolicy only works with '
                         'akro.Box action space, but not {}'.format(
                             env_spec.action_space))
    super().__init__(name, env_spec)

    self._obs_dim = env_spec.observation_space.flat_dim
    self._action_dim = env_spec.action_space.flat_dim
    self._hidden_dim = hidden_dim
    self._hidden_nonlinearity = hidden_nonlinearity
    self._hidden_w_init = hidden_w_init
    self._hidden_b_init = hidden_b_init
    self._recurrent_nonlinearity = recurrent_nonlinearity
    self._recurrent_w_init = recurrent_w_init
    self._output_nonlinearity = output_nonlinearity
    self._output_w_init = output_w_init
    self._output_b_init = output_b_init
    self._hidden_state_init = hidden_state_init
    self._hidden_state_init_trainable = hidden_state_init_trainable
    self._learn_std = learn_std
    self._std_share_network = std_share_network
    self._init_std = init_std
    self._layer_normalization = layer_normalization
    self._state_include_action = state_include_action

    # When the previous action is fed back in, the network input is the
    # concatenation of observation and action.
    self._input_dim = (self._obs_dim + self._action_dim
                       if state_include_action else self._obs_dim)

    self._f_step_mean_std = None

    self.model = GaussianGRUModel2(
        output_dim=self._action_dim,
        hidden_dim=hidden_dim,
        name='GaussianGRUModel',
        hidden_nonlinearity=hidden_nonlinearity,
        hidden_w_init=hidden_w_init,
        hidden_b_init=hidden_b_init,
        recurrent_nonlinearity=recurrent_nonlinearity,
        recurrent_w_init=recurrent_w_init,
        output_nonlinearity=output_nonlinearity,
        output_w_init=output_w_init,
        output_b_init=output_b_init,
        hidden_state_init=hidden_state_init,
        hidden_state_init_trainable=hidden_state_init_trainable,
        layer_normalization=layer_normalization,
        learn_std=learn_std,
        std_share_network=std_share_network,
        init_std=init_std)

    self._prev_actions = None
    self._prev_hiddens = None
def test_without_std_share_network_is_pickleable(self, mock_normal,
                                                 output_dim, hidden_dim):
    """A pickled-and-restored model produces identical outputs."""
    mock_normal.return_value = 0.5
    model = GaussianGRUModel2(output_dim=output_dim,
                              hidden_dim=hidden_dim,
                              std_share_network=False,
                              hidden_nonlinearity=None,
                              recurrent_nonlinearity=None,
                              hidden_w_init=self.default_initializer,
                              recurrent_w_init=self.default_initializer,
                              output_w_init=self.default_initializer)
    step_hidden_var = tf.compat.v1.placeholder(
        shape=(self.batch_size, hidden_dim),
        name='step_hidden',
        dtype=tf.float32)
    (dist, step_mean_var, step_log_std_var, step_hidden,
     _) = model.build(self.input_var, self.step_input_var, step_hidden_var)

    # The output layer is a tf.keras.layers.Dense object, which cannot be
    # reached via tf.compat.v1.variable_scope; as a workaround, scan the
    # global variable collection by name. Load non-default values so the
    # pickle round-trip has something non-trivial to preserve.
    for var in tf.compat.v1.global_variables():
        if 'output_layer/bias' in var.name:
            var.load(tf.ones_like(var).eval())

    hidden = np.zeros((self.batch_size, hidden_dim))
    full_outputs_before = self.sess.run(
        [dist.loc, dist.scale.diag],
        feed_dict={self.input_var: self.obs_inputs})
    step_outputs_before = self.sess.run(
        [step_mean_var, step_log_std_var, step_hidden],
        feed_dict={
            self.step_input_var: self.obs_input,
            step_hidden_var: hidden
        })  # noqa: E126

    pickled = pickle.dumps(model)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        model_pickled = pickle.loads(pickled)
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, None,
                                                    self.feature_shape),
                                             name='input')
        step_input_var = tf.compat.v1.placeholder(
            tf.float32, shape=(None, self.feature_shape), name='step_input')
        step_hidden_var = tf.compat.v1.placeholder(
            shape=(self.batch_size, hidden_dim),
            name='initial_hidden',
            dtype=tf.float32)
        (dist2, step_mean_var2, step_log_std_var2, step_hidden2,
         _) = model_pickled.build(input_var, step_input_var, step_hidden_var)

        full_outputs_after = sess.run(
            [dist2.loc, dist2.scale.diag],
            feed_dict={input_var: self.obs_inputs})
        step_outputs_after = sess.run(
            [step_mean_var2, step_log_std_var2, step_hidden2],
            feed_dict={
                step_input_var: self.obs_input,
                step_hidden_var: hidden
            })
        assert np.array_equal(full_outputs_before, full_outputs_after)
        assert np.array_equal(step_outputs_before, step_outputs_after)