def test_softplus_min_std(self, output_dim, hidden_sizes):
    filters = ((3, (3, 3)), (6, (3, 3)))
    strides = [1, 1]
    model = GaussianCNNModel(filters=filters,
                             strides=strides,
                             padding='VALID',
                             output_dim=output_dim,
                             init_std=2.0,
                             hidden_sizes=hidden_sizes,
                             std_share_network=False,
                             adaptive_std=False,
                             hidden_nonlinearity=None,
                             std_parameterization='softplus',
                             min_std=10,
                             hidden_w_init=tf.constant_initializer(0.1),
                             output_w_init=tf.constant_initializer(1))
    outputs = model.build(self._input_ph)
    _, _, log_std, std_param = self.sess.run(
        outputs[:-1], feed_dict={self._input_ph: self.obs})

    expected_log_std = np.full([1, output_dim], np.log(10))
    expected_std_param = np.full([1, output_dim], np.log(np.exp(10) - 1))
    assert np.allclose(log_std, expected_log_std)
    assert np.allclose(std_param, expected_std_param)
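# A minimal numerical sketch of the softplus parameterization assumed by the
# expectations above: std = log(1 + exp(std_param)). Clipping the standard
# deviation at min_std = 10 then corresponds to
#
#     std_param = np.log(np.exp(10) - 1)              # inverse softplus of 10
#     log_std = np.log(np.log1p(np.exp(std_param)))   # == np.log(10)
#
# which is exactly what expected_std_param and expected_log_std encode.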
def test_softplus_max_std(self, output_dim, hidden_sizes):
    filter_sizes = [3, 3]
    out_channels = [3, 6]
    strides = [1, 1]
    model = GaussianCNNModel(filter_dims=filter_sizes,
                             num_filters=out_channels,
                             strides=strides,
                             padding='VALID',
                             output_dim=output_dim,
                             init_std=10.0,
                             hidden_sizes=hidden_sizes,
                             std_share_network=False,
                             adaptive_std=False,
                             hidden_nonlinearity=None,
                             std_parameterization='softplus',
                             max_std=1.0,
                             hidden_w_init=tf.constant_initializer(0.1),
                             output_w_init=tf.constant_initializer(1))
    outputs = model.build(self._input_ph)
    action, mean, log_std, std_param = self.sess.run(
        outputs[:-1], feed_dict={self._input_ph: self.obs})

    expected_log_std = np.full([1, output_dim], np.log(1))
    expected_std_param = np.full([1, output_dim], np.log(np.exp(1) - 1))
    assert np.allclose(log_std, expected_log_std, rtol=0, atol=0.0001)
    assert np.allclose(std_param, expected_std_param, rtol=0, atol=0.0001)
def test_without_std_share_network_is_pickleable(self, mock_normal,
                                                 output_dim, hidden_sizes):
    mock_normal.return_value = 0.5
    input_var = tf.compat.v1.placeholder(tf.float32,
                                         shape=(None, 10, 10, 3))
    model = GaussianCNNModel(num_filters=[3, 6],
                             filter_dims=[3, 3],
                             strides=[1, 1],
                             padding='SAME',
                             hidden_sizes=hidden_sizes,
                             output_dim=output_dim,
                             std_share_network=False,
                             adaptive_std=False)
    outputs = model.build(input_var)

    # get output bias
    with tf.compat.v1.variable_scope('GaussianCNNModel', reuse=True):
        bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/bias')
    # assign it to all ones
    bias.load(tf.ones_like(bias).eval())

    output1 = self.sess.run(outputs[:-1], feed_dict={input_var: self.obs})

    h = pickle.dumps(model)
    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
        input_var = tf.compat.v1.placeholder(tf.float32,
                                             shape=(None, 10, 10, 3))
        model_pickled = pickle.loads(h)
        outputs = model_pickled.build(input_var)
        output2 = sess.run(outputs[:-1], feed_dict={input_var: self.obs})
        assert np.array_equal(output1, output2)
def test_without_std_share_network_output_values(self, mock_normal, filters,
                                                 in_channels, strides,
                                                 output_dim, hidden_sizes):
    mock_normal.return_value = 0.5
    model = GaussianCNNModel(filters=filters,
                             strides=strides,
                             padding='VALID',
                             output_dim=output_dim,
                             init_std=2,
                             hidden_sizes=hidden_sizes,
                             std_share_network=False,
                             adaptive_std=False,
                             hidden_nonlinearity=None,
                             std_parameterization='exp',
                             hidden_w_init=tf.constant_initializer(0.01),
                             output_w_init=tf.constant_initializer(1))
    outputs = model.build(self._input_ph)
    action, mean, log_std, std_param = self.sess.run(
        outputs[:-1], feed_dict={self._input_ph: self.obs})

    filter_sum = 1
    for filter_iter, in_channel in zip(filters, in_channels):
        filter_height = filter_iter[1][0]
        filter_width = filter_iter[1][1]
        filter_sum *= 0.01 * filter_height * filter_width * in_channel
    for _ in hidden_sizes:
        filter_sum *= 0.01

    height_size = self.input_height
    width_size = self.input_width
    for filter_iter, stride in zip(filters, strides):
        height_size = int((height_size - filter_iter[1][0]) / stride) + 1
        width_size = int((width_size - filter_iter[1][1]) / stride) + 1
    flatten_shape = height_size * width_size * filters[-1][0]

    network_output = filter_sum * flatten_shape * np.prod(hidden_sizes)
    expected_mean = np.full((self.batch_size, output_dim),
                            network_output,
                            dtype=np.float32)
    expected_std_param = np.full((self.batch_size, output_dim),
                                 np.log(2),
                                 dtype=np.float32)
    expected_log_std = np.full((self.batch_size, output_dim),
                               np.log(2),
                               dtype=np.float32)

    assert np.allclose(mean, expected_mean)
    assert np.allclose(std_param, expected_std_param)
    assert np.allclose(log_std, expected_log_std)

    expected_action = 0.5 * np.exp(expected_log_std) + expected_mean
    assert np.allclose(action, expected_action, rtol=0, atol=0.1)
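# A minimal sketch of where expected_mean comes from, assuming all-ones
# observations, zero bias initialization, and identity activations: each VALID
# conv layer scales a unit activation by 0.01 * filter_height * filter_width *
# in_channels, each hidden dense layer scales it by 0.01 * fan_in, and the
# output layer (constant weights of 1) scales it by its fan_in, so for
# hidden_sizes = (h1, h2):
#
#     mean = prod_conv * (0.01 * flatten_shape) * (0.01 * h1) * h2
#          = filter_sum * flatten_shape * np.prod(hidden_sizes)
#
# which matches the network_output computed above.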
def test_softplus_output_values(self, mock_normal, output_dim, hidden_sizes):
    mock_normal.return_value = 0.5
    filter_sizes = [3, 3]
    in_channels = [3, 3]
    out_channels = [3, 6]
    strides = [1, 1]
    model = GaussianCNNModel(filter_dims=filter_sizes,
                             num_filters=out_channels,
                             strides=strides,
                             padding='VALID',
                             output_dim=output_dim,
                             init_std=2.0,
                             hidden_sizes=hidden_sizes,
                             std_share_network=False,
                             adaptive_std=False,
                             hidden_nonlinearity=None,
                             std_parameterization='softplus',
                             hidden_w_init=tf.constant_initializer(0.01),
                             output_w_init=tf.constant_initializer(1))
    outputs = model.build(self._input_ph)
    action, mean, log_std, std_param = self.sess.run(
        outputs[:-1], feed_dict={self._input_ph: self.obs})

    filter_sum = 1
    for filter_size, in_channel in zip(filter_sizes, in_channels):
        filter_sum *= 0.01 * filter_size * filter_size * in_channel
    for _ in hidden_sizes:
        filter_sum *= 0.01

    current_size = self.input_width
    for filter_size, stride in zip(filter_sizes, strides):
        current_size = int((current_size - filter_size) / stride) + 1
    flatten_shape = current_size * current_size * out_channels[-1]

    network_output = filter_sum * flatten_shape * np.prod(hidden_sizes)
    expected_mean = np.full((self.batch_size, output_dim),
                            network_output,
                            dtype=np.float32)
    expected_std_param = np.full((self.batch_size, output_dim),
                                 np.log(np.exp(2) - 1),
                                 dtype=np.float32)
    expected_log_std = np.log(np.log(1. + np.exp(expected_std_param)))

    assert np.allclose(mean, expected_mean)
    assert np.allclose(std_param, expected_std_param)
    assert np.allclose(log_std, expected_log_std)

    expected_action = 0.5 * np.exp(expected_log_std) + expected_mean
    assert np.allclose(action, expected_action, rtol=0, atol=0.1)
def test_std_share_network_shapes(self, output_dim, hidden_sizes):
    model = GaussianCNNModel(filters=((3, (3, 3)), (6, (3, 3))),
                             strides=[1, 1],
                             padding='SAME',
                             hidden_sizes=hidden_sizes,
                             output_dim=output_dim,
                             std_share_network=True)
    model.build(self._input_ph)
    with tf.compat.v1.variable_scope(model.name, reuse=True):
        std_share_output_weights = tf.compat.v1.get_variable(
            'dist_params/mean_std_network/output/kernel')
        std_share_output_bias = tf.compat.v1.get_variable(
            'dist_params/mean_std_network/output/bias')
    # should be 2 * output_dim
    assert std_share_output_weights.shape[1] == output_dim * 2
    assert std_share_output_bias.shape == output_dim * 2
def test_unknown_std_parameterization(self):
    with pytest.raises(NotImplementedError):
        _ = GaussianCNNModel(filters=((3, (3, 3)), (6, (3, 3))),
                             strides=[1, 1],
                             padding='SAME',
                             hidden_sizes=(1, ),
                             output_dim=1,
                             std_parameterization='unknown')
def test_without_std_share_network_shapes(self, output_dim, hidden_sizes):
    model = GaussianCNNModel(filters=((3, (3, 3)), (6, (3, 3))),
                             strides=[1, 1],
                             padding='SAME',
                             hidden_sizes=hidden_sizes,
                             output_dim=output_dim,
                             std_share_network=False,
                             adaptive_std=False)
    model.build(self._input_ph)
    with tf.compat.v1.variable_scope(model.name, reuse=True):
        mean_output_weights = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/kernel')
        mean_output_bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/bias')
        log_std_output_weights = tf.compat.v1.get_variable(
            'dist_params/log_std_network/parameter')
    assert mean_output_weights.shape[1] == output_dim
    assert mean_output_bias.shape == output_dim
    assert log_std_output_weights.shape == output_dim
def test_adaptive_std_output_shape(self, output_dim, hidden_sizes,
                                   std_hidden_sizes):
    model = GaussianCNNModel(
        num_filters=[3, 6],
        filter_dims=[3, 3],
        strides=[1, 1],
        padding='SAME',
        output_dim=output_dim,
        hidden_sizes=hidden_sizes,
        std_share_network=False,
        adaptive_std=True,
        hidden_nonlinearity=None,
        std_hidden_nonlinearity=None,
        std_filter_dims=[3, 3],
        std_num_filters=[3, 6],
        std_strides=[1, 1],
        std_padding='SAME',
        std_hidden_sizes=std_hidden_sizes,
        hidden_w_init=tf.constant_initializer(0.01),
        output_w_init=tf.constant_initializer(1),
        std_hidden_w_init=tf.constant_initializer(0.01),
        std_output_w_init=tf.constant_initializer(1))
    model.build(self._input_ph)
    with tf.compat.v1.variable_scope(model.name, reuse=True):
        mean_output_weights = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/kernel')
        mean_output_bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/output/bias')
        log_std_output_weights = tf.compat.v1.get_variable(
            'dist_params/log_std_network/output/kernel')
        log_std_output_bias = tf.compat.v1.get_variable(
            'dist_params/log_std_network/output/bias')

    assert mean_output_weights.shape[1] == output_dim
    assert mean_output_bias.shape == output_dim
    assert log_std_output_weights.shape[1] == output_dim
    assert log_std_output_bias.shape == output_dim