def test_handle_preprocessing_layers(self, outer_dims):
    """Checks CriticNetwork output shape with observation preprocessing.

    Builds a critic over a two-part observation, each part with its own
    preprocessing layer, combined with `Add`, and asserts that the Q-values
    have exactly the shape given by `outer_dims`.

    Args:
      outer_dims: Outer (leading) dimensions used when sampling the time step
        and the actions.
    """
    num_actions_dims = 2
    leading_dims = list(outer_dims)

    # Observation made of a length-1 vector and a scalar.
    vector_spec = tensor_spec.TensorSpec([1], tf.float32)
    scalar_spec = tensor_spec.TensorSpec([], tf.float32)
    observation_spec = (vector_spec, scalar_spec)

    sampled_step = tensor_spec.sample_spec_nest(
        ts.time_step_spec(observation_spec), outer_dims=outer_dims)

    action_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)
    sampled_actions = tf.random.uniform(leading_dims + [num_actions_dims])

    # One preprocessing layer per observation entry; the scalar entry is
    # reshaped to (1,) before it reaches its Dense layer.
    vector_branch = tf.keras.layers.Dense(4)
    scalar_branch = sequential_layer.SequentialLayer([
        tf.keras.layers.Reshape((1, )),
        tf.keras.layers.Dense(4),
    ])

    critic_net = critic_network.CriticNetwork(
        (observation_spec, action_spec),
        observation_preprocessing_layers=(vector_branch, scalar_branch),
        observation_preprocessing_combiner=tf.keras.layers.Add())

    network_output = critic_net((sampled_step.observation, sampled_actions))
    q_values = network_output[0]
    self.assertAllEqual(q_values.shape.as_list(), leading_dims)
def testHandlePreprocessingLayers(self):
    """Checks ActorDistributionRnnNetwork with preprocessing layers.

    Uses a two-part observation and two actions, then asserts the shape of
    each action distribution's mode and that the network owns more than four
    trainable variables.
    """
    outer_dims = (3, 4)

    observation_spec = (tensor_spec.TensorSpec([1], tf.float32),
                        tensor_spec.TensorSpec([], tf.float32))
    time_step_spec = ts.time_step_spec(observation_spec)
    sampled_step = tensor_spec.sample_spec_nest(
        time_step_spec, outer_dims=outer_dims)

    float_action = tensor_spec.BoundedTensorSpec((2,), tf.float32, 2, 3)
    int_action = tensor_spec.BoundedTensorSpec((3,), tf.int32, 0, 3)
    action_spec = [float_action, int_action]

    # One preprocessing layer per observation entry; the scalar entry is
    # reshaped to (1,) before it reaches its Dense layer.
    vector_branch = tf.keras.layers.Dense(4)
    scalar_branch = sequential_layer.SequentialLayer([
        tf.keras.layers.Reshape((1,)),
        tf.keras.layers.Dense(4),
    ])

    net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
        observation_spec,
        action_spec,
        preprocessing_layers=(vector_branch, scalar_branch),
        preprocessing_combiner=tf.keras.layers.Add())

    # The initial network state is obtained through a policy wrapping the net.
    policy = actor_policy.ActorPolicy(time_step_spec, action_spec, net)
    initial_state = policy.get_initial_state(outer_dims[0])

    action_distributions, _ = net(sampled_step.observation,
                                  sampled_step.step_type,
                                  initial_state)
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Each mode keeps the outer dims and ends with that action's own size.
    for index, action_size in enumerate((2, 3)):
        self.assertEqual(
            list(outer_dims) + [action_size],
            action_distributions[index].mode().shape.as_list())
    self.assertGreater(len(net.trainable_variables), 4)
def test_dict_spec_and_pre_processing(self):
    """Checks EncodingNetwork output size for a dict spec with preprocessing.

    Both inputs have shape (32, 32, 3). Input 'a' goes through Dense(4) and is
    then flattened; input 'b' is only flattened. The two results are
    concatenated along the last axis, and no fully connected layers follow
    (`fc_layer_params=()`).
    """
    input_spec = {
        'a': tensor_spec.TensorSpec((32, 32, 3), tf.float32),
        'b': tensor_spec.TensorSpec((32, 32, 3), tf.float32)
    }
    network = encoding_network.EncodingNetwork(
        input_spec,
        preprocessing_layers={
            'a': sequential_layer.SequentialLayer([
                tf.keras.layers.Dense(4, activation='tanh'),
                tf.keras.layers.Flatten()
            ]),
            'b': tf.keras.layers.Flatten()
        },
        fc_layer_params=(),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        activation_fn=tf.keras.activations.tanh,
    )

    sample_input = tensor_spec.sample_spec_nest(input_spec)
    output, _ = network(sample_input)
    # 7168 = 4096 + 3072:
    #   'a': Dense(4) maps the last axis 3 -> 4, so the flattened size is
    #        32 * 32 * 4 = 4096.
    #   'b': flattened as is, 32 * 32 * 3 = 3072.
    # (It is not 6144 = 2 * 3072, because 'a' is no longer 3 channels wide.)
    self.assertEqual((7168, ), output.shape)
def testCopy(self):
    """Checks that a SequentialLayer rebuilt from its config matches it.

    The clone is created with `from_config(get_config())`; it must hold two
    layers whose dtype, units and use_bias equal those of the source layers.
    """
    original = sequential_layer.SequentialLayer([
        tf.keras.layers.Dense(3),
        tf.keras.layers.Dense(4, use_bias=False),
    ])

    config = original.get_config()
    clone = type(original).from_config(config)

    self.assertLen(clone.layers, 2)
    compared_attributes = ('dtype', 'units', 'use_bias')
    for source_layer, cloned_layer in zip(original.layers, clone.layers):
        for attribute in compared_attributes:
            self.assertEqual(getattr(source_layer, attribute),
                             getattr(cloned_layer, attribute))
def testBuild(self):
    """Checks that Dense (no bias) followed by ReLU gives relu(inputs @ kernel).

    The expected value is recomputed in NumPy from the evaluated kernel of the
    first layer and compared with the evaluated layer output.
    """
    dense = tf.keras.layers.Dense(4, use_bias=False)
    relu = tf.keras.layers.ReLU()
    sequential = sequential_layer.SequentialLayer([dense, relu])

    inputs = np.ones((2, 3))
    output_tensor = sequential(inputs)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    actual = self.evaluate(output_tensor)

    # With use_bias=False the only weight of the first layer is its kernel.
    kernel = self.evaluate(sequential.layers[0].weights[0])
    # ReLU in NumPy: clamp negative pre-activations to zero.
    expected = np.maximum(np.dot(inputs, kernel), 0)

    self.assertAllClose(expected, actual)
def testTrainableVariables(self):
    """Checks variable bookkeeping of SequentialLayer around `trainable`.

    After building two Dense layers, the layer reports four variables, all
    trainable. After setting `trainable = False`, it reports no trainable
    variables while still holding four variables in total.
    """
    expected_variable_count = 4
    stack = sequential_layer.SequentialLayer([
        tf.keras.layers.Dense(3),
        tf.keras.layers.Dense(4),
    ])
    stack.build((3, 2))
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # While trainable: every variable is also a trainable variable.
    trainable_values = self.evaluate(stack.trainable_variables)
    self.assertLen(trainable_values, expected_variable_count)
    self.assertLen(stack.variables, expected_variable_count)
    self.assertTrue(stack.trainable)

    # After freezing: variables remain, but none are reported as trainable.
    stack.trainable = False
    self.assertFalse(stack.trainable)
    self.assertEmpty(stack.trainable_variables)
    self.assertLen(stack.variables, expected_variable_count)
def test_layers_buildable(self):
    """Checks that `create_variables()` gives the EncodingNetwork variables.

    The network takes a dict spec with a preprocessing layer per entry and a
    Concatenate combiner; after `create_variables()` its variable list must
    not be empty.
    """
    image_spec_a = tensor_spec.TensorSpec((32, 32, 3), tf.float32)
    image_spec_b = tensor_spec.TensorSpec((32, 32, 3), tf.float32)
    input_spec = {'a': image_spec_a, 'b': image_spec_b}

    # 'a' gets a Dense layer before flattening; 'b' is only flattened.
    branch_a = sequential_layer.SequentialLayer([
        tf.keras.layers.Dense(4, activation='tanh'),
        tf.keras.layers.Flatten(),
    ])
    branch_b = tf.keras.layers.Flatten()

    network = encoding_network.EncodingNetwork(
        input_spec,
        preprocessing_layers={'a': branch_a, 'b': branch_b},
        fc_layer_params=(),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        activation_fn=tf.keras.activations.tanh,
    )

    network.create_variables()
    self.assertNotEmpty(network.variables)
def test_handle_preprocessing_layers(self, outer_dims):
    """Checks ActorNetwork output shape with observation preprocessing.

    Builds an actor over a two-part observation, each part with its own
    preprocessing layer, combined with `Add`. Asserts that the action has
    shape `outer_dims + [2]` and that the network owns more than four
    trainable variables.

    Args:
      outer_dims: Outer (leading) dimensions used when sampling the time step.
    """
    vector_spec = tensor_spec.TensorSpec([1], tf.float32)
    scalar_spec = tensor_spec.TensorSpec([], tf.float32)
    observation_spec = (vector_spec, scalar_spec)

    sampled_step = tensor_spec.sample_spec_nest(
        ts.time_step_spec(observation_spec), outer_dims=outer_dims)
    action_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)

    # One preprocessing layer per observation entry; the scalar entry is
    # reshaped to (1,) before it reaches its Dense layer.
    vector_branch = tf.keras.layers.Dense(4)
    scalar_branch = sequential_layer.SequentialLayer([
        tf.keras.layers.Reshape((1, )),
        tf.keras.layers.Dense(4),
    ])

    net = actor_network.ActorNetwork(
        observation_spec,
        action_spec,
        preprocessing_layers=(vector_branch, scalar_branch),
        preprocessing_combiner=tf.keras.layers.Add())

    # The network is called with an empty tuple as its state.
    network_output = net(sampled_step.observation, sampled_step.step_type, ())
    action = network_output[0]

    expected_shape = list(outer_dims) + [2]
    self.assertEqual(expected_shape, action.shape.as_list())
    self.assertGreater(len(net.trainable_variables), 4)
def __init__(self,
             input_tensor_spec,
             observation_preprocessing_layers=None,
             observation_preprocessing_combiner=None,
             observation_conv_layer_params=None,
             observation_fc_layer_params=(200, ),
             action_fc_layer_params=(200, ),
             joint_fc_layer_params=(100, ),
             lstm_size=(40, ),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             dtype=tf.float32,
             name='CriticRnnNetwork'):
    """Creates an instance of `CriticRnnNetwork`.

    This CriticRnnNetwork supports handling complex observations with
    preprocessing_layer and preprocessing_combiner.

    Args:
      input_tensor_spec: A tuple of (observation, action) each of type
        `tensor_spec.TensorSpec` representing the inputs.
      observation_preprocessing_layers: (Optional.) A nest of
        `tf.keras.layers.Layer` representing preprocessing for the different
        observations. All of these layers must not be already built. For more
        details see the documentation of `networks.EncodingNetwork`.
      observation_preprocessing_combiner: (Optional.) A keras layer that takes
        a flat list of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      observation_conv_layer_params: Optional list of convolution layers
        parameters to apply to the observations, where each item is a
        length-three tuple indicating (filters, kernel_size, stride).
      observation_fc_layer_params: List of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after
        the observation convolutional layer. Must contain at least one entry:
        its last entry is used as the size of the observation encoding spec
        handed to the LSTM encoder.
      action_fc_layer_params: Optional list of parameters for a
        fully_connected layer to apply to the actions, where each item is the
        number of units in the layer.
      joint_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply after merging observations and actions, where each item
        is the number of units in the layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters,
        where each item is the number of units in the layer. This is applied
        after the LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      dtype: The dtype forwarded to the observation encoder and to the LSTM
        encoder.
      name: A string representing name of the network.

    Raises:
      ValueError: If `action_spec` contains more than one item.
    """
    observation_spec, action_spec = input_tensor_spec

    if len(tf.nest.flatten(action_spec)) > 1:
        raise ValueError(
            'Only a single action is supported by this network.')

    kernel_initializer = tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')

    # Encodes the (possibly nested) observation into a single vector.
    obs_encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=observation_preprocessing_layers,
        preprocessing_combiner=observation_preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        dtype=dtype,
        name='obs_encoding')

    # Fully connected stack applied to the action before it is merged with
    # the observation encoding.
    action_layers = sequential_layer.SequentialLayer(
        utils.mlp_layers(
            fc_layer_params=action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding'))

    # The observation encoding has as many units as the last observation
    # fully connected layer.
    # NOTE(review): the dtype here is hard-coded to tf.float32 although the
    # encoders above/below receive `dtype`; confirm whether this spec should
    # follow `dtype` when a non-default dtype is requested.
    obs_encoding_spec = tf.TensorSpec(
        shape=(observation_fc_layer_params[-1], ), dtype=tf.float32)

    # Joins the observation encoding with the encoded action (concatenated on
    # the last axis) and runs the result through the joint layers, the LSTM
    # and the output layers.
    lstm_encoder = lstm_encoding_network.LSTMEncodingNetwork(
        input_tensor_spec=(obs_encoding_spec, action_spec),
        preprocessing_layers=(tf.keras.layers.Flatten(), action_layers),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        input_fc_layer_params=joint_fc_layer_params,
        lstm_size=lstm_size,
        output_fc_layer_params=output_fc_layer_params,
        activation_fn=activation_fn,
        dtype=dtype,
        name='lstm')

    # Final linear layer producing a single value per input.
    output_layers = [
        tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='value')
    ]

    super(CriticRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=lstm_encoder.state_spec,
        name=name)

    self._obs_encoder = obs_encoder
    self._lstm_encoder = lstm_encoder
    self._output_layers = output_layers