Пример #1
0
    def test_handle_preprocessing_layers(self, outer_dims):
        """Checks CriticNetwork Q-value shape with preprocessed observations.

        The observation is a tuple of a `[1]`-shaped and a scalar float32
        spec. Each element gets its own preprocessing layer and the results
        are merged with `tf.keras.layers.Add` before the critic consumes them.

        Args:
            outer_dims: Outer (batch/time) dimensions used for the sampled
                time step and actions; presumably supplied by a parameterized
                test decorator that is not visible here.
        """
        batch_shape = list(outer_dims)
        action_size = 2

        vector_obs_spec = tensor_spec.TensorSpec([1], tf.float32)
        scalar_obs_spec = tensor_spec.TensorSpec([], tf.float32)
        obs_spec = (vector_obs_spec, scalar_obs_spec)
        sampled_step = tensor_spec.sample_spec_nest(
            ts.time_step_spec(obs_spec), outer_dims=outer_dims)

        act_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)

        sampled_actions = tf.random.uniform(batch_shape + [action_size])

        # One preprocessing layer per observation element, in tuple order.
        vector_branch = tf.keras.layers.Dense(4)
        scalar_branch = sequential_layer.SequentialLayer([
            tf.keras.layers.Reshape((1, )),
            tf.keras.layers.Dense(4),
        ])

        network = critic_network.CriticNetwork(
            (obs_spec, act_spec),
            observation_preprocessing_layers=(vector_branch, scalar_branch),
            observation_preprocessing_combiner=tf.keras.layers.Add())

        q_values, _unused_state = network(
            (sampled_step.observation, sampled_actions))

        # The critic emits one Q-value per outer (batch/time) index.
        self.assertAllEqual(q_values.shape.as_list(), batch_shape)
Пример #2
0
  def testHandlePreprocessingLayers(self):
    """Runs ActorDistributionRnnNetwork over preprocessed tuple observations.

    The observation is a tuple of a `[1]`-shaped and a scalar float32 spec.
    Each element gets its own preprocessing layer and the results are merged
    with `tf.keras.layers.Add`. The test asserts that the mode of every action
    distribution has shape `outer_dims + action_shape` and that the network
    owns more than four trainable variables.
    """
    vector_obs_spec = tensor_spec.TensorSpec([1], tf.float32)
    scalar_obs_spec = tensor_spec.TensorSpec([], tf.float32)
    obs_spec = (vector_obs_spec, scalar_obs_spec)
    step_spec = ts.time_step_spec(obs_spec)
    sampled_step = tensor_spec.sample_spec_nest(step_spec, outer_dims=(3, 4))

    float_action_spec = tensor_spec.BoundedTensorSpec((2,), tf.float32, 2, 3)
    int_action_spec = tensor_spec.BoundedTensorSpec((3,), tf.int32, 0, 3)
    act_spec = [float_action_spec, int_action_spec]

    # One preprocessing layer per observation element, in tuple order.
    vector_branch = tf.keras.layers.Dense(4)
    scalar_branch = sequential_layer.SequentialLayer([
        tf.keras.layers.Reshape((1,)),
        tf.keras.layers.Dense(4),
    ])

    rnn_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
        obs_spec,
        act_spec,
        preprocessing_layers=(vector_branch, scalar_branch),
        preprocessing_combiner=tf.keras.layers.Add())

    # The policy is only used to obtain an initial state for batch size 3.
    policy = actor_policy.ActorPolicy(step_spec, act_spec, rnn_net)
    policy_state = policy.get_initial_state(3)

    distributions, _ = rnn_net(
        sampled_step.observation, sampled_step.step_type, policy_state)

    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Expected mode shape per action: outer dims (3, 4) + action shape.
    expected_mode_shapes = ([3, 4, 2], [3, 4, 3])
    for index, expected_shape in enumerate(expected_mode_shapes):
      self.assertEqual(expected_shape,
                       distributions[index].mode().shape.as_list())
    self.assertGreater(len(rnn_net.trainable_variables), 4)
Пример #3
0
    def test_dict_spec_and_pre_processing(self):
        """Checks EncodingNetwork output size for dict inputs with preprocessing.

        Both inputs have shape (32, 32, 3). Input 'a' goes through `Dense(4)`
        followed by `Flatten`; input 'b' is only flattened. The two branches
        are concatenated on the last axis and no fully connected layers are
        added on top.
        """
        image_shape = (32, 32, 3)
        input_spec = {
            key: tensor_spec.TensorSpec(image_shape, tf.float32)
            for key in ('a', 'b')
        }

        dense_then_flatten = sequential_layer.SequentialLayer([
            tf.keras.layers.Dense(4, activation='tanh'),
            tf.keras.layers.Flatten(),
        ])
        flatten_only = tf.keras.layers.Flatten()
        branches = {'a': dense_then_flatten, 'b': flatten_only}
        combiner = tf.keras.layers.Concatenate(axis=-1)

        network = encoding_network.EncodingNetwork(
            input_spec,
            preprocessing_layers=branches,
            fc_layer_params=(),
            preprocessing_combiner=combiner,
            activation_fn=tf.keras.activations.tanh,
        )

        encoded, _ = network(tensor_spec.sample_spec_nest(input_spec))

        # 7168 = 32 * 32 * 4 (branch 'a': Dense(4) maps the last axis 3 -> 4
        # before flattening) + 32 * 32 * 3 (branch 'b': plain flatten).
        self.assertEqual((7168, ), encoded.shape)
Пример #4
0
 def testCopy(self):
     """Checks a SequentialLayer rebuilt from its config mirrors the original.

     The clone is created with `from_config(get_config())`; it must contain
     two layers whose `dtype`, `units` and `use_bias` match the originals.
     """
     original = sequential_layer.SequentialLayer([
         tf.keras.layers.Dense(3),
         tf.keras.layers.Dense(4, use_bias=False),
     ])
     config = original.get_config()
     clone = type(original).from_config(config)

     self.assertLen(clone.layers, 2)
     compared_attributes = ('dtype', 'units', 'use_bias')
     for source_layer, cloned_layer in zip(original.layers, clone.layers):
         for attribute in compared_attributes:
             self.assertEqual(getattr(source_layer, attribute),
                              getattr(cloned_layer, attribute))
Пример #5
0
 def testBuild(self):
     """Checks SequentialLayer output against a NumPy reference computation.

     The layer stacks a bias-free `Dense(4)` and a `ReLU`, so the expected
     output is `max(inputs @ kernel, 0)` computed with the evaluated kernel.
     """
     model = sequential_layer.SequentialLayer([
         tf.keras.layers.Dense(4, use_bias=False),
         tf.keras.layers.ReLU(),
     ])
     inputs = np.ones((2, 3))

     output_tensor = model(inputs)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     actual = self.evaluate(output_tensor)

     # The first weight of the first layer is the Dense kernel (no bias).
     kernel = self.evaluate(model.layers[0].weights[0])
     expected = np.maximum(np.dot(inputs, kernel), 0)

     self.assertAllClose(expected, actual)
Пример #6
0
 def testTrainableVariables(self):
     """Checks variable bookkeeping of SequentialLayer when toggling trainable.

     Two Dense layers yield four variables. After setting `trainable` to
     False the layer must report no trainable variables while still
     exposing all four through `variables`.
     """
     expected_variable_count = 4
     model = sequential_layer.SequentialLayer([
         tf.keras.layers.Dense(3),
         tf.keras.layers.Dense(4),
     ])
     model.build((3, 2))
     self.evaluate(tf.compat.v1.global_variables_initializer())

     # Trainable by default: every variable is reported as trainable.
     evaluated_trainables = self.evaluate(model.trainable_variables)
     self.assertLen(evaluated_trainables, expected_variable_count)
     self.assertLen(model.variables, expected_variable_count)
     self.assertTrue(model.trainable)

     # Frozen: variables still exist but none are trainable.
     model.trainable = False
     self.assertFalse(model.trainable)
     self.assertEmpty(model.trainable_variables)
     self.assertLen(model.variables, expected_variable_count)
Пример #7
0
 def test_layers_buildable(self):
     """Checks `create_variables` builds an EncodingNetwork with preprocessing.

     The network takes two (32, 32, 3) inputs: 'a' goes through `Dense(4)`
     followed by `Flatten`, 'b' is only flattened, and both are concatenated
     on the last axis. After `create_variables()` the network must own at
     least one variable.
     """
     image_shape = (32, 32, 3)
     input_spec = {
         key: tensor_spec.TensorSpec(image_shape, tf.float32)
         for key in ('a', 'b')
     }

     dense_then_flatten = sequential_layer.SequentialLayer([
         tf.keras.layers.Dense(4, activation='tanh'),
         tf.keras.layers.Flatten(),
     ])
     flatten_only = tf.keras.layers.Flatten()
     branches = {'a': dense_then_flatten, 'b': flatten_only}
     combiner = tf.keras.layers.Concatenate(axis=-1)

     network = encoding_network.EncodingNetwork(
         input_spec,
         preprocessing_layers=branches,
         fc_layer_params=(),
         preprocessing_combiner=combiner,
         activation_fn=tf.keras.activations.tanh,
     )

     # No forward pass is run; variables come from create_variables() alone.
     network.create_variables()
     self.assertNotEmpty(network.variables)
Пример #8
0
    def test_handle_preprocessing_layers(self, outer_dims):
        """Checks ActorNetwork action shape with preprocessed observations.

        The observation is a tuple of a `[1]`-shaped and a scalar float32
        spec. Each element gets its own preprocessing layer and the results
        are merged with `tf.keras.layers.Add`.

        Args:
            outer_dims: Outer (batch/time) dimensions used for the sampled
                time step; presumably supplied by a parameterized test
                decorator that is not visible here.
        """
        vector_obs_spec = tensor_spec.TensorSpec([1], tf.float32)
        scalar_obs_spec = tensor_spec.TensorSpec([], tf.float32)
        obs_spec = (vector_obs_spec, scalar_obs_spec)
        sampled_step = tensor_spec.sample_spec_nest(
            ts.time_step_spec(obs_spec), outer_dims=outer_dims)

        act_spec = tensor_spec.BoundedTensorSpec((2, ), tf.float32, 2, 3)

        # One preprocessing layer per observation element, in tuple order.
        vector_branch = tf.keras.layers.Dense(4)
        scalar_branch = sequential_layer.SequentialLayer([
            tf.keras.layers.Reshape((1, )),
            tf.keras.layers.Dense(4),
        ])

        actor = actor_network.ActorNetwork(
            obs_spec,
            act_spec,
            preprocessing_layers=(vector_branch, scalar_branch),
            preprocessing_combiner=tf.keras.layers.Add())

        # An empty tuple is passed as the network state.
        action, _ = actor(sampled_step.observation, sampled_step.step_type,
                          ())

        expected_shape = list(outer_dims) + [2]
        self.assertEqual(expected_shape, action.shape.as_list())
        self.assertGreater(len(actor.trainable_variables), 4)
Пример #9
0
    def __init__(self,
                 input_tensor_spec,
                 observation_preprocessing_layers=None,
                 observation_preprocessing_combiner=None,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=(200, ),
                 action_fc_layer_params=(200, ),
                 joint_fc_layer_params=(100, ),
                 lstm_size=(40, ),
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 dtype=tf.float32,
                 name='CriticRnnNetwork'):
        """Creates an instance of `CriticRnnNetwork`.

        This CriticRnnNetwork supports handling complex observations with
        `observation_preprocessing_layers` and
        `observation_preprocessing_combiner`.

        Args:
            input_tensor_spec: A tuple of (observation, action) each of type
                `tensor_spec.TensorSpec` representing the inputs.
            observation_preprocessing_layers: (Optional.) A nest of
                `tf.keras.layers.Layer` representing preprocessing for the
                different observations. All of these layers must not be
                already built. For more details see the documentation of
                `networks.EncodingNetwork`.
            observation_preprocessing_combiner: (Optional.) A keras layer that
                takes a flat list of tensors and combines them. Good options
                include `tf.keras.layers.Add` and
                `tf.keras.layers.Concatenate(axis=-1)`. This layer must not be
                already built. For more details see the documentation of
                `networks.EncodingNetwork`.
            observation_conv_layer_params: Optional list of convolution layers
                parameters to apply to the observations, where each item is a
                length-three tuple indicating (filters, kernel_size, stride).
            observation_fc_layer_params: Optional list of fully_connected
                parameters, where each item is the number of units in the
                layer. This is applied after the observation convolutional
                layer. Its last item is also used as the size of the
                observation encoding handed to the LSTM encoder.
            action_fc_layer_params: Optional list of parameters for a
                fully_connected layer to apply to the actions, where each item
                is the number of units in the layer.
            joint_fc_layer_params: Optional list of parameters for a
                fully_connected layer to apply after merging observations and
                actions, where each item is the number of units in the layer.
                Pass a sequence even for a single layer, e.g. `(100, )`.
            lstm_size: An iterable of ints specifying the LSTM cell sizes to
                use.
            output_fc_layer_params: Optional list of fully_connected
                parameters, where each item is the number of units in the
                layer. This is applied after the LSTM cell.
            activation_fn: Activation function, e.g. tf.nn.relu,
                slim.leaky_relu, ...
            dtype: The dtype forwarded to the observation encoder and to the
                LSTM encoder.
            name: A string representing name of the network.

        Raises:
            ValueError: If the action part of `input_tensor_spec` contains
                more than one item.
        """
        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(action_spec)) > 1:
            raise ValueError(
                'Only a single action is supported by this network.')

        kernel_initializer = tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal')

        # Encodes (optionally preprocessed) observations into a flat vector.
        obs_encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=observation_preprocessing_layers,
            preprocessing_combiner=observation_preprocessing_combiner,
            conv_layer_params=observation_conv_layer_params,
            fc_layer_params=observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            dtype=dtype,
            name='obs_encoding')

        # The action branch uses its own uniform variance-scaling initializer.
        action_kernel_initializer = (
            tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'))
        action_layers = sequential_layer.SequentialLayer(
            utils.mlp_layers(fc_layer_params=action_fc_layer_params,
                             activation_fn=activation_fn,
                             kernel_initializer=action_kernel_initializer,
                             name='action_encoding'))

        # Spec of the observation encoding fed to the LSTM encoder; its size
        # is taken from the last observation fully connected layer.
        # NOTE(review): indexing `[-1]` fails when
        # `observation_fc_layer_params` is None or empty, although the
        # argument is documented as optional - confirm the intended contract.
        # NOTE(review): the spec dtype is hard-coded to tf.float32 while the
        # encoders receive `dtype` - confirm whether it should follow `dtype`.
        obs_encoding_spec = tf.TensorSpec(
            shape=(observation_fc_layer_params[-1], ), dtype=tf.float32)

        # Joins the observation encoding with the encoded action and runs the
        # joint fully connected layers, the LSTM and the output layers.
        lstm_encoder = lstm_encoding_network.LSTMEncodingNetwork(
            input_tensor_spec=(obs_encoding_spec, action_spec),
            preprocessing_layers=(tf.keras.layers.Flatten(), action_layers),
            preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
            input_fc_layer_params=joint_fc_layer_params,
            lstm_size=lstm_size,
            output_fc_layer_params=output_fc_layer_params,
            activation_fn=activation_fn,
            dtype=dtype,
            name='lstm')

        # Single linear unit with a small uniform init producing the value.
        output_layers = [
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value')
        ]

        # The recurrent state spec of this network is that of the LSTM
        # encoder.
        super(CriticRnnNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=lstm_encoder.state_spec,
                             name=name)

        self._obs_encoder = obs_encoder
        self._lstm_encoder = lstm_encoder
        self._output_layers = output_layers