Exemplo n.º 1
0
    def test_network_can_preprocess_and_combine_no_time_dim(self):
        """Runs QRnnNetwork on a nested observation without a time axis.

        The observation is a pair of tensors shaped [batch, 1] and [batch].
        Each element goes through its own preprocessing layer, the results
        are merged with an `Add` combiner, and the Q-values that come out
        are expected to be shaped [batch, num_actions].
        """
        # Skipped under eager execution; see the referenced bug id.
        if tf.executing_eagerly():
            self.skipTest('b/123776211')

        batch_size = 3
        num_actions = 2
        lstm_size = 5

        # Nested observation: one rank-2 input and one rank-1 input.
        vector_obs = tf.random.uniform([batch_size, 1])
        scalar_obs = tf.random.uniform([batch_size])
        states = (vector_obs, scalar_obs)

        # One preprocessing layer per observation element.
        vector_branch = tf.keras.layers.Dense(4)
        scalar_branch = tf.keras.Sequential([
            # Convert to vec size (1,) so Dense can consume it.
            expand_dims_layer.ExpandDims(-1),
            tf.keras.layers.Dense(4),
        ])
        preprocessing_layers = (vector_branch, scalar_branch)

        observation_spec = (
            tensor_spec.TensorSpec([1], tf.float32),
            tensor_spec.TensorSpec([], tf.float32),
        )
        action_spec = tensor_spec.BoundedTensorSpec(
            [1], tf.int32, 0, num_actions - 1)

        network = q_rnn_network.QRnnNetwork(
            input_tensor_spec=observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=tf.keras.layers.Add(),
            lstm_size=(lstm_size, ),
            action_spec=action_spec)

        # One FIRST step type per batch entry (no time dimension).
        first_steps = [time_step.StepType.FIRST] * batch_size
        empty_step_type = tf.constant(first_steps)
        q_values, _ = network(states, empty_step_type)

        # Processed 1 time step and the time axis was squeezed back.
        expected_shape = [batch_size, num_actions]
        self.assertAllEqual(q_values.shape.as_list(), expected_shape)

        # At least 2 variables each for the preprocessing layers.
        variable_count = len(network.trainable_variables)
        self.assertGreater(variable_count, 4)
Exemplo n.º 2
0
 def test_network_can_preprocess_and_combine(self):
     """Runs QRnnNetwork on a nested observation that has a time axis.

     The observation is a pair of tensors shaped [batch, frames, 1] and
     [batch, frames]. Each element goes through its own preprocessing
     layer, the results are merged with an `Add` combiner, and the
     Q-values that come out are expected to be shaped
     [batch, frames, num_actions].
     """
     batch_size = 3
     frames = 5
     num_actions = 2
     lstm_size = 6

     # Nested observation: one rank-3 input and one rank-2 input.
     vector_obs = tf.random.uniform([batch_size, frames, 1])
     scalar_obs = tf.random.uniform([batch_size, frames])
     states = (vector_obs, scalar_obs)

     # One preprocessing layer per observation element.
     vector_branch = tf.keras.layers.Dense(4)
     scalar_branch = tf.keras.Sequential([
         # Convert to vec size (1,) so Dense can consume it.
         expand_dims_layer.ExpandDims(-1),
         tf.keras.layers.Dense(4),
     ])
     preprocessing_layers = (vector_branch, scalar_branch)

     observation_spec = (
         tensor_spec.TensorSpec([1], tf.float32),
         tensor_spec.TensorSpec([], tf.float32),
     )
     action_spec = tensor_spec.BoundedTensorSpec(
         [1], tf.int32, 0, num_actions - 1)

     network = q_rnn_network.QRnnNetwork(
         input_tensor_spec=observation_spec,
         preprocessing_layers=preprocessing_layers,
         preprocessing_combiner=tf.keras.layers.Add(),
         lstm_size=(lstm_size, ),
         action_spec=action_spec)

     # A [batch_size, frames] grid of FIRST step types.
     first_steps = [
         [time_step.StepType.FIRST] * frames for _ in range(batch_size)
     ]
     empty_step_type = tf.constant(first_steps)
     q_values, _ = network(states, empty_step_type)

     expected_shape = [batch_size, frames, num_actions]
     self.assertAllEqual(q_values.shape.as_list(), expected_shape)

     # At least 2 variables each for the preprocessing layers.
     variable_count = len(network.trainable_variables)
     self.assertGreater(variable_count, 4)
    def testTrainWithNN(self, is_convert, is_distribution_network):
        """Checks that training a BehavioralCloningAgent lowers its loss.

        Args:
            is_convert: when true, the trajectory is passed through
                `common.transpose_batch_time` before training.
            is_distribution_network: when true, the cloning network is a
                `Sequential` ending in a Categorical distribution;
                otherwise a `QNetwork` is used.
        """
        # Hard code a trajectory shaped (time=6, batch=1, ...).
        traj, time_step_spec, action_spec = create_arbitrary_trajectory()

        if is_convert:
            # Convert to single step trajectory of shapes (batch=6, 1, ...).
            traj = tf.nest.map_structure(common.transpose_batch_time, traj)

        if not is_distribution_network:
            cloning_net = q_network.QNetwork(time_step_spec.observation,
                                             action_spec)
        else:
            # One logit per discrete action in [minimum, maximum].
            num_logits = action_spec.maximum - action_spec.minimum + 1
            cloning_net = sequential.Sequential([
                expand_dims_layer.ExpandDims(-1),
                tf.keras.layers.Dense(num_logits),
                tf.keras.layers.Lambda(
                    lambda t: tfp.distributions.Categorical(logits=t)),
            ])

        adam = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
        agent = behavioral_cloning_agent.BehavioralCloningAgent(
            time_step_spec,
            action_spec,
            cloning_network=cloning_net,
            optimizer=adam,
            num_outer_dims=2)
        # Disable clipping to make sure we can see the difference in behavior
        agent.policy._clip = False

        # TODO(b/123883319)
        # Eager mode needs a callable so every evaluate() runs a new train
        # step; graph mode builds the train op once and re-runs it.
        if tf.executing_eagerly():

            def train_and_loss():
                return agent.train(traj)
        else:
            train_and_loss = agent.train(traj)

        # Variables are initialised only after the train op/callable exists.
        self.evaluate(tf.compat.v1.global_variables_initializer())

        initial_loss = self.evaluate(train_and_loss).loss
        remaining_steps = TRAIN_ITERATIONS - 1
        for _ in range(remaining_steps):
            latest_loss = self.evaluate(train_and_loss).loss

        # We don't necessarily converge to the same actions as in the
        # trajectory after TRAIN_ITERATIONS steps of an untuned optimizer,
        # but the loss should go down.
        self.assertGreater(initial_loss, latest_loss)
Exemplo n.º 4
0
def get_observation_processing_layer_creator(quantile_file_dir,
                                             with_z_score_normalization=False,
                                             eps=1e-8):
  """Builds a factory that turns an observation spec into a Keras layer.

  Args:
    quantile_file_dir: forwarded to `_build_quantile_map`, whose result is
      looked up by `obs_spec.name` and unpacked as (quantile, mean, std).
    with_z_score_normalization: when true, a `(obs - mean) / (std + eps)`
      feature is appended to the bucket-based features.
    eps: added to `std` in the z-score denominator.

  Returns:
    A function taking an `obs_spec` and returning a `tf.keras.layers.Lambda`
    that preprocesses observations for that spec.
  """
  quantile_map = _build_quantile_map(quantile_file_dir)
  # A single ExpandDims layer shared by every generated preprocessing function.
  expand_dims_op = expand_dims_layer.ExpandDims(-1)

  def observation_processing_layer(obs_spec):
    """Returns the Lambda layer that preprocesses observations of obs_spec."""
    if obs_spec.name in quantile_map:
      quantile, mean, std = quantile_map[obs_spec.name]

      def normalization(obs):
        obs_column = expand_dims_op(obs)
        # Bucket index over the quantile boundaries, scaled by their count.
        bucket_index = tf.raw_ops.Bucketize(
            input=obs_column, boundaries=quantile)
        bucket_ratio = tf.cast(bucket_index, tf.float32) / len(quantile)
        features = [bucket_ratio, tf.sqrt(bucket_ratio),
                    bucket_ratio * bucket_ratio]
        if with_z_score_normalization:
          z_score = (tf.cast(obs_column, tf.float32) - mean) / (std + eps)
          features.append(z_score)
        return tf.concat(features, axis=-1)

      return tf.keras.layers.Lambda(normalization)

    # No statistics for this feature. Returning an empty tensor would make
    # tf.keras.layers.Concatenate fail, so emit zeros of the expanded shape.
    def discard_feature(obs):
      obs_column = expand_dims_op(obs)
      return tf.zeros_like(obs_column, dtype=tf.float32)

    return tf.keras.layers.Lambda(discard_feature)

  return observation_processing_layer