Example No. 1
    def call(self, observations, step_type=(), network_state=()):

        # observations = self._bn_layer(observations)
        tempt = self._dense1(observations)
        tempt = self._dense2(tempt)
        tempt = self._dense3(tempt)
        actions = self._action_projection_layer(tempt)
        actions = common_utils.scale_to_spec(actions, self._action_spec)

        return actions, network_state
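All of these snippets route the raw network output through scale_to_spec, which affinely maps a tensor assumed to lie in [-1, 1] onto the spec's bounds. A minimal sketch of that mapping, mirroring the midpoint-plus-half-range convention of the TF-Agents helper (sketch_scale_to_spec is my own name, not the library's):

import tensorflow as tf

def sketch_scale_to_spec(tensor, spec):
    """Map a [-1, 1] tensor onto [spec.minimum, spec.maximum]."""
    means = (spec.maximum + spec.minimum) / 2.0
    magnitudes = (spec.maximum - spec.minimum) / 2.0
    return means + magnitudes * tensor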
Example No. 2
    def call(self, observations, step_type=(), network_state=()):
        del step_type  # unused.
        observations = nest.flatten(observations)
        output = tf.cast(observations[0], tf.float32)
        for layer in self._mlp_layers:
            output = layer(output)

        actions = common_utils.scale_to_spec(output, self._single_action_spec)
        return nest.pack_sequence_as(self._action_spec,
                                     [actions]), network_state
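pack_sequence_as rebuilds the flat list of action tensors into the structure of the action spec. A stand-alone illustration with a hypothetical one-entry structure, using the modern tf.nest in place of the older nest module imported above:

import tensorflow as tf

structure = {'steer': None}  # hypothetical nest; only its shape matters
flat_actions = [tf.constant([0.5])]
packed = tf.nest.pack_sequence_as(structure, flat_actions)
# packed == {'steer': <tf.Tensor [0.5]>}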
Example No. 3
  def call(self, observations, step_type=(), network_state=(), training=False):
    del step_type  # unused.
    observations = tf.nest.flatten(observations)
    output = tf.cast(observations[0], tf.float32)
    for layer in self._mlp_layers:
      output = layer(output, training=training)

    actions = common.scale_to_spec(output, self._single_action_spec)
    output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec,
                                              [actions])

    return output_actions, network_state
Example No. 4
  def call(self, observations, step_type=(), network_state=()):
    del step_type  # unused.
    observations = tf.cast(nest.flatten(observations)[0], tf.float32)
    output = self._layer(observations)
    actions = tf.reshape(output,
                         [-1] + self._single_action_spec.shape.as_list())

    if not self._unbounded_actions:
      actions = common_utils.scale_to_spec(actions, self._single_action_spec)

    output_actions = nest.pack_sequence_as(self._output_tensor_spec, [actions])
    return output_actions, network_state
Example No. 5
    def call(self, observation, step_type, network_state=(), training=False):
        num_outer_dims = nest_utils.get_outer_rank(observation,
                                                   self.input_tensor_spec)
        if num_outer_dims not in (1, 2):
            raise ValueError(
                'Input observation must have a batch or batch x time outer shape.'
            )

        has_time_dim = num_outer_dims == 2
        if not has_time_dim:
            # Add a time dimension to the inputs.
            observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                                observation)
            step_type = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                              step_type)

        states = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
        batch_squash = utils.BatchSquash(2)  # Squash B and T dims.
        states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

        for layer in self._input_layers:
            states = layer(states, training=training)

        states = batch_squash.unflatten(states)  # [B x T, ...] -> [B, T, ...]

        with tf.name_scope('reset_mask'):
            reset_mask = tf.equal(step_type, time_step.StepType.FIRST)
        # Unroll over the time sequence.
        states, network_state = self._dynamic_unroll(
            states,
            reset_mask=reset_mask,
            initial_state=network_state,
            training=training)

        states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

        for layer in self._output_layers:
            states = layer(states, training=training)

        actions = []
        for layer, spec in zip(self._action_layers, self._flat_action_spec):
            action = layer(states, training=training)
            action = common.scale_to_spec(action, spec)
            action = batch_squash.unflatten(
                action)  # [B x T, ...] -> [B, T, ...]
            if not has_time_dim:
                action = tf.squeeze(action, axis=1)
            actions.append(action)

        output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec,
                                                  actions)
        return output_actions, network_state
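BatchSquash(2) merges the batch and time dimensions before the per-step layers and restores them afterwards. A small shape walkthrough with hypothetical sizes:

import tensorflow as tf
from tf_agents.networks import utils

x = tf.zeros([4, 7, 16])                 # [B=4, T=7, features]
batch_squash = utils.BatchSquash(2)
flat = batch_squash.flatten(x)           # [28, 16]: B and T merged
restored = batch_squash.unflatten(flat)  # back to [4, 7, 16]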
Example No. 6
    def testScaleToSpec(self):
        value = tf.constant([[1, -1], [0.5, -0.5], [1.0, 0.0]])
        spec = tensor_spec.BoundedTensorSpec(
            (3, 2),
            tf.float32,
            [[-5, -5], [-4, -4], [-2, -6]],
            [[5, 5], [4, 4], [2, 6]],
        )
        expected_scaled_value = np.array([[5.0, -5.0], [2.0, -2.0], [2.0, 0.0]])
        scaled_value = common.scale_to_spec(value, spec)

        scaled_value_ = self.evaluate(scaled_value)
        self.assertAllClose(expected_scaled_value, scaled_value_)
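Working the first row of the test by hand confirms the scaling: with bounds [-5, 5] the midpoint is 0 and the half-range is 5, so inputs 1.0 and -1.0 map to 5.0 and -5.0.

# Row 1 check: mean = (5 + -5)/2 = 0, magnitude = (5 - -5)/2 = 5
# scale(1.0)  = 0 + 5 * 1.0    =  5.0
# scale(-1.0) = 0 + 5 * (-1.0) = -5.0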
Example No. 7
    def call(self, inputs, step_type=(), network_state=()):
        observations, actions = inputs
        observations = self._preprocess_observation_layer(observations)
        actions = self._preprocess_action_layer(actions)

        tempt = self._combine_layer([observations, actions])
        tempt = self._dense1(tempt)
        tempt = self._dense2(tempt)

        q_values = self._q_value_projection_layer(tempt)
        q_values = common_utils.scale_to_spec(q_values, self._q_value_spec)

        return q_values, network_state
Example No. 8
    def call(self, observations, step_type=(), network_state=()):
        outer_rank = nest_utils.get_outer_rank(observations,
                                               self.input_tensor_spec)
        # We use batch_squash here in case the observations have a time
        # sequence component.
        batch_squash = utils.BatchSquash(outer_rank)
        observations = tf.nest.map_structure(batch_squash.flatten,
                                             observations)

        state, network_state = self._encoder(observations,
                                             step_type=step_type,
                                             network_state=network_state)
        actions = self._action_projection_layer(state)
        actions = common_utils.scale_to_spec(actions, self._single_action_spec)
        actions = batch_squash.unflatten(actions)
        return tf.nest.pack_sequence_as(self._action_spec,
                                        [actions]), network_state
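nest_utils.get_outer_rank counts how many leading dimensions sit outside the spec's shape; that count is what drives the batch/time handling in these examples. A quick illustration with hypothetical shapes:

import tensorflow as tf
from tf_agents.specs import tensor_spec
from tf_agents.utils import nest_utils

spec = tensor_spec.TensorSpec((3,), tf.float32)
nest_utils.get_outer_rank(tf.zeros([8, 3]), spec)     # 1: batch only
nest_utils.get_outer_rank(tf.zeros([8, 5, 3]), spec)  # 2: batch x time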
Example No. 9
    def call(self,
             observations,
             step_type=(),
             network_state=(),
             training=False):

        state, network_state = self._encoder(observations,
                                             step_type=step_type,
                                             network_state=network_state,
                                             training=training)

        output = self._action_layer(state, training=training)

        actions = common.scale_to_spec(output, self._single_action_spec)
        output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec,
                                                  [actions])

        return output_actions, network_state
Example No. 10
    def call(self, observations, step_type=(), network_state=()):

        outer_rank = nest_utils.get_outer_rank(observations,
                                               self.input_tensor_spec)

        batch_squash = BatchSquash(outer_rank)
        observations = nest.map_structure(batch_squash.flatten, observations)

        state, network_state = self._encoder(observations,
                                             step_type=step_type,
                                             network_state=network_state)

        actions = self._action_projection_layer(state)
        actions = scale_to_spec(actions, self._single_action_spec)
        actions = batch_squash.unflatten(actions)

        return nest.pack_sequence_as(self._action_spec,
                                     [actions]), network_state
Example No. 11
    def call(self, observation, step_type, network_state=None):
        outer_rank = nest_utils.get_outer_rank(observation,
                                               self.input_tensor_spec)
        batch_squash = utils.BatchSquash(outer_rank)

        observation, network_state = self._lstm_encoder(
            observation, step_type=step_type, network_state=network_state)

        states = batch_squash.flatten(observation)

        actions = []
        for layer, spec in zip(self._action_layers, self._flat_action_spec):
            action = layer(states)
            action = common.scale_to_spec(action, spec)
            action = batch_squash.unflatten(action)
            actions.append(action)

        output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec,
                                                  actions)
        return output_actions, network_state
Example No. 12
def create_actor_network(fc_layer_units, action_spec):
    """Create an actor network for DDPG."""
    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action tensor is supported by this network')
    flat_action_spec = flat_action_spec[0]

    # dense() is assumed to be a module-level helper that builds a
    # fully-connected tf.keras layer.
    fc_layers = [dense(num_units) for num_units in fc_layer_units]

    num_actions = flat_action_spec.shape.num_elements()
    action_fc_layer = tf.keras.layers.Dense(
        num_actions,
        activation=tf.keras.activations.tanh,
        kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.003,
                                                               maxval=0.003))

    scaling_layer = tf.keras.layers.Lambda(
        lambda x: common.scale_to_spec(x, flat_action_spec))
    return sequential.Sequential(fc_layers + [action_fc_layer, scaling_layer])
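A hypothetical construction of this actor network; the bounded spec and layer sizes below are made up for illustration, and dense is assumed to be the helper noted above:

import tensorflow as tf
from tf_agents.specs import tensor_spec

# Hypothetical bounded spec for a 2-D continuous action.
action_spec = tensor_spec.BoundedTensorSpec((2,), tf.float32,
                                            minimum=-1.0, maximum=1.0)
actor_net = create_actor_network(fc_layer_units=(256, 256),
                                 action_spec=action_spec)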
Example No. 13
    def call(self, observations, step_type=(), network_state=()):
        outer_rank = nest_utils.get_outer_rank(observations,
                                               self.input_tensor_spec)
        # We use batch_squash here in case the observations have a time
        # sequence component.
        batch_squash = utils.BatchSquash(outer_rank)
        observations = tf.nest.map_structure(batch_squash.flatten,
                                             observations)

        state, network_state = self._encoder(observations,
                                             step_type=step_type,
                                             network_state=network_state)
        actions = self._action_projection_layer(state)
        actions = common_utils.scale_to_spec(actions, self._action_spec)
        actions = batch_squash.unflatten(actions)
        return tf.nest.pack_sequence_as(self._action_spec,
                                        [actions]), network_state


####ACTOR TEST####
#action_spec = array_spec.BoundedArraySpec((6,), np.float32, minimum=0, maximum=10)
#observation_spec = array_spec.BoundedArraySpec((64, 64, 3), np.float32, minimum=0,
#                                        maximum=255)
#
#random_env = random_py_environment.RandomPyEnvironment(observation_spec, action_spec=action_spec)
#
## Convert the environment to a TFEnv to generate tensors.
#tf_env = tf_py_environment.TFPyEnvironment(random_env)
#
##preprocessing_layers = {
##    'image': tf.keras.models.Sequential([tf.keras.layers.Conv2D(8, 4),
##                                        tf.keras.layers.Flatten()]),
##    'vector': tf.keras.layers.Dense(5)
##    }
##preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)
#actor = ActorNetwork(tf_env.observation_spec(),
#                     tf_env.action_spec())
#
#time_step = tf_env.reset()
##print(actor(time_step.observation,time_step.step_type))
Example No. 14
  def call(self, observation, step_type, network_state=None, training=False):
    # Preprocess for multiple observations
    if self._flat_preprocessing_layers is None:
      processed = observation
    else:
      processed = []
      for obs, layer in zip(
          nest.flatten_up_to(
              self._preprocessing_nest, observation, check_types=False),
          self._flat_preprocessing_layers):
        processed.append(layer(obs, training=training))
      if len(processed) == 1 and self._preprocessing_combiner is None:
        # If only one observation is passed and the preprocessing_combiner
        # is unspecified, use the preprocessed version of this observation.
        processed = processed[0]
    observation = processed
    if self._preprocessing_combiner is not None:
      observation = self._preprocessing_combiner(observation)
    observation_spec = tensor_spec.TensorSpec(
        (observation.shape[-1],), dtype=observation.dtype)

    num_outer_dims = nest_utils.get_outer_rank(observation,
                                               observation_spec)
    if num_outer_dims not in (1, 2):
      raise ValueError(
          'Input observation must have a batch or batch x time outer shape.')

    has_time_dim = num_outer_dims == 2
    if not has_time_dim:
      # Add a time dimension to the inputs.
      observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                          observation)
      step_type = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                        step_type)

    states = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
    batch_squash = utils.BatchSquash(2)  # Squash B and T dims.
    states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

    for layer in self._input_layers:
      states = layer(states, training=training)

    states = batch_squash.unflatten(states)  # [B x T, ...] -> [B, T, ...]

    with tf.name_scope('reset_mask'):
      reset_mask = tf.equal(step_type, time_step.StepType.FIRST)
    # Unroll over the time sequence.
    states, network_state = self._dynamic_unroll(
        states,
        reset_mask,
        initial_state=network_state,
        training=training)

    states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

    for layer in self._output_layers:
      states = layer(states, training=training)

    actions = []
    for layer, spec in zip(self._action_layers, self._flat_action_spec):
      action = layer(states, training=training)
      action = common.scale_to_spec(action, spec)
      action = batch_squash.unflatten(action)  # [B x T, ...] -> [B, T, ...]
      if not has_time_dim:
        action = tf.squeeze(action, axis=1)
      actions.append(action)

    output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, actions)
    return output_actions, network_state
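The reset_mask above flags episode boundaries so dynamic_unroll can reset the RNN state mid-sequence. A small illustration of how it is derived from step types (the sequence below is hypothetical):

import tensorflow as tf
from tf_agents.trajectories import time_step as ts

step_type = tf.constant([[0, 1, 1, 2, 0, 1]])        # FIRST=0, MID=1, LAST=2
reset_mask = tf.equal(step_type, ts.StepType.FIRST)
# [[ True, False, False, False,  True, False]]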