Example #1
    # Q-network builder (a closure): `network`, `hiddens`, `layer_norm_bool` and
    # `dueling` are captured from the enclosing factory function, not defined here.
    def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
        with tf.compat.v1.variable_scope(scope, reuse=reuse):
            latent = network(input_placeholder)
            if isinstance(latent, tuple):
                if latent[1] is not None:
                    raise NotImplementedError("DQN is not compatible with recurrent policies yet")
                latent = latent[0]

            latent = tf.compat.v1.layers.flatten(latent)

            with tf.compat.v1.variable_scope("action_value"):
                action_out = latent
                for hidden in hiddens:
                    action_out = tf_slim.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm_bool:
                        action_out = tf_slim.layer_norm(action_out, center=True, scale=True)
                    action_out = tf.nn.relu(action_out)
                action_scores = tf_slim.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

            # Dueling architecture: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a))
            if dueling:
                with tf.compat.v1.variable_scope("state_value"):
                    state_out = latent
                    for hidden in hiddens:
                        state_out = tf_slim.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                        if layer_norm_bool:
                            state_out = tf_slim.layer_norm(state_out, center=True, scale=True)
                        state_out = tf.nn.relu(state_out)
                    state_score = tf_slim.fully_connected(state_out, num_outputs=1, activation_fn=None)
                action_scores_mean = tf.reduce_mean(action_scores, 1)
                action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
                q_out = state_score + action_scores_centered
            else:
                q_out = action_scores
            return q_out
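The builder above is a snippet; the names it captures come from an enclosing factory that is not shown on this page. As a minimal, self-contained sketch of the same layer pattern (dense layer, then tf_slim.layer_norm, then ReLU) in TF1 graph mode, with illustrative shapes that are assumptions rather than values from the original project:

import numpy as np
import tensorflow.compat.v1 as tf
import tf_slim

tf.disable_eager_execution()

# Dense -> layer_norm -> ReLU, the same block used inside q_func_builder above.
x = tf.placeholder(tf.float32, [None, 8])
h = tf_slim.fully_connected(x, num_outputs=16, activation_fn=None)
h = tf_slim.layer_norm(h, center=True, scale=True)
h = tf.nn.relu(h)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(h, feed_dict={x: np.random.randn(2, 8).astype(np.float32)})
    print(out.shape)  # (2, 16)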
Example #2
File: rnn.py Project: timgates42/magenta
  def _norm(self, inp, scope, dtype=tf.float32):
    shape = inp.get_shape()[-1:]
    gamma_init = tf.constant_initializer(self._norm_gain)
    beta_init = tf.constant_initializer(self._norm_shift)
    with tf.variable_scope(scope):
      # Initialize beta and gamma for use by layer_norm.
      tf.get_variable("gamma", shape=shape, initializer=gamma_init, dtype=dtype)
      tf.get_variable("beta", shape=shape, initializer=beta_init, dtype=dtype)
    # reuse=True lets layer_norm pick up the gamma/beta created above in `scope`.
    normalized = tf_slim.layer_norm(inp, reuse=True, scope=scope)
    return normalized
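The method above pre-creates "gamma" and "beta" with custom initializers and then lets tf_slim.layer_norm reuse them through the shared scope. An isolated sketch of that trick outside the class; the gain/shift values and the tensor shape are illustrative assumptions:

import tensorflow.compat.v1 as tf
import tf_slim

tf.disable_eager_execution()

inp = tf.placeholder(tf.float32, [None, 4])
shape = inp.get_shape()[-1:]
with tf.variable_scope("ln"):
    # Pre-create the variables layer_norm expects, with custom initial values.
    tf.get_variable("gamma", shape=shape, initializer=tf.constant_initializer(1.5))
    tf.get_variable("beta", shape=shape, initializer=tf.constant_initializer(0.1))
# reuse=True makes layer_norm fetch the existing "ln/gamma" and "ln/beta"
# instead of creating fresh variables with the default initializers.
normalized = tf_slim.layer_norm(inp, reuse=True, scope="ln")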
Example #3
def layer_norm(input_tensor, name=None):
  """Run layer normalization on the last dimension of the tensor."""
  return tf_slim.layer_norm(
      inputs=input_tensor, begin_norm_axis=-1, begin_params_axis=-1, scope=name)

# Variant: the same thin wrapper, written against a `slim` alias for the library.
def layer_norm(inputs, name):
  return slim.layer_norm(inputs, scope=name)
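Hypothetical usage of the first wrapper above on a [batch, seq_len, hidden] tensor; the shape and scope name are assumptions for illustration, and the snippet relies on the layer_norm definition shown in this example:

import tensorflow.compat.v1 as tf
import tf_slim

tf.disable_eager_execution()

hidden_states = tf.placeholder(tf.float32, [None, 128, 768])
# begin_norm_axis=-1 normalizes over the last (768-dim) axis only;
# gamma/beta are created under the "LayerNorm" variable scope.
normalized = layer_norm(hidden_states, name="LayerNorm")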