Exemplo n.º 1
0
 def __call__(self,
              training,
              inputs_BxIxD,
              bias_BxIxI,
              memory_BxMxD,
              bias_BxIxM,
              cache=None,
              decode_i=None):
     """Apply self-attention, optional memory attention and a feed-forward sublayer.

     Every sublayer follows the same pattern: layer-normalize the running
     state, run the sublayer, apply dropout, and add the result back onto
     the running state (a residual connection).

     Args:
       training: forwarded to the attention layers and to `_dropout_fn`.
       inputs_BxIxD: input tensor. The suffix suggests
         [batch, input_len, depth] -- TODO confirm against callers.
       bias_BxIxI: bias handed to the self-attention layer.
       memory_BxMxD: tensor attended to by the memory-attention sublayer,
         or None to skip that sublayer entirely.
       bias_BxIxM: bias handed to the memory-attention layer.
       cache: forwarded unchanged to the self-attention layer.
       decode_i: forwarded unchanged to the self-attention layer.

     Returns:
       The running state tensor after all sublayers have been applied.
     """
     s_BxIxD = inputs_BxIxD
     # Sublayer 1: self-attention over the normalized state.
     with tf.variable_scope("self_attention"):
         y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
         y_BxIxD = self._self_attn_layer(y_BxIxD,
                                         bias_BxIxI,
                                         training,
                                         cache=cache,
                                         decode_i=decode_i)
         s_BxIxD += self._dropout_fn(y_BxIxD, training)
     # Sublayer 2 (only when a memory tensor is supplied): attention over
     # the memory.
     if memory_BxMxD is not None:
         with tf.variable_scope("memory_attention"):
             y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
             y_BxIxD = self._attn_layer(y_BxIxD, memory_BxMxD, bias_BxIxM,
                                        training)
             s_BxIxD += self._dropout_fn(y_BxIxD, training)
     # Sublayer 3: feed-forward block (`_relu_layer` then `_output_layer`),
     # with dropout applied after each of the two layers.
     with tf.variable_scope("ffn"):
         y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
         y_BxIxD = self._dropout_fn(self._relu_layer(y_BxIxD), training)
         s_BxIxD += self._dropout_fn(self._output_layer(y_BxIxD), training)
     return s_BxIxD
Exemplo n.º 2
0
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a convolutional Q-network: conv stack followed by MLP head(s).

    Args:
        convs: iterable of (num_outputs, kernel_size, stride) triples, one
            per convolutional layer.
        hiddens: iterable of hidden-layer sizes used by each fully connected
            head.
        dueling: if truthy, add a separate state-value head and combine it
            with the mean-centered action scores.
        inpt: input tensor fed to the first convolution.
        num_actions: number of outputs of the action-value head.
        scope: variable scope wrapping the whole network.
        reuse: forwarded to `tf.variable_scope`.
        layer_norm: if truthy, apply layer normalization before each ReLU in
            the fully connected heads.

    Returns:
        The Q-value tensor: the action scores, or (when `dueling`) the state
        score plus the mean-centered action scores.
    """
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                # `layers` is used with the tf.contrib.layers functional API
                # throughout this function (num_outputs= / activation_fn=).
                # Its convolution op is `convolution2d`; `Conv2D` is a layer
                # class with a different signature and does not accept these
                # keyword arguments.
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None)

        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None)
            # Center the action scores per row so the state score carries the
            # common offset.
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
Exemplo n.º 3
0
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a fully connected Q-network inside variable scope `scope`.

    Each size in `hiddens` adds a linear layer, optionally followed by layer
    normalization (when `layer_norm` is truthy), then a ReLU. A final linear
    layer with `num_actions` outputs produces the returned tensor.
    """
    with tf.variable_scope(scope, reuse=reuse):
        activations = inpt
        for width in hiddens:
            pre_activation = layers.fully_connected(
                activations, num_outputs=width, activation_fn=None)
            if layer_norm:
                pre_activation = layers.layer_norm(
                    pre_activation, center=True, scale=True)
            activations = tf.nn.relu(pre_activation)
        return layers.fully_connected(
            activations, num_outputs=num_actions, activation_fn=None)
Exemplo n.º 4
0
 def _encode(self, features, training):
     """Encode `features["inputs"]` into memory states plus their attention bias.

     Args:
       features: mapping that must contain the key "inputs".
       training: forwarded to `_dropout_fn` and to the encoder stack.

     Returns:
       A dict with "memory" (the layer-normalized encoder output) and
       "memory_bias" (the bias derived from the input ids).
     """
     inputs_BxI = features["inputs"]
     inputs_bias_Bx1xI = attention.ids_to_bias(inputs_BxI, self._dtype)
     # NOTE(review): the second argument presumably selects the
     # ids -> embeddings direction of the embedding layer -- confirm.
     states_BxIxD = self._embedding_layer(inputs_BxI, True)
     # Add the timing signal first, then apply dropout.
     states_BxIxD = self._dropout_fn(timing.add_time_signal(states_BxIxD),
                                     training)
     with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
         # The two trailing None arguments are presumably the memory and
         # memory-bias inputs, unused by the encoder -- TODO confirm
         # against transformer_block.stack.
         states_BxIxD = transformer_block.stack(self._encoder_layers,
                                                training, states_BxIxD,
                                                inputs_bias_Bx1xI, None,
                                                None)
         states_BxIxD = contrib_layers.layer_norm(states_BxIxD,
                                                  begin_norm_axis=2)
     return {"memory": states_BxIxD, "memory_bias": inputs_bias_Bx1xI}
Exemplo n.º 5
0
    def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
        """Build the Q-value graph on top of the enclosing `network`.

        Relies on `network`, `hiddens`, `layer_norm` and `dueling` from the
        enclosing scope.

        Args:
            input_placeholder: tensor passed to `network`.
            num_actions: number of outputs of the action-value head.
            scope: variable scope wrapping the whole Q-function.
            reuse: forwarded to `tf.variable_scope`.

        Returns:
            The Q-value tensor: the action scores, or (when `dueling`) the
            state score plus the mean-centered action scores.

        Raises:
            NotImplementedError: if `network` returns a tuple whose second
                element is not None (a recurrent policy).
        """
        with tf.variable_scope(scope, reuse=reuse):
            latent = network(input_placeholder)
            if isinstance(latent, tuple):
                if latent[1] is not None:
                    raise NotImplementedError(
                        "DQN is not compatible with recurrent policies yet")
                latent = latent[0]

            latent = tf.layers.flatten(latent)

            with tf.variable_scope("action_value"):
                action_out = latent

                for hidden in hiddens:
                    action_out = tf.contrib.layers.fully_connected(
                        action_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        action_out = layers.layer_norm(action_out,
                                                       center=True,
                                                       scale=True)
                    action_out = tf.nn.relu(action_out)
                action_scores = tf.contrib.layers.fully_connected(
                    action_out, num_outputs=num_actions, activation_fn=None)

            if dueling:
                with tf.variable_scope("state_value"):
                    state_out = latent
                    for hidden in hiddens:
                        state_out = tf.contrib.layers.fully_connected(
                            state_out, num_outputs=hidden, activation_fn=None)
                        if layer_norm:
                            # Use the same `layers.layer_norm` as the
                            # action-value head; `tf.layers` has no
                            # `layer_norm` function.
                            state_out = layers.layer_norm(state_out,
                                                          center=True,
                                                          scale=True)
                        state_out = tf.nn.relu(state_out)
                    state_score = tf.contrib.layers.fully_connected(
                        state_out, num_outputs=1, activation_fn=None)
                # Center the action scores per row so the state score
                # carries the common offset.
                action_scores_mean = tf.reduce_mean(action_scores, 1)
                action_scores_centered = action_scores - tf.expand_dims(
                    action_scores_mean, 1)
                q_out = state_score + action_scores_centered
            else:
                q_out = action_scores
            return q_out
Exemplo n.º 6
0
def normalize(inp, activation, scope, reuse):
    """Normalize `inp` according to `FLAGS.norm`, then apply `activation`.

    Args:
        inp: input tensor.
        activation: activation callable, or None for no activation.
        scope: variable scope name forwarded to the normalization op.
        reuse: reuse flag forwarded to the normalization op.

    Returns:
        The normalized and activated tensor for `FLAGS.norm` values
        'batch_norm', 'layer_norm' and 'None' (the string). Any other value
        falls through and returns None.
    """
    with tf.name_scope("normalize"):
        if FLAGS.norm == 'batch_norm':
            # `tf_layers` is used with the contrib functional API here
            # (activation_fn= / reuse= / scope=); its batch-norm op is
            # `batch_norm`, which accepts exactly these keywords.
            return tf_layers.batch_norm(inp,
                                        activation_fn=activation,
                                        reuse=reuse,
                                        scope=scope)
        elif FLAGS.norm == 'layer_norm':
            return tf_layers.layer_norm(inp,
                                        activation_fn=activation,
                                        reuse=reuse,
                                        scope=scope)
        elif FLAGS.norm == 'None':
            # No normalization requested: only apply the activation, if any.
            if activation is not None:
                return activation(inp)
            else:
                return inp
    # Unrecognized FLAGS.norm: preserve the original behavior of returning
    # None rather than raising.
    return None
Exemplo n.º 7
0
 def symbols_to_logits_fn(dec_BxT, context, i):
     """Compute next-token logits for decode step `i`.

     Relies on `self`, `bias_1xTxT` and `T` from the enclosing scope.

     Args:
       dec_BxT: tensor of decoded ids so far; one column is read per call.
       context: mapping containing "memory" and "memory_bias"; it is also
         passed whole to the decoder stack (presumably as the cache --
         TODO confirm against transformer_block.stack).
       i: scalar integer tensor, the current decode step.

     Returns:
       The logits tensor with axis 1 squeezed out.
     """
     # Read the single column at index max(0, i - 1), i.e. the previously
     # decoded position (column 0 when i == 0).
     dec_Bx1 = tf.slice(dec_BxT,
                        [0, tf.maximum(tf.cast(0, i.dtype), i - 1)],
                        [dec_BxT.shape[0], 1])
     # Row i of the decoder self-attention bias.
     bias_1x1xT = tf.slice(bias_1xTxT, [0, i, 0], [1, 1, T])
     dec_Bx1xD = self._embedding_layer(dec_Bx1, True)
     # Zero the embedding at step 0 (the factor is 0 when i == 0, else 1).
     dec_Bx1xD *= tf.cast(tf.greater(i, 0), self._dtype)
     dec_Bx1xD = timing.add_time_signal(dec_Bx1xD, start_index=i)
     with tf.variable_scope(self._decoder_scope_name,
                            reuse=tf.AUTO_REUSE):
         # The literal False is passed in the `training` position.
         dec_Bx1xD = transformer_block.stack(
             self._decoder_layers, False, dec_Bx1xD, bias_1x1xT,
             context["memory"], context["memory_bias"], context, i)
         dec_Bx1xD = contrib_layers.layer_norm(dec_Bx1xD,
                                               begin_norm_axis=2)
     # Second argument False: the embedding layer is used here to turn
     # decoder states into logits.
     logits_Bx1xV = self._embedding_layer(dec_Bx1xD, False)
     logits_BxV = tf.squeeze(logits_Bx1xV, axis=1)
     return logits_BxV
Exemplo n.º 8
0
    def __call__(self, features, training):
        """Create the model graph and compute the training loss.

        Args:
          features: dictionary of tensors including "inputs"
            [batch, input_len] and "targets" [batch, output_len].
          training: bool of whether the mode is training.

        Returns:
          Tuple of (loss, outputs): loss is a scalar. outputs is a
          dictionary of tensors, containing the model's output logits
          under the key "logits".

        Raises:
          ValueError: if "inputs" or "targets" is missing from `features`.
        """
        if "inputs" not in features or "targets" not in features:
            raise ValueError("Require inputs and targets keys in features.")

        context = self._encode(features, training)
        # Keep the encoder output on the instance.
        self._context = context
        targets_BxT = features["targets"]
        bias_1xTxT = attention.upper_triangle_bias(
            tf.shape(input=targets_BxT)[1], self._dtype)
        states_BxTxD = self._embedding_layer(targets_BxT, True)
        # Shift the target embeddings right by one position along axis 1:
        # prepend a zero vector and drop the last position.
        states_BxTxD = tf.pad(tensor=states_BxTxD,
                              paddings=[[0, 0], [1, 0], [0, 0]])[:, :-1, :]
        states_BxTxD = timing.add_time_signal(states_BxTxD)
        states_BxTxD = self._dropout_fn(states_BxTxD, training)
        with tf.compat.v1.variable_scope(self._decoder_scope_name,
                                         reuse=tf.compat.v1.AUTO_REUSE):
            states_BxTxD = transformer_block.stack(self._decoder_layers,
                                                   training, states_BxTxD,
                                                   bias_1xTxT,
                                                   context["memory"],
                                                   context["memory_bias"])
            states_BxTxD = contrib_layers.layer_norm(states_BxTxD,
                                                     begin_norm_axis=2)
        logits_BxTxV = self._embedding_layer(states_BxTxD, False)
        # Weight is 1 where the target id is > 0 and 0 elsewhere
        # (presumably id 0 is padding -- confirm against the vocabulary).
        targets_mask_BxT = tf.cast(tf.greater(targets_BxT, 0), self._dtype)
        loss = tf.compat.v1.losses.softmax_cross_entropy(
            tf.one_hot(targets_BxT, self._vocab_size),
            logits_BxTxV,
            label_smoothing=self._label_smoothing,
            weights=targets_mask_BxT)
        return loss, {"logits": logits_BxTxV}
Exemplo n.º 9
0
def layer_norm(input_tensor, name=None):
    """Layer-normalize `input_tensor` over its last dimension.

    `name`, when given, is used as the variable scope of the underlying op.
    """
    last_axis = -1
    normalized = contrib_layers.layer_norm(
        inputs=input_tensor,
        begin_norm_axis=last_axis,
        begin_params_axis=last_axis,
        scope=name,
    )
    return normalized
Exemplo n.º 10
0
    def __init__(self,
                 sess,
                 ob_space,
                 ac_space,
                 n_env,
                 n_steps,
                 n_batch,
                 reuse=False,
                 layers=None,
                 cnn_extractor=nature_cnn,
                 feature_extraction="cnn",
                 obs_phs=None,
                 layer_norm=False,
                 dueling=True,
                 act_fun=tf.nn.relu,
                 **kwargs):
        """Build the feed-forward Q-value graph and store it in `self.q_values`.

        With `feature_extraction == "cnn"` the features come from
        `cnn_extractor(self.processed_obs, **kwargs)` and feed the action
        head directly. Otherwise the observations are flattened and passed
        through the hidden stack described by `layers` (default [64, 64]).
        When `self.dueling` is set, a separate state-value head is built
        from the extracted features and added to the mean-centered action
        scores.
        """
        use_cnn = feature_extraction == "cnn"
        super(FeedForwardPolicy, self).__init__(
            sess, ob_space, ac_space, n_env, n_steps, n_batch,
            dueling=dueling, reuse=reuse, scale=use_cnn, obs_phs=obs_phs)

        self._kwargs_check(feature_extraction, kwargs)

        hidden_sizes = [64, 64] if layers is None else layers

        def hidden_stack(tensor):
            # Linear layer -> optional layer norm -> activation, per size.
            for width in hidden_sizes:
                tensor = tf_layers.fully_connected(
                    tensor, num_outputs=width, activation_fn=None)
                if layer_norm:
                    tensor = tf_layers.layer_norm(
                        tensor, center=True, scale=True)
                tensor = act_fun(tensor)
            return tensor

        with tf.variable_scope("model", reuse=reuse):
            with tf.variable_scope("action_value"):
                if use_cnn:
                    extracted_features = cnn_extractor(
                        self.processed_obs, **kwargs)
                    action_out = extracted_features
                else:
                    extracted_features = tf.layers.flatten(
                        self.processed_obs)
                    action_out = hidden_stack(extracted_features)

                action_scores = tf_layers.fully_connected(
                    action_out, num_outputs=self.n_actions, activation_fn=None)

            if not self.dueling:
                q_out = action_scores
            else:
                with tf.variable_scope("state_value"):
                    state_out = hidden_stack(extracted_features)
                    state_score = tf_layers.fully_connected(
                        state_out, num_outputs=1, activation_fn=None)
                # Center the action scores per row before adding the
                # state score.
                mean_scores = tf.reduce_mean(action_scores, axis=1)
                centered_scores = action_scores - tf.expand_dims(
                    mean_scores, axis=1)
                q_out = state_score + centered_scores

        self.q_values = q_out
        self._setup_init()