def __call__(self, training, inputs_BxIxD, bias_BxIxI, memory_BxMxD,
             bias_BxIxM, cache=None, decode_i=None):
    s_BxIxD = inputs_BxIxD
    # Self-attention sub-layer: pre-layer-norm, then a residual connection.
    with tf.variable_scope("self_attention"):
        y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
        y_BxIxD = self._self_attn_layer(y_BxIxD, bias_BxIxI, training,
                                        cache=cache, decode_i=decode_i)
        s_BxIxD += self._dropout_fn(y_BxIxD, training)
    # Cross-attention over the encoder memory (decoder blocks only).
    if memory_BxMxD is not None:
        with tf.variable_scope("memory_attention"):
            y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
            y_BxIxD = self._attn_layer(y_BxIxD, memory_BxMxD, bias_BxIxM,
                                       training)
            s_BxIxD += self._dropout_fn(y_BxIxD, training)
    # Position-wise feed-forward sub-layer.
    with tf.variable_scope("ffn"):
        y_BxIxD = contrib_layers.layer_norm(s_BxIxD, begin_norm_axis=2)
        y_BxIxD = self._dropout_fn(self._relu_layer(y_BxIxD), training)
        s_BxIxD += self._dropout_fn(self._output_layer(y_BxIxD), training)
    return s_BxIxD
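Each of the three sub-layers above follows the same pre-layer-norm residual wiring. A minimal sketch of the pattern, with stand-in callables that are assumptions rather than the source modules:

def pre_ln_residual(s, sublayer, norm, dropout):
    # s -> s + dropout(sublayer(norm(s))); applied three times per block.
    return s + dropout(sublayer(norm(s)))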
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope,
                reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                # contrib's convolution2d (the Keras Conv2D class does not
                # take num_outputs/stride/activation_fn keywords).
                out = layers.convolution2d(out, num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            action_out = conv_out
            for hidden in hiddens:
                action_out = layers.fully_connected(
                    action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    action_out = layers.layer_norm(action_out, center=True,
                                                   scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = layers.fully_connected(
                action_out, num_outputs=num_actions, activation_fn=None)
        if dueling:
            with tf.variable_scope("state_value"):
                state_out = conv_out
                for hidden in hiddens:
                    state_out = layers.fully_connected(
                        state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = layers.layer_norm(state_out, center=True,
                                                      scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = layers.fully_connected(
                    state_out, num_outputs=1, activation_fn=None)
            # Dueling head: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(
                action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
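The mean-centering at the end of the dueling branch implements Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)) from the dueling-DQN architecture. A minimal numpy sketch with made-up values:

import numpy as np

state_score = np.array([[2.0]])              # V(s), shape [batch, 1]
action_scores = np.array([[1.0, 3.0, 5.0]])  # A(s, a), shape [batch, actions]
centered = action_scores - action_scores.mean(axis=1, keepdims=True)
q_out = state_score + centered
print(q_out)  # [[0. 2. 4.]]; the per-state mean of Q equals V(s)

Subtracting the mean keeps the V/A decomposition identifiable: adding a constant to every advantage and subtracting it from V would otherwise leave Q unchanged.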
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden,
                                         activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions,
                                       activation_fn=None)
        return q_out
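A hypothetical usage sketch for `_mlp` (the placeholder shape and hyperparameters are assumptions, not from the source): build a two-hidden-layer Q-network over a 4-dimensional observation.

import tensorflow as tf
from tensorflow.contrib import layers

obs_ph = tf.placeholder(tf.float32, [None, 4])  # hypothetical observation
q_values = _mlp([64, 64], obs_ph, num_actions=2, scope="q_func",
                layer_norm=True)  # -> tensor of shape [None, 2]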
def _encode(self, features, training):
    inputs_BxI = features["inputs"]
    inputs_bias_Bx1xI = attention.ids_to_bias(inputs_BxI, self._dtype)
    states_BxIxD = self._embedding_layer(inputs_BxI, True)
    states_BxIxD = self._dropout_fn(
        timing.add_time_signal(states_BxIxD), training)
    with tf.variable_scope("encoder", reuse=tf.AUTO_REUSE):
        states_BxIxD = transformer_block.stack(self._encoder_layers, training,
                                               states_BxIxD, inputs_bias_Bx1xI,
                                               None, None)
        states_BxIxD = contrib_layers.layer_norm(states_BxIxD,
                                                 begin_norm_axis=2)
    return {"memory": states_BxIxD, "memory_bias": inputs_bias_Bx1xI}
def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
    # `network`, `hiddens`, `dueling`, and `layer_norm` are closed over from
    # the enclosing builder function.
    with tf.variable_scope(scope, reuse=reuse):
        latent = network(input_placeholder)
        if isinstance(latent, tuple):
            if latent[1] is not None:
                raise NotImplementedError(
                    "DQN is not compatible with recurrent policies yet")
            latent = latent[0]
        latent = tf.layers.flatten(latent)
        with tf.variable_scope("action_value"):
            action_out = latent
            for hidden in hiddens:
                action_out = tf.contrib.layers.fully_connected(
                    action_out, num_outputs=hidden, activation_fn=None)
                if layer_norm:
                    # layer_norm lives in tf.contrib.layers, not tf.layers.
                    action_out = tf.contrib.layers.layer_norm(
                        action_out, center=True, scale=True)
                action_out = tf.nn.relu(action_out)
            action_scores = tf.contrib.layers.fully_connected(
                action_out, num_outputs=num_actions, activation_fn=None)
        if dueling:
            with tf.variable_scope("state_value"):
                state_out = latent
                for hidden in hiddens:
                    state_out = tf.contrib.layers.fully_connected(
                        state_out, num_outputs=hidden, activation_fn=None)
                    if layer_norm:
                        state_out = tf.contrib.layers.layer_norm(
                            state_out, center=True, scale=True)
                    state_out = tf.nn.relu(state_out)
                state_score = tf.contrib.layers.fully_connected(
                    state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, 1)
            action_scores_centered = action_scores - tf.expand_dims(
                action_scores_mean, 1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores
        return q_out
def normalize(inp, activation, scope, reuse):
    with tf.name_scope("normalize"):
        if FLAGS.norm == 'batch_norm':
            # contrib's batch_norm (not the Keras BatchNormalization class)
            # accepts the activation_fn/reuse/scope keywords used here.
            return tf_layers.batch_norm(inp, activation_fn=activation,
                                        reuse=reuse, scope=scope)
        elif FLAGS.norm == 'layer_norm':
            return tf_layers.layer_norm(inp, activation_fn=activation,
                                        reuse=reuse, scope=scope)
        elif FLAGS.norm == 'None':
            if activation is not None:
                return activation(inp)
            else:
                return inp
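A hedged usage sketch: `normalize` dispatches on the global string flag FLAGS.norm, so the flag must be defined before the function runs. The flag definition and tensor below are assumptions for illustration only.

import tensorflow as tf
from tensorflow.contrib import layers as tf_layers

flags = tf.app.flags
flags.DEFINE_string("norm", "layer_norm", "batch_norm, layer_norm, or None")
FLAGS = flags.FLAGS

inp = tf.random_normal([8, 64])  # hypothetical activations
out = normalize(inp, activation=tf.nn.relu, scope="norm0", reuse=False)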
def symbols_to_logits_fn(dec_BxT, context, i):
    """Decode loop: compute vocabulary logits for step i.

    Closes over `bias_1xTxT`, `T`, and `self` from the enclosing scope.
    """
    # Embed the previously emitted token (zeroed out at step 0).
    dec_Bx1 = tf.slice(dec_BxT, [0, tf.maximum(tf.cast(0, i.dtype), i - 1)],
                       [dec_BxT.shape[0], 1])
    bias_1x1xT = tf.slice(bias_1xTxT, [0, i, 0], [1, 1, T])
    dec_Bx1xD = self._embedding_layer(dec_Bx1, True)
    dec_Bx1xD *= tf.cast(tf.greater(i, 0), self._dtype)
    dec_Bx1xD = timing.add_time_signal(dec_Bx1xD, start_index=i)
    with tf.variable_scope(self._decoder_scope_name, reuse=tf.AUTO_REUSE):
        dec_Bx1xD = transformer_block.stack(self._decoder_layers, False,
                                            dec_Bx1xD, bias_1x1xT,
                                            context["memory"],
                                            context["memory_bias"], context, i)
        dec_Bx1xD = contrib_layers.layer_norm(dec_Bx1xD, begin_norm_axis=2)
    # Project back through the shared embedding to vocabulary logits.
    logits_Bx1xV = self._embedding_layer(dec_Bx1xD, False)
    logits_BxV = tf.squeeze(logits_Bx1xV, axis=1)
    return logits_BxV
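The closed-over `bias_1xTxT` is presumably the causal mask produced by `attention.upper_triangle_bias`: zero where a query position may attend, and a large negative value above the diagonal so future positions vanish after the softmax. A numpy sketch of that shape (the exact negative constant is an assumption):

import numpy as np

T = 4
bias_1xTxT = (np.triu(np.ones((T, T)), k=1) * -1e9)[None, :, :]
# bias_1xTxT[0, i, j] == 0 for j <= i, and -1e9 for j > i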
def __call__(self, features, training):
    """Create model.

    Args:
      features: dictionary of tensors including "inputs" [batch, input_len]
        and "targets" [batch, output_len].
      training: bool of whether the mode is training.

    Returns:
      Tuple of (loss, outputs): loss is a scalar; outputs is a dictionary of
      tensors containing the model's output logits.
    """
    if "inputs" not in features or "targets" not in features:
        raise ValueError("Require inputs and targets keys in features.")

    context = self._encode(features, training)
    self._context = context
    targets_BxT = features["targets"]
    bias_1xTxT = attention.upper_triangle_bias(
        tf.shape(input=targets_BxT)[1], self._dtype)
    states_BxTxD = self._embedding_layer(targets_BxT, True)
    # Shift targets right by one step (teacher forcing): position t sees
    # target t-1, with a zero vector feeding the first position.
    states_BxTxD = tf.pad(tensor=states_BxTxD,
                          paddings=[[0, 0], [1, 0], [0, 0]])[:, :-1, :]
    states_BxTxD = timing.add_time_signal(states_BxTxD)
    states_BxTxD = self._dropout_fn(states_BxTxD, training)
    with tf.compat.v1.variable_scope(self._decoder_scope_name,
                                     reuse=tf.compat.v1.AUTO_REUSE):
        states_BxTxD = transformer_block.stack(self._decoder_layers, training,
                                               states_BxTxD, bias_1xTxT,
                                               context["memory"],
                                               context["memory_bias"])
        states_BxTxD = contrib_layers.layer_norm(states_BxTxD,
                                                 begin_norm_axis=2)
    logits_BxTxV = self._embedding_layer(states_BxTxD, False)
    # Mask padding positions (token id 0) out of the loss.
    targets_mask_BxT = tf.cast(tf.greater(targets_BxT, 0), self._dtype)
    loss = tf.compat.v1.losses.softmax_cross_entropy(
        tf.one_hot(targets_BxT, self._vocab_size), logits_BxTxV,
        label_smoothing=self._label_smoothing, weights=targets_mask_BxT)
    return loss, {"logits": logits_BxTxV}
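The pad-then-slice line shifts the target embeddings right by one step for teacher forcing, so the decoder input at position t is target t-1. Illustrated on raw token ids with numpy (a standalone sketch, not source code):

import numpy as np

targets = np.array([[11, 12, 13]])                   # [batch, T]
shifted = np.pad(targets, [[0, 0], [1, 0]])[:, :-1]  # zero-pad front, drop last
print(shifted)  # [[ 0 11 12]]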
def layer_norm(input_tensor, name=None):
    """Run layer normalization on the last dimension of the tensor."""
    return contrib_layers.layer_norm(inputs=input_tensor, begin_norm_axis=-1,
                                     begin_params_axis=-1, scope=name)
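For reference, a minimal numpy version of what this computes over the last axis, before the learned scale and bias (gamma/beta) are applied; the epsilon value is an assumption:

import numpy as np

def layer_norm_ref(x, eps=1e-6):
    # Normalize each trailing-axis vector to zero mean and unit variance.
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + eps)

print(layer_norm_ref(np.array([[1.0, 2.0, 3.0]])))  # ~[[-1.22, 0., 1.22]]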
def __init__(self, sess, ob_space, ac_space, n_env, n_steps, n_batch,
             reuse=False, layers=None, cnn_extractor=nature_cnn,
             feature_extraction="cnn", obs_phs=None, layer_norm=False,
             dueling=True, act_fun=tf.nn.relu, **kwargs):
    super(FeedForwardPolicy, self).__init__(
        sess, ob_space, ac_space, n_env, n_steps, n_batch, dueling=dueling,
        reuse=reuse, scale=(feature_extraction == "cnn"), obs_phs=obs_phs)
    self._kwargs_check(feature_extraction, kwargs)

    if layers is None:
        layers = [64, 64]

    with tf.variable_scope("model", reuse=reuse):
        with tf.variable_scope("action_value"):
            if feature_extraction == "cnn":
                extracted_features = cnn_extractor(self.processed_obs,
                                                   **kwargs)
                action_out = extracted_features
            else:
                extracted_features = tf.layers.flatten(self.processed_obs)
                action_out = extracted_features
            for layer_size in layers:
                action_out = tf_layers.fully_connected(
                    action_out, num_outputs=layer_size, activation_fn=None)
                if layer_norm:
                    action_out = tf_layers.layer_norm(action_out, center=True,
                                                      scale=True)
                action_out = act_fun(action_out)
            action_scores = tf_layers.fully_connected(
                action_out, num_outputs=self.n_actions, activation_fn=None)

        if self.dueling:
            with tf.variable_scope("state_value"):
                state_out = extracted_features
                for layer_size in layers:
                    state_out = tf_layers.fully_connected(
                        state_out, num_outputs=layer_size, activation_fn=None)
                    if layer_norm:
                        state_out = tf_layers.layer_norm(state_out,
                                                         center=True,
                                                         scale=True)
                    state_out = act_fun(state_out)
                state_score = tf_layers.fully_connected(
                    state_out, num_outputs=1, activation_fn=None)
            action_scores_mean = tf.reduce_mean(action_scores, axis=1)
            action_scores_centered = action_scores - tf.expand_dims(
                action_scores_mean, axis=1)
            q_out = state_score + action_scores_centered
        else:
            q_out = action_scores

    self.q_values = q_out
    self._setup_init()