def decode(self, targets, encoder_outputs, attention_bias, training):
    """Generate output logits for the target sequence given the encoder output."""
    with tf.name_scope("decode"):
        # Prepare inputs to decoder layers by shifting targets, adding positional
        # encoding and applying dropout.
        decoder_inputs = self.embedding_softmax_layer(targets)
        decoder_inputs = tf.cast(decoder_inputs, tf.float32)
        attention_bias = tf.cast(attention_bias, tf.float32)

        with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element
            decoder_inputs = tf.pad(
                decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            pos_encoding = transformer_utils.get_position_encoding(
                length, self.args.hidden_size)
            pos_encoding = tf.cast(pos_encoding, tf.float32)
            decoder_inputs += pos_encoding

        if training:
            decoder_inputs = tf.nn.dropout(
                decoder_inputs, rate=self.args.dropout)

        # Run the decoder stack and project back to vocabulary logits.
        decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
            length, dtype=tf.float32)
        outputs = self.decoder_stack(
            decoder_inputs,
            encoder_outputs,
            decoder_self_attention_bias,
            attention_bias,
            training=training)
        logits = self.embedding_softmax_layer(outputs, mode="linear")
        logits = tf.cast(logits, tf.float32)

        return logits
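
# -----------------------------------------------------------------------------
# Illustrative sketch (not part of the original model): the pad-and-slice idiom
# used under the "shift_targets" scope above is the standard "shift right" for
# teacher forcing. The helper below is a hypothetical, self-contained demo of
# that single operation on a toy tensor.
def _demo_shift_targets():
    import tensorflow as tf

    # Toy batch of "embedded targets": shape [batch=1, length=3, hidden=1].
    toy = tf.constant([[[1.0], [2.0], [3.0]]])
    # Pad one step of zeros at the front of the time axis, then drop the last
    # step, so position i of the decoder input only contains targets before i.
    shifted = tf.pad(toy, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
    # shifted == [[[0.0], [1.0], [2.0]]]
    return shifted
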

def __call__(self, adj, nodes, roles, targ, mask):
    """
    Puts the tensors through the encoder and decoder stacks.

    :param adj: adjacency matrices of the input examples
    :type adj: tf.Tensor
    :param nodes: node features
    :type nodes: tf.Tensor
    :param roles: role features for the nodes
    :type roles: tf.Tensor
    :param targ: target sequences, or None at inference time
    :type targ: tf.Tensor
    :param mask: mask tensor (unused in this method)
    :type mask: tf.Tensor
    :return: output probability distribution
    :rtype: tf.Tensor
    """
    node_tensor = self.emb_node_layer(nodes)
    role_tensor = self.emb_role_layer(roles)
    if targ is not None:
        decoder_inputs = self.emb_tgt_layer(targ)
        decoder_inputs = tf.cast(decoder_inputs, tf.float32)

    node_tensor = tf.cast(node_tensor, tf.float32)
    role_tensor = tf.cast(role_tensor, tf.float32)
    enc_output = self.encoder(node_tensor, adj, role_tensor,
                              self.num_heads, self.encoder.trainable)

    attention_bias = transformer_utils.get_padding_bias(nodes)
    attention_bias = tf.cast(attention_bias, tf.float32)

    if targ is None:
        # No targets given: fall back to autoregressive prediction.
        predictions = self.predict(enc_output, attention_bias, False)
        return predictions

    with tf.name_scope("shift_targets"):
        # Shift targets to the right, and remove the last element
        decoder_inputs = tf.pad(
            decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

    with tf.name_scope("add_pos_encoding"):
        length = tf.shape(decoder_inputs)[1]
        pos_encoding = transformer_utils.get_position_encoding(
            length, self.args.hidden_size)
        pos_encoding = tf.cast(pos_encoding, tf.float32)
        decoder_inputs += pos_encoding

    if self.trainable:
        decoder_inputs = tf.nn.dropout(decoder_inputs, rate=self.args.dropout)

    # Run the decoder stack and project to the output vocabulary.
    decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
        length, dtype=tf.float32)
    outputs = self.decoder_stack(decoder_inputs, enc_output,
                                 decoder_self_attention_bias,
                                 attention_bias, training=self.trainable)
    predictions = self.final_layer(outputs)

    return predictions
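
# -----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original model). `model` is a
# hypothetical instance of this class, and adj, nodes, roles, targ and mask are
# placeholder tensors shaped however the embedding layers and encoder expect.
# Passing targets runs the teacher-forced decode path above; passing None for
# the targets routes the call through self.predict() instead:
#
#   train_logits = model(adj, nodes, roles, targ, mask)   # teacher forcing
#   predictions = model(adj, nodes, roles, None, mask)    # autoregressive inference
# -----------------------------------------------------------------------------
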

def _get_symbols_to_logits_fn(self, max_decode_length, training):
    """Returns a decoding function that calculates logits of the next tokens."""
    timing_signal = transformer_utils.get_position_encoding(
        max_decode_length + 1, self.args.hidden_size)
    decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
        max_decode_length)

    def symbols_to_logits_fn(ids, i, cache):
        """Generate logits for next potential IDs.

        Args:
          ids: Current decoded sequences. int tensor with shape
            [batch_size * beam_size, i + 1]
          i: Loop index
          cache: dictionary of values storing the encoder output, encoder-decoder
            attention bias, and previous decoder attention values.

        Returns:
          Tuple of (logits with shape [batch_size * beam_size, vocab_size],
            updated cache values)
        """
        # Set decoder input to the last generated IDs
        decoder_input = ids[:, -1:]

        # Preprocess decoder input by getting embeddings and adding timing signal.
        decoder_input = self.emb_tgt_layer(decoder_input)
        decoder_input += timing_signal[i:i + 1]

        self_attention_bias = decoder_self_attention_bias[:, :, i:i + 1, :i + 1]
        decoder_outputs = self.decoder_stack(
            decoder_input,
            cache.get("encoder_outputs"),
            self_attention_bias,
            cache.get("encoder_decoder_attention_bias"),
            training=training,
            cache=cache)
        logits = self.final_layer(decoder_outputs)
        logits = tf.squeeze(logits, axis=[1])
        return logits, cache

    return symbols_to_logits_fn
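
# -----------------------------------------------------------------------------
# Illustrative sketch (not part of the original model): one way the closure
# returned above could drive a greedy decoding loop. `model`, `cache` and
# `start_ids` are hypothetical; in the real code the cache is initialised
# before the loop with the encoder outputs, the encoder-decoder attention bias
# and per-layer attention values, and the loop is normally driven by a
# beam-search helper rather than written out by hand.
#
#   symbols_to_logits_fn = model._get_symbols_to_logits_fn(
#       max_decode_length, training=False)
#   ids = start_ids                                  # e.g. [batch, 1] of start IDs
#   for i in range(max_decode_length):
#       logits, cache = symbols_to_logits_fn(ids, i, cache)
#       next_ids = tf.argmax(logits, axis=-1, output_type=tf.int32)
#       ids = tf.concat([ids, next_ids[:, tf.newaxis]], axis=1)
# -----------------------------------------------------------------------------
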

def encode(self, inputs, attention_bias, training):
    """Embed the inputs, add positional encodings, and run the encoder stack."""
    with tf.name_scope("encode"):
        # Prepare inputs to the layer stack by adding positional encodings and
        # applying dropout.
        embedded_inputs = self.embedding_softmax_layer(inputs)
        embedded_inputs = tf.cast(embedded_inputs, tf.float32)
        inputs_padding = transformer_utils.get_padding(inputs)
        attention_bias = tf.cast(attention_bias, tf.float32)

        with tf.name_scope("add_positional_encoding"):
            length = tf.shape(embedded_inputs)[1]
            pos_encoding = transformer_utils.get_position_encoding(
                length, self.args.hidden_size)
            pos_encoding = tf.cast(pos_encoding, tf.float32)
            encoder_inputs = embedded_inputs + pos_encoding

        if training:
            encoder_inputs = tf.nn.dropout(
                encoder_inputs, rate=self.args.dropout)

        return self.encoder_stack(
            encoder_inputs, attention_bias, inputs_padding, training=training)
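
# -----------------------------------------------------------------------------
# Illustrative sketch (not part of the original model): the positional signal
# added under "add_positional_encoding" is assumed to have shape
# [length, hidden_size], so it broadcasts across the batch dimension of the
# [batch, length, hidden_size] embeddings. The hypothetical helper below shows
# that broadcast with a stand-in signal.
def _demo_pos_encoding_broadcast():
    import tensorflow as tf

    embedded = tf.zeros([2, 5, 8])   # [batch, length, hidden_size]
    pos_signal = tf.ones([5, 8])     # stand-in for the real positional encoding
    return embedded + pos_signal     # broadcasts to shape [2, 5, 8]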