def transformer_prepare_decoder(targets_l2r, targets_r2l, hparams):
  """Prepare one shard of the model for the decoder.

  Handles left-to-right and right-to-left target streams and stacks the two
  decoder inputs along a new leading axis.
  """
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets_l2r)[1]))
  ## [1, 1, length, length]
  decoder_input_l2r = common_layers.shift_left_3d(targets_l2r)
  decoder_input_r2l = common_layers.shift_left_3d(targets_r2l)
  if hparams.pos == "timing":
    decoder_input_l2r = common_attention.add_timing_signal_1d(decoder_input_l2r)
    decoder_input_r2l = common_attention.add_timing_signal_1d(decoder_input_r2l)
  decoder_input = tf.concat([tf.expand_dims(decoder_input_l2r, 0),
                             tf.expand_dims(decoder_input_r2l, 0)],
                            axis=0)
  ## [2, batch, length, hidden_size]
  return (decoder_input, decoder_self_attention_bias)
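
# A minimal usage sketch for the bidirectional variant above. It is not part
# of the original module: the shapes and the fake hparams stand-in are
# illustrative, and it relies on this module's existing tf / common_attention /
# common_layers imports.
def _bidirectional_prepare_decoder_sketch():
  """Illustrative only: shows the expected input/output shapes."""
  import collections
  fake_hparams = collections.namedtuple("FakeHParams", ["pos"])(pos="timing")
  targets_l2r = tf.zeros([4, 6, 32])  # [batch, length, hidden_size], embedded
  targets_r2l = tf.zeros([4, 6, 32])  # same targets, reversed in time
  decoder_input, bias = transformer_prepare_decoder(
      targets_l2r, targets_r2l, fake_hparams)
  # decoder_input: [2, 4, 6, 32], the l2r and r2l streams stacked on a new
  # leading axis; bias: [1, 1, 6, 6] causal mask shared by both streams.
  return decoder_input, bias
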
def transformer_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder."""
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  decoder_input = common_layers.shift_left_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)
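
# A minimal usage sketch for transformer_prepare_decoder above (not part of
# the original module; shapes are illustrative and hparams is faked with a
# namedtuple exposing only the `pos` attribute used here).
def _transformer_prepare_decoder_sketch():
  """Illustrative only: basic call with shape annotations."""
  import collections
  fake_hparams = collections.namedtuple("FakeHParams", ["pos"])(pos="timing")
  targets = tf.zeros([1, 5, 8])  # [batch, length, hidden_size], embedded
  decoder_input, bias = transformer_prepare_decoder(targets, fake_hparams)
  # decoder_input: [1, 5, 8], the shifted targets plus the timing signal;
  # bias: [1, 1, 5, 5] with large negative values above the diagonal so each
  # position can only attend to itself and earlier positions.
  return decoder_input, bias
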
def prepare_decoder(targets, target_space_emb):
  """Prepare decoder."""
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  target_space_emb = tf.reshape(target_space_emb, [1, 1, -1])
  target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1])
  decoder_input = common_layers.shift_left_3d(
      targets, pad_value=target_space_emb)
  decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)
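
# A minimal usage sketch for prepare_decoder above (not part of the original
# module; shapes are illustrative). The target-space embedding is assumed to
# be a single vector of size hidden_size, since it is reshaped to [1, 1, -1]
# and tiled over the batch before being used as the shifted-in padding.
def _prepare_decoder_sketch():
  """Illustrative only: shows the target-space embedding usage."""
  targets = tf.zeros([2, 7, 16])    # [batch, length, hidden_size]
  target_space_emb = tf.ones([16])  # one embedding vector of size hidden_size
  decoder_input, bias = prepare_decoder(targets, target_space_emb)
  # decoder_input: [2, 7, 16] with the target-space embedding padded in by the
  # shift; bias: [1, 1, 7, 7] causal mask.
  return decoder_input, bias
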
def attention_lm_moe_prepare_decoder(targets, hparams):
  """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal
      alignments
  """
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  decoder_input = common_layers.shift_left_3d(targets)
  if hparams.pos == "timing":
    decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)