Example #1
## Imports assumed from the tensor2tensor code base (module paths vary across
## versions); the same imports apply to the examples below.
import tensorflow as tf
from tensor2tensor.layers import common_attention
from tensor2tensor.layers import common_layers


def transformer_prepare_decoder(targets_l2r, targets_r2l, hparams):
    """Prepare one shard of the model for the decoder.
    """
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(tf.shape(targets_l2r)[1])) ## [1, 1, length, length]
    decoder_input_l2r = common_layers.shift_left_3d(targets_l2r)
    decoder_input_r2l = common_layers.shift_left_3d(targets_r2l)
    if hparams.pos == "timing":
        decoder_input_l2r = common_attention.add_timing_signal_1d(decoder_input_l2r)
        decoder_input_r2l = common_attention.add_timing_signal_1d(decoder_input_r2l)
    decoder_input = tf.concat(
        [tf.expand_dims(decoder_input_l2r, 0), tf.expand_dims(decoder_input_r2l, 0)],
        axis=0)  ## [2, batch, length, hidden_size]
    return (decoder_input, decoder_self_attention_bias)
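
Since decoder_input stacks the two directions along a new leading axis, a caller would typically split them apart again before running the two decoder passes. A minimal sketch (the unstack step is illustrative, not taken from the original source):

decoder_input, decoder_self_attention_bias = transformer_prepare_decoder(
    targets_l2r, targets_r2l, hparams)
## Recover the per-direction inputs from the [2, batch, length, hidden_size] stack.
decoder_input_l2r, decoder_input_r2l = tf.unstack(decoder_input, axis=0)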
Example #2
def transformer_prepare_decoder(targets, hparams):
    """Prepare one shard of the model for the decoder.
    """
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
    decoder_input = common_layers.shift_left_3d(targets)
    if hparams.pos == "timing":
        decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    return (decoder_input, decoder_self_attention_bias)
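
The returned bias is added to the attention logits before the softmax. A rough numpy sketch of its structure, assuming the usual tensor2tensor convention of zeros where attention is allowed and large negative values for future positions:

import numpy as np

def lower_triangle_bias(length, neg=-1e9):
    ## 1.0 on and below the diagonal (positions a step may attend to), 0.0 above it.
    mask = np.tril(np.ones([length, length], dtype=np.float32))
    ## Large negative values above the diagonal mask out future positions.
    bias = (1.0 - mask) * neg
    return bias[np.newaxis, np.newaxis, :, :]  ## [1, 1, length, length]

## For length 3: row i has 0.0 at columns <= i and -1e9 at columns > i.
print(lower_triangle_bias(3)[0, 0])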
Example #3
def prepare_decoder(targets, target_space_emb):
  """Prepare decoder."""
  decoder_self_attention_bias = (
      common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
  target_space_emb = tf.reshape(target_space_emb, [1, 1, -1])
  target_space_emb = tf.tile(target_space_emb, [tf.shape(targets)[0], 1, 1])
  decoder_input = common_layers.shift_left_3d(
      targets, pad_value=target_space_emb)
  decoder_input = common_attention.add_timing_signal_1d(decoder_input)
  return (decoder_input, decoder_self_attention_bias)
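
Example #3 differs from the others in that the first decoder step is fed the target-space embedding rather than a zero vector. A small numpy sketch of the shift-and-pad step, under my reading of shift_left_3d (prepend the pad value along the time axis, then drop the last position):

import numpy as np

targets = np.arange(2 * 3 * 4, dtype=np.float32).reshape(2, 3, 4)  ## [batch, length, hidden]
space_emb = np.ones((2, 1, 4), dtype=np.float32)                   ## tiled target_space_emb
decoder_input = np.concatenate([space_emb, targets], axis=1)[:, :-1, :]
## decoder_input[:, 0, :] is the target-space embedding;
## decoder_input[:, t, :] equals targets[:, t - 1, :] for t >= 1.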
Example #4
def attention_lm_moe_prepare_decoder(targets, hparams):
    """Prepare one shard of the model for the decoder.

  Args:
    targets: a Tensor.
    hparams: run hyperparameters

  Returns:
    decoder_input: a Tensor, bottom of decoder stack
    decoder_self_attention_bias: a Tensor, containing large negative values
    to implement masked attention and possibly baises for diagonal alignments
  """
    decoder_self_attention_bias = (
        common_attention.attention_bias_lower_triangle(tf.shape(targets)[1]))
    decoder_input = common_layers.shift_left_3d(targets)
    if hparams.pos == "timing":
        decoder_input = common_attention.add_timing_signal_1d(decoder_input)
    return (decoder_input, decoder_self_attention_bias)
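
A minimal smoke test of this prepare step, assuming TF1-style graph code as used in tensor2tensor and that common_hparams.basic_params1() still provides the pos and hidden_size fields relied on here:

import tensorflow as tf
from tensor2tensor.layers import common_hparams

hparams = common_hparams.basic_params1()
targets = tf.zeros([8, 20, hparams.hidden_size])  ## [batch, length, hidden_size]
decoder_input, bias = attention_lm_moe_prepare_decoder(targets, hparams)
## decoder_input: [8, 20, hidden_size]; bias: [1, 1, 20, 20]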