def transformer_prepare_encoder(inputs, target_space, hparams):
  """Prepare one shard of the model for the encoder.

  Args:
    inputs: a Tensor.
    target_space: a Tensor.
    hparams: run hyperparameters

  Returns:
    encoder_input: a Tensor, bottom of encoder stack
    encoder_self_attention_bias: a Tensor, containing large negative values
      to implement masked attention and possibly biases for diagonal
      alignments
    encoder_padding: a Tensor
  """
  ishape_static = inputs.shape.as_list()
  encoder_input = inputs
  # Compute the padding mask and the corresponding self-attention bias.
  encoder_padding = common_attention.embedding_to_padding(encoder_input)
  encoder_self_attention_bias = common_attention.attention_bias_ignore_padding(
      encoder_padding)
  # Append target_space_id embedding to inputs.
  emb_target_space = common_layers.embedding(
      target_space, 32, ishape_static[-1], name="target_space_embedding")
  emb_target_space = tf.reshape(emb_target_space, [1, 1, -1])
  encoder_input += emb_target_space
  if hparams.pos == "timing":
    encoder_input = common_attention.add_timing_signal_1d(encoder_input)
  return (encoder_input, encoder_self_attention_bias, encoder_padding)
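

# Hedged usage sketch (not part of the original file): how the three tensors
# returned by transformer_prepare_encoder are typically consumed.
# `transformer_encoder` and the `hparams.residual_dropout` field are
# assumptions for illustration; adjust them to whatever encoder entry point
# and dropout hyperparameter this codebase actually defines.
def _example_encode(inputs, target_space, hparams):
  """Illustrative only: prepare encoder inputs and run an encoder stack."""
  encoder_input, encoder_self_attention_bias, _ = transformer_prepare_encoder(
      inputs, target_space, hparams)
  # Dropout on the embeddings before the first encoder layer (assumed hparam).
  encoder_input = tf.nn.dropout(encoder_input, 1.0 - hparams.residual_dropout)
  # Assumed encoder stack; returns a Tensor of shape
  # [batch, length, hidden_size].
  return transformer_encoder(encoder_input, encoder_self_attention_bias,
                             hparams)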