Example #1
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder_class=SinusoidalPositionEncoder,
                 num_sources=1,
                 maximum_relative_position=None,
                 **kwargs):
        """Initializes the parameters of the decoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
            class to use for position encoding (or a callable that returns an
            instance).
          num_sources: The number of source contexts expected by this decoder.
          maximum_relative_position: Maximum relative position representation
            (from https://arxiv.org/abs/1803.02155).
          **kwargs: Additional layer arguments.
        """
        super(SelfAttentionDecoder, self).__init__(num_sources=num_sources,
                                                   **kwargs)
        self.num_units = num_units
        self.num_heads = num_heads
        self.dropout = dropout
        self.position_encoder = None
        if position_encoder_class is not None:
            self.position_encoder = position_encoder_class()
        self.layer_norm = common.LayerNorm()
        self.layers = [
            transformer.SelfAttentionDecoderLayer(
                self.num_units,
                self.num_heads,
                ffn_inner_dim,
                num_sources=num_sources,
                dropout=dropout,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                ffn_activation=ffn_activation,
                maximum_relative_position=maximum_relative_position)
            for i in range(num_layers)
        ]
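
A minimal usage sketch for the constructor above, assuming OpenNMT-tf is installed and this class is exposed as `opennmt.decoders.SelfAttentionDecoder` (true for recent releases; adjust the import if your version differs):

import opennmt

# Base Transformer decoder: 6 layers with the default 512 units / 8 heads and
# sinusoidal absolute position encoding.
decoder = opennmt.decoders.SelfAttentionDecoder(num_layers=6)

# Relative position representations (https://arxiv.org/abs/1803.02155) can be
# used instead of the absolute position encoder.
relative_decoder = opennmt.decoders.SelfAttentionDecoder(
    num_layers=6,
    position_encoder_class=None,
    maximum_relative_position=20)
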
    def __init__(self,
                 num_layers,
                 num_units=768,
                 num_heads=12,
                 ffn_inner_dim=3072,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 embedding_table=None,
                 position_encoder=SinusoidalPositionEncoder(),
                 num_sources=1,
                 **kwargs):
        """Initializes the parameters of the decoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs.
          num_sources: The number of source contexts expected by this decoder.
          **kwargs: Additional layer arguments.
        """
        super(SelfAttentionDecoderV2, self).__init__(num_sources=num_sources,
                                                     **kwargs)
        self.num_units = num_units
        self.num_heads = num_heads
        self.dropout = dropout
        self.embedding_table = embedding_table
        self.position_encoder = position_encoder

        self.layer_norm = common.LayerNorm(name="output_norm")
        self.layers = [
            _SelfAttentionDecoderLayer(self.num_units,
                                       self.num_heads,
                                       ffn_inner_dim,
                                       num_sources=num_sources,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       ffn_dropout=ffn_dropout,
                                       ffn_activation=ffn_activation,
                                       name="layer_%d" % i)
            for i in range(num_layers)
        ]
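
Unlike the first constructor, this one takes a position encoder instance (not a class) and defaults to BERT-sized dimensions (768 units, 12 heads, 3072 FFN units). A minimal sketch; the import path for `SelfAttentionDecoderV2` is an assumption and depends on the OpenNMT-tf version:

from opennmt.layers.position import SinusoidalPositionEncoder
# SelfAttentionDecoderV2 is assumed to be importable from the module defining
# the constructor above (exact path depends on the OpenNMT-tf version).

decoder = SelfAttentionDecoderV2(
    num_layers=6,
    num_units=512,
    num_heads=8,
    ffn_inner_dim=2048,
    position_encoder=SinusoidalPositionEncoder(),  # an instance, not a class
    num_sources=1)
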
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder_class=SinusoidalPositionEncoder,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
            class to use for position encoding (or a callable that returns an
            instance).
        """
        super(SelfAttentionEncoder, self).__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = None
        if position_encoder_class is not None:
            self.position_encoder = position_encoder_class()
        self.layer_norm = common.LayerNorm()
        self.layers = [
            transformer.SelfAttentionEncoderLayer(
                num_units,
                num_heads,
                ffn_inner_dim,
                dropout=dropout,
                attention_dropout=attention_dropout,
                ffn_dropout=ffn_dropout,
                ffn_activation=ffn_activation) for i in range(num_layers)
        ]
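
A usage sketch for the encoder constructor above, assuming it is exposed as `opennmt.encoders.SelfAttentionEncoder` as in recent OpenNMT-tf releases:

import opennmt

# Standard Transformer-base encoder with sinusoidal position encoding.
encoder = opennmt.encoders.SelfAttentionEncoder(num_layers=6)

# Passing position_encoder_class=None skips position encoding entirely, which
# is what the `if position_encoder_class is not None` branch above allows.
encoder_no_positions = opennmt.encoders.SelfAttentionEncoder(
    num_layers=6, position_encoder_class=None)
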
Example #4
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 ffn_dropout=0.1,
                 ffn_activation=tf.nn.relu,
                 position_encoder=SinusoidalPositionEncoder(),
                 max_relative_positions=0,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          ffn_dropout: The probability to drop units from the activation output in
            the feed forward layer.
          ffn_activation: The activation function to apply between the two linear
            transformations of the feed forward layer.
          position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs.
        """
        super(SelfAttentionEncoderV2, self).__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = position_encoder
        self.layer_norm = common.LayerNorm()
        self.layers = [
            _SelfAttentionEncoderLayer(num_units,
                                       num_heads,
                                       ffn_inner_dim,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       ffn_dropout=ffn_dropout,
                                       ffn_activation=ffn_activation,
                                       name="layer_%d" % i)
            for i in range(num_layers)
        ]
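
A sketch for this variant; the import path for `SelfAttentionEncoderV2` is an assumption. Note that the `position_encoder=SinusoidalPositionEncoder()` default is evaluated once at function-definition time, so passing an explicit instance avoids sharing one position encoder object across every encoder built with the default:

from opennmt.layers.position import SinusoidalPositionEncoder
# SelfAttentionEncoderV2 is assumed to be importable from the module defining
# the constructor above (exact path depends on the OpenNMT-tf version).

encoder = SelfAttentionEncoderV2(
    num_layers=6,
    position_encoder=SinusoidalPositionEncoder())  # one encoder per instance
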
    def __init__(self,
                 num_layers,
                 num_units=512,
                 num_heads=8,
                 ffn_inner_dim=2048,
                 dropout=0.1,
                 attention_dropout=0.1,
                 relu_dropout=0.1,
                 position_encoder=None,
                 **kwargs):
        """Initializes the parameters of the encoder.

        Args:
          num_layers: The number of layers.
          num_units: The number of hidden units.
          num_heads: The number of heads in the multi-head attention.
          ffn_inner_dim: The number of units of the inner linear transformation
            in the feed forward layer.
          dropout: The probability to drop units from the outputs.
          attention_dropout: The probability to drop units from the attention.
          relu_dropout: The probability to drop units from the ReLU activation in
            the feed forward layer.
          position_encoder: The :class:`opennmt.layers.position.PositionEncoder` to
            apply on inputs. If ``None``, defaults to
            :class:`opennmt.layers.position.SinusoidalPositionEncoder`.
        """
        super(SelfAttentionEncoderV2, self).__init__(**kwargs)
        self.num_units = num_units
        self.dropout = dropout
        self.position_encoder = position_encoder
        if self.position_encoder is None:
            self.position_encoder = SinusoidalPositionEncoder()
        self.layer_norm = common.LayerNorm()
        self.layers = [
            _SelfAttentionEncoderLayer(num_units,
                                       num_heads,
                                       ffn_inner_dim,
                                       dropout=dropout,
                                       attention_dropout=attention_dropout,
                                       relu_dropout=relu_dropout,
                                       name="layer_%d" % i)
            for i in range(num_layers)
        ]
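
Compared with the previous variant, this constructor defaults `position_encoder` to ``None`` and builds a `SinusoidalPositionEncoder` inside `__init__`, so each encoder instance gets its own position encoder object. A minimal sketch (class import assumed, as before):

# SelfAttentionEncoderV2 is assumed to be importable from the module defining
# the constructor above (exact path depends on the OpenNMT-tf version).

encoder = SelfAttentionEncoderV2(num_layers=6)  # sinusoidal positions created in __init__

small_encoder = SelfAttentionEncoderV2(
    num_layers=4,
    num_units=256,
    num_heads=4,
    ffn_inner_dim=1024,
    relu_dropout=0.1)  # this variant names the FFN dropout `relu_dropout`
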
Example #6
  def testLayerNorm(self):
    layer_norm = common.LayerNorm()
    x = tf.random.uniform([4, 10])
    y = layer_norm(x)
    self.assertEqual(y.shape, x.shape)
  def __init__(self,
               num_layers,
               num_units=512,
               num_heads=8,
               ffn_inner_dim=2048,
               dropout=0.1,
               attention_dropout=0.1,
               ffn_dropout=0.1,
               ffn_activation=tf.nn.relu,
               position_encoder_class=SinusoidalPositionEncoder,
               maximum_relative_position=None,
               attention_span=None,
               num_attended_heads=1,
               **kwargs):
    """Initializes the parameters of the encoder.

    Args:
      num_layers: The number of layers.
      num_units: The number of hidden units.
      num_heads: The number of heads in the multi-head attention.
      ffn_inner_dim: The number of units of the inner linear transformation
        in the feed forward layer.
      dropout: The probability to drop units from the outputs.
      attention_dropout: The probability to drop units from the attention.
      ffn_dropout: The probability to drop units from the activation output in
        the feed forward layer.
      ffn_activation: The activation function to apply between the two linear
        transformations of the feed forward layer.
      position_encoder_class: The :class:`opennmt.layers.PositionEncoder`
        class to use for position encoding (or a callable that returns an
        instance).
      maximum_relative_position: Maximum relative position representation
        (from https://arxiv.org/abs/1803.02155).
      attention_span: Maximum relative position to attend to
        (from https://arxiv.org/abs/1904.03107).
      num_attended_heads: The number of heads to attend to. Defaults to 1, as
        each head only attends to itself in the vanilla Transformer. Increase to
        an odd number < `num_heads` to also model head interaction
        (from https://arxiv.org/abs/1904.03107).
      **kwargs: Additional layer arguments.
    """
    super(SelfAttentionEncoder, self).__init__(**kwargs)
    self.num_units = num_units
    self.dropout = dropout
    self.position_encoder = None
    if position_encoder_class is not None:
      self.position_encoder = position_encoder_class()
    self.layer_norm = common.LayerNorm()

    if attention_span is None:
      num_unconstrained_layers = num_layers
    else:
      num_unconstrained_layers = math.floor(num_layers / 2)
    num_constrained_layers = num_layers - num_unconstrained_layers
    self.layers = [
        transformer.SelfAttentionEncoderLayer(
            num_units,
            num_heads,
            ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            maximum_relative_position=maximum_relative_position,
            attention_span=attention_span,
            num_attended_heads=num_attended_heads)
        for _ in range(num_constrained_layers)]
    self.layers += [
        transformer.SelfAttentionEncoderLayer(
            num_units,
            num_heads,
            ffn_inner_dim,
            dropout=dropout,
            attention_dropout=attention_dropout,
            ffn_dropout=ffn_dropout,
            ffn_activation=ffn_activation,
            maximum_relative_position=maximum_relative_position)
        for _ in range(num_unconstrained_layers)]
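
To make the layer split above concrete, here is a small worked sketch of the same arithmetic (the helper name `split_layers` is only for illustration): with `attention_span` set, the first `num_layers - floor(num_layers / 2)` layers are span-constrained and the rest are not; with `attention_span=None`, no layer is constrained.

import math

def split_layers(num_layers, attention_span):
    # Mirrors the split logic in __init__ above.
    if attention_span is None:
        num_unconstrained = num_layers
    else:
        num_unconstrained = math.floor(num_layers / 2)
    # Returns (num_constrained_layers, num_unconstrained_layers).
    return num_layers - num_unconstrained, num_unconstrained

assert split_layers(6, attention_span=8) == (3, 3)
assert split_layers(5, attention_span=8) == (3, 2)   # odd depth: the extra layer is constrained
assert split_layers(6, attention_span=None) == (0, 6)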