Example #1
    def __init__(self,
                 filter_size,
                 output_size,
                 dropout_rate,
                 activation="relu",
                 name="ffn"):
        """ Initializes Transformer FFN.

        Args:
            filter_size: The hidden size of the internal (filter) layer.
            output_size: The output size.
            dropout_rate: The dropout rate.
            activation: The activation function of the internal layer.
            name: The name of this layer.
        """
        super(TransformerFFN, self).__init__(name=name)
        self._dropout_rate = dropout_rate
        self._filter_size = filter_size
        self._output_size = output_size
        self._activation = activation
        self._activation_fn = get_activation(activation)
        self._conv1 = None
        self._conv2 = None
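
As a minimal usage sketch, the layer could be instantiated with typical Transformer-base sizes. The argument values here are illustrative only, and the sketch assumes `TransformerFFN` (and its `get_activation` helper) is importable from the surrounding module:

    # Hypothetical usage sketch; assumes TransformerFFN and get_activation
    # are available from the module this snippet belongs to.
    ffn = TransformerFFN(
        filter_size=2048,   # hidden size of the internal layer
        output_size=512,    # matches the model dimension
        dropout_rate=0.1,
        activation="relu",
        name="ffn")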
Example #2
    def __init__(self,
                 output_units,
                 num_heads,
                 kernel_initializer=None,
                 bias_initializer="zeros",
                 activation=None,
                 use_bias=True,
                 is_output_transform=False,
                 name="transform"):
        """ Initializes MultiHeadDenseLayer.

        Args:
            output_units: An int scalar or a list of ints, indicating the transformed
                output units. It must be an int scalar when `is_output_transform` is True.
            num_heads: The number of attention heads.
            kernel_initializer: The initializer of the kernel weights.
            bias_initializer: The initializer of the bias.
            activation: A string or a callable for the activation function.
            use_bias: A boolean, whether to add a bias tensor.
            is_output_transform: A boolean, whether this layer is used as the output
                transformation in multi-head attention.
            name: The name of the layer.
        """
        super(MultiHeadDenseLayer, self).__init__(name=name)
        self._output_units = output_units
        self._num_heads = num_heads
        self._kernel_initializer = kernel_initializer
        self._bias_initializer = bias_initializer
        self._use_bias = use_bias
        self._is_output_transform = is_output_transform
        self._activation = activation
        self._activation_fn = get_activation(activation)
        # Flatten output_units so that scalar and list inputs are handled uniformly.
        self._flatten_output_units = tf.nest.flatten(self._output_units)
        if is_output_transform:
            # The output transform projects to a single tensor, so a nested
            # output_units specification is not allowed here.
            assert not tf.nest.is_nested(self._output_units)
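
A minimal usage sketch follows. It assumes, based on the flattening logic above, that passing a list of `output_units` creates one projection per entry (e.g. a fused query/key/value transform), while the output transform takes a single int; the argument values and layer names are illustrative only:

    # Hypothetical usage sketch; assumes MultiHeadDenseLayer is importable
    # from the surrounding module.
    # Fused projection with one output size per entry (assumed q/k/v use case).
    qkv_transform = MultiHeadDenseLayer(
        output_units=[512, 512, 512],
        num_heads=8,
        is_output_transform=False,
        name="qkv_transform")

    # The output transformation must receive a single int for output_units.
    output_transform = MultiHeadDenseLayer(
        output_units=512,
        num_heads=8,
        is_output_transform=True,
        name="output_transform")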