Example #1
from typing import Any

import tensorflow as tf


def decoder_layer(batch_size: Any, d_model: int, num_heads: int, dropout: float,
                  d_type: tf.dtypes.DType = tf.float32, name: str = "decoder_layer") -> tf.keras.Model:
    """Transformer的decoder层

    :param batch_size: batch大小
    :param units: 词汇量大小
    :param d_model: 深度,词嵌入维度
    :param num_heads: 注意力头数
    :param dropout: dropout的权重
    :param d_type: 运算精度
    :param name: 名称
    :return: Transformer的Decoder内部层
    """
    inputs = tf.keras.Input(shape=(30, d_model), dtype=d_type, name="{}_inputs".format(name))
    # inputs = tf.keras.Input(shape=(36, d_model), dtype=d_type, name="{}_inputs".format(name))
    enc_outputs = tf.keras.Input(shape=(12, d_model), dtype=d_type, name="{}_encoder_outputs".format(name))
    # look_ahead_mask = tf.keras.Input(shape=(1, None, None), dtype=d_type, name="{}_look_ahead_mask".format(name))

    # self_attention = scaled_dot_product_attention(num_heads=num_heads, depth=d_model // num_heads,
    #                                               d_type=d_type, mask=look_ahead_mask)
    self_attention_output = attention_layer(
        batch_size=batch_size, d_model=d_model, num_heads=num_heads,
        d_type=d_type, name="{}_attention_layer_1".format(name)
    )(inputs=[inputs, inputs, inputs])

    self_attention_output = tf.keras.layers.Dropout(rate=dropout, dtype=d_type,
                                                    name="{}_attention_dropout1".format(name))(self_attention_output)
    self_attention_output = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type, name="{}_attention_layer_norm1".format(name))(inputs + self_attention_output)

    # cross_attention = scaled_dot_product_attention(num_heads=num_heads, depth=d_model // num_heads,
    #                                                d_type=d_type, mask=padding_mask)
    cross_attention_output = attention_layer(
        batch_size=batch_size, d_model=d_model, num_heads=num_heads,
        d_type=d_type, name="{}_attention_layer_2".format(name)
    )(inputs=[self_attention_output, enc_outputs, enc_outputs])

    cross_attention_output = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type, name="{}_attention_dropout2".format(name))(cross_attention_output)
    cross_attention_output = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type,
        name="{}_attention_layer_norm2".format(name)
    )(self_attention_output + cross_attention_output)

    outputs = tf.keras.layers.Conv1D(filters=4 * d_model, kernel_size=1, strides=1,
                                     activation="relu", dtype=d_type)(cross_attention_output)
    outputs = tf.keras.layers.Dropout(rate=dropout, dtype=d_type,
                                      name="{}_outputs_dropout".format(name))(outputs)
    outputs = tf.keras.layers.Conv1D(filters=d_model, kernel_size=1, strides=1, dtype=d_type)(outputs)

    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type, name="{}_outputs_layer_norm".format(name))(cross_attention_output + outputs)

    return tf.keras.Model(inputs=[inputs, enc_outputs], outputs=outputs, name=name)
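
Usage sketch (not from the original source): the snippet depends on a project-level attention_layer helper that is not shown on this page. Assuming a stand-in built on tf.keras.layers.MultiHeadAttention with the same calling convention, defined in the same module as decoder_layer above, the layer could be exercised as follows; the stand-in and the dimensions are assumptions, not the project's actual implementation.

def attention_layer(batch_size, d_model, num_heads, d_type=tf.float32, name="attention_layer"):
    # Hypothetical stand-in for the project's attention_layer helper:
    # takes [query, key, value] and returns multi-head attention output.
    query = tf.keras.Input(shape=(None, d_model), dtype=d_type)
    key = tf.keras.Input(shape=(None, d_model), dtype=d_type)
    value = tf.keras.Input(shape=(None, d_model), dtype=d_type)
    outputs = tf.keras.layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=d_model // num_heads, dtype=d_type)(query, value, key)
    return tf.keras.Model(inputs=[query, key, value], outputs=outputs, name=name)


decoder = decoder_layer(batch_size=None, d_model=256, num_heads=8, dropout=0.1)
dummy_targets = tf.random.uniform((2, 30, 256))   # matches the hard-coded target length of 30
dummy_memory = tf.random.uniform((2, 12, 256))    # matches the hard-coded encoder length of 12
print(decoder([dummy_targets, dummy_memory]).shape)  # (2, 30, 256)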
Example #2
def res_chan_attention_blocks(res_in, num_chans, reduction_ratio):
    # Residual block followed by channel attention; resnet_layer and
    # test_initializer are defined elsewhere in the source project.
    x = resnet_layer(inputs=res_in,
                     num_filters=num_chans,
                     kernel_initializer=test_initializer)
    # Apply channel attention with the given reduction ratio.
    x = attention_layer(x, reduction_ratio)
    return x
Example #3
from typing import Any

import tensorflow as tf


def encoder_layer(batch_size: Any, d_model: Any, num_heads: Any, dropout: Any,
                  d_type: tf.dtypes.DType = tf.float32, name="encoder_layer") -> tf.keras.Model:
    """

    :param batch_size:
    :param d_model:
    :param num_heads:
    :param dropout:
    :param d_type:
    :param name:
    :return:
    """
    inputs = tf.keras.Input(shape=(12, d_model), dtype=d_type, name="{}_inputs".format(name))

    # attention = scaled_dot_product_attention(num_heads=num_heads, depth=d_model // num_heads,
    #                                          d_type=d_type, mask=padding_mask)
    attention_output = attention_layer(batch_size=batch_size, d_model=d_model, num_heads=num_heads,
                                       d_type=d_type)(inputs=[inputs, inputs, inputs])

    attention_output = tf.keras.layers.Dropout(rate=dropout, dtype=d_type,
                                               name="{}_attention_dropout".format(name))(attention_output)
    attention_output = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type, name="{}_attention_layer_norm".format(name))(inputs + attention_output)

    conv_output = tf.keras.layers.Conv1D(filters=4 * d_model, kernel_size=1, strides=1,
                                         activation="relu", padding="same", dtype=d_type)(attention_output)
    conv_output = tf.keras.layers.Dropout(rate=dropout, dtype=d_type,
                                          name="{}_outputs_dropout".format(name))(conv_output)
    conv_output = tf.keras.layers.Conv1D(filters=d_model, kernel_size=1, strides=1,
                                         padding="same", dtype=d_type)(conv_output)
    outputs = tf.keras.layers.LayerNormalization(epsilon=1e-6, dtype=d_type,
                                                 name="{}_outputs_layer_norm".format(name))(conv_output)

    return tf.keras.Model(inputs=inputs, outputs=outputs, name=name)
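
Usage sketch (not from the original source): with the same MultiHeadAttention-based attention_layer stand-in assumed under Example #1, the encoder layer could be exercised like this; the shapes follow the hard-coded sequence length of 12 and the dimensions are assumptions.

encoder = encoder_layer(batch_size=None, d_model=256, num_heads=8, dropout=0.1)
dummy_inputs = tf.random.uniform((2, 12, 256))   # (batch, sequence length, d_model)
print(encoder(dummy_inputs).shape)               # (2, 12, 256)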
Example #4
def res_chan_attention_blocks(res_in, num_chans, reduction_ratio):
    # Residual block followed by channel attention; resnet_layer is defined
    # elsewhere in the source project.
    x = resnet_layer(inputs=res_in,
                     num_filters=num_chans)
    # Apply channel attention with the given reduction ratio.
    x = attention_layer(x, reduction_ratio)
    return x
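
The attention_layer helper called in Examples #2 and #4 is not shown on this page; its reduction_ratio argument suggests a squeeze-and-excitation style channel-attention block. The following is a minimal sketch of what such a helper might look like, purely as an assumption about the missing code, not the project's actual implementation.

import tensorflow as tf

def attention_layer(inputs, reduction_ratio):
    # Hypothetical squeeze-and-excitation style channel attention for 4-D
    # feature maps shaped (batch, height, width, channels).
    channels = inputs.shape[-1]
    x = tf.keras.layers.GlobalAveragePooling2D()(inputs)                          # squeeze to (batch, channels)
    x = tf.keras.layers.Dense(channels // reduction_ratio, activation="relu")(x)  # bottleneck
    x = tf.keras.layers.Dense(channels, activation="sigmoid")(x)                  # per-channel weights
    x = tf.keras.layers.Reshape((1, 1, channels))(x)
    return tf.keras.layers.Multiply()([inputs, x])                                # rescale input channels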