def __init__(self, d_model, heads, d_ff=1024, dropout=0.1, attention_type="Baseline",
             relative_time_pitch=False, max_relative_position=512):
    super().__init__()
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.norm_3 = Norm(d_model)
    self.attention_type = attention_type
    self.relative_time_pitch = relative_time_pitch
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
    self.dropout_3 = nn.Dropout(dropout)
    # Masked self-attention over the decoder input
    self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                     attention_type=self.attention_type,
                                     relative_time_pitch=self.relative_time_pitch,
                                     max_relative_position=max_relative_position)
    # Encoder-decoder (cross) attention
    self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout,
                                     attention_type=self.attention_type,
                                     relative_time_pitch=self.relative_time_pitch,
                                     max_relative_position=max_relative_position)
    self.ff = FeedForward(d_model, d_ff, dropout)
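A minimal instantiation sketch; the enclosing class name (here written as DecoderLayer) and the surrounding definitions of Norm, MultiHeadAttention, and FeedForward are assumed from context rather than shown in this excerpt, and the hyperparameter values are illustrative only.

# Hypothetical usage; DecoderLayer is an assumed class name for the
# constructor above, and the argument values are placeholders.
layer = DecoderLayer(d_model=512, heads=8, d_ff=1024, dropout=0.1,
                     attention_type="Baseline",
                     relative_time_pitch=True,      # enable relative time/pitch attention
                     max_relative_position=512)     # clip relative distances at 512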
def __init__(self, d_model, heads, dropout=0.1):
    super().__init__()
    self.norm_1 = Norm(d_model)
    self.norm_2 = Norm(d_model)
    self.norm_3 = Norm(d_model)
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
    self.dropout_3 = nn.Dropout(dropout)
    self.attn_1 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.attn_2 = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
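The forward pass is not part of this excerpt; the sketch below assumes the pre-norm residual wiring commonly paired with this constructor, an attention call of the form attn(q, k, v, mask), and the argument names e_outputs, src_mask, and trg_mask, none of which are confirmed by the source.

# Sketch only: pre-norm residual decoder step under the assumptions stated above.
def forward(self, x, e_outputs, src_mask, trg_mask):
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn_1(x2, x2, x2, trg_mask))                # masked self-attention
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.attn_2(x2, e_outputs, e_outputs, src_mask))  # cross-attention over encoder output
    x2 = self.norm_3(x)
    x = x + self.dropout_3(self.ff(x2))                                      # position-wise feed-forward
    return x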
def __init__(self, d_model, num_heads, dff, rate=0.1):
    super(DecoderLayer, self).__init__()
    # Two attention blocks: masked self-attention and encoder-decoder attention
    self.mha1 = MultiHeadAttention(d_model, num_heads)
    self.mha2 = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(d_model, dff)
    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)
    self.dropout3 = tf.keras.layers.Dropout(rate)
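A sketch of the matching call method, following the post-norm pattern typical of this style of Keras layer; the (v, k, q, mask) argument order of the custom MultiHeadAttention, the returned attention weights, and the mask names are assumptions, since that class is not shown here.

# Sketch only: post-norm decoder step under the assumed MultiHeadAttention interface.
def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask)          # masked self-attention
    attn1 = self.dropout1(attn1, training=training)
    out1 = self.layernorm1(attn1 + x)

    attn2, attn_weights_block2 = self.mha2(enc_output, enc_output, out1, padding_mask)  # cross-attention
    attn2 = self.dropout2(attn2, training=training)
    out2 = self.layernorm2(attn2 + out1)

    ffn_output = self.ffn(out2)                                               # point-wise feed-forward
    ffn_output = self.dropout3(ffn_output, training=training)
    out3 = self.layernorm3(ffn_output + out2)
    return out3, attn_weights_block1, attn_weights_block2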
def __init__(self, d_input, d_model, heads, dropout=0.1):
    super().__init__()
    self.input_linear = nn.Linear(d_input, d_model)
    self.norm_1 = nn.LayerNorm(d_model)
    self.norm_2 = nn.LayerNorm(d_model)
    self.attn = MultiHeadAttention(heads, d_model, dropout=dropout)
    self.ff = FeedForward(d_model, dropout=dropout)
    self.dropout_1 = nn.Dropout(dropout)
    self.dropout_2 = nn.Dropout(dropout)
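For this projection-plus-self-attention layer, a plausible forward pass is sketched below, assuming the input is first mapped from d_input to d_model and then run through a pre-norm self-attention / feed-forward block; the optional mask argument and the attn(q, k, v, mask) call signature are assumptions not present in the excerpt.

# Sketch only: project the input, then apply self-attention and feed-forward
# sub-layers with residual connections, under the assumptions stated above.
def forward(self, x, mask=None):
    x = self.input_linear(x)                               # project d_input -> d_model
    x2 = self.norm_1(x)
    x = x + self.dropout_1(self.attn(x2, x2, x2, mask))    # self-attention with residual
    x2 = self.norm_2(x)
    x = x + self.dropout_2(self.ff(x2))                    # feed-forward with residual
    return x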