def __init__(self, d_model, d_ff, n_heads, kernel_size,
             dropout, dropout_att, dropout_layer,
             layer_norm_eps, ffn_activation, param_init,
             ffn_bottleneck_dim=0):
    super(ConformerEncoderBlock, self).__init__()
    self.n_heads = n_heads
    self.fc_factor = 0.5

    # first half position-wise feed-forward
    self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward1 = FFN(d_model, d_ff, dropout, ffn_activation,
                             param_init, ffn_bottleneck_dim)

    # conv module
    self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.conv = ConformerConvBlock(d_model, kernel_size, param_init)

    # self-attention
    self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.self_attn = RelMHA(kdim=d_model,
                            qdim=d_model,
                            adim=d_model,
                            odim=d_model,
                            n_heads=n_heads,
                            dropout=dropout_att,
                            param_init=param_init)

    # second half position-wise feed-forward
    self.norm4 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward2 = FFN(d_model, d_ff, dropout, ffn_activation,
                             param_init, ffn_bottleneck_dim)

    self.dropout = nn.Dropout(dropout)
    self.dropout_layer = dropout_layer
def __init__(self, d_model, d_ff, n_heads, kernel_size,
             dropout, dropout_att, dropout_layer,
             layer_norm_eps, ffn_activation, param_init,
             pe_type, clamp_len, ffn_bottleneck_dim, unidirectional,
             normalization='layer_norm'):
    super(ConformerEncoderBlock, self).__init__()
    self.n_heads = n_heads
    self.fc_factor = 0.5

    # first half position-wise feed-forward
    self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward_macaron = FFN(d_model, d_ff, dropout, ffn_activation,
                                    param_init, ffn_bottleneck_dim)

    # self-attention
    self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.self_attn = RelMHA(kdim=d_model,
                            qdim=d_model,
                            adim=d_model,
                            odim=d_model,
                            n_heads=n_heads,
                            dropout=dropout_att,
                            param_init=param_init,
                            xl_like=(pe_type == 'relative_xl'),
                            clamp_len=clamp_len)

    # conv module
    self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.conv = ConformerConvBlock(d_model, kernel_size, param_init,
                                   normalization, causal=unidirectional)
    self.conv_context = kernel_size

    # second half position-wise feed-forward
    self.norm4 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward = FFN(d_model, d_ff, dropout, ffn_activation,
                            param_init, ffn_bottleneck_dim)

    self.norm5 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.dropout = nn.Dropout(dropout)
    self.dropout_layer = dropout_layer  # probability to skip
    logger.info('Stochastic depth prob: %.3f' % dropout_layer)

    self.reset_visualization()
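# Note on 'dropout_layer' above: the constructor only stores the probability
# ("probability to skip", logged as the stochastic depth prob); the skipping
# itself happens in the forward pass, which is not shown here. The snippet
# below is a hedged, self-contained sketch of the generic stochastic-depth
# mechanism, not this repository's implementation; the helper name
# 'maybe_skip_block' is invented for illustration.
import torch

def maybe_skip_block(xs, block_fn, p_skip, training):
    """Skip the whole block with probability p_skip during training (sketch)."""
    if training and p_skip > 0 and torch.rand(1).item() < p_skip:
        return xs  # identity: the block is dropped for this mini-batch
    # NOTE: real implementations often also rescale the kept residual branch
    # (e.g. by 1 / (1 - p_skip)) so that expectations match at test time.
    return block_fn(xs)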
def __init__(self, d_model, d_ff, n_heads, kernel_size,
             dropout, dropout_att, dropout_layer,
             layer_norm_eps, ffn_activation, param_init,
             pe_type, ffn_bottleneck_dim, unidirectional):
    super(ConformerEncoderBlock, self).__init__()
    self.n_heads = n_heads
    self.fc_factor = 0.5

    # first half position-wise feed-forward
    self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward_macaron = FFN(d_model, d_ff, dropout, ffn_activation,
                                    param_init, ffn_bottleneck_dim)

    # self-attention
    self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.self_attn = RelMHA(kdim=d_model,
                            qdim=d_model,
                            adim=d_model,
                            odim=d_model,
                            n_heads=n_heads,
                            dropout=dropout_att,
                            param_init=param_init,
                            xl_like=(pe_type == 'relative_xl'))

    # conv module
    self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.conv = ConformerConvBlock(d_model, kernel_size, param_init,
                                   causal=unidirectional)

    # second half position-wise feed-forward
    self.norm4 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.feed_forward = FFN(d_model, d_ff, dropout, ffn_activation,
                            param_init, ffn_bottleneck_dim)

    self.norm5 = nn.LayerNorm(d_model, eps=layer_norm_eps)
    self.dropout = nn.Dropout(dropout)
    self.dropout_layer = dropout_layer

    self.reset_visualization()
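# The forward pass of the block is not included above. As a rough illustration
# of the macaron ordering implied by the modules built in the last two
# constructors (half-step FFN -> self-attention -> convolution -> half-step
# FFN -> final norm, all pre-norm with residual connections), here is a
# minimal self-contained sketch. The class name 'MacaronBlockSketch' and its
# placeholder submodules (a plain Linear/SiLU FFN, nn.MultiheadAttention,
# nn.Identity for the convolution) are stand-ins invented for the sketch;
# they are not the FFN, RelMHA, or ConformerConvBlock used in this code.
import torch
import torch.nn as nn


class MacaronBlockSketch(nn.Module):
    """Minimal sketch of the Conformer macaron block ordering (illustrative only)."""

    def __init__(self, d_model=256, d_ff=1024, n_heads=4, dropout=0.1):
        super().__init__()
        self.fc_factor = 0.5  # half-step residual weight for the two FFNs
        self.norm1 = nn.LayerNorm(d_model)
        self.ffn1 = nn.Sequential(nn.Linear(d_model, d_ff), nn.SiLU(),
                                  nn.Dropout(dropout), nn.Linear(d_ff, d_model))
        self.norm2 = nn.LayerNorm(d_model)
        self.attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout,
                                          batch_first=True)
        self.norm3 = nn.LayerNorm(d_model)
        self.conv = nn.Identity()  # stand-in for the convolution module
        self.norm4 = nn.LayerNorm(d_model)
        self.ffn2 = nn.Sequential(nn.Linear(d_model, d_ff), nn.SiLU(),
                                  nn.Dropout(dropout), nn.Linear(d_ff, d_model))
        self.norm5 = nn.LayerNorm(d_model)  # final layer norm
        self.dropout = nn.Dropout(dropout)

    def forward(self, xs):
        # first half position-wise feed-forward (0.5x residual)
        xs = xs + self.fc_factor * self.dropout(self.ffn1(self.norm1(xs)))
        # self-attention (pre-norm, full residual)
        residual = xs
        xs = self.norm2(xs)
        xs, _ = self.attn(xs, xs, xs, need_weights=False)
        xs = residual + self.dropout(xs)
        # convolution module (pre-norm, full residual)
        xs = xs + self.dropout(self.conv(self.norm3(xs)))
        # second half position-wise feed-forward (0.5x residual)
        xs = xs + self.fc_factor * self.dropout(self.ffn2(self.norm4(xs)))
        # final normalization
        return self.norm5(xs)


# usage sketch
if __name__ == '__main__':
    block = MacaronBlockSketch()
    out = block(torch.randn(2, 50, 256))  # (batch, time, d_model)
    print(out.shape)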