def __init__(
    self,
    idim,
    selfattention_layer_type="selfattn",
    attention_dim=256,
    attention_heads=4,
    linear_units=2048,
    num_blocks=6,
    dropout_rate=0.1,
    positional_dropout_rate=0.1,
    attention_dropout_rate=0.0,
    input_layer="conv2d",
    pos_enc_class=PositionalEncoding,
    normalize_before=True,
    concat_after=False,
    positionwise_layer_type="linear",
    positionwise_conv_kernel_size=1,
):
    """Construct an Encoder object."""
    super(Encoder, self).__init__()
    self.conv_subsampling_factor = 1
    # Input embedding front-end.
    if input_layer == "conv2d":
        # Default: 2-layer Conv2d subsampling that reduces the time axis by a
        # factor of 4 (assumes ESPnet's Conv2dSubsampling is imported alongside
        # the other transformer modules used here).
        self.embed = Conv2dSubsampling(idim, attention_dim, dropout_rate)
        self.conv_subsampling_factor = 4
    elif isinstance(input_layer, torch.nn.Module):
        # A user-supplied module is wrapped together with the positional encoding.
        self.embed = torch.nn.Sequential(
            input_layer,
            pos_enc_class(attention_dim, positional_dropout_rate),
        )
    else:
        raise NotImplementedError(
            "only `conv2d` or torch.nn.Module is supported as input_layer."
        )
    self.normalize_before = normalize_before
    # Position-wise feed-forward module shared by all encoder blocks.
    positionwise_layer, positionwise_layer_args = self.get_positionwise_layer(
        positionwise_layer_type,
        attention_dim,
        linear_units,
        dropout_rate,
        positionwise_conv_kernel_size,
    )
    # Stack of num_blocks identical self-attention encoder layers.
    self.encoders = repeat(
        num_blocks,
        lambda lnum: EncoderLayer(
            attention_dim,
            MultiHeadedAttention(
                attention_heads, attention_dim, attention_dropout_rate
            ),
            positionwise_layer(*positionwise_layer_args),
            dropout_rate,
            normalize_before,
            concat_after,
        ),
    )
    if self.normalize_before:
        # Final LayerNorm for the pre-norm configuration.
        self.after_norm = LayerNorm(attention_dim)
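# Hedged usage sketch (not part of the original source): assuming this __init__
# belongs to the usual espnet-style Encoder class with a forward(xs, masks) ->
# (hs, hs_mask) method (not shown here), construction and a forward pass would
# look roughly like this; idim=83 is only an example feature dimension:
#
#     encoder = Encoder(idim=83)
#     xs = torch.randn(4, 200, 83)                      # (batch, time, feature)
#     masks = torch.ones(4, 1, 200, dtype=torch.bool)   # (batch, 1, time) padding mask
#     hs, hs_masks = encoder(xs, masks)                 # hs: roughly (batch, time / 4, attention_dim)
#                                                       # with the conv2d subsampling front-end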
def __init__(
    self,
    odim,
    selfattention_layer_type="selfattn",
    attention_dim=256,
    attention_heads=4,
    conv_wshare=4,
    conv_kernel_length=11,
    conv_usebias=False,
    linear_units=2048,
    num_blocks=6,
    dropout_rate=0.1,
    positional_dropout_rate=0.1,
    self_attention_dropout_rate=0.0,
    src_attention_dropout_rate=0.0,
    input_layer="embed",
    use_output_layer=True,
    pos_enc_class=PositionalEncoding,
    normalize_before=True,
    concat_after=False,
):
    """Construct a Decoder object."""
    torch.nn.Module.__init__(self)
    # Target embedding: a token embedding table, a linear projection for
    # continuous inputs, or a user-supplied torch.nn.Module.
    if input_layer == "embed":
        self.embed = torch.nn.Sequential(
            torch.nn.Embedding(odim, attention_dim),
            pos_enc_class(attention_dim, positional_dropout_rate),
        )
    elif input_layer == "linear":
        self.embed = torch.nn.Sequential(
            torch.nn.Linear(odim, attention_dim),
            torch.nn.LayerNorm(attention_dim),
            torch.nn.Dropout(dropout_rate),
            torch.nn.ReLU(),
            pos_enc_class(attention_dim, positional_dropout_rate),
        )
    elif isinstance(input_layer, torch.nn.Module):
        self.embed = torch.nn.Sequential(
            input_layer,
            pos_enc_class(attention_dim, positional_dropout_rate),
        )
    else:
        raise NotImplementedError(
            "only `embed`, `linear`, or torch.nn.Module is supported."
        )
    self.normalize_before = normalize_before
    # Stack of num_blocks decoder layers, each with masked self-attention,
    # source (encoder-decoder) attention, and a position-wise feed-forward.
    self.decoders = repeat(
        num_blocks,
        lambda lnum: DecoderLayer(
            attention_dim,
            MultiHeadedAttention(
                attention_heads, attention_dim, self_attention_dropout_rate
            ),
            MultiHeadedAttention(
                attention_heads, attention_dim, src_attention_dropout_rate
            ),
            PositionwiseFeedForward(attention_dim, linear_units, dropout_rate),
            dropout_rate,
            normalize_before,
            concat_after,
        ),
    )
    self.selfattention_layer_type = selfattention_layer_type
    if self.normalize_before:
        # Final LayerNorm for the pre-norm configuration.
        self.after_norm = LayerNorm(attention_dim)
    if use_output_layer:
        # Project decoder states back to the output vocabulary size.
        self.output_layer = torch.nn.Linear(attention_dim, odim)
    else:
        self.output_layer = None
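# Hedged usage sketch (not part of the original source): assuming this __init__
# belongs to the usual espnet-style Decoder class whose forward takes
# (tgt, tgt_mask, memory, memory_mask) and that the standard subsequent_mask
# helper from the same code base is available; odim=5000 is only an example
# vocabulary size:
#
#     decoder = Decoder(odim=5000)
#     ys = torch.randint(0, 5000, (4, 20))              # (batch, target_length) token ids
#     ys_mask = subsequent_mask(20).unsqueeze(0)        # causal mask, broadcast over the batch
#     hs = torch.randn(4, 50, 256)                      # encoder output, (batch, time, attention_dim)
#     hs_mask = torch.ones(4, 1, 50, dtype=torch.bool)  # encoder padding mask
#     out, out_mask = decoder(ys, ys_mask, hs, hs_mask) # out: (batch, target_length, odim) logits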
def __init__(
    self,
    idim,
    attention_dim=256,
    attention_heads=4,
    linear_units=2048,
    num_blocks=6,
    dropout_rate=0.1,
    positional_dropout_rate=0.1,
    attention_dropout_rate=0.0,
    input_layer="conv2d",
    normalize_before=True,
    concat_after=False,
    positionwise_conv_kernel_size=1,
    macaron_style=False,
    use_cnn_module=False,
    cnn_module_kernel=31,
    zero_triu=False,
):
    """Construct a Conformer object."""
    super(Conformer, self).__init__()
    activation = Swish()
    self.conv_subsampling_factor = 1
    # Input embedding with relative positional encoding.
    if input_layer == "conv2d":
        # Default: Conv2d subsampling (4x time reduction) that also applies the
        # relative positional encoding (assumes ESPnet's Conv2dSubsampling,
        # which accepts a pos_enc module, is imported here).
        self.embed = Conv2dSubsampling(
            idim,
            attention_dim,
            dropout_rate,
            RelPositionalEncoding(attention_dim, positional_dropout_rate),
        )
        self.conv_subsampling_factor = 4
    elif isinstance(input_layer, torch.nn.Module):
        self.embed = torch.nn.Sequential(
            input_layer,
            RelPositionalEncoding(attention_dim, positional_dropout_rate),
        )
    elif input_layer is None:
        self.embed = torch.nn.Sequential(
            RelPositionalEncoding(attention_dim, positional_dropout_rate)
        )
    else:
        raise ValueError("unknown input_layer: " + input_layer)
    self.normalize_before = normalize_before

    # Self-attention module definition: multi-head attention with relative
    # positional encoding (Transformer-XL style).
    encoder_selfattn_layer = RelPositionMultiHeadedAttention
    encoder_selfattn_layer_args = (
        attention_heads,
        attention_dim,
        attention_dropout_rate,
        zero_triu,
    )

    # Feed-forward module definition: position-wise multi-layered Conv1d.
    positionwise_layer = MultiLayeredConv1d
    positionwise_layer_args = (
        attention_dim,
        linear_units,
        positionwise_conv_kernel_size,
        dropout_rate,
    )

    # Convolution module definition (depthwise convolution block of the Conformer).
    convolution_layer = ConvolutionModule
    convolution_layer_args = (attention_dim, cnn_module_kernel, activation)

    # Stack of num_blocks Conformer encoder layers; the second (macaron-style)
    # feed-forward and the convolution module are optional.
    self.encoders = repeat(
        num_blocks,
        lambda lnum: EncoderLayer(
            attention_dim,
            encoder_selfattn_layer(*encoder_selfattn_layer_args),
            positionwise_layer(*positionwise_layer_args),
            positionwise_layer(*positionwise_layer_args) if macaron_style else None,
            convolution_layer(*convolution_layer_args) if use_cnn_module else None,
            dropout_rate,
            normalize_before,
            concat_after,
        ),
    )
    if self.normalize_before:
        # Final LayerNorm for the pre-norm configuration.
        self.after_norm = LayerNorm(attention_dim)
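# Hedged usage sketch (not part of the original source): assuming this __init__
# belongs to a Conformer encoder class with the usual forward(xs, masks)
# interface (not shown here), a rough example with the macaron-style
# feed-forward and the convolution module enabled; idim=80 is only an example
# feature dimension:
#
#     conformer = Conformer(idim=80, macaron_style=True, use_cnn_module=True)
#     xs = torch.randn(4, 200, 80)                      # (batch, time, feature)
#     masks = torch.ones(4, 1, 200, dtype=torch.bool)   # (batch, 1, time) padding mask
#     hs, hs_masks = conformer(xs, masks)               # hs: roughly (batch, time / 4, attention_dim)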