def __init__(self, args, kernel_size=0):
    super().__init__()
    self.embed_dim = args.encoder_embed_dim
    self.conv_dim = args.encoder_conv_dim
    # Symmetric padding for odd kernels; a (left, right) tuple for even ones,
    # so the convolution output keeps the input length.
    if kernel_size % 2 == 1:
        padding_l = kernel_size // 2
    else:
        padding_l = ((kernel_size - 1) // 2, kernel_size // 2)

    if args.encoder_glu:
        # GLU halves the channel dimension, so project to twice the conv width.
        self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
        self.act = nn.GLU()
    else:
        self.linear1 = Linear(self.embed_dim, self.conv_dim)
        self.act = None

    if args.encoder_conv_type == 'lightweight':
        self.conv = LightweightConv1dTBC(
            self.conv_dim, kernel_size, padding_l=padding_l,
            weight_softmax=args.weight_softmax,
            num_heads=args.encoder_attention_heads,
            weight_dropout=args.weight_dropout)
    elif args.encoder_conv_type == 'dynamic':
        self.conv = DynamicConv1dTBC(
            self.conv_dim, kernel_size, padding_l=padding_l,
            weight_softmax=args.weight_softmax,
            num_heads=args.encoder_attention_heads,
            weight_dropout=args.weight_dropout)
    else:
        raise NotImplementedError

    self.linear2 = Linear(self.conv_dim, self.embed_dim)

    self.dropout = args.dropout
    self.relu_dropout = args.relu_dropout
    self.input_dropout = args.input_dropout
    self.normalize_before = args.encoder_normalize_before

    # Position-wise feed-forward sub-layer.
    self.fc1 = Linear(self.embed_dim, args.encoder_ffn_embed_dim)
    self.fc2 = Linear(args.encoder_ffn_embed_dim, self.embed_dim)

    # One LayerNorm for the conv sub-layer, one for the feed-forward sub-layer.
    self.layer_norms = nn.ModuleList([LayerNorm(self.embed_dim) for _ in range(2)])
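# A minimal usage sketch for the conv sub-module configured above, assuming
# fairseq's LightweightConv1dTBC is importable from fairseq.modules; all sizes
# below are illustrative, not the repo's canonical config. With an odd kernel
# and padding_l = kernel_size // 2, the output keeps the input length.
import torch
from fairseq.modules import LightweightConv1dTBC

conv = LightweightConv1dTBC(512, kernel_size=3, padding_l=1,
                            weight_softmax=True, num_heads=8,
                            weight_dropout=0.1)
x = torch.randn(10, 2, 512)  # TBC layout: (time, batch, channels)
y = conv(x)                  # same shape: (10, 2, 512)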
def __init__(self, args, no_encoder_attn=False, kernel_size=0):
    super().__init__()
    self.embed_dim = args.decoder_embed_dim
    self.conv_dim = args.decoder_conv_dim

    if args.decoder_glu:
        # GLU halves the channel dimension, so project to twice the conv width.
        self.linear1 = Linear(self.embed_dim, 2 * self.conv_dim)
        self.act = nn.GLU()
    else:
        self.linear1 = Linear(self.embed_dim, self.conv_dim)
        self.act = None

    # The decoder pads only on the left (kernel_size - 1) so the convolution
    # stays causal: position t never sees future time steps.
    if args.decoder_conv_type == 'lightweight':
        self.conv = LightweightConv1dTBC(
            self.conv_dim, kernel_size, padding_l=kernel_size - 1,
            weight_softmax=args.weight_softmax,
            num_heads=args.decoder_attention_heads,
            weight_dropout=args.weight_dropout)
    elif args.decoder_conv_type == 'dynamic':
        self.conv = DynamicConv1dTBC(
            self.conv_dim, kernel_size, padding_l=kernel_size - 1,
            weight_softmax=args.weight_softmax,
            num_heads=args.decoder_attention_heads,
            weight_dropout=args.weight_dropout)
    else:
        raise NotImplementedError

    self.linear2 = Linear(self.conv_dim, self.embed_dim)

    self.dropout = args.dropout
    self.relu_dropout = args.relu_dropout
    self.input_dropout = args.input_dropout
    self.normalize_before = args.decoder_normalize_before

    self.conv_layer_norm = LayerNorm(self.embed_dim)

    if no_encoder_attn:
        self.encoder_attn = None
        self.encoder_attn_layer_norm = None
    else:
        # Standard encoder-decoder cross-attention.
        self.encoder_attn = MultiheadAttention(
            self.embed_dim, args.decoder_attention_heads,
            dropout=args.attention_dropout,
            encoder_decoder_attention=True)
        self.encoder_attn_layer_norm = LayerNorm(self.embed_dim)

    self.fc1 = Linear(self.embed_dim, args.decoder_ffn_embed_dim)
    self.fc2 = Linear(args.decoder_ffn_embed_dim, self.embed_dim)
    self.final_layer_norm = LayerNorm(self.embed_dim)
    self.need_attn = True
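# A sketch of the decoder's causal configuration, assuming fairseq's
# DynamicConv1dTBC is importable; sizes are illustrative. Setting
# padding_l = kernel_size - 1 means position t only mixes steps <= t,
# which is what autoregressive decoding requires.
import torch
from fairseq.modules import DynamicConv1dTBC

causal_conv = DynamicConv1dTBC(512, kernel_size=3, padding_l=2,
                               weight_softmax=True, num_heads=8,
                               weight_dropout=0.1)
x = torch.randn(10, 2, 512)  # TBC layout: (time, batch, channels)
y = causal_conv(x)           # (10, 2, 512); no future leakage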
def __init__(self, args):
    super().__init__()
    self.first_kernel = args.kernel_size
    self.first_padding = args.kernel_padding

    # All six convolution layers share the same configuration, so build them
    # from a single kwargs dict. The int() cast normalizes num_head in case
    # it arrives as a string from the config.
    conv_kwargs = dict(
        input_size=args.hidden_dim * 2,
        kernel_size=self.first_kernel,
        padding_l=self.first_padding,
        num_heads=int(args.num_head),
        weight_dropout=args.conv_dropout,
        weight_softmax=args.kernel_softmax,
    )
    self.conv_layer_1 = LightweightConv1dTBC(**conv_kwargs)
    self.conv_layer_2 = LightweightConv1dTBC(**conv_kwargs)
    self.conv_layer_3 = LightweightConv1dTBC(**conv_kwargs)
    self.conv_layer_4 = LightweightConv1dTBC(**conv_kwargs)
    self.conv_layer_5 = LightweightConv1dTBC(**conv_kwargs)
    self.conv_layer_6 = LightweightConv1dTBC(**conv_kwargs)

    self.relu = nn.ReLU()
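# Usage sketch for LightweightConvolution above; the Namespace fields mirror
# exactly what __init__ reads, and the values are illustrative assumptions.
# Inputs are expected in TBC layout with 2 * hidden_dim channels, and
# num_head must divide that channel count (here 512 / 8 = 64).
import torch
from argparse import Namespace

conv_args = Namespace(hidden_dim=256, kernel_size=3, kernel_padding=1,
                      num_head=8, conv_dropout=0.1, kernel_softmax=True)
block = LightweightConvolution(conv_args)
h = torch.randn(20, 4, 2 * conv_args.hidden_dim)  # (time, batch, channels)
h = block.relu(block.conv_layer_1(h))  # one conv + ReLU step; the full
                                       # forward pass is defined elsewhere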