import torch
import torch.nn as nn

# `mm` is the project's custom layer library (Conv1d/Highway wrappers, etc.);
# `args`, `hparams`, and `norm` are defined elsewhere in the repo.


def __init__(self, hparams, n_units=128):
    super(MultiHeadAttention, self).__init__()
    self.token_embedding_size = hparams.token_embedding_size
    self.num_heads = hparams.num_heads
    self.token_num = hparams.token_num
    self.n_units = hparams.gstAtt_dim  # stored; the layers below are sized by the n_units argument
    self.split_size = n_units // self.num_heads
    # Conv1d args: in_channels, out_channels, kernel_size
    self.conv_Q = mm.Conv1d(self.token_embedding_size, n_units, 1)
    self.conv_K = mm.Conv1d(self.token_embedding_size, n_units, 1)
    self.fc_Q = nn.Sequential(
        nn.Linear(n_units, n_units),
        nn.Tanh(),
    )
    self.fc_K = nn.Sequential(
        nn.Linear(n_units, n_units),
        nn.Tanh(),
    )
    self.fc_V = nn.Sequential(
        nn.Linear(self.token_embedding_size, self.split_size),
        nn.Tanh(),
    )
    self.fc_A = nn.Sequential(
        nn.Linear(n_units, self.token_num),
        nn.Tanh(),
    )
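# A minimal standalone sketch (not this repo's forward) of the head-split
# pattern the sizes above imply: n_units is split into num_heads chunks of
# split_size, and fc_V emits a single head-sized value shared across heads.
# All shapes here are hypothetical.
N, Tq, Tk, n_units, num_heads = 2, 5, 10, 128, 8
split_size = n_units // num_heads                    # 16, mirrors self.split_size
Q = torch.randn(N, Tq, n_units)                      # as if from conv_Q -> fc_Q
K = torch.randn(N, Tk, n_units)                      # as if from conv_K -> fc_K
V = torch.randn(N, Tk, split_size)                   # as if from fc_V
Qh = torch.cat(Q.split(split_size, dim=-1), dim=0)   # (num_heads*N, Tq, split_size)
Kh = torch.cat(K.split(split_size, dim=-1), dim=0)   # (num_heads*N, Tk, split_size)
Vh = V.repeat(num_heads, 1, 1)                       # value shared across heads
A = torch.softmax(Qh @ Kh.transpose(1, 2) / split_size ** 0.5, dim=-1)
out = torch.cat((A @ Vh).split(N, dim=0), dim=-1)    # back to (N, Tq, n_units)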
def __init__(self, input_dim, hidden_dim, K=16, n_highway=4, bidirectional=True):
    super(CBHG, self).__init__()
    self.K = K
    self.conv_bank = mm.Conv1dBank(input_dim, hidden_dim, K=self.K,
                                   activation_fn=torch.relu)
    self.max_pool = nn.MaxPool1d(2, stride=1, padding=1)
    self.projection = nn.Sequential(
        mm.Conv1d(self.K * hidden_dim, hidden_dim, 3,
                  activation_fn=torch.relu, bias=False, bn=True),
        mm.Conv1d(hidden_dim, input_dim, 3, bias=False, bn=True),
    )
    self.highway = nn.ModuleList(
        [mm.Highway(input_dim) for _ in range(n_highway)])
    # with batch_first=True the GRU expects (Batch, Sequence, Feature)
    self.gru = nn.GRU(input_dim, hidden_dim, num_layers=1,
                      batch_first=True, bidirectional=bidirectional)
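# Hedged sketch of the usual CBHG data flow (Tacotron-style), assuming the mm
# conv layers operate on (N, C, T); this is not copied from the repo's forward.
# Note that MaxPool1d(2, stride=1, padding=1) returns T+1 frames, so one frame
# is trimmed to keep the residual addition shape-aligned.
def forward(self, x):                            # x: (N, T, input_dim)
    y = self.conv_bank(x.transpose(1, 2))        # (N, K*hidden_dim, T)
    y = self.max_pool(y)[:, :, :-1]              # drop the padded extra frame
    y = self.projection(y).transpose(1, 2)       # (N, T, input_dim)
    y = y + x                                    # residual connection
    for layer in self.highway:
        y = layer(y)
    out, _ = self.gru(y)                         # (N, T, 2*hidden_dim) if bidirectional
    return out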
def __init__(self, mel_channels=80, z_dim=256):
    super(Encoder, self).__init__()
    self.encoder = nn.Sequential(
        # the stride-2 conv halves the time axis
        mm.Conv1d(mel_channels, 64, kernel_size=4, stride=2, padding='same',
                  bias=True, activation_fn=nn.ReLU),
        mm.Conv1d(64, 256, kernel_size=3, padding='same', bias=True),
        mm.Conv1d(256, 128, kernel_size=3, padding='same', bias=True),
        mm.Conv1dResBlock(128, 128, kernel_size=3, padding='same', bias=True,
                          activation_fn=nn.ReLU),
        mm.Conv1dResBlock(128, 128, kernel_size=3, padding='same', bias=True,
                          activation_fn=nn.ReLU),
        mm.Conv1d(128, z_dim, kernel_size=1, padding='same', bias=False),
    )
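# Hypothetical usage, assuming the mm layers keep nn.Conv1d's (N, C, T) layout:
# the single stride-2 conv means the latent runs at half the mel frame rate.
enc = Encoder(mel_channels=80, z_dim=256)
mel = torch.randn(4, 80, 200)       # (N, mel_channels, T)
z = enc.encoder(mel)                # expected (4, 256, 100)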
def __init__(self, in_channels, mel_channels=80):
    super(Decoder, self).__init__()
    self.res_blocks = nn.Sequential(
        mm.Conv1d(in_channels, 128, kernel_size=3, bias=False, padding='same'),
        mm.Conv1dResBlock(128, 128, kernel_size=3, bias=True, padding='same',
                          activation_fn=nn.ReLU),
        mm.Conv1dResBlock(128, 128, kernel_size=3, bias=True, padding='same',
                          activation_fn=nn.ReLU),
        # nearest-neighbour upsample undoes the encoder's stride-2 downsampling
        mm.Upsample(scale_factor=2, mode='nearest'),
        mm.Conv1d(128, 256, kernel_size=2, bias=True, padding='same',
                  activation_fn=nn.ReLU),
        mm.Linear(256, mel_channels),
    )
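# Standalone check of the upsampling step using the stock nn.Upsample (the mm
# wrapper is assumed to behave the same on (N, C, T) tensors): scale_factor=2
# restores the frame rate the encoder halved.
up = nn.Upsample(scale_factor=2, mode='nearest')
z = torch.randn(4, 128, 100)        # (N, C, T/2)
print(up(z).shape)                  # torch.Size([4, 128, 200])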
def __init__(self):
    super(TextEncoder, self).__init__()
    # widen to Cx*2 so the output can later be split into K and V
    self.hc_blocks = nn.ModuleList([
        norm(mm.Conv1d(args.Ce, args.Cx * 2, 1, padding='same',
                       activation_fn=torch.relu))
    ])
    self.hc_blocks.extend([
        norm(mm.Conv1d(args.Cx * 2, args.Cx * 2, 1, padding='same',
                       activation_fn=None))
    ])
    # two stacks of dilated highway convolutions: dilations 1, 3, 9, 27
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cx * 2, args.Cx * 2, 3, dilation=3**i,
                              padding='same'))
        for _ in range(2) for i in range(4)
    ])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cx * 2, args.Cx * 2, 3, dilation=1,
                              padding='same'))
        for _ in range(2)
    ])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cx * 2, args.Cx * 2, 1, dilation=1,
                              padding='same'))
        for _ in range(2)
    ])
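# Receptive-field arithmetic for the stack above: with 'same' padding, every
# kernel-3 layer adds 2*dilation frames and kernel-1 layers add nothing.
# The dilations run 1, 3, 9, 27 twice, then 1, 1 for the last kernel-3 blocks.
dilations = [3**i for _ in range(2) for i in range(4)] + [1, 1]
receptive_field = 1 + sum(2 * d for d in dilations)
print(receptive_field)              # 165 input frames seen per output frame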
def __init__(self):
    super(SSRN, self).__init__()
    self.name = 'SSRN'
    # (N, n_mels, Ty/r) -> (N, Cs, Ty/r)
    self.hc_blocks = nn.ModuleList([
        norm(mm.Conv1d(args.n_mels, args.Cs, 1, activation_fn=torch.relu))
    ])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cs, args.Cs, 3, dilation=3**i))
        for i in range(2)
    ])
    # (N, Cs, Ty/r) -> (N, Cs, Ty/r*2): the deconv doubles the time axis
    self.hc_blocks.extend([
        norm(mm.ConvTranspose1d(args.Cs, args.Cs, 4, stride=2, padding=1))
    ])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cs, args.Cs, 3, dilation=3**i))
        for i in range(2)
    ])
    # (N, Cs, Ty/r*2) -> (N, Cs, Ty/r*4 == Ty)
    self.hc_blocks.extend([
        norm(mm.ConvTranspose1d(args.Cs, args.Cs, 4, stride=2, padding=1))
    ])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cs, args.Cs, 3, dilation=3**i))
        for i in range(2)
    ])
    # (N, Cs, Ty) -> (N, Cs*2, Ty)
    self.hc_blocks.extend([norm(mm.Conv1d(args.Cs, args.Cs * 2, 1))])
    self.hc_blocks.extend([
        norm(mm.HighwayConv1d(args.Cs * 2, args.Cs * 2, 3, dilation=1))
        for _ in range(2)
    ])
    # (N, Cs*2, Ty) -> (N, n_mags, Ty)
    self.hc_blocks.extend([norm(mm.Conv1d(args.Cs * 2, args.n_mags, 1))])
    self.hc_blocks.extend([
        norm(mm.Conv1d(args.n_mags, args.n_mags, 1, activation_fn=torch.relu))
        for _ in range(2)
    ])
    self.hc_blocks.extend([norm(mm.Conv1d(args.n_mags, args.n_mags, 1))])
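# Why ConvTranspose1d(Cs, Cs, 4, stride=2, padding=1) exactly doubles length:
# L_out = (L_in - 1)*stride - 2*padding + kernel = 2*L_in. Standalone check
# with the stock layer (the mm wrapper is assumed to match):
deconv = nn.ConvTranspose1d(16, 16, 4, stride=2, padding=1)
x = torch.randn(1, 16, 50)
print(deconv(x).shape)              # torch.Size([1, 16, 100])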
# Same layout as the hparams-based MultiHeadAttention above, but configured
# through the module-level args.
def __init__(self, n_units=128, embed_size=128):
    super(MultiHeadAttention, self).__init__()
    self.split_size = n_units // args.n_heads
    self.conv_Q = mm.Conv1d(embed_size, n_units, 1)
    self.conv_K = mm.Conv1d(embed_size, n_units, 1)
    self.fc_Q = nn.Sequential(
        nn.Linear(n_units, n_units),
        nn.Tanh(),
    )
    self.fc_K = nn.Sequential(
        nn.Linear(n_units, n_units),
        nn.Tanh(),
    )
    self.fc_V = nn.Sequential(
        nn.Linear(embed_size, self.split_size),
        nn.Tanh(),
    )
    self.fc_A = nn.Sequential(
        nn.Linear(n_units, args.n_tokens),
        nn.Tanh(),
    )
def __init__(self, text_dims, style_dims):
    super(TPSENet, self).__init__()
    self.conv = nn.Sequential(
        mm.Conv1d(text_dims, style_dims, 3, activation_fn=torch.relu,
                  bn=True, bias=False),
        # mm.Conv1d(style_dims, style_dims, 3, activation_fn=torch.relu,
        #           bn=True, bias=False)
    )
    self.gru = nn.GRU(style_dims, style_dims, batch_first=True,
                      bidirectional=True)
    self.fc = nn.Linear(style_dims * 2, style_dims)
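# Hedged sketch of a TPSE-style forward pass (not copied from this repo):
# encode the text features, summarize them with the bi-GRU's final states, and
# project down to a predicted style embedding. Assumes mm.Conv1d uses (N, C, T).
def forward(self, text_enc):                     # text_enc: (N, T, text_dims)
    y = self.conv(text_enc.transpose(1, 2)).transpose(1, 2)  # (N, T, style_dims)
    _, h = self.gru(y)                           # h: (2, N, style_dims)
    h = torch.cat([h[0], h[1]], dim=-1)          # concat fwd/bwd -> (N, 2*style_dims)
    return self.fc(h)                            # (N, style_dims)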