def __init__(self, hparams, conv_filters, rnn_dim, bias=False,
             conv_act_func=torch.relu, out_activation_fn=torch.tanh,
             drop_rate=0.):
    super(ReferenceEncoder, self).__init__()
    # Channel progression: 1 input channel -> conv_filters (ref_enc_filters) -> rnn_dim.
    channels = [1] + conv_filters + [rnn_dim]
    # Stack of stride-2 2-D convolutions (with batch norm) over the reference spectrogram.
    self.convs = nn.ModuleList([
        mm.Conv2d(channels[c], channels[c + 1], 3, stride=2, bn=True, bias=bias,
                  activation_fn=conv_act_func, drop_rate=drop_rate)
        for c in range(len(channels) - 1)
    ])  # [B, dec_T/r, 128]
    # Single-layer GRU summarises the flattened conv features over time.
    self.gru = nn.GRU(rnn_dim * 2, rnn_dim, batch_first=True)
    self.fc = nn.Sequential(nn.Linear(rnn_dim, rnn_dim))
    self.activation_fn = out_activation_fn
def __init__(self, in_channels=1, embed_size=128, activation_fn=None):
    super(ReferenceEncoder, self).__init__()
    self.embed_size = embed_size
    # Fixed channel progression ending in the reference embedding size.
    channels = [in_channels, 32, 32, 64, 64, 128, embed_size]
    # Six stride-2 2-D convolutions (with batch norm) over the reference spectrogram.
    self.convs = nn.ModuleList([
        mm.Conv2d(channels[c], channels[c + 1], 3, stride=2, bn=True, bias=False,
                  activation_fn=torch.relu)
        for c in range(len(channels) - 1)
    ])  # (N, Ty/r, 128)
    # Single-layer GRU summarises the flattened conv features over time.
    self.gru = nn.GRU(self.embed_size * 2, self.embed_size, batch_first=True)
    self.fc = nn.Sequential(nn.Linear(embed_size, embed_size))
    self.activation_fn = activation_fn
def __init__(self, hparams, activation_fn=None):
    super(ReferenceEncoder, self).__init__()
    self.token_embedding_size = hparams.token_embedding_size
    self.in_channels = hparams.n_frames_per_step
    # Channel progression: n_frames_per_step input channels -> ref_enc_filters -> token_embedding_size.
    channels = [self.in_channels] + hparams.ref_enc_filters + [self.token_embedding_size]
    # Stack of stride-2 2-D convolutions (with batch norm) over the reference spectrogram.
    self.convs = nn.ModuleList([
        mm.Conv2d(channels[c], channels[c + 1], 3, stride=2, bn=True, bias=False,
                  activation_fn=torch.relu)
        for c in range(len(channels) - 1)
    ])  # (Batch, Time_domain/r, 128)
    # Single-layer GRU summarises the flattened conv features over time.
    self.gru = nn.GRU(self.token_embedding_size * 2, self.token_embedding_size,
                      batch_first=True)
    self.fc = nn.Sequential(
        nn.Linear(self.token_embedding_size, self.token_embedding_size),
    )
    self.activation_fn = activation_fn
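# The `* 2` on the GRU input size in all three variants reflects how far the
# stride-2 convolutions shrink the mel-frequency axis before the per-frame
# features are flattened. A minimal sketch of that shape arithmetic, assuming
# kernel 3 / stride 2 / padding 1 convolutions and a 128-bin mel input; plain
# nn.Conv2d stands in for the custom mm.Conv2d (whose padding is not visible
# here), and the hard-coded channel list mirrors the second variant above.

import torch
import torch.nn as nn

channels = [1, 32, 32, 64, 64, 128, 128]
convs = nn.ModuleList([
    nn.Conv2d(channels[i], channels[i + 1], 3, stride=2, padding=1)
    for i in range(len(channels) - 1)
])

x = torch.randn(4, 1, 200, 128)   # (batch, channels, frames, mel bins)
for conv in convs:
    x = torch.relu(conv(x))
print(x.shape)                    # torch.Size([4, 128, 4, 2])

# Only 2 frequency bins survive the six halvings (128 // 2**6 == 2; an 80-bin
# input also ends at 2), so flattening channels x bins gives 128 * 2 features
# per frame, i.e. the `embed_size * 2` / `rnn_dim * 2` /
# `token_embedding_size * 2` input size handed to the GRU above.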