示例#1
0
    def __init__(self,
                 hparams,
                 conv_filters,
                 rnn_dim,
                 bias=False,
                 conv_act_func=torch.relu,
                 out_activation_fn=torch.tanh,
                 drop_rate=0.):
        """Create the convolution stack, the GRU and the output projection.

        Args:
            hparams: Accepted for interface compatibility; this constructor
                does not read it.
            conv_filters: List of channel counts for the intermediate
                convolution layers. It is concatenated with other lists, so
                it has to be a ``list``.
            rnn_dim: Output channel count of the last convolution, hidden
                size of the GRU and width of the final linear layer.
            bias: Forwarded to every ``mm.Conv2d`` as ``bias``.
            conv_act_func: Forwarded to every ``mm.Conv2d`` as
                ``activation_fn``.
            out_activation_fn: Stored on ``self.activation_fn``; it is not
                called here.
            drop_rate: Forwarded to every ``mm.Conv2d`` as ``drop_rate``.
        """
        super(ReferenceEncoder, self).__init__()

        # Channel progression: 1 -> conv_filters... -> rnn_dim.
        layer_sizes = [1] + conv_filters + [rnn_dim]

        # One conv layer per adjacent (in, out) pair of channel counts.
        # The positional 3 is presumably the kernel size and bn=True
        # presumably enables batch norm -- confirm against mm.Conv2d.
        conv_layers = []
        for in_ch, out_ch in zip(layer_sizes[:-1], layer_sizes[1:]):
            conv_layers.append(
                mm.Conv2d(in_ch,
                          out_ch,
                          3,
                          stride=2,
                          bn=True,
                          bias=bias,
                          activation_fn=conv_act_func,
                          drop_rate=drop_rate))
        self.convs = nn.ModuleList(conv_layers)

        # NOTE(review): the GRU input width is fixed at rnn_dim * 2;
        # presumably that matches the flattened conv output (an earlier
        # note here read "[B, dec_T/r, 128]") -- confirm against forward().
        gru_input_size = rnn_dim * 2
        self.gru = nn.GRU(gru_input_size, rnn_dim, batch_first=True)

        # Single linear layer wrapped in a Sequential container.
        self.fc = nn.Sequential(nn.Linear(rnn_dim, rnn_dim))
        self.activation_fn = out_activation_fn
示例#2
0
 def __init__(self, in_channels=1, embed_size=128, activation_fn=None):
     """Create the convolution stack, the GRU and the output projection.

     Args:
         in_channels: Input channel count of the first convolution.
         embed_size: Output channel count of the last convolution, hidden
             size of the GRU and width of the final linear layer. Also
             stored on ``self.embed_size``.
         activation_fn: Stored on ``self.activation_fn``; it is not called
             here.
     """
     super(ReferenceEncoder, self).__init__()
     self.embed_size = embed_size

     # Fixed channel progression between the input and embedding widths.
     widths = [in_channels, 32, 32, 64, 64, 128, embed_size]

     # One conv layer per adjacent (in, out) pair of channel counts.
     # The positional 3 is presumably the kernel size and bn=True
     # presumably enables batch norm -- confirm against mm.Conv2d.
     self.convs = nn.ModuleList([
         mm.Conv2d(n_in,
                   n_out,
                   3,
                   stride=2,
                   bn=True,
                   bias=False,
                   activation_fn=torch.relu)
         for n_in, n_out in zip(widths[:-1], widths[1:])
     ])

     # NOTE(review): the GRU input width is fixed at embed_size * 2;
     # presumably that matches the flattened conv output (an earlier note
     # here read "(N, Ty/r, 128)") -- confirm against forward().
     gru_input_size = embed_size * 2
     self.gru = nn.GRU(gru_input_size, embed_size, batch_first=True)

     # Single linear layer wrapped in a Sequential container.
     self.fc = nn.Sequential(nn.Linear(embed_size, embed_size))
     self.activation_fn = activation_fn
示例#3
0
    def __init__(self, hparams, activation_fn=None):
        """Create the convolution stack, the GRU and the output projection.

        Args:
            hparams: Object providing ``token_embedding_size``,
                ``n_frames_per_step`` and ``ref_enc_filters``. The last one
                is concatenated with other lists, so it has to be a
                ``list``.
            activation_fn: Stored on ``self.activation_fn``; it is not
                called here.
        """
        super(ReferenceEncoder, self).__init__()
        embed_dim = hparams.token_embedding_size
        self.token_embedding_size = embed_dim
        self.in_channels = hparams.n_frames_per_step

        # Channel progression: in_channels -> ref_enc_filters... -> embed.
        layer_sizes = ([self.in_channels] + hparams.ref_enc_filters +
                       [embed_dim])

        # Options shared by every conv layer. bn=True presumably enables
        # batch norm -- confirm against mm.Conv2d.
        conv_options = dict(stride=2,
                            bn=True,
                            bias=False,
                            activation_fn=torch.relu)

        # One conv layer per adjacent (in, out) pair; the positional 3 is
        # presumably the kernel size.
        conv_layers = []
        for in_ch, out_ch in zip(layer_sizes[:-1], layer_sizes[1:]):
            conv_layers.append(mm.Conv2d(in_ch, out_ch, 3, **conv_options))
        self.convs = nn.ModuleList(conv_layers)

        # NOTE(review): the GRU input width is fixed at twice the embedding
        # size; presumably that matches the flattened conv output (an
        # earlier note here read "(Batch, Time_domain/r, 128)") -- confirm
        # against forward().
        self.gru = nn.GRU(embed_dim * 2, embed_dim, batch_first=True)

        # Single linear layer wrapped in a Sequential container.
        self.fc = nn.Sequential(nn.Linear(embed_dim, embed_dim))
        self.activation_fn = activation_fn