Exemplo n.º 1
0
    def __init__(self, hparams):
        """Assemble the convolution stages, pooling operators and linear head.

        Args:
            hparams: Configuration object. ``hparams.num_mels`` is used as the
                input channel count of the first convolution and
                ``len(hparams.genres)`` as the width of the last linear layer.
        """
        super(Spotify, self).__init__()

        def conv_stage(in_channels, out_channels, pool_width):
            # Conv1d (kernel 4, stride 1, no padding) -> BatchNorm1d ->
            # LeakyReLU -> MaxPool1d whose stride equals its window.
            return nn.Sequential(
                nn.Conv1d(in_channels, out_channels,
                          kernel_size=4, stride=1, padding=0),
                nn.BatchNorm1d(out_channels),
                nn.LeakyReLU(),
                nn.MaxPool1d(pool_width, stride=pool_width),
            )

        self.conv0 = conv_stage(hparams.num_mels, 32, 8)
        self.conv1 = conv_stage(32, 32, 4)
        self.conv2 = conv_stage(32, 64, 4)

        # Four pooling operators sharing the same window (4) and stride (4):
        # average, max, L2-norm and L1-norm.
        window = 4
        self.pool0 = nn.AvgPool1d(window, stride=window)
        self.pool1 = nn.MaxPool1d(window, stride=window)
        self.pool2 = nn.LPPool1d(2, kernel_size=window, stride=window)
        self.pool3 = nn.LPPool1d(1, kernel_size=window, stride=window)

        # Classifier head: dropout + 192->64, then 64->64, then 64->#genres.
        num_genres = len(hparams.genres)
        self.fc1 = nn.Sequential(nn.Dropout(0.3), nn.Linear(192, 64))
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, num_genres)
Exemplo n.º 2
0
    def __init__(self):
        """Create a random input and a ``ModuleList`` of pooling layers for 1-D, 2-D and 3-D data."""
        super(NNPoolingModule, self).__init__()

        # 1-D: random input of shape (1, 16, 50) and its pooling layers.
        self.input1d = torch.randn(1, 16, 50)
        pools_1d = [
            nn.MaxPool1d(kernel_size=3, stride=2),
            nn.AvgPool1d(kernel_size=3, stride=2),
            nn.LPPool1d(norm_type=2, kernel_size=3, stride=2),
            nn.AdaptiveMaxPool1d(output_size=3),
            nn.AdaptiveAvgPool1d(output_size=3),
        ]
        self.module1d = nn.ModuleList(pools_1d)

        # 2-D: random input of shape (1, 16, 30, 10) and its pooling layers.
        self.input2d = torch.randn(1, 16, 30, 10)
        pools_2d = [
            nn.MaxPool2d(kernel_size=(3, 2), stride=(2, 1)),
            nn.AvgPool2d(kernel_size=(3, 2), stride=(2, 1)),
            nn.FractionalMaxPool2d(kernel_size=3, output_ratio=(0.5, 0.5)),
            nn.LPPool2d(norm_type=2, kernel_size=3, stride=(2, 1)),
            nn.AdaptiveMaxPool2d(output_size=(5, 7)),
            nn.AdaptiveAvgPool2d(output_size=7),
        ]
        self.module2d = nn.ModuleList(pools_2d)

        # 3-D: random input of shape (1, 16, 20, 4, 4) and its pooling layers.
        self.input3d = torch.randn(1, 16, 20, 4, 4)
        pools_3d = [
            nn.MaxPool3d(kernel_size=2),
            nn.AvgPool3d(kernel_size=2),
            nn.FractionalMaxPool3d(kernel_size=2,
                                   output_ratio=(0.5, 0.5, 0.5)),
            nn.AdaptiveMaxPool3d(output_size=(5, 7, 9)),
            nn.AdaptiveAvgPool3d(output_size=(5, 7, 9)),
        ]
        self.module3d = nn.ModuleList(pools_3d)
Exemplo n.º 3
0
 def forward(self, src_tokens, src_lengths, prev_output_tokens):
     """Run encoder and decoder, optionally deriving a fingerprint ("FP").

     When ``self.prop_predict`` is falsy, only the decoder output is
     returned. Otherwise a fingerprint is taken from
     ``encoder_out['encoder_out']``: pooled over its first dimension when
     ``self.fp_type`` contains ``'pool'``, or read from index 0 of that
     dimension otherwise.

     Returns:
         ``decoder_out`` alone; or the fingerprint alone when
         ``self.get_fp`` is truthy; or ``(decoder_out, pred_out)`` where
         ``pred_out = self.prednet(fp)``.
     """
     encoder_out = self.encoder(src_tokens, src_lengths)
     decoder_out = self.decoder(prev_output_tokens, encoder_out)
     if not self.prop_predict:
         return decoder_out

     if 'pool' not in self.fp_type:  # 'first'
         # Use the encoder output of the first token <GO> as FP.
         fp = encoder_out['encoder_out'][0, :, :]
     else:
         states = encoder_out['encoder_out']
         seq_len, batch, channels = states.shape
         # The pooling window spans the whole first dimension, so each
         # (batch, channel) pair collapses to a single value.
         if self.fp_type == 'avgpool':
             pool = nn.AvgPool1d(seq_len)
         elif self.fp_type == 'maxpool':
             pool = nn.MaxPool1d(seq_len)
         else:
             pool = nn.LPPool1d(2, seq_len)
         # (seq, batch, channel) -> (batch, channel, seq) so pooling runs
         # along the last axis.
         pooled = pool(states.permute(1, 2, 0))
         fp = pooled.contiguous().view(batch, channels)

     if self.get_fp:
         return fp
     return decoder_out, self.prednet(fp)
Exemplo n.º 4
0
    def __init__(self):
        """Register seven ``LPPool1d`` layers, ``pool_0`` .. ``pool_6``, with differing settings."""
        super(Model, self).__init__()

        # One entry per layer; the position in this tuple is the numeric
        # suffix of the attribute the layer is stored under.
        pool_settings = (
            dict(norm_type=2, kernel_size=3),
            dict(norm_type=2, kernel_size=4, stride=2),
            dict(norm_type=1, kernel_size=3, stride=1, ceil_mode=False),
            dict(norm_type=1, kernel_size=5, stride=1, ceil_mode=True),
            dict(norm_type=1.2, kernel_size=3, stride=2, ceil_mode=False),
            dict(norm_type=0.5, kernel_size=2, stride=1, ceil_mode=True),
            dict(norm_type=0.1, kernel_size=4, stride=1, ceil_mode=False),
        )
        for index, settings in enumerate(pool_settings):
            setattr(self, "pool_{}".format(index), nn.LPPool1d(**settings))
Exemplo n.º 5
0
 def __init__(self,
              mode,
              nfilters,
              samplerate=16000,
              wlen=25,
              wstride=10,
              compression='log',
              preemp=False,
              mvn=False):
     """Create the convolution / pooling layers and record the settings.

     Args:
         mode: ``'Fixed'`` freezes every parameter created up to that
             point; ``'learnfbanks'`` freezes only the pre-emphasis (when
             present) and low-pass weights; other values freeze nothing.
         nfilters: Number of filters; the first convolution emits
             ``2 * nfilters`` channels.
         samplerate: Combined with ``wlen`` / ``wstride`` to size windows.
         wlen: Window length (presumably milliseconds -- TODO confirm).
         wstride: Window stride (presumably milliseconds -- TODO confirm).
         compression: Stored unchanged on the instance.
         preemp: When truthy, add a 2-tap, bias-free pre-emphasis conv.
         mvn: When truthy, add an ``InstanceNorm1d`` layer.
     """
     super(TDFbanks, self).__init__()

     # Window geometry expressed in samples.
     win_samples = samplerate * wlen // 1000 + 1
     hop_samples = samplerate * wstride // 1000
     half_window = (win_samples - 1) // 2

     # Optional pre-emphasis filter; the attribute is None when disabled.
     self.preemp = (
         nn.Conv1d(in_channels=1, out_channels=1, kernel_size=2, stride=1,
                   padding=1, groups=1, bias=False)
         if preemp else None
     )
     self.complex_conv = nn.Conv1d(in_channels=1,
                                   out_channels=2 * nfilters,
                                   kernel_size=win_samples,
                                   stride=1,
                                   padding=half_window,
                                   groups=1,
                                   bias=False)
     # L2 pooling over channel pairs (window 2, stride 2).
     self.modulus = nn.LPPool1d(2, 2, stride=2)
     # Depthwise convolution: one group per filter.
     self.lowpass = nn.Conv1d(in_channels=nfilters,
                              out_channels=nfilters,
                              kernel_size=win_samples,
                              stride=hop_samples,
                              padding=0,
                              groups=nfilters,
                              bias=False)

     if mode == 'Fixed':
         for weight in self.parameters():
             weight.requires_grad = False
     elif mode == 'learnfbanks':
         if self.preemp is not None:
             self.preemp.weight.requires_grad = False
         self.lowpass.weight.requires_grad = False

     if mvn:
         self.instancenorm = nn.InstanceNorm1d(nfilters, momentum=1)

     # Keep the configuration on the instance.
     self.nfilters = nfilters
     self.fs = samplerate
     self.wlen = wlen
     self.wstride = wstride
     self.compression = compression
     self.mvn = mvn
Exemplo n.º 6
0
    def __init__(self, args):
        """Build the conv / pooling front end, transformer stack and output head.

        Args:
            args: Configuration object; ``window_size``, ``drop_out`` and
                ``output_size`` are read from it, and the object itself is
                kept as ``self.args``.
        """
        super().__init__()
        self.args = args

        # The latent length is half the window size, truncated to an int.
        self.original_len = args.window_size
        self.latent_len = int(self.original_len / 2)
        self.dropout_rate = args.drop_out

        # Fixed architecture sizes.
        self.hidden, self.heads, self.n_layers = 256, 2, 2
        self.output_size = args.output_size

        width = self.hidden
        drop_p = self.dropout_rate

        # Front end: 1 -> hidden channels, then L2 pooling with window 2 and
        # stride 2.
        self.conv = nn.Conv1d(1, width, 5,
                              stride=1,
                              padding=2,
                              padding_mode='replicate')
        self.pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)

        self.position = PositionalEmbedding(max_len=self.latent_len,
                                            d_model=width)
        self.layer_norm = LayerNorm(width)
        self.dropout = nn.Dropout(p=drop_p)

        # n_layers identical transformer blocks; the third argument is four
        # times the hidden size.
        blocks = []
        for _ in range(self.n_layers):
            blocks.append(
                TransformerBlock(width, self.heads, width * 4, drop_p))
        self.transformer_blocks = nn.ModuleList(blocks)

        # Back end: transposed convolution with stride 2, then two linear
        # layers ending at output_size.
        self.deconv = nn.ConvTranspose1d(width, width, 4,
                                         stride=2,
                                         padding=1)
        self.linear1 = nn.Linear(width, 128)
        self.linear2 = nn.Linear(128, self.output_size)

        self.truncated_normal_init()
#%%
unpool(out, indices)  ## restore the original shape; every element except the maxima is 0

#%%
#>3. Average pooling
##>3.1 2-D average pooling
m = nn.AvgPool2d((3, 2), stride=(2, 1))
input = torch.randn(20, 16, 50, 32)
out = m(input)
out.shape

#%%
#>4. LP-norm pooling
input = torch.arange(1, 5, dtype=torch.float).reshape(1, 1, 4)
pool = nn.LPPool1d(2, 2)  ## 2-norm, kernel size 2, other parameters left at their defaults
out = pool(input)
out

#%%
#>5. Adaptive max pooling

#%% [markdown]
# - Adaptive max pooling only needs the target output size; the remaining parameters are computed automatically

#%%
##>5.1 1-D adaptive max pooling
input = torch.randn(1, 64, 8)
m = nn.AdaptiveMaxPool1d(5)  # target size is 5
out = m(input)
out.shape
Exemplo n.º 8
0
 def __append_layer(self, net_style, args_dict):
     """Append one layer, described by name and arguments, to ``self.layers``.

     ``args_dict`` is consumed positionally: only its values, in insertion
     order, are forwarded to the ``torch.nn`` constructor named by
     ``net_style``; the keys are ignored, and values beyond the number the
     layer takes are dropped.

     Args:
         net_style: Layer type name such as ``"Conv2d"`` or ``"ReLU"``. The
             special name ``"reshape"`` appends the first value itself
             instead of building a module. A name matched by none of the
             branches here appends nothing in this block.
         args_dict: Mapping whose values are the constructor arguments.

     Raises:
         IndexError: If ``args_dict`` holds fewer values than the layer
             consumes.
     """
     args_values_list = list(args_dict.values())
     # How many leading values each torch.nn constructor receives.
     # "Fold" builds nn.Fold (it used to build nn.Unfold with five
     # arguments, which that constructor rejects).
     positional_arg_counts = {
         # Convolution, fold / unfold
         "Conv1d": 8, "Conv2d": 8, "Conv3d": 8,
         "ConvTranspose1d": 9, "ConvTranspose2d": 9, "ConvTranspose3d": 9,
         "Unfold": 4, "Fold": 5,
         # Pooling
         "MaxPool1d": 6, "MaxPool2d": 6, "MaxPool3d": 6,
         "MaxUnpool1d": 3, "MaxUnpool2d": 3, "MaxUnpool3d": 3,
         "AvgPool1d": 5, "AvgPool2d": 5, "AvgPool3d": 5,
         "FractionalMaxPool2d": 5,
         "LPPool1d": 4, "LPPool2d": 4,
         "AdaptiveMaxPool1d": 2, "AdaptiveMaxPool2d": 2,
         "AdaptiveMaxPool3d": 2,
         "AdaptiveAvgPool1d": 1, "AdaptiveAvgPool2d": 1,
         "AdaptiveAvgPool3d": 1,
         # Padding
         "ReflectionPad1d": 1, "ReflectionPad2d": 1,
         "ReplicationPad1d": 1, "ReplicationPad2d": 1,
         "ReplicationPad3d": 1,
         "ZeroPad2d": 1,
         "ConstantPad1d": 2, "ConstantPad2d": 2, "ConstantPad3d": 2,
         # Activations
         "ELU": 2, "Hardshrink": 1, "Hardtanh": 5, "LeakyReLU": 2,
         "LogSigmoid": 0, "PReLU": 2, "ReLU": 1, "ReLU6": 1, "RReLU": 3,
         "SELU": 1, "CELU": 2, "Sigmoid": 0, "Softplus": 2,
         "Softshrink": 1, "Softsign": 0, "Tanh": 0, "Tanhshrink": 0,
         "Threshold": 3, "Softmin": 1, "Softmax": 1, "Softmax2d": 0,
         "LogSoftmax": 1, "AdaptiveLogSoftmaxWithLoss": 5,
         # Normalization
         "BatchNorm1d": 5, "BatchNorm2d": 5, "BatchNorm3d": 5,
         "GroupNorm": 4,
         "InstanceNorm1d": 5, "InstanceNorm2d": 5, "InstanceNorm3d": 5,
         "LayerNorm": 3, "LocalResponseNorm": 4,
         # Linear and dropout
         "Linear": 3,
         "Dropout": 2, "Dropout2d": 2, "Dropout3d": 2, "AlphaDropout": 2,
     }
     if net_style == "reshape":
         # Special case: store the target size directly rather than a module.
         self.layers.append(args_values_list[0])
     elif net_style in positional_arg_counts:
         wanted = positional_arg_counts[net_style]
         if len(args_values_list) < wanted:
             # Same exception type that indexing past the end of the list
             # would raise, with a message that names the layer.
             raise IndexError(
                 "{} needs {} argument values, got {}".format(
                     net_style, wanted, len(args_values_list)))
         layer_cls = getattr(nn, net_style)
         self.layers.append(layer_cls(*args_values_list[:wanted]))