Example #1
0
    def __init__(self,
                 nz=32,
                 ngf=64,
                 output_size=16384,
                 nc=1,
                 num_measurements=1000,
                 cuda=True):
        """Build a 13-layer transposed-convolution generator plus a linear
        measurement layer (compressed-sensing style) for 1-D audio.

        Args:
            nz: length of the latent input vector z.
            ngf: channel width of every hidden layer.
            output_size: length of the generated signal (default 16384 = 4 * 2**12).
            nc: number of output channels (1 = mono).
            num_measurements: number of rows of the measurement matrix A
                realized by ``self.fc``.
            cuda: stored on ``self.CUDA``; not used inside this constructor.
        """
        super(DCGAN_Audio_Straight, self).__init__()
        self.nc = nc
        self.output_size = output_size
        self.CUDA = cuda

        # ConvTranspose1d args: (in_channels, out_channels, kernel_size, stride, padding, bias)
        # Length rule: L_out = (L_in - 1)*stride - 2*padding + kernel_size.
        # With (k=6, s=2, p=2) or (k=4, s=2, p=1) each layer doubles the length.

        self.conv1 = nn.ConvTranspose1d(nz, ngf, 4, 1, 0, bias=False)
        self.bn1 = nn.BatchNorm1d(ngf)
        # LAYER 1: latent z of shape (nz x 1) -> (ngf x 4)

        self.conv2 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn2 = nn.BatchNorm1d(ngf)
        # LAYER 2: (ngf x 4) -> (ngf x 8)

        self.conv3 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn3 = nn.BatchNorm1d(ngf)
        # LAYER 3: (ngf x 8) -> (ngf x 16)

        self.conv4 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn4 = nn.BatchNorm1d(ngf)
        # LAYER 4: (ngf x 16) -> (ngf x 32)

        self.conv5 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn5 = nn.BatchNorm1d(ngf)
        # LAYER 5: (ngf x 32) -> (ngf x 64)

        self.conv6 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn6 = nn.BatchNorm1d(ngf)
        # LAYER 6: (ngf x 64) -> (ngf x 128)

        self.conv7 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn7 = nn.BatchNorm1d(ngf)
        # LAYER 7: (ngf x 128) -> (ngf x 256)

        self.conv8 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn8 = nn.BatchNorm1d(ngf)
        # LAYER 8: (ngf x 256) -> (ngf x 512)

        self.conv9 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn9 = nn.BatchNorm1d(ngf)
        # LAYER 9: (ngf x 512) -> (ngf x 1024)

        self.conv10 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn10 = nn.BatchNorm1d(ngf)
        # LAYER 10: (ngf x 1024) -> (ngf x 2048)

        self.conv11 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn11 = nn.BatchNorm1d(ngf)
        # LAYER 11: (ngf x 2048) -> (ngf x 4096)

        self.conv12 = nn.ConvTranspose1d(ngf, ngf, 6, 2, 2, bias=False)
        self.bn12 = nn.BatchNorm1d(ngf)
        # LAYER 12: (ngf x 4096) -> (ngf x 8192)

        self.conv13 = nn.ConvTranspose1d(ngf, nc, 4, 2, 1,
                                         bias=False)  # final layer emits the signal
        # LAYER 13: (ngf x 8192) -> G(z) of shape (nc x 16384)

        self.fc = nn.Linear(output_size * nc, num_measurements,
                            bias=False)  # A: the linear measurement matrix
Example #2
0
    def __init__(self, ngf=16):
        """SEGAN-style fully-convolutional encoder/decoder pair.

        The encoder halves the time axis at every layer; each decoder layer
        consumes its own input concatenated with the homologous encoder
        output (hence the doubled input channel counts) and doubles the
        time axis back.

        Args:
            ngf: base number of feature maps. All layer widths scale
                linearly with it. The previous implementation accepted
                ``ngf`` but ignored it (widths were hard-coded); the
                default of 16 reproduces that architecture exactly.
        """
        super().__init__()
        # size notations = [batch_size x feature_maps x width] (height omitted - 1D convolutions)
        # encoder gets a noisy signal as input
        self.enc1 = nn.Conv1d(in_channels=1, out_channels=ngf, kernel_size=32, stride=2, padding=15)  # out : [B x ngf x 8192]
        self.enc1_nl = nn.PReLU()  # non-linear transformation after encoder layer 1
        self.enc2 = nn.Conv1d(ngf, 2 * ngf, 32, 2, 15)  # [B x 2*ngf x 4096]
        self.enc2_nl = nn.PReLU()
        self.enc3 = nn.Conv1d(2 * ngf, 2 * ngf, 32, 2, 15)  # [B x 2*ngf x 2048]
        self.enc3_nl = nn.PReLU()
        self.enc4 = nn.Conv1d(2 * ngf, 4 * ngf, 32, 2, 15)  # [B x 4*ngf x 1024]
        self.enc4_nl = nn.PReLU()
        self.enc5 = nn.Conv1d(4 * ngf, 4 * ngf, 32, 2, 15)  # [B x 4*ngf x 512]
        self.enc5_nl = nn.PReLU()
        self.enc6 = nn.Conv1d(4 * ngf, 8 * ngf, 32, 2, 15)  # [B x 8*ngf x 256]
        self.enc6_nl = nn.PReLU()
        self.enc7 = nn.Conv1d(8 * ngf, 8 * ngf, 32, 2, 15)  # [B x 8*ngf x 128]
        self.enc7_nl = nn.PReLU()
        self.enc8 = nn.Conv1d(8 * ngf, 16 * ngf, 32, 2, 15)  # [B x 16*ngf x 64]
        self.enc8_nl = nn.PReLU()
        self.enc9 = nn.Conv1d(16 * ngf, 16 * ngf, 32, 2, 15)  # [B x 16*ngf x 32]
        self.enc9_nl = nn.PReLU()
        self.enc10 = nn.Conv1d(16 * ngf, 32 * ngf, 32, 2, 15)  # [B x 32*ngf x 16]
        self.enc10_nl = nn.PReLU()
        self.enc11 = nn.Conv1d(32 * ngf, 64 * ngf, 32, 2, 15)  # output : [B x 64*ngf x 8]
        self.enc11_nl = nn.PReLU()

        # decoder generates an enhanced signal
        # each decoder output is concatenated with the homologous encoder
        # output, so the input feature-map counts are doubled
        self.dec10 = nn.ConvTranspose1d(in_channels=128 * ngf, out_channels=32 * ngf, kernel_size=32, stride=2, padding=15)
        self.dec10_nl = nn.PReLU()  # out : [B x 32*ngf x 16] -> (concat) [B x 64*ngf x 16]
        self.dec9 = nn.ConvTranspose1d(64 * ngf, 16 * ngf, 32, 2, 15)  # [B x 16*ngf x 32]
        self.dec9_nl = nn.PReLU()
        self.dec8 = nn.ConvTranspose1d(32 * ngf, 16 * ngf, 32, 2, 15)  # [B x 16*ngf x 64]
        self.dec8_nl = nn.PReLU()
        self.dec7 = nn.ConvTranspose1d(32 * ngf, 8 * ngf, 32, 2, 15)  # [B x 8*ngf x 128]
        self.dec7_nl = nn.PReLU()
        self.dec6 = nn.ConvTranspose1d(16 * ngf, 8 * ngf, 32, 2, 15)  # [B x 8*ngf x 256]
        self.dec6_nl = nn.PReLU()
        self.dec5 = nn.ConvTranspose1d(16 * ngf, 4 * ngf, 32, 2, 15)  # [B x 4*ngf x 512]
        self.dec5_nl = nn.PReLU()
        self.dec4 = nn.ConvTranspose1d(8 * ngf, 4 * ngf, 32, 2, 15)  # [B x 4*ngf x 1024]
        self.dec4_nl = nn.PReLU()
        self.dec3 = nn.ConvTranspose1d(8 * ngf, 2 * ngf, 32, 2, 15)  # [B x 2*ngf x 2048]
        self.dec3_nl = nn.PReLU()
        self.dec2 = nn.ConvTranspose1d(4 * ngf, 2 * ngf, 32, 2, 15)  # [B x 2*ngf x 4096]
        self.dec2_nl = nn.PReLU()
        self.dec1 = nn.ConvTranspose1d(4 * ngf, ngf, 32, 2, 15)  # [B x ngf x 8192]
        self.dec1_nl = nn.PReLU()
        self.dec_final = nn.ConvTranspose1d(2 * ngf, 1, 32, 2, 15)  # [B x 1 x 16384]
        self.dec_tanh = nn.Tanh()

        # initialize weights
        self.init_weights()
Example #3
0
def conv_transpose(*args, **kwargs):
    """Factory for a 1-D transposed convolution.

    All positional and keyword arguments are forwarded unchanged to
    ``nn.ConvTranspose1d``.
    """
    layer_cls = nn.ConvTranspose1d
    return layer_cls(*args, **kwargs)
Example #4
0
 def __init__(self,
              ninputs,
              fmaps,
              kwidth,
              activation,
              padding=None,
              lnorm=False,
              dropout=0.,
              pooling=2,
              enc=True,
              bias=False,
              aal_h=None,
              linterp=False,
              snorm=False,
              convblock=False):
     """One generator block: a strided Conv1d when ``enc`` is True,
     otherwise an upsampling stage built from either linear
     interpolation + conv or a transposed convolution.

     Args:
         ninputs: number of input channels.
         fmaps: number of output feature maps.
         kwidth: kernel width of the main convolution.
         activation: activation module stored on ``self.act``; the
             string 'glu' selects a gated path that is currently
             unimplemented (raises NotImplementedError in the
             encoder/transposed branches).
         padding: padding for the encoder conv; None means 0.
         lnorm: if True, attach a ``LayerNorm`` module.
         dropout: dropout probability; > 0 attaches ``nn.Dropout``.
         pooling: conv stride (decimation or upsampling factor).
         enc: build the encoder (True) or decoder (False) variant.
         bias: include bias terms in the convolutions.
         aal_h: optional anti-aliasing FIR taps; when given, an extra
             conv whose weights are set from them runs before the
             encoder conv.
         linterp: decoder upsamples by linear interpolation instead of
             a transposed convolution.
         snorm: wrap the convolutions in spectral normalization.
         convblock: use ``Conv1DResBlock`` instead of a plain conv.
     """
     # linterp: do linear interpolation instead of simple conv transpose
     # snorm: spectral norm
     super(GBlock, self).__init__()
     self.pooling = pooling
     self.linterp = linterp
     self.enc = enc
     self.kwidth = kwidth
     self.convblock = convblock
     if padding is None:
         padding = 0
     if enc:
         if aal_h is not None:
             # anti-aliasing conv; its weights are overwritten below
             # with the supplied impulse response
             self.aal_conv = nn.Conv1d(ninputs,
                                       ninputs,
                                       aal_h.shape[0],
                                       stride=1,
                                       padding=aal_h.shape[0] // 2 - 1,
                                       bias=False)
             if snorm:
                 self.aal_conv = SpectralNorm(self.aal_conv)
             # apply AAL weights, reshaping impulse response to match
             # in channels and out channels
             aal_t = torch.FloatTensor(aal_h).view(1, 1, -1)
             aal_t = aal_t.repeat(ninputs, ninputs, 1)
             self.aal_conv.weight.data = aal_t
         if convblock:
             self.conv = Conv1DResBlock(ninputs,
                                        fmaps,
                                        kwidth,
                                        stride=pooling,
                                        bias=bias)
         else:
             self.conv = nn.Conv1d(ninputs,
                                   fmaps,
                                   kwidth,
                                   stride=pooling,
                                   padding=padding,
                                   bias=bias)
         if snorm:
             self.conv = SpectralNorm(self.conv)
         if activation == 'glu':
             # TODO: REVIEW
             raise NotImplementedError
             # NOTE(review): the code below is unreachable until the
             # GLU path above is implemented
             self.glu_conv = nn.Conv1d(ninputs,
                                       fmaps,
                                       kwidth,
                                       stride=pooling,
                                       padding=padding,
                                       bias=bias)
             if snorm:
                 self.glu_conv = spectral_norm(self.glu_conv)
     else:
         if linterp:
             # pre-conv prior to upsampling; squeezes channels by 8x
             self.pre_conv = nn.Conv1d(ninputs,
                                       ninputs // 8,
                                       kwidth,
                                       stride=1,
                                       padding=kwidth // 2,
                                       bias=bias)
             self.conv = nn.Conv1d(ninputs // 8,
                                   fmaps,
                                   kwidth,
                                   stride=1,
                                   padding=kwidth // 2,
                                   bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 self.glu_conv = nn.Conv1d(ninputs,
                                           fmaps,
                                           kwidth,
                                           stride=1,
                                           padding=kwidth // 2,
                                           bias=bias)
                 if snorm:
                     self.glu_conv = SpectralNorm(self.glu_conv)
         else:
             if convblock:
                 self.conv = Conv1DResBlock(ninputs,
                                            fmaps,
                                            kwidth,
                                            stride=pooling,
                                            bias=bias,
                                            transpose=True)
             else:
                 # decoder like with transposed conv
                 # compute padding required based on pooling:
                 # floor division by -2 yields ceil((kwidth - pooling) / 2),
                 # which (for even kwidth - pooling) gives L_out == pooling * L_in
                 pad = (2 * pooling - pooling - kwidth) // -2
                 self.conv = nn.ConvTranspose1d(ninputs,
                                                fmaps,
                                                kwidth,
                                                stride=pooling,
                                                padding=pad,
                                                output_padding=0,
                                                bias=bias)
             if snorm:
                 self.conv = SpectralNorm(self.conv)
             if activation == 'glu':
                 # TODO: REVIEW
                 raise NotImplementedError
                 # NOTE(review): the code below is unreachable until the
                 # GLU path above is implemented
                 self.glu_conv = nn.ConvTranspose1d(ninputs,
                                                    fmaps,
                                                    kwidth,
                                                    stride=pooling,
                                                    padding=padding,
                                                    output_padding=pooling -
                                                    1,
                                                    bias=bias)
                 if snorm:
                     self.glu_conv = spectral_norm(self.glu_conv)
     if activation is not None:
         self.act = activation
     if lnorm:
         self.ln = LayerNorm()
     if dropout > 0:
         self.dout = nn.Dropout(dropout)
Example #5
0
def encoder_sequential(input_size, output_size, *args, **kwargs):
    """Build one encoder stage: LeakyReLU(0.2) followed by a
    weight-normalized ``nn.ConvTranspose1d``.

    Extra positional/keyword arguments are forwarded to the conv layer.
    """
    deconv = nn.ConvTranspose1d(input_size, output_size, *args, **kwargs)
    return nn.Sequential(nn.LeakyReLU(0.2), weight_norm(deconv))
Example #6
0
    def __init__(self):
        """Six-stage 1-D convolutional autoencoder.

        The encoder compresses a single-channel signal through strided
        convolutions into a tanh-bounded code; the decoder mirrors it with
        transposed convolutions and ends in a sigmoid. All stages use the
        PyTorch defaults padding=0, dilation=1, groups=1, bias=True and
        zero padding mode.
        """
        super(Autoencoder, self).__init__()
        base = 4  # channel width of the first encoder stage

        def down(cin, cout, k, s):
            # one strided encoder conv (library defaults otherwise)
            return nn.Conv1d(cin, cout, kernel_size=k, stride=s)

        def up(cin, cout, k, s):
            # one transposed decoder conv (library defaults otherwise)
            return nn.ConvTranspose1d(cin, cout, kernel_size=k, stride=s)

        self.encoder = nn.Sequential(
            down(1, base, 5, 2),
            nn.LeakyReLU(0.2, inplace=True),
            down(base, 2 * base, 5, 2),
            nn.BatchNorm1d(2 * base),
            nn.LeakyReLU(0.2, inplace=True),
            down(2 * base, 4 * base, 5, 3),
            nn.BatchNorm1d(4 * base),
            nn.LeakyReLU(0.2, inplace=True),
            down(4 * base, 8 * base, 5, 3),
            nn.BatchNorm1d(8 * base),
            nn.LeakyReLU(0.2, inplace=True),
            down(8 * base, 16 * base, 5, 3),
            nn.BatchNorm1d(16 * base),
            nn.LeakyReLU(0.2, inplace=True),
            down(16 * base, 32 * base, 8, 1),
            nn.Tanh(),
        )

        self.decoder = nn.Sequential(
            up(32 * base, 16 * base, 5, 1),
            nn.ReLU(),
            up(16 * base, 8 * base, 5, 4),
            nn.BatchNorm1d(8 * base),
            nn.ReLU(),
            up(8 * base, 4 * base, 7, 4),
            nn.BatchNorm1d(4 * base),
            nn.ReLU(),
            up(4 * base, 2 * base, 7, 3),
            nn.BatchNorm1d(2 * base),
            nn.ReLU(),
            up(2 * base, base, 7, 2),
            nn.BatchNorm1d(base),
            nn.ReLU(),
            up(base, 1, 3, 2),
            nn.Sigmoid(),
        )
Example #7
0
    def __init__(
        self,
        out_channels,
        kernel_size,
        input_shape=None,
        in_channels=None,
        stride=1,
        dilation=1,
        padding=0,
        output_padding=0,
        groups=1,
        bias=True,
        skip_transpose=False,
    ):
        """1-D transposed convolution with string-based padding policies.

        Args:
            out_channels: number of output channels.
            kernel_size: kernel width.
            input_shape: expected input shape; used to infer
                ``in_channels`` and, for the "same"/"factor" policies,
                the input length.
            in_channels: input channel count; required if ``input_shape``
                is None.
            stride, dilation, output_padding, groups, bias: forwarded to
                ``nn.ConvTranspose1d``.
            padding: an int, or one of "same" (output length equals input
                length), "factor" (output length equals ``stride`` times
                input length), or "valid" (no padding).
            skip_transpose: if True the input is already (batch, channel,
                time); otherwise time is expected on dim 1.

        Raises:
            ValueError: if neither ``input_shape`` nor ``in_channels`` is
                given, if "same"/"factor" padding is requested without
                ``input_shape``, or on an unsupported padding value.
        """
        super().__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.dilation = dilation
        self.padding = padding
        self.unsqueeze = False
        self.skip_transpose = skip_transpose

        if input_shape is None and in_channels is None:
            raise ValueError("Must provide one of input_shape or in_channels")

        if in_channels is None:
            in_channels = self._check_input_shape(input_shape)

        if self.padding in ("same", "factor"):
            # Both policies need the input length, which only input_shape
            # carries; the original code crashed with a TypeError here
            # when in_channels was given but input_shape was not.
            if input_shape is None:
                raise ValueError(
                    "input_shape must be provided when padding is "
                    f"'{self.padding}'"
                )
            L_in = input_shape[-1] if skip_transpose else input_shape[1]
            # "same" keeps the length; "factor" upsamples it by `stride`
            L_out = L_in if self.padding == "same" else L_in * stride
            padding_value = get_padding_elem_transposed(
                L_out,
                L_in,
                stride=stride,
                kernel_size=kernel_size,
                dilation=dilation,
                output_padding=output_padding,
            )
        elif self.padding == "valid":
            padding_value = 0
        elif isinstance(self.padding, int):
            padding_value = padding
        else:
            raise ValueError("Not supported padding type")

        self.conv = nn.ConvTranspose1d(
            in_channels,
            out_channels,
            self.kernel_size,
            stride=self.stride,
            dilation=self.dilation,
            padding=padding_value,
            groups=groups,
            bias=bias,
        )
Example #8
0
    def __init__(self,
                 num_classes,
                 cls_in_channels=256,
                 reg_in_channels=256,
                 roi_feat_size=7,
                 reg_feat_up_ratio=2,
                 reg_pre_kernel=3,
                 reg_post_kernel=3,
                 reg_pre_num=2,
                 reg_post_num=1,
                 cls_out_channels=1024,
                 reg_offset_out_channels=256,
                 reg_cls_out_channels=256,
                 num_cls_fcs=1,
                 num_reg_fcs=0,
                 reg_class_agnostic=True,
                 norm_cfg=None,
                 bbox_coder=dict(type='BucketingBBoxCoder',
                                 num_buckets=14,
                                 scale_factor=1.7),
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox_cls=dict(type='CrossEntropyLoss',
                                    use_sigmoid=True,
                                    loss_weight=1.0),
                 loss_bbox_reg=dict(type='SmoothL1Loss',
                                    beta=0.1,
                                    loss_weight=1.0)):
        """Build a SABL-style bbox head: an FC classification branch and a
        bucketing-based regression branch with separate x/y conv stacks,
        attention maps and 1-D upsampling.

        Args:
            num_classes: number of object classes; ``fc_cls`` adds one
                extra logit (``num_classes + 1``).
            cls_in_channels / reg_in_channels: input channels of the
                classification / regression branches.
            roi_feat_size: spatial size of the incoming RoI features.
            reg_feat_up_ratio: integer upsampling factor of the regression
                features; must satisfy
                ``roi_feat_size * ratio == bbox_coder['num_buckets']``.
            reg_pre_kernel / reg_post_kernel: kernel sizes of the convs
                before / after the x-y split.
            reg_pre_num / reg_post_num: how many of each conv to stack.
            cls_out_channels / reg_offset_out_channels /
                reg_cls_out_channels: widths of the FC branches.
            num_cls_fcs / num_reg_fcs: FC layer counts per branch.
            reg_class_agnostic: must be True (asserted below).
            norm_cfg: normalization config forwarded to ``ConvModule``.
            bbox_coder / loss_cls / loss_bbox_cls / loss_bbox_reg:
                builder configs for the box coder and the three losses.
        """
        super(SABLHead, self).__init__()
        self.cls_in_channels = cls_in_channels
        self.reg_in_channels = reg_in_channels
        self.roi_feat_size = roi_feat_size
        self.reg_feat_up_ratio = int(reg_feat_up_ratio)
        self.num_buckets = bbox_coder['num_buckets']
        assert self.reg_feat_up_ratio // 2 >= 1  # ratio must be >= 2
        self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio
        # upsampled feature length must line up with the bucket count
        assert self.up_reg_feat_size == bbox_coder['num_buckets']
        self.reg_pre_kernel = reg_pre_kernel
        self.reg_post_kernel = reg_post_kernel
        self.reg_pre_num = reg_pre_num
        self.reg_post_num = reg_post_num
        self.num_classes = num_classes
        self.cls_out_channels = cls_out_channels
        self.reg_offset_out_channels = reg_offset_out_channels
        self.reg_cls_out_channels = reg_cls_out_channels
        self.num_cls_fcs = num_cls_fcs
        self.num_reg_fcs = num_reg_fcs
        self.reg_class_agnostic = reg_class_agnostic
        assert self.reg_class_agnostic  # only the class-agnostic form is supported
        self.norm_cfg = norm_cfg

        # registry-built components (project helpers)
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox_cls = build_loss(loss_bbox_cls)
        self.loss_bbox_reg = build_loss(loss_bbox_reg)

        self.cls_fcs = self._add_fc_branch(self.num_cls_fcs,
                                           self.cls_in_channels,
                                           self.roi_feat_size,
                                           self.cls_out_channels)

        # half the buckets, rounded up (one group per box side)
        self.side_num = int(np.ceil(self.num_buckets / 2))

        if self.reg_feat_up_ratio > 1:
            # 1-D transposed convs upsample the per-axis feature sequences
            # by exactly reg_feat_up_ratio (kernel size == stride)
            self.upsample_x = nn.ConvTranspose1d(reg_in_channels,
                                                 reg_in_channels,
                                                 self.reg_feat_up_ratio,
                                                 stride=self.reg_feat_up_ratio)
            self.upsample_y = nn.ConvTranspose1d(reg_in_channels,
                                                 reg_in_channels,
                                                 self.reg_feat_up_ratio,
                                                 stride=self.reg_feat_up_ratio)

        # shared convs applied before splitting into x/y streams
        self.reg_pre_convs = nn.ModuleList()
        for i in range(self.reg_pre_num):
            reg_pre_conv = ConvModule(reg_in_channels,
                                      reg_in_channels,
                                      kernel_size=reg_pre_kernel,
                                      padding=reg_pre_kernel // 2,
                                      norm_cfg=norm_cfg,
                                      act_cfg=dict(type='ReLU'))
            self.reg_pre_convs.append(reg_pre_conv)

        # 1xK convs for the x stream, Kx1 convs for the y stream
        self.reg_post_conv_xs = nn.ModuleList()
        for i in range(self.reg_post_num):
            reg_post_conv_x = ConvModule(reg_in_channels,
                                         reg_in_channels,
                                         kernel_size=(1, reg_post_kernel),
                                         padding=(0, reg_post_kernel // 2),
                                         norm_cfg=norm_cfg,
                                         act_cfg=dict(type='ReLU'))
            self.reg_post_conv_xs.append(reg_post_conv_x)
        self.reg_post_conv_ys = nn.ModuleList()
        for i in range(self.reg_post_num):
            reg_post_conv_y = ConvModule(reg_in_channels,
                                         reg_in_channels,
                                         kernel_size=(reg_post_kernel, 1),
                                         padding=(reg_post_kernel // 2, 0),
                                         norm_cfg=norm_cfg,
                                         act_cfg=dict(type='ReLU'))
            self.reg_post_conv_ys.append(reg_post_conv_y)

        # 1x1 convs producing single-channel attention maps per axis
        self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1)
        self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1)

        self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1)
        self.relu = nn.ReLU(inplace=True)

        self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs,
                                               self.reg_in_channels, 1,
                                               self.reg_cls_out_channels)
        self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs,
                                                  self.reg_in_channels, 1,
                                                  self.reg_offset_out_channels)
        # per-bucket classification score and per-bucket offset heads
        self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1)
        self.fc_reg_offset = nn.Linear(self.reg_offset_out_channels, 1)
Example #9
0
def WNConvTranspose1d(*args, **kwargs):
    """Weight-normalized 1-D transposed convolution factory.

    Arguments are forwarded unchanged to ``nn.ConvTranspose1d``; the
    resulting layer is wrapped in weight normalization.
    """
    layer = nn.ConvTranspose1d(*args, **kwargs)
    return weight_norm(layer)
Example #10
0
    def __init__(self,
                 sources,
                 audio_channels=2,
                 channels=64,
                 depth=6,
                 rewrite=True,
                 glu=True,
                 rescale=0.1,
                 resample=True,
                 kernel_size=8,
                 stride=4,
                 growth=2.,
                 lstm_layers=2,
                 context=3,
                 normalize=False,
                 samplerate=44100,
                 segment_length=4 * 10 * 44100):
        """
        Args:
            sources (list[str]): list of source names
            audio_channels (int): stereo or mono
            channels (int): first convolution channels
            depth (int): number of encoder/decoder layers
            rewrite (bool): add 1x1 convolution to each encoder layer
                and a convolution to each decoder layer.
                For the decoder layer, `context` gives the kernel size.
            glu (bool): use glu instead of ReLU
            resample (bool): upsample x2 the input and downsample /2 the output.
            rescale (float): rescale initial weights of convolutions
                to get their standard deviation closer to `rescale`
            kernel_size (int): kernel size for convolutions
            stride (int): stride for convolutions
            growth (float): multiply (resp divide) number of channels by that
                for each layer of the encoder (resp decoder)
            lstm_layers (int): number of lstm layers, 0 = no lstm
            context (int): kernel size of the convolution in the
                decoder before the transposed convolution. If > 1,
                will provide some context from neighboring time
                steps.
            normalize (bool): stored flag; presumably enables input
                normalization in forward() — confirm against the
                forward implementation.
            samplerate (int): stored as meta information for easing
                future evaluations of the model.
            segment_length (int): stored as meta information for easing
                future evaluations of the model. Length of the segments on which
                the model was trained.
        """

        super().__init__()
        self.audio_channels = audio_channels
        self.sources = sources
        self.kernel_size = kernel_size
        self.context = context
        self.stride = stride
        self.depth = depth
        self.resample = resample
        self.channels = channels
        self.normalize = normalize
        self.samplerate = samplerate
        self.segment_length = segment_length

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        if glu:
            activation = nn.GLU(dim=1)
            # GLU halves the channel dim, so rewrite convs emit 2x channels
            ch_scale = 2
        else:
            activation = nn.ReLU()
            ch_scale = 1
        in_channels = audio_channels
        for index in range(depth):
            encode = []
            encode += [
                nn.Conv1d(in_channels, channels, kernel_size, stride),
                nn.ReLU()
            ]
            if rewrite:
                # 1x1 "rewrite" conv followed by GLU/ReLU
                encode += [
                    nn.Conv1d(channels, ch_scale * channels, 1), activation
                ]
            self.encoder.append(nn.Sequential(*encode))

            decode = []
            if index > 0:
                out_channels = in_channels
            else:
                # outermost decoder emits one waveform per source
                out_channels = len(self.sources) * audio_channels
            if rewrite:
                # context-sized conv before the transposed conv
                decode += [
                    nn.Conv1d(channels, ch_scale * channels, context),
                    activation
                ]
            decode += [
                nn.ConvTranspose1d(channels, out_channels, kernel_size, stride)
            ]
            if index > 0:
                decode.append(nn.ReLU())
            # decoders are built innermost-first, so prepend
            self.decoder.insert(0, nn.Sequential(*decode))
            in_channels = channels
            channels = int(growth * channels)

        # width of the innermost layer after the loop
        channels = in_channels

        if lstm_layers:
            self.lstm = BLSTM(channels, lstm_layers)
        else:
            self.lstm = None

        if rescale:
            rescale_module(self, reference=rescale)
Example #11
0
    def __init__(self, cfg):
        """Build the VAE: a ResNeXt-style 1-D conv encoder producing the
        latent mu/log-var pair, and a mirrored decoder of transposed /
        decode-mode layers, plus the Adam optimizer.

        Args:
            cfg: configuration dict; reads channel widths and kernel sizes
                (``conv{0,1,2}_ch/ker``), ``cardinality``, ``dropRate``,
                ``latent_dim``, ``spk_length``, the down-sampling ratios
                (``ds_ratio_1/2/tot``) and the optimizer hyper-parameters
                (``learn_rate``, ``weight_decay``).
        """
        super(Vae, self).__init__(cfg)

        # encoder
        self.res0 = make_layers(cfg["n_channels"],
                                cfg["conv1_ch"],
                                cfg["conv0_ker"],
                                n_layers=1,
                                cardinality=1,
                                dropRate=0)
        self.resx = ResNeXtBottleNeck(cfg["conv1_ch"],
                                      cfg["conv1_ker"],
                                      cardinality=cfg["cardinality"],
                                      dropRate=cfg["dropRate"])
        self.res2 = nn.Sequential(
            nn.Conv1d(cfg["conv1_ch"],
                      cfg["conv2_ch"],
                      cfg["conv2_ker"],
                      groups=1,
                      padding=get_padding(cfg["conv2_ker"]),
                      bias=False), nn.BatchNorm1d(cfg["conv2_ch"]),
            nn.Dropout(p=cfg["dropRate"]))
        # latent heads
        # NOTE(review): in_features assumes the encoder output is flattened
        # to conv2_ch * spk_length / ds_ratio_tot — confirm against forward()
        self.enc_mu = nn.Linear(in_features=int(
            cfg["conv2_ch"] * cfg["spk_length"] / cfg["ds_ratio_tot"]),
                                out_features=cfg["latent_dim"])
        self.enc_log_var = nn.Linear(in_features=int(
            cfg["conv2_ch"] * cfg["spk_length"] / cfg["ds_ratio_tot"]),
                                     out_features=cfg["latent_dim"])
        # decoder
        # projects the latent code back to the flattened encoder size
        self.dec_linear = nn.Linear(
            in_features=cfg["latent_dim"],
            out_features=int(cfg["conv2_ch"] * cfg["spk_length"] /
                             cfg["ds_ratio_tot"]))
        self.deres2 = make_layers(cfg["conv2_ch"],
                                  cfg["conv1_ch"],
                                  cfg["conv2_ker"],
                                  n_layers=1,
                                  decode=True,
                                  dropRate=cfg["dropRate"])
        self.deres1 = BasicResBlock(cfg["conv1_ch"],
                                    cfg["conv1_ker"],
                                    n_layers=2,
                                    decode=True,
                                    dropRate=cfg["dropRate"])
        self.deres0 = nn.ConvTranspose1d(cfg["conv1_ch"],
                                         cfg["n_channels"],
                                         cfg["conv0_ker"],
                                         padding=get_padding(cfg["conv0_ker"]))
        # down sampling layers
        self.ds1 = nn.MaxPool1d(cfg["ds_ratio_1"])
        self.ds2 = nn.MaxPool1d(cfg["ds_ratio_2"])
        # move model to GPU
        if torch.cuda.is_available():
            self.cuda()
        # optimizer
        self.optimizer = optim.Adam(self.parameters(),
                                    lr=self.cfg["learn_rate"],
                                    weight_decay=self.cfg["weight_decay"],
                                    amsgrad=True)
        # bookkeeping for downstream label/target tracking (filled elsewhere)
        self.unique_labels = []
        self.target_means = []
Пример #12
0
 def __append_layer(self, net_style, args_dict):
     """Append a ``torch.nn`` layer named by ``net_style`` to ``self.layers``.

     The layer is constructed from the values of ``args_dict`` taken in
     insertion order and passed positionally, so the dict must list the
     constructor arguments in the layer's positional order.

     Special case: ``"reshape"`` stores the target shape object itself
     (no module is built).  Unknown styles are silently ignored, matching
     the original elif-chain behavior.

     Bug fix: the ``"Fold"`` branch previously constructed ``nn.Unfold``;
     it now builds ``nn.Fold`` as the name demands.
     """
     args_values_list = list(args_dict.values())
     if net_style == "reshape":
         # Store the target tensor shape directly instead of a module.
         self.layers.append(args_values_list[0])
         return
     # Map layer name -> (constructor, number of positional args consumed).
     # Argument counts mirror the original call sites exactly.
     dispatch = {
         "Conv1d": (nn.Conv1d, 8),
         "Conv2d": (nn.Conv2d, 8),
         "Conv3d": (nn.Conv3d, 8),
         "ConvTranspose1d": (nn.ConvTranspose1d, 9),
         "ConvTranspose2d": (nn.ConvTranspose2d, 9),
         "ConvTranspose3d": (nn.ConvTranspose3d, 9),
         "Unfold": (nn.Unfold, 4),
         "Fold": (nn.Fold, 5),  # was nn.Unfold — fixed
         "MaxPool1d": (nn.MaxPool1d, 6),
         "MaxPool2d": (nn.MaxPool2d, 6),
         "MaxPool3d": (nn.MaxPool3d, 6),
         "MaxUnpool1d": (nn.MaxUnpool1d, 3),
         "MaxUnpool2d": (nn.MaxUnpool2d, 3),
         "MaxUnpool3d": (nn.MaxUnpool3d, 3),
         "AvgPool1d": (nn.AvgPool1d, 5),
         "AvgPool2d": (nn.AvgPool2d, 5),
         "AvgPool3d": (nn.AvgPool3d, 5),
         "FractionalMaxPool2d": (nn.FractionalMaxPool2d, 5),
         "LPPool1d": (nn.LPPool1d, 4),
         "LPPool2d": (nn.LPPool2d, 4),
         "AdaptiveMaxPool1d": (nn.AdaptiveMaxPool1d, 2),
         "AdaptiveMaxPool2d": (nn.AdaptiveMaxPool2d, 2),
         "AdaptiveMaxPool3d": (nn.AdaptiveMaxPool3d, 2),
         "AdaptiveAvgPool1d": (nn.AdaptiveAvgPool1d, 1),
         "AdaptiveAvgPool2d": (nn.AdaptiveAvgPool2d, 1),
         "AdaptiveAvgPool3d": (nn.AdaptiveAvgPool3d, 1),
         "ReflectionPad1d": (nn.ReflectionPad1d, 1),
         "ReflectionPad2d": (nn.ReflectionPad2d, 1),
         "ReplicationPad1d": (nn.ReplicationPad1d, 1),
         "ReplicationPad2d": (nn.ReplicationPad2d, 1),
         "ReplicationPad3d": (nn.ReplicationPad3d, 1),
         "ZeroPad2d": (nn.ZeroPad2d, 1),
         "ConstantPad1d": (nn.ConstantPad1d, 2),
         "ConstantPad2d": (nn.ConstantPad2d, 2),
         "ConstantPad3d": (nn.ConstantPad3d, 2),
         "ELU": (nn.ELU, 2),
         "Hardshrink": (nn.Hardshrink, 1),
         "Hardtanh": (nn.Hardtanh, 5),
         "LeakyReLU": (nn.LeakyReLU, 2),
         "LogSigmoid": (nn.LogSigmoid, 0),
         "PReLU": (nn.PReLU, 2),
         "ReLU": (nn.ReLU, 1),
         "ReLU6": (nn.ReLU6, 1),
         "RReLU": (nn.RReLU, 3),
         "SELU": (nn.SELU, 1),
         "CELU": (nn.CELU, 2),
         "Sigmoid": (nn.Sigmoid, 0),
         "Softplus": (nn.Softplus, 2),
         "Softshrink": (nn.Softshrink, 1),
         "Softsign": (nn.Softsign, 0),
         "Tanh": (nn.Tanh, 0),
         "Tanhshrink": (nn.Tanhshrink, 0),
         "Threshold": (nn.Threshold, 3),
         "Softmin": (nn.Softmin, 1),
         "Softmax": (nn.Softmax, 1),
         "Softmax2d": (nn.Softmax2d, 0),
         "LogSoftmax": (nn.LogSoftmax, 1),
         "AdaptiveLogSoftmaxWithLoss": (nn.AdaptiveLogSoftmaxWithLoss, 5),
         "BatchNorm1d": (nn.BatchNorm1d, 5),
         "BatchNorm2d": (nn.BatchNorm2d, 5),
         "BatchNorm3d": (nn.BatchNorm3d, 5),
         "GroupNorm": (nn.GroupNorm, 4),
         "InstanceNorm1d": (nn.InstanceNorm1d, 5),
         "InstanceNorm2d": (nn.InstanceNorm2d, 5),
         "InstanceNorm3d": (nn.InstanceNorm3d, 5),
         "LayerNorm": (nn.LayerNorm, 3),
         "LocalResponseNorm": (nn.LocalResponseNorm, 4),
         "Linear": (nn.Linear, 3),
         "Dropout": (nn.Dropout, 2),
         "Dropout2d": (nn.Dropout2d, 2),
         "Dropout3d": (nn.Dropout3d, 2),
         "AlphaDropout": (nn.AlphaDropout, 2),
     }
     entry = dispatch.get(net_style)
     if entry is None:
         # Unknown layer names were silently skipped before; keep that.
         return
     layer_cls, n_args = entry
     self.layers.append(layer_cls(*args_values_list[:n_args]))
Пример #13
0
    def __init__(self, image_size=64, z_dim=100, conv_dim=64):
        """Build a SAGAN-style generator: noise z -> single-channel image.

        Stages l1..l3 (plus l4 when imsize == 64) are spectrally-normalised
        transposed convolutions, each halving the channel count; `last` maps
        to one output channel through tanh.

        Args:
            image_size: target spatial size; the l4 stage exists only for 64.
            z_dim: length of the input noise vector.
            conv_dim: base channel width; stage 1 outputs conv_dim * mult.
        """
        super(Generator, self).__init__()
        self.imsize = image_size

        layer1 = []
        layer2 = []
        layer3 = []
        # layern = []
        last = []

        # mult scales stage-1 channels with image size (8 for imsize == 64).
        repeat_num = int(np.log2(self.imsize)) - 3
        mult = 2**repeat_num  # 8
        layer1.append(
            SpectralNorm(nn.ConvTranspose2d(z_dim, conv_dim * mult, 4)))
        layer1.append(nn.BatchNorm2d(conv_dim * mult))
        layer1.append(nn.ReLU())

        curr_dim = conv_dim * mult

        layer2.append(
            SpectralNorm(
                nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 3, 2,
                                   2)))  # 4,2,1
        layer2.append(nn.BatchNorm2d(int(curr_dim / 2)))
        layer2.append(nn.ReLU())

        curr_dim = int(curr_dim / 2)

        layer3.append(
            SpectralNorm(
                nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 3, 2, 2)))
        layer3.append(nn.BatchNorm2d(int(curr_dim / 2)))
        layer3.append(nn.ReLU())

        # curr_dim = int(curr_dim / 2)
        #
        # layern.append(SpectralNorm(nn.ConvTranspose1d(curr_dim, int(curr_dim / 2), 4, 2, 1)))
        # layern.append(nn.BatchNorm2d(int(curr_dim / 2)))
        # layern.append(nn.ReLU())

        if self.imsize == 64:
            layer4 = []
            curr_dim = int(curr_dim / 2)
            layer4.append(
                SpectralNorm(
                    nn.ConvTranspose2d(curr_dim, int(curr_dim / 2), 4, 2, 1)))
            layer4.append(nn.BatchNorm2d(int(curr_dim / 2)))
            layer4.append(nn.ReLU())
            self.l4 = nn.Sequential(*layer4)
            curr_dim = int(curr_dim / 2)

    # self.ln = nn.Sequential(*layern)
        self.l1 = nn.Sequential(*layer1)
        self.l2 = nn.Sequential(*layer2)
        self.l3 = nn.Sequential(*layer3)

        # NOTE(review): in_channels is hard-coded to 64, which equals curr_dim
        # only on the imsize == 64 path; for other sizes self.l4 is never
        # created and 64 will not match curr_dim — confirm forward() only
        # supports imsize == 64.
        last.append(nn.ConvTranspose2d(64, 1, 2, 2, 1))  # curr_dim
        last.append(nn.Tanh())
        self.last = nn.Sequential(*last)

        # Self-attention modules over 64-channel feature maps; exact insertion
        # points are determined by forward() (not visible here).
        self.attn1 = Self_Attn(64, 'relu')  #128
        self.attn2 = Self_Attn(64, 'relu')
        # 1x1 transposed conv, 144 -> 128 channels; presumably adapts a 1-D
        # input tensor — TODO confirm against forward().
        self.input1d2d = nn.ConvTranspose1d(144, 128, 1)
Пример #14
0
    def __init__(self):
        """Build SEGAN-style encoder/decoder conv stacks for 1-D audio.

        Every conv uses kernel 32, stride 2, padding 15, so each encoder
        stage halves the time axis: [B x 1 x 16384] -> [B x 1024 x 8].
        Each decoder stage mirrors an encoder stage; its input channel
        count is doubled because the forward pass concatenates the
        homologous encoder feature map (skip connections).
        """
        super().__init__()

        # (in_channels, out_channels) for encoder stages enc1..enc11.
        enc_io = [(1, 16), (16, 32), (32, 32), (32, 64), (64, 64),
                  (64, 128), (128, 128), (128, 256), (256, 256),
                  (256, 512), (512, 1024)]
        # Register in the exact order enc1, enc1_nl, enc2, ... so the
        # submodule layout matches a hand-written layer list.
        for stage, (c_in, c_out) in enumerate(enc_io, start=1):
            setattr(self, "enc%d" % stage, nn.Conv1d(c_in, c_out, 32, 2, 15))
            setattr(self, "enc%d_nl" % stage, nn.PReLU())

        # (in_channels, out_channels) for decoder stages dec10..dec1; the
        # input widths are doubled relative to the matching encoder outputs
        # to accommodate the skip concatenation.
        dec_io = [(2048, 512), (1024, 256), (512, 256), (512, 128),
                  (256, 128), (256, 64), (128, 64), (128, 32),
                  (64, 32), (64, 16)]
        for stage, (c_in, c_out) in zip(range(10, 0, -1), dec_io):
            setattr(self, "dec%d" % stage,
                    nn.ConvTranspose1d(c_in, c_out, 32, 2, 15))
            setattr(self, "dec%d_nl" % stage, nn.PReLU())

        # Final stage back to one waveform channel, squashed by tanh.
        self.dec_final = nn.ConvTranspose1d(32, 1, 32, 2, 15)
        self.dec_tanh = nn.Tanh()

        # initialize weights
        self.init_weights()
    def __init__(self, args, temp=1., alpha0=10., training_flag=True):
        """Concrete/IBP VAE for simulated 1-D signals.

        Args:
            args: config namespace; reads dataset_type, bottleneck_size,
                learning_type, num_classes, device and model_type.
            temp: Concrete-distribution temperature.
            alpha0: Beta concentration used to initialise the IBP
                stick-breaking parameters (ibp mode only).
            training_flag: value stored in nn.Module's ``training`` flag in
                ibp mode.
        """
        super(VAE_Concrete_Simulated, self).__init__()
        self.temp = temp
        self.dataset = args.dataset_type
        self.bottleneck_size = args.bottleneck_size
        self.learning_type = args.learning_type
        self.num_classes = args.num_classes
        # Shared geometry for every (de)conv stage below.
        self.kernel_size = 7
        self.stride = 5
        self.pad = 1
        self.device = args.device
        self.model_type = args.model_type.lower()
        self.vae_dropout = 0.1
        print("IMPORTANT: NEED TO PASS proper training_flag for ibp execution ...")
        # Merged the two identical `model_type == 'ibp'` branches of the
        # original.  The original also pre-set self.beta_a / self.beta_b to
        # softplus'd constant tensors here; those were unconditionally
        # overwritten by the nn.Parameter definitions in the IBP section
        # below (same condition), so the dead assignments were removed.
        if self.model_type == 'ibp':
            # ibp learns mean, logvar and a Bernoulli gate per latent unit.
            self.params_to_learn = 3
            self.training = training_flag
        else:
            self.params_to_learn = 2

        # Channel width carrying all variational parameters jointly.
        bottleneck_ch = self.bottleneck_size * self.params_to_learn

        # ENCODER -----------------------------------------------
        # channels progression: 1 -> [32, 64, 128, bottleneck_ch, bottleneck_ch]
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=self.kernel_size, padding=self.pad,
                               stride=self.stride, bias=False)
        self.bn1 = nn.BatchNorm1d(num_features=32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=self.kernel_size, padding=self.pad,
                               stride=self.stride, bias=True)
        self.bn2 = nn.BatchNorm1d(num_features=64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.conv3 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=self.kernel_size, padding=self.pad,
                               stride=self.stride, bias=True)
        self.bn3 = nn.BatchNorm1d(num_features=128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.conv4 = nn.Conv1d(in_channels=128, out_channels=bottleneck_ch,
                               kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True)
        self.bn4 = nn.BatchNorm1d(num_features=bottleneck_ch, eps=1e-05, momentum=0.1,
                                  affine=True, track_running_stats=True)

        # Final stride-1 conv keeps the temporal length.
        self.conv5 = nn.Conv1d(in_channels=bottleneck_ch, out_channels=bottleneck_ch,
                               kernel_size=self.kernel_size, padding=self.pad, stride=1, bias=True)
        self.bn5 = nn.BatchNorm1d(num_features=bottleneck_ch)

        # 1x1 head that learns the posterior means.
        self.conv_mean = nn.Conv1d(in_channels=bottleneck_ch, out_channels=self.bottleneck_size,
                                   kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        # 1x1 head that learns the posterior log-variances.
        self.conv_logvar = nn.Conv1d(in_channels=bottleneck_ch, out_channels=self.bottleneck_size,
                                     kernel_size=1, padding=0, stride=1, groups=1, bias=True)
        if self.model_type == 'ibp':
            # 1x1 head for the Bernoulli gate logits (ibp only).
            self.conv_bernoulli = nn.Conv1d(in_channels=bottleneck_ch, out_channels=self.bottleneck_size,
                                            kernel_size=1, padding=0, stride=1, groups=1, bias=True)

        # Classifier ----------------------------
        self.full_conn1 = nn.Linear(in_features=bottleneck_ch, out_features=100)
        self.full_conn2 = nn.Linear(in_features=100, out_features=self.num_classes)

        # IBP -----------------------------------
        if self.model_type == 'ibp':
            # Initialise so that softplus(beta_a) ~ alpha0, softplus(beta_b) ~ 1.
            a_val = np.log(np.exp(alpha0) - 1)  # inverse softplus
            b_val = np.log(np.exp(1.) - 1)
            self.beta_a = nn.Parameter(torch.Tensor(self.bottleneck_size).zero_() + a_val)
            self.beta_b = nn.Parameter(torch.Tensor(self.bottleneck_size).zero_() + b_val)

        # DECODER -----------------------------
        if self.learning_type == 'supervised' or self.learning_type == 'baseline':
            # The +10 input channels presumably carry a label code
            # concatenated to the latent — confirm against forward().
            self.unconv1 = nn.ConvTranspose1d(in_channels=self.bottleneck_size+10, out_channels=bottleneck_ch,
                                              kernel_size=self.kernel_size, padding=self.pad, stride=self.stride,
                                              bias=True)
        elif self.learning_type == 'unsupervised':
            self.unconv1 = nn.ConvTranspose1d(in_channels=self.bottleneck_size, out_channels=bottleneck_ch,
                                              kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True)
        self.unbn1 = nn.BatchNorm1d(num_features=bottleneck_ch, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.unconv2 = nn.ConvTranspose1d(in_channels=bottleneck_ch, out_channels=128,
                                          kernel_size=self.kernel_size, padding=self.pad, stride=self.stride, bias=True)
        self.unbn2 = nn.BatchNorm1d(num_features=128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.unconv3 = nn.ConvTranspose1d(in_channels=128, out_channels=64, kernel_size=self.kernel_size, padding=self.pad,
                                          stride=self.stride, bias=True)
        self.unbn3 = nn.BatchNorm1d(num_features=64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.unconv4 = nn.ConvTranspose1d(in_channels=64, out_channels=32, kernel_size=self.kernel_size, padding=self.pad,
                                          stride=self.stride, bias=True)
        self.unbn4 = nn.BatchNorm1d(num_features=32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

        self.unconv5 = nn.ConvTranspose1d(in_channels=32, out_channels=1, kernel_size=self.kernel_size, padding=self.pad,
                                          stride=self.stride, bias=True)
Пример #16
0
    def __init__(
        self,
        feat_in,
        feat_out,
        feat_hidden,
        stride_layers,
        non_stride_layers=0,
        kernel_size=11,
        init_mode="xavier_uniform",
        activation="relu",
        stride_transpose=True,
    ):
        """Conv1d decoder: feat_in -> feat_hidden, optional depthwise
        stride-2 stages (transposed = upsample, plain = downsample), then
        a pointwise projection to feat_out.

        Raises:
            ValueError: when any conv stage is requested with a kernel
                smaller than 3 or an even kernel.
        """
        super().__init__()

        # The padding arithmetic below assumes an odd kernel >= 3.
        if ((stride_layers + non_stride_layers) > 0) and (kernel_size < 3 or kernel_size % 2 == 0):
            raise ValueError("Kernel size in this decoder needs to be >= 3 and odd when using at least 1 conv layer.")

        act = jasper_activations[activation]()

        self.feat_in = feat_in
        self.feat_out = feat_out
        self.feat_hidden = feat_hidden

        # Entry pointwise projection into the hidden width.
        layers = [nn.Conv1d(self.feat_in, self.feat_hidden, kernel_size=1, bias=True)]

        for _ in range(stride_layers):
            if stride_transpose:
                # Transposed depthwise conv doubles the time axis.
                resample = nn.ConvTranspose1d(
                    self.feat_hidden,
                    self.feat_hidden,
                    kernel_size,
                    stride=2,
                    padding=(kernel_size - 3) // 2 + 1,
                    output_padding=1,
                    bias=True,
                    groups=self.feat_hidden,
                )
            else:
                # Strided depthwise conv halves the time axis.
                resample = nn.Conv1d(
                    self.feat_hidden,
                    self.feat_hidden,
                    kernel_size,
                    stride=2,
                    padding=(kernel_size - 1) // 2,
                    bias=True,
                    groups=self.feat_hidden,
                )
            layers += [
                act,
                resample,
                nn.Conv1d(self.feat_hidden, self.feat_hidden, kernel_size=1, bias=True),
                nn.BatchNorm1d(self.feat_hidden, eps=1e-3, momentum=0.1),
            ]

        for _ in range(non_stride_layers):
            # Length-preserving depthwise stage with the same block layout.
            layers += [
                act,
                nn.Conv1d(
                    self.feat_hidden,
                    self.feat_hidden,
                    kernel_size,
                    bias=True,
                    groups=self.feat_hidden,
                    padding=kernel_size // 2,
                ),
                nn.Conv1d(self.feat_hidden, self.feat_hidden, kernel_size=1, bias=True),
                nn.BatchNorm1d(self.feat_hidden, eps=1e-3, momentum=0.1),
            ]

        # Final pointwise projection to the output width.
        layers += [act, nn.Conv1d(self.feat_hidden, self.feat_out, kernel_size=1, bias=True)]
        self.decoder_layers = nn.Sequential(*layers)

        self.apply(lambda m: init_weights(m, mode=init_mode))
Пример #17
0
    def __init__(self, N, L, B, H, P, X, R, S=1):
        """Residual temporal separation network (Conv-TasNet-style layout).

        Args:
            N: channels produced by the front-end encoder conv.
            L: front-end kernel size (stride is L // 2).
            B: bottleneck channels flowing between TCN blocks.
            H, P: hyper-parameters forwarded to ResidualTN.TCN — presumably
                hidden width and kernel size; confirm in the TCN definition.
            X: TCN blocks per repeat, with dilations 2**0 .. 2**(X-1).
            R: number of repeats (R * X blocks total).
            S: number of sources to produce.
        """
        super(ResidualTN, self).__init__()

        # Number of sources to produce
        self.S, self.N, self.L, self.B, self.H, self.P = S, N, L, B, H, P
        self.X, self.R = X, R

        # Front end
        self.fe = nn.ModuleList([
            nn.Conv1d(in_channels=1,
                      out_channels=N,
                      kernel_size=L,
                      stride=L // 2,
                      padding=L // 2),
            nn.ReLU(),
        ])

        self.ln = nn.BatchNorm1d(N)
        self.l1 = nn.Conv1d(in_channels=N, out_channels=B, kernel_size=1)

        # Separation module
        # Residual connections
        # residual_to_from[i] lists the indices of earlier blocks whose
        # outputs feed block i (-1 presumably denotes the separator input —
        # confirm in forward()).  Only these fixed block indices receive
        # residuals.
        self.residual_to_from = [[] for _ in range(R * X)]
        self.residual_to_from[8] = [-1]
        self.residual_to_from[16] = [-1, 8]
        self.residual_to_from[24] = [-1, 8, 16]
        self.residual_to_from[11] = [3]
        self.residual_to_from[19] = [3, 11]
        self.residual_to_from[27] = [3, 11, 19]
        # Map each receiving block index to the position of its 1x1 "dense"
        # conv in self.residual_denses (assigned in enumeration order).
        self.layer_to_dense = {}
        j = 0
        for i, res_connections in enumerate(self.residual_to_from):
            if len(res_connections):
                self.layer_to_dense[i] = j
                j += 1

        # One 1x1 conv per receiving block, mapping the concatenated
        # residual stack (len(res_connections) * B channels) back to B.
        self.residual_denses = nn.ModuleList([
            nn.Conv1d(in_channels=len(res_connections) * B,
                      out_channels=B,
                      kernel_size=1)
            for res_connections in self.residual_to_from
            if len(res_connections) > 0
        ])

        # Give every distinct residual source its own normalisation layer,
        # indexed by first-seen order via prev_connections.
        self.prev_connections = {}
        self.residual_norms = []
        k = 0
        for res_from in self.residual_to_from:
            for res_ind in res_from:
                if res_ind not in self.prev_connections:
                    self.prev_connections[res_ind] = k
                    k += 1
                    self.residual_norms.append(CepstralNorm(B))
        self.residual_norms = nn.ModuleList(self.residual_norms)

        # R repeats of X TCN blocks with exponentially growing dilation.
        self.sm = nn.ModuleList([
            ResidualTN.TCN(B=B, H=H, P=P, D=2**d) for _ in range(R)
            for d in range(X)
        ])

        # Project back to N channels before the mask layer when needed.
        if B != N:
            self.reshape_before_masks = nn.Conv1d(in_channels=B,
                                                  out_channels=N,
                                                  kernel_size=1)

        # Masks layer
        # 2-D conv over the (channel, time) plane producing S mask maps.
        self.m = nn.Conv2d(in_channels=1,
                           out_channels=S,
                           kernel_size=(N + 1, 1),
                           padding=(N - N // 2, 0))

        # Back end
        # Grouped transposed conv: one synthesis filter bank per source.
        self.be = nn.ConvTranspose1d(in_channels=N * S,
                                     out_channels=S,
                                     output_padding=(L // 2) - 1,
                                     kernel_size=L,
                                     stride=L // 2,
                                     padding=L // 2,
                                     groups=S)
        self.ln_mask_in = nn.BatchNorm1d(self.N)
Пример #18
0
    def __init__(self, nwin=5, in_channels=4, out_channels=2, start_filts=128):
        """Two-stream 1-D encoder/decoder network over stacked windows.

        Two structurally identical streams are built: the plain attributes
        (``conv1`` .. ``conv18``) and their ``_1``-suffixed twins. Each
        stream encodes with three stride-2 stages plus dilated convs, then
        decodes with transposed convs whose input widths (``start_filts * 32``,
        ``* 16``, ``* 8``) indicate skip/cross-stream concatenation in the
        forward pass.

        Parameters:
            nwin: number of input windows; the net consumes
                ``in_channels * (nwin - 1)`` stacked channels and predicts
                ``out_channels * (nwin - 2)`` output channels.
            in_channels: channels per input window.
            out_channels: channels per predicted window.
            start_filts: base filter count; widths grow as 1x/2x/4x/8x.
        """
        super(stabnet, self).__init__()

        sf = start_filts  # shorthand for readability below

        # ---- encoder, stream A -------------------------------------------
        self.conv1 = nn.Conv1d(in_channels * (nwin - 1), sf, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv2 = nn.Conv1d(sf, sf * 2, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv3 = nn.Conv1d(sf * 2, sf * 2, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv4 = nn.Conv1d(sf * 2, sf * 4, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv5 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv6 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv7 = nn.Conv1d(sf * 4, sf * 8, kernel_size=4, stride=2, dilation=1, padding=1)
        # Dilated stages enlarge the receptive field without further striding.
        self.conv8 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)
        self.conv9 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)
        self.conv10 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)

        # ---- encoder, stream B (identical topology) ----------------------
        self.conv1_1 = nn.Conv1d(in_channels * (nwin - 1), sf, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv2_1 = nn.Conv1d(sf, sf * 2, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv3_1 = nn.Conv1d(sf * 2, sf * 2, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv4_1 = nn.Conv1d(sf * 2, sf * 4, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv5_1 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv6_1 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv7_1 = nn.Conv1d(sf * 4, sf * 8, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv8_1 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)
        self.conv9_1 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)
        self.conv10_1 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=2, padding=2)

        # ---- decoder, stream A -------------------------------------------
        # Input widths 32x/16x/8x exceed the encoder's 8x/4x/2x outputs,
        # implying concatenated features from the forward pass.
        self.conv11 = nn.ConvTranspose1d(sf * 32, sf * 8, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv12 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv13 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv14 = nn.ConvTranspose1d(sf * 16, sf * 4, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv15 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv16 = nn.ConvTranspose1d(sf * 8, sf * 2, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv17 = nn.Conv1d(sf * 2, sf * 2, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv18 = nn.Conv1d(sf * 2, out_channels * (nwin - 2), kernel_size=1, stride=1, dilation=1, padding=0)

        # ---- decoder, stream B (identical topology) ----------------------
        self.conv11_1 = nn.ConvTranspose1d(sf * 32, sf * 8, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv12_1 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv13_1 = nn.Conv1d(sf * 8, sf * 8, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv14_1 = nn.ConvTranspose1d(sf * 16, sf * 4, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv15_1 = nn.Conv1d(sf * 4, sf * 4, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv16_1 = nn.ConvTranspose1d(sf * 8, sf * 2, kernel_size=4, stride=2, dilation=1, padding=1)
        self.conv17_1 = nn.Conv1d(sf * 2, sf * 2, kernel_size=3, stride=1, dilation=1, padding=1)
        self.conv18_1 = nn.Conv1d(sf * 2, out_channels * (nwin - 2), kernel_size=1, stride=1, dilation=1, padding=0)

        # Batch norms shared by width (applied per-stage in forward).
        self.bn2 = nn.BatchNorm1d(sf * 2)
        self.bn4 = nn.BatchNorm1d(sf * 4)
        self.bn8 = nn.BatchNorm1d(sf * 8)
        self.bn16 = nn.BatchNorm1d(sf * 16)

        # Auxiliary head producing a 2-channel map, then a linear regressor.
        self.linconv1 = nn.Conv1d(sf * 2, sf, kernel_size=3, stride=1, dilation=1, padding=1)
        self.linconv2 = nn.Conv1d(sf, 2, kernel_size=1, stride=1, dilation=1, padding=0)
        # NOTE(review): 2 * 512 hard-codes an expected temporal length of
        # 512 at this point — confirm against the training input size.
        self.lin1 = nn.Linear(2 * 512, 512)
        self.lin2 = nn.Linear(512, 4 * (nwin - 2))

        # Weight init. FIX: use the in-place initializers
        # `kaiming_normal_` / `constant_` — the non-underscore variants were
        # deprecated in PyTorch 0.4 and removed in later releases.
        # NOTE(review): ConvTranspose1d layers are intentionally left at
        # their default init (isinstance(m, nn.Conv1d) does not match them).
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                nn.init.constant_(m.weight.data, 0)
Пример #19
0
    def __init__(self,
                 up_scale: int,
                 in_channels: int,
                 out_channels: Sequence[int],
                 filter_lengths: Union[Sequence[int], int],
                 deconv_filter_length: Optional[int] = None,
                 groups: int = 1,
                 dropouts: Union[Sequence[float], float] = 0.0,
                 mode: str = "deconv",
                 **config) -> None:
        """Up-sampling block of a 1-D U-Net decoder path.

        Upsamples by `up_scale` (transposed conv or `nn.Upsample`), then
        applies a `TripleConv` to the upsampled features concatenated with
        the skip connection from the corresponding down layer.

        Parameters
        ----------
        up_scale: int,
            scale of up sampling
        in_channels: int,
            number of channels in the input
        out_channels: sequence of int,
            numbers of channels produced by the convolutional layers;
            the last entry is also the channel count of the skip tensor
            from the corresponding down layer (see `in_channels` of
            `TripleConv` below)
        filter_lengths: int or sequence of int,
            length(s) of the filters (kernel size) of the convolutional layers
        deconv_filter_length: int, optional,
            only used when `mode` == "deconv";
            length of the filter (kernel size) of the deconvolutional
            upsampling layer
        groups: int, default 1, not used currently,
            connection pattern (of channels) of the inputs and outputs
        dropouts: float or sequence of float, default 0.0,
            dropout ratio after each `Conv_Bn_Activation`
        mode: str, default "deconv", case insensitive,
            mode of up sampling; must be one of `self.__MODES__`
        config: dict,
            other parameters, including
            activation choices, weight initializer, batch normalization
            choices, etc. for the deconvolutional layers
        """
        super().__init__()
        self.__up_scale = up_scale
        self.__in_channels = in_channels
        self.__out_channels = out_channels
        self.__deconv_filter_length = deconv_filter_length
        self.__mode = mode.lower()
        assert self.__mode in self.__MODES__
        self.config = ED(deepcopy(config))
        if self.__DEBUG__:
            print(
                f"configuration of {self.__name__} is as follows\n{dict_to_str(self.config)}"
            )

        if self.__mode == "deconv":
            # "same"-style padding for the transposed conv; clamped at 0
            # when the kernel is shorter than the stride.
            self.__deconv_padding = max(
                0, (self.__deconv_filter_length - self.__up_scale) // 2)
            self.up = nn.ConvTranspose1d(
                in_channels=self.__in_channels,
                out_channels=self.__in_channels,
                kernel_size=self.__deconv_filter_length,
                stride=self.__up_scale,
                padding=self.__deconv_padding,
            )
        else:
            # interpolation-based upsampling keeps the channel count;
            # `mode` (e.g. "nearest", "linear") is passed through verbatim
            self.up = nn.Upsample(
                scale_factor=self.__up_scale,
                mode=mode,
            )
        self.conv = TripleConv(
            # `+ self.__out_channels[-1]` corr. to the skip tensor
            # concatenated from the corr. down layer
            in_channels=self.__in_channels + self.__out_channels[-1],
            out_channels=self.__out_channels,
            filter_lengths=filter_lengths,
            subsample_lengths=1,
            groups=groups,
            dropouts=dropouts,
            **(self.config),
        )
Пример #20
0
    def __init__(self):
        """Symmetric 1-D conv autoencoder over 23-channel sequences.

        Four stride-2 conv stages (23 -> 8 -> 16 -> 32 -> 64 channels)
        mirror four transposed-conv stages back to 23 channels; a final
        conv projects to a single output channel. Every stage is followed
        by BatchNorm1d and Tanh.
        """
        super().__init__()

        in_channels = 23

        # Width-preserving stem: kernel 7, stride 1, "same" padding.
        self.input_conv = nn.Sequential(
            nn.Conv1d(in_channels,
                      in_channels,
                      kernel_size=7,
                      stride=1,
                      padding=3), nn.BatchNorm1d(in_channels), nn.Tanh())

        # Encoder: each stage halves the time axis (stride 2).
        down_widths = [in_channels, 8, 16, 32, 64]
        self.downsampler = nn.Sequential()
        for stage, (cin, cout) in enumerate(
                zip(down_widths, down_widths[1:]), start=1):
            self.downsampler.add_module(
                'CONV_%d' % stage,
                nn.Conv1d(cin, cout, kernel_size=7, stride=2, padding=3))
            self.downsampler.add_module('BN_%d' % stage, nn.BatchNorm1d(cout))
            self.downsampler.add_module('TANH_%d' % stage, nn.Tanh())

        # Decoder: mirror of the encoder; output_padding=1 restores the
        # exact pre-stride length at each stage.
        up_widths = [64, 32, 16, 8, in_channels]
        self.upsampler = nn.Sequential()
        for stage, (cin, cout) in enumerate(
                zip(up_widths, up_widths[1:]), start=1):
            self.upsampler.add_module(
                'CONVTRANS_%d' % stage,
                nn.ConvTranspose1d(cin,
                                   cout,
                                   kernel_size=7,
                                   stride=2,
                                   padding=3,
                                   output_padding=1))
            self.upsampler.add_module('BN_%d' % stage, nn.BatchNorm1d(cout))
            self.upsampler.add_module('TANH_%d' % stage, nn.Tanh())

        # Final projection to a single channel, width-preserving.
        self.output_conv = nn.Conv1d(in_channels,
                                     1,
                                     kernel_size=7,
                                     stride=1,
                                     padding=3)
Пример #21
0
 def __init__(self, chs=(128, 64)):
     """Decoder path: one (transposed-conv upsampler, conv block) pair per
     adjacent channel pair in `chs`."""
     super().__init__()
     self.chs = chs
     channel_pairs = list(zip(chs, chs[1:]))
     # kernel_size=2, stride=2 doubles the temporal length at each step.
     self.upconvs = nn.ModuleList(
         [nn.ConvTranspose1d(cin, cout, 2, 2) for cin, cout in channel_pairs])
     self.dec_blocks = nn.ModuleList(
         [Block(cin, cout) for cin, cout in channel_pairs])
Пример #22
0
    def __init__(self):
        """Three transposed-conv layers walking the embedding widths back up:
        emb2 -> emb1 -> emb0 -> 2 * num_channels (all kernel size 3).

        `emb0`, `emb1`, `emb2` and `num_channels` are module-level constants
        defined alongside the encoder.
        """
        super(Conv_Decoder, self).__init__()

        self.conv1tr = nn.ConvTranspose1d(emb2, emb1, kernel_size=3)
        self.conv2tr = nn.ConvTranspose1d(emb1, emb0, kernel_size=3)
        self.conv3tr = nn.ConvTranspose1d(emb0, 2 * num_channels, kernel_size=3)
Пример #23
0
    def __init__(self, input_shape, z_shape=20, output_shape=11):
        """Convolutional VAE over 3-channel sequences, without label input.

        Three conv stages each downsample time by 10x (kernel 16, stride 10),
        so the flattened latent interface has int(input_shape / 1000 * 9)
        features feeding the mean/log-var heads; the decoder mirrors the
        encoder with transposed convs and a final sigmoid.
        """
        super(VAE_without_label, self).__init__()

        self.input_shape = input_shape
        self.z_shape = z_shape
        self.output_shape = output_shape

        # Flattened feature count after three /10 downsamplings of 9 channels.
        flat_features = int(input_shape / 10 / 10 / 10 * 9)

        # encoder: 3 -> 9 -> 9 -> 9 channels, ReLU after each conv
        self.encoder = nn.Sequential()
        for stage in range(1, 4):
            self.encoder.add_module(
                'enc_conv%d' % stage,
                nn.Conv1d(in_channels=3 if stage == 1 else 9,
                          out_channels=9,
                          kernel_size=16,
                          stride=10,
                          padding=6,
                          padding_mode='zeros'))
            self.encoder.add_module('enc_relu%d' % stage, nn.ReLU(True))

        # latent heads: z mean and z (log-)variance
        self.encmean_fc11 = nn.Linear(flat_features, z_shape)
        self.encvar_fc12 = nn.Linear(flat_features, z_shape)

        # decoder: linear back to the flattened shape, then 3 deconv stages;
        # the last stage maps to 3 channels and ends in a sigmoid
        self.dec_fc1 = nn.Linear(z_shape, flat_features)
        self.decoder = nn.Sequential()
        for stage in range(1, 4):
            self.decoder.add_module(
                'dec_deconv%d' % stage,
                nn.ConvTranspose1d(in_channels=9,
                                   out_channels=3 if stage == 3 else 9,
                                   kernel_size=16,
                                   stride=10,
                                   padding=3,
                                   padding_mode='zeros'))
            if stage < 3:
                self.decoder.add_module('dec_relu%d' % stage, nn.ReLU(True))
        self.decoder.add_module('dec_sig1', nn.Sigmoid())
Пример #24
0
def wn_conv_transpose1d(*args, **kwargs):
    """Build an `nn.ConvTranspose1d` wrapped with weight normalization.

    All positional and keyword arguments are forwarded unchanged to
    `nn.ConvTranspose1d`; the constructed layer is returned wrapped by
    `nn.utils.weight_norm`.
    """
    deconv = nn.ConvTranspose1d(*args, **kwargs)
    return nn.utils.weight_norm(deconv)
Пример #25
0
    def __init__(
            self,
            sources: int = 2,
            n_audio_channels: int = 2,  # pylint: disable=redefined-outer-name
            kernel_size: int = 8,
            stride: int = 4,
            context: int = 3,
            depth: int = 6,
            channels: int = 64,
            growth: float = 2.0,
            lstm_layers: int = 2,
            rescale: float = 0.1,
            upsample: bool = False,
            location_shifts=12):  # pylint: disable=redefined-outer-name
        """Demucs-style wave U-Net that separates `sources` audio streams and
        decodes a 3-D location estimate per source via a parallel decoder.

        Parameters:
            sources: number of sources to separate.
            n_audio_channels: audio channels per source (e.g. 2 for stereo).
            kernel_size / stride: conv geometry of each encoder/decoder layer
                (stride is forced to 1 when `upsample` is True).
            context: kernel size of the pre-deconv decoder convolution.
            depth: number of encoder/decoder layers.
            channels: first-layer width, multiplied by `growth` per layer.
            growth: per-layer channel growth factor.
            lstm_layers: layers of the bidirectional bottleneck LSTM.
            rescale: reference std for initial weight rescaling.
            upsample: Wave-U-Net-style upsampling instead of transposed convs.
            location_shifts: retained for the alternative loc head (unused
                by the active `loc_prediction` layer).
        """
        super().__init__()
        self.sources = sources
        self.n_audio_channels = n_audio_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.context = context
        self.depth = depth
        self.channels = channels
        self.growth = growth
        self.lstm_layers = lstm_layers
        self.rescale = rescale
        self.upsample = upsample
        self.location_shifts = location_shifts

        self.encoder = nn.ModuleList()  # Source encoder
        self.decoder = nn.ModuleList()  # Audio output decoder
        self.loc_decoder = nn.ModuleList()  # Location decoder

        self.final = None

        if upsample:
            # With interpolation-based upsampling, a final 1x1 conv maps the
            # (features + input) concat to the per-source output channels.
            self.final = nn.Conv1d(channels + n_audio_channels,
                                   sources * n_audio_channels, 1)
            stride = 1

        activation = nn.GLU(dim=1)

        in_channels = n_audio_channels  # Number of input channels
        in_loc_channels = 3  # Number of input location channels

        # Wave U-Net structure
        for index in range(depth):
            encode = []
            encode += [
                nn.Conv1d(in_channels, channels, kernel_size, stride),
                nn.ReLU()
            ]
            encode += [nn.Conv1d(channels, 2 * channels, 1), activation]
            self.encoder.append(nn.Sequential(*encode))

            decode = []
            if index > 0:
                out_channels = in_channels
                out_loc_channels = 3
            else:
                # Final decoder layer (decoders are built bottom-up).
                # BUG FIX: `out_loc_channels` was previously assigned only in
                # the non-upsample branch, so `upsample=True` raised
                # UnboundLocalError when building the first loc_decoder.
                out_loc_channels = sources * 3
                if upsample:
                    out_channels = channels
                else:
                    out_channels = sources * n_audio_channels

            decode += [nn.Conv1d(channels, 2 * channels, context), activation]

            if upsample:
                decode += [
                    nn.Conv1d(channels, out_channels, kernel_size, stride=1)
                ]
            else:
                decode += [
                    nn.ConvTranspose1d(channels, out_channels, kernel_size,
                                       stride)
                ]

            if index > 0:
                decode.append(nn.ReLU())
            self.decoder.insert(0, nn.Sequential(*decode))

            loc_decoder = []
            loc_decoder += [
                nn.ConvTranspose1d(in_loc_channels, out_loc_channels,
                                   kernel_size, stride)
            ]
            if index > 0:
                loc_decoder.append(nn.ReLU())
            self.loc_decoder.insert(0, nn.Sequential(*loc_decoder))

            in_channels = channels
            channels = int(growth * channels)

        # Bi-directional LSTM for the bottleneck layer
        channels = in_channels
        self.lstm = nn.LSTM(bidirectional=True,
                            num_layers=lstm_layers,
                            hidden_size=channels,
                            input_size=channels)
        self.lstm_linear = nn.Linear(2 * channels, channels)
        self.loc_prediction = nn.Linear(2 * channels, 3)

        rescale_module(self, reference=rescale)
Пример #26
0
    def __init__(self,
                 sources=2,
                 audio_channels=1,
                 channels=80,
                 depth=6,
                 rewrite=True,
                 glu=True,
                 upsample=False,
                 rescale=0.1,
                 kernel_size=8,
                 stride=4,
                 growth=2.,
                 lstm_layers=2,
                 context=3):
        """
        Args:
            sources (int): number of sources to separate
            audio_channels (int): stereo or mono
            channels (int): first convolution channels
            depth (int): number of encoder/decoder layers
            rewrite (bool): add 1x1 convolution to each encoder layer
                and a convolution to each decoder layer.
                For the decoder layer, `context` gives the kernel size.
            glu (bool): use glu instead of ReLU
            upsample (bool): use linear upsampling with convolutions
                Wave-U-Net style, instead of transposed convolutions
            rescale (int): rescale initial weights of convolutions
                to get their standard deviation closer to `rescale`
            kernel_size (int): kernel size for convolutions
            stride (int): stride for convolutions
            growth (float): multiply (resp divide) number of channels by that
                for each layer of the encoder (resp decoder)
            lstm_layers (int): number of lstm layers, 0 = no lstm
            context (int): kernel size of the convolution in the
                decoder before the transposed convolution. If > 1,
                will provide some context from neighboring time
                steps.
        """

        super().__init__()
        self.audio_channels = audio_channels
        self.sources = sources
        self.kernel_size = kernel_size
        self.context = context
        self.stride = stride
        self.depth = depth
        self.upsample = upsample
        self.channels = channels

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()

        self.final = None
        if upsample:
            # Wave-U-Net mode: convolutions become stride-1 (note: the local
            # `stride` is overwritten AFTER self.stride was saved above) and
            # a final 1x1 conv maps (features + input) to the outputs.
            self.final = nn.Conv1d(channels + audio_channels,
                                   sources * audio_channels, 1)
            stride = 1

        if glu:
            # GLU halves its input channels, so the preceding conv must
            # produce twice as many (ch_scale = 2).
            activation = nn.GLU(dim=1)
            ch_scale = 2
        else:
            activation = nn.ReLU()
            ch_scale = 1
        in_channels = audio_channels
        for index in range(depth):
            # Encoder layer: strided conv + ReLU, optionally followed by a
            # 1x1 "rewrite" conv feeding the activation.
            encode = []
            encode += [
                nn.Conv1d(
                    in_channels,
                    channels,
                    kernel_size,
                    stride,
                    # padding=(kernel_size - 1)//2
                ),
                nn.ReLU()
            ]
            if rewrite:
                encode += [
                    nn.Conv1d(
                        channels,
                        ch_scale * channels,
                        1,
                        # padding=0
                    ),
                    activation
                ]
            self.encoder.append(nn.Sequential(*encode))

            # Decoder layers are built bottom-up and inserted at the front,
            # so index 0 here is the FINAL decoder layer.
            decode = []
            if index > 0:
                out_channels = in_channels
            else:
                if upsample:
                    out_channels = channels
                else:
                    out_channels = sources * audio_channels
            if rewrite:
                # Context conv (kernel = `context`) before the upsampling op.
                decode += [
                    nn.Conv1d(
                        channels,
                        ch_scale * channels,
                        context,
                        # padding=(context - 1)//2
                    ),
                    activation
                ]
            if upsample:
                decode += [
                    nn.Conv1d(
                        channels,
                        out_channels,
                        kernel_size,
                        # padding=(kernel_size - 1)//2,
                        stride=1),
                ]
            else:
                decode += [
                    nn.ConvTranspose1d(
                        channels,
                        out_channels,
                        kernel_size,
                        stride,
                        # padding=(kernel_size)//2-1,
                        # output_padding=stride // 2
                    )
                ]
            if index > 0:
                # No ReLU on the last decoder layer (index 0): its output is
                # the raw separated waveform.
                decode.append(nn.ReLU())
            self.decoder.insert(0, nn.Sequential(*decode))
            in_channels = channels
            channels = int(growth * channels)

        # Bottleneck width equals the deepest encoder output.
        channels = in_channels

        if lstm_layers:
            self.lstm = BLSTM(channels, lstm_layers)
        else:
            self.lstm = None

        if rescale:
            rescale_module(self, reference=rescale)
Пример #27
0
    def __init__(self,
                 in_channels=80,
                 out_channels=1,
                 proj_kernel=7,
                 base_channels=512,
                 upsample_factors=(8, 8, 2, 2),
                 res_kernel=3,
                 num_res_blocks=3):
        """Build the MelGAN generator network.

        Structure: an initial reflection-padded projection conv, one
        (LeakyReLU -> weight-normed ConvTranspose1d -> ResidualStack) stage
        per upsampling factor with channel width halving at each stage, and
        a final reflection-padded projection conv with tanh output.
        """
        super(MelganGenerator, self).__init__()

        # proj_kernel must be odd so symmetric reflection padding preserves
        # the sequence length through the projection convolutions.
        assert (proj_kernel -
                1) % 2 == 0, " [!] proj_kernel should be an odd number."

        pad = (proj_kernel - 1) // 2
        leaky_slope = 0.2
        self.inference_padding = 2

        # initial projection: mel channels -> base_channels
        net = [
            nn.ReflectionPad1d(pad),
            weight_norm(
                nn.Conv1d(in_channels,
                          base_channels,
                          kernel_size=proj_kernel,
                          stride=1,
                          bias=True))
        ]

        # upsampling stages with residual stacks
        for stage, factor in enumerate(upsample_factors):
            ch_in = base_channels // (2**stage)
            ch_out = base_channels // (2**(stage + 1))
            out_pad = factor % 2  # corrects length for odd factors
            net += [
                nn.LeakyReLU(leaky_slope),
                weight_norm(
                    nn.ConvTranspose1d(ch_in,
                                       ch_out,
                                       factor * 2,
                                       stride=factor,
                                       padding=factor // 2 + out_pad,
                                       output_padding=out_pad,
                                       bias=True)),
                ResidualStack(channels=ch_out,
                              num_res_blocks=num_res_blocks,
                              kernel_size=res_kernel)
            ]

        net.append(nn.LeakyReLU(leaky_slope))

        # final projection back to the waveform channel count
        net += [
            nn.ReflectionPad1d(pad),
            weight_norm(
                nn.Conv1d(ch_out,
                          out_channels,
                          proj_kernel,
                          stride=1,
                          bias=True)),
            nn.Tanh()
        ]
        self.layers = nn.Sequential(*net)
Пример #28
0
    def test_builder_to_backend_stress(
        self,
        use_cpu_only,
        backend,
        conv_dim,
        padding,
        DHWKdKhKw,
        stride,
        dilation,
        has_bias,
        groups,
        test_symbolic,
        test_output_shape,
    ):
        """Stress-test mb.conv_transpose against torch ConvTranspose{1,2,3}d
        over combinations of rank, padding, stride, dilation, bias, groups,
        symbolic batch size and explicit output_shape."""
        if test_symbolic and test_output_shape:
            # conv_transpose output_shape can only be constant (non-symbolic)
            return

        if backend[0] == "mlprogram" and groups == 2:
            pytest.xfail(
                "rdar://81999134 (ConvTranspose with group > 1 crashing on both CPU and GPU backend)"
            )

        D, H, W, Kd, Kh, Kw = DHWKdKhKw
        N, C_in, C_out = 1, 1 * groups, 2 * groups

        import torch
        import torch.nn as nn

        # Build the torch reference module plus the matching MIL attributes
        # for the requested convolution rank.
        if conv_dim == "conv1d":
            strides = list(stride[:1])
            dilations = list(dilation[:1])
            kernels = [Kh]
            ref_module = nn.ConvTranspose1d(
                C_in,
                C_out,
                kernels,
                stride=strides,
                dilation=dilations,
                bias=has_bias,
                groups=groups,
                padding=padding[0],
            )
            input_shape = [N, C_in, H]
            paddings = [padding[0]] * 2
        elif conv_dim == "conv2d":
            strides = list(stride[:2])
            dilations = list(dilation[:2])
            kernels = [Kh, Kw]
            ref_module = nn.ConvTranspose2d(
                C_in,
                C_out,
                kernels,
                stride=strides,
                dilation=dilations,
                bias=has_bias,
                groups=groups,
                padding=(padding[0], padding[1]),
            )
            input_shape = [N, C_in, H, W]
            # front/back padding per spatial dim
            paddings = [padding[0], padding[0], padding[1], padding[1]]
        else:
            strides = list(stride[:3])
            dilations = list(dilation[:3])
            kernels = [Kd, Kh, Kw]
            ref_module = nn.ConvTranspose3d(
                C_in,
                C_out,
                kernels,
                stride=strides,
                dilation=dilations,
                bias=has_bias,
                groups=groups,
                padding=padding,
            )
            input_shape = [N, C_in, D, H, W]
            paddings = [p for pad in padding[:3] for p in (pad, pad)]

        # Extract reference weights and compute the reference output.
        state = ref_module.state_dict()
        weight = state["weight"].detach().numpy()
        bias = state["bias"].detach().numpy() if has_bias else None

        x_torch = torch.randn(*input_shape)
        ref_out = ref_module(x_torch).detach().numpy()
        x_np = x_torch.detach().numpy()

        output_shape = list(ref_out.shape)
        if test_symbolic:
            # Make the batch size symbolic for the symbolic-shape test.
            sym_batch = get_new_symbol()
            input_shape[0] = sym_batch
            output_shape[0] = sym_batch

        expected_output_types = tuple(output_shape) + (types.fp32, )
        expected_outputs = [ref_out]

        input_placeholders = {"x": mb.placeholder(shape=input_shape)}
        input_values = {"x": x_np}

        def build(x):
            kwargs = {
                "x": x,
                "weight": weight,
                "pad": paddings,
                "pad_type": "custom",
                "strides": strides,
                "dilations": dilations,
                "groups": groups,
            }
            if has_bias:
                kwargs["bias"] = bias
            if test_output_shape:
                kwargs["output_shape"] = ref_out.shape
            return mb.conv_transpose(**kwargs)

        run_compare_builder(
            build,
            input_placeholders,
            input_values,
            expected_output_types,
            expected_outputs,
            use_cpu_only=use_cpu_only,
            frontend_only=False,
            backend=backend,
        )
Пример #29
0
    def __init__(self, featureDim, latentDim):
        """Conditional 1-D convolutional VAE over fixed-length (120-frame)
        feature sequences.

        Encoder: three stride-2 Conv1d blocks (time axis 120 -> 60 -> 30
        -> 15), a linear bottleneck, then two linear heads (encoder_lin21 /
        encoder_lin22) producing the latent mean and log-variance. Decoder
        mirrors this: two linear layers followed by three stride-2
        ConvTranspose1d blocks restoring 120 frames. One extra channel is
        reserved for the conditioning label at the encoder input and at the
        decoder's latent input.

        Args:
            featureDim: accepted for interface compatibility but not used
                here — the input feature size is hard-coded to 5 (+1 label).
                NOTE(review): confirm against call sites.
            latentDim: dimensionality of the VAE latent space.
        """
        super(autoencoder_3conv_vect_vae_conditional, self).__init__()

        self.inputFeat_dim = 5  # +1 label channel is added at the conv input
        self.input_frameLeng = 120
        # BUG FIX: was `self.input_frameLeng / 8`, which is float division in
        # Python 3 (15.0); nn.Linear / nn.BatchNorm1d require integer sizes,
        # so use floor division.
        self.encode_frameLeng = self.input_frameLeng // 8  # 15

        self.encodeDim_conv1 = 10
        self.encodeDim_conv2 = 20
        self.encodeDim_conv3 = 40
        self.encodeDim_lin1 = 100

        self.latentDim = latentDim  # e.g. 100

        self.encoder_conv_1 = nn.Sequential(
            #nn.Dropout(0.25),
            nn.Conv1d(self.inputFeat_dim + 1,
                      self.encodeDim_conv1,
                      kernel_size=5,
                      stride=2,
                      padding=2),  # +1 for the label
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_conv1))

        self.encoder_conv_2 = nn.Sequential(
            #nn.Dropout(0.25),
            nn.Conv1d(self.encodeDim_conv1,
                      self.encodeDim_conv2,
                      kernel_size=5,
                      stride=2,
                      padding=2),
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_conv2))

        self.encoder_conv_3 = nn.Sequential(
            #nn.Dropout(0.25),
            nn.Conv1d(self.encodeDim_conv2,
                      self.encodeDim_conv3,
                      kernel_size=5,
                      stride=2,
                      padding=2),
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_conv3))

        self.encoder_lin1 = nn.Sequential(
            nn.Linear(self.encodeDim_conv3 * self.encode_frameLeng,
                      self.encodeDim_lin1),  # 40x15 (600) -> 100
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_lin1))

        # Posterior parameter heads: mean and log-variance.
        self.encoder_lin21 = nn.Linear(self.encodeDim_lin1, self.latentDim)
        self.encoder_lin22 = nn.Linear(self.encodeDim_lin1, self.latentDim)

        self.decoder_lin1 = nn.Sequential(
            nn.Linear(self.latentDim + 1,
                      self.encodeDim_lin1),  # +1 for the label
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_lin1))

        self.decoder_lin2 = nn.Sequential(
            nn.Linear(self.encodeDim_lin1,
                      self.encodeDim_conv3 * self.encode_frameLeng),
            nn.ReLU(True),
            nn.BatchNorm1d(self.encodeDim_conv3 * self.encode_frameLeng))

        # Each transposed conv doubles the time axis (stride=2,
        # output_padding=1): 15 -> 30 -> 60 -> 120.
        self.decoder_conv_1 = nn.Sequential(
            #nn.MaxUnpool1d(kernel_size=2, stride=2),
            #nn.Dropout(0.25),
            nn.ConvTranspose1d(self.encodeDim_conv3,
                               self.encodeDim_conv2,
                               kernel_size=5,
                               stride=2,
                               padding=2,
                               output_padding=1), )

        self.decoder_conv_2 = nn.Sequential(
            #nn.MaxUnpool1d(kernel_size=2, stride=2),
            #nn.Dropout(0.25),
            nn.ConvTranspose1d(self.encodeDim_conv2,
                               self.encodeDim_conv1,
                               kernel_size=5,
                               stride=2,
                               padding=2,
                               output_padding=1), )

        self.decoder_conv_3 = nn.Sequential(
            #nn.MaxUnpool1d(kernel_size=2, stride=2),
            #nn.Dropout(0.25),
            nn.ConvTranspose1d(self.encodeDim_conv1,
                               self.inputFeat_dim,
                               kernel_size=5,
                               stride=2,
                               padding=2,
                               output_padding=1), )
Пример #30
0
 def _init_module(self, x):
     """Derive layer parameters from *x*, then build the wrapped
     transposed-convolution module from the stored args/kwargs."""
     self._init_params(x)
     layer_cls = nn.ConvTranspose1d
     self.module = layer_cls(*self.args, **self.kwargs)