Example #1
    def __init__(self, mel_channel):
        super(Generator, self).__init__()
        self.mel_channel = mel_channel

        self.generator = nn.Sequential(
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(nn.Conv1d(mel_channel, 512, kernel_size=7, stride=1)),

            nn.LeakyReLU(0.2),
            nn.utils.weight_norm(nn.ConvTranspose1d(512, 256, kernel_size=16, stride=8, padding=4)),

            ResStack(256),

            nn.LeakyReLU(0.2),
            nn.utils.weight_norm(nn.ConvTranspose1d(256, 128, kernel_size=16, stride=8, padding=4)),

            ResStack(128),

            nn.LeakyReLU(0.2),
            nn.utils.weight_norm(nn.ConvTranspose1d(128, 64, kernel_size=4, stride=2, padding=1)),

            ResStack(64),

            nn.LeakyReLU(0.2),
            nn.utils.weight_norm(nn.ConvTranspose1d(64, 32, kernel_size=4, stride=2, padding=1)),

            ResStack(32),

            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(nn.Conv1d(32, 1, kernel_size=7, stride=1)),
            nn.Tanh(),
        )
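A minimal standalone shape check (a sketch; the 80-channel mel input and 100-frame length are assumptions, not part of the example) showing why ReflectionPad1d(3) is paired with the kernel_size=7, stride=1 convolutions at both ends of the generator: padding (7 - 1) / 2 = 3 on each side leaves the time dimension unchanged.

    import torch
    import torch.nn as nn

    pad_conv = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.Conv1d(80, 512, kernel_size=7, stride=1),
    )
    mel = torch.randn(1, 80, 100)   # (batch, mel_channels, frames)
    print(pad_conv(mel).shape)      # torch.Size([1, 512, 100])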
Example #2
    def __init__(self):
        super().__init__()

        input_size = 512

        upsample = [8, 8, 2, 2]

        layers = [nn.ReflectionPad1d(3), WMConv1d(80, input_size, 7)]

        for i, s in enumerate(upsample):
            input_size //= 2

            layers += [
                nn.LeakyReLU(0.3),
                WMConvTranspose1d(input_size * 2,
                                  input_size,
                                  s * 2,
                                  s,
                                  padding=s // 2 + s % 2),
                ResStack(input_size)
            ]

        layers += [
            nn.LeakyReLU(0.3),
            nn.ReflectionPad1d(3),
            WMConv1d(input_size, 1, 7, 1),
            nn.Tanh()
        ]

        self.model = nn.Sequential(*layers)
Example #3
    def __init__(self,
                 classes,
                 input_acc_size,
                 input_nc=1,
                 output_nc=1,
                 ngf=64,
                 n_blocks=4):
        super(Generator, self).__init__()

        self.classes = classes
        self.input_acc_size = input_acc_size

        # label embedding
        model = [nn.Embedding(classes, input_acc_size)]
        self.embed_model = nn.Sequential(*model)

        # concatenated
        model = [
            nn.ReflectionPad1d(1),
            nn.Conv1d(input_nc, ngf, kernel_size=8, padding=0, bias=True),
            nn.InstanceNorm1d(ngf),
            nn.ReLU(True)
        ]

        n_downsampling = 2
        for i in range(n_downsampling):
            mult = 2**i
            model += [
                nn.Conv1d(ngf * mult,
                          ngf * mult * 2,
                          kernel_size=3,
                          stride=2,
                          padding=1,
                          bias=True),
                nn.InstanceNorm1d(ngf * mult * 2),
                nn.ReLU(True)
            ]

        mult = 2**n_downsampling
        for i in range(n_blocks):
            model += [ResnetBlock(ngf * mult)]

        for i in range(n_downsampling):
            mult = 2**(n_downsampling - i)
            model += [
                nn.ConvTranspose1d(ngf * mult,
                                   int(ngf * mult / 2),
                                   kernel_size=4,
                                   stride=2,
                                   padding=1,
                                   output_padding=1,
                                   bias=True),
                nn.InstanceNorm1d(int(ngf * mult / 2)),
                nn.ReLU(True)
            ]
        model += [nn.ReflectionPad1d(3)]
        model += [nn.Conv1d(ngf, output_nc, kernel_size=8, padding=1)]
        model += [nn.Tanh()]

        self.model = nn.Sequential(*model)
Example #4
    def __init__(self):
        super().__init__()

        down_sampling = 4

        channels = [64, 256, 1024, 1024]

        prev_channel = 16

        self.blocks = nn.ModuleList([
            nn.Sequential(nn.ReflectionPad1d(7), WMConv1d(1, prev_channel, 15),
                          nn.LeakyReLU(0.3))
        ])

        for i, channel in enumerate(channels):
            self.blocks.extend([
                nn.Sequential(
                    nn.ReflectionPad1d(20),
                    WMConv1d(prev_channel, channel, 41, 4, groups=4**(i + 1)),
                    nn.LeakyReLU(0.3))
            ])

            prev_channel = channel

        self.blocks.extend([
            nn.Sequential(nn.ReflectionPad1d(2),
                          WMConv1d(prev_channel, 1024, 5, 1),
                          nn.LeakyReLU(0.3)),
            nn.Sequential(nn.ReflectionPad1d(1), WMConv1d(1024, 1, 3, 1))
        ])
Example #5
	def __init__(self, ic = 1, oc = 1, norm_type = 'instancenorm', use_sn = False):
		super(ResBlock, self).__init__()
		self.ic = ic
		self.oc = oc
		self.norm_type = norm_type
		self.use_sn = use_sn

		self.relu = nn.ReLU(inplace = True)
		self.reflection_pad1 = nn.ReflectionPad1d(15)
		self.reflection_pad2 = nn.ReflectionPad1d(15)

		self.conv1 = nn.Conv1d(ic, oc, 31, 1, 0, bias = False)
		self.conv2 = nn.Conv1d(oc, oc, 31, 1, 0, bias = False)

		if(self.use_sn == True):
			self.conv1 = SpectralNorm(self.conv1)
			self.conv2 = SpectralNorm(self.conv2)
			
		if(self.norm_type == 'batchnorm'):
			self.bn1 = nn.BatchNorm1d(oc)
			self.bn2 = nn.BatchNorm1d(oc)

		elif(self.norm_type == 'instancenorm'):
			self.bn1 = nn.InstanceNorm1d(oc)
			self.bn2 = nn.InstanceNorm1d(oc)
Example #6
    def __init__(self, input_channel=80, hu=512, ku=[16, 16, 4, 4], kr=[3, 7, 11], Dr=[1, 3, 5]):
        super(Generator, self).__init__()
        self.input = nn.Sequential(
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(nn.Conv1d(input_channel, hu, kernel_size=7))
        )

        generator = []
        
        for k in ku:
            inp = hu
            out = int(inp/2)
            generator += [
                nn.LeakyReLU(0.2),
                nn.utils.weight_norm(nn.ConvTranspose1d(inp, out, k, k//2)),
                MRF(kr, out, Dr)
            ]
            hu = out
        self.generator = nn.Sequential(*generator)

        self.output = nn.Sequential(
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(nn.Conv1d(hu, 1, kernel_size=7, stride=1)),
            nn.Tanh()

        )
Example #7
    def __init__(
        self,
        in_channels=80,
        out_channels=1,
        proj_kernel=7,
        base_channels=512,
        upsample_factors=(8, 8, 2, 2),
        res_kernel=3,
        num_res_blocks=3,
    ):
        super().__init__()

        # assert model parameters
        assert (proj_kernel - 1) % 2 == 0, " [!] proj_kernel should be an odd number."

        # setup additional model parameters
        base_padding = (proj_kernel - 1) // 2
        act_slope = 0.2
        self.inference_padding = 2

        # initial layer
        layers = []
        layers += [
            nn.ReflectionPad1d(base_padding),
            weight_norm(nn.Conv1d(in_channels, base_channels, kernel_size=proj_kernel, stride=1, bias=True)),
        ]

        # upsampling layers and residual stacks
        for idx, upsample_factor in enumerate(upsample_factors):
            layer_in_channels = base_channels // (2 ** idx)
            layer_out_channels = base_channels // (2 ** (idx + 1))
            layer_filter_size = upsample_factor * 2
            layer_stride = upsample_factor
            layer_output_padding = upsample_factor % 2
            layer_padding = upsample_factor // 2 + layer_output_padding
            layers += [
                nn.LeakyReLU(act_slope),
                weight_norm(
                    nn.ConvTranspose1d(
                        layer_in_channels,
                        layer_out_channels,
                        layer_filter_size,
                        stride=layer_stride,
                        padding=layer_padding,
                        output_padding=layer_output_padding,
                        bias=True,
                    )
                ),
                ResidualStack(channels=layer_out_channels, num_res_blocks=num_res_blocks, kernel_size=res_kernel),
            ]

        layers += [nn.LeakyReLU(act_slope)]

        # final layer
        layers += [
            nn.ReflectionPad1d(base_padding),
            weight_norm(nn.Conv1d(layer_out_channels, out_channels, proj_kernel, stride=1, bias=True)),
            nn.Tanh(),
        ]
        self.layers = nn.Sequential(*layers)
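A quick standalone check (assumed channel count and input length, not part of the original class) that the padding rule above makes each transposed convolution upsample the time axis by exactly upsample_factor, following out = (L - 1) * stride - 2 * padding + kernel_size + output_padding:

    import torch
    import torch.nn as nn

    for r in (8, 8, 2, 2):
        layer = nn.ConvTranspose1d(
            4, 4,
            kernel_size=r * 2,
            stride=r,
            padding=r // 2 + r % 2,
            output_padding=r % 2,
        )
        x = torch.randn(1, 4, 50)
        assert layer(x).shape[-1] == 50 * r  # exact r-times upsampling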
Example #8
    def __init__(self, channels, kernel_size=8, global_pool=None, convpool=None, compress=False, batchnorm=False):
        super(Encoder, self).__init__()

        model = []
        acti = nn.LeakyReLU(0.2)

        nr_layer = len(channels) - 2 if compress else len(channels) - 1

        for i in range(nr_layer):
            if convpool is None:
                pad = (kernel_size - 1) // 2
                model.append(nn.ReflectionPad1d(pad))
                model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=2))
                if batchnorm:
                    model.append(nn.BatchNorm1d(channels[i + 1]))
                model.append(acti)
            else:  # body & view
                pad = (kernel_size - 1) // 2
                model.append(nn.ReflectionPad1d(pad))
                model.append(nn.Conv1d(channels[i], channels[i + 1], kernel_size=kernel_size, stride=1))
                if batchnorm:
                    model.append(nn.BatchNorm1d(channels[i + 1]))
                model.append(acti)
                model.append(convpool(kernel_size=2, stride=2))  # nn.MaxPool1d

        self.global_pool = global_pool
        self.compress = compress

        self.model = nn.Sequential(*model)

        if self.compress:
            self.conv1x1 = nn.Conv1d(channels[-2], channels[-1], kernel_size=1)

        self.last_conv = nn.Conv1d(channels[-1], channels[-1], kernel_size=1, bias=False)
Example #9
    def __init__(self,
                 channels,
                 padding=3,
                 kernel_size=8,
                 conv_stride=2,
                 conv_pool=None):
        super(ConvEncoder, self).__init__()

        self.in_channels = channels[0]

        model = []
        acti = nn.LeakyReLU(0.2)

        nr_layer = len(channels) - 1

        for i in range(nr_layer):
            if conv_pool is None:
                model.append(nn.ReflectionPad1d(padding))
                model.append(
                    nn.Conv1d(channels[i],
                              channels[i + 1],
                              kernel_size=kernel_size,
                              stride=conv_stride))
                model.append(acti)
            else:
                model.append(nn.ReflectionPad1d(padding))
                model.append(
                    nn.Conv1d(channels[i],
                              channels[i + 1],
                              kernel_size=kernel_size,
                              stride=conv_stride))
                model.append(acti)
                model.append(conv_pool(kernel_size=2, stride=2))

        self.model = nn.Sequential(*model)
Example #10
    def __init__(self, mel_dim):
        super().__init__()

        factor = [8, 8, 2, 2]

        layers = [
            nn.ReflectionPad1d(3),  # pad 3 frames on each side of the time axis
            weight_norm(nn.Conv1d(mel_dim, 512, kernel_size=7)),
        ]

        input_size = 512
        for f in factor:
            layers += [
                encoder_sequential(input_size,
                                   input_size // 2,
                                   kernel_size=f * 2,
                                   stride=f,
                                   padding=f // 2 + f % 2)
            ]
            input_size //= 2
            for d in range(3):
                layers += [residual_stack(input_size, 3**d)]

        layers += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            weight_norm(nn.Conv1d(32, 1, kernel_size=7)),
            nn.Tanh(),
        ]

        self.generator = nn.Sequential(*layers)
Example #11
 def __init__(self, input_channels, output_channels, kernel_size, stride, drop_out_prob=-1.0, dilation=1, bn=True, activation_use=True):
     super(Conv1dBlock, self).__init__()
     self.input_channels = input_channels
     self.output_channels = output_channels
     self.kernel_size = kernel_size
     self.stride = stride
     self.drop_out_prob = drop_out_prob
     self.dilation = dilation
     self.activation_use = activation_use
     self.padding = kernel_size[0]
     '''Padding Calculation'''
     input_rows = input_channels
     filter_rows = kernel_size[0]
     out_rows = (input_rows + stride - 1) // stride
     self.padding_rows = max(0, (out_rows - 1) * stride + (filter_rows - 1) * dilation + 1 - input_rows)
     if self.padding_rows > 0:
         if self.padding_rows % 2 == 0:
             self.paddingAdded = nn.ReflectionPad1d(self.padding_rows // 2)
         else:
             self.paddingAdded = nn.ReflectionPad1d((self.padding_rows // 2, (self.padding_rows + 1) // 2))
     else:
         self.paddingAdded = nn.Identity()
     self.conv1 = nn.Conv1d(in_channels=input_channels, out_channels=output_channels,
                            kernel_size=kernel_size, stride=stride, padding=0, dilation=dilation)
     self.batch_norm = nn.BatchNorm1d(num_features=output_channels, momentum=0.9, eps=0.001) if bn else nn.Identity()
     self.drop_out = nn.Dropout(drop_out_prob) if self.drop_out_prob != -1 else nn.Identity()
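A small standalone sketch (assumed values, independent of the block above) of the "same"-padding rule the constructor implements: compute the total padding so that out_len == ceil(in_len / stride), split an odd total between the left and right sides, and run the convolution itself with padding=0.

    import torch
    import torch.nn as nn

    in_len, k, stride, dilation = 100, 11, 2, 1
    out_len = (in_len + stride - 1) // stride
    total = max(0, (out_len - 1) * stride + (k - 1) * dilation + 1 - in_len)
    pad = nn.ReflectionPad1d((total // 2, (total + 1) // 2))
    conv = nn.Conv1d(1, 8, kernel_size=k, stride=stride)
    x = torch.randn(1, 1, in_len)
    assert conv(pad(x)).shape[-1] == out_len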
Example #12
    def __init__(self,
                 mel_channel,
                 n_residual_layers,
                 ratios=[8, 8, 4],
                 mult=256,
                 out_band=1):
        super(Generator, self).__init__()
        self.mel_channel = mel_channel

        generator = [
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(
                nn.Conv1d(mel_channel, mult * 2, kernel_size=7, stride=1)),
        ]

        # Upsample to raw audio scale
        for _, r in enumerate(ratios):
            generator += [Upsample(mult, r)]
            for j in range(n_residual_layers):
                generator += [ResStack(mult, dilation=3**j)]

            mult //= 2

        generator += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(
                nn.Conv1d(mult * 2, out_band, kernel_size=7, stride=1)),
            nn.Tanh(),
        ]

        self.generator = nn.Sequential(*generator)
        self.apply(weights_init)
Example #13
    def __init__(self):
        super().__init__()

        self.Conv_1 = nn.Sequential(
            nn.ReflectionPad1d(7), weight_norm(nn.Conv1d(1, 16,
                                                         kernel_size=15)),
            nn.LeakyReLU(0.2))
        self.Conv_2 = nn.Sequential(
            weight_norm(
                nn.Conv1d(16,
                          64,
                          kernel_size=41,
                          stride=4,
                          padding=20,
                          groups=4)), nn.LeakyReLU(0.2))
        self.Conv_3 = nn.Sequential(
            weight_norm(
                nn.Conv1d(64,
                          256,
                          kernel_size=41,
                          stride=4,
                          padding=20,
                          groups=16)), nn.LeakyReLU(0.2))
        self.Conv_4 = nn.Sequential(
            weight_norm(
                nn.Conv1d(256,
                          1024,
                          kernel_size=41,
                          stride=4,
                          padding=20,
                          groups=64)), residual_stack(1024, 3),
            residual_stack(1024, 9), nn.LeakyReLU(0.2))
        self.ConvTrans_4 = nn.Sequential(
            weight_norm(
                nn.ConvTranspose1d(1024,
                                   256,
                                   kernel_size=16,
                                   stride=4,
                                   padding=6)), residual_stack(256, 3),
            residual_stack(256, 9), nn.LeakyReLU(0.2))
        self.ConvTrans_3 = nn.Sequential(
            weight_norm(
                nn.ConvTranspose1d(256,
                                   64,
                                   kernel_size=16,
                                   stride=4,
                                   padding=6)), residual_stack(64, 3),
            residual_stack(64, 9), nn.LeakyReLU(0.2))
        self.ConvTrans_2 = nn.Sequential(
            weight_norm(
                nn.ConvTranspose1d(64, 16, kernel_size=16, stride=4,
                                   padding=6)), residual_stack(16, 3),
            residual_stack(16, 9), nn.LeakyReLU(0.2))
        self.ConvTrans_1 = nn.Sequential(
            nn.ReflectionPad1d(3), weight_norm(nn.Conv1d(16, 1,
                                                         kernel_size=7)),
            nn.Tanh())
Example #14
    def __init__(self, config, base_width, n_labels):
        super().__init__()
        self.stat_dim = 1500
        in_dim = config['input_dim']
        self.tdnn_fr1 = nn.Sequential(
            nn.Conv1d(in_dim, base_width, stride=1, dilation=1, kernel_size=5),
            nn.BatchNorm1d(base_width),
            nn.ReLU(True),
        )
        
        self.tdnn_fr2 = nn.Sequential(
            nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(base_width),
            nn.ReLU(True),
            nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(base_width),
            nn.ReflectionPad1d(6)
        )
        
        self.tdnn_fr3 = nn.Sequential( 
            nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(base_width),
            nn.ReLU(True),
            nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(base_width),
            nn.ReflectionPad1d(6)
        )
        
        self.connect_conv = nn.Conv1d(base_width, self.stat_dim, kernel_size=1)
        
        self.tdnn_fr4 = nn.Sequential(
            nn.Conv1d(base_width, base_width, stride=1, dilation=3, kernel_size=3),
            nn.BatchNorm1d(base_width),
            nn.ReLU(True),
            nn.Conv1d(base_width, self.stat_dim, stride=1, dilation=1, kernel_size=1),
            nn.BatchNorm1d(self.stat_dim),
            nn.ReflectionPad1d(3)
        )
        
        self.tdnn_uttr = nn.Sequential(
            st_pool_layer(),
            # ST_pool = 1
            nn.Linear(self.stat_dim*2, base_width),
            nn.BatchNorm1d(base_width),
            nn.ReLU(True),
            # xvector = 4
        )

        self.classifier = nn.Sequential(
            nn.Linear(base_width, base_width),
            nn.BatchNorm1d(base_width),
            nn.Linear(base_width, n_labels)
        )
        
        self._initialize_weights()
Example #15
    def __init__(self, input_size, ngf, n_residual_layers, ratios=[8, 8, 2, 2]):
        super().__init__()
        # ratios = [8, 8, 2, 2]
        self.hop_length = np.prod(ratios)
        mult = int(2 ** len(ratios))

        model = [
            nn.ReflectionPad1d(3),
            WNConv1d(input_size, mult * ngf, kernel_size=7, padding=0),
        ]

        # Upsample to raw audio scale
        for i, r in enumerate(ratios):
            if i == 0:
                model += [
                    nn.LeakyReLU(0.2),
                    BGRU(mult * ngf),
                    WNConvTranspose1d(
                        mult * ngf,
                        mult * ngf // 2,
                        kernel_size=r * 2,
                        stride=r,
                        padding=r // 2 + r % 2,
                        output_padding=r % 2,
                    ),
                ]
            else:
                model += [
                    nn.LeakyReLU(0.2),
                    WNConvTranspose1d(
                        mult * ngf,
                        mult * ngf // 2,
                        kernel_size=r * 2,
                        stride=r,
                        padding=r // 2 + r % 2,
                        output_padding=r % 2,
                    ),
                ]

            for j in range(n_residual_layers):
                model += [ResnetBlock(mult * ngf // 2, dilation=3 ** j)]

            mult //= 2

        model += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            # BGRU(ngf),
            WNConv1d(ngf, 1, kernel_size=7, padding=0),
            nn.Tanh(),
        ]

        self.model = nn.Sequential(*model)
        self.apply(weights_init)
Example #16
    def __init__(self, input_size, ngf, n_residual_layers):
        # In original paper, input_size == n_mel_channels
        # ngf is a model hyperparameter, meaning the final number of feature maps in Generator, 32 in paper.
        super().__init__()
        # ratios = [8, 8, 2, 2] # 4 stages of upsampling: 8x, 8x, 2x, 2x --> 256x (hop_length when calculating Mel Spectrogram)
        ratios = [8, 8, 4, 2, 2]  # Note(houwx): Modify to 8x, 8x, 4x, 2x, 2x = 256 x 4 here
        self.hop_length = np.prod(ratios)
        mult = int(2**len(ratios))  # 2 ** 5 = 32

        model = [
            # Padding on left & right: (N, n_mel, T_mel) --> (N, n_mel, T_mel + 3 left + 3 right)
            nn.ReflectionPad1d(3),
            # (N, n_mel=80, T_mel + 6) --> (N, mult * ngf = 32 * ngf, T_mel)
            WNConv1d(
                in_channels=input_size,
                out_channels=mult * ngf,
                kernel_size=7,
                padding=0,
            ),
        ]

        # Upsample to raw audio scale
        for i, r in enumerate(ratios):
            model += [
                nn.LeakyReLU(0.2),
                WNConvTranspose1d(
                    mult * ngf,
                    mult * ngf // 2,
                    kernel_size=r * 2,  # [16, 16, 8, 4, 4]
                    stride=r,  # [8, 8, 4, 2, 2]
                    padding=r // 2 + r % 2,  # [4, 4, 2, 1, 1]
                    output_padding=r % 2,  # all 0, since every r here is even
                ),
                # First upsample as example: (N, mult * ngf, T_mel) --> (N, mult * ngf // 2, T_mel * r)
            ]

            for j in range(n_residual_layers):  # [0 , 1, 2]
                model += [ResnetBlock(mult * ngf // 2, dilation=3**j)]
                # No Change in shape. First ResBlock: (N, ngf * mult / 2, T_mel * r) --> (N, ngf * mult / 2, T_mel * r)

            mult //= 2
        # After 5 stages, get: (N, ngf, T_mel * 1024)

        model += [
            nn.LeakyReLU(0.2),
            # (N, ngf, T_mel * 1024) --> (N, ngf, T_mel * 1024 + 3 + 3)
            nn.ReflectionPad1d(3),
            # (N, ngf, T_mel * 1024 + 6) --> (N, 1, T_mel * 1024)
            WNConv1d(ngf, 1, kernel_size=7, padding=0),
            nn.Tanh(),
        ]

        self.model = nn.Sequential(*model)
        self.apply(weights_init)
Example #17
    def __init__(self, in_features):
        super(ResidualBlock, self).__init__()

        conv_block = [nn.ReflectionPad1d(1),
                      nn.Conv1d(in_features, in_features, 3),
                      nn.InstanceNorm1d(in_features),
                      nn.ReLU(inplace=True),
                      nn.ReflectionPad1d(1),
                      nn.Conv1d(in_features, in_features, 3),
                      nn.InstanceNorm1d(in_features)]

        self.conv_block = nn.Sequential(*conv_block)
Example #18
    def __init__(
        self,
        in_dim,
        hidden_dim,
        out_dim,
        num_layers=4,
        # NOTE: you must carefully set the following parameters
        in_lf0_idx=300,
        in_lf0_min=5.3936276,
        in_lf0_max=6.491111,
        out_lf0_idx=180,
        out_lf0_mean=5.953093881972361,
        out_lf0_scale=0.23435173188961034,
        init_type="none",
        use_mdn=False,
        num_gaussians=8,
        dim_wise=False,
    ):
        super().__init__()
        self.in_lf0_idx = in_lf0_idx
        self.in_lf0_min = in_lf0_min
        self.in_lf0_max = in_lf0_max
        self.out_lf0_idx = out_lf0_idx
        self.out_lf0_mean = out_lf0_mean
        self.out_lf0_scale = out_lf0_scale
        self.use_mdn = use_mdn

        model = [
            nn.ReflectionPad1d(3),
            WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
        ]
        for n in range(num_layers):
            model.append(ResnetBlock(hidden_dim, dilation=2 ** n))

        last_conv_out_dim = hidden_dim if use_mdn else out_dim
        model += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
        ]
        self.model = nn.Sequential(*model)

        if self.use_mdn:
            self.mdn_layer = MDNLayer(
                in_dim=hidden_dim,
                out_dim=out_dim,
                num_gaussians=num_gaussians,
                dim_wise=dim_wise,
            )
        else:
            self.mdn_layer = None

        init_weights(self, init_type)
Example #19
    def __init__(self):
        super(JCU_Discriminator, self).__init__()
        self.mel_conv = nn.Sequential(
            nn.ReflectionPad1d(3),
            nn.utils.weight_norm(nn.Conv1d(80, 128, kernel_size=2, stride=1)),
            nn.LeakyReLU(0.2, True),
        )
        x_conv = [
            nn.ReflectionPad1d(7),
            nn.utils.weight_norm(nn.Conv1d(1, 16, kernel_size=7, stride=1)),
            nn.LeakyReLU(0.2, True),
        ]
        x_conv += [
            nn.utils.weight_norm(
                nn.Conv1d(
                    16,
                    64,
                    kernel_size=41,
                    stride=4,
                    padding=4 * 5,
                    groups=16 // 4,
                )),
            nn.LeakyReLU(0.2),
        ]
        x_conv += [
            nn.utils.weight_norm(
                nn.Conv1d(
                    64,
                    128,
                    kernel_size=21,
                    stride=2,
                    padding=2 * 5,
                    groups=64 // 4,
                )),
            nn.LeakyReLU(0.2),
        ]
        self.x_conv = nn.Sequential(*x_conv)
        self.mel_conv2 = nn.Sequential(
            nn.utils.weight_norm(
                nn.Conv1d(128, 128, kernel_size=5, stride=1, padding=2)),
            nn.LeakyReLU(0.2, True),
        )
        self.mel_conv3 = nn.utils.weight_norm(
            nn.Conv1d(128, 1, kernel_size=3, stride=1, padding=1))

        self.x_conv2 = nn.Sequential(
            nn.utils.weight_norm(
                nn.Conv1d(128, 128, kernel_size=5, stride=1, padding=2)),
            nn.LeakyReLU(0.2, True),
        )
        self.x_conv3 = nn.utils.weight_norm(
            nn.Conv1d(128, 1, kernel_size=3, stride=1, padding=1))
Example #20
    def __init__(
        self,
        in_dim,
        ff_hidden_dim=2048,
        conv_hidden_dim=1024,
        lstm_hidden_dim=256,
        out_dim=199,
        dropout=0.0,
        num_lstm_layers=2,
        bidirectional=True,
        init_type="none",
    ):
        super().__init__()

        self.ff = nn.Sequential(
            nn.Linear(in_dim, ff_hidden_dim),
            nn.ReLU(),
            nn.Linear(ff_hidden_dim, ff_hidden_dim),
            nn.ReLU(),
            nn.Linear(ff_hidden_dim, ff_hidden_dim),
            nn.ReLU(),
        )

        self.conv = nn.Sequential(
            nn.ReflectionPad1d(3),
            nn.Conv1d(ff_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
            nn.BatchNorm1d(conv_hidden_dim),
            nn.ReLU(),
            nn.ReflectionPad1d(3),
            nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
            nn.BatchNorm1d(conv_hidden_dim),
            nn.ReLU(),
            nn.ReflectionPad1d(3),
            nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
            nn.BatchNorm1d(conv_hidden_dim),
            nn.ReLU(),
        )

        num_direction = 2 if bidirectional else 1
        self.lstm = nn.LSTM(
            conv_hidden_dim,
            lstm_hidden_dim,
            num_lstm_layers,
            bidirectional=bidirectional,
            batch_first=True,
            dropout=dropout,
        )

        last_in_dim = num_direction * lstm_hidden_dim
        self.fc = nn.Linear(last_in_dim, out_dim)
        init_weights(self, init_type)
Example #21
def matchDimensions1D(trunk, mask):
    """ Finds the difference in dimensions between two matrices and pads the smaller one to allow for matrix operations """
    difference = findDifferenceOneAxis(trunk, mask, 1)
    left, right = findPaddingBothSidesOneAxis(np.absolute(difference))

    if difference > 0:
        pad = nn.ReflectionPad1d((left, right))
        mask = pad(mask)
        return trunk, mask
    elif difference < 0:
        pad = nn.ReflectionPad1d((left, right))
        trunk = pad(trunk)
        return trunk, mask
    return trunk, mask
Example #22
    def __init__(self, in_dim, hidden_dim, out_dim, num_layers=4, dropout=0.0):
        super().__init__()
        model = [
            nn.ReflectionPad1d(3),
            WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
        ]
        for n in range(num_layers):
            model.append(ResnetBlock(hidden_dim, dilation=2**n))
        model += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            WNConv1d(hidden_dim, out_dim, kernel_size=7, padding=0),
        ]

        self.model = nn.Sequential(*model)
Example #23
    def __init__(self, kernel, channel, padding, dilations=[1, 3, 5]):
        super().__init__()
        resstack = []
        for dilation in dilations:
            resstack += [
                nn.LeakyReLU(0.2),
                nn.ReflectionPad1d(dilation),
                nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=kernel, dilation=dilation)),
                nn.LeakyReLU(0.2),
                nn.ReflectionPad1d(padding),
                nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1)),
            ]
        self.resstack = nn.Sequential(*resstack)

        self.shortcut = nn.utils.weight_norm(nn.Conv1d(channel, channel, kernel_size=1))
Example #24
 def __init__(
     self,
     c_in: int,
     c_h: int,
     c_out: int,
     kernel_size: int,
     bank_size: int,
     bank_scale: int,
     c_bank: int,
     n_conv_blocks: int,
     n_dense_blocks: int,
     subsample: List[int],
     act: str,
     dropout_rate: float,
 ):
     super(SpeakerEncoder, self).__init__()
     self.c_in = c_in
     self.c_h = c_h
     self.c_out = c_out
     self.kernel_size = kernel_size
     self.n_conv_blocks = n_conv_blocks
     self.n_dense_blocks = n_dense_blocks
     self.subsample = subsample
     self.act = get_act(act)
     self.conv_bank = ConvBank(c_in, c_bank, bank_size, bank_scale, act)
     in_channels = c_bank * (bank_size // bank_scale) + c_in
     self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
     self.first_conv_layers = nn.ModuleList([
         nn.Sequential(
             nn.ReflectionPad1d((kernel_size // 2,
                                 kernel_size // 2 - 1 + kernel_size % 2)),
             nn.Conv1d(c_h, c_h, kernel_size=kernel_size),
         ) for _ in range(n_conv_blocks)
     ])
     self.second_conv_layers = nn.ModuleList([
         nn.Sequential(
             nn.ReflectionPad1d((kernel_size // 2,
                                 kernel_size // 2 - 1 + kernel_size % 2)),
             nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub),
         ) for sub, _ in zip(subsample, range(n_conv_blocks))
     ])
     self.pooling_layer = nn.AdaptiveAvgPool1d(1)
     self.first_dense_layers = nn.ModuleList(
         [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
     self.second_dense_layers = nn.ModuleList(
         [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
     self.output_layer = nn.Linear(c_h, c_out)
     self.dropout_layer = nn.Dropout(p=dropout_rate)
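A standalone check (assumed channel count and length) of the asymmetric padding used in the conv layers above: (kernel_size // 2, kernel_size // 2 - 1 + kernel_size % 2) always adds kernel_size - 1 samples in total, so a stride-1 convolution preserves the sequence length for even and odd kernels alike.

    import torch
    import torch.nn as nn

    for k in (4, 5):
        block = nn.Sequential(
            nn.ReflectionPad1d((k // 2, k // 2 - 1 + k % 2)),
            nn.Conv1d(8, 8, kernel_size=k),
        )
        x = torch.randn(1, 8, 64)
        assert block(x).shape[-1] == 64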
Example #25
    def __init__(
        self,
        in_dim,
        hidden_dim,
        out_dim,
        num_layers=4,
        init_type="none",
        use_mdn=False,
        num_gaussians=8,
        dim_wise=False,
        **kwargs,
    ):
        super().__init__()
        self.use_mdn = use_mdn

        if "dropout" in kwargs:
            warn(
                "dropout argument in Conv1dResnet is deprecated"
                " and will be removed in future versions"
            )

        model = [
            nn.ReflectionPad1d(3),
            WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
        ]
        for n in range(num_layers):
            model.append(ResnetBlock(hidden_dim, dilation=2 ** n))

        last_conv_out_dim = hidden_dim if use_mdn else out_dim
        model += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
        ]

        self.model = nn.Sequential(*model)

        if self.use_mdn:
            self.mdn_layer = MDNLayer(
                in_dim=hidden_dim,
                out_dim=out_dim,
                num_gaussians=num_gaussians,
                dim_wise=dim_wise,
            )
        else:
            self.mdn_layer = None

        init_weights(self, init_type)
Example #26
 def _conv(self,
           in_c: int,
           out_c: int,
           kernel_sz: Optional[Tuple[int]] = None,
           bias: bool = False,
           seq: bool = True,
           stride: Tuple[int] = None):
     ksz = kernel_sz or self.kernel_sz
      bias = bias or self.norm == 'none'  # keep the bias only when explicitly requested or when no norm layer follows
     stride = stride or tuple([1 for _ in ksz])
     if not self.separable or all([ks == 1 for ks in ksz]):
         layers = [nn.Conv3d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 3 else \
                  [nn.Conv2d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 2 else \
                  [nn.Conv1d(in_c, out_c, ksz, bias=bias, stride=stride)]
     else:
         layers = [SeparableConv3d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 3 else \
                  [SeparableConv2d(in_c, out_c, ksz, bias=bias, stride=stride)] if self.dim == 2 else \
                  [SeparableConv1d(in_c, out_c, ksz, bias=bias, stride=stride)]
      if any([ks > 1 for ks in ksz]):
          # symmetric "same" padding: one (left, right) pair per spatial dim, in the
          # reversed order expected by the Pad*d modules
          rp = tuple([
              ks // 2 for p in zip(reversed(ksz), reversed(ksz)) for ks in p
          ])
         layers = [nn.ReplicationPad3d(rp)] + layers if self.dim == 3 else \
                  [nn.ReflectionPad2d(rp)] + layers  if self.dim == 2 else \
                  [nn.ReflectionPad1d(rp)] + layers
     if seq and len(layers) > 1:
         c = nn.Sequential(*layers)
     else:
         c = layers if len(layers) > 1 else layers[0]
     return c
Example #27
    def __init__(self, inchannel, outchannel, uppool, filter_size):
        super(skip_connection_de, self).__init__()
        self.inchannel = inchannel
        self.outchannel = outchannel
        self.uppool = uppool
        self.filter_size = filter_size

        if uppool:  ## input size -> input size*2
            self.pipeline = nn.Sequential(
                nn.Upsample(scale_factor=2, mode='linear'),
                nn.ReflectionPad1d(filter_size // 2),
                nn.Conv1d(inchannel,
                          outchannel,
                          self.filter_size,
                          1,
                          padding=0,
                          bias=False), nn.BatchNorm1d(outchannel),
                nn.LeakyReLU())
        else:  ## input size same
            self.pipeline = nn.Sequential(
                nn.Conv1d(inchannel,
                          outchannel,
                          self.filter_size,
                          1,
                          filter_size // 2,
                          bias=False),
                nn.BatchNorm1d(outchannel),
                nn.LeakyReLU(),
            )
Example #28
    def __init__(self, channels, num_res_blocks, kernel_size):
        super(ResidualStack, self).__init__()

        assert (kernel_size - 1) % 2 == 0, " [!] kernel_size has to be odd."
        base_padding = (kernel_size - 1) // 2

        self.blocks = nn.ModuleList()
        for idx in range(num_res_blocks):
            layer_kernel_size = kernel_size
            layer_dilation = layer_kernel_size**idx
            layer_padding = base_padding * layer_dilation
            self.blocks += [
                nn.Sequential(
                    nn.LeakyReLU(0.2),
                    nn.ReflectionPad1d(layer_padding),
                    weight_norm(
                        nn.Conv1d(channels,
                                  channels,
                                  kernel_size=kernel_size,
                                  dilation=layer_dilation,
                                  bias=True)),
                    nn.LeakyReLU(0.2),
                    weight_norm(
                        nn.Conv1d(channels, channels, kernel_size=1,
                                  bias=True)),
                )
            ]

        self.shortcuts = nn.ModuleList([
            weight_norm(nn.Conv1d(channels, channels, kernel_size=1,
                                  bias=True)) for i in range(num_res_blocks)
        ])
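A short standalone check (assumed values) of the rule used above: with a dilated convolution, padding = (kernel_size - 1) // 2 * dilation keeps the length unchanged, which is what lets each block's output be added to its shortcut branch.

    import torch
    import torch.nn as nn

    k, d = 3, 9  # kernel_size 3, dilation 3**2 as in the third residual block
    pad = (k - 1) // 2 * d
    block = nn.Sequential(nn.ReflectionPad1d(pad), nn.Conv1d(16, 16, k, dilation=d))
    x = torch.randn(1, 16, 128)
    assert block(x).shape[-1] == 128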
Example #29
	def __init__(self, ni, no, ks, stride, pad = None, pad_type = 'Zero', output_pad = 0, use_bn = True, use_sn = False, norm_type = 'batchnorm', activation_type = 'leakyrelu'):
		super(DeConvBlock, self).__init__()
		self.use_bn = use_bn
		self.use_sn = use_sn
		self.norm_type = norm_type
		self.pad_type = pad_type

		if(pad is None):
			pad = ks // 2 // stride

		if(self.pad_type == 'Zero'):
			self.deconv = nn.ConvTranspose1d(ni, no, ks, stride, pad, output_padding = output_pad, bias = False)
		elif(self.pad_type == 'Reflection'):
			self.deconv = nn.ConvTranspose1d(ni, no, ks, stride, 0, output_padding = output_pad, bias = False)
			self.reflection = nn.ReflectionPad1d(pad)
		
		if(self.use_bn == True):
			if(self.norm_type == 'batchnorm'):
				self.bn = nn.BatchNorm1d(no)
			elif(self.norm_type == 'instancenorm'):
				self.bn = nn.InstanceNorm1d(no)

		if(self.use_sn == True):
			self.deconv = SpectralNorm(self.deconv)

		if(activation_type == 'relu'):
			self.act = nn.ReLU(inplace = True)
		elif(activation_type == 'leakyrelu'):
			self.act = nn.LeakyReLU(0.2, inplace = True)
		elif(activation_type == 'elu'):
			self.act = nn.ELU(inplace = True)
		elif(activation_type == 'selu'):
			self.act = nn.SELU(inplace = True)
		elif(activation_type == None):
			self.act = Nothing()
Example #30
    def forward(self, x):
        x = broadcast_dim(x)
        if self.center:
            if self.pad_mode == 'constant':
                padding = nn.ConstantPad1d(self.kernal_width // 2, 0)
            elif self.pad_mode == 'reflect':
                padding = nn.ReflectionPad1d(self.kernal_width // 2)

            x = padding(x)

        # STFT
        fourier_real = conv1d(x, self.wcos, stride=self.hop_length)
        fourier_imag = conv1d(x, self.wsin, stride=self.hop_length)

        # CQT
        CQT_real, CQT_imag = complex_mul(
            (self.cqt_kernels_real, self.cqt_kernels_imag),
            (fourier_real, fourier_imag))

        # Getting CQT Amplitude
        CQT = torch.sqrt(CQT_real.pow(2) + CQT_imag.pow(2))

        if self.norm:
            return CQT / self.kernal_width * torch.sqrt(
                self.lenghts.view(-1, 1))
        else:
            return CQT * torch.sqrt(self.lenghts.view(-1, 1))
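A standalone sketch (assumed kernel width, hop length, and window; independent of the class above) of what the center branch does: reflect-padding by kernel_width // 2 before the strided conv1d centers each analysis frame on its hop position, so the frame count comes out to in_len // hop_length + 1, matching the usual center=True convention.

    import torch
    import torch.nn as nn
    from torch.nn.functional import conv1d

    kernel_width, hop_length = 512, 128
    x = torch.randn(1, 1, 4096)
    window = torch.hann_window(kernel_width).view(1, 1, -1)  # a single analysis kernel
    x_padded = nn.ReflectionPad1d(kernel_width // 2)(x)
    frames = conv1d(x_padded, window, stride=hop_length)
    print(frames.shape)  # torch.Size([1, 1, 33]) == 4096 // 128 + 1 frames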