def __init__(self,
             kernel_size=3,
             channels=32,
             dilation=1,
             bias=True,
             nonlinear_activation="LeakyReLU",
             nonlinear_activation_params={"negative_slope": 0.2},
             pad="ReflectionPad1d",
             pad_params={},
             use_causal_conv=False,
             ):
    """Initialize ResidualStack module.

    Args:
        kernel_size (int): Kernel size of dilation convolution layer.
        channels (int): Number of channels of convolution layers.
        dilation (int): Dilation factor.
        bias (bool): Whether to add bias parameter in convolution layers.
        nonlinear_activation (str): Activation function module name.
        nonlinear_activation_params (dict): Hyperparameters for activation function.
        pad (str): Padding function module name before dilated convolution layer.
        pad_params (dict): Hyperparameters for padding function.
        use_causal_conv (bool): Whether to use causal convolution.

    """
    super(ResidualStack, self).__init__()

    # define residual stack part
    if not use_causal_conv:
        assert (kernel_size - 1) % 2 == 0, "Even-number kernel sizes are not supported."
        self.stack = torch.nn.Sequential(
            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
            getattr(torch.nn, pad)((kernel_size - 1) // 2 * dilation, **pad_params),
            torch.nn.Conv1d(channels, channels, kernel_size, dilation=dilation, bias=bias),
            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
            torch.nn.Conv1d(channels, channels, 1, bias=bias),
        )
    else:
        self.stack = torch.nn.Sequential(
            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
            CausalConv1d(channels, channels, kernel_size, dilation=dilation,
                         bias=bias, pad=pad, pad_params=pad_params),
            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
            torch.nn.Conv1d(channels, channels, 1, bias=bias),
        )

    # define extra layer for skip connection
    self.skip_layer = torch.nn.Conv1d(channels, channels, 1, bias=bias)
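
# A minimal usage sketch (not part of the original file). It assumes ResidualStack.forward
# returns self.stack(c) + self.skip_layer(c); here the two submodules built in __init__ are
# called directly, so the example depends only on the code above. The tensor shapes are
# illustrative values, not values from the repository.
import torch

stack = ResidualStack(kernel_size=3, channels=32, dilation=3)
c = torch.randn(2, 32, 100)                   # (batch, channels, time)
out = stack.stack(c) + stack.skip_layer(c)    # residual branch + 1x1 skip connection
assert out.shape == c.shape                   # the symmetric padding keeps the time axis unchanged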
def test_causal_conv(kernel_size, dilation, pad, pad_params):
    x = torch.randn(1, 1, 32)
    conv = CausalConv1d(1, 1, kernel_size, dilation, pad=pad, pad_params=pad_params)
    y1 = conv(x)

    # perturb only the second half of the input; a causal convolution must not let
    # these "future" samples change outputs at earlier time steps
    x[:, :, 16:] += torch.randn(1, 1, 16)
    y2 = conv(x)

    # output length must match input length
    assert x.size(2) == y1.size(2)

    # outputs before the perturbed region must be bit-identical
    np.testing.assert_array_equal(
        y1[:, :, :16].cpu().numpy(),
        y2[:, :, :16].cpu().numpy(),
    )
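
# The test above expects its arguments from a pytest parametrization that is not shown here;
# the sweep below is an assumed, minimal way to drive it directly. The specific
# (kernel_size, dilation, pad, pad_params) combinations are illustrative, not the
# repository's actual parametrize values.
if __name__ == "__main__":
    for kernel_size, dilation, pad, pad_params in [
        (3, 1, "ConstantPad1d", {"value": 0.0}),
        (5, 3, "ReflectionPad1d", {}),
        (2, 2, "ReplicationPad1d", {}),
    ]:
        test_causal_conv(kernel_size, dilation, pad, pad_params)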
def __init__(self,
             in_channels=80,
             out_channels=1,
             kernel_size=7,
             channels=512,
             bias=True,
             upsample_scales=[8, 8, 2, 2],
             stack_kernel_size=3,
             stacks=3,
             nonlinear_activation="LeakyReLU",
             nonlinear_activation_params={"negative_slope": 0.2},
             pad="ReflectionPad1d",
             pad_params={},
             use_final_nonlinear_activation=True,
             use_weight_norm=True,
             use_causal_conv=False,
             ):
    """Initialize MelGANGenerator module.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Kernel size of initial and final conv layer.
        channels (int): Initial number of channels for conv layer.
        bias (bool): Whether to add bias parameter in convolution layers.
        upsample_scales (list): List of upsampling scales.
        stack_kernel_size (int): Kernel size of dilated conv layers in residual stack.
        stacks (int): Number of stacks in a single residual stack.
        nonlinear_activation (str): Activation function module name.
        nonlinear_activation_params (dict): Hyperparameters for activation function.
        pad (str): Padding function module name before dilated convolution layer.
        pad_params (dict): Hyperparameters for padding function.
        use_final_nonlinear_activation (bool): Whether to apply a Tanh activation after the final layer.
        use_weight_norm (bool): Whether to use weight norm.
            If set to true, it will be applied to all of the conv layers.
        use_causal_conv (bool): Whether to use causal convolution.

    """
    super(MelGANGenerator, self).__init__()

    # check hyperparameters are valid
    assert channels >= np.prod(upsample_scales)
    assert channels % (2**len(upsample_scales)) == 0
    if not use_causal_conv:
        assert (kernel_size - 1) % 2 == 0, "Even-number kernel sizes are not supported."

    # add initial layer
    layers = []
    if not use_causal_conv:
        layers += [
            getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
            torch.nn.Conv1d(in_channels, channels, kernel_size, bias=bias),
        ]
    else:
        layers += [
            CausalConv1d(
                in_channels, channels, kernel_size,
                bias=bias, pad=pad, pad_params=pad_params,
            ),
        ]

    for i, upsample_scale in enumerate(upsample_scales):
        # add upsampling layer
        layers += [
            getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)
        ]
        if not use_causal_conv:
            layers += [
                torch.nn.ConvTranspose1d(
                    channels // (2**i),
                    channels // (2**(i + 1)),
                    upsample_scale * 2,
                    stride=upsample_scale,
                    padding=upsample_scale // 2 + upsample_scale % 2,
                    output_padding=upsample_scale % 2,
                    bias=bias,
                )
            ]
        else:
            layers += [
                CausalConvTranspose1d(
                    channels // (2**i),
                    channels // (2**(i + 1)),
                    upsample_scale * 2,
                    stride=upsample_scale,
                    bias=bias,
                )
            ]

        # add residual stack
        for j in range(stacks):
            layers += [
                ResidualStack(
                    kernel_size=stack_kernel_size,
                    channels=channels // (2**(i + 1)),
                    dilation=stack_kernel_size**j,
                    bias=bias,
                    nonlinear_activation=nonlinear_activation,
                    nonlinear_activation_params=nonlinear_activation_params,
                    pad=pad,
                    pad_params=pad_params,
                    use_causal_conv=use_causal_conv,
                )
            ]

    # add final layer
    layers += [
        getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params)
    ]
    if not use_causal_conv:
        layers += [
            getattr(torch.nn, pad)((kernel_size - 1) // 2, **pad_params),
            torch.nn.Conv1d(channels // (2**(i + 1)), out_channels, kernel_size, bias=bias),
        ]
    else:
        layers += [
            CausalConv1d(
                channels // (2**(i + 1)), out_channels, kernel_size,
                bias=bias, pad=pad, pad_params=pad_params,
            ),
        ]
    if use_final_nonlinear_activation:
        layers += [torch.nn.Tanh()]

    # define the model as a single function
    self.melgan = torch.nn.Sequential(*layers)

    # apply weight norm
    if use_weight_norm:
        self.apply_weight_norm()

    # reset parameters
    self.reset_parameters()

    # initialize pqmf for inference
    self.pqmf = None
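
# A minimal inference sketch (assumptions: the surrounding class defines forward(c) as
# self.melgan(c), and apply_weight_norm / reset_parameters exist elsewhere in the file).
# With the default upsample_scales [8, 8, 2, 2] the total upsampling factor is
# 8 * 8 * 2 * 2 = 256, so each mel frame becomes 256 waveform samples.
import torch

model = MelGANGenerator()                 # defaults: 80 mel bins in, 1 waveform channel out
mel = torch.randn(1, 80, 100)             # (batch, in_channels, frames)
with torch.no_grad():
    wav = model.melgan(mel)               # equivalent to forward() under the assumption above
print(wav.shape)                          # expected under these defaults: torch.Size([1, 1, 25600])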
def __init__(self,
             in_channels=80,
             out_channels=1,
             channels=512,
             kernel_size=7,
             upsample_scales=(8, 8, 2, 2),
             upsample_kernel_sizes=(16, 16, 4, 4),
             resblock_kernel_sizes=(3, 7, 11),
             resblock_dilations=[(1, 3, 5), (1, 3, 5), (1, 3, 5)],
             use_additional_convs=True,
             bias=True,
             nonlinear_activation="LeakyReLU",
             nonlinear_activation_params={"negative_slope": 0.1},
             use_causal_conv=False,
             use_weight_norm=True,
             ):
    """Initialize HiFiGANGenerator module.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        channels (int): Number of hidden representation channels.
        kernel_size (int): Kernel size of initial and final conv layer.
        upsample_scales (list): List of upsampling scales.
        upsample_kernel_sizes (list): List of kernel sizes for upsampling layers.
        resblock_kernel_sizes (list): List of kernel sizes for residual blocks.
        resblock_dilations (list): List of dilation list for residual blocks.
        use_additional_convs (bool): Whether to use additional conv layers in residual blocks.
        bias (bool): Whether to add bias parameter in convolution layers.
        nonlinear_activation (str): Activation function module name.
        nonlinear_activation_params (dict): Hyperparameters for activation function.
        use_causal_conv (bool): Whether to use causal structure.
        use_weight_norm (bool): Whether to use weight norm.
            If set to true, it will be applied to all of the conv layers.

    """
    super().__init__()

    # check hyperparameters are valid
    assert kernel_size % 2 == 1, "Kernel size must be an odd number."
    assert len(upsample_scales) == len(upsample_kernel_sizes)
    assert len(resblock_dilations) == len(resblock_kernel_sizes)

    # define modules
    self.num_upsamples = len(upsample_kernel_sizes)
    self.num_blocks = len(resblock_kernel_sizes)
    self.use_causal_conv = use_causal_conv
    if not use_causal_conv:
        self.input_conv = torch.nn.Conv1d(
            in_channels,
            channels,
            kernel_size,
            bias=bias,
            padding=(kernel_size - 1) // 2,
        )
    else:
        self.input_conv = CausalConv1d(
            in_channels,
            channels,
            kernel_size,
            bias=bias,
        )
    self.upsamples = torch.nn.ModuleList()
    self.blocks = torch.nn.ModuleList()
    for i in range(len(upsample_kernel_sizes)):
        assert upsample_kernel_sizes[i] == 2 * upsample_scales[i]
        if not use_causal_conv:
            self.upsamples += [
                torch.nn.Sequential(
                    getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
                    torch.nn.ConvTranspose1d(
                        channels // (2**i),
                        channels // (2**(i + 1)),
                        upsample_kernel_sizes[i],
                        upsample_scales[i],
                        padding=upsample_scales[i] // 2 + upsample_scales[i] % 2,
                        output_padding=upsample_scales[i] % 2,
                        bias=bias,
                    ),
                )
            ]
        else:
            self.upsamples += [
                torch.nn.Sequential(
                    getattr(torch.nn, nonlinear_activation)(**nonlinear_activation_params),
                    CausalConvTranspose1d(
                        channels // (2**i),
                        channels // (2**(i + 1)),
                        upsample_kernel_sizes[i],
                        upsample_scales[i],
                        bias=bias,
                    ),
                )
            ]
        for j in range(len(resblock_kernel_sizes)):
            self.blocks += [
                ResidualBlock(
                    kernel_size=resblock_kernel_sizes[j],
                    channels=channels // (2**(i + 1)),
                    dilations=resblock_dilations[j],
                    bias=bias,
                    use_additional_convs=use_additional_convs,
                    nonlinear_activation=nonlinear_activation,
                    nonlinear_activation_params=nonlinear_activation_params,
                    use_causal_conv=use_causal_conv,
                )
            ]
    if not use_causal_conv:
        self.output_conv = torch.nn.Sequential(
            # NOTE(kan-bayashi): follow official implementation but why
            #   using different slope parameter here? (0.1 vs. 0.01)
            torch.nn.LeakyReLU(),
            torch.nn.Conv1d(
                channels // (2**(i + 1)),
                out_channels,
                kernel_size,
                bias=bias,
                padding=(kernel_size - 1) // 2,
            ),
            torch.nn.Tanh(),
        )
    else:
        self.output_conv = torch.nn.Sequential(
            # NOTE(kan-bayashi): follow official implementation but why
            #   using different slope parameter here? (0.1 vs. 0.01)
            torch.nn.LeakyReLU(),
            CausalConv1d(
                channels // (2**(i + 1)),
                out_channels,
                kernel_size,
                bias=bias,
            ),
            torch.nn.Tanh(),
        )

    # apply weight norm
    if use_weight_norm:
        self.apply_weight_norm()

    # reset parameters
    self.reset_parameters()
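
# A hedged usage sketch: it assumes the class also defines forward(c) in the usual HiFi-GAN
# fashion (input_conv -> upsample stages with averaged ResidualBlock outputs -> output_conv),
# which is not shown above, and that ResidualBlock, apply_weight_norm, and reset_parameters
# are defined elsewhere in the file. With upsample_scales (8, 8, 2, 2) the total upsampling
# factor is 256, so 100 input frames would map to 25600 samples.
import torch

model = HiFiGANGenerator()                # defaults: 80-dim mel input, 1-channel waveform output
mel = torch.randn(1, 80, 100)             # (batch, in_channels, frames)
with torch.no_grad():
    wav = model(mel)                      # relies on the assumed forward() described above
print(wav.shape)                          # expected under these assumptions: torch.Size([1, 1, 25600])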