def __init__(self, input_dim, output_dim, num_blocks, kernel_size, dropout, generated=False):
    """Build the encoder: a stack of ReLU conv. blocks followed by a bidirectional LSTM.

    Arguments:
        input_dim -- number of channels entering the first convolutional block
        output_dim -- channels of the remaining blocks and total BiLSTM output size
        num_blocks -- number of convolutional blocks (must be at least 1)
        kernel_size -- kernel size shared by all convolutional blocks
        dropout -- dropout rate shared by all convolutional blocks
        generated -- accepted for interface compatibility; not used in this constructor
    """
    super(Encoder, self).__init__()
    assert num_blocks > 0, ('There must be at least one convolutional block in the encoder.')
    assert output_dim % 2 == 0, ('Bidirectional LSTM output dimension must be divisible by 2.')
    blocks = [ConvBlock(input_dim, output_dim, kernel_size, dropout, 'relu')]
    for _ in range(num_blocks - 1):
        blocks.append(ConvBlock(output_dim, output_dim, kernel_size, dropout, 'relu'))
    self._convs = Sequential(*blocks)
    # each LSTM direction contributes output_dim // 2 features, summing to output_dim
    self._lstm = LSTM(output_dim, output_dim // 2, batch_first=True, bidirectional=True)
def __init__(self, input_dimension, postnet_dimension, num_blocks, kernel_size, dropout):
    """Build the post-net: tanh conv. blocks closed by an identity-activated output block.

    Arguments:
        input_dimension -- channels entering the first block and leaving the last one
        postnet_dimension -- channels used by all intermediate blocks
        num_blocks -- total number of convolutional blocks (must be at least 2)
        kernel_size -- kernel size shared by all blocks
        dropout -- dropout rate shared by all blocks
    """
    super(Postnet, self).__init__()
    assert num_blocks > 1, ('There must be at least two convolutional blocks in the post-net.')
    layers = [ConvBlock(input_dimension, postnet_dimension, kernel_size, dropout, 'tanh')]
    for _ in range(num_blocks - 2):
        layers.append(ConvBlock(postnet_dimension, postnet_dimension, kernel_size, dropout, 'tanh'))
    # final block maps back to the input dimension without a non-linearity
    layers.append(ConvBlock(postnet_dimension, input_dimension, kernel_size, dropout, 'identity'))
    self._convs = Sequential(*layers)
def __init__(self, hparams):
    """Build the encoder: a stack of same-padded 1-d conv. blocks plus a BiLSTM.

    Reads from ``hparams``: encoder_embedding_dim, encoder_kernel_size,
    encoder_n_convolutions, activation and initscheme.
    """
    super(Encoder, self).__init__()
    embedding_dim = hparams.encoder_embedding_dim
    # "same" padding for an odd kernel size
    same_pad = int((hparams.encoder_kernel_size - 1) / 2)
    conv_stack = []
    for _ in range(hparams.encoder_n_convolutions):
        conv_stack.append(
            ConvBlock(dimensions=1,
                      in_channels=embedding_dim,
                      out_channels=embedding_dim,
                      kernel_size=hparams.encoder_kernel_size,
                      stride=1,
                      padding=same_pad,
                      dilation=1,
                      activation=hparams.activation,
                      bn=True,
                      dropout=0.5,
                      initscheme=hparams.initscheme,
                      nonlinearity=hparams.activation))
    self.convolutions = nn.Sequential(*conv_stack)
    # bidirectional halves give embedding_dim features in total
    self.lstm = nn.LSTM(input_size=embedding_dim,
                        hidden_size=int(embedding_dim / 2),
                        num_layers=1,
                        batch_first=True,
                        bidirectional=True)
def __init__(self, hparams):
    """Build the post-net: a stack of 1-d conv. blocks refining the mel prediction.

    The first block maps n_mel_channels -> postnet_embedding_dim (tanh),
    intermediate blocks keep postnet_embedding_dim (tanh), and the last block
    maps back to n_mel_channels with a linear activation.

    Reads from ``hparams``: n_mel_channels, postnet_embedding_dim,
    postnet_kernel_size, postnet_n_convolutions and initscheme.
    """
    super(Postnet, self).__init__()
    # The first and last entries of `params` are overwritten below; with fewer
    # than two blocks both overwrites would hit the same element and the first
    # conv's in_channels would silently no longer match the mel input.
    assert hparams.postnet_n_convolutions > 1, (
        'There must be at least two convolutional blocks in the post-net.')
    params = [(hparams.postnet_embedding_dim, hparams.postnet_embedding_dim, "tanh")
              for _ in range(hparams.postnet_n_convolutions)]
    params[0] = (hparams.n_mel_channels, hparams.postnet_embedding_dim, "tanh")
    params[-1] = (hparams.postnet_embedding_dim, hparams.n_mel_channels, "linear")
    _modules = [
        ConvBlock(dimensions=1,
                  in_channels=in_channels,
                  out_channels=out_channels,
                  kernel_size=hparams.postnet_kernel_size,
                  stride=1,
                  padding=int((hparams.postnet_kernel_size - 1) / 2),
                  dilation=1,
                  activation=activation,
                  bn=True,
                  dropout=0.5,
                  initscheme=hparams.initscheme,
                  nonlinearity=activation)
        for in_channels, out_channels, activation in params
    ]
    # The dead `self.convolutions = nn.ModuleList()` assignment that was
    # immediately shadowed by this Sequential has been removed.
    self.convolutions = nn.Sequential(*_modules)
def __init__(self, input_dim, output_dim, dropout, groups=1):
    """Build a grouped convolutional encoder: two 1x1 conv. blocks followed by
    a pile of highway conv. blocks with cyclically growing dilation.

    Arguments:
        input_dim -- per-group input channels
        output_dim -- per-group output channels
        dropout -- dropout rate shared by all blocks
        groups -- number of convolutional groups (default 1)
    """
    super(ConvolutionalEncoder, self).__init__()
    self._groups = groups
    self._input_dim = input_dim
    self._output_dim = output_dim
    # total channel counts across all groups
    in_channels = input_dim * groups
    out_channels = output_dim * groups
    layers = [
        ConvBlock(in_channels, out_channels, 1, dropout, activation='relu', groups=groups),
        ConvBlock(out_channels, out_channels, 1, dropout, groups=groups),
    ]
    # two rounds of highway blocks with exponentially growing dilation (1, 3, 9, 27)
    for _ in range(2):
        for power in range(4):
            layers.append(HighwayConvBlock(out_channels, out_channels, 3, dropout,
                                           dilation=3**power, groups=groups))
    # two undilated kernel-3 highway blocks
    for _ in range(2):
        layers.append(HighwayConvBlock(out_channels, out_channels, 3, dropout,
                                       dilation=1, groups=groups))
    # two 1x1 highway blocks
    for _ in range(2):
        layers.append(HighwayConvBlock(out_channels, out_channels, 1, dropout,
                                       dilation=1, groups=groups))
    self._layers = Sequential(*layers)
def __init__(self, input_dim, output_dim, bank_size, bank_channels,
             projection_channels, projection_kernel_size, highway_dim,
             gru_dim, dropout):
    """Build a CBHG post-net: conv. bank, max-pool + projections, highway
    network and a bidirectional GRU with a linear output layer.

    Arguments:
        input_dim -- channels of the incoming features
        output_dim -- channels produced by the final linear layer
        bank_size -- number of conv. bank blocks (kernel sizes 1..bank_size)
        bank_channels -- output channels of every bank block
        projection_channels -- channels of the first projection block
        projection_kernel_size -- kernel size of both projection blocks
        highway_dim -- width of the highway layers
        gru_dim -- total BiGRU output size (must be even)
        dropout -- dropout rate shared by all conv. blocks
    """
    super(PostnetCBHG, self).__init__()
    assert gru_dim % 2 == 0, ('Bidirectional GRU dimension must be divisible by 2.')
    # conv. bank: one block per kernel width from 1 to bank_size
    bank_blocks = []
    for width in range(1, bank_size + 1):
        bank_blocks.append(ConvBlock(input_dim, bank_channels, width, dropout, 'relu'))
    self._bank = ModuleList(bank_blocks)
    # pad by one so the stride-1 max-pool keeps the sequence length
    self._pool_and_project = Sequential(
        ConstantPad1d((0, 1), 0.0),
        MaxPool1d(2, stride=1),
        ConvBlock(bank_channels * bank_size, projection_channels,
                  projection_kernel_size, dropout, 'relu'),
        ConvBlock(projection_channels, input_dim,
                  projection_kernel_size, dropout, 'identity'))
    # project into highway width, then four highway layers
    self._highway_layers = Sequential(
        Linear(input_dim, highway_dim),
        ReLU(),
        *(HighwayLayer(highway_dim) for _ in range(4)))
    self._gru = GRU(highway_dim, gru_dim // 2, batch_first=True, bidirectional=True)
    self._output_layer = Linear(gru_dim, output_dim)
def __init__(self, hparams):
    """Build a reference encoder: a stack of 2-d conv. blocks over the mel
    spectrogram followed by a GRU.

    Reads from ``hparams``: reference_encoder_filters, reference_encoder_kernel,
    reference_encoder_strides, reference_encoder_pad,
    reference_encoder_activation, initscheme, n_mel_channels and
    encoder_embedding_dim.
    """
    super().__init__()
    filters = hparams.reference_encoder_filters
    # (in, out) channel pairs: 1 -> f0 -> f1 -> ... -> f[-1]
    channel_pairs = zip([1] + filters[:-1], filters)
    self.convs = torch.nn.ModuleList([
        ConvBlock(dimensions=2,
                  in_channels=c_in,
                  out_channels=c_out,
                  kernel_size=hparams.reference_encoder_kernel,
                  stride=hparams.reference_encoder_strides,
                  padding=hparams.reference_encoder_pad,
                  activation=hparams.reference_encoder_activation,
                  bn=True,
                  initscheme=hparams.initscheme,
                  nonlinearity=hparams.reference_encoder_activation)
        for c_in, c_out in channel_pairs
    ])
    # geometry needed to compute the conv. output size along the mel axis
    self.conv_params = {
        "kernel_size": hparams.reference_encoder_kernel[0],
        "stride": hparams.reference_encoder_strides[0],
        "pad": hparams.reference_encoder_pad[0],
        "n_convs": len(filters),
    }
    self.n_mels = hparams.n_mel_channels
    reduced_mel_dim = self.calculate_size(dim_size=self.n_mels, **self.conv_params)
    self.gru = torch.nn.GRU(
        input_size=filters[-1] * reduced_mel_dim,
        hidden_size=hparams.encoder_embedding_dim // 2,
        batch_first=True)
def Discriminator(input_shape, norm_layer, use_antialias, impl, ndf=64):
    """
    Build a PatchGAN discriminator as described in the original pix2pix
    paper (https://arxiv.org/abs/1611.07004). It classifies overlapping
    patches rather than whole images, which keeps the parameter count low
    and makes the network fully convolutional, so it works on
    arbitrarily-sized inputs.
    """
    # instance norm carries no learned bias, so the convs supply their own
    use_bias = (norm_layer == 'instance')
    inputs = Input(shape=input_shape)

    if use_antialias:
        # stride-1 convs, each followed by an antialiased downsampling step
        x = ConvBlock(ndf, 4, padding='same', activation=tf.nn.leaky_relu)(inputs)
        x = AntialiasSampling(4, mode='down', impl=impl)(x)
        for mult in (2, 4):
            x = ConvBlock(ndf * mult, 4, padding='same', use_bias=use_bias,
                          norm_layer=norm_layer, activation=tf.nn.leaky_relu)(x)
            x = AntialiasSampling(4, mode='down', impl=impl)(x)
    else:
        # plain strided downsampling
        x = ConvBlock(ndf, 4, strides=2, padding='same',
                      activation=tf.nn.leaky_relu)(inputs)
        for mult in (2, 4):
            x = ConvBlock(ndf * mult, 4, strides=2, padding='same', use_bias=use_bias,
                          norm_layer=norm_layer, activation=tf.nn.leaky_relu)(x)

    # head: two valid convs with explicit 1-pixel constant padding
    x = Padding2D(1, pad_type='constant')(x)
    x = ConvBlock(ndf * 8, 4, padding='valid', use_bias=use_bias,
                  norm_layer=norm_layer, activation=tf.nn.leaky_relu)(x)
    x = Padding2D(1, pad_type='constant')(x)
    outputs = ConvBlock(1, 4, padding='valid', use_bias=use_bias)(x)
    return Model(inputs=inputs, outputs=outputs, name='discriminator')
def Generator(input_shape, output_shape, norm_layer, use_antialias: bool,
              resnet_blocks: int, downsample_blocks: int, impl, ngf=64,
              max_kernel_size=256, use_noise=False, freeze_noise=False):
    """
    Build a Resnet-based generator, adapted from Justin Johnson's neural
    style transfer project (https://github.com/jcjohnson/fast-neural-style).

    For BatchNorm we use learnable affine parameters and track running
    statistics (mean/stddev); for InstanceNorm we use neither learnable
    affine parameters nor running statistics.
    """
    use_bias = (norm_layer == 'instance')

    def channels_at(level: int) -> int:
        # channel count doubles per level, capped at max_kernel_size
        grown = ngf * 2**level
        return max_kernel_size if grown > max_kernel_size else grown

    inputs = Input(shape=input_shape)

    # stem: reflect-padded 7x7 depthwise conv
    x = Padding2D(3, pad_type='reflect')(inputs)
    x = ConvDepthwiseBlock(channels_at(0), 7, padding='valid', use_bias=use_bias,
                           norm_layer=norm_layer, activation='relu')(x)

    # downsampling path
    for level in range(1, downsample_blocks + 1):
        x = ConvDepthwiseBlock(channels_at(level), 3, (2, 2), padding='same',
                               use_bias=use_bias, norm_layer=norm_layer,
                               activation='relu')(x)

    # bottleneck of inverted residual blocks
    for _ in range(resnet_blocks):
        x = InvertedResBlock(channels_at(downsample_blocks), 3, use_bias, norm_layer)(x)

    # upsampling path, mirroring the downsampling levels
    for level in range(downsample_blocks - 1, -1, -1):
        if use_antialias:
            # bilinear upsample (+ optional noise), then a regular conv
            x = tf.keras.layers.UpSampling2D(size=(2, 2), interpolation='bilinear')(x)
            if use_noise:
                x = RandomNoise(name=f'noise{level}', freeze_noise=freeze_noise)(x)
            x = ConvBlock(channels_at(level), 3, (1, 1), padding='same',
                          use_bias=use_bias, norm_layer=norm_layer,
                          activation='relu')(x)
        else:
            # optional noise, then a strided transposed conv
            if use_noise:
                x = RandomNoise(name=f'noise{level}', freeze_noise=freeze_noise)(x)
            x = ConvTransposeBlock(channels_at(level), 3, (2, 2), padding='same',
                                   use_bias=use_bias, norm_layer=norm_layer,
                                   activation='relu')(x)

    # head: reflect-padded 7x7 conv into the requested output channels
    x = Padding2D(3, pad_type='reflect')(x)
    outputs = ConvBlock(output_shape[-1], 7, padding='valid', activation='tanh',
                        use_bias=use_bias)(x)
    return Model(inputs=inputs, outputs=outputs, name='generator')