示例#1
0
 def __init__(self, input_dim, output_dim, num_blocks, kernel_size, dropout, generated=False):
     """Convolutional encoder: a stack of conv blocks followed by a bidirectional LSTM.

     Arguments:
         input_dim -- number of channels of the input
         output_dim -- number of channels of the output; must be even, since
             each LSTM direction produces half of it
         num_blocks -- number of convolutional blocks (at least one)
         kernel_size -- kernel size of the convolutional blocks
         dropout -- dropout rate of the convolutional blocks
         generated -- not used in this constructor; kept for interface compatibility

     Raises:
         ValueError -- if num_blocks < 1 or output_dim is odd
     """
     super(Encoder, self).__init__()
     # Raise instead of assert so the checks survive `python -O`.
     if num_blocks <= 0:
         raise ValueError('There must be at least one convolutional block in the encoder.')
     if output_dim % 2 != 0:
         raise ValueError('Bidirectional LSTM output dimension must be divisible by 2.')
     # First block maps input_dim -> output_dim; the remaining blocks keep output_dim.
     convs = [ConvBlock(input_dim, output_dim, kernel_size, dropout, 'relu')] + \
             [ConvBlock(output_dim, output_dim, kernel_size, dropout, 'relu') for _ in range(num_blocks - 1)]
     self._convs = Sequential(*convs)
     # Each direction gets output_dim // 2 units so the concatenated output is output_dim.
     self._lstm = LSTM(output_dim, output_dim // 2, batch_first=True, bidirectional=True)
示例#2
0
 def __init__(self, input_dimension, postnet_dimension, num_blocks,
              kernel_size, dropout):
     """Post-net: a stack of convolutional blocks refining the decoder output.

     The first block maps input_dimension -> postnet_dimension with tanh,
     the middle blocks keep postnet_dimension with tanh, and the final block
     maps back to input_dimension with no activation.

     Arguments:
         input_dimension -- number of channels of the input (and output)
         postnet_dimension -- number of channels of the inner blocks
         num_blocks -- total number of convolutional blocks (at least two)
         kernel_size -- kernel size of the convolutional blocks
         dropout -- dropout rate of the convolutional blocks

     Raises:
         ValueError -- if num_blocks is less than two
     """
     super(Postnet, self).__init__()
     # Raise instead of assert so the check survives `python -O`.
     if num_blocks <= 1:
         raise ValueError(
             'There must be at least two convolutional blocks in the post-net.')
     self._convs = Sequential(
         ConvBlock(input_dimension, postnet_dimension, kernel_size, dropout,
                   'tanh'), *[
                       ConvBlock(postnet_dimension, postnet_dimension,
                                 kernel_size, dropout, 'tanh')
                       for _ in range(num_blocks - 2)
                   ],
         ConvBlock(postnet_dimension, input_dimension, kernel_size, dropout,
                   'identity'))
示例#3
0
    def __init__(self, hparams):
        """Encoder: a stack of identical 1-D conv blocks followed by a bidirectional LSTM."""
        super(Encoder, self).__init__()

        # All blocks keep encoder_embedding_dim channels; the padding value
        # mirrors the usual length-preserving (kernel - 1) / 2 choice.
        blocks = []
        for _ in range(hparams.encoder_n_convolutions):
            blocks.append(
                ConvBlock(dimensions=1,
                          in_channels=hparams.encoder_embedding_dim,
                          out_channels=hparams.encoder_embedding_dim,
                          kernel_size=hparams.encoder_kernel_size,
                          stride=1,
                          padding=int((hparams.encoder_kernel_size - 1) / 2),
                          dilation=1,
                          activation=hparams.activation,
                          bn=True,
                          dropout=0.5,
                          initscheme=hparams.initscheme,
                          nonlinearity=hparams.activation))
        self.convolutions = nn.Sequential(*blocks)

        # Bidirectional: each direction gets half the embedding dimension so
        # the concatenated output keeps encoder_embedding_dim channels.
        self.lstm = nn.LSTM(input_size=hparams.encoder_embedding_dim,
                            hidden_size=int(hparams.encoder_embedding_dim / 2),
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)
示例#4
0
    def __init__(self, hparams):
        """Post-net: a stack of 1-D convolutional blocks refining mel spectrograms.

        All blocks map postnet_embedding_dim -> postnet_embedding_dim with tanh,
        except the first (consumes n_mel_channels) and the last (emits
        n_mel_channels with a linear activation).

        Raises:
            ValueError: if hparams.postnet_n_convolutions is less than two
                (the first/last special-cased blocks would collide).
        """
        super(Postnet, self).__init__()
        # With fewer than two blocks the params[0]/params[-1] patches below
        # overwrite each other and produce a degenerate mel->mel linear net.
        if hparams.postnet_n_convolutions < 2:
            raise ValueError(
                'Post-net needs at least two convolutional blocks.')

        # (in_channels, out_channels, activation) for every block.
        params = [(hparams.postnet_embedding_dim,
                   hparams.postnet_embedding_dim, "tanh")
                  for _ in range(hparams.postnet_n_convolutions)]

        params[0] = (hparams.n_mel_channels, hparams.postnet_embedding_dim,
                     "tanh")
        params[-1] = (hparams.postnet_embedding_dim, hparams.n_mel_channels,
                      "linear")

        _modules = [
            ConvBlock(dimensions=1,
                      in_channels=in_channels,
                      out_channels=out_channels,
                      kernel_size=hparams.postnet_kernel_size,
                      stride=1,
                      # length-preserving padding (exact for odd kernel sizes)
                      padding=int((hparams.postnet_kernel_size - 1) / 2),
                      dilation=1,
                      activation=activation,
                      bn=True,
                      dropout=0.5,
                      initscheme=hparams.initscheme,
                      nonlinearity=activation)
            for in_channels, out_channels, activation in params
        ]

        # NOTE: the original pre-assigned `self.convolutions = nn.ModuleList()`
        # here, which was immediately overwritten; the dead assignment is removed.
        self.convolutions = nn.Sequential(*_modules)
示例#5
0
    def __init__(self, input_dim, output_dim, dropout, groups=1):
        """Convolutional encoder: 1x1 conv front-end followed by stacks of
        highway convolutional blocks with growing dilation.
        """
        super(ConvolutionalEncoder, self).__init__()

        self._groups = groups
        self._input_dim = input_dim
        self._output_dim = output_dim

        # Grouped convolutions see all groups stacked along the channel axis.
        in_channels = input_dim * groups
        out_channels = output_dim * groups

        layers = [
            ConvBlock(in_channels, out_channels, 1, dropout, activation='relu', groups=groups),
            ConvBlock(out_channels, out_channels, 1, dropout, groups=groups),
        ]
        # Two identical rounds of highway blocks with dilation 1, 3, 9, 27.
        for _ in range(2):
            for exponent in range(4):
                layers.append(HighwayConvBlock(out_channels, out_channels, 3, dropout,
                                               dilation=3**exponent, groups=groups))
        # Tail: two kernel-3 then two kernel-1 highway blocks, all dilation 1.
        for kernel in (3, 1):
            for _ in range(2):
                layers.append(HighwayConvBlock(out_channels, out_channels, kernel, dropout,
                                               dilation=1, groups=groups))

        self._layers = Sequential(*layers)
示例#6
0
 def __init__(self, input_dim, output_dim, bank_size, bank_channels,
              projection_channels, projection_kernel_size, highway_dim,
              gru_dim, dropout):
     """CBHG post-net: convolution bank, max-pool + projections, highway
     network and a bidirectional GRU.

     Arguments:
         input_dim -- number of channels of the input
         output_dim -- number of channels of the output
         bank_size -- number of bank convolutions (kernel sizes 1..bank_size)
         bank_channels -- output channels of each bank convolution
         projection_channels -- channels of the first projection convolution
         projection_kernel_size -- kernel size of the projection convolutions
         highway_dim -- dimension of the highway layers
         gru_dim -- output dimension of the bidirectional GRU (must be even)
         dropout -- dropout rate of the convolutional blocks

     Raises:
         ValueError -- if gru_dim is odd
     """
     super(PostnetCBHG, self).__init__()
     # Raise instead of assert so the check survives `python -O`.
     if gru_dim % 2 != 0:
         raise ValueError('Bidirectional GRU dimension must be divisible by 2.')
     # Bank of parallel convolutions with kernel sizes 1..bank_size.
     self._bank = ModuleList([
         ConvBlock(input_dim, bank_channels, k, dropout, 'relu')
         for k in range(1, bank_size + 1)
     ])
     # Pad one step on the right so the stride-1 max-pool preserves length,
     # then project the concatenated bank outputs back down to input_dim.
     self._pool_and_project = Sequential(
         ConstantPad1d((0, 1), 0.0), MaxPool1d(2, stride=1),
         ConvBlock(bank_channels * bank_size, projection_channels,
                   projection_kernel_size, dropout, 'relu'),
         ConvBlock(projection_channels, input_dim, projection_kernel_size,
                   dropout, 'identity'))
     # Four highway layers preceded by a linear projection to highway_dim.
     highways = [HighwayLayer(highway_dim) for _ in range(4)]
     self._highway_layers = Sequential(Linear(input_dim, highway_dim),
                                       ReLU(), *highways)
     # Each GRU direction gets gru_dim // 2 units.
     self._gru = GRU(highway_dim,
                     gru_dim // 2,
                     batch_first=True,
                     bidirectional=True)
     self._output_layer = Linear(gru_dim, output_dim)
示例#7
0
    def __init__(self, hparams):
        """Reference encoder: 2-D conv stack over mel spectrograms followed by a GRU."""
        super().__init__()

        filters = hparams.reference_encoder_filters
        # Channel progression: 1 -> filters[0] -> ... -> filters[-1].
        in_sizes = [1] + filters[:-1]

        conv_stack = []
        for c_in, c_out in zip(in_sizes, filters):
            conv_stack.append(
                ConvBlock(dimensions=2,
                          in_channels=c_in,
                          out_channels=c_out,
                          kernel_size=hparams.reference_encoder_kernel,
                          stride=hparams.reference_encoder_strides,
                          padding=hparams.reference_encoder_pad,
                          activation=hparams.reference_encoder_activation,
                          bn=True,
                          initscheme=hparams.initscheme,
                          nonlinearity=hparams.reference_encoder_activation))
        self.convs = torch.nn.ModuleList(conv_stack)

        # Per-axis conv parameters used to compute the size of the
        # frequency axis after the conv stack (see calculate_size).
        self.conv_params = {
            "kernel_size": hparams.reference_encoder_kernel[0],
            "stride": hparams.reference_encoder_strides[0],
            "pad": hparams.reference_encoder_pad[0],
            "n_convs": len(filters)
        }

        self.n_mels = hparams.n_mel_channels

        out_channels = self.calculate_size(dim_size=self.n_mels,
                                           **self.conv_params)

        # GRU input: last conv's channels times the reduced frequency size.
        self.gru = torch.nn.GRU(
            input_size=filters[-1] * out_channels,
            hidden_size=hparams.encoder_embedding_dim // 2,
            batch_first=True)
示例#8
0
def Discriminator(input_shape, norm_layer, use_antialias, impl, ndf=64):
    """ Create a PatchGAN discriminator.
    PatchGAN classifier described in the original pix2pix paper (https://arxiv.org/abs/1611.07004).
    Such a patch-level discriminator architecture has fewer parameters
    than a full-image discriminator and can work on arbitrarily-sized images
    in a fully convolutional fashion.
    """
    use_bias = (norm_layer == 'instance')

    inputs = Input(shape=input_shape)

    x = inputs
    # Three downsampling stages with ndf, 2*ndf and 4*ndf filters. The first
    # stage uses the ConvBlock defaults for bias/normalization; downsampling
    # is done either by antialiased blur-pooling or by strided convolution.
    for stage, n_filters in enumerate((ndf, ndf * 2, ndf * 4)):
        norm_kwargs = {} if stage == 0 else {
            'use_bias': use_bias,
            'norm_layer': norm_layer,
        }
        if use_antialias:
            x = ConvBlock(n_filters, 4, padding='same',
                          activation=tf.nn.leaky_relu, **norm_kwargs)(x)
            x = AntialiasSampling(4, mode='down', impl=impl)(x)
        else:
            x = ConvBlock(n_filters, 4, strides=2, padding='same',
                          activation=tf.nn.leaky_relu, **norm_kwargs)(x)

    # Head: one more stride-1 conv at 8*ndf, then the 1-channel patch output.
    x = Padding2D(1, pad_type='constant')(x)
    x = ConvBlock(ndf * 8, 4, padding='valid', use_bias=use_bias,
                  norm_layer=norm_layer, activation=tf.nn.leaky_relu)(x)
    x = Padding2D(1, pad_type='constant')(x)
    outputs = ConvBlock(1, 4, padding='valid', use_bias=use_bias)(x)

    return Model(inputs=inputs, outputs=outputs, name='discriminator')
示例#9
0
def Generator(input_shape,
              output_shape,
              norm_layer,
              use_antialias: bool,
              resnet_blocks: int,
              downsample_blocks: int,
              impl,
              ngf=64,
              max_kernel_size=256,
              use_noise=False,
              freeze_noise=False):
    """ Create a Resnet-based generator.
    Adapted from Justin Johnson's neural style transfer project (https://github.com/jcjohnson/fast-neural-style).
    For BatchNorm, we use learnable affine parameters and track running statistics (mean/stddev).
    For InstanceNorm, we do not use learnable affine parameters and do not track running statistics.
    """
    use_bias = (norm_layer == 'instance')

    def get_n_filter(i: int) -> int:
        """Filter count for resolution level `i`, capped at max_kernel_size."""
        return min(ngf * 2**i, max_kernel_size)

    inputs = Input(shape=input_shape)

    # Stem: reflect-padded 7x7 convolution at full resolution.
    x = Padding2D(3, pad_type='reflect')(inputs)
    x = ConvDepthwiseBlock(get_n_filter(0),
                           7,
                           padding='valid',
                           use_bias=use_bias,
                           norm_layer=norm_layer,
                           activation='relu')(x)
    # Downsampling: strided 3x3 convolutions, doubling filters at each level.
    for i in range(1, downsample_blocks + 1):
        x = ConvDepthwiseBlock(get_n_filter(i),
                               3, (2, 2),
                               padding='same',
                               use_bias=use_bias,
                               norm_layer=norm_layer,
                               activation='relu')(x)

    # Bottleneck: residual blocks at the lowest resolution.
    for _ in range(resnet_blocks):
        x = InvertedResBlock(get_n_filter(downsample_blocks), 3, use_bias,
                             norm_layer)(x)

    # Upsampling back to full resolution, optionally injecting noise per level.
    for i in range(downsample_blocks - 1, -1, -1):
        if use_antialias:
            # Bilinear upsampling + conv instead of a transposed convolution.
            x = tf.keras.layers.UpSampling2D(size=(2, 2),
                                             interpolation='bilinear')(x)
            if use_noise:
                x = RandomNoise(name=f'noise{i}', freeze_noise=freeze_noise)(x)
            x = ConvBlock(get_n_filter(i),
                          3, (1, 1),
                          padding='same',
                          use_bias=use_bias,
                          norm_layer=norm_layer,
                          activation='relu')(x)
        else:
            if use_noise:
                x = RandomNoise(name=f'noise{i}', freeze_noise=freeze_noise)(x)
            x = ConvTransposeBlock(get_n_filter(i),
                                   3, (2, 2),
                                   padding='same',
                                   use_bias=use_bias,
                                   norm_layer=norm_layer,
                                   activation='relu')(x)

    # Head: reflect-padded 7x7 conv to the output channel count, tanh-bounded.
    x = Padding2D(3, pad_type='reflect')(x)
    outputs = ConvBlock(output_shape[-1],
                        7,
                        padding='valid',
                        activation='tanh',
                        use_bias=use_bias)(x)

    return Model(inputs=inputs, outputs=outputs, name='generator')