# Example no. 1
def generator(options):
    """Build a SEGAN-style generator: a strided Conv1D encoder, an optional
    noise input ``z`` concatenated at the bottleneck, and a Conv2DTranspose
    decoder with encoder-to-decoder skip connections.

    Args:
        options: dict of hyper-parameters. Keys read below include
            'generator_encoder_num_kernels', 'generator_decoder_num_kernels',
            'window_length', 'feat_dim', 'filter_length', 'strides',
            'padding', 'use_bias', 'initializer_std_dev', 'show_summary',
            'z_in_use' and 'z_dim'.

    Returns:
        A ``keras.Model`` whose inputs are ``[audio, z]`` when
        ``options['z_in_use']`` is truthy, else ``[audio]``.
    """
    skips = []  # pre-activation encoder outputs saved for skip connections
    audio_shape = (options['window_length'], options['feat_dim'])
    generator_encoder_num_kernels = options['generator_encoder_num_kernels']
    generator_decoder_num_kernels = options['generator_decoder_num_kernels']
    num_enc_layers = len(generator_encoder_num_kernels)
    num_dec_layers = len(generator_decoder_num_kernels)
    filter_length = options['filter_length']
    strides = options['strides']
    padding = options['padding']
    use_bias = options['use_bias']
    std_dev = options['initializer_std_dev']
    show_summary = options['show_summary']
    z_in_use = options['z_in_use']

    ## Encoder: stack of strided Conv1D layers, each followed by PReLU
    encoder_in = Input(shape=audio_shape)
    encoder_out = encoder_in

    for layer_i, num_kernels in enumerate(generator_encoder_num_kernels):
        encoder_out = Conv1D(
            num_kernels,
            filter_length,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=std_dev))(encoder_out)

        # Save every layer's output except the last for the decoder skips
        if layer_i < num_enc_layers - 1:
            skips.append(encoder_out)

        encoder_out = PReLU(alpha_initializer='zeros',
                            weights=None)(encoder_out)

    ## Intermediate noise input z (only wired into the graph when z_in_use)
    z_dim = options['z_dim']
    z = Input(shape=z_dim)

    ## Decoder: mirror of the encoder built from Conv2DTranspose layers
    if z_in_use:
        decoder_out = keras.layers.concatenate([encoder_out, z])
    else:
        decoder_out = encoder_out

    # Conv2DTranspose loses static shape information, so track the expected
    # (rows, cols) manually and re-impose it after every layer.
    n_rows = z_dim[0]
    n_cols = decoder_out.get_shape().as_list()[-1]

    for layer_i, num_kernels in enumerate(generator_decoder_num_kernels):
        # Conv2DTranspose needs a 3D (H, W, C) input; lift 2D -> (rows, 1, ch)
        shape_in = decoder_out.get_shape().as_list()
        decoder_out = Reshape((shape_in[1], 1, shape_in[2]))(decoder_out)
        decoder_out = Conv2DTranspose(
            num_kernels, [filter_length, 1],
            strides=[strides, 1],
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.truncated_normal_initializer(
                stddev=std_dev))(decoder_out)

        # Reshape back to 2D; rows are upsampled by `strides` per layer
        n_rows = strides * n_rows
        n_cols = num_kernels
        decoder_out.set_shape([None, n_rows, 1, n_cols])
        # BUGFIX: the original if/else here had two byte-identical branches;
        # a single Reshape is equivalent.
        decoder_out = Reshape((n_rows, n_cols))(decoder_out)

        # PReLU + skip connection on every decoder layer except the last.
        # BUGFIX: the original compared against the *encoder* depth here;
        # use the decoder depth so the check stays correct if the two stacks
        # ever differ in length (identical behavior when they are equal).
        if layer_i < num_dec_layers - 1:
            decoder_out = PReLU(alpha_initializer='zeros',
                                weights=None)(decoder_out)
            # Skips are consumed in reverse order of their creation
            decoder_out = keras.layers.concatenate(
                [decoder_out, skips[-(layer_i + 1)]])

    ## Create the model graph
    if z_in_use:
        G = Model(inputs=[encoder_in, z], outputs=decoder_out)
    else:
        G = Model(inputs=[encoder_in], outputs=decoder_out)

    if show_summary:
        G.summary()

    return G
# Example no. 2
def generator(opts):
    """Build a SEGAN-style generator: optional pre-emphasis Conv1D front end,
    a strided Conv1D encoder, an optional noise input z concatenated at the
    bottleneck, and a Conv2DTranspose decoder with encoder-to-decoder skip
    connections.

    Args:
        opts: dict of hyper-parameters. Keys read include 'filterlength',
            'strides', 'g_enc_numkernels', 'g_dec_numkernels',
            'window_length', 'featdim', 'batch_size', 'GT_init_G', 'gt',
            'preemph_G', 'preemph_init', 'preemph_stride', 'applyprelu',
            'leakyrelualpha', 'z_off', 'Gtanh', 'gt_fixed', 'show_summary'.

    Returns:
        A keras Model; inputs are [wav_in, z] unless opts['z_off'] is
        truthy, in which case the only input is wav_in.
    """
    kwidth = opts['filterlength']
    strides = opts['strides']
    pool = strides
    g_enc_numkernels = opts['g_enc_numkernels']
    g_dec_numkernels = opts['g_dec_numkernels']
    window_length = opts['window_length']
    featdim = opts['featdim']
    batch_size = opts['batch_size']
    # Optional gammatone(?) initialization of the first encoder layer.
    # NOTE(review): assumes opts['gt'] is (filter_len, num_filters) before the
    # expand_dims — confirm against the caller.
    if opts['GT_init_G']:
        gt = np.expand_dims(opts['gt'], axis=1)
        num_gt_filters = gt.shape[2]
        gt_filterlength = gt.shape[0]
        gt_bias = np.zeros((num_gt_filters, ))
    # Optional fixed pre-emphasis filter weights for the front-end Conv1D
    if opts['preemph_G']:
        preemph_init = np.array(opts['preemph_init']).T
        preemph_init = np.expand_dims(preemph_init, axis=1)

    use_bias = True
    skips = []  # encoder activations saved for the decoder skip connections
    #kernel_init = keras.initializers.TruncatedNormal(stddev=0.02)
    kernel_init = 'glorot_uniform'

    wav_in = Input(shape=(window_length, featdim))
    # Pre-emphasis as a learnable(-by-default) width-2 conv; weights are
    # overwritten with preemph_init after the model is built (see below).
    if opts['preemph_G']:
        enc_out = Conv1D(1,
                         2,
                         kernel_initializer=kernel_init,
                         strides=opts['preemph_stride'],
                         padding="same",
                         use_bias=False,
                         name="G_preemphlayer")(wav_in)
    else:
        enc_out = wav_in

    # Defining the Encoder
    for layernum, numkernels in enumerate(g_enc_numkernels):
        # First layer may be the GT-initialized conv (named so its weights
        # can be set after the graph is built)
        if layernum == 0 and opts['GT_init_G']:
            enc_out = Conv1D(num_gt_filters,
                             gt_filterlength,
                             kernel_initializer=kernel_init,
                             strides=pool,
                             padding="same",
                             use_bias=use_bias,
                             name="G_gtlayer")(enc_out)
        else:
            enc_out = Conv1D(numkernels,
                             kwidth,
                             strides=pool,
                             kernel_initializer=kernel_init,
                             padding="same",
                             use_bias=use_bias)(enc_out)

        # for skip connections (every layer except the last one)
        if layernum < len(g_enc_numkernels) - 1:
            skips.append(enc_out)
        if opts['applyprelu']:
            enc_out = PReLU(alpha_initializer='zero', weights=None)(enc_out)
        else:
            enc_out = LeakyReLU(alpha=opts['leakyrelualpha'])(enc_out)

    # Bottleneck shape after num_enc_layers stride-`pool` downsamplings
    num_enc_layers = len(g_enc_numkernels)
    z_rows = int(window_length / (pool**num_enc_layers))
    z_cols = g_enc_numkernels[-1]

    # Adding the intermediate noise layer (concatenated at the bottleneck)
    if not opts['z_off']:
        z = Input(shape=(z_rows, z_cols), name='noise_input')
        dec_out = keras.layers.concatenate([enc_out, z])
    else:
        dec_out = enc_out

    # Now to the decoder part.
    # Conv2DTranspose loses static shape info, so track (nrows, ncols)
    # manually and re-impose them after each layer.
    nrows = z_rows
    ncols = dec_out.get_shape().as_list()[-1]
    for declayernum, decnumkernels in enumerate(g_dec_numkernels):
        # reshape for the conv2dtranspose layer as it needs 3D input
        indim = dec_out.get_shape().as_list()
        newshape = (indim[1], 1, indim[2])
        dec_out = Reshape(newshape)(dec_out)
        # add the conv2dtranspose layer
        dec_out = Conv2DTranspose(decnumkernels, [kwidth, 1],
                                  strides=[strides, 1],
                                  kernel_initializer=kernel_init,
                                  padding="same",
                                  use_bias=use_bias)(dec_out)
        # Reshape back to 2D
        nrows *= strides  # number of rows get multiplied by strides
        ncols = decnumkernels  # number of cols is the same as number of kernels
        dec_out.set_shape(
            [None, nrows, 1,
             ncols])  # for correcting shape issue with conv2dtranspose
        newshape = (nrows, ncols)
        if declayernum == len(g_dec_numkernels) - 1:
            dec_out = Reshape(newshape, name="g_output")(
                dec_out)  # name the final output as  g_output
        else:
            dec_out = Reshape(newshape)(dec_out)

        # add skip and prelu until the second-last layer
        if declayernum < len(g_dec_numkernels) - 1:
            if opts['applyprelu']:
                dec_out = PReLU(alpha_initializer='zero',
                                weights=None)(dec_out)
            else:
                dec_out = LeakyReLU(alpha=opts['leakyrelualpha'])(dec_out)
            # Now add the skip connection (consumed in reverse creation order)
            skip_ = skips[-(declayernum + 1)]
            dec_out = keras.layers.concatenate([dec_out, skip_])

    # Add tanh if G uses tanh activation
    if opts['Gtanh']:
        dec_out = Activation('tanh')(dec_out)

    # Create the model graph
    if opts['z_off']:
        G = Model(inputs=[wav_in], outputs=[dec_out])
    else:
        G = Model(inputs=[wav_in, z], outputs=[dec_out])

    # add GT initilization (weights injected by layer name after build)
    if opts['GT_init_G']:
        G.get_layer("G_gtlayer").set_weights([gt, gt_bias])
        # set it trainable or not
        if opts['gt_fixed']:
            G.get_layer("G_gtlayer").trainable = False

    # add preemph initialization (fixed filter taps, no bias)
    if opts['preemph_G']:
        G.get_layer("G_preemphlayer").set_weights([preemph_init])

    if opts['show_summary']:
        G.summary()

    return G