import os

import numpy as np
import pandas as pd
import tensorflow as tf

# Project-local helpers used below (Generator, Discriminator, the loss
# functions, the audio utilities and the path constants) come from the
# repository's own modules; those imports are not part of this excerpt.


def train(data, epochs, batch_size=1, gen_lr=5e-6, disc_lr=5e-7, epoch_offset=0):
    generator = Generator(input_shape=[None,None,2])
    discriminator = Discriminator(input_shape=[None,None,1])

    generator_optimizer = tf.keras.optimizers.Adam(gen_lr)
    discriminator_optimizer = tf.keras.optimizers.Adam(disc_lr)

    model_name = data['training'].origin+'_2_any'
    checkpoint_prefix = os.path.join(CHECKPOINT_DIR, model_name)
    if(not os.path.isdir(checkpoint_prefix)):
        os.makedirs(checkpoint_prefix)
    else:
        if(os.path.isfile(os.path.join(checkpoint_prefix, 'generator.h5'))):
            generator.load_weights(os.path.join(checkpoint_prefix, 'generator.h5'), by_name=True)
            print('Generator weights restored from ' + checkpoint_prefix)

        if(os.path.isfile(os.path.join(checkpoint_prefix, 'discriminator.h5'))):
            discriminator.load_weights(os.path.join(checkpoint_prefix, 'discriminator.h5'), by_name=True)
            print('Discriminator weights restored from ' + checkpoint_prefix)

    # Get the number of batches in the training set
    epoch_size = len(data['training'])

    print()
    print("Started training with the following parameters: ")
    print("\tCheckpoints: \t", checkpoint_prefix)
    print("\tEpochs: \t", epochs)
    print("\tgen_lr: \t", gen_lr)
    print("\tdisc_lr: \t", disc_lr)
    print("\tBatchSize: \t", batch_size)
    print("\tnBatches: \t", epoch_size)
    print()

    # Precompute the test input and target for validation
    audio_input = load_audio(os.path.join(TEST_AUDIOS_PATH, data['training'].origin+'.wav'))
    mag_input, phase = forward_transform(audio_input)
    mag_input = amplitude_to_db(mag_input)
    test_input = slice_magnitude(mag_input, mag_input.shape[0])
    test_input = (test_input * 2) - 1

    test_inputs = []
    test_targets = []

    for t in data['training'].target:
        audio_target = load_audio(os.path.join(TEST_AUDIOS_PATH, t+'.wav'))
        mag_target, _ = forward_transform(audio_target)
        mag_target = amplitude_to_db(mag_target)
        test_target = slice_magnitude(mag_target, mag_target.shape[0])
        test_target = (test_target * 2) - 1

        test_target_perm = test_target[np.random.permutation(test_target.shape[0]),:,:,:]
        test_inputs.append(np.concatenate([test_input, test_target_perm], axis=3))
        test_targets.append(test_target)

    gen_mae_list, gen_mae_val_list  = [], []
    gen_loss_list, gen_loss_val_list  = [], []
    disc_loss_list, disc_loss_val_list  = [], []
    for epoch in range(epochs):
        gen_mae_total, gen_mae_val_total = 0, 0
        gen_loss_total, gen_loss_val_total = 0, 0
        disc_loss_total, disc_loss_val_total = 0, 0

        print('Epoch {}/{}'.format((epoch+1)+epoch_offset, epochs+epoch_offset))
        progbar = tf.keras.utils.Progbar(epoch_size)
        for i in range(epoch_size):
            # Get the data from the DataGenerator
            input_image, target = data['training'][i]
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                # Generate a fake image
                gen_output = generator(input_image, training=True)
                
                # Run the discriminator on the real and generated pairs
                disc_real_output = discriminator([input_image[:,:,:,0:1], target], training=True)
                disc_generated_output = discriminator([input_image[:,:,:,0:1], gen_output], training=True)
                
                # Compute the losses
                gen_mae = l1_loss(target, gen_output)
                gen_loss = generator_loss(disc_generated_output, gen_mae)
                disc_loss = discriminator_loss(disc_real_output, disc_generated_output)
                
            # Compute the gradients (outside the tape contexts)
            generator_gradients = gen_tape.gradient(gen_loss, generator.trainable_variables)
            discriminator_gradients = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

            # Apply the gradients
            generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))
            discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))

            # Update the progress bar
            gen_mae = gen_mae.numpy()
            gen_loss = gen_loss.numpy()
            disc_loss = disc_loss.numpy()

            gen_mae_total += gen_mae
            gen_loss_total += gen_loss
            disc_loss_total += disc_loss

            progbar.add(1, values=[
                ("gen_mae", gen_mae),
                ("gen_loss", gen_loss),
                ("disc_loss", disc_loss)
            ])

        gen_mae_list.append(gen_mae_total/epoch_size)
        gen_mae_val_list.append(gen_mae_val_total/epoch_size)
        gen_loss_list.append(gen_loss_total/epoch_size)
        gen_loss_val_list.append(gen_loss_val_total/epoch_size)
        disc_loss_list.append(disc_loss_total/epoch_size)
        disc_loss_val_list.append(disc_loss_val_total/epoch_size)

        history = pd.DataFrame({
                                    'gen_mae': gen_mae_list, 
                                    'gen_mae_val': gen_mae_val_list, 
                                    'gen_loss': gen_loss_list,
                                    'gen_loss_val': gen_loss_val_list,
                                    'disc_loss': disc_loss_list,
                                    'disc_loss_val': disc_loss_val_list
                                })
        write_csv(history, os.path.join(checkpoint_prefix, 'history.csv'))

        epoch_output = os.path.join(OUTPUT_PATH, model_name, str((epoch+1)+epoch_offset).zfill(3))
        init_directory(epoch_output)

        # Generate audio and save spectrograms for the full-length test clips
        for j in range(len(data['training'].target)):
            prediction = generator(test_inputs[j], training=False)
            prediction = (prediction + 1) / 2
            generate_images(prediction, (test_inputs[j] + 1) / 2, (test_targets[j] + 1) / 2, os.path.join(epoch_output, 'spectrogram_'+data['training'].target[j]))
            generate_audio(prediction, phase, os.path.join(epoch_output, 'audio_'+data['training'].target[j]+'.wav'))
        print('Epoch outputs saved in ' + epoch_output)

        # Save the weights
        generator.save_weights(os.path.join(checkpoint_prefix, 'generator.h5'))
        discriminator.save_weights(os.path.join(checkpoint_prefix, 'discriminator.h5'))
        print('Weights saved in ' + checkpoint_prefix)

        # Callback at the end of the epoch for the DataGenerator
        data['training'].on_epoch_end()
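
# The training loop above calls three loss helpers (l1_loss, generator_loss,
# discriminator_loss) that are not shown in this excerpt. Below is a minimal
# sketch, assuming the standard pix2pix formulation (binary cross-entropy on
# discriminator logits plus an L1 term weighted by LAMBDA); the repository's
# actual definitions and weighting may differ.
loss_object = tf.keras.losses.BinaryCrossentropy(from_logits=True)
LAMBDA = 100  # assumed L1 weight, as in the pix2pix paper


def l1_loss(target, gen_output):
    # Mean absolute error between the target and generated spectrograms
    return tf.reduce_mean(tf.abs(target - gen_output))


def generator_loss(disc_generated_output, gen_mae):
    # Adversarial term: the generator tries to make the discriminator output
    # ones for its fakes, plus a weighted reconstruction (L1) term.
    gan_loss = loss_object(tf.ones_like(disc_generated_output), disc_generated_output)
    return gan_loss + LAMBDA * gen_mae


def discriminator_loss(disc_real_output, disc_generated_output):
    # Real patches should be scored as ones, generated patches as zeros.
    real_loss = loss_object(tf.ones_like(disc_real_output), disc_real_output)
    generated_loss = loss_object(tf.zeros_like(disc_generated_output), disc_generated_output)
    return real_loss + generated_loss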
Example #2
    mag = db_to_amplitude(mag_db)
    audio_out = inverse_transform(mag, phase)
    write_audio(output_filename, audio_out)
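
# The head of this function is missing from the excerpt; only its final three
# lines appear above. Below is a minimal sketch of a complete predict(),
# assuming it mirrors the preprocessing used during training (forward
# transform, dB scaling, [-1, 1] normalisation) and runs the whole spectrogram
# through the generator in a single pass; the repository's version may instead
# use slice_magnitude() and reassemble the slices.
def predict(model, input_filename, output_filename):
    audio_in = load_audio(input_filename)
    mag, phase = forward_transform(audio_in)
    mag_db = amplitude_to_db(mag)
    mag_db = (mag_db * 2) - 1                       # scale to [-1, 1]
    batch = mag_db[np.newaxis, :, :, np.newaxis]    # add batch and channel dims
    pred = model(batch, training=False)[0, :, :, 0].numpy()
    pred = (pred + 1) / 2                           # back to [0, 1]
    mag_out = db_to_amplitude(pred)
    audio_out = inverse_transform(mag_out, phase)
    write_audio(output_filename, audio_out)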


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument('--model', required=True)
    ap.add_argument('--input', required=True)
    ap.add_argument('--output', required=True)
    args = ap.parse_args()

    assert os.path.isfile(args.model), 'Model not found'
    assert os.path.isfile(args.input), 'Input audio not found'

    _, ext = os.path.splitext(args.input)
    assert ext in ['.wav', '.mp3', '.ogg'], 'Invalid audio format'

    # Enable mixed precision
    os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

    model = Generator(input_shape=[None, None, 1])
    model.load_weights(args.model)
    print('Weights loaded from', args.model)

    base_output_path, _ = os.path.split(args.output)
    init_directory(base_output_path)
    print('Created directory', base_output_path)

    predict(model, args.input, args.output)
    print('Prediction saved in', args.output)
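
# Illustrative invocation of the script above. The script name and the paths
# are placeholders; only the --model/--input/--output flags and the accepted
# .wav/.mp3/.ogg extensions come from the code:
#   python predict.py --model <checkpoint_dir>/generator.h5 --input <song>.wav --output <out_dir>/<song>_out.wav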
Example #3
    ap.add_argument('--duration_rate', required=False, default=4)
    ap.add_argument('--transpose', required=False, default=0)
    args = ap.parse_args()

    assert os.path.isdir(args.nsynth_path), 'NSynth Dataset not found'
    assert os.path.isdir(args.midi_path), 'MIDI Dataset not found'

    instruments = [
        {'name': 'guitar', 'source_type': 'acoustic', 'preset': 0},
        {'name': 'keyboard', 'source_type': 'acoustic', 'preset': 0},
        {'name': 'string', 'source_type': 'acoustic', 'preset': 0},
        {'name': 'synth_lead', 'source_type': 'synthetic', 'preset': 0}
    ]
    
    midifiles = list(files_within(args.midi_path, '*.mid'))
    init_directory(args.audios_path)

    print()
    print("Instrumentos: \t", len(instruments), [instrument['name'] for instrument in instruments])
    print("MIDI archivos: \t", len(midifiles))
    print()

    for instrument in instruments:
        synth = NoteSynthesizer(
                                    dataset_path=args.nsynth_path, 
                                    sr=NSYNTH_SAMPLE_RATE, 
                                    velocities=NSYNTH_VELOCITIES, 
                                    transpose=float(args.transpose)
                                )
        synth.preload_notes(instrument=instrument['name'], source_type=instrument['source_type'])
        
Example #4
def train(data, epochs, batch_size=1, lr=1e-3, epoch_offset=0):
    generator = Generator()
    generator_optimizer = tf.keras.optimizers.Adam(lr)

    model_name = data['training'].origin + '_2_' + data[
        'training'].target + '_generator'
    checkpoint_prefix = os.path.join(CHECKPOINT_DIR, model_name)
    if (not os.path.isdir(checkpoint_prefix)):
        os.makedirs(checkpoint_prefix)
    else:
        if (os.path.isfile(os.path.join(checkpoint_prefix, 'generator.h5'))):
            generator.load_weights(os.path.join(checkpoint_prefix,
                                                'generator.h5'),
                                   by_name=True)
            print('Generator weights restored from ' + checkpoint_prefix)

    # Get the number of batches in the training set
    epoch_size = len(data['training'])

    print()
    print("Started training with the following parameters: ")
    print("\tCheckpoints: \t", checkpoint_prefix)
    print("\tEpochs: \t", epochs)
    print("\tgen_lr: \t", lr)
    print("\tBatchSize: \t", batch_size)
    print("\tnBatches: \t", epoch_size)
    print()

    # Precompute the test input and target for validation
    audio_input = load_audio(
        os.path.join(TEST_AUDIOS_PATH, data['training'].origin + '.wav'))
    mag_input, phase = forward_transform(audio_input)
    mag_input = amplitude_to_db(mag_input)
    test_input = slice_magnitude(mag_input, mag_input.shape[0])
    test_input = (test_input * 2) - 1

    audio_target = load_audio(
        os.path.join(TEST_AUDIOS_PATH, data['training'].target + '.wav'))
    mag_target, _ = forward_transform(audio_target)
    mag_target = amplitude_to_db(mag_target)
    test_target = slice_magnitude(mag_target, mag_target.shape[0])
    test_target = (test_target * 2) - 1

    gen_mae_list, gen_mae_val_list = [], []
    for epoch in range(epochs):
        gen_mae_total, gen_mae_val_total = 0, 0
        print('Epoch {}/{}'.format((epoch + 1) + epoch_offset,
                                   epochs + epoch_offset))
        progbar = tf.keras.utils.Progbar(epoch_size)
        for i in range(epoch_size):
            input_image, target = data['training'][i]
            with tf.GradientTape() as gen_tape:
                # Generate a fake image
                gen_output = generator(input_image, training=True)

                # Compute the losses
                gen_mae = l1_loss(target, gen_output)  # Timbre transfer
                # gen_mae = l1_loss(input_image, gen_output) # Autoencoder

            # Compute the gradients (outside the tape context)
            generator_gradients = gen_tape.gradient(
                gen_mae, generator.trainable_variables)

            # Apply the gradients
            generator_optimizer.apply_gradients(
                zip(generator_gradients, generator.trainable_variables))

            # Update the progress bar
            gen_mae = gen_mae.numpy()
            gen_mae_total += gen_mae
            progbar.add(1, values=[("gen_mae", gen_mae)])

        gen_mae_total /= epoch_size
        gen_mae_list.append(gen_mae_total)
        gen_mae_val_list.append(gen_mae_val_total)

        history = pd.DataFrame({
            'gen_mae': gen_mae_list,
            'gen_mae_val': gen_mae_val_list
        })
        write_csv(history, os.path.join(checkpoint_prefix, 'history.csv'))

        epoch_output = os.path.join(OUTPUT_PATH, model_name,
                                    str((epoch + 1) + epoch_offset).zfill(3))
        init_directory(epoch_output)

        # Generate audio and save spectrograms for the full-length test clip
        prediction = generator(test_input, training=False)
        prediction = (prediction + 1) / 2
        generate_images(prediction, (test_input + 1) / 2,
                        (test_target + 1) / 2,
                        os.path.join(epoch_output, 'spectrogram'))
        generate_audio(prediction, phase,
                       os.path.join(epoch_output, 'audio.wav'))
        print('Epoch outputs saved in ' + epoch_output)

        # Save the weights
        generator.save_weights(os.path.join(checkpoint_prefix, 'generator.h5'))
        print('Weights saved in ' + checkpoint_prefix)

        # Callback at the end of the epoch for the DataGenerator
        data['training'].on_epoch_end()
Example #5
from data import (amplitude_to_db, forward_transform, init_directory,
                  load_audio, slice_magnitude)

if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument('--audios_path', required=True)
    ap.add_argument('--features_path', required=True)
    args = ap.parse_args()

    assert os.path.isdir(args.audios_path), 'Audios not found'

    for instrument in os.listdir(args.audios_path):
        print(instrument)
        audios_dir = os.path.join(args.audios_path, instrument)
        features_dir = os.path.join(args.features_path, instrument)
        init_directory(features_dir)

        for f in os.listdir(audios_dir):
            name, _ = os.path.splitext(f)

            audio = load_audio(os.path.join(audios_dir, f))
            mag, _ = forward_transform(audio)
            mag = amplitude_to_db(mag)

            mag_sliced = slice_magnitude(mag, mag.shape[0])

            print(name, mag_sliced.shape[0])
            for i in range(mag_sliced.shape[0]):
                out_name = os.path.join(features_dir,
                                        name + '_' + str(i).zfill(3) + '.npy')
                if (not os.path.isfile(out_name)):
                    # Assumed completion: save the slice as a .npy feature file
                    np.save(out_name, mag_sliced[i])
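
# slice_magnitude() is imported from data in these scripts but its body is not
# shown. Below is a minimal sketch of one plausible implementation, assuming it
# cuts a (freq_bins, frames) magnitude into consecutive windows of slice_size
# frames and stacks them as a (n_slices, freq_bins, slice_size, 1) batch; the
# repository's version may pad, overlap or normalise differently.
def slice_magnitude(mag, slice_size):
    n_slices = mag.shape[1] // slice_size
    slices = [mag[:, i * slice_size:(i + 1) * slice_size] for i in range(n_slices)]
    return np.stack(slices, axis=0)[..., np.newaxis]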