Exemplo n.º 1
0
 def test_interpret_one_hot_on_A0(self):
     """Round-trip MIDI pitch 21 through one-hot encoding and back."""
     midi_pitch_input = 21
     encoded = encoder.one_hot_encode_midi_pitch(midi_pitch_input)

     # Blank line ahead of whatever interpret_one_hot emits with printing=True.
     print()
     midi_pitch_output = encoder.interpret_one_hot(encoded, printing=True)

     summary = f'midi_pitch_input: {midi_pitch_input:>3}    midi_pitch_output: {midi_pitch_output:>3}'
     print(summary)

     # Decoding must give back exactly the pitch that was encoded.
     self.assertEqual(midi_pitch_input, midi_pitch_output)
Exemplo n.º 2
0
 def test_interpret_one_hot_on_rest(self):
     """Round-trip encoder.REST_MIDI_ENCODING through one-hot encoding and back."""
     midi_pitch_input = encoder.REST_MIDI_ENCODING
     encoded = encoder.one_hot_encode_midi_pitch(midi_pitch_input)

     # Blank line ahead of whatever interpret_one_hot emits with printing=True.
     print()
     midi_pitch_output = encoder.interpret_one_hot(encoded, printing=True)

     summary = f'midi_pitch_input: {midi_pitch_input:>3}    midi_pitch_output: {midi_pitch_output:>3}'
     print(summary)

     # Decoding must give back exactly the value that was encoded.
     self.assertEqual(midi_pitch_input, midi_pitch_output)
Exemplo n.º 3
0
 def test_interpret_one_hot_on_putting_lows_last(self):
     """Round-trip MIDI pitch 60 with low_last=True on both encode and decode."""
     midi_pitch_input = 60
     encoded = encoder.one_hot_encode_midi_pitch(midi_pitch_input, low_last=True)

     # Blank line ahead of whatever interpret_one_hot emits with printing=True.
     print()
     midi_pitch_output = encoder.interpret_one_hot(
         encoded, low_last=True, printing=True)

     summary = f'midi_pitch_input: {midi_pitch_input:>3}    midi_pitch_output: {midi_pitch_output:>3}'
     print(summary)

     # Decoding must give back exactly the pitch that was encoded.
     self.assertEqual(midi_pitch_input, midi_pitch_output)
Exemplo n.º 4
0
def show_example_prediction(model_name,
                            i=0,
                            version=1,
                            x_val=None,
                            y_val=None,
                            printing_in_full=False):
    """Print a model's prediction for one validation instance beside its label.

    Args:
        model_name: Forwarded to load_model_and_get_predictions.
        i: Index of the validation instance to show.
        version: Forwarded to load_data_arrays when the validation arrays
            have to be loaded here.
        x_val: Validation inputs, indexed by ``i``.
        y_val: Validation targets, indexed by ``i``.
        printing_in_full: When true, also print the model input, the
            prediction and the ground truth together with their shapes.

    Returns:
        None. Everything is reported through ``print``.
    """
    # If either array is missing, both are replaced by the loaded ones.
    if x_val is None or y_val is None:
        _, _, x_val, y_val = load_data_arrays(version)

    features, example_ground_truth = x_val[i], y_val[i]

    # The model is fed a batch of one: (1, number of features, 1).
    model_input = features.reshape(1, features.shape[0], 1)
    batch_predictions = load_model_and_get_predictions(model_name, model_input)
    example_prediction = batch_predictions[0]

    # Same comparison at three levels: raw index, MIDI pitch, note name.
    i_prediction = np.argmax(example_prediction)
    i_ground_truth = np.argmax(example_ground_truth)
    midi_pitch_prediction = interpret_one_hot(example_prediction)
    midi_pitch_ground_truth = interpret_one_hot(example_ground_truth)
    note_name_prediction = get_note_name(midi_pitch_prediction)
    note_name_ground_truth = get_note_name(midi_pitch_ground_truth)

    if printing_in_full:
        print(f'\nmodel_input: {model_input.shape}')
        print(model_input)
        print(f'\nexample_prediction: {example_prediction.shape}')
        print(example_prediction)
        print(f'\nexample_ground_truth: {example_ground_truth.shape}')
        print(example_ground_truth)

    print()
    print(f'val instance {i}')
    print(
        f'         i_prediction: {i_prediction:>4}            i_ground_truth: {i_ground_truth:>4}'
    )
    print(
        f'midi_pitch_prediction: {midi_pitch_prediction:>4}   midi_pitch_ground_truth: {midi_pitch_ground_truth:>4}'
    )
    print(
        f' note_name_prediction: {note_name_prediction:>4}    note_name_ground_truth: {note_name_ground_truth:>4}'
    )
def plot_wav_prediction(note,
                        example,
                        model_name,
                        method='scipy',
                        printing=False,
                        plotting_spectrogram=False,
                        showing=True):
    """Plot a model's per-window MIDI pitch prediction for a single-note wav.

    The file read is ``wav_files/single_{note}_{example}.wav``. Each column
    of its spectrogram is passed to the model on its own and the result is
    turned into a pitch with interpret_one_hot.

    Args:
        note: Used to build the wav file name.
        example: Used to build the wav file name.
        model_name: Forwarded to load_model; also shown in the plot title.
        method: 'scipy' selects the 'scipy' spectrogram strategy; any other
            value selects 'pyplot'.
        printing: When true, print the spectrogram shape and, per window,
            the predicted pitch and the model output.
        plotting_spectrogram: When true, obtain the spectrogram through
            plot_spectrogram (with showing=False) instead of get_spectrogram.
        showing: When true, call ``plt.show()`` at the end.
    """
    wav_file = f'wav_files/single_{note}_{example}.wav'
    model = load_model(model_name)

    # Both branches of the original differ only in the strategy string.
    strategy = 'scipy' if method == 'scipy' else 'pyplot'
    if plotting_spectrogram:
        _, times, spectrogram = plot_spectrogram(wav_file,
                                                 strategy=strategy,
                                                 showing=False)
    else:
        _, times, spectrogram = get_spectrogram(wav_file, strategy=strategy)

    if printing:
        print(spectrogram.shape)

    # One prediction per spectrogram column.
    n_windows = spectrogram.shape[1]
    midi_pitch_predictions = np.zeros(n_windows)
    for window in range(n_windows):
        # The model is fed a batch of one: (1, number of rows, 1).
        periodogram = spectrogram[:, window].reshape(1, spectrogram.shape[0], 1)
        pitch_probabilities = model.predict(periodogram)[0]
        midi_pitch_predictions[window] = interpret_one_hot(pitch_probabilities)
        if printing:
            print(midi_pitch_predictions[window])
            print(pitch_probabilities)

    plt.figure()
    plt.title(f'Prediction of \"{wav_file}\"\nby model \"{model_name}\"')
    plt.plot(times, midi_pitch_predictions)
    plt.xlabel('time (seconds)')
    plt.ylabel('MIDI pitch')
    plt.ylim(-2, 109)
    if showing:
        plt.show()
def predict_each_window_of_wav_file(file_name,
                                    wav_path='wav_files',
                                    adding_spectral_powers=True,
                                    normalising=True,
                                    window_size=25,
                                    model_name=None,
                                    printing=False):
    """Predict a pitch for every spectrogram window of a wav file.

    Args:
        file_name: Name of the wav file without the ``.wav`` extension.
        wav_path: Directory holding the file; if None, ``{file_name}.wav``
            is used as the path on its own.
        adding_spectral_powers: When true, pass the periodograms through
            add_spectral_powers before prediction.
        normalising: When true, pass the periodograms through normalise.
        window_size: Forwarded to get_spectrogram.
        model_name: Forwarded to load_model.
            NOTE(review): the default None is handed straight to
            load_model - confirm that is supported.
        printing: When true, print the shape and contents of the result.

    Returns:
        An object-dtype NumPy array holding one interpret_one_hot result
        per row of the model output.
    """
    wav_file_full_path = (f'{file_name}.wav' if wav_path is None else
                          f'{wav_path}/{file_name}.wav')

    # Swapping the spectrogram's two axes gives one periodogram per row.
    _, _, spectrogram = get_spectrogram(wav_file_full_path,
                                        window_size=window_size)
    periodograms = np.swapaxes(spectrogram, 0, 1)
    if adding_spectral_powers:
        periodograms = add_spectral_powers(periodograms)

    # Append a trailing axis of length 1 before handing the data to the model.
    n_rows, n_columns = periodograms.shape[0], periodograms.shape[1]
    periodograms = periodograms.reshape((n_rows, n_columns, 1))

    if normalising:
        periodograms = normalise(
            periodograms,
            spectral_powers_present=adding_spectral_powers,
            first_order_differences_present=False)

    # Predict every window in one call, then decode the rows one by one.
    model = load_model(model_name)
    probabilities = model.predict(periodograms)
    predictions = np.empty(len(probabilities), dtype=object)
    for index, window_probabilities in enumerate(probabilities):
        predictions[index] = interpret_one_hot(window_probabilities,
                                               encoding=None)

    if printing:
        print(predictions.shape)
        print(predictions)

    return predictions
def create_comparison_text_file(file_name,
                                model_name,
                                window_size=50,
                                wav_path='wav_files',
                                xml_path='xml_files',
                                adding_spectral_powers=True,
                                normalising=True,
                                save_name=None,
                                printing=False):
    """Write ground-truth pitches and model predictions side by side.

    The output goes to ``txt_files/{save_name}.txt`` as three rows (time
    step, ground truth, model predictions), each value left-aligned in a
    five-character column.

    Args:
        file_name: Name of the wav file without the ``.wav`` extension;
            also forwarded to get_monophonic_ground_truth.
        model_name: Forwarded to load_model.
        window_size: Forwarded to get_monophonic_ground_truth and
            get_spectrogram.
        wav_path: Directory holding the wav file; if None,
            ``{file_name}.wav`` is used as the path on its own.
        xml_path: Forwarded to get_monophonic_ground_truth.
        adding_spectral_powers: When true, pass the periodograms through
            add_spectral_powers before prediction.
        normalising: When true, pass the periodograms through normalise.
        save_name: Base name of the text file; defaults to ``file_name``.
        printing: When true, print the spectrogram, the predictions and
            the ground truth together with their shapes.

    Returns:
        None. The result is the text file (plus optional printing).
    """
    if save_name is None:
        save_name = file_name

    if wav_path is None:
        wav_file_full_path = f'{file_name}.wav'
    else:
        wav_file_full_path = f'{wav_path}/{file_name}.wav'

    # get the ground-truth pitch for the file
    ground_truth = get_monophonic_ground_truth(file_name,
                                               wav_path=wav_path,
                                               xml_path=xml_path,
                                               window_size=window_size)

    # get the spectrogram of the file and swap the axes to get an array of periodograms
    _, _, spectrogram = get_spectrogram(wav_file_full_path,
                                        window_size=window_size)
    periodograms = np.swapaxes(spectrogram, 0, 1)
    if adding_spectral_powers:
        periodograms = add_spectral_powers(periodograms)
    periodograms = periodograms.reshape(
        (periodograms.shape[0], periodograms.shape[1], 1))

    if normalising:
        periodograms = normalise(
            periodograms,
            spectral_powers_present=adding_spectral_powers,
            first_order_differences_present=False)

    # load the specified model and use it to predict the pitch at each window
    model = load_model(model_name)
    probabilities = model.predict(periodograms)
    predictions = np.empty(len(probabilities), dtype=object)
    for i, window_probabilities in enumerate(probabilities):
        predictions[i] = interpret_one_hot(window_probabilities, encoding=None)

    # write the ground truth pitches and pitch predictions to a text file;
    # the with-block closes the file even if formatting a value raises
    with open(f'txt_files/{save_name}.txt', 'w') as f:
        f.write('        time step:   ')
        f.write(''.join(
            f'{time_step:<5}' for time_step in range(len(ground_truth))))
        f.write('\n')
        f.write('     ground truth:   ')
        f.write(''.join(f'{pitch:<5}' for pitch in ground_truth))
        f.write('\n')
        f.write('model predictions:   ')
        f.write(''.join(f'{pitch:<5}' for pitch in predictions))

    if printing:
        print(spectrogram.shape)
        print(spectrogram)
        print()
        print(predictions.shape)
        print(predictions)
        print()
        print(ground_truth.shape)
        print(ground_truth)